Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux into drm-next

This backmerges drm-fixes into drm-next mainly for the amdkfd stuff, I'm not 100% confident, but it builds and the amdkfd folks can fix anything up. Signed-off-by: Dave Airlie <airlied@redhat.com> Conflicts: drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
2024-12-30 16:13:54 +08:00 · 2015-01-29 11:45:31 +10:00 · 2015-01-29 11:45:31 +10:00 · b3869b17fd
commit b3869b17fd
parent 7b83741bf7 c59c961ca5
234 changed files with 2574 additions and 2243 deletions
--- a/Documentation/ABI/testing/sysfs-platform-dell-laptop
+++ b/Documentation/ABI/testing/sysfs-platform-dell-laptop
@ -1,60 +0,0 @@
-What:		/sys/class/leds/dell::kbd_backlight/als_setting
-Date:		December 2014
-KernelVersion:	3.19
-Contact:	Gabriele Mazzotta <gabriele.mzt@gmail.com>,
-		Pali Rohár <pali.rohar@gmail.com>
-Description:
-		This file allows to control the automatic keyboard
-		illumination mode on some systems that have an ambient
-		light sensor. Write 1 to this file to enable the auto
-		mode, 0 to disable it.
-
-What:		/sys/class/leds/dell::kbd_backlight/start_triggers
-Date:		December 2014
-KernelVersion:	3.19
-Contact:	Gabriele Mazzotta <gabriele.mzt@gmail.com>,
-		Pali Rohár <pali.rohar@gmail.com>
-Description:
-		This file allows to control the input triggers that
-		turn on the keyboard backlight illumination that is
-		disabled because of inactivity.
-		Read the file to see the triggers available. The ones
-		enabled are preceded by '+', those disabled by '-'.
-
-		To enable a trigger, write its name preceded by '+' to
-		this file. To disable a trigger, write its name preceded
-		by '-' instead.
-
-		For example, to enable the keyboard as trigger run:
-		    echo +keyboard > /sys/class/leds/dell::kbd_backlight/start_triggers
-		To disable it:
-		    echo -keyboard > /sys/class/leds/dell::kbd_backlight/start_triggers
-
-		Note that not all the available triggers can be configured.
-
-What:		/sys/class/leds/dell::kbd_backlight/stop_timeout
-Date:		December 2014
-KernelVersion:	3.19
-Contact:	Gabriele Mazzotta <gabriele.mzt@gmail.com>,
-		Pali Rohár <pali.rohar@gmail.com>
-Description:
-		This file allows to specify the interval after which the
-		keyboard illumination is disabled because of inactivity.
-		The timeouts are expressed in seconds, minutes, hours and
-		days, for which the symbols are 's', 'm', 'h' and 'd'
-		respectively.
-
-		To configure the timeout, write to this file a value along
-		with any the above units. If no unit is specified, the value
-		is assumed to be expressed in seconds.
-
-		For example, to set the timeout to 10 minutes run:
-		    echo 10m > /sys/class/leds/dell::kbd_backlight/stop_timeout
-
-		Note that when this file is read, the returned value might be
-		expressed in a different unit than the one used when the timeout
-		was set.
-
-		Also note that only some timeouts are supported and that
-		some systems might fall back to a specific timeout in case
-		an invalid timeout is written to this file.
--- a/Documentation/devicetree/bindings/arm/arm-boards
+++ b/Documentation/devicetree/bindings/arm/arm-boards
@ -23,7 +23,7 @@ Required nodes:
    range of 0x200 bytes.

 - syscon: the root node of the Integrator platforms must have a
-  system controller node pointong to the control registers,
+  system controller node pointing to the control registers,
  with the compatible string
  "arm,integrator-ap-syscon"
  "arm,integrator-cp-syscon"
--- a/Documentation/devicetree/bindings/arm/fw-cfg.txt
+++ b/Documentation/devicetree/bindings/arm/fw-cfg.txt
@ -0,0 +1,72 @@
+* QEMU Firmware Configuration bindings for ARM
+
+QEMU's arm-softmmu and aarch64-softmmu emulation / virtualization targets
+provide the following Firmware Configuration interface on the "virt" machine
+type:
+
+- A write-only, 16-bit wide selector (or control) register,
+- a read-write, 64-bit wide data register.
+
+QEMU exposes the control and data register to ARM guests as memory mapped
+registers; their location is communicated to the guest's UEFI firmware in the
+DTB that QEMU places at the bottom of the guest's DRAM.
+
+The guest writes a selector value (a key) to the selector register, and then
+can read the corresponding data (produced by QEMU) via the data register. If
+the selected entry is writable, the guest can rewrite it through the data
+register.
+
+The selector register takes keys in big endian byte order.
+
+The data register allows accesses with 8, 16, 32 and 64-bit width (only at
+offset 0 of the register). Accesses larger than a byte are interpreted as
+arrays, bundled together only for better performance. The bytes constituting
+such a word, in increasing address order, correspond to the bytes that would
+have been transferred by byte-wide accesses in chronological order.
+
+The interface allows guest firmware to download various parameters and blobs
+that affect how the firmware works and what tables it installs for the guest
+OS. For example, boot order of devices, ACPI tables, SMBIOS tables, kernel and
+initrd images for direct kernel booting, virtual machine UUID, SMP information,
+virtual NUMA topology, and so on.
+
+The authoritative registry of the valid selector values and their meanings is
+the QEMU source code; the structure of the data blobs corresponding to the
+individual key values is also defined in the QEMU source code.
+
+The presence of the registers can be verified by selecting the "signature" blob
+with key 0x0000, and reading four bytes from the data register. The returned
+signature is "QEMU".
+
+The outermost protocol (involving the write / read sequences of the control and
+data registers) is expected to be versioned, and/or described by feature bits.
+The interface revision / feature bitmap can be retrieved with key 0x0001. The
+blob to be read from the data register has size 4, and it is to be interpreted
+as a uint32_t value in little endian byte order. The current value
+(corresponding to the above outer protocol) is zero.
+
+The guest kernel is not expected to use these registers (although it is
+certainly allowed to); the device tree bindings are documented here because
+this is where device tree bindings reside in general.
+
+Required properties:
+
+- compatible: "qemu,fw-cfg-mmio".
+
+- reg: the MMIO region used by the device.
+  * Bytes 0x0 to 0x7 cover the data register.
+  * Bytes 0x8 to 0x9 cover the selector register.
+  * Further registers may be appended to the region in case of future interface
+    revisions / feature bits.
+
+Example:
+
+/ {
+	#size-cells = <0x2>;
+	#address-cells = <0x2>;
+
+	fw-cfg@9020000 {
+		compatible = "qemu,fw-cfg-mmio";
+		reg = <0x0 0x9020000 0x0 0xa>;
+	};
+};
--- a/Documentation/devicetree/bindings/graph.txt
+++ b/Documentation/devicetree/bindings/graph.txt
@ -19,7 +19,7 @@ type of the connections, they just map their existence. Specific properties
 may be described by specialized bindings depending on the type of connection.

 To see how this binding applies to video pipelines, for example, see
-Documentation/device-tree/bindings/media/video-interfaces.txt.
+Documentation/devicetree/bindings/media/video-interfaces.txt.
 Here the ports describe data interfaces, and the links between them are
 the connecting data buses. A single port with multiple connections can
 correspond to multiple devices being connected to the same physical bus.
--- a/Documentation/devicetree/bindings/vendor-prefixes.txt
+++ b/Documentation/devicetree/bindings/vendor-prefixes.txt
@ -9,7 +9,6 @@ ad	Avionic Design GmbH
 adapteva	Adapteva, Inc.
 adi	Analog Devices, Inc.
 aeroflexgaisler	Aeroflex Gaisler AB
-ak	Asahi Kasei Corp.
 allwinner	Allwinner Technology Co., Ltd.
 altr	Altera Corp.
 amcc	Applied Micro Circuits Corporation (APM, formally AMCC)
@ -20,6 +19,7 @@ amstaos	AMS-Taos Inc.
 apm	Applied Micro Circuits Corporation (APM)
 arm	ARM Ltd.
 armadeus	ARMadeus Systems SARL
+asahi-kasei	Asahi Kasei Corp.
 atmel	Atmel Corporation
 auo	AU Optronics Corporation
 avago	Avago Technologies
@ -130,6 +130,7 @@ pixcir  PIXCIR MICROELECTRONICS Co., Ltd
 powervr	PowerVR (deprecated, use img)
 qca	Qualcomm Atheros, Inc.
 qcom	Qualcomm Technologies, Inc
+qemu	QEMU, a generic and open source machine emulator and virtualizer
 qnap	QNAP Systems, Inc.
 radxa	Radxa
 raidsonic	RaidSonic Technology GmbH
@ -171,6 +172,7 @@ usi	Universal Scientific Industrial Co., Ltd.
 v3	V3 Semiconductor
 variscite	Variscite Ltd.
 via	VIA Technologies, Inc.
+virtio	Virtual I/O Device Specification, developed by the OASIS consortium
 voipac	Voipac Technologies s.r.o.
 winbond Winbond Electronics corp.
 wlf	Wolfson Microelectronics
--- a/15
+++ b/15
@ -698,7 +698,7 @@ L:	alsa-devel@alsa-project.org (moderated for non-subscribers)
 W:	http://blackfin.uclinux.org/
 S:	Supported
 F:	sound/soc/blackfin/*
- 
+
 ANALOG DEVICES INC IIO DRIVERS
 M:	Lars-Peter Clausen <lars@metafoo.de>
 M:	Michael Hennerich <Michael.Hennerich@analog.com>
@ -4752,14 +4752,14 @@ S:	Supported
 F:	drivers/net/ethernet/ibm/ibmveth.*

 IBM Power Virtual SCSI Device Drivers
-M:	Nathan Fontenot <nfont@linux.vnet.ibm.com>
+M:	Tyrel Datwyler <tyreld@linux.vnet.ibm.com>
 L:	linux-scsi@vger.kernel.org
 S:	Supported
 F:	drivers/scsi/ibmvscsi/ibmvscsi*
 F:	drivers/scsi/ibmvscsi/viosrp.h

 IBM Power Virtual FC Device Drivers
-M:	Brian King <brking@linux.vnet.ibm.com>
+M:	Tyrel Datwyler <tyreld@linux.vnet.ibm.com>
 L:	linux-scsi@vger.kernel.org
 S:	Supported
 F:	drivers/scsi/ibmvscsi/ibmvfc*
@ -4948,7 +4948,6 @@ K:	\b(ABS|SYN)_MT_
 INTEL C600 SERIES SAS CONTROLLER DRIVER
 M:	Intel SCU Linux support <intel-linux-scu@intel.com>
 M:	Artur Paszkiewicz <artur.paszkiewicz@intel.com>
-M:	Dave Jiang <dave.jiang@intel.com>
 L:	linux-scsi@vger.kernel.org
 T:	git git://git.code.sf.net/p/intel-sas/isci
 S:	Supported
@ -7026,14 +7025,12 @@ OPEN FIRMWARE AND FLATTENED DEVICE TREE
 M:	Grant Likely <grant.likely@linaro.org>
 M:	Rob Herring <robh+dt@kernel.org>
 L:	devicetree@vger.kernel.org
-W:	http://fdt.secretlab.ca
-T:	git git://git.secretlab.ca/git/linux-2.6.git
+W:	http://www.devicetree.org/
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/glikely/linux.git
 S:	Maintained
 F:	drivers/of/
 F:	include/linux/of*.h
 F:	scripts/dtc/
-K:	of_get_property
-K:	of_match_table

 OPEN FIRMWARE AND FLATTENED DEVICE TREE BINDINGS
 M:	Rob Herring <robh+dt@kernel.org>
@ -7278,7 +7275,7 @@ S:	Maintained
 F:	drivers/pci/host/*layerscape*

 PCI DRIVER FOR IMX6
-M:	Richard Zhu <r65037@freescale.com>
+M:	Richard Zhu <Richard.Zhu@freescale.com>
 M:	Lucas Stach <l.stach@pengutronix.de>
 L:	linux-pci@vger.kernel.org
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
--- a/2
+++ b/2
@ -1,7 +1,7 @@
 VERSION = 3
 PATCHLEVEL = 19
 SUBLEVEL = 0
-EXTRAVERSION = -rc5
+EXTRAVERSION = -rc6
 NAME = Diseased Newt

 # *DOCUMENTATION*
--- a/arch/alpha/kernel/pci.c
+++ b/arch/alpha/kernel/pci.c
@ -285,8 +285,12 @@ pcibios_claim_one_bus(struct pci_bus *b)
 			if (r->parent || !r->start || !r->flags)
 				continue;
 			if (pci_has_flag(PCI_PROBE_ONLY) ||
-			    (r->flags & IORESOURCE_PCI_FIXED))
-				pci_claim_resource(dev, i);
+			    (r->flags & IORESOURCE_PCI_FIXED)) {
+				if (pci_claim_resource(dev, i) == 0)
+					continue;
+
+				pci_claim_bridge_resource(dev, i);
+			}
 		}
 	}

--- a/arch/arm/boot/dts/dra7.dtsi
+++ b/arch/arm/boot/dts/dra7.dtsi
@ -1257,6 +1257,8 @@
 				tx-fifo-resize;
 				maximum-speed = "super-speed";
 				dr_mode = "otg";
+				snps,dis_u3_susphy_quirk;
+				snps,dis_u2_susphy_quirk;
 			};
 		};

@ -1278,6 +1280,8 @@
 				tx-fifo-resize;
 				maximum-speed = "high-speed";
 				dr_mode = "otg";
+				snps,dis_u3_susphy_quirk;
+				snps,dis_u2_susphy_quirk;
 			};
 		};

@ -1299,6 +1303,8 @@
 				tx-fifo-resize;
 				maximum-speed = "high-speed";
 				dr_mode = "otg";
+				snps,dis_u3_susphy_quirk;
+				snps,dis_u2_susphy_quirk;
 			};
 		};

--- a/arch/arm/boot/dts/imx25.dtsi
+++ b/arch/arm/boot/dts/imx25.dtsi
@ -369,7 +369,7 @@
 				compatible = "fsl,imx25-pwm", "fsl,imx27-pwm";
 				#pwm-cells = <2>;
 				reg = <0x53fa0000 0x4000>;
-				clocks = <&clks 106>, <&clks 36>;
+				clocks = <&clks 106>, <&clks 52>;
 				clock-names = "ipg", "per";
 				interrupts = <36>;
 			};
@ -388,7 +388,7 @@
 				compatible = "fsl,imx25-pwm", "fsl,imx27-pwm";
 				#pwm-cells = <2>;
 				reg = <0x53fa8000 0x4000>;
-				clocks = <&clks 107>, <&clks 36>;
+				clocks = <&clks 107>, <&clks 52>;
 				clock-names = "ipg", "per";
 				interrupts = <41>;
 			};
@ -429,7 +429,7 @@
 			pwm4: pwm@53fc8000 {
 				compatible = "fsl,imx25-pwm", "fsl,imx27-pwm";
 				reg = <0x53fc8000 0x4000>;
-				clocks = <&clks 108>, <&clks 36>;
+				clocks = <&clks 108>, <&clks 52>;
 				clock-names = "ipg", "per";
 				interrupts = <42>;
 			};
@ -476,7 +476,7 @@
 				compatible = "fsl,imx25-pwm", "fsl,imx27-pwm";
 				#pwm-cells = <2>;
 				reg = <0x53fe0000 0x4000>;
-				clocks = <&clks 105>, <&clks 36>;
+				clocks = <&clks 105>, <&clks 52>;
 				clock-names = "ipg", "per";
 				interrupts = <26>;
 			};
--- a/arch/arm/boot/dts/imx6sx-sdb.dts
+++ b/arch/arm/boot/dts/imx6sx-sdb.dts
@ -166,12 +166,12 @@
 		#address-cells = <1>;
 		#size-cells = <0>;

-		ethphy1: ethernet-phy@0 {
-			reg = <0>;
+		ethphy1: ethernet-phy@1 {
+			reg = <1>;
 		};

-		ethphy2: ethernet-phy@1 {
-			reg = <1>;
+		ethphy2: ethernet-phy@2 {
+			reg = <2>;
 		};
 	};
 };
--- a/arch/arm/boot/dts/tegra20-seaboard.dts
+++ b/arch/arm/boot/dts/tegra20-seaboard.dts
@ -406,7 +406,7 @@
 		clock-frequency = <400000>;

 		magnetometer@c {
-			compatible = "ak,ak8975";
+			compatible = "asahi-kasei,ak8975";
 			reg = <0xc>;
 			interrupt-parent = <&gpio>;
 			interrupts = <TEGRA_GPIO(N, 5) IRQ_TYPE_LEVEL_HIGH>;
--- a/arch/arm/kernel/entry-header.S
+++ b/arch/arm/kernel/entry-header.S
@ -253,21 +253,22 @@
 	.endm

 	.macro	restore_user_regs, fast = 0, offset = 0
-	ldr	r1, [sp, #\offset + S_PSR]	@ get calling cpsr
-	ldr	lr, [sp, #\offset + S_PC]!	@ get pc
+	mov	r2, sp
+	ldr	r1, [r2, #\offset + S_PSR]	@ get calling cpsr
+	ldr	lr, [r2, #\offset + S_PC]!	@ get pc
 	msr	spsr_cxsf, r1			@ save in spsr_svc
 #if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_32v6K)
 	@ We must avoid clrex due to Cortex-A15 erratum #830321
-	strex	r1, r2, [sp]			@ clear the exclusive monitor
+	strex	r1, r2, [r2]			@ clear the exclusive monitor
 #endif
 	.if	\fast
-	ldmdb	sp, {r1 - lr}^			@ get calling r1 - lr
+	ldmdb	r2, {r1 - lr}^			@ get calling r1 - lr
 	.else
-	ldmdb	sp, {r0 - lr}^			@ get calling r0 - lr
+	ldmdb	r2, {r0 - lr}^			@ get calling r0 - lr
 	.endif
 	mov	r0, r0				@ ARMv5T and earlier require a nop
 						@ after ldm {}^
-	add	sp, sp, #S_FRAME_SIZE - S_PC
+	add	sp, sp, #\offset + S_FRAME_SIZE
 	movs	pc, lr				@ return & move spsr_svc into cpsr
 	.endm

--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@ -116,8 +116,14 @@ int armpmu_event_set_period(struct perf_event *event)
 		ret = 1;
 	}

-	if (left > (s64)armpmu->max_period)
-		left = armpmu->max_period;
+	/*
+	 * Limit the maximum period to prevent the counter value
+	 * from overtaking the one we are about to program. In
+	 * effect we are reducing max_period to account for
+	 * interrupt latency (and we are being very conservative).
+	 */
+	if (left > (armpmu->max_period >> 1))
+		left = armpmu->max_period >> 1;

 	local64_set(&hwc->prev_count, (u64)-left);

--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@ -657,10 +657,13 @@ int __init arm_add_memory(u64 start, u64 size)

 	/*
 	 * Ensure that start/size are aligned to a page boundary.
-	 * Size is appropriately rounded down, start is rounded up.
+	 * Size is rounded down, start is rounded up.
 	 */
-	size -= start & ~PAGE_MASK;
 	aligned_start = PAGE_ALIGN(start);
+	if (aligned_start > start + size)
+		size = 0;
+	else
+		size -= aligned_start - start;

 #ifndef CONFIG_ARCH_PHYS_ADDR_T_64BIT
 	if (aligned_start > ULONG_MAX) {
--- a/arch/arm/mach-mvebu/coherency.c
+++ b/arch/arm/mach-mvebu/coherency.c
@ -246,9 +246,14 @@ static int coherency_type(void)
 	return type;
 }

+/*
+ * As a precaution, we currently completely disable hardware I/O
+ * coherency, until enough testing is done with automatic I/O
+ * synchronization barriers to validate that it is a proper solution.
+ */
 int coherency_available(void)
 {
-	return coherency_type() != COHERENCY_FABRIC_TYPE_NONE;
+	return false;
 }

 int __init coherency_init(void)
--- a/arch/arm/mach-omap2/common.h
+++ b/arch/arm/mach-omap2/common.h
@ -211,6 +211,7 @@ extern struct device *omap2_get_iva_device(void);
 extern struct device *omap2_get_l3_device(void);
 extern struct device *omap4_get_dsp_device(void);

+unsigned int omap4_xlate_irq(unsigned int hwirq);
 void omap_gic_of_init(void);

 #ifdef CONFIG_CACHE_L2X0
--- a/arch/arm/mach-omap2/omap4-common.c
+++ b/arch/arm/mach-omap2/omap4-common.c
@ -256,6 +256,38 @@ static int __init omap4_sar_ram_init(void)
 }
 omap_early_initcall(omap4_sar_ram_init);

+static struct of_device_id gic_match[] = {
+	{ .compatible = "arm,cortex-a9-gic", },
+	{ .compatible = "arm,cortex-a15-gic", },
+	{ },
+};
+
+static struct device_node *gic_node;
+
+unsigned int omap4_xlate_irq(unsigned int hwirq)
+{
+	struct of_phandle_args irq_data;
+	unsigned int irq;
+
+	if (!gic_node)
+		gic_node = of_find_matching_node(NULL, gic_match);
+
+	if (WARN_ON(!gic_node))
+		return hwirq;
+
+	irq_data.np = gic_node;
+	irq_data.args_count = 3;
+	irq_data.args[0] = 0;
+	irq_data.args[1] = hwirq - OMAP44XX_IRQ_GIC_START;
+	irq_data.args[2] = IRQ_TYPE_LEVEL_HIGH;
+
+	irq = irq_create_of_mapping(&irq_data);
+	if (WARN_ON(!irq))
+		irq = hwirq;
+
+	return irq;
+}
+
 void __init omap_gic_of_init(void)
 {
 	struct device_node *np;
--- a/arch/arm/mach-omap2/omap_hwmod.c
+++ b/arch/arm/mach-omap2/omap_hwmod.c
@ -3534,9 +3534,15 @@ int omap_hwmod_fill_resources(struct omap_hwmod *oh, struct resource *res)

 	mpu_irqs_cnt = _count_mpu_irqs(oh);
 	for (i = 0; i < mpu_irqs_cnt; i++) {
+		unsigned int irq;
+
+		if (oh->xlate_irq)
+			irq = oh->xlate_irq((oh->mpu_irqs + i)->irq);
+		else
+			irq = (oh->mpu_irqs + i)->irq;
 		(res + r)->name = (oh->mpu_irqs + i)->name;
-		(res + r)->start = (oh->mpu_irqs + i)->irq;
-		(res + r)->end = (oh->mpu_irqs + i)->irq;
+		(res + r)->start = irq;
+		(res + r)->end = irq;
 		(res + r)->flags = IORESOURCE_IRQ;
 		r++;
 	}
--- a/arch/arm/mach-omap2/omap_hwmod.h
+++ b/arch/arm/mach-omap2/omap_hwmod.h
@ -676,6 +676,7 @@ struct omap_hwmod {
 	spinlock_t			_lock;
 	struct list_head		node;
 	struct omap_hwmod_ocp_if	*_mpu_port;
+	unsigned int			(*xlate_irq)(unsigned int);
 	u16				flags;
 	u8				mpu_rt_idx;
 	u8				response_lat;
--- a/arch/arm/mach-omap2/omap_hwmod_44xx_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_44xx_data.c
@ -479,6 +479,7 @@ static struct omap_hwmod omap44xx_dma_system_hwmod = {
 	.class		= &omap44xx_dma_hwmod_class,
 	.clkdm_name	= "l3_dma_clkdm",
 	.mpu_irqs	= omap44xx_dma_system_irqs,
+	.xlate_irq	= omap4_xlate_irq,
 	.main_clk	= "l3_div_ck",
 	.prcm = {
 		.omap4 = {
@ -640,6 +641,7 @@ static struct omap_hwmod omap44xx_dss_dispc_hwmod = {
 	.class		= &omap44xx_dispc_hwmod_class,
 	.clkdm_name	= "l3_dss_clkdm",
 	.mpu_irqs	= omap44xx_dss_dispc_irqs,
+	.xlate_irq	= omap4_xlate_irq,
 	.sdma_reqs	= omap44xx_dss_dispc_sdma_reqs,
 	.main_clk	= "dss_dss_clk",
 	.prcm = {
@ -693,6 +695,7 @@ static struct omap_hwmod omap44xx_dss_dsi1_hwmod = {
 	.class		= &omap44xx_dsi_hwmod_class,
 	.clkdm_name	= "l3_dss_clkdm",
 	.mpu_irqs	= omap44xx_dss_dsi1_irqs,
+	.xlate_irq	= omap4_xlate_irq,
 	.sdma_reqs	= omap44xx_dss_dsi1_sdma_reqs,
 	.main_clk	= "dss_dss_clk",
 	.prcm = {
@ -726,6 +729,7 @@ static struct omap_hwmod omap44xx_dss_dsi2_hwmod = {
 	.class		= &omap44xx_dsi_hwmod_class,
 	.clkdm_name	= "l3_dss_clkdm",
 	.mpu_irqs	= omap44xx_dss_dsi2_irqs,
+	.xlate_irq	= omap4_xlate_irq,
 	.sdma_reqs	= omap44xx_dss_dsi2_sdma_reqs,
 	.main_clk	= "dss_dss_clk",
 	.prcm = {
@ -784,6 +788,7 @@ static struct omap_hwmod omap44xx_dss_hdmi_hwmod = {
 	 */
 	.flags		= HWMOD_SWSUP_SIDLE,
 	.mpu_irqs	= omap44xx_dss_hdmi_irqs,
+	.xlate_irq	= omap4_xlate_irq,
 	.sdma_reqs	= omap44xx_dss_hdmi_sdma_reqs,
 	.main_clk	= "dss_48mhz_clk",
 	.prcm = {
--- a/arch/arm/mach-omap2/omap_hwmod_54xx_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_54xx_data.c
@ -288,6 +288,7 @@ static struct omap_hwmod omap54xx_dma_system_hwmod = {
 	.class		= &omap54xx_dma_hwmod_class,
 	.clkdm_name	= "dma_clkdm",
 	.mpu_irqs	= omap54xx_dma_system_irqs,
+	.xlate_irq	= omap4_xlate_irq,
 	.main_clk	= "l3_iclk_div",
 	.prcm = {
 		.omap4 = {
--- a/arch/arm/mach-omap2/prcm-common.h
+++ b/arch/arm/mach-omap2/prcm-common.h
@ -498,6 +498,7 @@ struct omap_prcm_irq_setup {
 	u8 nr_irqs;
 	const struct omap_prcm_irq *irqs;
 	int irq;
+	unsigned int (*xlate_irq)(unsigned int);
 	void (*read_pending_irqs)(unsigned long *events);
 	void (*ocp_barrier)(void);
 	void (*save_and_clear_irqen)(u32 *saved_mask);
--- a/arch/arm/mach-omap2/prm44xx.c
+++ b/arch/arm/mach-omap2/prm44xx.c
@ -49,6 +49,7 @@ static struct omap_prcm_irq_setup omap4_prcm_irq_setup = {
 	.irqs			= omap4_prcm_irqs,
 	.nr_irqs		= ARRAY_SIZE(omap4_prcm_irqs),
 	.irq			= 11 + OMAP44XX_IRQ_GIC_START,
+	.xlate_irq		= omap4_xlate_irq,
 	.read_pending_irqs	= &omap44xx_prm_read_pending_irqs,
 	.ocp_barrier		= &omap44xx_prm_ocp_barrier,
 	.save_and_clear_irqen	= &omap44xx_prm_save_and_clear_irqen,
@ -751,8 +752,10 @@ static int omap44xx_prm_late_init(void)
 		}

 		/* Once OMAP4 DT is filled as well */
-		if (irq_num >= 0)
+		if (irq_num >= 0) {
 			omap4_prcm_irq_setup.irq = irq_num;
+			omap4_prcm_irq_setup.xlate_irq = NULL;
+		}
 	}

 	omap44xx_prm_enable_io_wakeup();
--- a/arch/arm/mach-omap2/prm_common.c
+++ b/arch/arm/mach-omap2/prm_common.c
@ -187,6 +187,7 @@ int omap_prcm_event_to_irq(const char *name)
 */
 void omap_prcm_irq_cleanup(void)
 {
+	unsigned int irq;
 	int i;

 	if (!prcm_irq_setup) {
@ -211,7 +212,11 @@ void omap_prcm_irq_cleanup(void)
 	kfree(prcm_irq_setup->priority_mask);
 	prcm_irq_setup->priority_mask = NULL;

-	irq_set_chained_handler(prcm_irq_setup->irq, NULL);
+	if (prcm_irq_setup->xlate_irq)
+		irq = prcm_irq_setup->xlate_irq(prcm_irq_setup->irq);
+	else
+		irq = prcm_irq_setup->irq;
+	irq_set_chained_handler(irq, NULL);

 	if (prcm_irq_setup->base_irq > 0)
 		irq_free_descs(prcm_irq_setup->base_irq,
@ -259,6 +264,7 @@ int omap_prcm_register_chain_handler(struct omap_prcm_irq_setup *irq_setup)
 	int offset, i;
 	struct irq_chip_generic *gc;
 	struct irq_chip_type *ct;
+	unsigned int irq;

 	if (!irq_setup)
 		return -EINVAL;
@ -298,7 +304,11 @@ int omap_prcm_register_chain_handler(struct omap_prcm_irq_setup *irq_setup)
 				1 << (offset & 0x1f);
 	}

-	irq_set_chained_handler(irq_setup->irq, omap_prcm_irq_handler);
+	if (irq_setup->xlate_irq)
+		irq = irq_setup->xlate_irq(irq_setup->irq);
+	else
+		irq = irq_setup->irq;
+	irq_set_chained_handler(irq, omap_prcm_irq_handler);

 	irq_setup->base_irq = irq_alloc_descs(-1, 0, irq_setup->nr_regs * 32,
 		0);
--- a/arch/arm/mach-omap2/twl-common.c
+++ b/arch/arm/mach-omap2/twl-common.c
@ -66,19 +66,24 @@ void __init omap_pmic_init(int bus, u32 clkrate,
 	omap_register_i2c_bus(bus, clkrate, &pmic_i2c_board_info, 1);
 }

+#ifdef CONFIG_ARCH_OMAP4
 void __init omap4_pmic_init(const char *pmic_type,
 		    struct twl4030_platform_data *pmic_data,
 		    struct i2c_board_info *devices, int nr_devices)
 {
 	/* PMIC part*/
+	unsigned int irq;
+
 	omap_mux_init_signal("sys_nirq1", OMAP_PIN_INPUT_PULLUP | OMAP_PIN_OFF_WAKEUPENABLE);
 	omap_mux_init_signal("fref_clk0_out.sys_drm_msecure", OMAP_PIN_OUTPUT);
-	omap_pmic_init(1, 400, pmic_type, 7 + OMAP44XX_IRQ_GIC_START, pmic_data);
+	irq = omap4_xlate_irq(7 + OMAP44XX_IRQ_GIC_START);
+	omap_pmic_init(1, 400, pmic_type, irq, pmic_data);

 	/* Register additional devices on i2c1 bus if needed */
 	if (devices)
 		i2c_register_board_info(1, devices, nr_devices);
 }
+#endif

 void __init omap_pmic_late_init(void)
 {
--- a/arch/arm/mach-shmobile/setup-r8a7778.c
+++ b/arch/arm/mach-shmobile/setup-r8a7778.c
@ -576,11 +576,18 @@ void __init r8a7778_init_irq_extpin(int irlm)
 void __init r8a7778_init_irq_dt(void)
 {
 	void __iomem *base = ioremap_nocache(0xfe700000, 0x00100000);
+#ifdef CONFIG_ARCH_SHMOBILE_LEGACY
+	void __iomem *gic_dist_base = ioremap_nocache(0xfe438000, 0x1000);
+	void __iomem *gic_cpu_base = ioremap_nocache(0xfe430000, 0x1000);
+#endif

 	BUG_ON(!base);

+#ifdef CONFIG_ARCH_SHMOBILE_LEGACY
+	gic_init(0, 29, gic_dist_base, gic_cpu_base);
+#else
 	irqchip_init();
-
+#endif
 	/* route all interrupts to ARM */
 	__raw_writel(0x73ffffff, base + INT2NTSR0);
 	__raw_writel(0xffffffff, base + INT2NTSR1);
--- a/arch/arm/mach-shmobile/setup-r8a7779.c
+++ b/arch/arm/mach-shmobile/setup-r8a7779.c
@ -720,10 +720,17 @@ static int r8a7779_set_wake(struct irq_data *data, unsigned int on)

 void __init r8a7779_init_irq_dt(void)
 {
+#ifdef CONFIG_ARCH_SHMOBILE_LEGACY
+	void __iomem *gic_dist_base = ioremap_nocache(0xf0001000, 0x1000);
+	void __iomem *gic_cpu_base = ioremap_nocache(0xf0000100, 0x1000);
+#endif
 	gic_arch_extn.irq_set_wake = r8a7779_set_wake;

+#ifdef CONFIG_ARCH_SHMOBILE_LEGACY
+	gic_init(0, 29, gic_dist_base, gic_cpu_base);
+#else
 	irqchip_init();
-
+#endif
 	/* route all interrupts to ARM */
 	__raw_writel(0xffffffff, INT2NTSR0);
 	__raw_writel(0x3fffffff, INT2NTSR1);
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@ -85,6 +85,7 @@ vdso_install:
 # We use MRPROPER_FILES and CLEAN_FILES now
 archclean:
 	$(Q)$(MAKE) $(clean)=$(boot)
+	$(Q)$(MAKE) $(clean)=$(boot)/dts

 define archhelp
  echo  '* Image.gz      - Compressed kernel image (arch/$(ARCH)/boot/Image.gz)'
--- a/arch/arm64/boot/dts/Makefile
+++ b/arch/arm64/boot/dts/Makefile
@ -3,6 +3,4 @@ dts-dirs += apm
 dts-dirs += arm
 dts-dirs += cavium

-always		:= $(dtb-y)
 subdir-y	:= $(dts-dirs)
-clean-files	:= *.dtb
--- a/arch/arm64/boot/dts/arm/juno.dts
+++ b/arch/arm64/boot/dts/arm/juno.dts
@ -22,7 +22,7 @@
 	};

 	chosen {
-		stdout-path = &soc_uart0;
+		stdout-path = "serial0:115200n8";
 	};

 	psci {
--- a/arch/arm64/mm/dump.c
+++ b/arch/arm64/mm/dump.c
@ -15,6 +15,7 @@
 */
 #include <linux/debugfs.h>
 #include <linux/fs.h>
+#include <linux/io.h>
 #include <linux/mm.h>
 #include <linux/sched.h>
 #include <linux/seq_file.h>
--- a/arch/avr32/kernel/module.c
+++ b/arch/avr32/kernel/module.c
@ -19,12 +19,10 @@
 #include <linux/moduleloader.h>
 #include <linux/vmalloc.h>

-void module_free(struct module *mod, void *module_region)
+void module_arch_freeing_init(struct module *mod)
 {
 	vfree(mod->arch.syminfo);
 	mod->arch.syminfo = NULL;
-
-	vfree(module_region);
 }

 static inline int check_rela(Elf32_Rela *rela, struct module *module,
@ -291,12 +289,3 @@ int apply_relocate_add(Elf32_Shdr *sechdrs, const char *strtab,

 	return ret;
 }
-
-int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs,
-		    struct module *module)
-{
-	vfree(module->arch.syminfo);
-	module->arch.syminfo = NULL;
-
-	return 0;
-}
--- a/arch/cris/arch-v32/drivers/sync_serial.c
+++ b/arch/cris/arch-v32/drivers/sync_serial.c
@ -604,7 +604,7 @@ static ssize_t __sync_serial_read(struct file *file,
 				  struct timespec *ts)
 {
 	unsigned long flags;
-	int dev = MINOR(file->f_dentry->d_inode->i_rdev);
+	int dev = MINOR(file_inode(file)->i_rdev);
 	int avail;
 	struct sync_port *port;
 	unsigned char *start;
--- a/arch/cris/kernel/module.c
+++ b/arch/cris/kernel/module.c
@ -36,7 +36,7 @@ void *module_alloc(unsigned long size)
 }

 /* Free memory returned from module_alloc */
-void module_free(struct module *mod, void *module_region)
+void module_memfree(void *module_region)
 {
 	kfree(module_region);
 }
--- a/arch/frv/mb93090-mb00/pci-frv.c
+++ b/arch/frv/mb93090-mb00/pci-frv.c
@ -94,7 +94,7 @@ static void __init pcibios_allocate_bus_resources(struct list_head *bus_list)
 				r = &dev->resource[idx];
 				if (!r->start)
 					continue;
-				pci_claim_resource(dev, idx);
+				pci_claim_bridge_resource(dev, idx);
 			}
 		}
 		pcibios_allocate_bus_resources(&bus->children);
--- a/arch/ia64/kernel/module.c
+++ b/arch/ia64/kernel/module.c
@ -305,14 +305,12 @@ plt_target (struct plt_entry *plt)
 #endif /* !USE_BRL */

 void
-module_free (struct module *mod, void *module_region)
+module_arch_freeing_init (struct module *mod)
 {
-	if (mod && mod->arch.init_unw_table &&
-	    module_region == mod->module_init) {
+	if (mod->arch.init_unw_table) {
 		unw_remove_unwind_table(mod->arch.init_unw_table);
 		mod->arch.init_unw_table = NULL;
 	}
-	vfree(module_region);
 }

 /* Have we already seen one of these relocations? */
--- a/arch/ia64/pci/pci.c
+++ b/arch/ia64/pci/pci.c
@ -487,45 +487,39 @@ int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge)
 	return 0;
 }

-static int is_valid_resource(struct pci_dev *dev, int idx)
-{
-	unsigned int i, type_mask = IORESOURCE_IO | IORESOURCE_MEM;
-	struct resource *devr = &dev->resource[idx], *busr;
-
-	if (!dev->bus)
-		return 0;
-
-	pci_bus_for_each_resource(dev->bus, busr, i) {
-		if (!busr || ((busr->flags ^ devr->flags) & type_mask))
-			continue;
-		if ((devr->start) && (devr->start >= busr->start) &&
-				(devr->end <= busr->end))
-			return 1;
-	}
-	return 0;
-}
-
-static void pcibios_fixup_resources(struct pci_dev *dev, int start, int limit)
-{
-	int i;
-
-	for (i = start; i < limit; i++) {
-		if (!dev->resource[i].flags)
-			continue;
-		if ((is_valid_resource(dev, i)))
-			pci_claim_resource(dev, i);
-	}
-}
-
 void pcibios_fixup_device_resources(struct pci_dev *dev)
 {
-	pcibios_fixup_resources(dev, 0, PCI_BRIDGE_RESOURCES);
+	int idx;
+
+	if (!dev->bus)
+		return;
+
+	for (idx = 0; idx < PCI_BRIDGE_RESOURCES; idx++) {
+		struct resource *r = &dev->resource[idx];
+
+		if (!r->flags || r->parent || !r->start)
+			continue;
+
+		pci_claim_resource(dev, idx);
+	}
 }
 EXPORT_SYMBOL_GPL(pcibios_fixup_device_resources);

 static void pcibios_fixup_bridge_resources(struct pci_dev *dev)
 {
-	pcibios_fixup_resources(dev, PCI_BRIDGE_RESOURCES, PCI_NUM_RESOURCES);
+	int idx;
+
+	if (!dev->bus)
+		return;
+
+	for (idx = PCI_BRIDGE_RESOURCES; idx < PCI_NUM_RESOURCES; idx++) {
+		struct resource *r = &dev->resource[idx];
+
+		if (!r->flags || r->parent || !r->start)
+			continue;
+
+		pci_claim_bridge_resource(dev, idx);
+	}
 }

 /*
--- a/arch/microblaze/pci/pci-common.c
+++ b/arch/microblaze/pci/pci-common.c
@ -1026,6 +1026,8 @@ static void pcibios_allocate_bus_resources(struct pci_bus *bus)
 			 pr, (pr && pr->name) ? pr->name : "nil");

 		if (pr && !(pr->flags & IORESOURCE_UNSET)) {
+			struct pci_dev *dev = bus->self;
+
 			if (request_resource(pr, res) == 0)
 				continue;
 			/*
@ -1035,6 +1037,12 @@ static void pcibios_allocate_bus_resources(struct pci_bus *bus)
 			 */
 			if (reparent_resources(pr, res) == 0)
 				continue;
+
+			if (dev && i < PCI_BRIDGE_RESOURCE_NUM &&
+			    pci_claim_bridge_resource(dev,
+						 i + PCI_BRIDGE_RESOURCES) == 0)
+				continue;
+
 		}
 		pr_warn("PCI: Cannot allocate resource region ");
 		pr_cont("%d of PCI bridge %d, will remap\n", i, bus->number);
@ -1227,7 +1235,10 @@ void pcibios_claim_one_bus(struct pci_bus *bus)
 				 (unsigned long long)r->end,
 				 (unsigned int)r->flags);

-			pci_claim_resource(dev, i);
+			if (pci_claim_resource(dev, i) == 0)
+				continue;
+
+			pci_claim_bridge_resource(dev, i);
 		}
 	}

--- a/arch/mips/net/bpf_jit.c
+++ b/arch/mips/net/bpf_jit.c
@ -1388,7 +1388,7 @@ out:
 void bpf_jit_free(struct bpf_prog *fp)
 {
 	if (fp->jited)
-		module_free(NULL, fp->bpf_func);
+		module_memfree(fp->bpf_func);

 	bpf_prog_unlock_free(fp);
 }
--- a/arch/mn10300/unit-asb2305/pci-asb2305.c
+++ b/arch/mn10300/unit-asb2305/pci-asb2305.c
@ -106,7 +106,7 @@ static void __init pcibios_allocate_bus_resources(struct list_head *bus_list)
 				if (!r->flags)
 					continue;
 				if (!r->start ||
-				    pci_claim_resource(dev, idx) < 0) {
+				    pci_claim_bridge_resource(dev, idx) < 0) {
 					printk(KERN_ERR "PCI:"
 					       " Cannot allocate resource"
 					       " region %d of bridge %s\n",
--- a/arch/mn10300/unit-asb2305/pci.c
+++ b/arch/mn10300/unit-asb2305/pci.c
@ -281,42 +281,37 @@ static int __init pci_check_direct(void)
 	return -ENODEV;
 }

-static int is_valid_resource(struct pci_dev *dev, int idx)
-{
-	unsigned int i, type_mask = IORESOURCE_IO | IORESOURCE_MEM;
-	struct resource *devr = &dev->resource[idx], *busr;
-
-	if (dev->bus) {
-		pci_bus_for_each_resource(dev->bus, busr, i) {
-			if (!busr || (busr->flags ^ devr->flags) & type_mask)
-				continue;
-
-			if (devr->start &&
-			    devr->start >= busr->start &&
-			    devr->end <= busr->end)
-				return 1;
-		}
-	}
-
-	return 0;
-}
-
 static void pcibios_fixup_device_resources(struct pci_dev *dev)
 {
-	int limit, i;
+	int idx;

-	if (dev->bus->number != 0)
+	if (!dev->bus)
 		return;

-	limit = (dev->hdr_type == PCI_HEADER_TYPE_NORMAL) ?
-		PCI_BRIDGE_RESOURCES : PCI_NUM_RESOURCES;
+	for (idx = 0; idx < PCI_BRIDGE_RESOURCES; idx++) {
+		struct resource *r = &dev->resource[idx];

-	for (i = 0; i < limit; i++) {
-		if (!dev->resource[i].flags)
+		if (!r->flags || r->parent || !r->start)
 			continue;

-		if (is_valid_resource(dev, i))
-			pci_claim_resource(dev, i);
+		pci_claim_resource(dev, idx);
+	}
+}
+
+static void pcibios_fixup_bridge_resources(struct pci_dev *dev)
+{
+	int idx;
+
+	if (!dev->bus)
+		return;
+
+	for (idx = PCI_BRIDGE_RESOURCES; idx < PCI_NUM_RESOURCES; idx++) {
+		struct resource *r = &dev->resource[idx];
+
+		if (!r->flags || r->parent || !r->start)
+			continue;
+
+		pci_claim_bridge_resource(dev, idx);
 	}
 }

@ -330,7 +325,7 @@ void pcibios_fixup_bus(struct pci_bus *bus)

 	if (bus->self) {
 		pci_read_bridge_bases(bus);
-		pcibios_fixup_device_resources(bus->self);
+		pcibios_fixup_bridge_resources(bus->self);
 	}

 	list_for_each_entry(dev, &bus->devices, bus_list)
--- a/arch/nios2/kernel/module.c
+++ b/arch/nios2/kernel/module.c
@ -36,7 +36,7 @@ void *module_alloc(unsigned long size)
 }

 /* Free memory returned from module_alloc */
-void module_free(struct module *mod, void *module_region)
+void module_memfree(void *module_region)
 {
 	kfree(module_region);
 }
--- a/arch/nios2/kernel/signal.c
+++ b/arch/nios2/kernel/signal.c
@ -200,7 +200,7 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,

 	/* Set up to return from userspace; jump to fixed address sigreturn
 	   trampoline on kuser page.  */
-	regs->ra = (unsigned long) (0x1040);
+	regs->ra = (unsigned long) (0x1044);

 	/* Set up registers for signal handler */
 	regs->sp = (unsigned long) frame;
--- a/arch/parisc/kernel/module.c
+++ b/arch/parisc/kernel/module.c
@ -298,14 +298,10 @@ static inline unsigned long count_stubs(const Elf_Rela *rela, unsigned long n)
 }
 #endif

-
-/* Free memory returned from module_alloc */
-void module_free(struct module *mod, void *module_region)
+void module_arch_freeing_init(struct module *mod)
 {
 	kfree(mod->arch.section);
 	mod->arch.section = NULL;
-
-	vfree(module_region);
 }

 /* Additional bytes needed in front of individual sections */
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@ -1184,6 +1184,8 @@ static void pcibios_allocate_bus_resources(struct pci_bus *bus)
 			 pr, (pr && pr->name) ? pr->name : "nil");

 		if (pr && !(pr->flags & IORESOURCE_UNSET)) {
+			struct pci_dev *dev = bus->self;
+
 			if (request_resource(pr, res) == 0)
 				continue;
 			/*
@ -1193,6 +1195,11 @@ static void pcibios_allocate_bus_resources(struct pci_bus *bus)
 			 */
 			if (reparent_resources(pr, res) == 0)
 				continue;
+
+			if (dev && i < PCI_BRIDGE_RESOURCE_NUM &&
+			    pci_claim_bridge_resource(dev,
+						i + PCI_BRIDGE_RESOURCES) == 0)
+				continue;
 		}
 		pr_warning("PCI: Cannot allocate resource region "
 			   "%d of PCI bridge %d, will remap\n", i, bus->number);
@ -1401,7 +1408,10 @@ void pcibios_claim_one_bus(struct pci_bus *bus)
 				 (unsigned long long)r->end,
 				 (unsigned int)r->flags);

-			pci_claim_resource(dev, i);
+			if (pci_claim_resource(dev, i) == 0)
+				continue;
+
+			pci_claim_bridge_resource(dev, i);
 		}
 	}

--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@ -699,7 +699,7 @@ out:
 void bpf_jit_free(struct bpf_prog *fp)
 {
 	if (fp->jited)
-		module_free(NULL, fp->bpf_func);
+		module_memfree(fp->bpf_func);

 	bpf_prog_unlock_free(fp);
 }
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@ -304,7 +304,7 @@ int pnv_save_sprs_for_winkle(void)
 	 * all cpus at boot. Get these reg values of current cpu and use the
 	 * same accross all cpus.
 	 */
-	uint64_t lpcr_val = mfspr(SPRN_LPCR);
+	uint64_t lpcr_val = mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1;
 	uint64_t hid0_val = mfspr(SPRN_HID0);
 	uint64_t hid1_val = mfspr(SPRN_HID1);
 	uint64_t hid4_val = mfspr(SPRN_HID4);
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@ -337,6 +337,7 @@ static inline void disable_surveillance(void)
 	args.token = rtas_token("set-indicator");
 	if (args.token == RTAS_UNKNOWN_SERVICE)
 		return;
+	args.token = cpu_to_be32(args.token);
 	args.nargs = cpu_to_be32(3);
 	args.nret = cpu_to_be32(1);
 	args.rets = &args.args[3];
--- a/arch/s390/kernel/module.c
+++ b/arch/s390/kernel/module.c
@ -55,14 +55,10 @@ void *module_alloc(unsigned long size)
 }
 #endif

-/* Free memory returned from module_alloc */
-void module_free(struct module *mod, void *module_region)
+void module_arch_freeing_init(struct module *mod)
 {
-	if (mod) {
-		vfree(mod->arch.syminfo);
-		mod->arch.syminfo = NULL;
-	}
-	vfree(module_region);
+	vfree(mod->arch.syminfo);
+	mod->arch.syminfo = NULL;
 }

 static void check_rela(Elf_Rela *rela, struct module *me)
--- a/arch/s390/net/bpf_jit.S
+++ b/arch/s390/net/bpf_jit.S
@ -22,8 +22,8 @@
 * skb_copy_bits takes 4 parameters:
 *   %r2 = skb pointer
 *   %r3 = offset into skb data
- *   %r4 = length to copy
- *   %r5 = pointer to temp buffer
+ *   %r4 = pointer to temp buffer
+ *   %r5 = length to copy
 */
 #define SKBDATA	%r8

@ -44,8 +44,9 @@ ENTRY(sk_load_word)

 sk_load_word_slow:
 	lgr	%r9,%r2			# save %r2
-	lhi	%r4,4			# 4 bytes
-	la	%r5,160(%r15)		# pointer to temp buffer
+	lgr	%r3,%r1			# offset
+	la	%r4,160(%r15)		# pointer to temp buffer
+	lghi	%r5,4			# 4 bytes
 	brasl	%r14,skb_copy_bits	# get data from skb
 	l	%r5,160(%r15)		# load result from temp buffer
 	ltgr	%r2,%r2			# set cc to (%r2 != 0)
@ -69,8 +70,9 @@ ENTRY(sk_load_half)

 sk_load_half_slow:
 	lgr	%r9,%r2			# save %r2
-	lhi	%r4,2			# 2 bytes
-	la	%r5,162(%r15)		# pointer to temp buffer
+	lgr	%r3,%r1			# offset
+	la	%r4,162(%r15)		# pointer to temp buffer
+	lghi	%r5,2			# 2 bytes
 	brasl	%r14,skb_copy_bits	# get data from skb
 	xc	160(2,%r15),160(%r15)
 	l	%r5,160(%r15)		# load result from temp buffer
@ -95,8 +97,9 @@ ENTRY(sk_load_byte)

 sk_load_byte_slow:
 	lgr	%r9,%r2			# save %r2
-	lhi	%r4,1			# 1 bytes
-	la	%r5,163(%r15)		# pointer to temp buffer
+	lgr	%r3,%r1			# offset
+	la	%r4,163(%r15)		# pointer to temp buffer
+	lghi	%r5,1			# 1 byte
 	brasl	%r14,skb_copy_bits	# get data from skb
 	xc	160(3,%r15),160(%r15)
 	l	%r5,160(%r15)		# load result from temp buffer
@ -104,11 +107,11 @@ sk_load_byte_slow:
 	lgr	%r2,%r9			# restore %r2
 	br	%r8

-	/* A = (*(u8 *)(skb->data+K) & 0xf) << 2 */
+	/* X = (*(u8 *)(skb->data+K) & 0xf) << 2 */
 ENTRY(sk_load_byte_msh)
 	llgfr	%r1,%r3			# extend offset
 	clr	%r11,%r3		# hlen < offset ?
-	jle	sk_load_byte_slow
+	jle	sk_load_byte_msh_slow
 	lhi	%r12,0
 	ic	%r12,0(%r1,%r10)	# get byte from skb
 	nill	%r12,0x0f
@ -118,8 +121,9 @@ ENTRY(sk_load_byte_msh)

 sk_load_byte_msh_slow:
 	lgr	%r9,%r2			# save %r2
-	lhi	%r4,2			# 2 bytes
-	la	%r5,162(%r15)		# pointer to temp buffer
+	lgr	%r3,%r1			# offset
+	la	%r4,163(%r15)		# pointer to temp buffer
+	lghi	%r5,1			# 1 byte
 	brasl	%r14,skb_copy_bits	# get data from skb
 	xc	160(3,%r15),160(%r15)
 	l	%r12,160(%r15)		# load result from temp buffer
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@ -448,15 +448,12 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct sock_filter *filter,
 		mask = 0x800000; /* je */
 kbranch:	/* Emit compare if the branch targets are different */
 		if (filter->jt != filter->jf) {
-			if (K <= 16383)
-				/* chi %r5,<K> */
-				EMIT4_IMM(0xa75e0000, K);
-			else if (test_facility(21))
+			if (test_facility(21))
 				/* clfi %r5,<K> */
 				EMIT6_IMM(0xc25f0000, K);
 			else
-				/* c %r5,<d(K)>(%r13) */
-				EMIT4_DISP(0x5950d000, EMIT_CONST(K));
+				/* cl %r5,<d(K)>(%r13) */
+				EMIT4_DISP(0x5550d000, EMIT_CONST(K));
 		}
 branch:		if (filter->jt == filter->jf) {
 			if (filter->jt == 0)
--- a/arch/sparc/kernel/pci.c
+++ b/arch/sparc/kernel/pci.c
@ -639,7 +639,10 @@ static void pci_claim_bus_resources(struct pci_bus *bus)
 				       (unsigned long long)r->end,
 				       (unsigned int)r->flags);

-			pci_claim_resource(dev, i);
+			if (pci_claim_resource(dev, i) == 0)
+				continue;
+
+			pci_claim_bridge_resource(dev, i);
 		}
 	}

--- a/arch/sparc/net/bpf_jit_comp.c
+++ b/arch/sparc/net/bpf_jit_comp.c
@ -776,7 +776,7 @@ cond_branch:			f_offset = addrs[i + filter[i].jf];
 				if (unlikely(proglen + ilen > oldproglen)) {
 					pr_err("bpb_jit_compile fatal error\n");
 					kfree(addrs);
-					module_free(NULL, image);
+					module_memfree(image);
 					return;
 				}
 				memcpy(image + proglen, temp, ilen);
@ -822,7 +822,7 @@ out:
 void bpf_jit_free(struct bpf_prog *fp)
 {
 	if (fp->jited)
-		module_free(NULL, fp->bpf_func);
+		module_memfree(fp->bpf_func);

 	bpf_prog_unlock_free(fp);
 }
--- a/arch/tile/kernel/module.c
+++ b/arch/tile/kernel/module.c
@ -74,7 +74,7 @@ error:


 /* Free memory returned from module_alloc */
-void module_free(struct module *mod, void *module_region)
+void module_memfree(void *module_region)
 {
 	vfree(module_region);

@ -83,7 +83,7 @@ void module_free(struct module *mod, void *module_region)
 		     0, 0, 0, NULL, NULL, 0);

 	/*
-	 * FIXME: If module_region == mod->module_init, trim exception
+	 * FIXME: Add module_arch_freeing_init to trim exception
 	 * table entries.
 	 */
 }
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@ -857,7 +857,7 @@ source "kernel/Kconfig.preempt"

 config X86_UP_APIC
 	bool "Local APIC support on uniprocessors"
-	depends on X86_32 && !SMP && !X86_32_NON_STANDARD && !PCI_MSI
+	depends on X86_32 && !SMP && !X86_32_NON_STANDARD
 	---help---
 	  A local APIC (Advanced Programmable Interrupt Controller) is an
 	  integrated interrupt controller in the CPU. If you have a single-CPU
@ -868,6 +868,10 @@ config X86_UP_APIC
 	  performance counters), and the NMI watchdog which detects hard
 	  lockups.

+config X86_UP_APIC_MSI
+	def_bool y
+	select X86_UP_APIC if X86_32 && !SMP && !X86_32_NON_STANDARD && PCI_MSI
+
 config X86_UP_IOAPIC
 	bool "IO-APIC support on uniprocessors"
 	depends on X86_UP_APIC
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@ -90,7 +90,7 @@ suffix-$(CONFIG_KERNEL_LZO) 	:= lzo
 suffix-$(CONFIG_KERNEL_LZ4) 	:= lz4

 RUN_SIZE = $(shell $(OBJDUMP) -h vmlinux | \
-	     perl $(srctree)/arch/x86/tools/calc_run_size.pl)
+	     $(CONFIG_SHELL) $(srctree)/arch/x86/tools/calc_run_size.sh)
 quiet_cmd_mkpiggy = MKPIGGY $@
      cmd_mkpiggy = $(obj)/mkpiggy $< $(RUN_SIZE) > $@ || ( rm -f $@ ; false )

--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@ -373,6 +373,8 @@ asmlinkage __visible void *decompress_kernel(void *rmode, memptr heap,
 				  unsigned long output_len,
 				  unsigned long run_size)
 {
+	unsigned char *output_orig = output;
+
 	real_mode = rmode;

 	sanitize_boot_params(real_mode);
@ -421,7 +423,12 @@ asmlinkage __visible void *decompress_kernel(void *rmode, memptr heap,
 	debug_putstr("\nDecompressing Linux... ");
 	decompress(input_data, input_len, NULL, NULL, output, NULL, error);
 	parse_elf(output);
-	handle_relocations(output, output_len);
+	/*
+	 * 32-bit always performs relocations. 64-bit relocations are only
+	 * needed if kASLR has chosen a different load address.
+	 */
+	if (!IS_ENABLED(CONFIG_X86_64) || output != output_orig)
+		handle_relocations(output, output_len);
 	debug_putstr("done.\nBooting the kernel.\n");
 	return output;
 }
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@ -50,6 +50,7 @@ void acpi_pic_sci_set_trigger(unsigned int, u16);

 extern int (*__acpi_register_gsi)(struct device *dev, u32 gsi,
 				  int trigger, int polarity);
+extern void (*__acpi_unregister_gsi)(u32 gsi);

 static inline void disable_acpi(void)
 {
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@ -251,7 +251,8 @@ static inline void native_load_tls(struct thread_struct *t, unsigned int cpu)
 		gdt[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i];
 }

-#define _LDT_empty(info)				\
+/* This intentionally ignores lm, since 32-bit apps don't have that field. */
+#define LDT_empty(info)					\
 	((info)->base_addr		== 0	&&	\
 	 (info)->limit			== 0	&&	\
 	 (info)->contents		== 0	&&	\
@ -261,11 +262,18 @@ static inline void native_load_tls(struct thread_struct *t, unsigned int cpu)
 	 (info)->seg_not_present	== 1	&&	\
 	 (info)->useable		== 0)

-#ifdef CONFIG_X86_64
-#define LDT_empty(info) (_LDT_empty(info) && ((info)->lm == 0))
-#else
-#define LDT_empty(info) (_LDT_empty(info))
-#endif
+/* Lots of programs expect an all-zero user_desc to mean "no segment at all". */
+static inline bool LDT_zero(const struct user_desc *info)
+{
+	return (info->base_addr		== 0 &&
+		info->limit		== 0 &&
+		info->contents		== 0 &&
+		info->read_exec_only	== 0 &&
+		info->seg_32bit		== 0 &&
+		info->limit_in_pages	== 0 &&
+		info->seg_not_present	== 0 &&
+		info->useable		== 0);
+}

 static inline void clear_LDT(void)
 {
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@ -130,7 +130,25 @@ static inline void arch_bprm_mm_init(struct mm_struct *mm,
 static inline void arch_unmap(struct mm_struct *mm, struct vm_area_struct *vma,
 			      unsigned long start, unsigned long end)
 {
-	mpx_notify_unmap(mm, vma, start, end);
+	/*
+	 * mpx_notify_unmap() goes and reads a rarely-hot
+	 * cacheline in the mm_struct.  That can be expensive
+	 * enough to be seen in profiles.
+	 *
+	 * The mpx_notify_unmap() call and its contents have been
+	 * observed to affect munmap() performance on hardware
+	 * where MPX is not present.
+	 *
+	 * The unlikely() optimizes for the fast case: no MPX
+	 * in the CPU, or no MPX use in the process.  Even if
+	 * we get this wrong (in the unlikely event that MPX
+	 * is widely enabled on some system) the overhead of
+	 * MPX itself (reading bounds tables) is expected to
+	 * overwhelm the overhead of getting this unlikely()
+	 * consistently wrong.
+	 */
+	if (unlikely(cpu_feature_enabled(X86_FEATURE_MPX)))
+		mpx_notify_unmap(mm, vma, start, end);
 }

 #endif /* _ASM_X86_MMU_CONTEXT_H */
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@ -611,20 +611,20 @@ void __init acpi_pic_sci_set_trigger(unsigned int irq, u16 trigger)

 int acpi_gsi_to_irq(u32 gsi, unsigned int *irqp)
 {
-	int irq;
+	int rc, irq, trigger, polarity;

-	if (acpi_irq_model == ACPI_IRQ_MODEL_PIC) {
-		*irqp = gsi;
-	} else {
-		mutex_lock(&acpi_ioapic_lock);
-		irq = mp_map_gsi_to_irq(gsi,
-					IOAPIC_MAP_ALLOC | IOAPIC_MAP_CHECK);
-		mutex_unlock(&acpi_ioapic_lock);
-		if (irq < 0)
-			return -1;
-		*irqp = irq;
+	rc = acpi_get_override_irq(gsi, &trigger, &polarity);
+	if (rc == 0) {
+		trigger = trigger ? ACPI_LEVEL_SENSITIVE : ACPI_EDGE_SENSITIVE;
+		polarity = polarity ? ACPI_ACTIVE_LOW : ACPI_ACTIVE_HIGH;
+		irq = acpi_register_gsi(NULL, gsi, trigger, polarity);
+		if (irq >= 0) {
+			*irqp = irq;
+			return 0;
+		}
 	}
-	return 0;
+
+	return -1;
 }
 EXPORT_SYMBOL_GPL(acpi_gsi_to_irq);

--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@ -107,6 +107,7 @@ static struct clocksource hyperv_cs = {
 	.rating		= 400, /* use this when running on Hyperv*/
 	.read		= read_hv_clock,
 	.mask		= CLOCKSOURCE_MASK(64),
+	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
 };

 static void __init ms_hyperv_init_platform(void)
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@ -674,7 +674,7 @@ static inline void *alloc_tramp(unsigned long size)
 }
 static inline void tramp_free(void *tramp)
 {
-	module_free(NULL, tramp);
+	module_memfree(tramp);
 }
 #else
 /* Trampolines can only be created if modules are supported */
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@ -127,7 +127,7 @@ int arch_show_interrupts(struct seq_file *p, int prec)
 	seq_puts(p, "  Machine check polls\n");
 #endif
 #if IS_ENABLED(CONFIG_HYPERV) || defined(CONFIG_XEN)
-	seq_printf(p, "%*s: ", prec, "THR");
+	seq_printf(p, "%*s: ", prec, "HYP");
 	for_each_online_cpu(j)
 		seq_printf(p, "%10u ", irq_stats(j)->irq_hv_callback_count);
 	seq_puts(p, "  Hypervisor callback interrupts\n");
--- a/arch/x86/kernel/tls.c
+++ b/arch/x86/kernel/tls.c
@ -29,7 +29,28 @@ static int get_free_idx(void)

 static bool tls_desc_okay(const struct user_desc *info)
 {
-	if (LDT_empty(info))
+	/*
+	 * For historical reasons (i.e. no one ever documented how any
+	 * of the segmentation APIs work), user programs can and do
+	 * assume that a struct user_desc that's all zeros except for
+	 * entry_number means "no segment at all".  This never actually
+	 * worked.  In fact, up to Linux 3.19, a struct user_desc like
+	 * this would create a 16-bit read-write segment with base and
+	 * limit both equal to zero.
+	 *
+	 * That was close enough to "no segment at all" until we
+	 * hardened this function to disallow 16-bit TLS segments.  Fix
+	 * it up by interpreting these zeroed segments the way that they
+	 * were almost certainly intended to be interpreted.
+	 *
+	 * The correct way to ask for "no segment at all" is to specify
+	 * a user_desc that satisfies LDT_empty.  To keep everything
+	 * working, we accept both.
+	 *
+	 * Note that there's a similar kludge in modify_ldt -- look at
+	 * the distinction between modes 1 and 0x11.
+	 */
+	if (LDT_empty(info) || LDT_zero(info))
 		return true;

 	/*
@ -71,7 +92,7 @@ static void set_tls_desc(struct task_struct *p, int idx,
 	cpu = get_cpu();

 	while (n-- > 0) {
-		if (LDT_empty(info))
+		if (LDT_empty(info) || LDT_zero(info))
 			desc->a = desc->b = 0;
 		else
 			fill_ldt(desc, info);
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@ -617,7 +617,7 @@ static unsigned long quick_pit_calibrate(void)
 			goto success;
 		}
 	}
-	pr_err("Fast TSC calibration failed\n");
+	pr_info("Fast TSC calibration failed\n");
 	return 0;

 success:
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@ -2348,7 +2348,7 @@ static int em_sysenter(struct x86_emulate_ctxt *ctxt)
 	 * Not recognized on AMD in compat mode (but is recognized in legacy
 	 * mode).
 	 */
-	if ((ctxt->mode == X86EMUL_MODE_PROT32) && (efer & EFER_LMA)
+	if ((ctxt->mode != X86EMUL_MODE_PROT64) && (efer & EFER_LMA)
 	    && !vendor_intel(ctxt))
 		return emulate_ud(ctxt);

@ -2359,25 +2359,13 @@ static int em_sysenter(struct x86_emulate_ctxt *ctxt)
 	setup_syscalls_segments(ctxt, &cs, &ss);

 	ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data);
-	switch (ctxt->mode) {
-	case X86EMUL_MODE_PROT32:
-		if ((msr_data & 0xfffc) == 0x0)
-			return emulate_gp(ctxt, 0);
-		break;
-	case X86EMUL_MODE_PROT64:
-		if (msr_data == 0x0)
-			return emulate_gp(ctxt, 0);
-		break;
-	default:
-		break;
-	}
+	if ((msr_data & 0xfffc) == 0x0)
+		return emulate_gp(ctxt, 0);

 	ctxt->eflags &= ~(EFLG_VM | EFLG_IF);
-	cs_sel = (u16)msr_data;
-	cs_sel &= ~SELECTOR_RPL_MASK;
+	cs_sel = (u16)msr_data & ~SELECTOR_RPL_MASK;
 	ss_sel = cs_sel + 8;
-	ss_sel &= ~SELECTOR_RPL_MASK;
-	if (ctxt->mode == X86EMUL_MODE_PROT64 || (efer & EFER_LMA)) {
+	if (efer & EFER_LMA) {
 		cs.d = 0;
 		cs.l = 1;
 	}
@ -2386,10 +2374,11 @@ static int em_sysenter(struct x86_emulate_ctxt *ctxt)
 	ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);

 	ops->get_msr(ctxt, MSR_IA32_SYSENTER_EIP, &msr_data);
-	ctxt->_eip = msr_data;
+	ctxt->_eip = (efer & EFER_LMA) ? msr_data : (u32)msr_data;

 	ops->get_msr(ctxt, MSR_IA32_SYSENTER_ESP, &msr_data);
-	*reg_write(ctxt, VCPU_REGS_RSP) = msr_data;
+	*reg_write(ctxt, VCPU_REGS_RSP) = (efer & EFER_LMA) ? msr_data :
+							      (u32)msr_data;

 	return X86EMUL_CONTINUE;
 }
@ -3791,8 +3780,8 @@ static const struct opcode group5[] = {
 };

 static const struct opcode group6[] = {
-	DI(Prot,	sldt),
-	DI(Prot,	str),
+	DI(Prot | DstMem,	sldt),
+	DI(Prot | DstMem,	str),
 	II(Prot | Priv | SrcMem16, em_lldt, lldt),
 	II(Prot | Priv | SrcMem16, em_ltr, ltr),
 	N, N, N, N,
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@ -43,7 +43,7 @@ uint16_t __cachemode2pte_tbl[_PAGE_CACHE_MODE_NUM] = {
 	[_PAGE_CACHE_MODE_WT]		= _PAGE_PCD,
 	[_PAGE_CACHE_MODE_WP]		= _PAGE_PCD,
 };
-EXPORT_SYMBOL_GPL(__cachemode2pte_tbl);
+EXPORT_SYMBOL(__cachemode2pte_tbl);
 uint8_t __pte2cachemode_tbl[8] = {
 	[__pte2cm_idx(0)] = _PAGE_CACHE_MODE_WB,
 	[__pte2cm_idx(_PAGE_PWT)] = _PAGE_CACHE_MODE_WC,
@ -54,7 +54,7 @@ uint8_t __pte2cachemode_tbl[8] = {
 	[__pte2cm_idx(_PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC_MINUS,
 	[__pte2cm_idx(_PAGE_PWT | _PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC,
 };
-EXPORT_SYMBOL_GPL(__pte2cachemode_tbl);
+EXPORT_SYMBOL(__pte2cachemode_tbl);

 static unsigned long __initdata pgt_buf_start;
 static unsigned long __initdata pgt_buf_end;
--- a/arch/x86/mm/mpx.c
+++ b/arch/x86/mm/mpx.c
@ -348,6 +348,12 @@ static __user void *task_get_bounds_dir(struct task_struct *tsk)
 	if (!cpu_feature_enabled(X86_FEATURE_MPX))
 		return MPX_INVALID_BOUNDS_DIR;

+	/*
+	 * 32-bit binaries on 64-bit kernels are currently
+	 * unsupported.
+	 */
+	if (IS_ENABLED(CONFIG_X86_64) && test_thread_flag(TIF_IA32))
+		return MPX_INVALID_BOUNDS_DIR;
 	/*
 	 * The bounds directory pointer is stored in a register
 	 * only accessible if we first do an xsave.
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@ -234,8 +234,13 @@ void pat_init(void)
 	      PAT(4, WB) | PAT(5, WC) | PAT(6, UC_MINUS) | PAT(7, UC);

 	/* Boot CPU check */
-	if (!boot_pat_state)
+	if (!boot_pat_state) {
 		rdmsrl(MSR_IA32_CR_PAT, boot_pat_state);
+		if (!boot_pat_state) {
+			pat_disable("PAT read returns always zero, disabled.");
+			return;
+		}
+	}

 	wrmsrl(MSR_IA32_CR_PAT, pat);

--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@ -216,7 +216,7 @@ static void pcibios_allocate_bridge_resources(struct pci_dev *dev)
 			continue;
 		if (r->parent)	/* Already allocated */
 			continue;
-		if (!r->start || pci_claim_resource(dev, idx) < 0) {
+		if (!r->start || pci_claim_bridge_resource(dev, idx) < 0) {
 			/*
 			 * Something is wrong with the region.
 			 * Invalidate the resource to prevent
--- a/arch/x86/pci/xen.c
+++ b/arch/x86/pci/xen.c
@ -458,6 +458,7 @@ int __init pci_xen_hvm_init(void)
 	 * just how GSIs get registered.
 	 */
 	__acpi_register_gsi = acpi_register_gsi_xen_hvm;
+	__acpi_unregister_gsi = NULL;
 #endif

 #ifdef CONFIG_PCI_MSI
@ -471,52 +472,6 @@ int __init pci_xen_hvm_init(void)
 }

 #ifdef CONFIG_XEN_DOM0
-static __init void xen_setup_acpi_sci(void)
-{
-	int rc;
-	int trigger, polarity;
-	int gsi = acpi_sci_override_gsi;
-	int irq = -1;
-	int gsi_override = -1;
-
-	if (!gsi)
-		return;
-
-	rc = acpi_get_override_irq(gsi, &trigger, &polarity);
-	if (rc) {
-		printk(KERN_WARNING "xen: acpi_get_override_irq failed for acpi"
-				" sci, rc=%d\n", rc);
-		return;
-	}
-	trigger = trigger ? ACPI_LEVEL_SENSITIVE : ACPI_EDGE_SENSITIVE;
-	polarity = polarity ? ACPI_ACTIVE_LOW : ACPI_ACTIVE_HIGH;
-
-	printk(KERN_INFO "xen: sci override: global_irq=%d trigger=%d "
-			"polarity=%d\n", gsi, trigger, polarity);
-
-	/* Before we bind the GSI to a Linux IRQ, check whether
-	 * we need to override it with bus_irq (IRQ) value. Usually for
-	 * IRQs below IRQ_LEGACY_IRQ this holds IRQ == GSI, as so:
-	 *  ACPI: INT_SRC_OVR (bus 0 bus_irq 9 global_irq 9 low level)
-	 * but there are oddballs where the IRQ != GSI:
-	 *  ACPI: INT_SRC_OVR (bus 0 bus_irq 9 global_irq 20 low level)
-	 * which ends up being: gsi_to_irq[9] == 20
-	 * (which is what acpi_gsi_to_irq ends up calling when starting the
-	 * the ACPI interpreter and keels over since IRQ 9 has not been
-	 * setup as we had setup IRQ 20 for it).
-	 */
-	if (acpi_gsi_to_irq(gsi, &irq) == 0) {
-		/* Use the provided value if it's valid. */
-		if (irq >= 0)
-			gsi_override = irq;
-	}
-
-	gsi = xen_register_gsi(gsi, gsi_override, trigger, polarity);
-	printk(KERN_INFO "xen: acpi sci %d\n", gsi);
-
-	return;
-}
-
 int __init pci_xen_initial_domain(void)
 {
 	int irq;
@ -527,8 +482,8 @@ int __init pci_xen_initial_domain(void)
 	x86_msi.restore_msi_irqs = xen_initdom_restore_msi_irqs;
 	pci_msi_ignore_mask = 1;
 #endif
-	xen_setup_acpi_sci();
 	__acpi_register_gsi = acpi_register_gsi_xen;
+	__acpi_unregister_gsi = NULL;
 	/* Pre-allocate legacy irqs */
 	for (irq = 0; irq < nr_legacy_irqs(); irq++) {
 		int trigger, polarity;
--- a/arch/x86/tools/calc_run_size.pl
+++ b/arch/x86/tools/calc_run_size.pl
@ -1,39 +0,0 @@
-#!/usr/bin/perl
-#
-# Calculate the amount of space needed to run the kernel, including room for
-# the .bss and .brk sections.
-#
-# Usage:
-# objdump -h a.out | perl calc_run_size.pl
-use strict;
-
-my $mem_size = 0;
-my $file_offset = 0;
-
-my $sections=" *[0-9]+ \.(?:bss|brk) +";
-while (<>) {
-	if (/^$sections([0-9a-f]+) +(?:[0-9a-f]+ +){2}([0-9a-f]+)/) {
-		my $size = hex($1);
-		my $offset = hex($2);
-		$mem_size += $size;
-		if ($file_offset == 0) {
-			$file_offset = $offset;
-		} elsif ($file_offset != $offset) {
-			# BFD linker shows the same file offset in ELF.
-			# Gold linker shows them as consecutive.
-			next if ($file_offset + $mem_size == $offset + $size);
-
-			printf STDERR "file_offset: 0x%lx\n", $file_offset;
-			printf STDERR "mem_size: 0x%lx\n", $mem_size;
-			printf STDERR "offset: 0x%lx\n", $offset;
-			printf STDERR "size: 0x%lx\n", $size;
-
-			die ".bss and .brk are non-contiguous\n";
-		}
-	}
-}
-
-if ($file_offset == 0) {
-	die "Never found .bss or .brk file offset\n";
-}
-printf("%d\n", $mem_size + $file_offset);
--- a/arch/x86/tools/calc_run_size.sh
+++ b/arch/x86/tools/calc_run_size.sh
@ -0,0 +1,42 @@
+#!/bin/sh
+#
+# Calculate the amount of space needed to run the kernel, including room for
+# the .bss and .brk sections.
+#
+# Usage:
+# objdump -h a.out | sh calc_run_size.sh
+
+NUM='\([0-9a-fA-F]*[ \t]*\)'
+OUT=$(sed -n 's/^[ \t0-9]*.b[sr][sk][ \t]*'"$NUM$NUM$NUM$NUM"'.*/\1\4/p')
+if [ -z "$OUT" ] ; then
+	echo "Never found .bss or .brk file offset" >&2
+	exit 1
+fi
+
+OUT=$(echo ${OUT# })
+sizeA=$(printf "%d" 0x${OUT%% *})
+OUT=${OUT#* }
+offsetA=$(printf "%d" 0x${OUT%% *})
+OUT=${OUT#* }
+sizeB=$(printf "%d" 0x${OUT%% *})
+OUT=${OUT#* }
+offsetB=$(printf "%d" 0x${OUT%% *})
+
+run_size=$(( $offsetA + $sizeA + $sizeB ))
+
+# BFD linker shows the same file offset in ELF.
+if [ "$offsetA" -ne "$offsetB" ] ; then
+	# Gold linker shows them as consecutive.
+	endB=$(( $offsetB + $sizeB ))
+	if [ "$endB" != "$run_size" ] ; then
+		printf "sizeA: 0x%x\n" $sizeA >&2
+		printf "offsetA: 0x%x\n" $offsetA >&2
+		printf "sizeB: 0x%x\n" $sizeB >&2
+		printf "offsetB: 0x%x\n" $offsetB >&2
+		echo ".bss and .brk are non-contiguous" >&2
+		exit 1
+	fi
+fi
+
+printf "%d\n" $run_size
+exit 0
--- a/block/blk-mq-sysfs.c
+++ b/block/blk-mq-sysfs.c
@ -15,6 +15,26 @@

 static void blk_mq_sysfs_release(struct kobject *kobj)
 {
+	struct request_queue *q;
+
+	q = container_of(kobj, struct request_queue, mq_kobj);
+	free_percpu(q->queue_ctx);
+}
+
+static void blk_mq_ctx_release(struct kobject *kobj)
+{
+	struct blk_mq_ctx *ctx;
+
+	ctx = container_of(kobj, struct blk_mq_ctx, kobj);
+	kobject_put(&ctx->queue->mq_kobj);
+}
+
+static void blk_mq_hctx_release(struct kobject *kobj)
+{
+	struct blk_mq_hw_ctx *hctx;
+
+	hctx = container_of(kobj, struct blk_mq_hw_ctx, kobj);
+	kfree(hctx);
 }

 struct blk_mq_ctx_sysfs_entry {
@ -318,13 +338,13 @@ static struct kobj_type blk_mq_ktype = {
 static struct kobj_type blk_mq_ctx_ktype = {
 	.sysfs_ops	= &blk_mq_sysfs_ops,
 	.default_attrs	= default_ctx_attrs,
-	.release	= blk_mq_sysfs_release,
+	.release	= blk_mq_ctx_release,
 };

 static struct kobj_type blk_mq_hw_ktype = {
 	.sysfs_ops	= &blk_mq_hw_sysfs_ops,
 	.default_attrs	= default_hw_ctx_attrs,
-	.release	= blk_mq_sysfs_release,
+	.release	= blk_mq_hctx_release,
 };

 static void blk_mq_unregister_hctx(struct blk_mq_hw_ctx *hctx)
@ -355,6 +375,7 @@ static int blk_mq_register_hctx(struct blk_mq_hw_ctx *hctx)
 		return ret;

 	hctx_for_each_ctx(hctx, ctx, i) {
+		kobject_get(&q->mq_kobj);
 		ret = kobject_add(&ctx->kobj, &hctx->kobj, "cpu%u", ctx->cpu);
 		if (ret)
 			break;
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@ -1641,10 +1641,8 @@ static void blk_mq_free_hw_queues(struct request_queue *q,
 	struct blk_mq_hw_ctx *hctx;
 	unsigned int i;

-	queue_for_each_hw_ctx(q, hctx, i) {
+	queue_for_each_hw_ctx(q, hctx, i)
 		free_cpumask_var(hctx->cpumask);
-		kfree(hctx);
-	}
 }

 static int blk_mq_init_hctx(struct request_queue *q,
@ -2002,11 +2000,9 @@ void blk_mq_free_queue(struct request_queue *q)

 	percpu_ref_exit(&q->mq_usage_counter);

-	free_percpu(q->queue_ctx);
 	kfree(q->queue_hw_ctx);
 	kfree(q->mq_map);

-	q->queue_ctx = NULL;
 	q->queue_hw_ctx = NULL;
 	q->mq_map = NULL;

--- a/drivers/acpi/pci_irq.c
+++ b/drivers/acpi/pci_irq.c
@ -512,7 +512,6 @@ void acpi_pci_irq_disable(struct pci_dev *dev)
 	dev_dbg(&dev->dev, "PCI INT %c disabled\n", pin_name(pin));
 	if (gsi >= 0) {
 		acpi_unregister_gsi(gsi);
-		dev->irq = 0;
 		dev->irq_managed = 0;
 	}
 }
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@ -106,7 +106,7 @@ struct nvme_queue {
 	dma_addr_t cq_dma_addr;
 	u32 __iomem *q_db;
 	u16 q_depth;
-	u16 cq_vector;
+	s16 cq_vector;
 	u16 sq_head;
 	u16 sq_tail;
 	u16 cq_head;
--- a/drivers/bus/mvebu-mbus.c
+++ b/drivers/bus/mvebu-mbus.c
@ -210,12 +210,25 @@ static void mvebu_mbus_disable_window(struct mvebu_mbus_state *mbus,
 }

 /* Checks whether the given window number is available */
+
+/* On Armada XP, 375 and 38x the MBus window 13 has the remap
+ * capability, like windows 0 to 7. However, the mvebu-mbus driver
+ * isn't currently taking into account this special case, which means
+ * that when window 13 is actually used, the remap registers are left
+ * to 0, making the device using this MBus window unavailable. The
+ * quick fix for stable is to not use window 13. A follow up patch
+ * will correctly handle this window.
+*/
 static int mvebu_mbus_window_is_free(struct mvebu_mbus_state *mbus,
 				     const int win)
 {
 	void __iomem *addr = mbus->mbuswins_base +
 		mbus->soc->win_cfg_offset(win);
 	u32 ctrl = readl(addr + WIN_CTRL_OFF);
+
+	if (win == 13)
+		return false;
+
 	return !(ctrl & WIN_CTRL_ENABLE);
 }

--- a/drivers/clocksource/bcm_kona_timer.c
+++ b/drivers/clocksource/bcm_kona_timer.c
@ -68,9 +68,8 @@ static void kona_timer_disable_and_clear(void __iomem *base)
 }

 static void
-kona_timer_get_counter(void *timer_base, uint32_t *msw, uint32_t *lsw)
+kona_timer_get_counter(void __iomem *timer_base, uint32_t *msw, uint32_t *lsw)
 {
-	void __iomem *base = IOMEM(timer_base);
 	int loop_limit = 4;

 	/*
@ -86,9 +85,9 @@ kona_timer_get_counter(void *timer_base, uint32_t *msw, uint32_t *lsw)
 	 */

 	while (--loop_limit) {
-		*msw = readl(base + KONA_GPTIMER_STCHI_OFFSET);
-		*lsw = readl(base + KONA_GPTIMER_STCLO_OFFSET);
-		if (*msw == readl(base + KONA_GPTIMER_STCHI_OFFSET))
+		*msw = readl(timer_base + KONA_GPTIMER_STCHI_OFFSET);
+		*lsw = readl(timer_base + KONA_GPTIMER_STCLO_OFFSET);
+		if (*msw == readl(timer_base + KONA_GPTIMER_STCHI_OFFSET))
 			break;
 	}
 	if (!loop_limit) {
--- a/drivers/clocksource/exynos_mct.c
+++ b/drivers/clocksource/exynos_mct.c
@ -97,8 +97,8 @@ static void exynos4_mct_write(unsigned int value, unsigned long offset)
 	writel_relaxed(value, reg_base + offset);

 	if (likely(offset >= EXYNOS4_MCT_L_BASE(0))) {
-		stat_addr = (offset & ~EXYNOS4_MCT_L_MASK) + MCT_L_WSTAT_OFFSET;
-		switch (offset & EXYNOS4_MCT_L_MASK) {
+		stat_addr = (offset & EXYNOS4_MCT_L_MASK) + MCT_L_WSTAT_OFFSET;
+		switch (offset & ~EXYNOS4_MCT_L_MASK) {
 		case MCT_L_TCON_OFFSET:
 			mask = 1 << 3;		/* L_TCON write status */
 			break;
--- a/drivers/clocksource/sh_tmu.c
+++ b/drivers/clocksource/sh_tmu.c
@ -428,7 +428,7 @@ static void sh_tmu_register_clockevent(struct sh_tmu_channel *ch,
 	ced->features = CLOCK_EVT_FEAT_PERIODIC;
 	ced->features |= CLOCK_EVT_FEAT_ONESHOT;
 	ced->rating = 200;
-	ced->cpumask = cpumask_of(0);
+	ced->cpumask = cpu_possible_mask;
 	ced->set_next_event = sh_tmu_clock_event_next;
 	ced->set_mode = sh_tmu_clock_event_mode;
 	ced->suspend = sh_tmu_clock_event_suspend;
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@ -183,16 +183,15 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
 	kfd->shared_resources = *gpu_resources;

 	/* calculate max size of mqds needed for queues */
-	size = max_num_of_processes *
-		max_num_of_queues_per_process *
-		kfd->device_info->mqd_size_aligned;
+	size = max_num_of_queues_per_device *
+			kfd->device_info->mqd_size_aligned;

 	/*
 	 * calculate max size of runlist packet.
 	 * There can be only 2 packets at once
 	 */
-	size += (max_num_of_processes * sizeof(struct pm4_map_process) +
-		max_num_of_processes * max_num_of_queues_per_process *
+	size += (KFD_MAX_NUM_OF_PROCESSES * sizeof(struct pm4_map_process) +
+		max_num_of_queues_per_device *
 		sizeof(struct pm4_map_queues) + sizeof(struct pm4_runlist)) * 2;

 	/* Add size of HIQ & DIQ */
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@ -135,6 +135,13 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm,

 	mutex_lock(&dqm->lock);

+	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
+		pr_warn("amdkfd: Can't create new usermode queue because %d queues were already created\n",
+				dqm->total_queue_count);
+		mutex_unlock(&dqm->lock);
+		return -EPERM;
+	}
+
 	if (list_empty(&qpd->queues_list)) {
 		retval = allocate_vmid(dqm, qpd, q);
 		if (retval != 0) {
@ -161,8 +168,18 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm,

 	list_add(&q->list, &qpd->queues_list);
 	dqm->queue_count++;
+
 	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
 		dqm->sdma_queue_count++;
+
+	/*
+	 * Unconditionally increment this counter, regardless of the queue's
+	 * type or whether the queue is active.
+	 */
+	dqm->total_queue_count++;
+	pr_debug("Total of %d queues are accountable so far\n",
+			dqm->total_queue_count);
+
 	mutex_unlock(&dqm->lock);
 	return 0;
 }
@ -297,6 +314,15 @@ static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
 	if (list_empty(&qpd->queues_list))
 		deallocate_vmid(dqm, qpd, q);
 	dqm->queue_count--;
+
+	/*
+	 * Unconditionally decrement this counter, regardless of the queue's
+	 * type
+	 */
+	dqm->total_queue_count--;
+	pr_debug("Total of %d queues are accountable so far\n",
+			dqm->total_queue_count);
+
 out:
 	mutex_unlock(&dqm->lock);
 	return retval;
@ -470,10 +496,14 @@ int init_pipelines(struct device_queue_manager *dqm,

 	for (i = 0; i < pipes_num; i++) {
 		inx = i + first_pipe;
+		/*
+		 * HPD buffer on GTT is allocated by amdkfd, no need to waste
+		 * space in GTT for pipelines we don't initialize
+		 */
 		pipe_hpd_addr = dqm->pipelines_addr + i * CIK_HPD_EOP_BYTES;
 		pr_debug("kfd: pipeline address %llX\n", pipe_hpd_addr);
 		/* = log2(bytes/4)-1 */
-		kfd2kgd->init_pipeline(dqm->dev->kgd, i,
+		kfd2kgd->init_pipeline(dqm->dev->kgd, inx,
 				CIK_HPD_EOP_BYTES_LOG2 - 3, pipe_hpd_addr);
 	}

@ -488,8 +518,7 @@ static int init_scheduler(struct device_queue_manager *dqm)

 	pr_debug("kfd: In %s\n", __func__);

-	retval = init_pipelines(dqm, get_pipes_num(dqm), KFD_DQM_FIRST_PIPE);
-
+	retval = init_pipelines(dqm, get_pipes_num(dqm), get_first_pipe(dqm));
 	return retval;
 }

@ -744,6 +773,21 @@ static int create_kernel_queue_cpsch(struct device_queue_manager *dqm,
 	pr_debug("kfd: In func %s\n", __func__);

 	mutex_lock(&dqm->lock);
+	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
+		pr_warn("amdkfd: Can't create new kernel queue because %d queues were already created\n",
+				dqm->total_queue_count);
+		mutex_unlock(&dqm->lock);
+		return -EPERM;
+	}
+
+	/*
+	 * Unconditionally increment this counter, regardless of the queue's
+	 * type or whether the queue is active.
+	 */
+	dqm->total_queue_count++;
+	pr_debug("Total of %d queues are accountable so far\n",
+			dqm->total_queue_count);
+
 	list_add(&kq->list, &qpd->priv_queue_list);
 	dqm->queue_count++;
 	qpd->is_debug = true;
@ -767,6 +811,13 @@ static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
 	dqm->queue_count--;
 	qpd->is_debug = false;
 	execute_queues_cpsch(dqm, false);
+	/*
+	 * Unconditionally decrement this counter, regardless of the queue's
+	 * type.
+	 */
+	dqm->total_queue_count++;
+	pr_debug("Total of %d queues are accountable so far\n",
+			dqm->total_queue_count);
 	mutex_unlock(&dqm->lock);
 }

@ -793,6 +844,13 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,

 	mutex_lock(&dqm->lock);

+	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
+		pr_warn("amdkfd: Can't create new usermode queue because %d queues were already created\n",
+				dqm->total_queue_count);
+		retval = -EPERM;
+		goto out;
+	}
+
 	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
 		select_sdma_engine_id(q);

@ -817,6 +875,14 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,

 	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
 			dqm->sdma_queue_count++;
+	/*
+	 * Unconditionally increment this counter, regardless of the queue's
+	 * type or whether the queue is active.
+	 */
+	dqm->total_queue_count++;
+
+	pr_debug("Total of %d queues are accountable so far\n",
+			dqm->total_queue_count);

 out:
 	mutex_unlock(&dqm->lock);
@ -958,6 +1024,14 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,

 	mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);

+	/*
+	 * Unconditionally decrement this counter, regardless of the queue's
+	 * type
+	 */
+	dqm->total_queue_count--;
+	pr_debug("Total of %d queues are accountable so far\n",
+			dqm->total_queue_count);
+
 	mutex_unlock(&dqm->lock);

 	return 0;
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@ -144,6 +144,7 @@ struct device_queue_manager {
 	unsigned int		processes_count;
 	unsigned int		queue_count;
 	unsigned int		sdma_queue_count;
+	unsigned int		total_queue_count;
 	unsigned int		next_pipe_to_allocate;
 	unsigned int		*allocated_queues;
 	unsigned int		sdma_bitmap;
--- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
@ -50,15 +50,10 @@ module_param(sched_policy, int, 0444);
 MODULE_PARM_DESC(sched_policy,
 	"Scheduling policy (0 = HWS (Default), 1 = HWS without over-subscription, 2 = Non-HWS (Used for debugging only)");

-int max_num_of_processes = KFD_MAX_NUM_OF_PROCESSES_DEFAULT;
-module_param(max_num_of_processes, int, 0444);
-MODULE_PARM_DESC(max_num_of_processes,
-	"Kernel cmdline parameter that defines the amdkfd maximum number of supported processes");
-
-int max_num_of_queues_per_process = KFD_MAX_NUM_OF_QUEUES_PER_PROCESS_DEFAULT;
-module_param(max_num_of_queues_per_process, int, 0444);
-MODULE_PARM_DESC(max_num_of_queues_per_process,
-	"Kernel cmdline parameter that defines the amdkfd maximum number of supported queues per process");
+int max_num_of_queues_per_device = KFD_MAX_NUM_OF_QUEUES_PER_DEVICE_DEFAULT;
+module_param(max_num_of_queues_per_device, int, 0444);
+MODULE_PARM_DESC(max_num_of_queues_per_device,
+	"Maximum number of supported queues per device (1 = Minimum, 4096 = default)");

 bool kgd2kfd_init(unsigned interface_version,
 		  const struct kfd2kgd_calls *f2g,
@ -100,16 +95,10 @@ static int __init kfd_module_init(void)
 	}

 	/* Verify module parameters */
-	if ((max_num_of_processes < 0) ||
-		(max_num_of_processes > KFD_MAX_NUM_OF_PROCESSES)) {
-		pr_err("kfd: max_num_of_processes must be between 0 to KFD_MAX_NUM_OF_PROCESSES\n");
-		return -1;
-	}
-
-	if ((max_num_of_queues_per_process < 0) ||
-		(max_num_of_queues_per_process >
-			KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)) {
-		pr_err("kfd: max_num_of_queues_per_process must be between 0 to KFD_MAX_NUM_OF_QUEUES_PER_PROCESS\n");
+	if ((max_num_of_queues_per_device < 0) ||
+		(max_num_of_queues_per_device >
+			KFD_MAX_NUM_OF_QUEUES_PER_DEVICE)) {
+		pr_err("kfd: max_num_of_queues_per_device must be between 0 to KFD_MAX_NUM_OF_QUEUES_PER_DEVICE\n");
 		return -1;
 	}

--- a/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c
@ -30,7 +30,7 @@ static DEFINE_MUTEX(pasid_mutex);

 int kfd_pasid_init(void)
 {
-	pasid_limit = max_num_of_processes;
+	pasid_limit = KFD_MAX_NUM_OF_PROCESSES;

 	pasid_bitmap = kcalloc(BITS_TO_LONGS(pasid_limit), sizeof(long), GFP_KERNEL);
 	if (!pasid_bitmap)
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@ -52,20 +52,19 @@
 #define kfd_alloc_struct(ptr_to_struct)	\
 	((typeof(ptr_to_struct)) kzalloc(sizeof(*ptr_to_struct), GFP_KERNEL))

-/* Kernel module parameter to specify maximum number of supported processes */
-extern int max_num_of_processes;
-
-#define KFD_MAX_NUM_OF_PROCESSES_DEFAULT 32
 #define KFD_MAX_NUM_OF_PROCESSES 512
+#define KFD_MAX_NUM_OF_QUEUES_PER_PROCESS 1024

 /*
- * Kernel module parameter to specify maximum number of supported queues
- * per process
+ * Kernel module parameter to specify maximum number of supported queues per
+ * device
 */
-extern int max_num_of_queues_per_process;
+extern int max_num_of_queues_per_device;

-#define KFD_MAX_NUM_OF_QUEUES_PER_PROCESS_DEFAULT 128
-#define KFD_MAX_NUM_OF_QUEUES_PER_PROCESS 1024
+#define KFD_MAX_NUM_OF_QUEUES_PER_DEVICE_DEFAULT 4096
+#define KFD_MAX_NUM_OF_QUEUES_PER_DEVICE		\
+	(KFD_MAX_NUM_OF_PROCESSES *			\
+			KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)

 #define KFD_KERNEL_QUEUE_SIZE 2048

--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@ -54,11 +54,11 @@ static int find_available_queue_slot(struct process_queue_manager *pqm,
 	pr_debug("kfd: in %s\n", __func__);

 	found = find_first_zero_bit(pqm->queue_slot_bitmap,
-			max_num_of_queues_per_process);
+			KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);

 	pr_debug("kfd: the new slot id %lu\n", found);

-	if (found >= max_num_of_queues_per_process) {
+	if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) {
 		pr_info("amdkfd: Can not open more queues for process with pasid %d\n",
 				pqm->process->pasid);
 		return -ENOMEM;
@ -76,7 +76,7 @@ int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p)

 	INIT_LIST_HEAD(&pqm->queues);
 	pqm->queue_slot_bitmap =
-			kzalloc(DIV_ROUND_UP(max_num_of_queues_per_process,
+			kzalloc(DIV_ROUND_UP(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
 					BITS_PER_BYTE), GFP_KERNEL);
 	if (pqm->queue_slot_bitmap == NULL)
 		return -ENOMEM;
@ -206,6 +206,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
 		pqn->kq = NULL;
 		retval = dev->dqm->ops.create_queue(dev->dqm, q, &pdd->qpd,
 						&q->properties.vmid);
+		pr_debug("DQM returned %d for create_queue\n", retval);
 		print_queue(q);
 		break;
 	case KFD_QUEUE_TYPE_DIQ:
@ -226,7 +227,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
 	}

 	if (retval != 0) {
-		pr_err("kfd: error dqm create queue\n");
+		pr_debug("Error dqm create queue\n");
 		goto err_create_queue;
 	}

@ -245,7 +246,10 @@ int pqm_create_queue(struct process_queue_manager *pqm,
 err_create_queue:
 	kfree(pqn);
 err_allocate_pqn:
+	/* check if queues list is empty unregister process from device */
 	clear_bit(*qid, pqm->queue_slot_bitmap);
+	if (list_empty(&pqm->queues))
+		dev->dqm->ops.unregister_process(dev->dqm, &pdd->qpd);
 	return retval;
 }

--- a/drivers/gpu/drm/i2c/tda998x_drv.c
+++ b/drivers/gpu/drm/i2c/tda998x_drv.c
@ -32,6 +32,8 @@
 struct tda998x_priv {
 	struct i2c_client *cec;
 	struct i2c_client *hdmi;
+	struct mutex mutex;
+	struct delayed_work dwork;
 	uint16_t rev;
 	uint8_t current_page;
 	int dpms;
@ -402,9 +404,10 @@ reg_read_range(struct tda998x_priv *priv, uint16_t reg, char *buf, int cnt)
 	uint8_t addr = REG2ADDR(reg);
 	int ret;

+	mutex_lock(&priv->mutex);
 	ret = set_page(priv, reg);
 	if (ret < 0)
-		return ret;
+		goto out;

 	ret = i2c_master_send(client, &addr, sizeof(addr));
 	if (ret < 0)
@ -414,10 +417,12 @@ reg_read_range(struct tda998x_priv *priv, uint16_t reg, char *buf, int cnt)
 	if (ret < 0)
 		goto fail;

-	return ret;
+	goto out;

 fail:
 	dev_err(&client->dev, "Error %d reading from 0x%x\n", ret, reg);
+out:
+	mutex_unlock(&priv->mutex);
 	return ret;
 }

@ -431,13 +436,16 @@ reg_write_range(struct tda998x_priv *priv, uint16_t reg, uint8_t *p, int cnt)
 	buf[0] = REG2ADDR(reg);
 	memcpy(&buf[1], p, cnt);

+	mutex_lock(&priv->mutex);
 	ret = set_page(priv, reg);
 	if (ret < 0)
-		return;
+		goto out;

 	ret = i2c_master_send(client, buf, cnt + 1);
 	if (ret < 0)
 		dev_err(&client->dev, "Error %d writing to 0x%x\n", ret, reg);
+out:
+	mutex_unlock(&priv->mutex);
 }

 static int
@ -459,13 +467,16 @@ reg_write(struct tda998x_priv *priv, uint16_t reg, uint8_t val)
 	uint8_t buf[] = {REG2ADDR(reg), val};
 	int ret;

+	mutex_lock(&priv->mutex);
 	ret = set_page(priv, reg);
 	if (ret < 0)
-		return;
+		goto out;

 	ret = i2c_master_send(client, buf, sizeof(buf));
 	if (ret < 0)
 		dev_err(&client->dev, "Error %d writing to 0x%x\n", ret, reg);
+out:
+	mutex_unlock(&priv->mutex);
 }

 static void
@ -475,13 +486,16 @@ reg_write16(struct tda998x_priv *priv, uint16_t reg, uint16_t val)
 	uint8_t buf[] = {REG2ADDR(reg), val >> 8, val};
 	int ret;

+	mutex_lock(&priv->mutex);
 	ret = set_page(priv, reg);
 	if (ret < 0)
-		return;
+		goto out;

 	ret = i2c_master_send(client, buf, sizeof(buf));
 	if (ret < 0)
 		dev_err(&client->dev, "Error %d writing to 0x%x\n", ret, reg);
+out:
+	mutex_unlock(&priv->mutex);
 }

 static void
@ -536,6 +550,17 @@ tda998x_reset(struct tda998x_priv *priv)
 	reg_write(priv, REG_MUX_VP_VIP_OUT, 0x24);
 }

+/* handle HDMI connect/disconnect */
+static void tda998x_hpd(struct work_struct *work)
+{
+	struct delayed_work *dwork = to_delayed_work(work);
+	struct tda998x_priv *priv =
+			container_of(dwork, struct tda998x_priv, dwork);
+
+	if (priv->encoder && priv->encoder->dev)
+		drm_kms_helper_hotplug_event(priv->encoder->dev);
+}
+
 /*
 * only 2 interrupts may occur: screen plug/unplug and EDID read
 */
@ -559,8 +584,7 @@ static irqreturn_t tda998x_irq_thread(int irq, void *data)
 		priv->wq_edid_wait = 0;
 		wake_up(&priv->wq_edid);
 	} else if (cec != 0) {			/* HPD change */
-		if (priv->encoder && priv->encoder->dev)
-			drm_helper_hpd_irq_event(priv->encoder->dev);
+		schedule_delayed_work(&priv->dwork, HZ/10);
 	}
 	return IRQ_HANDLED;
 }
@ -1170,8 +1194,10 @@ static void tda998x_destroy(struct tda998x_priv *priv)
 	/* disable all IRQs and free the IRQ handler */
 	cec_write(priv, REG_CEC_RXSHPDINTENA, 0);
 	reg_clear(priv, REG_INT_FLAGS_2, INT_FLAGS_2_EDID_BLK_RD);
-	if (priv->hdmi->irq)
+	if (priv->hdmi->irq) {
 		free_irq(priv->hdmi->irq, priv);
+		cancel_delayed_work_sync(&priv->dwork);
+	}

 	i2c_unregister_device(priv->cec);
 }
@ -1255,6 +1281,7 @@ static int tda998x_create(struct i2c_client *client, struct tda998x_priv *priv)
 	struct device_node *np = client->dev.of_node;
 	u32 video;
 	int rev_lo, rev_hi, ret;
+	unsigned short cec_addr;

 	priv->vip_cntrl_0 = VIP_CNTRL_0_SWAP_A(2) | VIP_CNTRL_0_SWAP_B(3);
 	priv->vip_cntrl_1 = VIP_CNTRL_1_SWAP_C(0) | VIP_CNTRL_1_SWAP_D(1);
@ -1262,12 +1289,16 @@ static int tda998x_create(struct i2c_client *client, struct tda998x_priv *priv)

 	priv->current_page = 0xff;
 	priv->hdmi = client;
-	priv->cec = i2c_new_dummy(client->adapter, 0x34);
+	/* CEC I2C address bound to TDA998x I2C addr by configuration pins */
+	cec_addr = 0x34 + (client->addr & 0x03);
+	priv->cec = i2c_new_dummy(client->adapter, cec_addr);
 	if (!priv->cec)
 		return -ENODEV;

 	priv->dpms = DRM_MODE_DPMS_OFF;

+	mutex_init(&priv->mutex);	/* protect the page access */
+
 	/* wake up the device: */
 	cec_write(priv, REG_CEC_ENAMODS,
 			CEC_ENAMODS_EN_RXSENS | CEC_ENAMODS_EN_HDMI);
@ -1323,8 +1354,9 @@ static int tda998x_create(struct i2c_client *client, struct tda998x_priv *priv)
 	if (client->irq) {
 		int irqf_trigger;

-		/* init read EDID waitqueue */
+		/* init read EDID waitqueue and HDP work */
 		init_waitqueue_head(&priv->wq_edid);
+		INIT_DELAYED_WORK(&priv->dwork, tda998x_hpd);

 		/* clear pending interrupts */
 		reg_read(priv, REG_INT_FLAGS_0);
--- a/drivers/gpu/drm/radeon/cik_sdma.c
+++ b/drivers/gpu/drm/radeon/cik_sdma.c
@ -845,7 +845,6 @@ void cik_sdma_vm_write_pages(struct radeon_device *rdev,
 		for (; ndw > 0; ndw -= 2, --count, pe += 8) {
 			if (flags & R600_PTE_SYSTEM) {
 				value = radeon_vm_map_gart(rdev, addr);
-				value &= 0xFFFFFFFFFFFFF000ULL;
 			} else if (flags & R600_PTE_VALID) {
 				value = addr;
 			} else {
--- a/drivers/gpu/drm/radeon/ni_dma.c
+++ b/drivers/gpu/drm/radeon/ni_dma.c
@ -372,7 +372,6 @@ void cayman_dma_vm_write_pages(struct radeon_device *rdev,
 		for (; ndw > 0; ndw -= 2, --count, pe += 8) {
 			if (flags & R600_PTE_SYSTEM) {
 				value = radeon_vm_map_gart(rdev, addr);
-				value &= 0xFFFFFFFFFFFFF000ULL;
 			} else if (flags & R600_PTE_VALID) {
 				value = addr;
 			} else {
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@ -644,6 +644,7 @@ int r100_pci_gart_init(struct radeon_device *rdev)
 		return r;
 	rdev->gart.table_size = rdev->gart.num_gpu_pages * 4;
 	rdev->asic->gart.tlb_flush = &r100_pci_gart_tlb_flush;
+	rdev->asic->gart.get_page_entry = &r100_pci_gart_get_page_entry;
 	rdev->asic->gart.set_page = &r100_pci_gart_set_page;
 	return radeon_gart_table_ram_alloc(rdev);
 }
@ -681,11 +682,16 @@ void r100_pci_gart_disable(struct radeon_device *rdev)
 	WREG32(RADEON_AIC_HI_ADDR, 0);
 }

+uint64_t r100_pci_gart_get_page_entry(uint64_t addr, uint32_t flags)
+{
+	return addr;
+}
+
 void r100_pci_gart_set_page(struct radeon_device *rdev, unsigned i,
-			    uint64_t addr, uint32_t flags)
+			    uint64_t entry)
 {
 	u32 *gtt = rdev->gart.ptr;
-	gtt[i] = cpu_to_le32(lower_32_bits(addr));
+	gtt[i] = cpu_to_le32(lower_32_bits(entry));
 }

 void r100_pci_gart_fini(struct radeon_device *rdev)
--- a/drivers/gpu/drm/radeon/r300.c
+++ b/drivers/gpu/drm/radeon/r300.c
@ -73,11 +73,8 @@ void rv370_pcie_gart_tlb_flush(struct radeon_device *rdev)
 #define R300_PTE_WRITEABLE (1 << 2)
 #define R300_PTE_READABLE  (1 << 3)

-void rv370_pcie_gart_set_page(struct radeon_device *rdev, unsigned i,
-			      uint64_t addr, uint32_t flags)
+uint64_t rv370_pcie_gart_get_page_entry(uint64_t addr, uint32_t flags)
 {
-	void __iomem *ptr = rdev->gart.ptr;
-
 	addr = (lower_32_bits(addr) >> 8) |
 		((upper_32_bits(addr) & 0xff) << 24);
 	if (flags & RADEON_GART_PAGE_READ)
@ -86,10 +83,18 @@ void rv370_pcie_gart_set_page(struct radeon_device *rdev, unsigned i,
 		addr |= R300_PTE_WRITEABLE;
 	if (!(flags & RADEON_GART_PAGE_SNOOP))
 		addr |= R300_PTE_UNSNOOPED;
+	return addr;
+}
+
+void rv370_pcie_gart_set_page(struct radeon_device *rdev, unsigned i,
+			      uint64_t entry)
+{
+	void __iomem *ptr = rdev->gart.ptr;
+
 	/* on x86 we want this to be CPU endian, on powerpc
 	 * on powerpc without HW swappers, it'll get swapped on way
 	 * into VRAM - so no need for cpu_to_le32 on VRAM tables */
-	writel(addr, ((void __iomem *)ptr) + (i * 4));
+	writel(entry, ((void __iomem *)ptr) + (i * 4));
 }

 int rv370_pcie_gart_init(struct radeon_device *rdev)
@ -109,6 +114,7 @@ int rv370_pcie_gart_init(struct radeon_device *rdev)
 		DRM_ERROR("Failed to register debugfs file for PCIE gart !\n");
 	rdev->gart.table_size = rdev->gart.num_gpu_pages * 4;
 	rdev->asic->gart.tlb_flush = &rv370_pcie_gart_tlb_flush;
+	rdev->asic->gart.get_page_entry = &rv370_pcie_gart_get_page_entry;
 	rdev->asic->gart.set_page = &rv370_pcie_gart_set_page;
 	return radeon_gart_table_vram_alloc(rdev);
 }
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@ -242,6 +242,7 @@ bool radeon_get_bios(struct radeon_device *rdev);
 * Dummy page
 */
 struct radeon_dummy_page {
+	uint64_t	entry;
 	struct page	*page;
 	dma_addr_t	addr;
 };
@ -645,7 +646,7 @@ struct radeon_gart {
 	unsigned			num_cpu_pages;
 	unsigned			table_size;
 	struct page			**pages;
-	dma_addr_t			*pages_addr;
+	uint64_t			*pages_entry;
 	bool				ready;
 };

@ -1858,8 +1859,9 @@ struct radeon_asic {
 	/* gart */
 	struct {
 		void (*tlb_flush)(struct radeon_device *rdev);
+		uint64_t (*get_page_entry)(uint64_t addr, uint32_t flags);
 		void (*set_page)(struct radeon_device *rdev, unsigned i,
-				 uint64_t addr, uint32_t flags);
+				 uint64_t entry);
 	} gart;
 	struct {
 		int (*init)(struct radeon_device *rdev);
@ -2867,7 +2869,8 @@ static inline void radeon_ring_write(struct radeon_ring *ring, uint32_t v)
 #define radeon_vga_set_state(rdev, state) (rdev)->asic->vga_set_state((rdev), (state))
 #define radeon_asic_reset(rdev) (rdev)->asic->asic_reset((rdev))
 #define radeon_gart_tlb_flush(rdev) (rdev)->asic->gart.tlb_flush((rdev))
-#define radeon_gart_set_page(rdev, i, p, f) (rdev)->asic->gart.set_page((rdev), (i), (p), (f))
+#define radeon_gart_get_page_entry(a, f) (rdev)->asic->gart.get_page_entry((a), (f))
+#define radeon_gart_set_page(rdev, i, e) (rdev)->asic->gart.set_page((rdev), (i), (e))
 #define radeon_asic_vm_init(rdev) (rdev)->asic->vm.init((rdev))
 #define radeon_asic_vm_fini(rdev) (rdev)->asic->vm.fini((rdev))
 #define radeon_asic_vm_copy_pages(rdev, ib, pe, src, count) ((rdev)->asic->vm.copy_pages((rdev), (ib), (pe), (src), (count)))
--- a/drivers/gpu/drm/radeon/radeon_asic.c
+++ b/drivers/gpu/drm/radeon/radeon_asic.c
@ -159,11 +159,13 @@ void radeon_agp_disable(struct radeon_device *rdev)
 		DRM_INFO("Forcing AGP to PCIE mode\n");
 		rdev->flags |= RADEON_IS_PCIE;
 		rdev->asic->gart.tlb_flush = &rv370_pcie_gart_tlb_flush;
+		rdev->asic->gart.get_page_entry = &rv370_pcie_gart_get_page_entry;
 		rdev->asic->gart.set_page = &rv370_pcie_gart_set_page;
 	} else {
 		DRM_INFO("Forcing AGP to PCI mode\n");
 		rdev->flags |= RADEON_IS_PCI;
 		rdev->asic->gart.tlb_flush = &r100_pci_gart_tlb_flush;
+		rdev->asic->gart.get_page_entry = &r100_pci_gart_get_page_entry;
 		rdev->asic->gart.set_page = &r100_pci_gart_set_page;
 	}
 	rdev->mc.gtt_size = radeon_gart_size * 1024 * 1024;
@ -199,6 +201,7 @@ static struct radeon_asic r100_asic = {
 	.mc_wait_for_idle = &r100_mc_wait_for_idle,
 	.gart = {
 		.tlb_flush = &r100_pci_gart_tlb_flush,
+		.get_page_entry = &r100_pci_gart_get_page_entry,
 		.set_page = &r100_pci_gart_set_page,
 	},
 	.ring = {
@ -265,6 +268,7 @@ static struct radeon_asic r200_asic = {
 	.mc_wait_for_idle = &r100_mc_wait_for_idle,
 	.gart = {
 		.tlb_flush = &r100_pci_gart_tlb_flush,
+		.get_page_entry = &r100_pci_gart_get_page_entry,
 		.set_page = &r100_pci_gart_set_page,
 	},
 	.ring = {
@ -359,6 +363,7 @@ static struct radeon_asic r300_asic = {
 	.mc_wait_for_idle = &r300_mc_wait_for_idle,
 	.gart = {
 		.tlb_flush = &r100_pci_gart_tlb_flush,
+		.get_page_entry = &r100_pci_gart_get_page_entry,
 		.set_page = &r100_pci_gart_set_page,
 	},
 	.ring = {
@ -425,6 +430,7 @@ static struct radeon_asic r300_asic_pcie = {
 	.mc_wait_for_idle = &r300_mc_wait_for_idle,
 	.gart = {
 		.tlb_flush = &rv370_pcie_gart_tlb_flush,
+		.get_page_entry = &rv370_pcie_gart_get_page_entry,
 		.set_page = &rv370_pcie_gart_set_page,
 	},
 	.ring = {
@ -491,6 +497,7 @@ static struct radeon_asic r420_asic = {
 	.mc_wait_for_idle = &r300_mc_wait_for_idle,
 	.gart = {
 		.tlb_flush = &rv370_pcie_gart_tlb_flush,
+		.get_page_entry = &rv370_pcie_gart_get_page_entry,
 		.set_page = &rv370_pcie_gart_set_page,
 	},
 	.ring = {
@ -557,6 +564,7 @@ static struct radeon_asic rs400_asic = {
 	.mc_wait_for_idle = &rs400_mc_wait_for_idle,
 	.gart = {
 		.tlb_flush = &rs400_gart_tlb_flush,
+		.get_page_entry = &rs400_gart_get_page_entry,
 		.set_page = &rs400_gart_set_page,
 	},
 	.ring = {
@ -623,6 +631,7 @@ static struct radeon_asic rs600_asic = {
 	.mc_wait_for_idle = &rs600_mc_wait_for_idle,
 	.gart = {
 		.tlb_flush = &rs600_gart_tlb_flush,
+		.get_page_entry = &rs600_gart_get_page_entry,
 		.set_page = &rs600_gart_set_page,
 	},
 	.ring = {
@ -689,6 +698,7 @@ static struct radeon_asic rs690_asic = {
 	.mc_wait_for_idle = &rs690_mc_wait_for_idle,
 	.gart = {
 		.tlb_flush = &rs400_gart_tlb_flush,
+		.get_page_entry = &rs400_gart_get_page_entry,
 		.set_page = &rs400_gart_set_page,
 	},
 	.ring = {
@ -755,6 +765,7 @@ static struct radeon_asic rv515_asic = {
 	.mc_wait_for_idle = &rv515_mc_wait_for_idle,
 	.gart = {
 		.tlb_flush = &rv370_pcie_gart_tlb_flush,
+		.get_page_entry = &rv370_pcie_gart_get_page_entry,
 		.set_page = &rv370_pcie_gart_set_page,
 	},
 	.ring = {
@ -821,6 +832,7 @@ static struct radeon_asic r520_asic = {
 	.mc_wait_for_idle = &r520_mc_wait_for_idle,
 	.gart = {
 		.tlb_flush = &rv370_pcie_gart_tlb_flush,
+		.get_page_entry = &rv370_pcie_gart_get_page_entry,
 		.set_page = &rv370_pcie_gart_set_page,
 	},
 	.ring = {
@ -915,6 +927,7 @@ static struct radeon_asic r600_asic = {
 	.get_gpu_clock_counter = &r600_get_gpu_clock_counter,
 	.gart = {
 		.tlb_flush = &r600_pcie_gart_tlb_flush,
+		.get_page_entry = &rs600_gart_get_page_entry,
 		.set_page = &rs600_gart_set_page,
 	},
 	.ring = {
@ -998,6 +1011,7 @@ static struct radeon_asic rv6xx_asic = {
 	.get_gpu_clock_counter = &r600_get_gpu_clock_counter,
 	.gart = {
 		.tlb_flush = &r600_pcie_gart_tlb_flush,
+		.get_page_entry = &rs600_gart_get_page_entry,
 		.set_page = &rs600_gart_set_page,
 	},
 	.ring = {
@ -1087,6 +1101,7 @@ static struct radeon_asic rs780_asic = {
 	.get_gpu_clock_counter = &r600_get_gpu_clock_counter,
 	.gart = {
 		.tlb_flush = &r600_pcie_gart_tlb_flush,
+		.get_page_entry = &rs600_gart_get_page_entry,
 		.set_page = &rs600_gart_set_page,
 	},
 	.ring = {
@ -1189,6 +1204,7 @@ static struct radeon_asic rv770_asic = {
 	.get_gpu_clock_counter = &r600_get_gpu_clock_counter,
 	.gart = {
 		.tlb_flush = &r600_pcie_gart_tlb_flush,
+		.get_page_entry = &rs600_gart_get_page_entry,
 		.set_page = &rs600_gart_set_page,
 	},
 	.ring = {
@ -1305,6 +1321,7 @@ static struct radeon_asic evergreen_asic = {
 	.get_gpu_clock_counter = &r600_get_gpu_clock_counter,
 	.gart = {
 		.tlb_flush = &evergreen_pcie_gart_tlb_flush,
+		.get_page_entry = &rs600_gart_get_page_entry,
 		.set_page = &rs600_gart_set_page,
 	},
 	.ring = {
@ -1395,6 +1412,7 @@ static struct radeon_asic sumo_asic = {
 	.get_gpu_clock_counter = &r600_get_gpu_clock_counter,
 	.gart = {
 		.tlb_flush = &evergreen_pcie_gart_tlb_flush,
+		.get_page_entry = &rs600_gart_get_page_entry,
 		.set_page = &rs600_gart_set_page,
 	},
 	.ring = {
@ -1484,6 +1502,7 @@ static struct radeon_asic btc_asic = {
 	.get_gpu_clock_counter = &r600_get_gpu_clock_counter,
 	.gart = {
 		.tlb_flush = &evergreen_pcie_gart_tlb_flush,
+		.get_page_entry = &rs600_gart_get_page_entry,
 		.set_page = &rs600_gart_set_page,
 	},
 	.ring = {
@ -1617,6 +1636,7 @@ static struct radeon_asic cayman_asic = {
 	.get_gpu_clock_counter = &r600_get_gpu_clock_counter,
 	.gart = {
 		.tlb_flush = &cayman_pcie_gart_tlb_flush,
+		.get_page_entry = &rs600_gart_get_page_entry,
 		.set_page = &rs600_gart_set_page,
 	},
 	.vm = {
@ -1718,6 +1738,7 @@ static struct radeon_asic trinity_asic = {
 	.get_gpu_clock_counter = &r600_get_gpu_clock_counter,
 	.gart = {
 		.tlb_flush = &cayman_pcie_gart_tlb_flush,
+		.get_page_entry = &rs600_gart_get_page_entry,
 		.set_page = &rs600_gart_set_page,
 	},
 	.vm = {
@ -1849,6 +1870,7 @@ static struct radeon_asic si_asic = {
 	.get_gpu_clock_counter = &si_get_gpu_clock_counter,
 	.gart = {
 		.tlb_flush = &si_pcie_gart_tlb_flush,
+		.get_page_entry = &rs600_gart_get_page_entry,
 		.set_page = &rs600_gart_set_page,
 	},
 	.vm = {
@ -2012,6 +2034,7 @@ static struct radeon_asic ci_asic = {
 	.get_gpu_clock_counter = &cik_get_gpu_clock_counter,
 	.gart = {
 		.tlb_flush = &cik_pcie_gart_tlb_flush,
+		.get_page_entry = &rs600_gart_get_page_entry,
 		.set_page = &rs600_gart_set_page,
 	},
 	.vm = {
@ -2121,6 +2144,7 @@ static struct radeon_asic kv_asic = {
 	.get_gpu_clock_counter = &cik_get_gpu_clock_counter,
 	.gart = {
 		.tlb_flush = &cik_pcie_gart_tlb_flush,
+		.get_page_entry = &rs600_gart_get_page_entry,
 		.set_page = &rs600_gart_set_page,
 	},
 	.vm = {
--- a/drivers/gpu/drm/radeon/radeon_asic.h
+++ b/drivers/gpu/drm/radeon/radeon_asic.h
@ -67,8 +67,9 @@ bool r100_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *cp);
 int r100_asic_reset(struct radeon_device *rdev);
 u32 r100_get_vblank_counter(struct radeon_device *rdev, int crtc);
 void r100_pci_gart_tlb_flush(struct radeon_device *rdev);
+uint64_t r100_pci_gart_get_page_entry(uint64_t addr, uint32_t flags);
 void r100_pci_gart_set_page(struct radeon_device *rdev, unsigned i,
-			    uint64_t addr, uint32_t flags);
+			    uint64_t entry);
 void r100_ring_start(struct radeon_device *rdev, struct radeon_ring *ring);
 int r100_irq_set(struct radeon_device *rdev);
 int r100_irq_process(struct radeon_device *rdev);
@ -172,8 +173,9 @@ extern void r300_fence_ring_emit(struct radeon_device *rdev,
 				struct radeon_fence *fence);
 extern int r300_cs_parse(struct radeon_cs_parser *p);
 extern void rv370_pcie_gart_tlb_flush(struct radeon_device *rdev);
+extern uint64_t rv370_pcie_gart_get_page_entry(uint64_t addr, uint32_t flags);
 extern void rv370_pcie_gart_set_page(struct radeon_device *rdev, unsigned i,
-				     uint64_t addr, uint32_t flags);
+				     uint64_t entry);
 extern void rv370_set_pcie_lanes(struct radeon_device *rdev, int lanes);
 extern int rv370_get_pcie_lanes(struct radeon_device *rdev);
 extern void r300_set_reg_safe(struct radeon_device *rdev);
@ -208,8 +210,9 @@ extern void rs400_fini(struct radeon_device *rdev);
 extern int rs400_suspend(struct radeon_device *rdev);
 extern int rs400_resume(struct radeon_device *rdev);
 void rs400_gart_tlb_flush(struct radeon_device *rdev);
+uint64_t rs400_gart_get_page_entry(uint64_t addr, uint32_t flags);
 void rs400_gart_set_page(struct radeon_device *rdev, unsigned i,
-			 uint64_t addr, uint32_t flags);
+			 uint64_t entry);
 uint32_t rs400_mc_rreg(struct radeon_device *rdev, uint32_t reg);
 void rs400_mc_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v);
 int rs400_gart_init(struct radeon_device *rdev);
@ -232,8 +235,9 @@ int rs600_irq_process(struct radeon_device *rdev);
 void rs600_irq_disable(struct radeon_device *rdev);
 u32 rs600_get_vblank_counter(struct radeon_device *rdev, int crtc);
 void rs600_gart_tlb_flush(struct radeon_device *rdev);
+uint64_t rs600_gart_get_page_entry(uint64_t addr, uint32_t flags);
 void rs600_gart_set_page(struct radeon_device *rdev, unsigned i,
-			 uint64_t addr, uint32_t flags);
+			 uint64_t entry);
 uint32_t rs600_mc_rreg(struct radeon_device *rdev, uint32_t reg);
 void rs600_mc_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v);
 void rs600_bandwidth_update(struct radeon_device *rdev);
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@ -774,6 +774,8 @@ int radeon_dummy_page_init(struct radeon_device *rdev)
 		rdev->dummy_page.page = NULL;
 		return -ENOMEM;
 	}
+	rdev->dummy_page.entry = radeon_gart_get_page_entry(rdev->dummy_page.addr,
+							    RADEON_GART_PAGE_DUMMY);
 	return 0;
 }

--- a/drivers/gpu/drm/radeon/radeon_gart.c
+++ b/drivers/gpu/drm/radeon/radeon_gart.c
@ -165,6 +165,19 @@ int radeon_gart_table_vram_pin(struct radeon_device *rdev)
 		radeon_bo_unpin(rdev->gart.robj);
 	radeon_bo_unreserve(rdev->gart.robj);
 	rdev->gart.table_addr = gpu_addr;
+
+	if (!r) {
+		int i;
+
+		/* We might have dropped some GART table updates while it wasn't
+		 * mapped, restore all entries
+		 */
+		for (i = 0; i < rdev->gart.num_gpu_pages; i++)
+			radeon_gart_set_page(rdev, i, rdev->gart.pages_entry[i]);
+		mb();
+		radeon_gart_tlb_flush(rdev);
+	}
+
 	return r;
 }

@ -228,7 +241,6 @@ void radeon_gart_unbind(struct radeon_device *rdev, unsigned offset,
 	unsigned t;
 	unsigned p;
 	int i, j;
-	u64 page_base;

 	if (!rdev->gart.ready) {
 		WARN(1, "trying to unbind memory from uninitialized GART !\n");
@ -239,14 +251,12 @@ void radeon_gart_unbind(struct radeon_device *rdev, unsigned offset,
 	for (i = 0; i < pages; i++, p++) {
 		if (rdev->gart.pages[p]) {
 			rdev->gart.pages[p] = NULL;
-			rdev->gart.pages_addr[p] = rdev->dummy_page.addr;
-			page_base = rdev->gart.pages_addr[p];
 			for (j = 0; j < (PAGE_SIZE / RADEON_GPU_PAGE_SIZE); j++, t++) {
+				rdev->gart.pages_entry[t] = rdev->dummy_page.entry;
 				if (rdev->gart.ptr) {
-					radeon_gart_set_page(rdev, t, page_base,
-							     RADEON_GART_PAGE_DUMMY);
+					radeon_gart_set_page(rdev, t,
+							     rdev->dummy_page.entry);
 				}
-				page_base += RADEON_GPU_PAGE_SIZE;
 			}
 		}
 	}
@ -274,7 +284,7 @@ int radeon_gart_bind(struct radeon_device *rdev, unsigned offset,
 {
 	unsigned t;
 	unsigned p;
-	uint64_t page_base;
+	uint64_t page_base, page_entry;
 	int i, j;

 	if (!rdev->gart.ready) {
@ -285,14 +295,15 @@ int radeon_gart_bind(struct radeon_device *rdev, unsigned offset,
 	p = t / (PAGE_SIZE / RADEON_GPU_PAGE_SIZE);

 	for (i = 0; i < pages; i++, p++) {
-		rdev->gart.pages_addr[p] = dma_addr[i];
 		rdev->gart.pages[p] = pagelist[i];
-		if (rdev->gart.ptr) {
-			page_base = rdev->gart.pages_addr[p];
-			for (j = 0; j < (PAGE_SIZE / RADEON_GPU_PAGE_SIZE); j++, t++) {
-				radeon_gart_set_page(rdev, t, page_base, flags);
-				page_base += RADEON_GPU_PAGE_SIZE;
+		page_base = dma_addr[i];
+		for (j = 0; j < (PAGE_SIZE / RADEON_GPU_PAGE_SIZE); j++, t++) {
+			page_entry = radeon_gart_get_page_entry(page_base, flags);
+			rdev->gart.pages_entry[t] = page_entry;
+			if (rdev->gart.ptr) {
+				radeon_gart_set_page(rdev, t, page_entry);
 			}
+			page_base += RADEON_GPU_PAGE_SIZE;
 		}
 	}
 	mb();
@ -334,16 +345,15 @@ int radeon_gart_init(struct radeon_device *rdev)
 		radeon_gart_fini(rdev);
 		return -ENOMEM;
 	}
-	rdev->gart.pages_addr = vzalloc(sizeof(dma_addr_t) *
-					rdev->gart.num_cpu_pages);
-	if (rdev->gart.pages_addr == NULL) {
+	rdev->gart.pages_entry = vmalloc(sizeof(uint64_t) *
+					 rdev->gart.num_gpu_pages);
+	if (rdev->gart.pages_entry == NULL) {
 		radeon_gart_fini(rdev);
 		return -ENOMEM;
 	}
 	/* set GART entry to point to the dummy page by default */
-	for (i = 0; i < rdev->gart.num_cpu_pages; i++) {
-		rdev->gart.pages_addr[i] = rdev->dummy_page.addr;
-	}
+	for (i = 0; i < rdev->gart.num_gpu_pages; i++)
+		rdev->gart.pages_entry[i] = rdev->dummy_page.entry;
 	return 0;
 }

@ -356,15 +366,15 @@ int radeon_gart_init(struct radeon_device *rdev)
 */
 void radeon_gart_fini(struct radeon_device *rdev)
 {
-	if (rdev->gart.pages && rdev->gart.pages_addr && rdev->gart.ready) {
+	if (rdev->gart.ready) {
 		/* unbind pages */
 		radeon_gart_unbind(rdev, 0, rdev->gart.num_cpu_pages);
 	}
 	rdev->gart.ready = false;
 	vfree(rdev->gart.pages);
-	vfree(rdev->gart.pages_addr);
+	vfree(rdev->gart.pages_entry);
 	rdev->gart.pages = NULL;
-	rdev->gart.pages_addr = NULL;
+	rdev->gart.pages_entry = NULL;

 	radeon_dummy_page_fini(rdev);
 }
--- a/Show More
+++ b/Show More