185 files changed, 5833 insertions, 2540 deletions
diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index bf9e9d3b3792..f515a4dbf7a0 100644
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -3,6 +3,7 @@ config ALPHA
 	default y
 	select ARCH_MIGHT_HAVE_PC_PARPORT
 	select ARCH_MIGHT_HAVE_PC_SERIO
+	select ARCH_USE_CMPXCHG_LOCKREF
 	select HAVE_AOUT
 	select HAVE_IDE
 	select HAVE_OPROFILE
diff --git a/arch/alpha/include/asm/spinlock.h b/arch/alpha/include/asm/spinlock.h
index 37b570d01202..fed9c6f44c19 100644
--- a/arch/alpha/include/asm/spinlock.h
+++ b/arch/alpha/include/asm/spinlock.h
@@ -16,6 +16,11 @@
 #define arch_spin_unlock_wait(x) \
 		do { cpu_relax(); } while ((x)->lock)
 
+static inline int arch_spin_value_unlocked(arch_spinlock_t lock)
+{
+        return lock.lock == 0;
+}
+
 static inline void arch_spin_unlock(arch_spinlock_t * lock)
 {
 	mb();
diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index 07ab3d203916..7451b447cc2d 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -312,6 +312,9 @@ INSTALL_TARGETS	= zinstall uinstall install
 
 PHONY += bzImage $(BOOT_TARGETS) $(INSTALL_TARGETS)
 
+bootpImage uImage: zImage
+zImage: Image
+
 $(BOOT_TARGETS): vmlinux
 	$(Q)$(MAKE) $(build)=$(boot) MACHINE=$(MACHINE) $(boot)/$@
 
diff --git a/arch/arm/boot/dts/am33xx.dtsi b/arch/arm/boot/dts/am33xx.dtsi
index 21fcc440fc1a..b76f9a2ce05d 100644
--- a/arch/arm/boot/dts/am33xx.dtsi
+++ b/arch/arm/boot/dts/am33xx.dtsi
@@ -210,7 +210,7 @@
 		};
 
 		uart0: serial@44e09000 {
-			compatible = "ti,omap3-uart";
+			compatible = "ti,am3352-uart", "ti,omap3-uart";
 			ti,hwmods = "uart1";
 			clock-frequency = <48000000>;
 			reg = <0x44e09000 0x2000>;
@@ -221,7 +221,7 @@
 		};
 
 		uart1: serial@48022000 {
-			compatible = "ti,omap3-uart";
+			compatible = "ti,am3352-uart", "ti,omap3-uart";
 			ti,hwmods = "uart2";
 			clock-frequency = <48000000>;
 			reg = <0x48022000 0x2000>;
@@ -232,7 +232,7 @@
 		};
 
 		uart2: serial@48024000 {
-			compatible = "ti,omap3-uart";
+			compatible = "ti,am3352-uart", "ti,omap3-uart";
 			ti,hwmods = "uart3";
 			clock-frequency = <48000000>;
 			reg = <0x48024000 0x2000>;
@@ -243,7 +243,7 @@
 		};
 
 		uart3: serial@481a6000 {
-			compatible = "ti,omap3-uart";
+			compatible = "ti,am3352-uart", "ti,omap3-uart";
 			ti,hwmods = "uart4";
 			clock-frequency = <48000000>;
 			reg = <0x481a6000 0x2000>;
@@ -252,7 +252,7 @@
 		};
 
 		uart4: serial@481a8000 {
-			compatible = "ti,omap3-uart";
+			compatible = "ti,am3352-uart", "ti,omap3-uart";
 			ti,hwmods = "uart5";
 			clock-frequency = <48000000>;
 			reg = <0x481a8000 0x2000>;
@@ -261,7 +261,7 @@
 		};
 
 		uart5: serial@481aa000 {
-			compatible = "ti,omap3-uart";
+			compatible = "ti,am3352-uart", "ti,omap3-uart";
 			ti,hwmods = "uart6";
 			clock-frequency = <48000000>;
 			reg = <0x481aa000 0x2000>;
diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi
index 4a0718ccf68e..e60677e25d62 100644
--- a/arch/arm/boot/dts/dra7.dtsi
+++ b/arch/arm/boot/dts/dra7.dtsi
@@ -116,7 +116,7 @@
 				ranges = <0 0x2000 0x2000>;
 
 				scm_conf: scm_conf@0 {
-					compatible = "syscon";
+					compatible = "syscon", "simple-bus";
 					reg = <0x0 0x1400>;
 					#address-cells = <1>;
 					#size-cells = <1>;
@@ -397,7 +397,7 @@
 		};
 
 		uart1: serial@4806a000 {
-			compatible = "ti,omap4-uart";
+			compatible = "ti,dra742-uart", "ti,omap4-uart";
 			reg = <0x4806a000 0x100>;
 			interrupts-extended = <&crossbar_mpu GIC_SPI 67 IRQ_TYPE_LEVEL_HIGH>;
 			ti,hwmods = "uart1";
@@ -408,7 +408,7 @@
 		};
 
 		uart2: serial@4806c000 {
-			compatible = "ti,omap4-uart";
+			compatible = "ti,dra742-uart", "ti,omap4-uart";
 			reg = <0x4806c000 0x100>;
 			interrupts = <GIC_SPI 68 IRQ_TYPE_LEVEL_HIGH>;
 			ti,hwmods = "uart2";
@@ -419,7 +419,7 @@
 		};
 
 		uart3: serial@48020000 {
-			compatible = "ti,omap4-uart";
+			compatible = "ti,dra742-uart", "ti,omap4-uart";
 			reg = <0x48020000 0x100>;
 			interrupts = <GIC_SPI 69 IRQ_TYPE_LEVEL_HIGH>;
 			ti,hwmods = "uart3";
@@ -430,7 +430,7 @@
 		};
 
 		uart4: serial@4806e000 {
-			compatible = "ti,omap4-uart";
+			compatible = "ti,dra742-uart", "ti,omap4-uart";
 			reg = <0x4806e000 0x100>;
 			interrupts = <GIC_SPI 65 IRQ_TYPE_LEVEL_HIGH>;
 			ti,hwmods = "uart4";
@@ -441,7 +441,7 @@
 		};
 
 		uart5: serial@48066000 {
-			compatible = "ti,omap4-uart";
+			compatible = "ti,dra742-uart", "ti,omap4-uart";
 			reg = <0x48066000 0x100>;
 			interrupts = <GIC_SPI 100 IRQ_TYPE_LEVEL_HIGH>;
 			ti,hwmods = "uart5";
@@ -452,7 +452,7 @@
 		};
 
 		uart6: serial@48068000 {
-			compatible = "ti,omap4-uart";
+			compatible = "ti,dra742-uart", "ti,omap4-uart";
 			reg = <0x48068000 0x100>;
 			interrupts = <GIC_SPI 101 IRQ_TYPE_LEVEL_HIGH>;
 			ti,hwmods = "uart6";
@@ -463,7 +463,7 @@
 		};
 
 		uart7: serial@48420000 {
-			compatible = "ti,omap4-uart";
+			compatible = "ti,dra742-uart", "ti,omap4-uart";
 			reg = <0x48420000 0x100>;
 			interrupts = <GIC_SPI 218 IRQ_TYPE_LEVEL_HIGH>;
 			ti,hwmods = "uart7";
@@ -472,7 +472,7 @@
 		};
 
 		uart8: serial@48422000 {
-			compatible = "ti,omap4-uart";
+			compatible = "ti,dra742-uart", "ti,omap4-uart";
 			reg = <0x48422000 0x100>;
 			interrupts = <GIC_SPI 219 IRQ_TYPE_LEVEL_HIGH>;
 			ti,hwmods = "uart8";
@@ -481,7 +481,7 @@
 		};
 
 		uart9: serial@48424000 {
-			compatible = "ti,omap4-uart";
+			compatible = "ti,dra742-uart", "ti,omap4-uart";
 			reg = <0x48424000 0x100>;
 			interrupts = <GIC_SPI 220 IRQ_TYPE_LEVEL_HIGH>;
 			ti,hwmods = "uart9";
@@ -490,7 +490,7 @@
 		};
 
 		uart10: serial@4ae2b000 {
-			compatible = "ti,omap4-uart";
+			compatible = "ti,dra742-uart", "ti,omap4-uart";
 			reg = <0x4ae2b000 0x100>;
 			interrupts = <GIC_SPI 221 IRQ_TYPE_LEVEL_HIGH>;
 			ti,hwmods = "uart10";
diff --git a/arch/arm/boot/dts/imx6qdl.dtsi b/arch/arm/boot/dts/imx6qdl.dtsi
index e6d13592080d..b57033e8c633 100644
--- a/arch/arm/boot/dts/imx6qdl.dtsi
+++ b/arch/arm/boot/dts/imx6qdl.dtsi
@@ -181,10 +181,10 @@
 			interrupt-names = "msi";
 			#interrupt-cells = <1>;
 			interrupt-map-mask = <0 0 0 0x7>;
-			interrupt-map = <0 0 0 1 &intc GIC_SPI 123 IRQ_TYPE_LEVEL_HIGH>,
-			                <0 0 0 2 &intc GIC_SPI 122 IRQ_TYPE_LEVEL_HIGH>,
-			                <0 0 0 3 &intc GIC_SPI 121 IRQ_TYPE_LEVEL_HIGH>,
-			                <0 0 0 4 &intc GIC_SPI 120 IRQ_TYPE_LEVEL_HIGH>;
+			interrupt-map = <0 0 0 1 &gpc GIC_SPI 123 IRQ_TYPE_LEVEL_HIGH>,
+			                <0 0 0 2 &gpc GIC_SPI 122 IRQ_TYPE_LEVEL_HIGH>,
+			                <0 0 0 3 &gpc GIC_SPI 121 IRQ_TYPE_LEVEL_HIGH>,
+			                <0 0 0 4 &gpc GIC_SPI 120 IRQ_TYPE_LEVEL_HIGH>;
 			clocks = <&clks IMX6QDL_CLK_PCIE_AXI>,
 				 <&clks IMX6QDL_CLK_LVDS1_GATE>,
 				 <&clks IMX6QDL_CLK_PCIE_REF_125M>;
diff --git a/arch/arm/boot/dts/k2e.dtsi b/arch/arm/boot/dts/k2e.dtsi
index 1b6494fbdb91..675fb8e492c6 100644
--- a/arch/arm/boot/dts/k2e.dtsi
+++ b/arch/arm/boot/dts/k2e.dtsi
@@ -131,10 +131,17 @@
 					<GIC_SPI 376 IRQ_TYPE_EDGE_RISING>;
 			};
 		};
+
+		mdio: mdio@24200f00 {
+			compatible	= "ti,keystone_mdio", "ti,davinci_mdio";
+			#address-cells = <1>;
+			#size-cells = <0>;
+			reg = <0x24200f00 0x100>;
+			status = "disabled";
+			clocks = <&clkcpgmac>;
+			clock-names = "fck";
+			bus_freq	= <2500000>;
+		};
 		/include/ "k2e-netcp.dtsi"
 	};
 };
-
-&mdio {
-	reg = <0x24200f00 0x100>;
-};
diff --git a/arch/arm/boot/dts/k2hk.dtsi b/arch/arm/boot/dts/k2hk.dtsi
index ae6472407b22..d0810a5f2968 100644
--- a/arch/arm/boot/dts/k2hk.dtsi
+++ b/arch/arm/boot/dts/k2hk.dtsi
@@ -98,6 +98,17 @@
 			#gpio-cells = <2>;
 			gpio,syscon-dev = <&devctrl 0x25c>;
 		};
+
+		mdio: mdio@02090300 {
+			compatible	= "ti,keystone_mdio", "ti,davinci_mdio";
+			#address-cells = <1>;
+			#size-cells = <0>;
+			reg = <0x02090300 0x100>;
+			status = "disabled";
+			clocks = <&clkcpgmac>;
+			clock-names = "fck";
+			bus_freq	= <2500000>;
+		};
 		/include/ "k2hk-netcp.dtsi"
 	};
 };
diff --git a/arch/arm/boot/dts/k2l.dtsi b/arch/arm/boot/dts/k2l.dtsi
index 0e007483615e..49fd414f680c 100644
--- a/arch/arm/boot/dts/k2l.dtsi
+++ b/arch/arm/boot/dts/k2l.dtsi
@@ -29,7 +29,6 @@
 	};
 
 	soc {
-
 		/include/ "k2l-clocks.dtsi"
 
 		uart2: serial@02348400 {
@@ -79,6 +78,17 @@
 			#gpio-cells = <2>;
 			gpio,syscon-dev = <&devctrl 0x24c>;
 		};
+
+		mdio: mdio@26200f00 {
+			compatible	= "ti,keystone_mdio", "ti,davinci_mdio";
+			#address-cells = <1>;
+			#size-cells = <0>;
+			reg = <0x26200f00 0x100>;
+			status = "disabled";
+			clocks = <&clkcpgmac>;
+			clock-names = "fck";
+			bus_freq	= <2500000>;
+		};
 		/include/ "k2l-netcp.dtsi"
 	};
 };
@@ -96,7 +106,3 @@
        /* Pin muxed. Enabled and configured by Bootloader */
        status = "disabled";
 };
-
-&mdio {
-	reg = <0x26200f00 0x100>;
-};
diff --git a/arch/arm/boot/dts/keystone.dtsi b/arch/arm/boot/dts/keystone.dtsi
index e7a6f6deabb6..72816d65f7ec 100644
--- a/arch/arm/boot/dts/keystone.dtsi
+++ b/arch/arm/boot/dts/keystone.dtsi
@@ -267,17 +267,6 @@
 				  1 0 0x21000A00 0x00000100>;
 		};
 
-		mdio: mdio@02090300 {
-			compatible	= "ti,keystone_mdio", "ti,davinci_mdio";
-			#address-cells = <1>;
-			#size-cells = <0>;
-			reg		= <0x02090300 0x100>;
-			status = "disabled";
-			clocks = <&clkpa>;
-			clock-names = "fck";
-			bus_freq	= <2500000>;
-		};
-
 		kirq0: keystone_irq@26202a0 {
 			compatible = "ti,keystone-irq";
 			interrupts = <GIC_SPI 4 IRQ_TYPE_EDGE_RISING>;
diff --git a/arch/arm/boot/dts/omap2430.dtsi b/arch/arm/boot/dts/omap2430.dtsi
index 11a7963be003..2390f387c271 100644
--- a/arch/arm/boot/dts/omap2430.dtsi
+++ b/arch/arm/boot/dts/omap2430.dtsi
@@ -51,7 +51,8 @@
 				};
 
 				scm_conf: scm_conf@270 {
-					compatible = "syscon";
+					compatible = "syscon",
+						     "simple-bus";
 					reg = <0x270 0x240>;
 					#address-cells = <1>;
 					#size-cells = <1>;
diff --git a/arch/arm/boot/dts/omap4.dtsi b/arch/arm/boot/dts/omap4.dtsi
index 7d31c6ff246f..abc4473e6f8a 100644
--- a/arch/arm/boot/dts/omap4.dtsi
+++ b/arch/arm/boot/dts/omap4.dtsi
@@ -191,7 +191,8 @@
 				};
 
 				omap4_padconf_global: omap4_padconf_global@5a0 {
-					compatible = "syscon";
+					compatible = "syscon",
+						     "simple-bus";
 					reg = <0x5a0 0x170>;
 					#address-cells = <1>;
 					#size-cells = <1>;
diff --git a/arch/arm/boot/dts/omap5.dtsi b/arch/arm/boot/dts/omap5.dtsi
index c8fd648a7108..b1a1263e6001 100644
--- a/arch/arm/boot/dts/omap5.dtsi
+++ b/arch/arm/boot/dts/omap5.dtsi
@@ -180,7 +180,8 @@
 				};
 
 				omap5_padconf_global: omap5_padconf_global@5a0 {
-					compatible = "syscon";
+					compatible = "syscon",
+						     "simple-bus";
 					reg = <0x5a0 0xec>;
 					#address-cells = <1>;
 					#size-cells = <1>;
diff --git a/arch/arm/boot/dts/qcom-msm8974-sony-xperia-honami.dts b/arch/arm/boot/dts/qcom-msm8974-sony-xperia-honami.dts
index bd35b0674ff6..9bc72a3356e4 100644
--- a/arch/arm/boot/dts/qcom-msm8974-sony-xperia-honami.dts
+++ b/arch/arm/boot/dts/qcom-msm8974-sony-xperia-honami.dts
@@ -17,3 +17,13 @@
 		status = "ok";
 	};
 };
+
+&spmi_bus {
+	pm8941@0 {
+		coincell@2800 {
+			status = "ok";
+			qcom,rset-ohms = <2100>;
+			qcom,vset-millivolts = <3000>;
+		};
+	};
+};
diff --git a/arch/arm/boot/dts/qcom-pm8941.dtsi b/arch/arm/boot/dts/qcom-pm8941.dtsi
index aa774e685018..968f1043d4f5 100644
--- a/arch/arm/boot/dts/qcom-pm8941.dtsi
+++ b/arch/arm/boot/dts/qcom-pm8941.dtsi
@@ -125,6 +125,12 @@
 			interrupts = <0x0 0x36 0x0 IRQ_TYPE_EDGE_RISING>;
 			qcom,external-resistor-micro-ohms = <10000>;
 		};
+
+		coincell@2800 {
+			compatible = "qcom,pm8941-coincell";
+			reg = <0x2800>;
+			status = "disabled";
+		};
 	};
 
 	usid1: pm8941@1 {
diff --git a/arch/arm/boot/dts/ste-dbx5x0.dtsi b/arch/arm/boot/dts/ste-dbx5x0.dtsi
index a75f3289e653..b8f81fb418ce 100644
--- a/arch/arm/boot/dts/ste-dbx5x0.dtsi
+++ b/arch/arm/boot/dts/ste-dbx5x0.dtsi
@@ -15,6 +15,33 @@
 #include "skeleton.dtsi"
 
 / {
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+		enable-method = "ste,dbx500-smp";
+
+		cpu-map {
+			cluster0 {
+				core0 {
+					cpu = <&CPU0>;
+				};
+				core1 {
+					cpu = <&CPU1>;
+				};
+			};
+		};
+		CPU0: cpu@300 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a9";
+			reg = <0x300>;
+		};
+		CPU1: cpu@301 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a9";
+			reg = <0x301>;
+		};
+	};
+
 	soc {
 		#address-cells = <1>;
 		#size-cells = <1>;
@@ -22,32 +49,6 @@
 		interrupt-parent = <&intc>;
 		ranges;
 
-		cpus {
-			#address-cells = <1>;
-			#size-cells = <0>;
-
-			cpu-map {
-				cluster0 {
-					core0 {
-						cpu = <&CPU0>;
-					};
-					core1 {
-						cpu = <&CPU1>;
-					};
-				};
-			};
-			CPU0: cpu@0 {
-				device_type = "cpu";
-				compatible = "arm,cortex-a9";
-				reg = <0>;
-			};
-			CPU1: cpu@1 {
-				device_type = "cpu";
-				compatible = "arm,cortex-a9";
-				reg = <1>;
-			};
-		};
-
 		ptm@801ae000 {
 			compatible = "arm,coresight-etm3x", "arm,primecell";
 			reg = <0x801ae000 0x1000>;
diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index 92828a1dec80..b48dd4f37f80 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -61,6 +61,7 @@ work_pending:
 	movlt	scno, #(__NR_restart_syscall - __NR_SYSCALL_BASE)
 	ldmia	sp, {r0 - r6}			@ have to reload r0 - r6
 	b	local_restart			@ ... and off we go
+ENDPROC(ret_fast_syscall)
 
 /*
  * "slow" syscall return path.  "why" tells us if this was a real syscall.
diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index bd755d97e459..29e2991465cb 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -399,6 +399,9 @@ ENTRY(secondary_startup)
 	sub	lr, r4, r5			@ mmu has been enabled
 	add	r3, r7, lr
 	ldrd	r4, [r3, #0]			@ get secondary_data.pgdir
+ARM_BE8(eor	r4, r4, r5)			@ Swap r5 and r4 in BE:
+ARM_BE8(eor	r5, r4, r5)			@ it can be done in 3 steps
+ARM_BE8(eor	r4, r4, r5)			@ without using a temp reg.
 	ldr	r8, [r3, #8]			@ get secondary_data.swapper_pg_dir
 	badr	lr, __enable_mmu		@ return address
 	mov	r13, r12			@ __secondary_switched address
diff --git a/arch/arm/kernel/vdso.c b/arch/arm/kernel/vdso.c
index efe17dd9b921..54a5aeab988d 100644
--- a/arch/arm/kernel/vdso.c
+++ b/arch/arm/kernel/vdso.c
@@ -296,7 +296,6 @@ static bool tk_is_cntvct(const struct timekeeper *tk)
  */
 void update_vsyscall(struct timekeeper *tk)
 {
-	struct timespec xtime_coarse;
 	struct timespec64 *wtm = &tk->wall_to_monotonic;
 
 	if (!cntvct_ok) {
@@ -308,10 +307,10 @@ void update_vsyscall(struct timekeeper *tk)
 
 	vdso_write_begin(vdso_data);
 
-	xtime_coarse = __current_kernel_time();
 	vdso_data->tk_is_cntvct			= tk_is_cntvct(tk);
-	vdso_data->xtime_coarse_sec		= xtime_coarse.tv_sec;
-	vdso_data->xtime_coarse_nsec		= xtime_coarse.tv_nsec;
+	vdso_data->xtime_coarse_sec		= tk->xtime_sec;
+	vdso_data->xtime_coarse_nsec		= (u32)(tk->tkr_mono.xtime_nsec >>
+							tk->tkr_mono.shift);
 	vdso_data->wtm_clock_sec		= wtm->tv_sec;
 	vdso_data->wtm_clock_nsec		= wtm->tv_nsec;
 
diff --git a/arch/arm/lib/uaccess_with_memcpy.c b/arch/arm/lib/uaccess_with_memcpy.c
index 3e58d710013c..4b39af2dfda9 100644
--- a/arch/arm/lib/uaccess_with_memcpy.c
+++ b/arch/arm/lib/uaccess_with_memcpy.c
@@ -96,7 +96,7 @@ __copy_to_user_memcpy(void __user *to, const void *from, unsigned long n)
 	}
 
 	/* the mmap semaphore is taken only if not in an atomic context */
-	atomic = in_atomic();
+	atomic = faulthandler_disabled();
 
 	if (!atomic)
 		down_read(&current->mm->mmap_sem);
diff --git a/arch/arm/mach-exynos/pm_domains.c b/arch/arm/mach-exynos/pm_domains.c
index 6001f1c9d136..4a87e86dec45 100644
--- a/arch/arm/mach-exynos/pm_domains.c
+++ b/arch/arm/mach-exynos/pm_domains.c
@@ -146,9 +146,8 @@ static __init int exynos4_pm_init_power_domain(void)
 		pd->base = of_iomap(np, 0);
 		if (!pd->base) {
 			pr_warn("%s: failed to map memory\n", __func__);
-			kfree(pd->pd.name);
+			kfree_const(pd->pd.name);
 			kfree(pd);
-			of_node_put(np);
 			continue;
 		}
 
diff --git a/arch/arm/mach-omap2/omap-wakeupgen.c b/arch/arm/mach-omap2/omap-wakeupgen.c
index 8e52621b5a6b..e1d2e991d17a 100644
--- a/arch/arm/mach-omap2/omap-wakeupgen.c
+++ b/arch/arm/mach-omap2/omap-wakeupgen.c
@@ -392,6 +392,7 @@ static struct irq_chip wakeupgen_chip = {
 	.irq_mask		= wakeupgen_mask,
 	.irq_unmask		= wakeupgen_unmask,
 	.irq_retrigger		= irq_chip_retrigger_hierarchy,
+	.irq_set_type		= irq_chip_set_type_parent,
 	.flags			= IRQCHIP_SKIP_SET_WAKE | IRQCHIP_MASK_ON_SUSPEND,
 #ifdef CONFIG_SMP
 	.irq_set_affinity	= irq_chip_set_affinity_parent,
diff --git a/arch/arm/vdso/Makefile b/arch/arm/vdso/Makefile
index 9d259d94e429..1160434eece0 100644
--- a/arch/arm/vdso/Makefile
+++ b/arch/arm/vdso/Makefile
@@ -14,7 +14,7 @@ VDSO_LDFLAGS += -Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096
 VDSO_LDFLAGS += -nostdlib -shared
 VDSO_LDFLAGS += $(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
 VDSO_LDFLAGS += $(call cc-ldoption, -Wl$(comma)--build-id)
-VDSO_LDFLAGS += $(call cc-option, -fuse-ld=bfd)
+VDSO_LDFLAGS += $(call cc-ldoption, -fuse-ld=bfd)
 
 obj-$(CONFIG_VDSO) += vdso.o
 extra-$(CONFIG_VDSO) += vdso.lds
diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c
index ec37ab3f524f..97bc68f4c689 100644
--- a/arch/arm64/kernel/vdso.c
+++ b/arch/arm64/kernel/vdso.c
@@ -199,16 +199,15 @@ up_fail:
  */
 void update_vsyscall(struct timekeeper *tk)
 {
-	struct timespec xtime_coarse;
 	u32 use_syscall = strcmp(tk->tkr_mono.clock->name, "arch_sys_counter");
 
 	++vdso_data->tb_seq_count;
 	smp_wmb();
 
-	xtime_coarse = __current_kernel_time();
 	vdso_data->use_syscall			= use_syscall;
-	vdso_data->xtime_coarse_sec		= xtime_coarse.tv_sec;
-	vdso_data->xtime_coarse_nsec		= xtime_coarse.tv_nsec;
+	vdso_data->xtime_coarse_sec		= tk->xtime_sec;
+	vdso_data->xtime_coarse_nsec		= tk->tkr_mono.xtime_nsec >>
+							tk->tkr_mono.shift;
 	vdso_data->wtm_clock_sec		= tk->wall_to_monotonic.tv_sec;
 	vdso_data->wtm_clock_nsec		= tk->wall_to_monotonic.tv_nsec;
 
diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c
index f02530e726f6..85c57158dcd9 100644
--- a/arch/arm64/kvm/inject_fault.c
+++ b/arch/arm64/kvm/inject_fault.c
@@ -168,8 +168,8 @@ void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr)
 {
 	if (!(vcpu->arch.hcr_el2 & HCR_RW))
 		inject_abt32(vcpu, false, addr);
-
-	inject_abt64(vcpu, false, addr);
+	else
+		inject_abt64(vcpu, false, addr);
 }
 
 /**
@@ -184,8 +184,8 @@ void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr)
 {
 	if (!(vcpu->arch.hcr_el2 & HCR_RW))
 		inject_abt32(vcpu, true, addr);
-
-	inject_abt64(vcpu, true, addr);
+	else
+		inject_abt64(vcpu, true, addr);
 }
 
 /**
@@ -198,6 +198,6 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu)
 {
 	if (!(vcpu->arch.hcr_el2 & HCR_RW))
 		inject_undef32(vcpu);
-
-	inject_undef64(vcpu);
+	else
+		inject_undef64(vcpu);
 }
diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig
index 753a6237f99a..0b6b40d37b95 100644
--- a/arch/m68k/configs/amiga_defconfig
+++ b/arch/m68k/configs/amiga_defconfig
@@ -57,7 +57,7 @@ CONFIG_NET_IPGRE_DEMUX=m
 CONFIG_NET_IPGRE=m
 CONFIG_NET_IPVTI=m
 CONFIG_NET_FOU_IP_TUNNELS=y
-CONFIG_GENEVE=m
+CONFIG_GENEVE_CORE=m
 CONFIG_INET_AH=m
 CONFIG_INET_ESP=m
 CONFIG_INET_IPCOMP=m
@@ -91,6 +91,7 @@ CONFIG_NF_CONNTRACK_SIP=m
 CONFIG_NF_CONNTRACK_TFTP=m
 CONFIG_NF_TABLES=m
 CONFIG_NF_TABLES_INET=m
+CONFIG_NF_TABLES_NETDEV=m
 CONFIG_NFT_EXTHDR=m
 CONFIG_NFT_META=m
 CONFIG_NFT_CT=m
@@ -287,7 +288,6 @@ CONFIG_BLK_DEV_CRYPTOLOOP=m
 CONFIG_BLK_DEV_DRBD=m
 CONFIG_BLK_DEV_NBD=m
 CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_PMEM=m
 CONFIG_CDROM_PKTCDVD=m
 CONFIG_ATA_OVER_ETH=m
 CONFIG_DUMMY_IRQ=m
@@ -320,7 +320,6 @@ CONFIG_BLK_DEV_DM=m
 CONFIG_DM_CRYPT=m
 CONFIG_DM_SNAPSHOT=m
 CONFIG_DM_THIN_PROVISIONING=m
-CONFIG_DM_CACHE=m
 CONFIG_DM_ERA=m
 CONFIG_DM_MIRROR=m
 CONFIG_DM_RAID=m
@@ -345,6 +344,7 @@ CONFIG_MACVLAN=m
 CONFIG_MACVTAP=m
 CONFIG_IPVLAN=m
 CONFIG_VXLAN=m
+CONFIG_GENEVE=m
 CONFIG_NETCONSOLE=m
 CONFIG_NETCONSOLE_DYNAMIC=y
 CONFIG_VETH=m
@@ -355,6 +355,7 @@ CONFIG_ARIADNE=y
 # CONFIG_NET_CADENCE is not set
 # CONFIG_NET_VENDOR_BROADCOM is not set
 # CONFIG_NET_VENDOR_CIRRUS is not set
+# CONFIG_NET_VENDOR_EZCHIP is not set
 # CONFIG_NET_VENDOR_HP is not set
 # CONFIG_NET_VENDOR_INTEL is not set
 # CONFIG_NET_VENDOR_MARVELL is not set
@@ -363,6 +364,7 @@ CONFIG_HYDRA=y
 CONFIG_APNE=y
 CONFIG_ZORRO8390=y
 # CONFIG_NET_VENDOR_QUALCOMM is not set
+# CONFIG_NET_VENDOR_RENESAS is not set
 # CONFIG_NET_VENDOR_ROCKER is not set
 # CONFIG_NET_VENDOR_SAMSUNG is not set
 # CONFIG_NET_VENDOR_SEEQ is not set
@@ -448,6 +450,7 @@ CONFIG_UDF_FS=m
 CONFIG_MSDOS_FS=m
 CONFIG_VFAT_FS=m
 CONFIG_PROC_KCORE=y
+CONFIG_PROC_CHILDREN=y
 CONFIG_TMPFS=y
 CONFIG_AFFS_FS=m
 CONFIG_ECRYPT_FS=m
@@ -536,6 +539,7 @@ CONFIG_TEST_FIRMWARE=m
 CONFIG_TEST_UDELAY=m
 CONFIG_EARLY_PRINTK=y
 CONFIG_ENCRYPTED_KEYS=m
+CONFIG_CRYPTO_RSA=m
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_USER=m
 CONFIG_CRYPTO_CRYPTD=m
@@ -543,6 +547,7 @@ CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CCM=m
 CONFIG_CRYPTO_GCM=m
+CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_CTS=m
 CONFIG_CRYPTO_LRW=m
 CONFIG_CRYPTO_PCBC=m
@@ -571,14 +576,15 @@ CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_ZLIB=m
 CONFIG_CRYPTO_LZO=m
+CONFIG_CRYPTO_842=m
 CONFIG_CRYPTO_LZ4=m
 CONFIG_CRYPTO_LZ4HC=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
-CONFIG_CRYPTO_DRBG_MENU=m
+CONFIG_CRYPTO_ANSI_CPRNG=m
 CONFIG_CRYPTO_DRBG_HASH=y
 CONFIG_CRYPTO_DRBG_CTR=y
 CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
 CONFIG_CRYPTO_USER_API_RNG=m
+CONFIG_CRYPTO_USER_API_AEAD=m
 # CONFIG_CRYPTO_HW is not set
 CONFIG_XZ_DEC_TEST=m
diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig
index 1f93dcaf02e5..eeb3a8991fc4 100644
--- a/arch/m68k/configs/apollo_defconfig
+++ b/arch/m68k/configs/apollo_defconfig
@@ -55,7 +55,7 @@ CONFIG_NET_IPGRE_DEMUX=m
 CONFIG_NET_IPGRE=m
 CONFIG_NET_IPVTI=m
 CONFIG_NET_FOU_IP_TUNNELS=y
-CONFIG_GENEVE=m
+CONFIG_GENEVE_CORE=m
 CONFIG_INET_AH=m
 CONFIG_INET_ESP=m
 CONFIG_INET_IPCOMP=m
@@ -89,6 +89,7 @@ CONFIG_NF_CONNTRACK_SIP=m
 CONFIG_NF_CONNTRACK_TFTP=m
 CONFIG_NF_TABLES=m
 CONFIG_NF_TABLES_INET=m
+CONFIG_NF_TABLES_NETDEV=m
 CONFIG_NFT_EXTHDR=m
 CONFIG_NFT_META=m
 CONFIG_NFT_CT=m
@@ -279,7 +280,6 @@ CONFIG_BLK_DEV_CRYPTOLOOP=m
 CONFIG_BLK_DEV_DRBD=m
 CONFIG_BLK_DEV_NBD=m
 CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_PMEM=m
 CONFIG_CDROM_PKTCDVD=m
 CONFIG_ATA_OVER_ETH=m
 CONFIG_DUMMY_IRQ=m
@@ -302,7 +302,6 @@ CONFIG_BLK_DEV_DM=m
 CONFIG_DM_CRYPT=m
 CONFIG_DM_SNAPSHOT=m
 CONFIG_DM_THIN_PROVISIONING=m
-CONFIG_DM_CACHE=m
 CONFIG_DM_ERA=m
 CONFIG_DM_MIRROR=m
 CONFIG_DM_RAID=m
@@ -327,17 +326,20 @@ CONFIG_MACVLAN=m
 CONFIG_MACVTAP=m
 CONFIG_IPVLAN=m
 CONFIG_VXLAN=m
+CONFIG_GENEVE=m
 CONFIG_NETCONSOLE=m
 CONFIG_NETCONSOLE_DYNAMIC=y
 CONFIG_VETH=m
 # CONFIG_NET_VENDOR_ARC is not set
 # CONFIG_NET_CADENCE is not set
 # CONFIG_NET_VENDOR_BROADCOM is not set
+# CONFIG_NET_VENDOR_EZCHIP is not set
 # CONFIG_NET_VENDOR_INTEL is not set
 # CONFIG_NET_VENDOR_MARVELL is not set
 # CONFIG_NET_VENDOR_MICREL is not set
 # CONFIG_NET_VENDOR_NATSEMI is not set
 # CONFIG_NET_VENDOR_QUALCOMM is not set
+# CONFIG_NET_VENDOR_RENESAS is not set
 # CONFIG_NET_VENDOR_ROCKER is not set
 # CONFIG_NET_VENDOR_SAMSUNG is not set
 # CONFIG_NET_VENDOR_SEEQ is not set
@@ -406,6 +408,7 @@ CONFIG_UDF_FS=m
 CONFIG_MSDOS_FS=m
 CONFIG_VFAT_FS=m
 CONFIG_PROC_KCORE=y
+CONFIG_PROC_CHILDREN=y
 CONFIG_TMPFS=y
 CONFIG_AFFS_FS=m
 CONFIG_ECRYPT_FS=m
@@ -494,6 +497,7 @@ CONFIG_TEST_FIRMWARE=m
 CONFIG_TEST_UDELAY=m
 CONFIG_EARLY_PRINTK=y
 CONFIG_ENCRYPTED_KEYS=m
+CONFIG_CRYPTO_RSA=m
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_USER=m
 CONFIG_CRYPTO_CRYPTD=m
@@ -501,6 +505,7 @@ CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CCM=m
 CONFIG_CRYPTO_GCM=m
+CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_CTS=m
 CONFIG_CRYPTO_LRW=m
 CONFIG_CRYPTO_PCBC=m
@@ -529,14 +534,15 @@ CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_ZLIB=m
 CONFIG_CRYPTO_LZO=m
+CONFIG_CRYPTO_842=m
 CONFIG_CRYPTO_LZ4=m
 CONFIG_CRYPTO_LZ4HC=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
-CONFIG_CRYPTO_DRBG_MENU=m
+CONFIG_CRYPTO_ANSI_CPRNG=m
 CONFIG_CRYPTO_DRBG_HASH=y
 CONFIG_CRYPTO_DRBG_CTR=y
 CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
 CONFIG_CRYPTO_USER_API_RNG=m
+CONFIG_CRYPTO_USER_API_AEAD=m
 # CONFIG_CRYPTO_HW is not set
 CONFIG_XZ_DEC_TEST=m
diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig
index 831b8b8b92ad..3a7006654ce9 100644
--- a/arch/m68k/configs/atari_defconfig
+++ b/arch/m68k/configs/atari_defconfig
@@ -55,7 +55,7 @@ CONFIG_NET_IPGRE_DEMUX=m
 CONFIG_NET_IPGRE=m
 CONFIG_NET_IPVTI=m
 CONFIG_NET_FOU_IP_TUNNELS=y
-CONFIG_GENEVE=m
+CONFIG_GENEVE_CORE=m
 CONFIG_INET_AH=m
 CONFIG_INET_ESP=m
 CONFIG_INET_IPCOMP=m
@@ -89,6 +89,7 @@ CONFIG_NF_CONNTRACK_SIP=m
 CONFIG_NF_CONNTRACK_TFTP=m
 CONFIG_NF_TABLES=m
 CONFIG_NF_TABLES_INET=m
+CONFIG_NF_TABLES_NETDEV=m
 CONFIG_NFT_EXTHDR=m
 CONFIG_NFT_META=m
 CONFIG_NFT_CT=m
@@ -283,7 +284,6 @@ CONFIG_BLK_DEV_CRYPTOLOOP=m
 CONFIG_BLK_DEV_DRBD=m
 CONFIG_BLK_DEV_NBD=m
 CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_PMEM=m
 CONFIG_CDROM_PKTCDVD=m
 CONFIG_ATA_OVER_ETH=m
 CONFIG_DUMMY_IRQ=m
@@ -311,7 +311,6 @@ CONFIG_BLK_DEV_DM=m
 CONFIG_DM_CRYPT=m
 CONFIG_DM_SNAPSHOT=m
 CONFIG_DM_THIN_PROVISIONING=m
-CONFIG_DM_CACHE=m
 CONFIG_DM_ERA=m
 CONFIG_DM_MIRROR=m
 CONFIG_DM_RAID=m
@@ -336,6 +335,7 @@ CONFIG_MACVLAN=m
 CONFIG_MACVTAP=m
 CONFIG_IPVLAN=m
 CONFIG_VXLAN=m
+CONFIG_GENEVE=m
 CONFIG_NETCONSOLE=m
 CONFIG_NETCONSOLE_DYNAMIC=y
 CONFIG_VETH=m
@@ -343,11 +343,13 @@ CONFIG_ATARILANCE=y
 # CONFIG_NET_VENDOR_ARC is not set
 # CONFIG_NET_CADENCE is not set
 # CONFIG_NET_VENDOR_BROADCOM is not set
+# CONFIG_NET_VENDOR_EZCHIP is not set
 # CONFIG_NET_VENDOR_INTEL is not set
 # CONFIG_NET_VENDOR_MARVELL is not set
 # CONFIG_NET_VENDOR_MICREL is not set
 CONFIG_NE2000=y
 # CONFIG_NET_VENDOR_QUALCOMM is not set
+# CONFIG_NET_VENDOR_RENESAS is not set
 # CONFIG_NET_VENDOR_ROCKER is not set
 # CONFIG_NET_VENDOR_SAMSUNG is not set
 # CONFIG_NET_VENDOR_SEEQ is not set
@@ -428,6 +430,7 @@ CONFIG_UDF_FS=m
 CONFIG_MSDOS_FS=m
 CONFIG_VFAT_FS=m
 CONFIG_PROC_KCORE=y
+CONFIG_PROC_CHILDREN=y
 CONFIG_TMPFS=y
 CONFIG_AFFS_FS=m
 CONFIG_ECRYPT_FS=m
@@ -516,6 +519,7 @@ CONFIG_TEST_FIRMWARE=m
 CONFIG_TEST_UDELAY=m
 CONFIG_EARLY_PRINTK=y
 CONFIG_ENCRYPTED_KEYS=m
+CONFIG_CRYPTO_RSA=m
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_USER=m
 CONFIG_CRYPTO_CRYPTD=m
@@ -523,6 +527,7 @@ CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CCM=m
 CONFIG_CRYPTO_GCM=m
+CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_CTS=m
 CONFIG_CRYPTO_LRW=m
 CONFIG_CRYPTO_PCBC=m
@@ -551,14 +556,15 @@ CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_ZLIB=m
 CONFIG_CRYPTO_LZO=m
+CONFIG_CRYPTO_842=m
 CONFIG_CRYPTO_LZ4=m
 CONFIG_CRYPTO_LZ4HC=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
-CONFIG_CRYPTO_DRBG_MENU=m
+CONFIG_CRYPTO_ANSI_CPRNG=m
 CONFIG_CRYPTO_DRBG_HASH=y
 CONFIG_CRYPTO_DRBG_CTR=y
 CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
 CONFIG_CRYPTO_USER_API_RNG=m
+CONFIG_CRYPTO_USER_API_AEAD=m
 # CONFIG_CRYPTO_HW is not set
 CONFIG_XZ_DEC_TEST=m
diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig
index 91fd187c16d5..0586b323a673 100644
--- a/arch/m68k/configs/bvme6000_defconfig
+++ b/arch/m68k/configs/bvme6000_defconfig
@@ -53,7 +53,7 @@ CONFIG_NET_IPGRE_DEMUX=m
 CONFIG_NET_IPGRE=m
 CONFIG_NET_IPVTI=m
 CONFIG_NET_FOU_IP_TUNNELS=y
-CONFIG_GENEVE=m
+CONFIG_GENEVE_CORE=m
 CONFIG_INET_AH=m
 CONFIG_INET_ESP=m
 CONFIG_INET_IPCOMP=m
@@ -87,6 +87,7 @@ CONFIG_NF_CONNTRACK_SIP=m
 CONFIG_NF_CONNTRACK_TFTP=m
 CONFIG_NF_TABLES=m
 CONFIG_NF_TABLES_INET=m
+CONFIG_NF_TABLES_NETDEV=m
 CONFIG_NFT_EXTHDR=m
 CONFIG_NFT_META=m
 CONFIG_NFT_CT=m
@@ -277,7 +278,6 @@ CONFIG_BLK_DEV_CRYPTOLOOP=m
 CONFIG_BLK_DEV_DRBD=m
 CONFIG_BLK_DEV_NBD=m
 CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_PMEM=m
 CONFIG_CDROM_PKTCDVD=m
 CONFIG_ATA_OVER_ETH=m
 CONFIG_DUMMY_IRQ=m
@@ -301,7 +301,6 @@ CONFIG_BLK_DEV_DM=m
 CONFIG_DM_CRYPT=m
 CONFIG_DM_SNAPSHOT=m
 CONFIG_DM_THIN_PROVISIONING=m
-CONFIG_DM_CACHE=m
 CONFIG_DM_ERA=m
 CONFIG_DM_MIRROR=m
 CONFIG_DM_RAID=m
@@ -326,17 +325,20 @@ CONFIG_MACVLAN=m
 CONFIG_MACVTAP=m
 CONFIG_IPVLAN=m
 CONFIG_VXLAN=m
+CONFIG_GENEVE=m
 CONFIG_NETCONSOLE=m
 CONFIG_NETCONSOLE_DYNAMIC=y
 CONFIG_VETH=m
 # CONFIG_NET_VENDOR_ARC is not set
 # CONFIG_NET_CADENCE is not set
 # CONFIG_NET_VENDOR_BROADCOM is not set
+# CONFIG_NET_VENDOR_EZCHIP is not set
 CONFIG_BVME6000_NET=y
 # CONFIG_NET_VENDOR_MARVELL is not set
 # CONFIG_NET_VENDOR_MICREL is not set
 # CONFIG_NET_VENDOR_NATSEMI is not set
 # CONFIG_NET_VENDOR_QUALCOMM is not set
+# CONFIG_NET_VENDOR_RENESAS is not set
 # CONFIG_NET_VENDOR_ROCKER is not set
 # CONFIG_NET_VENDOR_SAMSUNG is not set
 # CONFIG_NET_VENDOR_SEEQ is not set
@@ -399,6 +401,7 @@ CONFIG_UDF_FS=m
 CONFIG_MSDOS_FS=m
 CONFIG_VFAT_FS=m
 CONFIG_PROC_KCORE=y
+CONFIG_PROC_CHILDREN=y
 CONFIG_TMPFS=y
 CONFIG_AFFS_FS=m
 CONFIG_ECRYPT_FS=m
@@ -487,6 +490,7 @@ CONFIG_TEST_FIRMWARE=m
 CONFIG_TEST_UDELAY=m
 CONFIG_EARLY_PRINTK=y
 CONFIG_ENCRYPTED_KEYS=m
+CONFIG_CRYPTO_RSA=m
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_USER=m
 CONFIG_CRYPTO_CRYPTD=m
@@ -494,6 +498,7 @@ CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CCM=m
 CONFIG_CRYPTO_GCM=m
+CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_CTS=m
 CONFIG_CRYPTO_LRW=m
 CONFIG_CRYPTO_PCBC=m
@@ -522,14 +527,15 @@ CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_ZLIB=m
 CONFIG_CRYPTO_LZO=m
+CONFIG_CRYPTO_842=m
 CONFIG_CRYPTO_LZ4=m
 CONFIG_CRYPTO_LZ4HC=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
-CONFIG_CRYPTO_DRBG_MENU=m
+CONFIG_CRYPTO_ANSI_CPRNG=m
 CONFIG_CRYPTO_DRBG_HASH=y
 CONFIG_CRYPTO_DRBG_CTR=y
 CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
 CONFIG_CRYPTO_USER_API_RNG=m
+CONFIG_CRYPTO_USER_API_AEAD=m
 # CONFIG_CRYPTO_HW is not set
 CONFIG_XZ_DEC_TEST=m
diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig
index 9d4934f1d2c3..ad1dbce07aa4 100644
--- a/arch/m68k/configs/hp300_defconfig
+++ b/arch/m68k/configs/hp300_defconfig
@@ -55,7 +55,7 @@ CONFIG_NET_IPGRE_DEMUX=m
 CONFIG_NET_IPGRE=m
 CONFIG_NET_IPVTI=m
 CONFIG_NET_FOU_IP_TUNNELS=y
-CONFIG_GENEVE=m
+CONFIG_GENEVE_CORE=m
 CONFIG_INET_AH=m
 CONFIG_INET_ESP=m
 CONFIG_INET_IPCOMP=m
@@ -89,6 +89,7 @@ CONFIG_NF_CONNTRACK_SIP=m
 CONFIG_NF_CONNTRACK_TFTP=m
 CONFIG_NF_TABLES=m
 CONFIG_NF_TABLES_INET=m
+CONFIG_NF_TABLES_NETDEV=m
 CONFIG_NFT_EXTHDR=m
 CONFIG_NFT_META=m
 CONFIG_NFT_CT=m
@@ -279,7 +280,6 @@ CONFIG_BLK_DEV_CRYPTOLOOP=m
 CONFIG_BLK_DEV_DRBD=m
 CONFIG_BLK_DEV_NBD=m
 CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_PMEM=m
 CONFIG_CDROM_PKTCDVD=m
 CONFIG_ATA_OVER_ETH=m
 CONFIG_DUMMY_IRQ=m
@@ -302,7 +302,6 @@ CONFIG_BLK_DEV_DM=m
 CONFIG_DM_CRYPT=m
 CONFIG_DM_SNAPSHOT=m
 CONFIG_DM_THIN_PROVISIONING=m
-CONFIG_DM_CACHE=m
 CONFIG_DM_ERA=m
 CONFIG_DM_MIRROR=m
 CONFIG_DM_RAID=m
@@ -327,6 +326,7 @@ CONFIG_MACVLAN=m
 CONFIG_MACVTAP=m
 CONFIG_IPVLAN=m
 CONFIG_VXLAN=m
+CONFIG_GENEVE=m
 CONFIG_NETCONSOLE=m
 CONFIG_NETCONSOLE_DYNAMIC=y
 CONFIG_VETH=m
@@ -334,11 +334,13 @@ CONFIG_HPLANCE=y
 # CONFIG_NET_VENDOR_ARC is not set
 # CONFIG_NET_CADENCE is not set
 # CONFIG_NET_VENDOR_BROADCOM is not set
+# CONFIG_NET_VENDOR_EZCHIP is not set
 # CONFIG_NET_VENDOR_INTEL is not set
 # CONFIG_NET_VENDOR_MARVELL is not set
 # CONFIG_NET_VENDOR_MICREL is not set
 # CONFIG_NET_VENDOR_NATSEMI is not set
 # CONFIG_NET_VENDOR_QUALCOMM is not set
+# CONFIG_NET_VENDOR_RENESAS is not set
 # CONFIG_NET_VENDOR_ROCKER is not set
 # CONFIG_NET_VENDOR_SAMSUNG is not set
 # CONFIG_NET_VENDOR_SEEQ is not set
@@ -408,6 +410,7 @@ CONFIG_UDF_FS=m
 CONFIG_MSDOS_FS=m
 CONFIG_VFAT_FS=m
 CONFIG_PROC_KCORE=y
+CONFIG_PROC_CHILDREN=y
 CONFIG_TMPFS=y
 CONFIG_AFFS_FS=m
 CONFIG_ECRYPT_FS=m
@@ -496,6 +499,7 @@ CONFIG_TEST_FIRMWARE=m
 CONFIG_TEST_UDELAY=m
 CONFIG_EARLY_PRINTK=y
 CONFIG_ENCRYPTED_KEYS=m
+CONFIG_CRYPTO_RSA=m
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_USER=m
 CONFIG_CRYPTO_CRYPTD=m
@@ -503,6 +507,7 @@ CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CCM=m
 CONFIG_CRYPTO_GCM=m
+CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_CTS=m
 CONFIG_CRYPTO_LRW=m
 CONFIG_CRYPTO_PCBC=m
@@ -531,14 +536,15 @@ CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_ZLIB=m
 CONFIG_CRYPTO_LZO=m
+CONFIG_CRYPTO_842=m
 CONFIG_CRYPTO_LZ4=m
 CONFIG_CRYPTO_LZ4HC=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
-CONFIG_CRYPTO_DRBG_MENU=m
+CONFIG_CRYPTO_ANSI_CPRNG=m
 CONFIG_CRYPTO_DRBG_HASH=y
 CONFIG_CRYPTO_DRBG_CTR=y
 CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
 CONFIG_CRYPTO_USER_API_RNG=m
+CONFIG_CRYPTO_USER_API_AEAD=m
 # CONFIG_CRYPTO_HW is not set
 CONFIG_XZ_DEC_TEST=m
diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig
index 72bc187ca995..b44acacaecf4 100644
--- a/arch/m68k/configs/mac_defconfig
+++ b/arch/m68k/configs/mac_defconfig
@@ -54,7 +54,7 @@ CONFIG_NET_IPGRE_DEMUX=m
 CONFIG_NET_IPGRE=m
 CONFIG_NET_IPVTI=m
 CONFIG_NET_FOU_IP_TUNNELS=y
-CONFIG_GENEVE=m
+CONFIG_GENEVE_CORE=m
 CONFIG_INET_AH=m
 CONFIG_INET_ESP=m
 CONFIG_INET_IPCOMP=m
@@ -88,6 +88,7 @@ CONFIG_NF_CONNTRACK_SIP=m
 CONFIG_NF_CONNTRACK_TFTP=m
 CONFIG_NF_TABLES=m
 CONFIG_NF_TABLES_INET=m
+CONFIG_NF_TABLES_NETDEV=m
 CONFIG_NFT_EXTHDR=m
 CONFIG_NFT_META=m
 CONFIG_NFT_CT=m
@@ -282,7 +283,6 @@ CONFIG_BLK_DEV_CRYPTOLOOP=m
 CONFIG_BLK_DEV_DRBD=m
 CONFIG_BLK_DEV_NBD=m
 CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_PMEM=m
 CONFIG_CDROM_PKTCDVD=m
 CONFIG_ATA_OVER_ETH=m
 CONFIG_DUMMY_IRQ=m
@@ -311,7 +311,6 @@ CONFIG_BLK_DEV_DM=m
 CONFIG_DM_CRYPT=m
 CONFIG_DM_SNAPSHOT=m
 CONFIG_DM_THIN_PROVISIONING=m
-CONFIG_DM_CACHE=m
 CONFIG_DM_ERA=m
 CONFIG_DM_MIRROR=m
 CONFIG_DM_RAID=m
@@ -343,6 +342,7 @@ CONFIG_MACVLAN=m
 CONFIG_MACVTAP=m
 CONFIG_IPVLAN=m
 CONFIG_VXLAN=m
+CONFIG_GENEVE=m
 CONFIG_NETCONSOLE=m
 CONFIG_NETCONSOLE_DYNAMIC=y
 CONFIG_VETH=m
@@ -351,12 +351,14 @@ CONFIG_MACMACE=y
 # CONFIG_NET_CADENCE is not set
 # CONFIG_NET_VENDOR_BROADCOM is not set
 CONFIG_MAC89x0=y
+# CONFIG_NET_VENDOR_EZCHIP is not set
 # CONFIG_NET_VENDOR_INTEL is not set
 # CONFIG_NET_VENDOR_MARVELL is not set
 # CONFIG_NET_VENDOR_MICREL is not set
 CONFIG_MACSONIC=y
 CONFIG_MAC8390=y
 # CONFIG_NET_VENDOR_QUALCOMM is not set
+# CONFIG_NET_VENDOR_RENESAS is not set
 # CONFIG_NET_VENDOR_ROCKER is not set
 # CONFIG_NET_VENDOR_SAMSUNG is not set
 # CONFIG_NET_VENDOR_SEEQ is not set
@@ -430,6 +432,7 @@ CONFIG_UDF_FS=m
 CONFIG_MSDOS_FS=m
 CONFIG_VFAT_FS=m
 CONFIG_PROC_KCORE=y
+CONFIG_PROC_CHILDREN=y
 CONFIG_TMPFS=y
 CONFIG_AFFS_FS=m
 CONFIG_ECRYPT_FS=m
@@ -518,6 +521,7 @@ CONFIG_TEST_FIRMWARE=m
 CONFIG_TEST_UDELAY=m
 CONFIG_EARLY_PRINTK=y
 CONFIG_ENCRYPTED_KEYS=m
+CONFIG_CRYPTO_RSA=m
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_USER=m
 CONFIG_CRYPTO_CRYPTD=m
@@ -525,6 +529,7 @@ CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CCM=m
 CONFIG_CRYPTO_GCM=m
+CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_CTS=m
 CONFIG_CRYPTO_LRW=m
 CONFIG_CRYPTO_PCBC=m
@@ -553,14 +558,15 @@ CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_ZLIB=m
 CONFIG_CRYPTO_LZO=m
+CONFIG_CRYPTO_842=m
 CONFIG_CRYPTO_LZ4=m
 CONFIG_CRYPTO_LZ4HC=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
-CONFIG_CRYPTO_DRBG_MENU=m
+CONFIG_CRYPTO_ANSI_CPRNG=m
 CONFIG_CRYPTO_DRBG_HASH=y
 CONFIG_CRYPTO_DRBG_CTR=y
 CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
 CONFIG_CRYPTO_USER_API_RNG=m
+CONFIG_CRYPTO_USER_API_AEAD=m
 # CONFIG_CRYPTO_HW is not set
 CONFIG_XZ_DEC_TEST=m
diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig
index 8fb65535597f..8afca3753db1 100644
--- a/arch/m68k/configs/multi_defconfig
+++ b/arch/m68k/configs/multi_defconfig
@@ -64,7 +64,7 @@ CONFIG_NET_IPGRE_DEMUX=m
 CONFIG_NET_IPGRE=m
 CONFIG_NET_IPVTI=m
 CONFIG_NET_FOU_IP_TUNNELS=y
-CONFIG_GENEVE=m
+CONFIG_GENEVE_CORE=m
 CONFIG_INET_AH=m
 CONFIG_INET_ESP=m
 CONFIG_INET_IPCOMP=m
@@ -98,6 +98,7 @@ CONFIG_NF_CONNTRACK_SIP=m
 CONFIG_NF_CONNTRACK_TFTP=m
 CONFIG_NF_TABLES=m
 CONFIG_NF_TABLES_INET=m
+CONFIG_NF_TABLES_NETDEV=m
 CONFIG_NFT_EXTHDR=m
 CONFIG_NFT_META=m
 CONFIG_NFT_CT=m
@@ -301,7 +302,6 @@ CONFIG_BLK_DEV_CRYPTOLOOP=m
 CONFIG_BLK_DEV_DRBD=m
 CONFIG_BLK_DEV_NBD=m
 CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_PMEM=m
 CONFIG_CDROM_PKTCDVD=m
 CONFIG_ATA_OVER_ETH=m
 CONFIG_DUMMY_IRQ=m
@@ -344,7 +344,6 @@ CONFIG_BLK_DEV_DM=m
 CONFIG_DM_CRYPT=m
 CONFIG_DM_SNAPSHOT=m
 CONFIG_DM_THIN_PROVISIONING=m
-CONFIG_DM_CACHE=m
 CONFIG_DM_ERA=m
 CONFIG_DM_MIRROR=m
 CONFIG_DM_RAID=m
@@ -376,6 +375,7 @@ CONFIG_MACVLAN=m
 CONFIG_MACVTAP=m
 CONFIG_IPVLAN=m
 CONFIG_VXLAN=m
+CONFIG_GENEVE=m
 CONFIG_NETCONSOLE=m
 CONFIG_NETCONSOLE_DYNAMIC=y
 CONFIG_VETH=m
@@ -391,6 +391,7 @@ CONFIG_MACMACE=y
 # CONFIG_NET_CADENCE is not set
 # CONFIG_NET_VENDOR_BROADCOM is not set
 CONFIG_MAC89x0=y
+# CONFIG_NET_VENDOR_EZCHIP is not set
 # CONFIG_NET_VENDOR_HP is not set
 CONFIG_BVME6000_NET=y
 CONFIG_MVME16x_NET=y
@@ -403,6 +404,7 @@ CONFIG_NE2000=y
 CONFIG_APNE=y
 CONFIG_ZORRO8390=y
 # CONFIG_NET_VENDOR_QUALCOMM is not set
+# CONFIG_NET_VENDOR_RENESAS is not set
 # CONFIG_NET_VENDOR_ROCKER is not set
 # CONFIG_NET_VENDOR_SAMSUNG is not set
 # CONFIG_NET_VENDOR_SEEQ is not set
@@ -510,6 +512,7 @@ CONFIG_UDF_FS=m
 CONFIG_MSDOS_FS=m
 CONFIG_VFAT_FS=m
 CONFIG_PROC_KCORE=y
+CONFIG_PROC_CHILDREN=y
 CONFIG_TMPFS=y
 CONFIG_AFFS_FS=m
 CONFIG_ECRYPT_FS=m
@@ -598,6 +601,7 @@ CONFIG_TEST_FIRMWARE=m
 CONFIG_TEST_UDELAY=m
 CONFIG_EARLY_PRINTK=y
 CONFIG_ENCRYPTED_KEYS=m
+CONFIG_CRYPTO_RSA=m
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_USER=m
 CONFIG_CRYPTO_CRYPTD=m
@@ -605,6 +609,7 @@ CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CCM=m
 CONFIG_CRYPTO_GCM=m
+CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_CTS=m
 CONFIG_CRYPTO_LRW=m
 CONFIG_CRYPTO_PCBC=m
@@ -633,14 +638,15 @@ CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_ZLIB=m
 CONFIG_CRYPTO_LZO=m
+CONFIG_CRYPTO_842=m
 CONFIG_CRYPTO_LZ4=m
 CONFIG_CRYPTO_LZ4HC=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
-CONFIG_CRYPTO_DRBG_MENU=m
+CONFIG_CRYPTO_ANSI_CPRNG=m
 CONFIG_CRYPTO_DRBG_HASH=y
 CONFIG_CRYPTO_DRBG_CTR=y
 CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
 CONFIG_CRYPTO_USER_API_RNG=m
+CONFIG_CRYPTO_USER_API_AEAD=m
 # CONFIG_CRYPTO_HW is not set
 CONFIG_XZ_DEC_TEST=m
diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig
index f34491ec0126..ef00875994d9 100644
--- a/arch/m68k/configs/mvme147_defconfig
+++ b/arch/m68k/configs/mvme147_defconfig
@@ -52,7 +52,7 @@ CONFIG_NET_IPGRE_DEMUX=m
 CONFIG_NET_IPGRE=m
 CONFIG_NET_IPVTI=m
 CONFIG_NET_FOU_IP_TUNNELS=y
-CONFIG_GENEVE=m
+CONFIG_GENEVE_CORE=m
 CONFIG_INET_AH=m
 CONFIG_INET_ESP=m
 CONFIG_INET_IPCOMP=m
@@ -86,6 +86,7 @@ CONFIG_NF_CONNTRACK_SIP=m
 CONFIG_NF_CONNTRACK_TFTP=m
 CONFIG_NF_TABLES=m
 CONFIG_NF_TABLES_INET=m
+CONFIG_NF_TABLES_NETDEV=m
 CONFIG_NFT_EXTHDR=m
 CONFIG_NFT_META=m
 CONFIG_NFT_CT=m
@@ -276,7 +277,6 @@ CONFIG_BLK_DEV_CRYPTOLOOP=m
 CONFIG_BLK_DEV_DRBD=m
 CONFIG_BLK_DEV_NBD=m
 CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_PMEM=m
 CONFIG_CDROM_PKTCDVD=m
 CONFIG_ATA_OVER_ETH=m
 CONFIG_DUMMY_IRQ=m
@@ -300,7 +300,6 @@ CONFIG_BLK_DEV_DM=m
 CONFIG_DM_CRYPT=m
 CONFIG_DM_SNAPSHOT=m
 CONFIG_DM_THIN_PROVISIONING=m
-CONFIG_DM_CACHE=m
 CONFIG_DM_ERA=m
 CONFIG_DM_MIRROR=m
 CONFIG_DM_RAID=m
@@ -325,6 +324,7 @@ CONFIG_MACVLAN=m
 CONFIG_MACVTAP=m
 CONFIG_IPVLAN=m
 CONFIG_VXLAN=m
+CONFIG_GENEVE=m
 CONFIG_NETCONSOLE=m
 CONFIG_NETCONSOLE_DYNAMIC=y
 CONFIG_VETH=m
@@ -332,11 +332,13 @@ CONFIG_MVME147_NET=y
 # CONFIG_NET_VENDOR_ARC is not set
 # CONFIG_NET_CADENCE is not set
 # CONFIG_NET_VENDOR_BROADCOM is not set
+# CONFIG_NET_VENDOR_EZCHIP is not set
 # CONFIG_NET_VENDOR_INTEL is not set
 # CONFIG_NET_VENDOR_MARVELL is not set
 # CONFIG_NET_VENDOR_MICREL is not set
 # CONFIG_NET_VENDOR_NATSEMI is not set
 # CONFIG_NET_VENDOR_QUALCOMM is not set
+# CONFIG_NET_VENDOR_RENESAS is not set
 # CONFIG_NET_VENDOR_ROCKER is not set
 # CONFIG_NET_VENDOR_SAMSUNG is not set
 # CONFIG_NET_VENDOR_SEEQ is not set
@@ -399,6 +401,7 @@ CONFIG_UDF_FS=m
 CONFIG_MSDOS_FS=m
 CONFIG_VFAT_FS=m
 CONFIG_PROC_KCORE=y
+CONFIG_PROC_CHILDREN=y
 CONFIG_TMPFS=y
 CONFIG_AFFS_FS=m
 CONFIG_ECRYPT_FS=m
@@ -487,6 +490,7 @@ CONFIG_TEST_FIRMWARE=m
 CONFIG_TEST_UDELAY=m
 CONFIG_EARLY_PRINTK=y
 CONFIG_ENCRYPTED_KEYS=m
+CONFIG_CRYPTO_RSA=m
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_USER=m
 CONFIG_CRYPTO_CRYPTD=m
@@ -494,6 +498,7 @@ CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CCM=m
 CONFIG_CRYPTO_GCM=m
+CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_CTS=m
 CONFIG_CRYPTO_LRW=m
 CONFIG_CRYPTO_PCBC=m
@@ -522,14 +527,15 @@ CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_ZLIB=m
 CONFIG_CRYPTO_LZO=m
+CONFIG_CRYPTO_842=m
 CONFIG_CRYPTO_LZ4=m
 CONFIG_CRYPTO_LZ4HC=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
-CONFIG_CRYPTO_DRBG_MENU=m
+CONFIG_CRYPTO_ANSI_CPRNG=m
 CONFIG_CRYPTO_DRBG_HASH=y
 CONFIG_CRYPTO_DRBG_CTR=y
 CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
 CONFIG_CRYPTO_USER_API_RNG=m
+CONFIG_CRYPTO_USER_API_AEAD=m
 # CONFIG_CRYPTO_HW is not set
 CONFIG_XZ_DEC_TEST=m
diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig
index 3d3614d1b041..387c2bd90ff1 100644
--- a/arch/m68k/configs/mvme16x_defconfig
+++ b/arch/m68k/configs/mvme16x_defconfig
@@ -53,7 +53,7 @@ CONFIG_NET_IPGRE_DEMUX=m
 CONFIG_NET_IPGRE=m
 CONFIG_NET_IPVTI=m
 CONFIG_NET_FOU_IP_TUNNELS=y
-CONFIG_GENEVE=m
+CONFIG_GENEVE_CORE=m
 CONFIG_INET_AH=m
 CONFIG_INET_ESP=m
 CONFIG_INET_IPCOMP=m
@@ -87,6 +87,7 @@ CONFIG_NF_CONNTRACK_SIP=m
 CONFIG_NF_CONNTRACK_TFTP=m
 CONFIG_NF_TABLES=m
 CONFIG_NF_TABLES_INET=m
+CONFIG_NF_TABLES_NETDEV=m
 CONFIG_NFT_EXTHDR=m
 CONFIG_NFT_META=m
 CONFIG_NFT_CT=m
@@ -277,7 +278,6 @@ CONFIG_BLK_DEV_CRYPTOLOOP=m
 CONFIG_BLK_DEV_DRBD=m
 CONFIG_BLK_DEV_NBD=m
 CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_PMEM=m
 CONFIG_CDROM_PKTCDVD=m
 CONFIG_ATA_OVER_ETH=m
 CONFIG_DUMMY_IRQ=m
@@ -301,7 +301,6 @@ CONFIG_BLK_DEV_DM=m
 CONFIG_DM_CRYPT=m
 CONFIG_DM_SNAPSHOT=m
 CONFIG_DM_THIN_PROVISIONING=m
-CONFIG_DM_CACHE=m
 CONFIG_DM_ERA=m
 CONFIG_DM_MIRROR=m
 CONFIG_DM_RAID=m
@@ -326,17 +325,20 @@ CONFIG_MACVLAN=m
 CONFIG_MACVTAP=m
 CONFIG_IPVLAN=m
 CONFIG_VXLAN=m
+CONFIG_GENEVE=m
 CONFIG_NETCONSOLE=m
 CONFIG_NETCONSOLE_DYNAMIC=y
 CONFIG_VETH=m
 # CONFIG_NET_VENDOR_ARC is not set
 # CONFIG_NET_CADENCE is not set
 # CONFIG_NET_VENDOR_BROADCOM is not set
+# CONFIG_NET_VENDOR_EZCHIP is not set
 CONFIG_MVME16x_NET=y
 # CONFIG_NET_VENDOR_MARVELL is not set
 # CONFIG_NET_VENDOR_MICREL is not set
 # CONFIG_NET_VENDOR_NATSEMI is not set
 # CONFIG_NET_VENDOR_QUALCOMM is not set
+# CONFIG_NET_VENDOR_RENESAS is not set
 # CONFIG_NET_VENDOR_ROCKER is not set
 # CONFIG_NET_VENDOR_SAMSUNG is not set
 # CONFIG_NET_VENDOR_SEEQ is not set
@@ -399,6 +401,7 @@ CONFIG_UDF_FS=m
 CONFIG_MSDOS_FS=m
 CONFIG_VFAT_FS=m
 CONFIG_PROC_KCORE=y
+CONFIG_PROC_CHILDREN=y
 CONFIG_TMPFS=y
 CONFIG_AFFS_FS=m
 CONFIG_ECRYPT_FS=m
@@ -487,6 +490,7 @@ CONFIG_TEST_FIRMWARE=m
 CONFIG_TEST_UDELAY=m
 CONFIG_EARLY_PRINTK=y
 CONFIG_ENCRYPTED_KEYS=m
+CONFIG_CRYPTO_RSA=m
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_USER=m
 CONFIG_CRYPTO_CRYPTD=m
@@ -494,6 +498,7 @@ CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CCM=m
 CONFIG_CRYPTO_GCM=m
+CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_CTS=m
 CONFIG_CRYPTO_LRW=m
 CONFIG_CRYPTO_PCBC=m
@@ -522,14 +527,15 @@ CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_ZLIB=m
 CONFIG_CRYPTO_LZO=m
+CONFIG_CRYPTO_842=m
 CONFIG_CRYPTO_LZ4=m
 CONFIG_CRYPTO_LZ4HC=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
-CONFIG_CRYPTO_DRBG_MENU=m
+CONFIG_CRYPTO_ANSI_CPRNG=m
 CONFIG_CRYPTO_DRBG_HASH=y
 CONFIG_CRYPTO_DRBG_CTR=y
 CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
 CONFIG_CRYPTO_USER_API_RNG=m
+CONFIG_CRYPTO_USER_API_AEAD=m
 # CONFIG_CRYPTO_HW is not set
 CONFIG_XZ_DEC_TEST=m
diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig
index 643e9c93bea7..35355c1bc714 100644
--- a/arch/m68k/configs/q40_defconfig
+++ b/arch/m68k/configs/q40_defconfig
@@ -53,7 +53,7 @@ CONFIG_NET_IPGRE_DEMUX=m
 CONFIG_NET_IPGRE=m
 CONFIG_NET_IPVTI=m
 CONFIG_NET_FOU_IP_TUNNELS=y
-CONFIG_GENEVE=m
+CONFIG_GENEVE_CORE=m
 CONFIG_INET_AH=m
 CONFIG_INET_ESP=m
 CONFIG_INET_IPCOMP=m
@@ -87,6 +87,7 @@ CONFIG_NF_CONNTRACK_SIP=m
 CONFIG_NF_CONNTRACK_TFTP=m
 CONFIG_NF_TABLES=m
 CONFIG_NF_TABLES_INET=m
+CONFIG_NF_TABLES_NETDEV=m
 CONFIG_NFT_EXTHDR=m
 CONFIG_NFT_META=m
 CONFIG_NFT_CT=m
@@ -280,7 +281,6 @@ CONFIG_BLK_DEV_CRYPTOLOOP=m
 CONFIG_BLK_DEV_DRBD=m
 CONFIG_BLK_DEV_NBD=m
 CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_PMEM=m
 CONFIG_CDROM_PKTCDVD=m
 CONFIG_ATA_OVER_ETH=m
 CONFIG_DUMMY_IRQ=m
@@ -307,7 +307,6 @@ CONFIG_BLK_DEV_DM=m
 CONFIG_DM_CRYPT=m
 CONFIG_DM_SNAPSHOT=m
 CONFIG_DM_THIN_PROVISIONING=m
-CONFIG_DM_CACHE=m
 CONFIG_DM_ERA=m
 CONFIG_DM_MIRROR=m
 CONFIG_DM_RAID=m
@@ -332,6 +331,7 @@ CONFIG_MACVLAN=m
 CONFIG_MACVTAP=m
 CONFIG_IPVLAN=m
 CONFIG_VXLAN=m
+CONFIG_GENEVE=m
 CONFIG_NETCONSOLE=m
 CONFIG_NETCONSOLE_DYNAMIC=y
 CONFIG_VETH=m
@@ -341,12 +341,14 @@ CONFIG_VETH=m
 # CONFIG_NET_CADENCE is not set
 # CONFIG_NET_VENDOR_BROADCOM is not set
 # CONFIG_NET_VENDOR_CIRRUS is not set
+# CONFIG_NET_VENDOR_EZCHIP is not set
 # CONFIG_NET_VENDOR_HP is not set
 # CONFIG_NET_VENDOR_INTEL is not set
 # CONFIG_NET_VENDOR_MARVELL is not set
 # CONFIG_NET_VENDOR_MICREL is not set
 CONFIG_NE2000=y
 # CONFIG_NET_VENDOR_QUALCOMM is not set
+# CONFIG_NET_VENDOR_RENESAS is not set
 # CONFIG_NET_VENDOR_ROCKER is not set
 # CONFIG_NET_VENDOR_SAMSUNG is not set
 # CONFIG_NET_VENDOR_SEEQ is not set
@@ -421,6 +423,7 @@ CONFIG_UDF_FS=m
 CONFIG_MSDOS_FS=m
 CONFIG_VFAT_FS=m
 CONFIG_PROC_KCORE=y
+CONFIG_PROC_CHILDREN=y
 CONFIG_TMPFS=y
 CONFIG_AFFS_FS=m
 CONFIG_ECRYPT_FS=m
@@ -509,6 +512,7 @@ CONFIG_TEST_FIRMWARE=m
 CONFIG_TEST_UDELAY=m
 CONFIG_EARLY_PRINTK=y
 CONFIG_ENCRYPTED_KEYS=m
+CONFIG_CRYPTO_RSA=m
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_USER=m
 CONFIG_CRYPTO_CRYPTD=m
@@ -516,6 +520,7 @@ CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CCM=m
 CONFIG_CRYPTO_GCM=m
+CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_CTS=m
 CONFIG_CRYPTO_LRW=m
 CONFIG_CRYPTO_PCBC=m
@@ -544,14 +549,15 @@ CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_ZLIB=m
 CONFIG_CRYPTO_LZO=m
+CONFIG_CRYPTO_842=m
 CONFIG_CRYPTO_LZ4=m
 CONFIG_CRYPTO_LZ4HC=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
-CONFIG_CRYPTO_DRBG_MENU=m
+CONFIG_CRYPTO_ANSI_CPRNG=m
 CONFIG_CRYPTO_DRBG_HASH=y
 CONFIG_CRYPTO_DRBG_CTR=y
 CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
 CONFIG_CRYPTO_USER_API_RNG=m
+CONFIG_CRYPTO_USER_API_AEAD=m
 # CONFIG_CRYPTO_HW is not set
 CONFIG_XZ_DEC_TEST=m
diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig
index 8fecc5aa166c..8442d267b877 100644
--- a/arch/m68k/configs/sun3_defconfig
+++ b/arch/m68k/configs/sun3_defconfig
@@ -50,7 +50,7 @@ CONFIG_NET_IPGRE_DEMUX=m
 CONFIG_NET_IPGRE=m
 CONFIG_NET_IPVTI=m
 CONFIG_NET_FOU_IP_TUNNELS=y
-CONFIG_GENEVE=m
+CONFIG_GENEVE_CORE=m
 CONFIG_INET_AH=m
 CONFIG_INET_ESP=m
 CONFIG_INET_IPCOMP=m
@@ -84,6 +84,7 @@ CONFIG_NF_CONNTRACK_SIP=m
 CONFIG_NF_CONNTRACK_TFTP=m
 CONFIG_NF_TABLES=m
 CONFIG_NF_TABLES_INET=m
+CONFIG_NF_TABLES_NETDEV=m
 CONFIG_NFT_EXTHDR=m
 CONFIG_NFT_META=m
 CONFIG_NFT_CT=m
@@ -274,7 +275,6 @@ CONFIG_BLK_DEV_CRYPTOLOOP=m
 CONFIG_BLK_DEV_DRBD=m
 CONFIG_BLK_DEV_NBD=m
 CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_PMEM=m
 CONFIG_CDROM_PKTCDVD=m
 CONFIG_ATA_OVER_ETH=m
 CONFIG_DUMMY_IRQ=m
@@ -298,7 +298,6 @@ CONFIG_BLK_DEV_DM=m
 CONFIG_DM_CRYPT=m
 CONFIG_DM_SNAPSHOT=m
 CONFIG_DM_THIN_PROVISIONING=m
-CONFIG_DM_CACHE=m
 CONFIG_DM_ERA=m
 CONFIG_DM_MIRROR=m
 CONFIG_DM_RAID=m
@@ -323,17 +322,20 @@ CONFIG_MACVLAN=m
 CONFIG_MACVTAP=m
 CONFIG_IPVLAN=m
 CONFIG_VXLAN=m
+CONFIG_GENEVE=m
 CONFIG_NETCONSOLE=m
 CONFIG_NETCONSOLE_DYNAMIC=y
 CONFIG_VETH=m
 CONFIG_SUN3LANCE=y
 # CONFIG_NET_VENDOR_ARC is not set
 # CONFIG_NET_CADENCE is not set
+# CONFIG_NET_VENDOR_EZCHIP is not set
 CONFIG_SUN3_82586=y
 # CONFIG_NET_VENDOR_MARVELL is not set
 # CONFIG_NET_VENDOR_MICREL is not set
 # CONFIG_NET_VENDOR_NATSEMI is not set
 # CONFIG_NET_VENDOR_QUALCOMM is not set
+# CONFIG_NET_VENDOR_RENESAS is not set
 # CONFIG_NET_VENDOR_ROCKER is not set
 # CONFIG_NET_VENDOR_SAMSUNG is not set
 # CONFIG_NET_VENDOR_SEEQ is not set
@@ -400,6 +402,7 @@ CONFIG_UDF_FS=m
 CONFIG_MSDOS_FS=m
 CONFIG_VFAT_FS=m
 CONFIG_PROC_KCORE=y
+CONFIG_PROC_CHILDREN=y
 CONFIG_TMPFS=y
 CONFIG_AFFS_FS=m
 CONFIG_ECRYPT_FS=m
@@ -487,6 +490,7 @@ CONFIG_TEST_BPF=m
 CONFIG_TEST_FIRMWARE=m
 CONFIG_TEST_UDELAY=m
 CONFIG_ENCRYPTED_KEYS=m
+CONFIG_CRYPTO_RSA=m
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_USER=m
 CONFIG_CRYPTO_CRYPTD=m
@@ -494,6 +498,7 @@ CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CCM=m
 CONFIG_CRYPTO_GCM=m
+CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_CTS=m
 CONFIG_CRYPTO_LRW=m
 CONFIG_CRYPTO_PCBC=m
@@ -522,14 +527,15 @@ CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_ZLIB=m
 CONFIG_CRYPTO_LZO=m
+CONFIG_CRYPTO_842=m
 CONFIG_CRYPTO_LZ4=m
 CONFIG_CRYPTO_LZ4HC=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
-CONFIG_CRYPTO_DRBG_MENU=m
+CONFIG_CRYPTO_ANSI_CPRNG=m
 CONFIG_CRYPTO_DRBG_HASH=y
 CONFIG_CRYPTO_DRBG_CTR=y
 CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
 CONFIG_CRYPTO_USER_API_RNG=m
+CONFIG_CRYPTO_USER_API_AEAD=m
 # CONFIG_CRYPTO_HW is not set
 CONFIG_XZ_DEC_TEST=m
diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig
index 9902c5bfbdc8..0e1b542e1555 100644
--- a/arch/m68k/configs/sun3x_defconfig
+++ b/arch/m68k/configs/sun3x_defconfig
@@ -50,7 +50,7 @@ CONFIG_NET_IPGRE_DEMUX=m
 CONFIG_NET_IPGRE=m
 CONFIG_NET_IPVTI=m
 CONFIG_NET_FOU_IP_TUNNELS=y
-CONFIG_GENEVE=m
+CONFIG_GENEVE_CORE=m
 CONFIG_INET_AH=m
 CONFIG_INET_ESP=m
 CONFIG_INET_IPCOMP=m
@@ -84,6 +84,7 @@ CONFIG_NF_CONNTRACK_SIP=m
 CONFIG_NF_CONNTRACK_TFTP=m
 CONFIG_NF_TABLES=m
 CONFIG_NF_TABLES_INET=m
+CONFIG_NF_TABLES_NETDEV=m
 CONFIG_NFT_EXTHDR=m
 CONFIG_NFT_META=m
 CONFIG_NFT_CT=m
@@ -274,7 +275,6 @@ CONFIG_BLK_DEV_CRYPTOLOOP=m
 CONFIG_BLK_DEV_DRBD=m
 CONFIG_BLK_DEV_NBD=m
 CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_PMEM=m
 CONFIG_CDROM_PKTCDVD=m
 CONFIG_ATA_OVER_ETH=m
 CONFIG_DUMMY_IRQ=m
@@ -298,7 +298,6 @@ CONFIG_BLK_DEV_DM=m
 CONFIG_DM_CRYPT=m
 CONFIG_DM_SNAPSHOT=m
 CONFIG_DM_THIN_PROVISIONING=m
-CONFIG_DM_CACHE=m
 CONFIG_DM_ERA=m
 CONFIG_DM_MIRROR=m
 CONFIG_DM_RAID=m
@@ -323,6 +322,7 @@ CONFIG_MACVLAN=m
 CONFIG_MACVTAP=m
 CONFIG_IPVLAN=m
 CONFIG_VXLAN=m
+CONFIG_GENEVE=m
 CONFIG_NETCONSOLE=m
 CONFIG_NETCONSOLE_DYNAMIC=y
 CONFIG_VETH=m
@@ -330,11 +330,13 @@ CONFIG_SUN3LANCE=y
 # CONFIG_NET_VENDOR_ARC is not set
 # CONFIG_NET_CADENCE is not set
 # CONFIG_NET_VENDOR_BROADCOM is not set
+# CONFIG_NET_VENDOR_EZCHIP is not set
 # CONFIG_NET_VENDOR_INTEL is not set
 # CONFIG_NET_VENDOR_MARVELL is not set
 # CONFIG_NET_VENDOR_MICREL is not set
 # CONFIG_NET_VENDOR_NATSEMI is not set
 # CONFIG_NET_VENDOR_QUALCOMM is not set
+# CONFIG_NET_VENDOR_RENESAS is not set
 # CONFIG_NET_VENDOR_ROCKER is not set
 # CONFIG_NET_VENDOR_SAMSUNG is not set
 # CONFIG_NET_VENDOR_SEEQ is not set
@@ -400,6 +402,7 @@ CONFIG_UDF_FS=m
 CONFIG_MSDOS_FS=m
 CONFIG_VFAT_FS=m
 CONFIG_PROC_KCORE=y
+CONFIG_PROC_CHILDREN=y
 CONFIG_TMPFS=y
 CONFIG_AFFS_FS=m
 CONFIG_ECRYPT_FS=m
@@ -488,6 +491,7 @@ CONFIG_TEST_FIRMWARE=m
 CONFIG_TEST_UDELAY=m
 CONFIG_EARLY_PRINTK=y
 CONFIG_ENCRYPTED_KEYS=m
+CONFIG_CRYPTO_RSA=m
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_USER=m
 CONFIG_CRYPTO_CRYPTD=m
@@ -495,6 +499,7 @@ CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CCM=m
 CONFIG_CRYPTO_GCM=m
+CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_CTS=m
 CONFIG_CRYPTO_LRW=m
 CONFIG_CRYPTO_PCBC=m
@@ -523,14 +528,15 @@ CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_ZLIB=m
 CONFIG_CRYPTO_LZO=m
+CONFIG_CRYPTO_842=m
 CONFIG_CRYPTO_LZ4=m
 CONFIG_CRYPTO_LZ4HC=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
-CONFIG_CRYPTO_DRBG_MENU=m
+CONFIG_CRYPTO_ANSI_CPRNG=m
 CONFIG_CRYPTO_DRBG_HASH=y
 CONFIG_CRYPTO_DRBG_CTR=y
 CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
 CONFIG_CRYPTO_USER_API_RNG=m
+CONFIG_CRYPTO_USER_API_AEAD=m
 # CONFIG_CRYPTO_HW is not set
 CONFIG_XZ_DEC_TEST=m
diff --git a/arch/m68k/kernel/bootinfo_proc.c b/arch/m68k/kernel/bootinfo_proc.c
index 7ee853e1432b..2a33a9645ad8 100644
--- a/arch/m68k/kernel/bootinfo_proc.c
+++ b/arch/m68k/kernel/bootinfo_proc.c
@@ -62,12 +62,10 @@ static int __init init_bootinfo_procfs(void)
 	if (!bootinfo_size)
 		return -EINVAL;
 
-	bootinfo_copy = kmalloc(bootinfo_size, GFP_KERNEL);
+	bootinfo_copy = kmemdup(bootinfo_tmp, bootinfo_size, GFP_KERNEL);
 	if (!bootinfo_copy)
 		return -ENOMEM;
 
-	memcpy(bootinfo_copy, bootinfo_tmp, bootinfo_size);
-
 	pde = proc_create_data("bootinfo", 0400, NULL, &bootinfo_fops, NULL);
 	if (!pde) {
 		kfree(bootinfo_copy);
diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S
index af42e7003f12..baa7b6fc0a60 100644
--- a/arch/mips/kernel/genex.S
+++ b/arch/mips/kernel/genex.S
@@ -407,7 +407,7 @@ NESTED(nmi_handler, PT_SIZE, sp)
 	.set	noat
 	SAVE_ALL
 	FEXPORT(handle_\exception\ext)
-	__BUILD_clear_\clear
+	__build_clear_\clear
 	.set	at
 	__BUILD_\verbose \exception
 	move	a0, sp
diff --git a/arch/mips/kernel/scall64-64.S b/arch/mips/kernel/scall64-64.S
index ad4d44635c76..a6f6b762c47a 100644
--- a/arch/mips/kernel/scall64-64.S
+++ b/arch/mips/kernel/scall64-64.S
@@ -80,7 +80,7 @@ syscall_trace_entry:
 	SAVE_STATIC
 	move	s0, t2
 	move	a0, sp
-	daddiu	a1, v0, __NR_64_Linux
+	move	a1, v0
 	jal	syscall_trace_enter
 
 	bltz	v0, 2f			# seccomp failed? Skip syscall
diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S
index 446cc654da56..4b2010654c46 100644
--- a/arch/mips/kernel/scall64-n32.S
+++ b/arch/mips/kernel/scall64-n32.S
@@ -72,7 +72,7 @@ n32_syscall_trace_entry:
 	SAVE_STATIC
 	move	s0, t2
 	move	a0, sp
-	daddiu	a1, v0, __NR_N32_Linux
+	move	a1, v0
 	jal	syscall_trace_enter
 
 	bltz	v0, 2f			# seccomp failed? Skip syscall
diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c
index 42e02a2d570b..efc3fa54c90b 100644
--- a/arch/powerpc/kernel/pci_of_scan.c
+++ b/arch/powerpc/kernel/pci_of_scan.c
@@ -191,6 +191,9 @@ struct pci_dev *of_create_pci_dev(struct device_node *node,
 
 	pci_device_add(dev, bus);
 
+	/* Setup MSI caps & disable MSI/MSI-X interrupts */
+	pci_msi_setup_pci_dev(dev);
+
 	return dev;
 }
 EXPORT_SYMBOL(of_create_pci_dev);
diff --git a/arch/s390/Kbuild b/arch/s390/Kbuild
index 2938934c6518..e256592eb66e 100644
--- a/arch/s390/Kbuild
+++ b/arch/s390/Kbuild
@@ -6,3 +6,4 @@ obj-$(CONFIG_S390_HYPFS_FS)	+= hypfs/
 obj-$(CONFIG_APPLDATA_BASE)	+= appldata/
 obj-y				+= net/
 obj-$(CONFIG_PCI)		+= pci/
+obj-$(CONFIG_NUMA)		+= numa/
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index b06dc3839268..4827870f7a6d 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -99,18 +99,22 @@ config S390
 	select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE
 	select ARCH_SAVE_PAGE_KEYS if HIBERNATION
 	select ARCH_SUPPORTS_ATOMIC_RMW
+	select ARCH_SUPPORTS_NUMA_BALANCING
 	select ARCH_USE_CMPXCHG_LOCKREF
+	select ARCH_WANTS_PROT_NUMA_PROT_NONE
 	select ARCH_WANT_IPC_PARSE_VERSION
 	select BUILDTIME_EXTABLE_SORT
 	select CLONE_BACKWARDS2
 	select DYNAMIC_FTRACE if FUNCTION_TRACER
 	select GENERIC_CLOCKEVENTS
+	select GENERIC_CPU_AUTOPROBE
 	select GENERIC_CPU_DEVICES if !SMP
 	select GENERIC_FIND_FIRST_BIT
 	select GENERIC_SMP_IDLE_THREAD
 	select GENERIC_TIME_VSYSCALL
 	select HAVE_ALIGNED_STRUCT_PAGE if SLUB
 	select HAVE_ARCH_AUDITSYSCALL
+	select HAVE_ARCH_EARLY_PFN_TO_NID
 	select HAVE_ARCH_JUMP_LABEL
 	select HAVE_ARCH_SECCOMP_FILTER
 	select HAVE_ARCH_TRACEHOOK
@@ -153,6 +157,7 @@ config S390
 	select VIRT_CPU_ACCOUNTING
 	select VIRT_TO_BUS
 
+
 config SCHED_OMIT_FRAME_POINTER
 	def_bool y
 
@@ -385,6 +390,76 @@ config HOTPLUG_CPU
 config SCHED_SMT
 	def_bool n
 
+# Some NUMA nodes have memory ranges that span
+# other nodes.	Even though a pfn is valid and
+# between a node's start and end pfns, it may not
+# reside on that node.	See memmap_init_zone()
+# for details. <- They meant memory holes!
+config NODES_SPAN_OTHER_NODES
+	def_bool NUMA
+
+config NUMA
+	bool "NUMA support"
+	depends on SMP && 64BIT && SCHED_TOPOLOGY
+	default n
+	help
+	  Enable NUMA support
+
+	  This option adds NUMA support to the kernel.
+
+	  An operation mode can be selected by appending
+	  numa=<method> to the kernel command line.
+
+	  The default behaviour is identical to appending numa=plain to
+	  the command line. This will create just one node with all
+	  available memory and all CPUs in it.
+
+config NODES_SHIFT
+	int "Maximum NUMA nodes (as a power of 2)"
+	range 1 10
+	depends on NUMA
+	default "4"
+	help
+	  Specify the maximum number of NUMA nodes available on the target
+	  system. Increases memory reserved to accommodate various tables.
+
+menu "Select NUMA modes"
+	depends on NUMA
+
+config NUMA_EMU
+	bool "NUMA emulation"
+	default y
+	help
+	  Numa emulation mode will split the available system memory into
+	  equal chunks which then are distributed over the configured number
+	  of nodes in a round-robin manner.
+
+	  The number of fake nodes is limited by the number of available memory
+	  chunks (i.e. memory size / fake size) and the number of supported
+	  nodes in the kernel.
+
+	  The CPUs are assigned to the nodes in a way that partially respects
+	  the original machine topology (if supported by the machine).
+	  Fair distribution of the CPUs is not guaranteed.
+
+config EMU_SIZE
+	hex "NUMA emulation memory chunk size"
+	default 0x10000000
+	range 0x400000 0x100000000
+	depends on NUMA_EMU
+	help
+	  Select the default size by which the memory is chopped and then
+	  assigned to emulated NUMA nodes.
+
+	  This can be overridden by specifying
+
+	  emu_size=<n>
+
+	  on the kernel command line where also suffixes K, M, G, and T are
+	  supported.
+
+endmenu
+
 config SCHED_MC
 	def_bool n
 
diff --git a/arch/s390/Makefile b/arch/s390/Makefile
index 667b1bca5681..e8d4423e4f85 100644
--- a/arch/s390/Makefile
+++ b/arch/s390/Makefile
@@ -33,6 +33,8 @@ mflags-$(CONFIG_MARCH_Z196)   := -march=z196
 mflags-$(CONFIG_MARCH_ZEC12)  := -march=zEC12
 mflags-$(CONFIG_MARCH_Z13)   := -march=z13
 
+export CC_FLAGS_MARCH := $(mflags-y)
+
 aflags-y += $(mflags-y)
 cflags-y += $(mflags-y)
 
diff --git a/arch/s390/configs/default_defconfig b/arch/s390/configs/default_defconfig
index 940cbddd9237..0c98f1508542 100644
--- a/arch/s390/configs/default_defconfig
+++ b/arch/s390/configs/default_defconfig
@@ -13,6 +13,7 @@ CONFIG_TASK_IO_ACCOUNTING=y
 CONFIG_RCU_FAST_NO_HZ=y
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
+CONFIG_NUMA_BALANCING=y
 CONFIG_CGROUP_FREEZER=y
 CONFIG_CGROUP_DEVICE=y
 CONFIG_CPUSETS=y
@@ -50,6 +51,7 @@ CONFIG_LIVEPATCH=y
 CONFIG_MARCH_Z196=y
 CONFIG_TUNE_ZEC12=y
 CONFIG_NR_CPUS=256
+CONFIG_NUMA=y
 CONFIG_PREEMPT=y
 CONFIG_HZ_100=y
 CONFIG_MEMORY_HOTPLUG=y
diff --git a/arch/s390/configs/gcov_defconfig b/arch/s390/configs/gcov_defconfig
index d793fec91797..82083e1fbdc4 100644
--- a/arch/s390/configs/gcov_defconfig
+++ b/arch/s390/configs/gcov_defconfig
@@ -13,6 +13,7 @@ CONFIG_TASK_IO_ACCOUNTING=y
 CONFIG_RCU_FAST_NO_HZ=y
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
+CONFIG_NUMA_BALANCING=y
 CONFIG_CGROUP_FREEZER=y
 CONFIG_CGROUP_DEVICE=y
 CONFIG_CPUSETS=y
@@ -49,6 +50,7 @@ CONFIG_DEFAULT_DEADLINE=y
 CONFIG_MARCH_Z196=y
 CONFIG_TUNE_ZEC12=y
 CONFIG_NR_CPUS=256
+CONFIG_NUMA=y
 CONFIG_HZ_100=y
 CONFIG_MEMORY_HOTPLUG=y
 CONFIG_MEMORY_HOTREMOVE=y
diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig
index 38a77e9c8aa6..c05c9e0821e3 100644
--- a/arch/s390/configs/performance_defconfig
+++ b/arch/s390/configs/performance_defconfig
@@ -13,6 +13,8 @@ CONFIG_TASK_IO_ACCOUNTING=y
 CONFIG_RCU_FAST_NO_HZ=y
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
+CONFIG_NUMA_BALANCING=y
+# CONFIG_NUMA_BALANCING_DEFAULT_ENABLED is not set
 CONFIG_CGROUP_FREEZER=y
 CONFIG_CGROUP_DEVICE=y
 CONFIG_CPUSETS=y
@@ -48,6 +50,7 @@ CONFIG_LIVEPATCH=y
 CONFIG_MARCH_Z196=y
 CONFIG_TUNE_ZEC12=y
 CONFIG_NR_CPUS=512
+CONFIG_NUMA=y
 CONFIG_HZ_100=y
 CONFIG_MEMORY_HOTPLUG=y
 CONFIG_MEMORY_HOTREMOVE=y
diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c
index 5566ce80abdb..0b9b95f3c703 100644
--- a/arch/s390/crypto/aes_s390.c
+++ b/arch/s390/crypto/aes_s390.c
@@ -24,6 +24,7 @@
 #include <crypto/algapi.h>
 #include <linux/err.h>
 #include <linux/module.h>
+#include <linux/cpufeature.h>
 #include <linux/init.h>
 #include <linux/spinlock.h>
 #include "crypt_s390.h"
@@ -976,7 +977,7 @@ static void __exit aes_s390_fini(void)
 	crypto_unregister_alg(&aes_alg);
 }
 
-module_init(aes_s390_init);
+module_cpu_feature_match(MSA, aes_s390_init);
 module_exit(aes_s390_fini);
 
 MODULE_ALIAS_CRYPTO("aes-all");
diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c
index 9e05cc453a40..fba1c10a2dd0 100644
--- a/arch/s390/crypto/des_s390.c
+++ b/arch/s390/crypto/des_s390.c
@@ -16,6 +16,7 @@
 
 #include <linux/init.h>
 #include <linux/module.h>
+#include <linux/cpufeature.h>
 #include <linux/crypto.h>
 #include <crypto/algapi.h>
 #include <crypto/des.h>
@@ -616,7 +617,7 @@ static void __exit des_s390_exit(void)
 	crypto_unregister_alg(&des_alg);
 }
 
-module_init(des_s390_init);
+module_cpu_feature_match(MSA, des_s390_init);
 module_exit(des_s390_exit);
 
 MODULE_ALIAS_CRYPTO("des");
diff --git a/arch/s390/crypto/ghash_s390.c b/arch/s390/crypto/ghash_s390.c
index b258110da952..26e14efd30a7 100644
--- a/arch/s390/crypto/ghash_s390.c
+++ b/arch/s390/crypto/ghash_s390.c
@@ -9,6 +9,7 @@
 
 #include <crypto/internal/hash.h>
 #include <linux/module.h>
+#include <linux/cpufeature.h>
 
 #include "crypt_s390.h"
 
@@ -158,7 +159,7 @@ static void __exit ghash_mod_exit(void)
 	crypto_unregister_shash(&ghash_alg);
 }
 
-module_init(ghash_mod_init);
+module_cpu_feature_match(MSA, ghash_mod_init);
 module_exit(ghash_mod_exit);
 
 MODULE_ALIAS_CRYPTO("ghash");
diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c
index 9d5192c94963..b8045b97f4fb 100644
--- a/arch/s390/crypto/prng.c
+++ b/arch/s390/crypto/prng.c
@@ -17,6 +17,7 @@
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 #include <linux/mutex.h>
+#include <linux/cpufeature.h>
 #include <linux/random.h>
 #include <linux/slab.h>
 #include <asm/debug.h>
@@ -914,6 +915,5 @@ static void __exit prng_exit(void)
 	}
 }
 
-
-module_init(prng_init);
+module_cpu_feature_match(MSA, prng_init);
 module_exit(prng_exit);
diff --git a/arch/s390/crypto/sha1_s390.c b/arch/s390/crypto/sha1_s390.c
index 5b2bee323694..9208eadae9f0 100644
--- a/arch/s390/crypto/sha1_s390.c
+++ b/arch/s390/crypto/sha1_s390.c
@@ -26,6 +26,7 @@
 #include <crypto/internal/hash.h>
 #include <linux/init.h>
 #include <linux/module.h>
+#include <linux/cpufeature.h>
 #include <crypto/sha.h>
 
 #include "crypt_s390.h"
@@ -100,7 +101,7 @@ static void __exit sha1_s390_fini(void)
 	crypto_unregister_shash(&alg);
 }
 
-module_init(sha1_s390_init);
+module_cpu_feature_match(MSA, sha1_s390_init);
 module_exit(sha1_s390_fini);
 
 MODULE_ALIAS_CRYPTO("sha1");
diff --git a/arch/s390/crypto/sha256_s390.c b/arch/s390/crypto/sha256_s390.c
index b74ff158108c..667888f5c964 100644
--- a/arch/s390/crypto/sha256_s390.c
+++ b/arch/s390/crypto/sha256_s390.c
@@ -16,6 +16,7 @@
 #include <crypto/internal/hash.h>
 #include <linux/init.h>
 #include <linux/module.h>
+#include <linux/cpufeature.h>
 #include <crypto/sha.h>
 
 #include "crypt_s390.h"
@@ -140,7 +141,7 @@ static void __exit sha256_s390_fini(void)
 	crypto_unregister_shash(&sha256_alg);
 }
 
-module_init(sha256_s390_init);
+module_cpu_feature_match(MSA, sha256_s390_init);
 module_exit(sha256_s390_fini);
 
 MODULE_ALIAS_CRYPTO("sha256");
diff --git a/arch/s390/crypto/sha512_s390.c b/arch/s390/crypto/sha512_s390.c
index 0c36989ba182..2ba66b1518f0 100644
--- a/arch/s390/crypto/sha512_s390.c
+++ b/arch/s390/crypto/sha512_s390.c
@@ -18,6 +18,7 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/cpufeature.h>
 
 #include "sha.h"
 #include "crypt_s390.h"
@@ -148,7 +149,7 @@ static void __exit fini(void)
 	crypto_unregister_shash(&sha384_alg);
 }
 
-module_init(init);
+module_cpu_feature_match(MSA, init);
 module_exit(fini);
 
 MODULE_LICENSE("GPL");
diff --git a/arch/s390/include/asm/cpufeature.h b/arch/s390/include/asm/cpufeature.h
new file mode 100644
index 000000000000..fa7e69b7c299
--- /dev/null
+++ b/arch/s390/include/asm/cpufeature.h
@@ -0,0 +1,29 @@
+/*
+ * Module interface for CPU features
+ *
+ * Copyright IBM Corp. 2015
+ * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ */
+
+#ifndef __ASM_S390_CPUFEATURE_H
+#define __ASM_S390_CPUFEATURE_H
+
+#include <asm/elf.h>
+
+/* Hardware features on Linux on z Systems are indicated by facility bits that
+ * are mapped to the so-called machine flags.  Particular machine flags are
+ * then used to define ELF hardware capabilities; most notably hardware flags
+ * that are essential for user space / glibc.
+ *
+ * Restrict the set of exposed CPU features to ELF hardware capabilities for
+ * now.  Additional machine flags can be indicated by values larger than
+ * MAX_ELF_HWCAP_FEATURES.
+ */
+#define MAX_ELF_HWCAP_FEATURES	(8 * sizeof(elf_hwcap))
+#define MAX_CPU_FEATURES	MAX_ELF_HWCAP_FEATURES
+
+#define cpu_feature(feat)	ilog2(HWCAP_S390_ ## feat)
+
+int cpu_have_feature(unsigned int nr);
+
+#endif /* __ASM_S390_CPUFEATURE_H */
diff --git a/arch/s390/include/asm/ctl_reg.h b/arch/s390/include/asm/ctl_reg.h
index d7697ab802f6..17a373576868 100644
--- a/arch/s390/include/asm/ctl_reg.h
+++ b/arch/s390/include/asm/ctl_reg.h
@@ -46,6 +46,8 @@ static inline void __ctl_clear_bit(unsigned int cr, unsigned int bit)
 	__ctl_load(reg, cr, cr);
 }
 
+void __ctl_set_vx(void);
+
 void smp_ctl_set_bit(int cr, int bit);
 void smp_ctl_clear_bit(int cr, int bit);
 
diff --git a/arch/s390/include/asm/etr.h b/arch/s390/include/asm/etr.h
index 629b79a93165..f7e5c36688c3 100644
--- a/arch/s390/include/asm/etr.h
+++ b/arch/s390/include/asm/etr.h
@@ -214,6 +214,9 @@ static inline int etr_ptff(void *ptff_block, unsigned int func)
 void etr_switch_to_local(void);
 void etr_sync_check(void);
 
+/* notifier for syncs */
+extern struct atomic_notifier_head s390_epoch_delta_notifier;
+
 /* STP interruption parameter */
 struct stp_irq_parm {
 	unsigned int _pad0	: 14;
diff --git a/arch/s390/include/asm/fpu-internal.h b/arch/s390/include/asm/fpu-internal.h
new file mode 100644
index 000000000000..55dc2c0fb40a
--- /dev/null
+++ b/arch/s390/include/asm/fpu-internal.h
@@ -0,0 +1,110 @@
+/*
+ * General floating pointer and vector register helpers
+ *
+ * Copyright IBM Corp. 2015
+ * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ */
+
+#ifndef _ASM_S390_FPU_INTERNAL_H
+#define _ASM_S390_FPU_INTERNAL_H
+
+#define FPU_USE_VX		1	/* Vector extension is active */
+
+#ifndef __ASSEMBLY__
+
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <asm/linkage.h>
+#include <asm/ctl_reg.h>
+#include <asm/sigcontext.h>
+
+struct fpu {
+	__u32 fpc;			/* Floating-point control */
+	__u32 flags;
+	union {
+		void *regs;
+		freg_t *fprs;		/* Floating-point register save area */
+		__vector128 *vxrs;	/* Vector register save area */
+	};
+};
+
+void save_fpu_regs(void);
+
+#define is_vx_fpu(fpu) (!!((fpu)->flags & FPU_USE_VX))
+#define is_vx_task(tsk) (!!((tsk)->thread.fpu.flags & FPU_USE_VX))
+
+/* VX array structure for address operand constraints in inline assemblies */
+struct vx_array { __vector128 _[__NUM_VXRS]; };
+
+static inline int test_fp_ctl(u32 fpc)
+{
+	u32 orig_fpc;
+	int rc;
+
+	asm volatile(
+		"	efpc    %1\n"
+		"	sfpc	%2\n"
+		"0:	sfpc	%1\n"
+		"	la	%0,0\n"
+		"1:\n"
+		EX_TABLE(0b,1b)
+		: "=d" (rc), "=d" (orig_fpc)
+		: "d" (fpc), "0" (-EINVAL));
+	return rc;
+}
+
+static inline void save_vx_regs_safe(__vector128 *vxrs)
+{
+	unsigned long cr0, flags;
+
+	flags = arch_local_irq_save();
+	__ctl_store(cr0, 0, 0);
+	__ctl_set_bit(0, 17);
+	__ctl_set_bit(0, 18);
+	asm volatile(
+		"	la	1,%0\n"
+		"	.word	0xe70f,0x1000,0x003e\n"	/* vstm 0,15,0(1) */
+		"	.word	0xe70f,0x1100,0x0c3e\n"	/* vstm 16,31,256(1) */
+		: "=Q" (*(struct vx_array *) vxrs) : : "1");
+	__ctl_load(cr0, 0, 0);
+	arch_local_irq_restore(flags);
+}
+
+static inline void convert_vx_to_fp(freg_t *fprs, __vector128 *vxrs)
+{
+	int i;
+
+	for (i = 0; i < __NUM_FPRS; i++)
+		fprs[i] = *(freg_t *)(vxrs + i);
+}
+
+static inline void convert_fp_to_vx(__vector128 *vxrs, freg_t *fprs)
+{
+	int i;
+
+	for (i = 0; i < __NUM_FPRS; i++)
+		*(freg_t *)(vxrs + i) = fprs[i];
+}
+
+static inline void fpregs_store(_s390_fp_regs *fpregs, struct fpu *fpu)
+{
+	fpregs->pad = 0;
+	if (is_vx_fpu(fpu))
+		convert_vx_to_fp((freg_t *)&fpregs->fprs, fpu->vxrs);
+	else
+		memcpy((freg_t *)&fpregs->fprs, fpu->fprs,
+		       sizeof(fpregs->fprs));
+}
+
+static inline void fpregs_load(_s390_fp_regs *fpregs, struct fpu *fpu)
+{
+	if (is_vx_fpu(fpu))
+		convert_fp_to_vx(fpu->vxrs, (freg_t *)&fpregs->fprs);
+	else
+		memcpy(fpu->fprs, (freg_t *)&fpregs->fprs,
+		       sizeof(fpregs->fprs));
+}
+
+#endif
+
+#endif /* _ASM_S390_FPU_INTERNAL_H */
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 3024acbe1f9d..3d012e071647 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -22,6 +22,7 @@
 #include <linux/kvm.h>
 #include <asm/debug.h>
 #include <asm/cpu.h>
+#include <asm/fpu-internal.h>
 #include <asm/isc.h>
 
 #define KVM_MAX_VCPUS 64
@@ -258,6 +259,9 @@ struct kvm_vcpu_stat {
 	u32 diagnose_10;
 	u32 diagnose_44;
 	u32 diagnose_9c;
+	u32 diagnose_258;
+	u32 diagnose_308;
+	u32 diagnose_500;
 };
 
 #define PGM_OPERATION			0x01
@@ -498,10 +502,9 @@ struct kvm_guestdbg_info_arch {
 
 struct kvm_vcpu_arch {
 	struct kvm_s390_sie_block *sie_block;
-	s390_fp_regs      host_fpregs;
 	unsigned int      host_acrs[NUM_ACRS];
-	s390_fp_regs      guest_fpregs;
-	struct kvm_s390_vregs	*host_vregs;
+	struct fpu	  host_fpregs;
+	struct fpu	  guest_fpregs;
 	struct kvm_s390_local_interrupt local_int;
 	struct hrtimer    ckc_timer;
 	struct kvm_s390_pgm_info pgm;
@@ -630,7 +633,6 @@ extern char sie_exit;
 
 static inline void kvm_arch_hardware_disable(void) {}
 static inline void kvm_arch_check_processor_compat(void *rtn) {}
-static inline void kvm_arch_exit(void) {}
 static inline void kvm_arch_sync_events(struct kvm *kvm) {}
 static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
 static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
diff --git a/arch/s390/include/asm/linkage.h b/arch/s390/include/asm/linkage.h
index fc8a8284778e..27da78cf416d 100644
--- a/arch/s390/include/asm/linkage.h
+++ b/arch/s390/include/asm/linkage.h
@@ -6,4 +6,26 @@
 #define __ALIGN .align 4, 0x07
 #define __ALIGN_STR __stringify(__ALIGN)
 
+#ifndef __ASSEMBLY__
+
+/*
+ * Helper macro for exception table entries
+ */
+#define EX_TABLE(_fault, _target)	\
+	".section __ex_table,\"a\"\n"	\
+	".align	4\n"			\
+	".long	(" #_fault ") - .\n"	\
+	".long	(" #_target ") - .\n"	\
+	".previous\n"
+
+#else /* __ASSEMBLY__ */
+
+#define EX_TABLE(_fault, _target)	\
+	.section __ex_table,"a"	;	\
+	.align	4 ;			\
+	.long	(_fault) - . ;		\
+	.long	(_target) - . ;		\
+	.previous
+
+#endif /* __ASSEMBLY__ */
 #endif
diff --git a/arch/s390/include/asm/mmzone.h b/arch/s390/include/asm/mmzone.h
new file mode 100644
index 000000000000..a9e834e60b84
--- /dev/null
+++ b/arch/s390/include/asm/mmzone.h
@@ -0,0 +1,16 @@
+/*
+ * NUMA support for s390
+ *
+ * Copyright IBM Corp. 2015
+ */
+
+#ifndef _ASM_S390_MMZONE_H
+#define _ASM_S390_MMZONE_H
+
+#ifdef CONFIG_NUMA
+
+extern struct pglist_data *node_data[];
+#define NODE_DATA(nid) (node_data[nid])
+
+#endif /* CONFIG_NUMA */
+#endif /* _ASM_S390_MMZONE_H */
diff --git a/arch/s390/include/asm/numa.h b/arch/s390/include/asm/numa.h
new file mode 100644
index 000000000000..2a0efc63b9e5
--- /dev/null
+++ b/arch/s390/include/asm/numa.h
@@ -0,0 +1,35 @@
+/*
+ * NUMA support for s390
+ *
+ * Declare the NUMA core code structures and functions.
+ *
+ * Copyright IBM Corp. 2015
+ */
+
+#ifndef _ASM_S390_NUMA_H
+#define _ASM_S390_NUMA_H
+
+#ifdef CONFIG_NUMA
+
+#include <linux/numa.h>
+#include <linux/cpumask.h>
+
+void numa_setup(void);
+int numa_pfn_to_nid(unsigned long pfn);
+int __node_distance(int a, int b);
+void numa_update_cpu_topology(void);
+
+extern cpumask_var_t node_to_cpumask_map[MAX_NUMNODES];
+extern int numa_debug_enabled;
+
+#else
+
+static inline void numa_setup(void) { }
+static inline void numa_update_cpu_topology(void) { }
+static inline int numa_pfn_to_nid(unsigned long pfn)
+{
+	return 0;
+}
+
+#endif /* CONFIG_NUMA */
+#endif /* _ASM_S390_NUMA_H */
diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index a648338c434a..34d960353a08 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -170,7 +170,11 @@ static inline void zpci_exit_slot(struct zpci_dev *zdev) {}
 #endif /* CONFIG_HOTPLUG_PCI_S390 */
 
 /* Helpers */
-struct zpci_dev *get_zdev(struct pci_dev *);
+static inline struct zpci_dev *to_zpci(struct pci_dev *pdev)
+{
+	return pdev->sysdata;
+}
+
 struct zpci_dev *get_zdev_by_fid(u32);
 
 /* DMA */
@@ -188,4 +192,20 @@ void zpci_debug_init_device(struct zpci_dev *);
 void zpci_debug_exit_device(struct zpci_dev *);
 void zpci_debug_info(struct zpci_dev *, struct seq_file *);
 
+#ifdef CONFIG_NUMA
+
+/* Returns the node based on PCI bus */
+static inline int __pcibus_to_node(const struct pci_bus *bus)
+{
+	return NUMA_NO_NODE;
+}
+
+static inline const struct cpumask *
+cpumask_of_pcibus(const struct pci_bus *bus)
+{
+	return cpu_online_mask;
+}
+
+#endif /* CONFIG_NUMA */
+
 #endif
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index f66d82798a6a..bdb2f51124ed 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -576,6 +576,19 @@ static inline int pte_same(pte_t a, pte_t b)
 	return pte_val(a) == pte_val(b);
 }
 
+#ifdef CONFIG_NUMA_BALANCING
+static inline int pte_protnone(pte_t pte)
+{
+	return pte_present(pte) && !(pte_val(pte) & _PAGE_READ);
+}
+
+static inline int pmd_protnone(pmd_t pmd)
+{
+	/* pmd_large(pmd) implies pmd_present(pmd) */
+	return pmd_large(pmd) && !(pmd_val(pmd) & _SEGMENT_ENTRY_READ);
+}
+#endif
+
 static inline pgste_t pgste_get_lock(pte_t *ptep)
 {
 	unsigned long new = 0;
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index dedb6218544b..085fb0d3c54e 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -14,10 +14,12 @@
 #define CIF_MCCK_PENDING	0	/* machine check handling is pending */
 #define CIF_ASCE		1	/* user asce needs fixup / uaccess */
 #define CIF_NOHZ_DELAY		2	/* delay HZ disable for a tick */
+#define CIF_FPU			3	/* restore vector registers */
 
 #define _CIF_MCCK_PENDING	(1<<CIF_MCCK_PENDING)
 #define _CIF_ASCE		(1<<CIF_ASCE)
 #define _CIF_NOHZ_DELAY		(1<<CIF_NOHZ_DELAY)
+#define _CIF_FPU		(1<<CIF_FPU)
 
 #ifndef __ASSEMBLY__
 
@@ -28,6 +30,7 @@
 #include <asm/ptrace.h>
 #include <asm/setup.h>
 #include <asm/runtime_instr.h>
+#include <asm/fpu-internal.h>
 
 static inline void set_cpu_flag(int flag)
 {
@@ -85,7 +88,7 @@ typedef struct {
  * Thread structure
  */
 struct thread_struct {
-	s390_fp_regs fp_regs;
+	struct fpu fpu;			/* FP and VX register save area */
 	unsigned int  acrs[NUM_ACRS];
         unsigned long ksp;              /* kernel stack pointer             */
 	mm_segment_t mm_segment;
@@ -101,7 +104,6 @@ struct thread_struct {
 	struct runtime_instr_cb *ri_cb;
 	int ri_signum;
 	unsigned char trap_tdb[256];	/* Transaction abort diagnose block */
-	__vector128 *vxrs;		/* Vector register save area */
 };
 
 /* Flag to disable transactions. */
@@ -231,6 +233,17 @@ static inline void __load_psw_mask (unsigned long mask)
 }
 
 /*
+ * Extract current PSW mask
+ */
+static inline unsigned long __extract_psw(void)
+{
+	unsigned int reg1, reg2;
+
+	asm volatile("epsw %0,%1" : "=d" (reg1), "=a" (reg2));
+	return (((unsigned long) reg1) << 32) | ((unsigned long) reg2);
+}
+
+/*
  * Rewind PSW instruction address by specified number of bytes.
  */
 static inline unsigned long __rewind_psw(psw_t psw, unsigned long ilc)
@@ -336,25 +349,6 @@ extern void memcpy_absolute(void *, void *, size_t);
 	memcpy_absolute(&(dest), &__tmp, sizeof(__tmp));	\
 }
 
-/*
- * Helper macro for exception table entries
- */
-#define EX_TABLE(_fault, _target)	\
-	".section __ex_table,\"a\"\n"	\
-	".align	4\n"			\
-	".long	(" #_fault ") - .\n"	\
-	".long	(" #_target ") - .\n"	\
-	".previous\n"
-
-#else /* __ASSEMBLY__ */
-
-#define EX_TABLE(_fault, _target)	\
-	.section __ex_table,"a"	;	\
-	.align	4 ;			\
-	.long	(_fault) - . ;		\
-	.long	(_target) - . ;		\
-	.previous
-
 #endif /* __ASSEMBLY__ */
 
 #endif /* __ASM_S390_PROCESSOR_H */
diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h
index f6ff06077631..821dde5f425d 100644
--- a/arch/s390/include/asm/sclp.h
+++ b/arch/s390/include/asm/sclp.h
@@ -79,6 +79,6 @@ int sclp_pci_configure(u32 fid);
 int sclp_pci_deconfigure(u32 fid);
 int memcpy_hsa(void *dest, unsigned long src, size_t count, int mode);
 void sclp_early_detect(void);
-long _sclp_print_early(const char *);
+int _sclp_print_early(const char *);
 
 #endif /* _ASM_S390_SCLP_H */
diff --git a/arch/s390/include/asm/switch_to.h b/arch/s390/include/asm/switch_to.h
index d62e7a69605f..dcadfde32265 100644
--- a/arch/s390/include/asm/switch_to.h
+++ b/arch/s390/include/asm/switch_to.h
@@ -8,139 +8,12 @@
 #define __ASM_SWITCH_TO_H
 
 #include <linux/thread_info.h>
+#include <asm/fpu-internal.h>
 #include <asm/ptrace.h>
 
 extern struct task_struct *__switch_to(void *, void *);
 extern void update_cr_regs(struct task_struct *task);
 
-static inline int test_fp_ctl(u32 fpc)
-{
-	u32 orig_fpc;
-	int rc;
-
-	asm volatile(
-		"	efpc    %1\n"
-		"	sfpc	%2\n"
-		"0:	sfpc	%1\n"
-		"	la	%0,0\n"
-		"1:\n"
-		EX_TABLE(0b,1b)
-		: "=d" (rc), "=d" (orig_fpc)
-		: "d" (fpc), "0" (-EINVAL));
-	return rc;
-}
-
-static inline void save_fp_ctl(u32 *fpc)
-{
-	asm volatile(
-		"       stfpc   %0\n"
-		: "+Q" (*fpc));
-}
-
-static inline int restore_fp_ctl(u32 *fpc)
-{
-	int rc;
-
-	asm volatile(
-		"	lfpc    %1\n"
-		"0:	la	%0,0\n"
-		"1:\n"
-		EX_TABLE(0b,1b)
-		: "=d" (rc) : "Q" (*fpc), "0" (-EINVAL));
-	return rc;
-}
-
-static inline void save_fp_regs(freg_t *fprs)
-{
-	asm volatile("std 0,%0" : "=Q" (fprs[0]));
-	asm volatile("std 2,%0" : "=Q" (fprs[2]));
-	asm volatile("std 4,%0" : "=Q" (fprs[4]));
-	asm volatile("std 6,%0" : "=Q" (fprs[6]));
-	asm volatile("std 1,%0" : "=Q" (fprs[1]));
-	asm volatile("std 3,%0" : "=Q" (fprs[3]));
-	asm volatile("std 5,%0" : "=Q" (fprs[5]));
-	asm volatile("std 7,%0" : "=Q" (fprs[7]));
-	asm volatile("std 8,%0" : "=Q" (fprs[8]));
-	asm volatile("std 9,%0" : "=Q" (fprs[9]));
-	asm volatile("std 10,%0" : "=Q" (fprs[10]));
-	asm volatile("std 11,%0" : "=Q" (fprs[11]));
-	asm volatile("std 12,%0" : "=Q" (fprs[12]));
-	asm volatile("std 13,%0" : "=Q" (fprs[13]));
-	asm volatile("std 14,%0" : "=Q" (fprs[14]));
-	asm volatile("std 15,%0" : "=Q" (fprs[15]));
-}
-
-static inline void restore_fp_regs(freg_t *fprs)
-{
-	asm volatile("ld 0,%0" : : "Q" (fprs[0]));
-	asm volatile("ld 2,%0" : : "Q" (fprs[2]));
-	asm volatile("ld 4,%0" : : "Q" (fprs[4]));
-	asm volatile("ld 6,%0" : : "Q" (fprs[6]));
-	asm volatile("ld 1,%0" : : "Q" (fprs[1]));
-	asm volatile("ld 3,%0" : : "Q" (fprs[3]));
-	asm volatile("ld 5,%0" : : "Q" (fprs[5]));
-	asm volatile("ld 7,%0" : : "Q" (fprs[7]));
-	asm volatile("ld 8,%0" : : "Q" (fprs[8]));
-	asm volatile("ld 9,%0" : : "Q" (fprs[9]));
-	asm volatile("ld 10,%0" : : "Q" (fprs[10]));
-	asm volatile("ld 11,%0" : : "Q" (fprs[11]));
-	asm volatile("ld 12,%0" : : "Q" (fprs[12]));
-	asm volatile("ld 13,%0" : : "Q" (fprs[13]));
-	asm volatile("ld 14,%0" : : "Q" (fprs[14]));
-	asm volatile("ld 15,%0" : : "Q" (fprs[15]));
-}
-
-static inline void save_vx_regs(__vector128 *vxrs)
-{
-	typedef struct { __vector128 _[__NUM_VXRS]; } addrtype;
-
-	asm volatile(
-		"	la	1,%0\n"
-		"	.word	0xe70f,0x1000,0x003e\n"	/* vstm 0,15,0(1) */
-		"	.word	0xe70f,0x1100,0x0c3e\n"	/* vstm 16,31,256(1) */
-		: "=Q" (*(addrtype *) vxrs) : : "1");
-}
-
-static inline void save_vx_regs_safe(__vector128 *vxrs)
-{
-	unsigned long cr0, flags;
-
-	flags = arch_local_irq_save();
-	__ctl_store(cr0, 0, 0);
-	__ctl_set_bit(0, 17);
-	__ctl_set_bit(0, 18);
-	save_vx_regs(vxrs);
-	__ctl_load(cr0, 0, 0);
-	arch_local_irq_restore(flags);
-}
-
-static inline void restore_vx_regs(__vector128 *vxrs)
-{
-	typedef struct { __vector128 _[__NUM_VXRS]; } addrtype;
-
-	asm volatile(
-		"	la	1,%0\n"
-		"	.word	0xe70f,0x1000,0x0036\n"	/* vlm 0,15,0(1) */
-		"	.word	0xe70f,0x1100,0x0c36\n"	/* vlm 16,31,256(1) */
-		: : "Q" (*(addrtype *) vxrs) : "1");
-}
-
-static inline void save_fp_vx_regs(struct task_struct *task)
-{
-	if (task->thread.vxrs)
-		save_vx_regs(task->thread.vxrs);
-	else
-		save_fp_regs(task->thread.fp_regs.fprs);
-}
-
-static inline void restore_fp_vx_regs(struct task_struct *task)
-{
-	if (task->thread.vxrs)
-		restore_vx_regs(task->thread.vxrs);
-	else
-		restore_fp_regs(task->thread.fp_regs.fprs);
-}
-
 static inline void save_access_regs(unsigned int *acrs)
 {
 	typedef struct { int _[NUM_ACRS]; } acrstype;
@@ -157,15 +30,13 @@ static inline void restore_access_regs(unsigned int *acrs)
 
 #define switch_to(prev,next,last) do {					\
 	if (prev->mm) {							\
-		save_fp_ctl(&prev->thread.fp_regs.fpc);			\
-		save_fp_vx_regs(prev);					\
+		save_fpu_regs();					\
 		save_access_regs(&prev->thread.acrs[0]);		\
 		save_ri_cb(prev->thread.ri_cb);				\
 	}								\
 	if (next->mm) {							\
 		update_cr_regs(next);					\
-		restore_fp_ctl(&next->thread.fp_regs.fpc);		\
-		restore_fp_vx_regs(next);				\
+		set_cpu_flag(CIF_FPU);					\
 		restore_access_regs(&next->thread.acrs[0]);		\
 		restore_ri_cb(next->thread.ri_cb, prev->thread.ri_cb);	\
 	}								\
diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h
index 4990f6c66288..27ebde643933 100644
--- a/arch/s390/include/asm/topology.h
+++ b/arch/s390/include/asm/topology.h
@@ -2,6 +2,7 @@
 #define _ASM_S390_TOPOLOGY_H
 
 #include <linux/cpumask.h>
+#include <asm/numa.h>
 
 struct sysinfo_15_1_x;
 struct cpu;
@@ -13,6 +14,7 @@ struct cpu_topology_s390 {
 	unsigned short core_id;
 	unsigned short socket_id;
 	unsigned short book_id;
+	unsigned short node_id;
 	cpumask_t thread_mask;
 	cpumask_t core_mask;
 	cpumask_t book_mask;
@@ -52,6 +54,43 @@ static inline void topology_expect_change(void) { }
 #define POLARIZATION_VM		(2)
 #define POLARIZATION_VH		(3)
 
+#define SD_BOOK_INIT	SD_CPU_INIT
+
+#ifdef CONFIG_NUMA
+
+#define cpu_to_node cpu_to_node
+static inline int cpu_to_node(int cpu)
+{
+	return per_cpu(cpu_topology, cpu).node_id;
+}
+
+/* Returns a pointer to the cpumask of CPUs on node 'node'. */
+#define cpumask_of_node cpumask_of_node
+static inline const struct cpumask *cpumask_of_node(int node)
+{
+	return node_to_cpumask_map[node];
+}
+
+/*
+ * Returns the number of the node containing node 'node'. This
+ * architecture is flat, so it is a pretty simple function!
+ */
+#define parent_node(node) (node)
+
+#define pcibus_to_node(bus) __pcibus_to_node(bus)
+
+#define node_distance(a, b) __node_distance(a, b)
+
+#else /* !CONFIG_NUMA */
+
+#define numa_node_id numa_node_id
+static inline int numa_node_id(void)
+{
+	return 0;
+}
+
+#endif /* CONFIG_NUMA */
+
 #include <asm-generic/topology.h>
 
 #endif /* _ASM_S390_TOPOLOGY_H */
diff --git a/arch/s390/include/asm/unistd.h b/arch/s390/include/asm/unistd.h
index 91f56b1d8156..525cef73b085 100644
--- a/arch/s390/include/asm/unistd.h
+++ b/arch/s390/include/asm/unistd.h
@@ -11,16 +11,24 @@
 
 #define __IGNORE_time
 
-/* Ignore NUMA system calls. Not wired up on s390. */
-#define __IGNORE_mbind
-#define __IGNORE_get_mempolicy
-#define __IGNORE_set_mempolicy
-#define __IGNORE_migrate_pages
-#define __IGNORE_move_pages
-
-/* Ignore system calls that are also reachable via sys_socket */
+/* Ignore system calls that are also reachable via sys_socketcall */
 #define __IGNORE_recvmmsg
 #define __IGNORE_sendmmsg
+#define __IGNORE_socket
+#define __IGNORE_socketpair
+#define __IGNORE_bind
+#define __IGNORE_connect
+#define __IGNORE_listen
+#define __IGNORE_accept4
+#define __IGNORE_getsockopt
+#define __IGNORE_setsockopt
+#define __IGNORE_getsockname
+#define __IGNORE_getpeername
+#define __IGNORE_sendto
+#define __IGNORE_sendmsg
+#define __IGNORE_recvfrom
+#define __IGNORE_recvmsg
+#define __IGNORE_shutdown
 
 #define __ARCH_WANT_OLD_READDIR
 #define __ARCH_WANT_SYS_ALARM
diff --git a/arch/s390/include/asm/vx-insn.h b/arch/s390/include/asm/vx-insn.h
new file mode 100644
index 000000000000..4a3135620f5e
--- /dev/null
+++ b/arch/s390/include/asm/vx-insn.h
@@ -0,0 +1,480 @@
+/*
+ * Support for Vector Instructions
+ *
+ * Assembler macros to generate .byte/.word code for particular
+ * vector instructions that are supported by recent binutils (>= 2.26) only.
+ *
+ * Copyright IBM Corp. 2015
+ * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ */
+
+#ifndef __ASM_S390_VX_INSN_H
+#define __ASM_S390_VX_INSN_H
+
+#ifdef __ASSEMBLY__
+
+
+/* Macros to generate vector instruction byte code */
+
+#define REG_NUM_INVALID	       255
+
+/* GR_NUM - Retrieve general-purpose register number
+ *
+ * @opd:	Operand to store register number
+ * @r64:	String designation register in the format "%rN"
+ */
+.macro	GR_NUM	opd gr
+	\opd = REG_NUM_INVALID
+	.ifc \gr,%r0
+		\opd = 0
+	.endif
+	.ifc \gr,%r1
+		\opd = 1
+	.endif
+	.ifc \gr,%r2
+		\opd = 2
+	.endif
+	.ifc \gr,%r3
+		\opd = 3
+	.endif
+	.ifc \gr,%r4
+		\opd = 4
+	.endif
+	.ifc \gr,%r5
+		\opd = 5
+	.endif
+	.ifc \gr,%r6
+		\opd = 6
+	.endif
+	.ifc \gr,%r7
+		\opd = 7
+	.endif
+	.ifc \gr,%r8
+		\opd = 8
+	.endif
+	.ifc \gr,%r9
+		\opd = 9
+	.endif
+	.ifc \gr,%r10
+		\opd = 10
+	.endif
+	.ifc \gr,%r11
+		\opd = 11
+	.endif
+	.ifc \gr,%r12
+		\opd = 12
+	.endif
+	.ifc \gr,%r13
+		\opd = 13
+	.endif
+	.ifc \gr,%r14
+		\opd = 14
+	.endif
+	.ifc \gr,%r15
+		\opd = 15
+	.endif
+	.if \opd == REG_NUM_INVALID
+		.error "Invalid general-purpose register designation: \gr"
+	.endif
+.endm
+
+/* VX_R() - Macro to encode the VX_NUM into the instruction */
+#define VX_R(v)		(v & 0x0F)
+
+/* VX_NUM - Retrieve vector register number
+ *
+ * @opd:	Operand to store register number
+ * @vxr:	String designation register in the format "%vN"
+ *
+ * The vector register number is used for as input number to the
+ * instruction and, as well as, to compute the RXB field of the
+ * instruction.  To encode the particular vector register number,
+ * use the VX_R(v) macro to extract the instruction opcode.
+ */
+.macro	VX_NUM	opd vxr
+	\opd = REG_NUM_INVALID
+	.ifc \vxr,%v0
+		\opd = 0
+	.endif
+	.ifc \vxr,%v1
+		\opd = 1
+	.endif
+	.ifc \vxr,%v2
+		\opd = 2
+	.endif
+	.ifc \vxr,%v3
+		\opd = 3
+	.endif
+	.ifc \vxr,%v4
+		\opd = 4
+	.endif
+	.ifc \vxr,%v5
+		\opd = 5
+	.endif
+	.ifc \vxr,%v6
+		\opd = 6
+	.endif
+	.ifc \vxr,%v7
+		\opd = 7
+	.endif
+	.ifc \vxr,%v8
+		\opd = 8
+	.endif
+	.ifc \vxr,%v9
+		\opd = 9
+	.endif
+	.ifc \vxr,%v10
+		\opd = 10
+	.endif
+	.ifc \vxr,%v11
+		\opd = 11
+	.endif
+	.ifc \vxr,%v12
+		\opd = 12
+	.endif
+	.ifc \vxr,%v13
+		\opd = 13
+	.endif
+	.ifc \vxr,%v14
+		\opd = 14
+	.endif
+	.ifc \vxr,%v15
+		\opd = 15
+	.endif
+	.ifc \vxr,%v16
+		\opd = 16
+	.endif
+	.ifc \vxr,%v17
+		\opd = 17
+	.endif
+	.ifc \vxr,%v18
+		\opd = 18
+	.endif
+	.ifc \vxr,%v19
+		\opd = 19
+	.endif
+	.ifc \vxr,%v20
+		\opd = 20
+	.endif
+	.ifc \vxr,%v21
+		\opd = 21
+	.endif
+	.ifc \vxr,%v22
+		\opd = 22
+	.endif
+	.ifc \vxr,%v23
+		\opd = 23
+	.endif
+	.ifc \vxr,%v24
+		\opd = 24
+	.endif
+	.ifc \vxr,%v25
+		\opd = 25
+	.endif
+	.ifc \vxr,%v26
+		\opd = 26
+	.endif
+	.ifc \vxr,%v27
+		\opd = 27
+	.endif
+	.ifc \vxr,%v28
+		\opd = 28
+	.endif
+	.ifc \vxr,%v29
+		\opd = 29
+	.endif
+	.ifc \vxr,%v30
+		\opd = 30
+	.endif
+	.ifc \vxr,%v31
+		\opd = 31
+	.endif
+	.if \opd == REG_NUM_INVALID
+		.error "Invalid vector register designation: \vxr"
+	.endif
+.endm
+
+/* RXB - Compute most significant bit used vector registers
+ *
+ * @rxb:	Operand to store computed RXB value
+ * @v1:		First vector register designated operand
+ * @v2:		Second vector register designated operand
+ * @v3:		Third vector register designated operand
+ * @v4:		Fourth vector register designated operand
+ */
+.macro	RXB	rxb v1 v2=0 v3=0 v4=0
+	\rxb = 0
+	.if \v1 & 0x10
+		\rxb = \rxb | 0x08
+	.endif
+	.if \v2 & 0x10
+		\rxb = \rxb | 0x04
+	.endif
+	.if \v3 & 0x10
+		\rxb = \rxb | 0x02
+	.endif
+	.if \v4 & 0x10
+		\rxb = \rxb | 0x01
+	.endif
+.endm
+
+/* MRXB - Generate Element Size Control and RXB value
+ *
+ * @m:		Element size control
+ * @v1:		First vector register designated operand (for RXB)
+ * @v2:		Second vector register designated operand (for RXB)
+ * @v3:		Third vector register designated operand (for RXB)
+ * @v4:		Fourth vector register designated operand (for RXB)
+ */
+.macro	MRXB	m v1 v2=0 v3=0 v4=0
+	rxb = 0
+	RXB	rxb, \v1, \v2, \v3, \v4
+	.byte	(\m << 4) | rxb
+.endm
+
+/* MRXBOPC - Generate Element Size Control, RXB, and final Opcode fields
+ *
+ * @m:		Element size control
+ * @opc:	Opcode
+ * @v1:		First vector register designated operand (for RXB)
+ * @v2:		Second vector register designated operand (for RXB)
+ * @v3:		Third vector register designated operand (for RXB)
+ * @v4:		Fourth vector register designated operand (for RXB)
+ */
+.macro	MRXBOPC	m opc v1 v2=0 v3=0 v4=0
+	MRXB	\m, \v1, \v2, \v3, \v4
+	.byte	\opc
+.endm
+
+/* Vector support instructions */
+
+/* VECTOR GENERATE BYTE MASK */
+.macro	VGBM	vr imm2
+	VX_NUM	v1, \vr
+	.word	(0xE700 | (VX_R(v1) << 4))
+	.word	\imm2
+	MRXBOPC	0, 0x44, v1
+.endm
+.macro	VZERO	vxr
+	VGBM	\vxr, 0
+.endm
+.macro	VONE	vxr
+	VGBM	\vxr, 0xFFFF
+.endm
+
+/* VECTOR LOAD VR ELEMENT FROM GR */
+.macro	VLVG	v, gr, disp, m
+	VX_NUM	v1, \v
+	GR_NUM	b2, "%r0"
+	GR_NUM	r3, \gr
+	.word	0xE700 | (VX_R(v1) << 4) | r3
+	.word	(b2 << 12) | (\disp)
+	MRXBOPC	\m, 0x22, v1
+.endm
+.macro	VLVGB	v, gr, index, base
+	VLVG	\v, \gr, \index, \base, 0
+.endm
+.macro	VLVGH	v, gr, index
+	VLVG	\v, \gr, \index, 1
+.endm
+.macro	VLVGF	v, gr, index
+	VLVG	\v, \gr, \index, 2
+.endm
+.macro	VLVGG	v, gr, index
+	VLVG	\v, \gr, \index, 3
+.endm
+
+/* VECTOR LOAD */
+.macro	VL	v, disp, index="%r0", base
+	VX_NUM	v1, \v
+	GR_NUM	x2, \index
+	GR_NUM	b2, \base
+	.word	0xE700 | (VX_R(v1) << 4) | x2
+	.word	(b2 << 12) | (\disp)
+	MRXBOPC 0, 0x06, v1
+.endm
+
+/* VECTOR LOAD ELEMENT */
+.macro	VLEx	vr1, disp, index="%r0", base, m3, opc
+	VX_NUM	v1, \vr1
+	GR_NUM	x2, \index
+	GR_NUM	b2, \base
+	.word	0xE700 | (VX_R(v1) << 4) | x2
+	.word	(b2 << 12) | (\disp)
+	MRXBOPC	\m3, \opc, v1
+.endm
+.macro	VLEB	vr1, disp, index="%r0", base, m3
+	VLEx	\vr1, \disp, \index, \base, \m3, 0x00
+.endm
+.macro	VLEH	vr1, disp, index="%r0", base, m3
+	VLEx	\vr1, \disp, \index, \base, \m3, 0x01
+.endm
+.macro	VLEF	vr1, disp, index="%r0", base, m3
+	VLEx	\vr1, \disp, \index, \base, \m3, 0x03
+.endm
+.macro	VLEG	vr1, disp, index="%r0", base, m3
+	VLEx	\vr1, \disp, \index, \base, \m3, 0x02
+.endm
+
+/* VECTOR LOAD ELEMENT IMMEDIATE */
+.macro	VLEIx	vr1, imm2, m3, opc
+	VX_NUM	v1, \vr1
+	.word	0xE700 | (VX_R(v1) << 4)
+	.word	\imm2
+	MRXBOPC	\m3, \opc, v1
+.endm
+.macro	VLEIB	vr1, imm2, index
+	VLEIx	\vr1, \imm2, \index, 0x40
+.endm
+.macro	VLEIH	vr1, imm2, index
+	VLEIx	\vr1, \imm2, \index, 0x41
+.endm
+.macro	VLEIF	vr1, imm2, index
+	VLEIx	\vr1, \imm2, \index, 0x43
+.endm
+.macro	VLEIG	vr1, imm2, index
+	VLEIx	\vr1, \imm2, \index, 0x42
+.endm
+
+/* VECTOR LOAD GR FROM VR ELEMENT */
+.macro	VLGV	gr, vr, disp, base="%r0", m
+	GR_NUM	r1, \gr
+	GR_NUM	b2, \base
+	VX_NUM	v3, \vr
+	.word	0xE700 | (r1 << 4) | VX_R(v3)
+	.word	(b2 << 12) | (\disp)
+	MRXBOPC	\m, 0x21, v3
+.endm
+.macro	VLGVB	gr, vr, disp, base="%r0"
+	VLGV	\gr, \vr, \disp, \base, 0
+.endm
+.macro	VLGVH	gr, vr, disp, base="%r0"
+	VLGV	\gr, \vr, \disp, \base, 1
+.endm
+.macro	VLGVF	gr, vr, disp, base="%r0"
+	VLGV	\gr, \vr, \disp, \base, 2
+.endm
+.macro	VLGVG	gr, vr, disp, base="%r0"
+	VLGV	\gr, \vr, \disp, \base, 3
+.endm
+
+/* VECTOR LOAD MULTIPLE */
+.macro	VLM	vfrom, vto, disp, base
+	VX_NUM	v1, \vfrom
+	VX_NUM	v3, \vto
+	GR_NUM	b2, \base	    /* Base register */
+	.word	0xE700 | (VX_R(v1) << 4) | VX_R(v3)
+	.word	(b2 << 12) | (\disp)
+	MRXBOPC	0, 0x36, v1, v3
+.endm
+
+/* VECTOR STORE MULTIPLE */
+.macro	VSTM	vfrom, vto, disp, base
+	VX_NUM	v1, \vfrom
+	VX_NUM	v3, \vto
+	GR_NUM	b2, \base	    /* Base register */
+	.word	0xE700 | (VX_R(v1) << 4) | VX_R(v3)
+	.word	(b2 << 12) | (\disp)
+	MRXBOPC	0, 0x3E, v1, v3
+.endm
+
+/* VECTOR PERMUTE */
+.macro	VPERM	vr1, vr2, vr3, vr4
+	VX_NUM	v1, \vr1
+	VX_NUM	v2, \vr2
+	VX_NUM	v3, \vr3
+	VX_NUM	v4, \vr4
+	.word	0xE700 | (VX_R(v1) << 4) | VX_R(v2)
+	.word	(VX_R(v3) << 12)
+	MRXBOPC	VX_R(v4), 0x8C, v1, v2, v3, v4
+.endm
+
+/* VECTOR UNPACK LOGICAL LOW */
+.macro	VUPLL	vr1, vr2, m3
+	VX_NUM	v1, \vr1
+	VX_NUM	v2, \vr2
+	.word	0xE700 | (VX_R(v1) << 4) | VX_R(v2)
+	.word	0x0000
+	MRXBOPC	\m3, 0xD4, v1, v2
+.endm
+.macro	VUPLLB	vr1, vr2
+	VUPLL	\vr1, \vr2, 0
+.endm
+.macro	VUPLLH	vr1, vr2
+	VUPLL	\vr1, \vr2, 1
+.endm
+.macro	VUPLLF	vr1, vr2
+	VUPLL	\vr1, \vr2, 2
+.endm
+
+
+/* Vector integer instructions */
+
+/* VECTOR EXCLUSIVE OR */
+.macro	VX	vr1, vr2, vr3
+	VX_NUM	v1, \vr1
+	VX_NUM	v2, \vr2
+	VX_NUM	v3, \vr3
+	.word	0xE700 | (VX_R(v1) << 4) | VX_R(v2)
+	.word	(VX_R(v3) << 12)
+	MRXBOPC	0, 0x6D, v1, v2, v3
+.endm
+
+/* VECTOR GALOIS FIELD MULTIPLY SUM */
+.macro	VGFM	vr1, vr2, vr3, m4
+	VX_NUM	v1, \vr1
+	VX_NUM	v2, \vr2
+	VX_NUM	v3, \vr3
+	.word	0xE700 | (VX_R(v1) << 4) | VX_R(v2)
+	.word	(VX_R(v3) << 12)
+	MRXBOPC	\m4, 0xB4, v1, v2, v3
+.endm
+.macro	VGFMB	vr1, vr2, vr3
+	VGFM	\vr1, \vr2, \vr3, 0
+.endm
+.macro	VGFMH	vr1, vr2, vr3
+	VGFM	\vr1, \vr2, \vr3, 1
+.endm
+.macro	VGFMF	vr1, vr2, vr3
+	VGFM	\vr1, \vr2, \vr3, 2
+.endm
+.macro	VGFMG	vr1, vr2, vr3
+	VGFM	\vr1, \vr2, \vr3, 3
+.endm
+
+/* VECTOR GALOIS FIELD MULTIPLY SUM AND ACCUMULATE */
+.macro	VGFMA	vr1, vr2, vr3, vr4, m5
+	VX_NUM	v1, \vr1
+	VX_NUM	v2, \vr2
+	VX_NUM	v3, \vr3
+	VX_NUM	v4, \vr4
+	.word	0xE700 | (VX_R(v1) << 4) | VX_R(v2)
+	.word	(VX_R(v3) << 12) | (\m5 << 8)
+	MRXBOPC	VX_R(v4), 0xBC, v1, v2, v3, v4
+.endm
+.macro	VGFMAB	vr1, vr2, vr3, vr4
+	VGFMA	\vr1, \vr2, \vr3, \vr4, 0
+.endm
+.macro	VGFMAH	vr1, vr2, vr3, vr4
+	VGFMA	\vr1, \vr2, \vr3, \vr4, 1
+.endm
+.macro	VGFMAF	vr1, vr2, vr3, vr4
+	VGFMA	\vr1, \vr2, \vr3, \vr4, 2
+.endm
+.macro	VGFMAG	vr1, vr2, vr3, vr4
+	VGFMA	\vr1, \vr2, \vr3, \vr4, 3
+.endm
+
+/* VECTOR SHIFT RIGHT LOGICAL BY BYTE */
+.macro	VSRLB	vr1, vr2, vr3
+	VX_NUM	v1, \vr1
+	VX_NUM	v2, \vr2
+	VX_NUM	v3, \vr3
+	.word	0xE700 | (VX_R(v1) << 4) | VX_R(v2)
+	.word	(VX_R(v3) << 12)
+	MRXBOPC	0, 0x7D, v1, v2, v3
+.endm
+
+
+#endif	/* __ASSEMBLY__ */
+#endif	/* __ASM_S390_VX_INSN_H */
diff --git a/arch/s390/include/uapi/asm/unistd.h b/arch/s390/include/uapi/asm/unistd.h
index 67878af257a0..59d2bb4e2d0c 100644
--- a/arch/s390/include/uapi/asm/unistd.h
+++ b/arch/s390/include/uapi/asm/unistd.h
@@ -204,9 +204,9 @@
 #define __NR_statfs64		265
 #define __NR_fstatfs64		266
 #define __NR_remap_file_pages	267
-/* Number 268 is reserved for new sys_mbind */
-/* Number 269 is reserved for new sys_get_mempolicy */
-/* Number 270 is reserved for new sys_set_mempolicy */
+#define __NR_mbind		268
+#define __NR_get_mempolicy	269
+#define __NR_set_mempolicy	270
 #define __NR_mq_open		271
 #define __NR_mq_unlink		272
 #define __NR_mq_timedsend	273
@@ -223,7 +223,7 @@
 #define __NR_inotify_init	284
 #define __NR_inotify_add_watch	285
 #define __NR_inotify_rm_watch	286
-/* Number 287 is reserved for new sys_migrate_pages */
+#define __NR_migrate_pages	287
 #define __NR_openat		288
 #define __NR_mkdirat		289
 #define __NR_mknodat		290
@@ -245,7 +245,7 @@
 #define __NR_sync_file_range	307
 #define __NR_tee		308
 #define __NR_vmsplice		309
-/* Number 310 is reserved for new sys_move_pages */
+#define __NR_move_pages		310
 #define __NR_getcpu		311
 #define __NR_epoll_pwait	312
 #define __NR_utimes		313
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index ffb87617a36c..b756c6348ac6 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -28,6 +28,17 @@ CFLAGS_ptrace.o		+= -DUTS_MACHINE='"$(UTS_MACHINE)"'
 
 CFLAGS_sysinfo.o += -w
 
+#
+# Use -march=z900 for sclp.c to be able to print an error message if
+# the kernel is started on a machine which is too old
+#
+CFLAGS_REMOVE_sclp.o = $(CC_FLAGS_FTRACE)
+ifneq ($(CC_FLAGS_MARCH),-march=z900)
+CFLAGS_REMOVE_sclp.o += $(CC_FLAGS_MARCH)
+CFLAGS_sclp.o	+= -march=z900
+endif
+GCOV_PROFILE_sclp.o := n
+
 obj-y	:= traps.o time.o process.o base.o early.o setup.o idle.o vtime.o
 obj-y	+= processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o
 obj-y	+= debug.o irq.o ipl.o dis.o diag.o sclp.o vdso.o
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index a2da259d9327..48c9af7a7683 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -28,6 +28,9 @@ int main(void)
 	DEFINE(__TASK_pid, offsetof(struct task_struct, pid));
 	BLANK();
 	DEFINE(__THREAD_ksp, offsetof(struct thread_struct, ksp));
+	DEFINE(__THREAD_FPU_fpc, offsetof(struct thread_struct, fpu.fpc));
+	DEFINE(__THREAD_FPU_flags, offsetof(struct thread_struct, fpu.flags));
+	DEFINE(__THREAD_FPU_regs, offsetof(struct thread_struct, fpu.regs));
 	DEFINE(__THREAD_per_cause, offsetof(struct thread_struct, per_event.cause));
 	DEFINE(__THREAD_per_address, offsetof(struct thread_struct, per_event.address));
 	DEFINE(__THREAD_per_paid, offsetof(struct thread_struct, per_event.paid));
diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c
index fe8d6924efaa..eb4664238613 100644
--- a/arch/s390/kernel/compat_signal.c
+++ b/arch/s390/kernel/compat_signal.c
@@ -153,33 +153,14 @@ int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from)
 /* Store registers needed to create the signal frame */
 static void store_sigregs(void)
 {
-	int i;
-
 	save_access_regs(current->thread.acrs);
-	save_fp_ctl(&current->thread.fp_regs.fpc);
-	if (current->thread.vxrs) {
-		save_vx_regs(current->thread.vxrs);
-		for (i = 0; i < __NUM_FPRS; i++)
-			current->thread.fp_regs.fprs[i] =
-				*(freg_t *)(current->thread.vxrs + i);
-	} else
-		save_fp_regs(current->thread.fp_regs.fprs);
+	save_fpu_regs();
 }
 
 /* Load registers after signal return */
 static void load_sigregs(void)
 {
-	int i;
-
 	restore_access_regs(current->thread.acrs);
-	/* restore_fp_ctl is done in restore_sigregs */
-	if (current->thread.vxrs) {
-		for (i = 0; i < __NUM_FPRS; i++)
-			*(freg_t *)(current->thread.vxrs + i) =
-				current->thread.fp_regs.fprs[i];
-		restore_vx_regs(current->thread.vxrs);
-	} else
-		restore_fp_regs(current->thread.fp_regs.fprs);
 }
 
 static int save_sigregs32(struct pt_regs *regs, _sigregs32 __user *sregs)
@@ -196,8 +177,7 @@ static int save_sigregs32(struct pt_regs *regs, _sigregs32 __user *sregs)
 		user_sregs.regs.gprs[i] = (__u32) regs->gprs[i];
 	memcpy(&user_sregs.regs.acrs, current->thread.acrs,
 	       sizeof(user_sregs.regs.acrs));
-	memcpy(&user_sregs.fpregs, &current->thread.fp_regs,
-	       sizeof(user_sregs.fpregs));
+	fpregs_store((_s390_fp_regs *) &user_sregs.fpregs, &current->thread.fpu);
 	if (__copy_to_user(sregs, &user_sregs, sizeof(_sigregs32)))
 		return -EFAULT;
 	return 0;
@@ -217,8 +197,8 @@ static int restore_sigregs32(struct pt_regs *regs,_sigregs32 __user *sregs)
 	if (!is_ri_task(current) && (user_sregs.regs.psw.mask & PSW32_MASK_RI))
 		return -EINVAL;
 
-	/* Loading the floating-point-control word can fail. Do that first. */
-	if (restore_fp_ctl(&user_sregs.fpregs.fpc))
+	/* Test the floating-point-control word. */
+	if (test_fp_ctl(user_sregs.fpregs.fpc))
 		return -EINVAL;
 
 	/* Use regs->psw.mask instead of PSW_USER_BITS to preserve PER bit. */
@@ -235,9 +215,7 @@ static int restore_sigregs32(struct pt_regs *regs,_sigregs32 __user *sregs)
 		regs->gprs[i] = (__u64) user_sregs.regs.gprs[i];
 	memcpy(&current->thread.acrs, &user_sregs.regs.acrs,
 	       sizeof(current->thread.acrs));
-
-	memcpy(&current->thread.fp_regs, &user_sregs.fpregs,
-	       sizeof(current->thread.fp_regs));
+	fpregs_load((_s390_fp_regs *) &user_sregs.fpregs, &current->thread.fpu);
 
 	clear_pt_regs_flag(regs, PIF_SYSCALL); /* No longer in a system call */
 	return 0;
@@ -258,13 +236,13 @@ static int save_sigregs_ext32(struct pt_regs *regs,
 		return -EFAULT;
 
 	/* Save vector registers to signal stack */
-	if (current->thread.vxrs) {
+	if (is_vx_task(current)) {
 		for (i = 0; i < __NUM_VXRS_LOW; i++)
-			vxrs[i] = *((__u64 *)(current->thread.vxrs + i) + 1);
+			vxrs[i] = *((__u64 *)(current->thread.fpu.vxrs + i) + 1);
 		if (__copy_to_user(&sregs_ext->vxrs_low, vxrs,
 				   sizeof(sregs_ext->vxrs_low)) ||
 		    __copy_to_user(&sregs_ext->vxrs_high,
-				   current->thread.vxrs + __NUM_VXRS_LOW,
+				   current->thread.fpu.vxrs + __NUM_VXRS_LOW,
 				   sizeof(sregs_ext->vxrs_high)))
 			return -EFAULT;
 	}
@@ -286,15 +264,15 @@ static int restore_sigregs_ext32(struct pt_regs *regs,
 		*(__u32 *)&regs->gprs[i] = gprs_high[i];
 
 	/* Restore vector registers from signal stack */
-	if (current->thread.vxrs) {
+	if (is_vx_task(current)) {
 		if (__copy_from_user(vxrs, &sregs_ext->vxrs_low,
 				     sizeof(sregs_ext->vxrs_low)) ||
-		    __copy_from_user(current->thread.vxrs + __NUM_VXRS_LOW,
+		    __copy_from_user(current->thread.fpu.vxrs + __NUM_VXRS_LOW,
 				     &sregs_ext->vxrs_high,
 				     sizeof(sregs_ext->vxrs_high)))
 			return -EFAULT;
 		for (i = 0; i < __NUM_VXRS_LOW; i++)
-			*((__u64 *)(current->thread.vxrs + i) + 1) = vxrs[i];
+			*((__u64 *)(current->thread.fpu.vxrs + i) + 1) = vxrs[i];
 	}
 	return 0;
 }
@@ -308,6 +286,7 @@ COMPAT_SYSCALL_DEFINE0(sigreturn)
 	if (__copy_from_user(&set.sig, &frame->sc.oldmask, _SIGMASK_COPY_SIZE32))
 		goto badframe;
 	set_current_blocked(&set);
+	save_fpu_regs();
 	if (restore_sigregs32(regs, &frame->sregs))
 		goto badframe;
 	if (restore_sigregs_ext32(regs, &frame->sregs_ext))
@@ -330,6 +309,7 @@ COMPAT_SYSCALL_DEFINE0(rt_sigreturn)
 	set_current_blocked(&set);
 	if (compat_restore_altstack(&frame->uc.uc_stack))
 		goto badframe;
+	save_fpu_regs();
 	if (restore_sigregs32(regs, &frame->uc.uc_mcontext))
 		goto badframe;
 	if (restore_sigregs_ext32(regs, &frame->uc.uc_mcontext_ext))
@@ -472,7 +452,7 @@ static int setup_rt_frame32(struct ksignal *ksig, sigset_t *set,
 	 */
 	uc_flags = UC_GPRS_HIGH;
 	if (MACHINE_HAS_VX) {
-		if (current->thread.vxrs)
+		if (is_vx_task(current))
 			uc_flags |= UC_VXRS;
 	} else
 		frame_size -= sizeof(frame->uc.uc_mcontext_ext.vxrs_low) +
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 84062e7a77da..247b7aae4c6d 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -20,6 +20,8 @@
 #include <asm/page.h>
 #include <asm/sigp.h>
 #include <asm/irq.h>
+#include <asm/fpu-internal.h>
+#include <asm/vx-insn.h>
 
 __PT_R0      =	__PT_GPRS
 __PT_R1      =	__PT_GPRS + 8
@@ -46,10 +48,10 @@ _TIF_WORK	= (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
 		   _TIF_UPROBE)
 _TIF_TRACE	= (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \
 		   _TIF_SYSCALL_TRACEPOINT)
-_CIF_WORK	= (_CIF_MCCK_PENDING | _CIF_ASCE)
+_CIF_WORK	= (_CIF_MCCK_PENDING | _CIF_ASCE | _CIF_FPU)
 _PIF_WORK	= (_PIF_PER_TRAP)
 
-#define BASED(name) name-system_call(%r13)
+#define BASED(name) name-cleanup_critical(%r13)
 
 	.macro	TRACE_IRQS_ON
 #ifdef CONFIG_TRACE_IRQFLAGS
@@ -73,38 +75,6 @@ _PIF_WORK	= (_PIF_PER_TRAP)
 #endif
 	.endm
 
-	.macro LPP newpp
-#if IS_ENABLED(CONFIG_KVM)
-	tm	__LC_MACHINE_FLAGS+6,0x20	# MACHINE_FLAG_LPP
-	jz	.+8
-	.insn	s,0xb2800000,\newpp
-#endif
-	.endm
-
-	.macro	HANDLE_SIE_INTERCEPT scratch,reason
-#if IS_ENABLED(CONFIG_KVM)
-	tmhh	%r8,0x0001		# interrupting from user ?
-	jnz	.+62
-	lgr	\scratch,%r9
-	slg	\scratch,BASED(.Lsie_critical)
-	clg	\scratch,BASED(.Lsie_critical_length)
-	.if	\reason==1
-	# Some program interrupts are suppressing (e.g. protection).
-	# We must also check the instruction after SIE in that case.
-	# do_protection_exception will rewind to .Lrewind_pad
-	jh	.+42
-	.else
-	jhe	.+42
-	.endif
-	lg	%r14,__SF_EMPTY(%r15)		# get control block pointer
-	LPP	__SF_EMPTY+16(%r15)		# set host id
-	ni	__SIE_PROG0C+3(%r14),0xfe	# no longer in SIE
-	lctlg	%c1,%c1,__LC_USER_ASCE		# load primary asce
-	larl	%r9,sie_exit			# skip forward to sie_exit
-	mvi	__SF_EMPTY+31(%r15),\reason	# set exit reason
-#endif
-	.endm
-
 	.macro	CHECK_STACK stacksize,savearea
 #ifdef CONFIG_CHECK_STACK
 	tml	%r15,\stacksize - CONFIG_STACK_GUARD
@@ -113,7 +83,7 @@ _PIF_WORK	= (_PIF_PER_TRAP)
 #endif
 	.endm
 
-	.macro	SWITCH_ASYNC savearea,stack,shift
+	.macro	SWITCH_ASYNC savearea,timer
 	tmhh	%r8,0x0001		# interrupting from user ?
 	jnz	1f
 	lgr	%r14,%r9
@@ -124,26 +94,28 @@ _PIF_WORK	= (_PIF_PER_TRAP)
 	brasl	%r14,cleanup_critical
 	tmhh	%r8,0x0001		# retest problem state after cleanup
 	jnz	1f
-0:	lg	%r14,\stack		# are we already on the target stack?
+0:	lg	%r14,__LC_ASYNC_STACK	# are we already on the async stack?
 	slgr	%r14,%r15
-	srag	%r14,%r14,\shift
-	jnz	1f
-	CHECK_STACK 1<<\shift,\savearea
+	srag	%r14,%r14,STACK_SHIFT
+	jnz	2f
+	CHECK_STACK 1<<STACK_SHIFT,\savearea
 	aghi	%r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
-	j	2f
-1:	lg	%r15,\stack		# load target stack
-2:	la	%r11,STACK_FRAME_OVERHEAD(%r15)
+	j	3f
+1:	LAST_BREAK %r14
+	UPDATE_VTIME %r14,%r15,\timer
+2:	lg	%r15,__LC_ASYNC_STACK	# load async stack
+3:	la	%r11,STACK_FRAME_OVERHEAD(%r15)
 	.endm
 
-	.macro UPDATE_VTIME scratch,enter_timer
-	lg	\scratch,__LC_EXIT_TIMER
-	slg	\scratch,\enter_timer
-	alg	\scratch,__LC_USER_TIMER
-	stg	\scratch,__LC_USER_TIMER
-	lg	\scratch,__LC_LAST_UPDATE_TIMER
-	slg	\scratch,__LC_EXIT_TIMER
-	alg	\scratch,__LC_SYSTEM_TIMER
-	stg	\scratch,__LC_SYSTEM_TIMER
+	.macro UPDATE_VTIME w1,w2,enter_timer
+	lg	\w1,__LC_EXIT_TIMER
+	lg	\w2,__LC_LAST_UPDATE_TIMER
+	slg	\w1,\enter_timer
+	slg	\w2,__LC_EXIT_TIMER
+	alg	\w1,__LC_USER_TIMER
+	alg	\w2,__LC_SYSTEM_TIMER
+	stg	\w1,__LC_USER_TIMER
+	stg	\w2,__LC_SYSTEM_TIMER
 	mvc	__LC_LAST_UPDATE_TIMER(8),\enter_timer
 	.endm
 
@@ -197,6 +169,69 @@ ENTRY(__switch_to)
 	br	%r14
 
 .L__critical_start:
+
+#if IS_ENABLED(CONFIG_KVM)
+/*
+ * sie64a calling convention:
+ * %r2 pointer to sie control block
+ * %r3 guest register save area
+ */
+ENTRY(sie64a)
+	stmg	%r6,%r14,__SF_GPRS(%r15)	# save kernel registers
+	stg	%r2,__SF_EMPTY(%r15)		# save control block pointer
+	stg	%r3,__SF_EMPTY+8(%r15)		# save guest register save area
+	xc	__SF_EMPTY+16(16,%r15),__SF_EMPTY+16(%r15) # host id & reason
+	tm	__LC_CPU_FLAGS+7,_CIF_FPU	# load guest fp/vx registers ?
+	jno	.Lsie_load_guest_gprs
+	brasl	%r14,load_fpu_regs		# load guest fp/vx regs
+.Lsie_load_guest_gprs:
+	lmg	%r0,%r13,0(%r3)			# load guest gprs 0-13
+	lg	%r14,__LC_GMAP			# get gmap pointer
+	ltgr	%r14,%r14
+	jz	.Lsie_gmap
+	lctlg	%c1,%c1,__GMAP_ASCE(%r14)	# load primary asce
+.Lsie_gmap:
+	lg	%r14,__SF_EMPTY(%r15)		# get control block pointer
+	oi	__SIE_PROG0C+3(%r14),1		# we are going into SIE now
+	tm	__SIE_PROG20+3(%r14),3		# last exit...
+	jnz	.Lsie_skip
+	tm	__LC_CPU_FLAGS+7,_CIF_FPU
+	jo	.Lsie_skip			# exit if fp/vx regs changed
+	tm	__LC_MACHINE_FLAGS+6,0x20	# MACHINE_FLAG_LPP
+	jz	.Lsie_enter
+	.insn	s,0xb2800000,__LC_CURRENT_PID	# set guest id to pid
+.Lsie_enter:
+	sie	0(%r14)
+	tm	__LC_MACHINE_FLAGS+6,0x20	# MACHINE_FLAG_LPP
+	jz	.Lsie_skip
+	.insn	s,0xb2800000,__SF_EMPTY+16(%r15)# set host id
+.Lsie_skip:
+	ni	__SIE_PROG0C+3(%r14),0xfe	# no longer in SIE
+	lctlg	%c1,%c1,__LC_USER_ASCE		# load primary asce
+.Lsie_done:
+# some program checks are suppressing. C code (e.g. do_protection_exception)
+# will rewind the PSW by the ILC, which is 4 bytes in case of SIE. Other
+# instructions between sie64a and .Lsie_done should not cause program
+# interrupts. So lets use a nop (47 00 00 00) as a landing pad.
+# See also .Lcleanup_sie
+.Lrewind_pad:
+	nop	0
+	.globl sie_exit
+sie_exit:
+	lg	%r14,__SF_EMPTY+8(%r15)		# load guest register save area
+	stmg	%r0,%r13,0(%r14)		# save guest gprs 0-13
+	lmg	%r6,%r14,__SF_GPRS(%r15)	# restore kernel registers
+	lg	%r2,__SF_EMPTY+24(%r15)		# return exit reason code
+	br	%r14
+.Lsie_fault:
+	lghi	%r14,-EFAULT
+	stg	%r14,__SF_EMPTY+24(%r15)	# set exit reason code
+	j	sie_exit
+
+	EX_TABLE(.Lrewind_pad,.Lsie_fault)
+	EX_TABLE(sie_exit,.Lsie_fault)
+#endif
+
 /*
  * SVC interrupt handler routine. System calls are synchronous events and
  * are executed with interrupts enabled.
@@ -212,9 +247,9 @@ ENTRY(system_call)
 .Lsysc_per:
 	lg	%r15,__LC_KERNEL_STACK
 	la	%r11,STACK_FRAME_OVERHEAD(%r15)	# pointer to pt_regs
-.Lsysc_vtime:
-	UPDATE_VTIME %r13,__LC_SYNC_ENTER_TIMER
 	LAST_BREAK %r13
+.Lsysc_vtime:
+	UPDATE_VTIME %r10,%r13,__LC_SYNC_ENTER_TIMER
 	stmg	%r0,%r7,__PT_R0(%r11)
 	mvc	__PT_R8(64,%r11),__LC_SAVE_AREA_SYNC
 	mvc	__PT_PSW(16,%r11),__LC_SVC_OLD_PSW
@@ -244,8 +279,6 @@ ENTRY(system_call)
 .Lsysc_return:
 	LOCKDEP_SYS_EXIT
 .Lsysc_tif:
-	tm	__PT_PSW+1(%r11),0x01		# returning to user ?
-	jno	.Lsysc_restore
 	tm	__PT_FLAGS+7(%r11),_PIF_WORK
 	jnz	.Lsysc_work
 	tm	__TI_flags+7(%r12),_TIF_WORK
@@ -280,6 +313,8 @@ ENTRY(system_call)
 	jo	.Lsysc_sigpending
 	tm	__TI_flags+7(%r12),_TIF_NOTIFY_RESUME
 	jo	.Lsysc_notify_resume
+	tm	__LC_CPU_FLAGS+7,_CIF_FPU
+	jo	.Lsysc_vxrs
 	tm	__LC_CPU_FLAGS+7,_CIF_ASCE
 	jo	.Lsysc_uaccess
 	j	.Lsysc_return		# beware of critical section cleanup
@@ -307,6 +342,13 @@ ENTRY(system_call)
 	j	.Lsysc_return
 
 #
+# CIF_FPU is set, restore floating-point controls and floating-point registers.
+#
+.Lsysc_vxrs:
+	larl	%r14,.Lsysc_return
+	jg	load_fpu_regs
+
+#
 # _TIF_SIGPENDING is set, call do_signal
 #
 .Lsysc_sigpending:
@@ -405,28 +447,35 @@ ENTRY(pgm_check_handler)
 	stmg	%r8,%r15,__LC_SAVE_AREA_SYNC
 	lg	%r10,__LC_LAST_BREAK
 	lg	%r12,__LC_THREAD_INFO
-	larl	%r13,system_call
+	larl	%r13,cleanup_critical
 	lmg	%r8,%r9,__LC_PGM_OLD_PSW
-	HANDLE_SIE_INTERCEPT %r14,1
 	tmhh	%r8,0x0001		# test problem state bit
-	jnz	1f			# -> fault in user space
-	tmhh	%r8,0x4000		# PER bit set in old PSW ?
-	jnz	0f			# -> enabled, can't be a double fault
+	jnz	2f			# -> fault in user space
+#if IS_ENABLED(CONFIG_KVM)
+	# cleanup critical section for sie64a
+	lgr	%r14,%r9
+	slg	%r14,BASED(.Lsie_critical_start)
+	clg	%r14,BASED(.Lsie_critical_length)
+	jhe	0f
+	brasl	%r14,.Lcleanup_sie
+#endif
+0:	tmhh	%r8,0x4000		# PER bit set in old PSW ?
+	jnz	1f			# -> enabled, can't be a double fault
 	tm	__LC_PGM_ILC+3,0x80	# check for per exception
 	jnz	.Lpgm_svcper		# -> single stepped svc
-0:	CHECK_STACK STACK_SIZE,__LC_SAVE_AREA_SYNC
+1:	CHECK_STACK STACK_SIZE,__LC_SAVE_AREA_SYNC
 	aghi	%r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
-	j	2f
-1:	UPDATE_VTIME %r14,__LC_SYNC_ENTER_TIMER
-	LAST_BREAK %r14
+	j	3f
+2:	LAST_BREAK %r14
+	UPDATE_VTIME %r14,%r15,__LC_SYNC_ENTER_TIMER
 	lg	%r15,__LC_KERNEL_STACK
 	lg	%r14,__TI_task(%r12)
 	aghi	%r14,__TASK_thread	# pointer to thread_struct
 	lghi	%r13,__LC_PGM_TDB
 	tm	__LC_PGM_ILC+2,0x02	# check for transaction abort
-	jz	2f
+	jz	3f
 	mvc	__THREAD_trap_tdb(256,%r14),0(%r13)
-2:	la	%r11,STACK_FRAME_OVERHEAD(%r15)
+3:	la	%r11,STACK_FRAME_OVERHEAD(%r15)
 	stmg	%r0,%r7,__PT_R0(%r11)
 	mvc	__PT_R8(64,%r11),__LC_SAVE_AREA_SYNC
 	stmg	%r8,%r9,__PT_PSW(%r11)
@@ -435,24 +484,28 @@ ENTRY(pgm_check_handler)
 	xc	__PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
 	stg	%r10,__PT_ARGS(%r11)
 	tm	__LC_PGM_ILC+3,0x80	# check for per exception
-	jz	0f
+	jz	4f
 	tmhh	%r8,0x0001		# kernel per event ?
 	jz	.Lpgm_kprobe
 	oi	__PT_FLAGS+7(%r11),_PIF_PER_TRAP
 	mvc	__THREAD_per_address(8,%r14),__LC_PER_ADDRESS
 	mvc	__THREAD_per_cause(2,%r14),__LC_PER_CODE
 	mvc	__THREAD_per_paid(1,%r14),__LC_PER_ACCESS_ID
-0:	REENABLE_IRQS
+4:	REENABLE_IRQS
 	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
 	larl	%r1,pgm_check_table
 	llgh	%r10,__PT_INT_CODE+2(%r11)
 	nill	%r10,0x007f
 	sll	%r10,2
-	je	.Lsysc_return
+	je	.Lpgm_return
 	lgf	%r1,0(%r10,%r1)		# load address of handler routine
 	lgr	%r2,%r11		# pass pointer to pt_regs
 	basr	%r14,%r1		# branch to interrupt-handler
-	j	.Lsysc_return
+.Lpgm_return:
+	LOCKDEP_SYS_EXIT
+	tm	__PT_PSW+1(%r11),0x01	# returning to user ?
+	jno	.Lsysc_restore
+	j	.Lsysc_tif
 
 #
 # PER event in supervisor state, must be kprobes
@@ -462,7 +515,7 @@ ENTRY(pgm_check_handler)
 	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
 	lgr	%r2,%r11		# pass pointer to pt_regs
 	brasl	%r14,do_per_trap
-	j	.Lsysc_return
+	j	.Lpgm_return
 
 #
 # single stepped system call
@@ -483,15 +536,9 @@ ENTRY(io_int_handler)
 	stmg	%r8,%r15,__LC_SAVE_AREA_ASYNC
 	lg	%r10,__LC_LAST_BREAK
 	lg	%r12,__LC_THREAD_INFO
-	larl	%r13,system_call
+	larl	%r13,cleanup_critical
 	lmg	%r8,%r9,__LC_IO_OLD_PSW
-	HANDLE_SIE_INTERCEPT %r14,2
-	SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT
-	tmhh	%r8,0x0001		# interrupting from user?
-	jz	.Lio_skip
-	UPDATE_VTIME %r14,__LC_ASYNC_ENTER_TIMER
-	LAST_BREAK %r14
-.Lio_skip:
+	SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_ENTER_TIMER
 	stmg	%r0,%r7,__PT_R0(%r11)
 	mvc	__PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC
 	stmg	%r8,%r9,__PT_PSW(%r11)
@@ -587,6 +634,8 @@ ENTRY(io_int_handler)
 	jo	.Lio_sigpending
 	tm	__TI_flags+7(%r12),_TIF_NOTIFY_RESUME
 	jo	.Lio_notify_resume
+	tm	__LC_CPU_FLAGS+7,_CIF_FPU
+	jo	.Lio_vxrs
 	tm	__LC_CPU_FLAGS+7,_CIF_ASCE
 	jo	.Lio_uaccess
 	j	.Lio_return		# beware of critical section cleanup
@@ -609,6 +658,13 @@ ENTRY(io_int_handler)
 	j	.Lio_return
 
 #
+# CIF_FPU is set, restore floating-point controls and floating-point registers.
+#
+.Lio_vxrs:
+	larl	%r14,.Lio_return
+	jg	load_fpu_regs
+
+#
 # _TIF_NEED_RESCHED is set, call schedule
 #
 .Lio_reschedule:
@@ -652,15 +708,9 @@ ENTRY(ext_int_handler)
 	stmg	%r8,%r15,__LC_SAVE_AREA_ASYNC
 	lg	%r10,__LC_LAST_BREAK
 	lg	%r12,__LC_THREAD_INFO
-	larl	%r13,system_call
+	larl	%r13,cleanup_critical
 	lmg	%r8,%r9,__LC_EXT_OLD_PSW
-	HANDLE_SIE_INTERCEPT %r14,3
-	SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT
-	tmhh	%r8,0x0001		# interrupting from user ?
-	jz	.Lext_skip
-	UPDATE_VTIME %r14,__LC_ASYNC_ENTER_TIMER
-	LAST_BREAK %r14
-.Lext_skip:
+	SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_ENTER_TIMER
 	stmg	%r0,%r7,__PT_R0(%r11)
 	mvc	__PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC
 	stmg	%r8,%r9,__PT_PSW(%r11)
@@ -690,6 +740,122 @@ ENTRY(psw_idle)
 	br	%r14
 .Lpsw_idle_end:
 
+/* Store floating-point controls and floating-point or vector extension
+ * registers instead.  A critical section cleanup assures that the registers
+ * are stored even if interrupted for some other work.	The register %r2
+ * designates a struct fpu to store register contents.	If the specified
+ * structure does not contain a register save area, the register store is
+ * omitted (see also comments in arch_dup_task_struct()).
+ *
+ * The CIF_FPU flag is set in any case.  The CIF_FPU triggers a lazy restore
+ * of the register contents at system call or io return.
+ */
+ENTRY(save_fpu_regs)
+	lg	%r2,__LC_CURRENT
+	aghi	%r2,__TASK_thread
+	tm	__LC_CPU_FLAGS+7,_CIF_FPU
+	bor	%r14
+	stfpc	__THREAD_FPU_fpc(%r2)
+.Lsave_fpu_regs_fpc_end:
+	lg	%r3,__THREAD_FPU_regs(%r2)
+	ltgr	%r3,%r3
+	jz	.Lsave_fpu_regs_done	  # no save area -> set CIF_FPU
+	tm	__THREAD_FPU_flags+3(%r2),FPU_USE_VX
+	jz	.Lsave_fpu_regs_fp	  # no -> store FP regs
+.Lsave_fpu_regs_vx_low:
+	VSTM	%v0,%v15,0,%r3		  # vstm 0,15,0(3)
+.Lsave_fpu_regs_vx_high:
+	VSTM	%v16,%v31,256,%r3	  # vstm 16,31,256(3)
+	j	.Lsave_fpu_regs_done	  # -> set CIF_FPU flag
+.Lsave_fpu_regs_fp:
+	std	0,0(%r3)
+	std	1,8(%r3)
+	std	2,16(%r3)
+	std	3,24(%r3)
+	std	4,32(%r3)
+	std	5,40(%r3)
+	std	6,48(%r3)
+	std	7,56(%r3)
+	std	8,64(%r3)
+	std	9,72(%r3)
+	std	10,80(%r3)
+	std	11,88(%r3)
+	std	12,96(%r3)
+	std	13,104(%r3)
+	std	14,112(%r3)
+	std	15,120(%r3)
+.Lsave_fpu_regs_done:
+	oi	__LC_CPU_FLAGS+7,_CIF_FPU
+	br	%r14
+.Lsave_fpu_regs_end:
+
+/* Load floating-point controls and floating-point or vector extension
+ * registers.  A critical section cleanup assures that the register contents
+ * are loaded even if interrupted for some other work.	Depending on the saved
+ * FP/VX state, the vector-enablement control, CR0.46, is either set or cleared.
+ *
+ * There are special calling conventions to fit into sysc and io return work:
+ *	%r15:	<kernel stack>
+ * The function requires:
+ *	%r4 and __SF_EMPTY+32(%r15)
+ */
+load_fpu_regs:
+	lg	%r4,__LC_CURRENT
+	aghi	%r4,__TASK_thread
+	tm	__LC_CPU_FLAGS+7,_CIF_FPU
+	bnor	%r14
+	lfpc	__THREAD_FPU_fpc(%r4)
+	stctg	%c0,%c0,__SF_EMPTY+32(%r15)	# store CR0
+	tm	__THREAD_FPU_flags+3(%r4),FPU_USE_VX	# VX-enabled task ?
+	lg	%r4,__THREAD_FPU_regs(%r4)	# %r4 <- reg save area
+	jz	.Lload_fpu_regs_fp_ctl		# -> no VX, load FP regs
+.Lload_fpu_regs_vx_ctl:
+	tm	__SF_EMPTY+32+5(%r15),2		# test VX control
+	jo	.Lload_fpu_regs_vx
+	oi	__SF_EMPTY+32+5(%r15),2		# set VX control
+	lctlg	%c0,%c0,__SF_EMPTY+32(%r15)
+.Lload_fpu_regs_vx:
+	VLM	%v0,%v15,0,%r4
+.Lload_fpu_regs_vx_high:
+	VLM	%v16,%v31,256,%r4
+	j	.Lload_fpu_regs_done
+.Lload_fpu_regs_fp_ctl:
+	tm	__SF_EMPTY+32+5(%r15),2		# test VX control
+	jz	.Lload_fpu_regs_fp
+	ni	__SF_EMPTY+32+5(%r15),253	# clear VX control
+	lctlg	%c0,%c0,__SF_EMPTY+32(%r15)
+.Lload_fpu_regs_fp:
+	ld	0,0(%r4)
+	ld	1,8(%r4)
+	ld	2,16(%r4)
+	ld	3,24(%r4)
+	ld	4,32(%r4)
+	ld	5,40(%r4)
+	ld	6,48(%r4)
+	ld	7,56(%r4)
+	ld	8,64(%r4)
+	ld	9,72(%r4)
+	ld	10,80(%r4)
+	ld	11,88(%r4)
+	ld	12,96(%r4)
+	ld	13,104(%r4)
+	ld	14,112(%r4)
+	ld	15,120(%r4)
+.Lload_fpu_regs_done:
+	ni	__LC_CPU_FLAGS+7,255-_CIF_FPU
+	br	%r14
+.Lload_fpu_regs_end:
+
+/* Test and set the vector enablement control in CR0.46 */
+ENTRY(__ctl_set_vx)
+	stctg	%c0,%c0,__SF_EMPTY(%r15)
+	tm	__SF_EMPTY+5(%r15),2
+	bor	%r14
+	oi	__SF_EMPTY+5(%r15),2
+	lctlg	%c0,%c0,__SF_EMPTY(%r15)
+	br	%r14
+.L__ctl_set_vx_end:
+
 .L__critical_end:
 
 /*
@@ -702,9 +868,8 @@ ENTRY(mcck_int_handler)
 	lmg	%r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1)# revalidate gprs
 	lg	%r10,__LC_LAST_BREAK
 	lg	%r12,__LC_THREAD_INFO
-	larl	%r13,system_call
+	larl	%r13,cleanup_critical
 	lmg	%r8,%r9,__LC_MCK_OLD_PSW
-	HANDLE_SIE_INTERCEPT %r14,4
 	tm	__LC_MCCK_CODE,0x80	# system damage?
 	jo	.Lmcck_panic		# yes -> rest of mcck code invalid
 	lghi	%r14,__LC_CPU_TIMER_SAVE_AREA
@@ -725,11 +890,7 @@ ENTRY(mcck_int_handler)
 	mvc	__LC_MCCK_ENTER_TIMER(8),0(%r14)
 3:	tm	__LC_MCCK_CODE+2,0x09	# mwp + ia of old psw valid?
 	jno	.Lmcck_panic		# no -> skip cleanup critical
-	SWITCH_ASYNC __LC_GPREGS_SAVE_AREA+64,__LC_PANIC_STACK,PAGE_SHIFT
-	tm	%r8,0x0001		# interrupting from user ?
-	jz	.Lmcck_skip
-	UPDATE_VTIME %r14,__LC_MCCK_ENTER_TIMER
-	LAST_BREAK %r14
+	SWITCH_ASYNC __LC_GPREGS_SAVE_AREA+64,__LC_MCCK_ENTER_TIMER
 .Lmcck_skip:
 	lghi	%r14,__LC_GPREGS_SAVE_AREA+64
 	stmg	%r0,%r7,__PT_R0(%r11)
@@ -764,12 +925,8 @@ ENTRY(mcck_int_handler)
 	lpswe	__LC_RETURN_MCCK_PSW
 
 .Lmcck_panic:
-	lg	%r14,__LC_PANIC_STACK
-	slgr	%r14,%r15
-	srag	%r14,%r14,PAGE_SHIFT
-	jz	0f
 	lg	%r15,__LC_PANIC_STACK
-0:	aghi	%r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
+	aghi	%r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
 	j	.Lmcck_skip
 
 #
@@ -819,20 +976,13 @@ stack_overflow:
 	jg	kernel_stack_overflow
 #endif
 
-	.align	8
-.Lcleanup_table:
-	.quad	system_call
-	.quad	.Lsysc_do_svc
-	.quad	.Lsysc_tif
-	.quad	.Lsysc_restore
-	.quad	.Lsysc_done
-	.quad	.Lio_tif
-	.quad	.Lio_restore
-	.quad	.Lio_done
-	.quad	psw_idle
-	.quad	.Lpsw_idle_end
-
 cleanup_critical:
+#if IS_ENABLED(CONFIG_KVM)
+	clg	%r9,BASED(.Lcleanup_table_sie)	# .Lsie_gmap
+	jl	0f
+	clg	%r9,BASED(.Lcleanup_table_sie+8)# .Lsie_done
+	jl	.Lcleanup_sie
+#endif
 	clg	%r9,BASED(.Lcleanup_table)	# system_call
 	jl	0f
 	clg	%r9,BASED(.Lcleanup_table+8)	# .Lsysc_do_svc
@@ -853,8 +1003,54 @@ cleanup_critical:
 	jl	0f
 	clg	%r9,BASED(.Lcleanup_table+72)	# .Lpsw_idle_end
 	jl	.Lcleanup_idle
+	clg	%r9,BASED(.Lcleanup_table+80)	# save_fpu_regs
+	jl	0f
+	clg	%r9,BASED(.Lcleanup_table+88)	# .Lsave_fpu_regs_end
+	jl	.Lcleanup_save_fpu_regs
+	clg	%r9,BASED(.Lcleanup_table+96)	# load_fpu_regs
+	jl	0f
+	clg	%r9,BASED(.Lcleanup_table+104)	# .Lload_fpu_regs_end
+	jl	.Lcleanup_load_fpu_regs
+	clg	%r9,BASED(.Lcleanup_table+112)	# __ctl_set_vx
+	jl	0f
+	clg	%r9,BASED(.Lcleanup_table+120)	# .L__ctl_set_vx_end
+	jl	.Lcleanup___ctl_set_vx
 0:	br	%r14
 
+	.align	8
+.Lcleanup_table:
+	.quad	system_call
+	.quad	.Lsysc_do_svc
+	.quad	.Lsysc_tif
+	.quad	.Lsysc_restore
+	.quad	.Lsysc_done
+	.quad	.Lio_tif
+	.quad	.Lio_restore
+	.quad	.Lio_done
+	.quad	psw_idle
+	.quad	.Lpsw_idle_end
+	.quad	save_fpu_regs
+	.quad	.Lsave_fpu_regs_end
+	.quad	load_fpu_regs
+	.quad	.Lload_fpu_regs_end
+	.quad	__ctl_set_vx
+	.quad	.L__ctl_set_vx_end
+
+#if IS_ENABLED(CONFIG_KVM)
+.Lcleanup_table_sie:
+	.quad	.Lsie_gmap
+	.quad	.Lsie_done
+
+.Lcleanup_sie:
+	lg	%r9,__SF_EMPTY(%r15)		# get control block pointer
+	tm	__LC_MACHINE_FLAGS+6,0x20	# MACHINE_FLAG_LPP
+	jz	0f
+	.insn	s,0xb2800000,__SF_EMPTY+16(%r15)# set host id
+0:	ni	__SIE_PROG0C+3(%r9),0xfe	# no longer in SIE
+	lctlg	%c1,%c1,__LC_USER_ASCE		# load primary asce
+	larl	%r9,sie_exit			# skip forward to sie_exit
+	br	%r14
+#endif
 
 .Lcleanup_system_call:
 	# check if stpt has been executed
@@ -915,7 +1111,7 @@ cleanup_critical:
 	.quad	system_call
 	.quad	.Lsysc_stmg
 	.quad	.Lsysc_per
-	.quad	.Lsysc_vtime+18
+	.quad	.Lsysc_vtime+36
 	.quad	.Lsysc_vtime+42
 
 .Lcleanup_sysc_tif:
@@ -981,6 +1177,145 @@ cleanup_critical:
 .Lcleanup_idle_insn:
 	.quad	.Lpsw_idle_lpsw
 
+.Lcleanup_save_fpu_regs:
+	tm	__LC_CPU_FLAGS+7,_CIF_FPU
+	bor	%r14
+	clg	%r9,BASED(.Lcleanup_save_fpu_regs_done)
+	jhe	5f
+	clg	%r9,BASED(.Lcleanup_save_fpu_regs_fp)
+	jhe	4f
+	clg	%r9,BASED(.Lcleanup_save_fpu_regs_vx_high)
+	jhe	3f
+	clg	%r9,BASED(.Lcleanup_save_fpu_regs_vx_low)
+	jhe	2f
+	clg	%r9,BASED(.Lcleanup_save_fpu_fpc_end)
+	jhe	1f
+	lg	%r2,__LC_CURRENT
+0:	# Store floating-point controls
+	stfpc	__THREAD_FPU_fpc(%r2)
+1:	# Load register save area and check if VX is active
+	lg	%r3,__THREAD_FPU_regs(%r2)
+	ltgr	%r3,%r3
+	jz	5f			  # no save area -> set CIF_FPU
+	tm	__THREAD_FPU_flags+3(%r2),FPU_USE_VX
+	jz	4f			  # no VX -> store FP regs
+2:	# Store vector registers (V0-V15)
+	VSTM	%v0,%v15,0,%r3		  # vstm 0,15,0(3)
+3:	# Store vector registers (V16-V31)
+	VSTM	%v16,%v31,256,%r3	  # vstm 16,31,256(3)
+	j	5f			  # -> done, set CIF_FPU flag
+4:	# Store floating-point registers
+	std	0,0(%r3)
+	std	1,8(%r3)
+	std	2,16(%r3)
+	std	3,24(%r3)
+	std	4,32(%r3)
+	std	5,40(%r3)
+	std	6,48(%r3)
+	std	7,56(%r3)
+	std	8,64(%r3)
+	std	9,72(%r3)
+	std	10,80(%r3)
+	std	11,88(%r3)
+	std	12,96(%r3)
+	std	13,104(%r3)
+	std	14,112(%r3)
+	std	15,120(%r3)
+5:	# Set CIF_FPU flag
+	oi	__LC_CPU_FLAGS+7,_CIF_FPU
+	lg	%r9,48(%r11)		# return from save_fpu_regs
+	br	%r14
+.Lcleanup_save_fpu_fpc_end:
+	.quad	.Lsave_fpu_regs_fpc_end
+.Lcleanup_save_fpu_regs_vx_low:
+	.quad	.Lsave_fpu_regs_vx_low
+.Lcleanup_save_fpu_regs_vx_high:
+	.quad	.Lsave_fpu_regs_vx_high
+.Lcleanup_save_fpu_regs_fp:
+	.quad	.Lsave_fpu_regs_fp
+.Lcleanup_save_fpu_regs_done:
+	.quad	.Lsave_fpu_regs_done
+
+.Lcleanup_load_fpu_regs:
+	tm	__LC_CPU_FLAGS+7,_CIF_FPU
+	bnor	%r14
+	clg	%r9,BASED(.Lcleanup_load_fpu_regs_done)
+	jhe	1f
+	clg	%r9,BASED(.Lcleanup_load_fpu_regs_fp)
+	jhe	2f
+	clg	%r9,BASED(.Lcleanup_load_fpu_regs_fp_ctl)
+	jhe	3f
+	clg	%r9,BASED(.Lcleanup_load_fpu_regs_vx_high)
+	jhe	4f
+	clg	%r9,BASED(.Lcleanup_load_fpu_regs_vx)
+	jhe	5f
+	clg	%r9,BASED(.Lcleanup_load_fpu_regs_vx_ctl)
+	jhe	6f
+	lg	%r4,__LC_CURRENT
+	lfpc	__THREAD_FPU_fpc(%r4)
+	tm	__THREAD_FPU_flags+3(%r4),FPU_USE_VX	# VX-enabled task ?
+	lg	%r4,__THREAD_FPU_regs(%r4)	# %r4 <- reg save area
+	jz	3f				# -> no VX, load FP regs
+6:	# Set VX-enablement control
+	stctg	%c0,%c0,__SF_EMPTY+32(%r15)	# store CR0
+	tm	__SF_EMPTY+32+5(%r15),2		# test VX control
+	jo	5f
+	oi	__SF_EMPTY+32+5(%r15),2		# set VX control
+	lctlg	%c0,%c0,__SF_EMPTY+32(%r15)
+5:	# Load V0 ..V15 registers
+	VLM	%v0,%v15,0,%r4
+4:	# Load V16..V31 registers
+	VLM	%v16,%v31,256,%r4
+	j	1f
+3:	# Clear VX-enablement control for FP
+	stctg	%c0,%c0,__SF_EMPTY+32(%r15)	# store CR0
+	tm	__SF_EMPTY+32+5(%r15),2		# test VX control
+	jz	2f
+	ni	__SF_EMPTY+32+5(%r15),253	# clear VX control
+	lctlg	%c0,%c0,__SF_EMPTY+32(%r15)
+2:	# Load floating-point registers
+	ld	0,0(%r4)
+	ld	1,8(%r4)
+	ld	2,16(%r4)
+	ld	3,24(%r4)
+	ld	4,32(%r4)
+	ld	5,40(%r4)
+	ld	6,48(%r4)
+	ld	7,56(%r4)
+	ld	8,64(%r4)
+	ld	9,72(%r4)
+	ld	10,80(%r4)
+	ld	11,88(%r4)
+	ld	12,96(%r4)
+	ld	13,104(%r4)
+	ld	14,112(%r4)
+	ld	15,120(%r4)
+1:	# Clear CIF_FPU bit
+	ni	__LC_CPU_FLAGS+7,255-_CIF_FPU
+	lg	%r9,48(%r11)		# return from load_fpu_regs
+	br	%r14
+.Lcleanup_load_fpu_regs_vx_ctl:
+	.quad	.Lload_fpu_regs_vx_ctl
+.Lcleanup_load_fpu_regs_vx:
+	.quad	.Lload_fpu_regs_vx
+.Lcleanup_load_fpu_regs_vx_high:
+	.quad	.Lload_fpu_regs_vx_high
+.Lcleanup_load_fpu_regs_fp_ctl:
+	.quad	.Lload_fpu_regs_fp_ctl
+.Lcleanup_load_fpu_regs_fp:
+	.quad	.Lload_fpu_regs_fp
+.Lcleanup_load_fpu_regs_done:
+	.quad	.Lload_fpu_regs_done
+
+.Lcleanup___ctl_set_vx:
+	stctg	%c0,%c0,__SF_EMPTY(%r15)
+	tm	__SF_EMPTY+5(%r15),2
+	bor	%r14
+	oi	__SF_EMPTY+5(%r15),2
+	lctlg	%c0,%c0,__SF_EMPTY(%r15)
+	lg	%r9,48(%r11)		# return from __ctl_set_vx
+	br	%r14
+
 /*
  * Integer constants
  */
@@ -989,62 +1324,11 @@ cleanup_critical:
 	.quad	.L__critical_start
 .Lcritical_length:
 	.quad	.L__critical_end - .L__critical_start
-
-
 #if IS_ENABLED(CONFIG_KVM)
-/*
- * sie64a calling convention:
- * %r2 pointer to sie control block
- * %r3 guest register save area
- */
-ENTRY(sie64a)
-	stmg	%r6,%r14,__SF_GPRS(%r15)	# save kernel registers
-	stg	%r2,__SF_EMPTY(%r15)		# save control block pointer
-	stg	%r3,__SF_EMPTY+8(%r15)		# save guest register save area
-	xc	__SF_EMPTY+16(16,%r15),__SF_EMPTY+16(%r15) # host id & reason
-	lmg	%r0,%r13,0(%r3)			# load guest gprs 0-13
-	lg	%r14,__LC_GMAP			# get gmap pointer
-	ltgr	%r14,%r14
-	jz	.Lsie_gmap
-	lctlg	%c1,%c1,__GMAP_ASCE(%r14)	# load primary asce
-.Lsie_gmap:
-	lg	%r14,__SF_EMPTY(%r15)		# get control block pointer
-	oi	__SIE_PROG0C+3(%r14),1		# we are going into SIE now
-	tm	__SIE_PROG20+3(%r14),3		# last exit...
-	jnz	.Lsie_done
-	LPP	__SF_EMPTY(%r15)		# set guest id
-	sie	0(%r14)
-.Lsie_done:
-	LPP	__SF_EMPTY+16(%r15)		# set host id
-	ni	__SIE_PROG0C+3(%r14),0xfe	# no longer in SIE
-	lctlg	%c1,%c1,__LC_USER_ASCE		# load primary asce
-# some program checks are suppressing. C code (e.g. do_protection_exception)
-# will rewind the PSW by the ILC, which is 4 bytes in case of SIE. Other
-# instructions between sie64a and .Lsie_done should not cause program
-# interrupts. So lets use a nop (47 00 00 00) as a landing pad.
-# See also HANDLE_SIE_INTERCEPT
-.Lrewind_pad:
-	nop	0
-	.globl sie_exit
-sie_exit:
-	lg	%r14,__SF_EMPTY+8(%r15)		# load guest register save area
-	stmg	%r0,%r13,0(%r14)		# save guest gprs 0-13
-	lmg	%r6,%r14,__SF_GPRS(%r15)	# restore kernel registers
-	lg	%r2,__SF_EMPTY+24(%r15)		# return exit reason code
-	br	%r14
-.Lsie_fault:
-	lghi	%r14,-EFAULT
-	stg	%r14,__SF_EMPTY+24(%r15)	# set exit reason code
-	j	sie_exit
-
-	.align	8
-.Lsie_critical:
+.Lsie_critical_start:
 	.quad	.Lsie_gmap
 .Lsie_critical_length:
 	.quad	.Lsie_done - .Lsie_gmap
-
-	EX_TABLE(.Lrewind_pad,.Lsie_fault)
-	EX_TABLE(sie_exit,.Lsie_fault)
 #endif
 
 	.section .rodata, "a"
diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S
index 59b7c6470567..1255c6c5353e 100644
--- a/arch/s390/kernel/head.S
+++ b/arch/s390/kernel/head.S
@@ -370,6 +370,7 @@ ENTRY(startup_kdump)
 	xc	0x200(256),0x200	# partially clear lowcore
 	xc	0x300(256),0x300
 	xc	0xe00(256),0xe00
+	lctlg	%c0,%c15,0x200(%r0)	# initialize control registers
 	stck	__LC_LAST_UPDATE_CLOCK
 	spt	6f-.LPG0(%r13)
 	mvc	__LC_LAST_UPDATE_TIMER(8),6f-.LPG0(%r13)
@@ -413,9 +414,9 @@ ENTRY(startup_kdump)
 # followed by the facility words.
 
 #if defined(CONFIG_MARCH_Z13)
-	.long 3, 0xc100eff2, 0xf46ce800, 0x00400000
+	.long 2, 0xc100eff2, 0xf46cc800
 #elif defined(CONFIG_MARCH_ZEC12)
-	.long 3, 0xc100eff2, 0xf46ce800, 0x00400000
+	.long 2, 0xc100eff2, 0xf46cc800
 #elif defined(CONFIG_MARCH_Z196)
 	.long 2, 0xc100eff2, 0xf46c0000
 #elif defined(CONFIG_MARCH_Z10)
diff --git a/arch/s390/kernel/jump_label.c b/arch/s390/kernel/jump_label.c
index a90299600483..c9dac2139f59 100644
--- a/arch/s390/kernel/jump_label.c
+++ b/arch/s390/kernel/jump_label.c
@@ -44,12 +44,9 @@ static void jump_label_bug(struct jump_entry *entry, struct insn *expected,
 	unsigned char *ipn = (unsigned char *)new;
 
 	pr_emerg("Jump label code mismatch at %pS [%p]\n", ipc, ipc);
-	pr_emerg("Found:    %02x %02x %02x %02x %02x %02x\n",
-		 ipc[0], ipc[1], ipc[2], ipc[3], ipc[4], ipc[5]);
-	pr_emerg("Expected: %02x %02x %02x %02x %02x %02x\n",
-		 ipe[0], ipe[1], ipe[2], ipe[3], ipe[4], ipe[5]);
-	pr_emerg("New:      %02x %02x %02x %02x %02x %02x\n",
-		 ipn[0], ipn[1], ipn[2], ipn[3], ipn[4], ipn[5]);
+	pr_emerg("Found:    %6ph\n", ipc);
+	pr_emerg("Expected: %6ph\n", ipe);
+	pr_emerg("New:      %6ph\n", ipn);
 	panic("Corrupted kernel text");
 }
 
diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c
index 56b550893593..0ae6f8e74840 100644
--- a/arch/s390/kernel/nmi.c
+++ b/arch/s390/kernel/nmi.c
@@ -21,6 +21,7 @@
 #include <asm/nmi.h>
 #include <asm/crw.h>
 #include <asm/switch_to.h>
+#include <asm/fpu-internal.h>
 #include <asm/ctl_reg.h>
 
 struct mcck_struct {
@@ -164,8 +165,12 @@ static int notrace s390_revalidate_registers(struct mci *mci)
 		cr0.val = S390_lowcore.cregs_save_area[0];
 		cr0.afp = cr0.vx = 1;
 		__ctl_load(cr0.val, 0, 0);
-		restore_vx_regs((__vector128 *)
-				&S390_lowcore.vector_save_area);
+		asm volatile(
+			"	la	1,%0\n"
+			"	.word	0xe70f,0x1000,0x0036\n"	/* vlm 0,15,0(1) */
+			"	.word	0xe70f,0x1100,0x0c36\n"	/* vlm 16,31,256(1) */
+			: : "Q" (*(struct vx_array *)
+				 &S390_lowcore.vector_save_area) : "1");
 		__ctl_load(S390_lowcore.cregs_save_area[0], 0, 0);
 	}
 	/* Revalidate access registers */
@@ -358,4 +363,4 @@ static int __init machine_check_init(void)
 	ctl_set_bit(14, 24);	/* enable warning MCH */
 	return 0;
 }
-arch_initcall(machine_check_init);
+early_initcall(machine_check_init);
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index afe05bfb7e00..b973972f6ba5 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -1019,12 +1019,9 @@ static int perf_push_sample(struct perf_event *event, struct sf_raw_sample *sfr)
 		break;
 	}
 
-	/* The host-program-parameter (hpp) contains the sie control
-	 * block that is set by sie64a() in entry64.S.	Check if hpp
-	 * refers to a valid control block and set sde_regs flags
-	 * accordingly.  This would allow to use hpp values for other
-	 * purposes too.
-	 * For now, simply use a non-zero value as guest indicator.
+	/* The host-program-parameter (hpp) contains the pid of
+	 * the CPU thread as set by sie64a() in entry.S.
+	 * If non-zero assume a guest sample.
 	 */
 	if (sfr->basic.hpp)
 		sde_regs->in_guest = 1;
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 8f587d871b9f..f2dac9f0799d 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -81,8 +81,38 @@ void release_thread(struct task_struct *dead_task)
 
 void arch_release_task_struct(struct task_struct *tsk)
 {
-	if (tsk->thread.vxrs)
-		kfree(tsk->thread.vxrs);
+	/* Free either the floating-point or the vector register save area */
+	kfree(tsk->thread.fpu.regs);
+}
+
+int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
+{
+	*dst = *src;
+
+	/* Set up a new floating-point register save area */
+	dst->thread.fpu.fpc = 0;
+	dst->thread.fpu.flags = 0;	/* Always start with VX disabled */
+	dst->thread.fpu.fprs = kzalloc(sizeof(freg_t) * __NUM_FPRS,
+				       GFP_KERNEL|__GFP_REPEAT);
+	if (!dst->thread.fpu.fprs)
+		return -ENOMEM;
+
+	/*
+	 * Save the floating-point or vector register state of the current
+	 * task.  The state is not saved for early kernel threads, for example,
+	 * the init_task, which do not have an allocated save area.
+	 * The CIF_FPU flag is set in any case to lazy clear or restore a saved
+	 * state when switching to a different task or returning to user space.
+	 */
+	save_fpu_regs();
+	dst->thread.fpu.fpc = current->thread.fpu.fpc;
+	if (is_vx_task(current))
+		convert_vx_to_fp(dst->thread.fpu.fprs,
+				 current->thread.fpu.vxrs);
+	else
+		memcpy(dst->thread.fpu.fprs, current->thread.fpu.fprs,
+		       sizeof(freg_t) * __NUM_FPRS);
+	return 0;
 }
 
 int copy_thread(unsigned long clone_flags, unsigned long new_stackp,
@@ -142,11 +172,6 @@ int copy_thread(unsigned long clone_flags, unsigned long new_stackp,
 	p->thread.ri_signum = 0;
 	frame->childregs.psw.mask &= ~PSW_MASK_RI;
 
-	/* Save the fpu registers to new thread structure. */
-	save_fp_ctl(&p->thread.fp_regs.fpc);
-	save_fp_regs(p->thread.fp_regs.fprs);
-	p->thread.fp_regs.pad = 0;
-	p->thread.vxrs = NULL;
 	/* Set a new TLS ?  */
 	if (clone_flags & CLONE_SETTLS) {
 		unsigned long tls = frame->childregs.gprs[6];
@@ -162,7 +187,7 @@ int copy_thread(unsigned long clone_flags, unsigned long new_stackp,
 
 asmlinkage void execve_tail(void)
 {
-	current->thread.fp_regs.fpc = 0;
+	current->thread.fpu.fpc = 0;
 	asm volatile("sfpc %0" : : "d" (0));
 }
 
@@ -171,8 +196,15 @@ asmlinkage void execve_tail(void)
  */
 int dump_fpu (struct pt_regs * regs, s390_fp_regs *fpregs)
 {
-	save_fp_ctl(&fpregs->fpc);
-	save_fp_regs(fpregs->fprs);
+	save_fpu_regs();
+	fpregs->fpc = current->thread.fpu.fpc;
+	fpregs->pad = 0;
+	if (is_vx_task(current))
+		convert_vx_to_fp((freg_t *)&fpregs->fprs,
+				 current->thread.fpu.vxrs);
+	else
+		memcpy(&fpregs->fprs, current->thread.fpu.fprs,
+		       sizeof(fpregs->fprs));
 	return 1;
 }
 EXPORT_SYMBOL(dump_fpu);
diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c
index dc488e13b7e3..e6e077ae3990 100644
--- a/arch/s390/kernel/processor.c
+++ b/arch/s390/kernel/processor.c
@@ -41,6 +41,15 @@ void cpu_init(void)
 }
 
 /*
+ * cpu_have_feature - Test CPU features on module initialization
+ */
+int cpu_have_feature(unsigned int num)
+{
+	return elf_hwcap & (1UL << num);
+}
+EXPORT_SYMBOL(cpu_have_feature);
+
+/*
  * show_cpuinfo - Get information on one CPU for use by procfs.
  */
 static int show_cpuinfo(struct seq_file *m, void *v)
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index d363c9c322a1..8b1c8e33f184 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -45,39 +45,27 @@ void update_cr_regs(struct task_struct *task)
 	struct per_regs old, new;
 
 	/* Take care of the enable/disable of transactional execution. */
-	if (MACHINE_HAS_TE || MACHINE_HAS_VX) {
+	if (MACHINE_HAS_TE) {
 		unsigned long cr, cr_new;
 
 		__ctl_store(cr, 0, 0);
-		cr_new = cr;
-		if (MACHINE_HAS_TE) {
-			/* Set or clear transaction execution TXC bit 8. */
-			cr_new |= (1UL << 55);
-			if (task->thread.per_flags & PER_FLAG_NO_TE)
-				cr_new &= ~(1UL << 55);
-		}
-		if (MACHINE_HAS_VX) {
-			/* Enable/disable of vector extension */
-			cr_new &= ~(1UL << 17);
-			if (task->thread.vxrs)
-				cr_new |= (1UL << 17);
-		}
+		/* Set or clear transaction execution TXC bit 8. */
+		cr_new = cr | (1UL << 55);
+		if (task->thread.per_flags & PER_FLAG_NO_TE)
+			cr_new &= ~(1UL << 55);
 		if (cr_new != cr)
 			__ctl_load(cr_new, 0, 0);
-		if (MACHINE_HAS_TE) {
-			/* Set/clear transaction execution TDC bits 62/63. */
-			__ctl_store(cr, 2, 2);
-			cr_new = cr & ~3UL;
-			if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND) {
-				if (task->thread.per_flags &
-				    PER_FLAG_TE_ABORT_RAND_TEND)
-					cr_new |= 1UL;
-				else
-					cr_new |= 2UL;
-			}
-			if (cr_new != cr)
-				__ctl_load(cr_new, 2, 2);
+		/* Set or clear transaction execution TDC bits 62 and 63. */
+		__ctl_store(cr, 2, 2);
+		cr_new = cr & ~3UL;
+		if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND) {
+			if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND_TEND)
+				cr_new |= 1UL;
+			else
+				cr_new |= 2UL;
 		}
+		if (cr_new != cr)
+			__ctl_load(cr_new, 2, 2);
 	}
 	/* Copy user specified PER registers */
 	new.control = thread->per_user.control;
@@ -242,21 +230,21 @@ static unsigned long __peek_user(struct task_struct *child, addr_t addr)
 		/*
 		 * floating point control reg. is in the thread structure
 		 */
-		tmp = child->thread.fp_regs.fpc;
+		tmp = child->thread.fpu.fpc;
 		tmp <<= BITS_PER_LONG - 32;
 
 	} else if (addr < (addr_t) (&dummy->regs.fp_regs + 1)) {
 		/*
-		 * floating point regs. are either in child->thread.fp_regs
-		 * or the child->thread.vxrs array
+		 * floating point regs. are either in child->thread.fpu
+		 * or the child->thread.fpu.vxrs array
 		 */
 		offset = addr - (addr_t) &dummy->regs.fp_regs.fprs;
-		if (child->thread.vxrs)
+		if (is_vx_task(child))
 			tmp = *(addr_t *)
-			       ((addr_t) child->thread.vxrs + 2*offset);
+			       ((addr_t) child->thread.fpu.vxrs + 2*offset);
 		else
 			tmp = *(addr_t *)
-			       ((addr_t) &child->thread.fp_regs.fprs + offset);
+			       ((addr_t) &child->thread.fpu.fprs + offset);
 
 	} else if (addr < (addr_t) (&dummy->regs.per_info + 1)) {
 		/*
@@ -387,20 +375,20 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data)
 		if ((unsigned int) data != 0 ||
 		    test_fp_ctl(data >> (BITS_PER_LONG - 32)))
 			return -EINVAL;
-		child->thread.fp_regs.fpc = data >> (BITS_PER_LONG - 32);
+		child->thread.fpu.fpc = data >> (BITS_PER_LONG - 32);
 
 	} else if (addr < (addr_t) (&dummy->regs.fp_regs + 1)) {
 		/*
-		 * floating point regs. are either in child->thread.fp_regs
-		 * or the child->thread.vxrs array
+		 * floating point regs. are either in child->thread.fpu
+		 * or the child->thread.fpu.vxrs array
 		 */
 		offset = addr - (addr_t) &dummy->regs.fp_regs.fprs;
-		if (child->thread.vxrs)
+		if (is_vx_task(child))
 			*(addr_t *)((addr_t)
-				child->thread.vxrs + 2*offset) = data;
+				child->thread.fpu.vxrs + 2*offset) = data;
 		else
 			*(addr_t *)((addr_t)
-				&child->thread.fp_regs.fprs + offset) = data;
+				&child->thread.fpu.fprs + offset) = data;
 
 	} else if (addr < (addr_t) (&dummy->regs.per_info + 1)) {
 		/*
@@ -621,20 +609,20 @@ static u32 __peek_user_compat(struct task_struct *child, addr_t addr)
 		/*
 		 * floating point control reg. is in the thread structure
 		 */
-		tmp = child->thread.fp_regs.fpc;
+		tmp = child->thread.fpu.fpc;
 
 	} else if (addr < (addr_t) (&dummy32->regs.fp_regs + 1)) {
 		/*
-		 * floating point regs. are either in child->thread.fp_regs
-		 * or the child->thread.vxrs array
+		 * floating point regs. are either in child->thread.fpu
+		 * or the child->thread.fpu.vxrs array
 		 */
 		offset = addr - (addr_t) &dummy32->regs.fp_regs.fprs;
-		if (child->thread.vxrs)
+		if (is_vx_task(child))
 			tmp = *(__u32 *)
-			       ((addr_t) child->thread.vxrs + 2*offset);
+			       ((addr_t) child->thread.fpu.vxrs + 2*offset);
 		else
 			tmp = *(__u32 *)
-			       ((addr_t) &child->thread.fp_regs.fprs + offset);
+			       ((addr_t) &child->thread.fpu.fprs + offset);
 
 	} else if (addr < (addr_t) (&dummy32->regs.per_info + 1)) {
 		/*
@@ -746,20 +734,20 @@ static int __poke_user_compat(struct task_struct *child,
 		 */
 		if (test_fp_ctl(tmp))
 			return -EINVAL;
-		child->thread.fp_regs.fpc = data;
+		child->thread.fpu.fpc = data;
 
 	} else if (addr < (addr_t) (&dummy32->regs.fp_regs + 1)) {
 		/*
-		 * floating point regs. are either in child->thread.fp_regs
-		 * or the child->thread.vxrs array
+		 * floating point regs. are either in child->thread.fpu
+		 * or the child->thread.fpu.vxrs array
 		 */
 		offset = addr - (addr_t) &dummy32->regs.fp_regs.fprs;
-		if (child->thread.vxrs)
+		if (is_vx_task(child))
 			*(__u32 *)((addr_t)
-				child->thread.vxrs + 2*offset) = tmp;
+				child->thread.fpu.vxrs + 2*offset) = tmp;
 		else
 			*(__u32 *)((addr_t)
-				&child->thread.fp_regs.fprs + offset) = tmp;
+				&child->thread.fpu.fprs + offset) = tmp;
 
 	} else if (addr < (addr_t) (&dummy32->regs.per_info + 1)) {
 		/*
@@ -952,18 +940,16 @@ static int s390_fpregs_get(struct task_struct *target,
 			   const struct user_regset *regset, unsigned int pos,
 			   unsigned int count, void *kbuf, void __user *ubuf)
 {
-	if (target == current) {
-		save_fp_ctl(&target->thread.fp_regs.fpc);
-		save_fp_regs(target->thread.fp_regs.fprs);
-	} else if (target->thread.vxrs) {
-		int i;
+	_s390_fp_regs fp_regs;
+
+	if (target == current)
+		save_fpu_regs();
+
+	fp_regs.fpc = target->thread.fpu.fpc;
+	fpregs_store(&fp_regs, &target->thread.fpu);
 
-		for (i = 0; i < __NUM_VXRS_LOW; i++)
-			target->thread.fp_regs.fprs[i] =
-				*(freg_t *)(target->thread.vxrs + i);
-	}
 	return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
-				   &target->thread.fp_regs, 0, -1);
+				   &fp_regs, 0, -1);
 }
 
 static int s390_fpregs_set(struct task_struct *target,
@@ -972,41 +958,33 @@ static int s390_fpregs_set(struct task_struct *target,
 			   const void __user *ubuf)
 {
 	int rc = 0;
+	freg_t fprs[__NUM_FPRS];
 
-	if (target == current) {
-		save_fp_ctl(&target->thread.fp_regs.fpc);
-		save_fp_regs(target->thread.fp_regs.fprs);
-	}
+	if (target == current)
+		save_fpu_regs();
 
 	/* If setting FPC, must validate it first. */
 	if (count > 0 && pos < offsetof(s390_fp_regs, fprs)) {
-		u32 ufpc[2] = { target->thread.fp_regs.fpc, 0 };
+		u32 ufpc[2] = { target->thread.fpu.fpc, 0 };
 		rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &ufpc,
 					0, offsetof(s390_fp_regs, fprs));
 		if (rc)
 			return rc;
 		if (ufpc[1] != 0 || test_fp_ctl(ufpc[0]))
 			return -EINVAL;
-		target->thread.fp_regs.fpc = ufpc[0];
+		target->thread.fpu.fpc = ufpc[0];
 	}
 
 	if (rc == 0 && count > 0)
 		rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
-					target->thread.fp_regs.fprs,
-					offsetof(s390_fp_regs, fprs), -1);
-
-	if (rc == 0) {
-		if (target == current) {
-			restore_fp_ctl(&target->thread.fp_regs.fpc);
-			restore_fp_regs(target->thread.fp_regs.fprs);
-		} else if (target->thread.vxrs) {
-			int i;
-
-			for (i = 0; i < __NUM_VXRS_LOW; i++)
-				*(freg_t *)(target->thread.vxrs + i) =
-					target->thread.fp_regs.fprs[i];
-		}
-	}
+					fprs, offsetof(s390_fp_regs, fprs), -1);
+	if (rc)
+		return rc;
+
+	if (is_vx_task(target))
+		convert_fp_to_vx(target->thread.fpu.vxrs, fprs);
+	else
+		memcpy(target->thread.fpu.fprs, &fprs, sizeof(fprs));
 
 	return rc;
 }
@@ -1069,11 +1047,11 @@ static int s390_vxrs_low_get(struct task_struct *target,
 
 	if (!MACHINE_HAS_VX)
 		return -ENODEV;
-	if (target->thread.vxrs) {
+	if (is_vx_task(target)) {
 		if (target == current)
-			save_vx_regs(target->thread.vxrs);
+			save_fpu_regs();
 		for (i = 0; i < __NUM_VXRS_LOW; i++)
-			vxrs[i] = *((__u64 *)(target->thread.vxrs + i) + 1);
+			vxrs[i] = *((__u64 *)(target->thread.fpu.vxrs + i) + 1);
 	} else
 		memset(vxrs, 0, sizeof(vxrs));
 	return user_regset_copyout(&pos, &count, &kbuf, &ubuf, vxrs, 0, -1);
@@ -1089,20 +1067,17 @@ static int s390_vxrs_low_set(struct task_struct *target,
 
 	if (!MACHINE_HAS_VX)
 		return -ENODEV;
-	if (!target->thread.vxrs) {
+	if (!is_vx_task(target)) {
 		rc = alloc_vector_registers(target);
 		if (rc)
 			return rc;
 	} else if (target == current)
-		save_vx_regs(target->thread.vxrs);
+		save_fpu_regs();
 
 	rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, vxrs, 0, -1);
-	if (rc == 0) {
+	if (rc == 0)
 		for (i = 0; i < __NUM_VXRS_LOW; i++)
-			*((__u64 *)(target->thread.vxrs + i) + 1) = vxrs[i];
-		if (target == current)
-			restore_vx_regs(target->thread.vxrs);
-	}
+			*((__u64 *)(target->thread.fpu.vxrs + i) + 1) = vxrs[i];
 
 	return rc;
 }
@@ -1116,10 +1091,10 @@ static int s390_vxrs_high_get(struct task_struct *target,
 
 	if (!MACHINE_HAS_VX)
 		return -ENODEV;
-	if (target->thread.vxrs) {
+	if (is_vx_task(target)) {
 		if (target == current)
-			save_vx_regs(target->thread.vxrs);
-		memcpy(vxrs, target->thread.vxrs + __NUM_VXRS_LOW,
+			save_fpu_regs();
+		memcpy(vxrs, target->thread.fpu.vxrs + __NUM_VXRS_LOW,
 		       sizeof(vxrs));
 	} else
 		memset(vxrs, 0, sizeof(vxrs));
@@ -1135,18 +1110,15 @@ static int s390_vxrs_high_set(struct task_struct *target,
 
 	if (!MACHINE_HAS_VX)
 		return -ENODEV;
-	if (!target->thread.vxrs) {
+	if (!is_vx_task(target)) {
 		rc = alloc_vector_registers(target);
 		if (rc)
 			return rc;
 	} else if (target == current)
-		save_vx_regs(target->thread.vxrs);
+		save_fpu_regs();
 
 	rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
-				target->thread.vxrs + __NUM_VXRS_LOW, 0, -1);
-	if (rc == 0 && target == current)
-		restore_vx_regs(target->thread.vxrs);
-
+				target->thread.fpu.vxrs + __NUM_VXRS_LOW, 0, -1);
 	return rc;
 }
 
diff --git a/arch/s390/kernel/s390_ksyms.c b/arch/s390/kernel/s390_ksyms.c
index 9f60467938d1..5090d3dad10b 100644
--- a/arch/s390/kernel/s390_ksyms.c
+++ b/arch/s390/kernel/s390_ksyms.c
@@ -1,5 +1,6 @@
 #include <linux/module.h>
 #include <linux/kvm_host.h>
+#include <asm/fpu-internal.h>
 #include <asm/ftrace.h>
 
 #ifdef CONFIG_FUNCTION_TRACER
@@ -8,6 +9,8 @@ EXPORT_SYMBOL(_mcount);
 #if IS_ENABLED(CONFIG_KVM)
 EXPORT_SYMBOL(sie64a);
 EXPORT_SYMBOL(sie_exit);
+EXPORT_SYMBOL(save_fpu_regs);
+EXPORT_SYMBOL(__ctl_set_vx);
 #endif
 EXPORT_SYMBOL(memcpy);
 EXPORT_SYMBOL(memset);
diff --git a/arch/s390/kernel/sclp.S b/arch/s390/kernel/sclp.S
deleted file mode 100644
index ada0c07fe1a8..000000000000
--- a/arch/s390/kernel/sclp.S
+++ /dev/null
@@ -1,355 +0,0 @@
-/*
- * Mini SCLP driver.
- *
- * Copyright IBM Corp. 2004, 2009
- *
- *   Author(s):	Peter Oberparleiter <Peter.Oberparleiter@de.ibm.com>,
- *		Heiko Carstens <heiko.carstens@de.ibm.com>,
- *
- */
-
-#include <linux/linkage.h>
-#include <asm/irq.h>
-
-LC_EXT_NEW_PSW		= 0x58			# addr of ext int handler
-LC_EXT_NEW_PSW_64	= 0x1b0			# addr of ext int handler 64 bit
-LC_EXT_INT_PARAM	= 0x80			# addr of ext int parameter
-LC_EXT_INT_CODE		= 0x86			# addr of ext int code
-LC_AR_MODE_ID		= 0xa3
-
-#
-# Subroutine which waits synchronously until either an external interruption
-# or a timeout occurs.
-#
-# Parameters:
-#   R2	= 0 for no timeout, non-zero for timeout in (approximated) seconds
-#
-# Returns:
-#   R2	= 0 on interrupt, 2 on timeout
-#   R3	= external interruption parameter if R2=0
-#
-
-_sclp_wait_int:
-	stm	%r6,%r15,24(%r15)		# save registers
-	basr	%r13,0				# get base register
-.LbaseS1:
-	ahi	%r15,-96			# create stack frame
-	la	%r8,LC_EXT_NEW_PSW		# register int handler
-	la	%r9,.LextpswS1-.LbaseS1(%r13)
-	tm	LC_AR_MODE_ID,1
-	jno	.Lesa1
-	la	%r8,LC_EXT_NEW_PSW_64		# register int handler 64 bit
-	la	%r9,.LextpswS1_64-.LbaseS1(%r13)
-.Lesa1:
-	mvc	.LoldpswS1-.LbaseS1(16,%r13),0(%r8)
-	mvc	0(16,%r8),0(%r9)
-	epsw	%r6,%r7				# set current addressing mode
-	nill	%r6,0x1				# in new psw (31 or 64 bit mode)
-	nilh	%r7,0x8000
-	stm	%r6,%r7,0(%r8)
-	lhi	%r6,0x0200			# cr mask for ext int (cr0.54)
-	ltr	%r2,%r2
-	jz	.LsetctS1
-	ahi	%r6,0x0800			# cr mask for clock int (cr0.52)
-	stck	.LtimeS1-.LbaseS1(%r13)		# initiate timeout
-	al	%r2,.LtimeS1-.LbaseS1(%r13)
-	st	%r2,.LtimeS1-.LbaseS1(%r13)
-	sckc	.LtimeS1-.LbaseS1(%r13)
-
-.LsetctS1:
-	stctl	%c0,%c0,.LctlS1-.LbaseS1(%r13)	# enable required interrupts
-	l	%r0,.LctlS1-.LbaseS1(%r13)
-	lhi	%r1,~(0x200 | 0x800)		# clear old values
-	nr	%r1,%r0
-	or	%r1,%r6				# set new value
-	st	%r1,.LctlS1-.LbaseS1(%r13)
-	lctl	%c0,%c0,.LctlS1-.LbaseS1(%r13)
-	st	%r0,.LctlS1-.LbaseS1(%r13)
-	lhi	%r2,2				# return code for timeout
-.LloopS1:
-	lpsw	.LwaitpswS1-.LbaseS1(%r13)	# wait until interrupt
-.LwaitS1:
-	lh	%r7,LC_EXT_INT_CODE
-	chi	%r7,EXT_IRQ_CLK_COMP		# timeout?
-	je	.LtimeoutS1
-	chi	%r7,EXT_IRQ_SERVICE_SIG		# service int?
-	jne	.LloopS1
-	sr	%r2,%r2
-	l	%r3,LC_EXT_INT_PARAM
-.LtimeoutS1:
-	lctl	%c0,%c0,.LctlS1-.LbaseS1(%r13)	# restore interrupt setting
-	# restore old handler
-	mvc	0(16,%r8),.LoldpswS1-.LbaseS1(%r13)
-	lm	%r6,%r15,120(%r15)		# restore registers
-	br	%r14				# return to caller
-
-	.align	8
-.LoldpswS1:
-	.long	0, 0, 0, 0			# old ext int PSW
-.LextpswS1:
-	.long	0x00080000, 0x80000000+.LwaitS1	# PSW to handle ext int
-.LextpswS1_64:
-	.quad	0, .LwaitS1			# PSW to handle ext int, 64 bit
-.LwaitpswS1:
-	.long	0x010a0000, 0x00000000+.LloopS1	# PSW to wait for ext int
-.LtimeS1:
-	.quad	0				# current time
-.LctlS1:
-	.long	0				# CT0 contents
-
-#
-# Subroutine to synchronously issue a service call.
-#
-# Parameters:
-#   R2	= command word
-#   R3	= sccb address
-#
-# Returns:
-#   R2	= 0 on success, 1 on failure
-#   R3	= sccb response code if R2 = 0
-#
-
-_sclp_servc:
-	stm	%r6,%r15,24(%r15)		# save registers
-	ahi	%r15,-96			# create stack frame
-	lr	%r6,%r2				# save command word
-	lr	%r7,%r3				# save sccb address
-.LretryS2:
-	lhi	%r2,1				# error return code
-	.insn	rre,0xb2200000,%r6,%r7		# servc
-	brc	1,.LendS2			# exit if not operational
-	brc	8,.LnotbusyS2			# go on if not busy
-	sr	%r2,%r2				# wait until no longer busy
-	bras	%r14,_sclp_wait_int
-	j	.LretryS2			# retry
-.LnotbusyS2:
-	sr	%r2,%r2				# wait until result
-	bras	%r14,_sclp_wait_int
-	sr	%r2,%r2
-	lh	%r3,6(%r7)
-.LendS2:
-	lm	%r6,%r15,120(%r15)		# restore registers
-	br	%r14
-
-#
-# Subroutine to set up the SCLP interface.
-#
-# Parameters:
-#   R2	= 0 to activate, non-zero to deactivate
-#
-# Returns:
-#   R2	= 0 on success, non-zero on failure
-#
-
-_sclp_setup:
-	stm	%r6,%r15,24(%r15)		# save registers
-	ahi	%r15,-96			# create stack frame
-	basr	%r13,0				# get base register
-.LbaseS3:
-	l	%r6,.LsccbS0-.LbaseS3(%r13)	# prepare init mask sccb
-	mvc	0(.LinitendS3-.LinitsccbS3,%r6),.LinitsccbS3-.LbaseS3(%r13)
-	ltr	%r2,%r2				# initialization?
-	jz	.LdoinitS3			# go ahead
-	# clear masks
-	xc	.LinitmaskS3-.LinitsccbS3(8,%r6),.LinitmaskS3-.LinitsccbS3(%r6)
-.LdoinitS3:
-	l	%r2,.LwritemaskS3-.LbaseS3(%r13)# get command word
-	lr	%r3,%r6				# get sccb address
-	bras	%r14,_sclp_servc		# issue service call
-	ltr	%r2,%r2				# servc successful?
-	jnz	.LerrorS3
-	chi	%r3,0x20			# write mask successful?
-	jne	.LerrorS3
-	# check masks
-	la	%r2,.LinitmaskS3-.LinitsccbS3(%r6)
-	l	%r1,0(%r2)			# receive mask ok?
-	n	%r1,12(%r2)
-	cl	%r1,0(%r2)
-	jne	.LerrorS3
-	l	%r1,4(%r2)			# send mask ok?
-	n	%r1,8(%r2)
-	cl	%r1,4(%r2)
-	sr	%r2,%r2
-	je	.LendS3
-.LerrorS3:
-	lhi	%r2,1				# error return code
-.LendS3:
-	lm	%r6,%r15,120(%r15)		# restore registers
-	br	%r14
-.LwritemaskS3:
-	.long	0x00780005			# SCLP command for write mask
-.LinitsccbS3:
-	.word	.LinitendS3-.LinitsccbS3
-	.byte	0,0,0,0
-	.word	0
-	.word	0
-	.word	4
-.LinitmaskS3:
-	.long	0x80000000
-	.long	0x40000000
-	.long	0
-	.long	0
-.LinitendS3:
-
-#
-# Subroutine which prints a given text to the SCLP console.
-#
-# Parameters:
-#   R2	= address of nil-terminated ASCII text
-#
-# Returns:
-#   R2	= 0 on success, 1 on failure
-#
-
-_sclp_print:
-	stm	%r6,%r15,24(%r15)		# save registers
-	ahi	%r15,-96			# create stack frame
-	basr	%r13,0				# get base register
-.LbaseS4:
-	l	%r8,.LsccbS0-.LbaseS4(%r13)	# prepare write data sccb
-	mvc	0(.LmtoS4-.LwritesccbS4,%r8),.LwritesccbS4-.LbaseS4(%r13)
-	la	%r7,.LmtoS4-.LwritesccbS4(%r8)	# current mto addr
-	sr	%r0,%r0
-	l	%r10,.Lascebc-.LbaseS4(%r13)	# address of translation table
-.LinitmtoS4:
-	# initialize mto
-	mvc	0(.LmtoendS4-.LmtoS4,%r7),.LmtoS4-.LbaseS4(%r13)
-	lhi	%r6,.LmtoendS4-.LmtoS4		# current mto length
-.LloopS4:
-	ic	%r0,0(%r2)			# get character
-	ahi	%r2,1
-	ltr	%r0,%r0				# end of string?
-	jz	.LfinalizemtoS4
-	chi	%r0,0x0a			# end of line (NL)?
-	jz	.LfinalizemtoS4
-	stc	%r0,0(%r6,%r7)			# copy to mto
-	la	%r11,0(%r6,%r7)
-	tr	0(1,%r11),0(%r10)		# translate to EBCDIC
-	ahi	%r6,1
-	j	.LloopS4
-.LfinalizemtoS4:
-	sth	%r6,0(%r7)			# update mto length
-	lh	%r9,.LmdbS4-.LwritesccbS4(%r8)	# update mdb length
-	ar	%r9,%r6
-	sth	%r9,.LmdbS4-.LwritesccbS4(%r8)
-	lh	%r9,.LevbufS4-.LwritesccbS4(%r8)# update evbuf length
-	ar	%r9,%r6
-	sth	%r9,.LevbufS4-.LwritesccbS4(%r8)
-	lh	%r9,0(%r8)			# update sccb length
-	ar	%r9,%r6
-	sth	%r9,0(%r8)
-	ar	%r7,%r6				# update current mto address
-	ltr	%r0,%r0				# more characters?
-	jnz	.LinitmtoS4
-	l	%r2,.LwritedataS4-.LbaseS4(%r13)# write data
-	lr	%r3,%r8
-	bras	%r14,_sclp_servc
-	ltr	%r2,%r2				# servc successful?
-	jnz	.LendS4
-	chi	%r3,0x20			# write data successful?
-	je	.LendS4
-	lhi	%r2,1				# error return code
-.LendS4:
-	lm	%r6,%r15,120(%r15)		# restore registers
-	br	%r14
-
-#
-# Function which prints a given text to the SCLP console.
-#
-# Parameters:
-#   R2	= address of nil-terminated ASCII text
-#
-# Returns:
-#   R2	= 0 on success, 1 on failure
-#
-
-ENTRY(_sclp_print_early)
-	stm	%r6,%r15,24(%r15)		# save registers
-	ahi	%r15,-96			# create stack frame
-	tm	LC_AR_MODE_ID,1
-	jno	.Lesa2
-	ahi	%r15,-80
-	stmh	%r6,%r15,96(%r15)		# store upper register halves
-	basr	%r13,0
-	lmh	%r0,%r15,.Lzeroes-.(%r13)	# clear upper register halves
-.Lesa2:
-	lr	%r10,%r2			# save string pointer
-	lhi	%r2,0
-	bras	%r14,_sclp_setup		# enable console
-	ltr	%r2,%r2
-	jnz	.LendS5
-	lr	%r2,%r10
-	bras	%r14,_sclp_print		# print string
-	ltr	%r2,%r2
-	jnz	.LendS5
-	lhi	%r2,1
-	bras	%r14,_sclp_setup		# disable console
-.LendS5:
-	tm	LC_AR_MODE_ID,1
-	jno	.Lesa3
-	lgfr	%r2,%r2				# sign extend return value
-	lmh	%r6,%r15,96(%r15)		# restore upper register halves
-	ahi	%r15,80
-.Lesa3:
-	lm	%r6,%r15,120(%r15)		# restore registers
-	br	%r14
-.Lzeroes:
-	.fill	64,4,0
-
-.LwritedataS4:
-	.long	0x00760005			# SCLP command for write data
-.LwritesccbS4:
-	# sccb
-	.word	.LmtoS4-.LwritesccbS4
-	.byte	0
-	.byte	0,0,0
-	.word	0
-
-	# evbuf
-.LevbufS4:
-	.word	.LmtoS4-.LevbufS4
-	.byte	0x02
-	.byte	0
-	.word	0
-
-.LmdbS4:
-	# mdb
-	.word	.LmtoS4-.LmdbS4
-	.word	1
-	.long	0xd4c4c240
-	.long	1
-
-	# go
-.LgoS4:
-	.word	.LmtoS4-.LgoS4
-	.word	1
-	.long	0
-	.byte	0,0,0,0,0,0,0,0
-	.byte	0,0,0
-	.byte	0
-	.byte	0,0,0,0,0,0,0
-	.byte	0
-	.word	0
-	.byte	0,0,0,0,0,0,0,0,0,0
-	.byte	0,0,0,0,0,0,0,0
-	.byte	0,0,0,0,0,0,0,0
-
-.LmtoS4:
-	.word	.LmtoendS4-.LmtoS4
-	.word	4
-	.word	0x1000
-	.byte	0
-	.byte	0,0,0
-.LmtoendS4:
-
-	# Global constants
-.LsccbS0:
-	.long	_sclp_work_area
-.Lascebc:
-	.long	_ascebc
-
-.section .data,"aw",@progbits
-	.balign 4096
-_sclp_work_area:
-	.fill	4096
-.previous
diff --git a/arch/s390/kernel/sclp.c b/arch/s390/kernel/sclp.c
new file mode 100644
index 000000000000..fa0bdff1d413
--- /dev/null
+++ b/arch/s390/kernel/sclp.c
@@ -0,0 +1,160 @@
+/*
+ *    Copyright IBM Corp. 2015
+ *    Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+#include <linux/kernel.h>
+#include <asm/ebcdic.h>
+#include <asm/irq.h>
+#include <asm/lowcore.h>
+#include <asm/processor.h>
+#include <asm/sclp.h>
+
+static char _sclp_work_area[4096] __aligned(PAGE_SIZE);
+
+static void _sclp_wait_int(void)
+{
+	unsigned long cr0, cr0_new, psw_mask, addr;
+	psw_t psw_ext_save, psw_wait;
+
+	__ctl_store(cr0, 0, 0);
+	cr0_new = cr0 | 0x200;
+	__ctl_load(cr0_new, 0, 0);
+
+	psw_ext_save = S390_lowcore.external_new_psw;
+	psw_mask = __extract_psw() & (PSW_MASK_EA | PSW_MASK_BA);
+	S390_lowcore.external_new_psw.mask = psw_mask;
+	psw_wait.mask = psw_mask | PSW_MASK_EXT | PSW_MASK_WAIT;
+	S390_lowcore.ext_int_code = 0;
+
+	do {
+		asm volatile(
+			"	larl	%[addr],0f\n"
+			"	stg	%[addr],%[psw_wait_addr]\n"
+			"	stg	%[addr],%[psw_ext_addr]\n"
+			"	lpswe	%[psw_wait]\n"
+			"0:\n"
+			: [addr] "=&d" (addr),
+			  [psw_wait_addr] "=Q" (psw_wait.addr),
+			  [psw_ext_addr] "=Q" (S390_lowcore.external_new_psw.addr)
+			: [psw_wait] "Q" (psw_wait)
+			: "cc", "memory");
+	} while (S390_lowcore.ext_int_code != EXT_IRQ_SERVICE_SIG);
+
+	__ctl_load(cr0, 0, 0);
+	S390_lowcore.external_new_psw = psw_ext_save;
+}
+
+static int _sclp_servc(unsigned int cmd, char *sccb)
+{
+	unsigned int cc;
+
+	do {
+		asm volatile(
+			"	.insn	rre,0xb2200000,%1,%2\n"
+			"	ipm	%0\n"
+			: "=d" (cc) : "d" (cmd), "a" (sccb)
+			: "cc", "memory");
+		cc >>= 28;
+		if (cc == 3)
+			return -EINVAL;
+		_sclp_wait_int();
+	} while (cc != 0);
+	return (*(unsigned short *)(sccb + 6) == 0x20) ? 0 : -EIO;
+}
+
+static int _sclp_setup(int disable)
+{
+	static unsigned char init_sccb[] = {
+		0x00, 0x1c,
+		0x00, 0x00, 0x00, 0x00,	0x00, 0x00, 0x00, 0x00,
+		0x00, 0x04,
+		0x80, 0x00, 0x00, 0x00,	0x40, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00,	0x00, 0x00, 0x00, 0x00
+	};
+	unsigned int *masks;
+	int rc;
+
+	memcpy(_sclp_work_area, init_sccb, 28);
+	masks = (unsigned int *)(_sclp_work_area + 12);
+	if (disable)
+		memset(masks, 0, 16);
+	/* SCLP write mask */
+	rc = _sclp_servc(0x00780005, _sclp_work_area);
+	if (rc)
+		return rc;
+	if ((masks[0] & masks[3]) != masks[0] ||
+	    (masks[1] & masks[2]) != masks[1])
+		return -EIO;
+	return 0;
+}
+
+static int _sclp_print(const char *str)
+{
+	static unsigned char write_head[] = {
+		/* sccb header */
+		0x00, 0x52,					/* 0 */
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00,		/* 2 */
+		/* evbuf */
+		0x00, 0x4a,					/* 8 */
+		0x02, 0x00, 0x00, 0x00,				/* 10 */
+		/* mdb */
+		0x00, 0x44,					/* 14 */
+		0x00, 0x01,					/* 16 */
+		0xd4, 0xc4, 0xc2, 0x40,				/* 18 */
+		0x00, 0x00, 0x00, 0x01,				/* 22 */
+		/* go */
+		0x00, 0x38,					/* 26 */
+		0x00, 0x01,					/* 28 */
+		0x00, 0x00, 0x00, 0x00,				/* 30 */
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,	/* 34 */
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,	/* 42 */
+		0x00, 0x00, 0x00, 0x00,				/* 50 */
+		0x00, 0x00,					/* 54 */
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,	/* 56 */
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,	/* 64 */
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,	/* 72 */
+		0x00, 0x00,					/* 80 */
+	};
+	static unsigned char write_mto[] = {
+		/* mto	*/
+		0x00, 0x0a,					/* 0 */
+		0x00, 0x04,					/* 2 */
+		0x10, 0x00,					/* 4 */
+		0x00, 0x00, 0x00, 0x00				/* 6 */
+	};
+	unsigned char *ptr, ch;
+	unsigned int count;
+
+	memcpy(_sclp_work_area, write_head, sizeof(write_head));
+	ptr = _sclp_work_area + sizeof(write_head);
+	do {
+		memcpy(ptr, write_mto, sizeof(write_mto));
+		for (count = sizeof(write_mto); (ch = *str++) != 0; count++) {
+			if (ch == 0x0a)
+				break;
+			ptr[count] = _ascebc[ch];
+		}
+		/* Update length fields in mto, mdb, evbuf and sccb */
+		*(unsigned short *) ptr = count;
+		*(unsigned short *)(_sclp_work_area + 14) += count;
+		*(unsigned short *)(_sclp_work_area + 8) += count;
+		*(unsigned short *)(_sclp_work_area + 0) += count;
+		ptr += count;
+	} while (ch != 0);
+
+	/* SCLP write data */
+	return _sclp_servc(0x00760005, _sclp_work_area);
+}
+
+int _sclp_print_early(const char *str)
+{
+	int rc;
+
+	rc = _sclp_setup(0);
+	if (rc)
+		return rc;
+	rc = _sclp_print(str);
+	if (rc)
+		return rc;
+	return _sclp_setup(1);
+}
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index ca070d260af2..ce0cbd6ba7ca 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -62,6 +62,7 @@
 #include <asm/os_info.h>
 #include <asm/sclp.h>
 #include <asm/sysinfo.h>
+#include <asm/numa.h>
 #include "entry.h"
 
 /*
@@ -76,7 +77,7 @@ EXPORT_SYMBOL(console_devno);
 unsigned int console_irq = -1;
 EXPORT_SYMBOL(console_irq);
 
-unsigned long elf_hwcap = 0;
+unsigned long elf_hwcap __read_mostly = 0;
 char elf_platform[ELF_PLATFORM_SIZE];
 
 int __initdata memory_end_set;
@@ -688,7 +689,7 @@ static void __init setup_memory(void)
 /*
  * Setup hardware capabilities.
  */
-static void __init setup_hwcaps(void)
+static int __init setup_hwcaps(void)
 {
 	static const int stfl_bits[6] = { 0, 2, 7, 17, 19, 21 };
 	struct cpuid cpu_id;
@@ -754,9 +755,11 @@ static void __init setup_hwcaps(void)
 		elf_hwcap |= HWCAP_S390_TE;
 
 	/*
-	 * Vector extension HWCAP_S390_VXRS is bit 11.
+	 * Vector extension HWCAP_S390_VXRS is bit 11. The Vector extension
+	 * can be disabled with the "novx" parameter. Use MACHINE_HAS_VX
+	 * instead of facility bit 129.
 	 */
-	if (test_facility(129))
+	if (MACHINE_HAS_VX)
 		elf_hwcap |= HWCAP_S390_VXRS;
 	get_cpu_id(&cpu_id);
 	add_device_randomness(&cpu_id, sizeof(cpu_id));
@@ -793,7 +796,9 @@ static void __init setup_hwcaps(void)
 		strcpy(elf_platform, "z13");
 		break;
 	}
+	return 0;
 }
+arch_initcall(setup_hwcaps);
 
 /*
  * Add system information as device randomness
@@ -879,11 +884,7 @@ void __init setup_arch(char **cmdline_p)
 	setup_lowcore();
 	smp_fill_possible_mask();
         cpu_init();
-
-	/*
-	 * Setup capabilities (ELF_HWCAP & ELF_PLATFORM).
-	 */
-	setup_hwcaps();
+	numa_setup();
 
 	/*
 	 * Create kernel page tables and switch to virtual addressing.
diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c
index c551f22ce066..9549af102d75 100644
--- a/arch/s390/kernel/signal.c
+++ b/arch/s390/kernel/signal.c
@@ -105,32 +105,13 @@ struct rt_sigframe
 static void store_sigregs(void)
 {
 	save_access_regs(current->thread.acrs);
-	save_fp_ctl(&current->thread.fp_regs.fpc);
-	if (current->thread.vxrs) {
-		int i;
-
-		save_vx_regs(current->thread.vxrs);
-		for (i = 0; i < __NUM_FPRS; i++)
-			current->thread.fp_regs.fprs[i] =
-				*(freg_t *)(current->thread.vxrs + i);
-	} else
-		save_fp_regs(current->thread.fp_regs.fprs);
+	save_fpu_regs();
 }
 
 /* Load registers after signal return */
 static void load_sigregs(void)
 {
 	restore_access_regs(current->thread.acrs);
-	/* restore_fp_ctl is done in restore_sigregs */
-	if (current->thread.vxrs) {
-		int i;
-
-		for (i = 0; i < __NUM_FPRS; i++)
-			*(freg_t *)(current->thread.vxrs + i) =
-				current->thread.fp_regs.fprs[i];
-		restore_vx_regs(current->thread.vxrs);
-	} else
-		restore_fp_regs(current->thread.fp_regs.fprs);
 }
 
 /* Returns non-zero on fault. */
@@ -146,8 +127,7 @@ static int save_sigregs(struct pt_regs *regs, _sigregs __user *sregs)
 	memcpy(&user_sregs.regs.gprs, &regs->gprs, sizeof(sregs->regs.gprs));
 	memcpy(&user_sregs.regs.acrs, current->thread.acrs,
 	       sizeof(user_sregs.regs.acrs));
-	memcpy(&user_sregs.fpregs, &current->thread.fp_regs,
-	       sizeof(user_sregs.fpregs));
+	fpregs_store(&user_sregs.fpregs, &current->thread.fpu);
 	if (__copy_to_user(sregs, &user_sregs, sizeof(_sigregs)))
 		return -EFAULT;
 	return 0;
@@ -166,8 +146,8 @@ static int restore_sigregs(struct pt_regs *regs, _sigregs __user *sregs)
 	if (!is_ri_task(current) && (user_sregs.regs.psw.mask & PSW_MASK_RI))
 		return -EINVAL;
 
-	/* Loading the floating-point-control word can fail. Do that first. */
-	if (restore_fp_ctl(&user_sregs.fpregs.fpc))
+	/* Test the floating-point-control word. */
+	if (test_fp_ctl(user_sregs.fpregs.fpc))
 		return -EINVAL;
 
 	/* Use regs->psw.mask instead of PSW_USER_BITS to preserve PER bit. */
@@ -185,8 +165,7 @@ static int restore_sigregs(struct pt_regs *regs, _sigregs __user *sregs)
 	memcpy(&current->thread.acrs, &user_sregs.regs.acrs,
 	       sizeof(current->thread.acrs));
 
-	memcpy(&current->thread.fp_regs, &user_sregs.fpregs,
-	       sizeof(current->thread.fp_regs));
+	fpregs_load(&user_sregs.fpregs, &current->thread.fpu);
 
 	clear_pt_regs_flag(regs, PIF_SYSCALL); /* No longer in a system call */
 	return 0;
@@ -200,13 +179,13 @@ static int save_sigregs_ext(struct pt_regs *regs,
 	int i;
 
 	/* Save vector registers to signal stack */
-	if (current->thread.vxrs) {
+	if (is_vx_task(current)) {
 		for (i = 0; i < __NUM_VXRS_LOW; i++)
-			vxrs[i] = *((__u64 *)(current->thread.vxrs + i) + 1);
+			vxrs[i] = *((__u64 *)(current->thread.fpu.vxrs + i) + 1);
 		if (__copy_to_user(&sregs_ext->vxrs_low, vxrs,
 				   sizeof(sregs_ext->vxrs_low)) ||
 		    __copy_to_user(&sregs_ext->vxrs_high,
-				   current->thread.vxrs + __NUM_VXRS_LOW,
+				   current->thread.fpu.vxrs + __NUM_VXRS_LOW,
 				   sizeof(sregs_ext->vxrs_high)))
 			return -EFAULT;
 	}
@@ -220,15 +199,15 @@ static int restore_sigregs_ext(struct pt_regs *regs,
 	int i;
 
 	/* Restore vector registers from signal stack */
-	if (current->thread.vxrs) {
+	if (is_vx_task(current)) {
 		if (__copy_from_user(vxrs, &sregs_ext->vxrs_low,
 				     sizeof(sregs_ext->vxrs_low)) ||
-		    __copy_from_user(current->thread.vxrs + __NUM_VXRS_LOW,
+		    __copy_from_user(current->thread.fpu.vxrs + __NUM_VXRS_LOW,
 				     &sregs_ext->vxrs_high,
 				     sizeof(sregs_ext->vxrs_high)))
 			return -EFAULT;
 		for (i = 0; i < __NUM_VXRS_LOW; i++)
-			*((__u64 *)(current->thread.vxrs + i) + 1) = vxrs[i];
+			*((__u64 *)(current->thread.fpu.vxrs + i) + 1) = vxrs[i];
 	}
 	return 0;
 }
@@ -243,6 +222,7 @@ SYSCALL_DEFINE0(sigreturn)
 	if (__copy_from_user(&set.sig, &frame->sc.oldmask, _SIGMASK_COPY_SIZE))
 		goto badframe;
 	set_current_blocked(&set);
+	save_fpu_regs();
 	if (restore_sigregs(regs, &frame->sregs))
 		goto badframe;
 	if (restore_sigregs_ext(regs, &frame->sregs_ext))
@@ -266,6 +246,7 @@ SYSCALL_DEFINE0(rt_sigreturn)
 	set_current_blocked(&set);
 	if (restore_altstack(&frame->uc.uc_stack))
 		goto badframe;
+	save_fpu_regs();
 	if (restore_sigregs(regs, &frame->uc.uc_mcontext))
 		goto badframe;
 	if (restore_sigregs_ext(regs, &frame->uc.uc_mcontext_ext))
@@ -400,7 +381,7 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
 	uc_flags = 0;
 	if (MACHINE_HAS_VX) {
 		frame_size += sizeof(_sigregs_ext);
-		if (current->thread.vxrs)
+		if (is_vx_task(current))
 			uc_flags |= UC_VXRS;
 	}
 	frame = get_sigframe(&ksig->ka, regs, frame_size);
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 6f54c175f5c9..c6355e6f3fcc 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -532,8 +532,8 @@ EXPORT_SYMBOL(smp_ctl_clear_bit);
 
 #ifdef CONFIG_CRASH_DUMP
 
-static void __smp_store_cpu_state(struct save_area_ext *sa_ext, u16 address,
-				  int is_boot_cpu)
+static void __init __smp_store_cpu_state(struct save_area_ext *sa_ext,
+					 u16 address, int is_boot_cpu)
 {
 	void *lc = (void *)(unsigned long) store_prefix();
 	unsigned long vx_sa;
diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S
index 1acad02681c4..f3f4a137aef6 100644
--- a/arch/s390/kernel/syscalls.S
+++ b/arch/s390/kernel/syscalls.S
@@ -276,9 +276,9 @@ SYSCALL(sys_ni_syscall,compat_sys_s390_fadvise64_64)
 SYSCALL(sys_statfs64,compat_sys_statfs64)
 SYSCALL(sys_fstatfs64,compat_sys_fstatfs64)
 SYSCALL(sys_remap_file_pages,compat_sys_remap_file_pages)
-NI_SYSCALL						/* 268 sys_mbind */
-NI_SYSCALL						/* 269 sys_get_mempolicy */
-NI_SYSCALL						/* 270 sys_set_mempolicy */
+SYSCALL(sys_mbind,compat_sys_mbind)
+SYSCALL(sys_get_mempolicy,compat_sys_get_mempolicy)
+SYSCALL(sys_set_mempolicy,compat_sys_set_mempolicy)
 SYSCALL(sys_mq_open,compat_sys_mq_open)
 SYSCALL(sys_mq_unlink,compat_sys_mq_unlink)
 SYSCALL(sys_mq_timedsend,compat_sys_mq_timedsend)
@@ -295,7 +295,7 @@ SYSCALL(sys_ioprio_get,compat_sys_ioprio_get)
 SYSCALL(sys_inotify_init,sys_inotify_init)
 SYSCALL(sys_inotify_add_watch,compat_sys_inotify_add_watch)	/* 285 */
 SYSCALL(sys_inotify_rm_watch,compat_sys_inotify_rm_watch)
-NI_SYSCALL						/* 287 sys_migrate_pages */
+SYSCALL(sys_migrate_pages,compat_sys_migrate_pages)
 SYSCALL(sys_openat,compat_sys_openat)
 SYSCALL(sys_mkdirat,compat_sys_mkdirat)
 SYSCALL(sys_mknodat,compat_sys_mknodat)			/* 290 */
@@ -318,7 +318,7 @@ SYSCALL(sys_splice,compat_sys_splice)
 SYSCALL(sys_sync_file_range,compat_sys_s390_sync_file_range)
 SYSCALL(sys_tee,compat_sys_tee)
 SYSCALL(sys_vmsplice,compat_sys_vmsplice)
-NI_SYSCALL						/* 310 sys_move_pages */
+SYSCALL(sys_move_pages,compat_sys_move_pages)
 SYSCALL(sys_getcpu,compat_sys_getcpu)
 SYSCALL(sys_epoll_pwait,compat_sys_epoll_pwait)
 SYSCALL(sys_utimes,compat_sys_utimes)
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index 9e733d965e08..627887b075a7 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -58,6 +58,9 @@ EXPORT_SYMBOL_GPL(sched_clock_base_cc);
 
 static DEFINE_PER_CPU(struct clock_event_device, comparators);
 
+ATOMIC_NOTIFIER_HEAD(s390_epoch_delta_notifier);
+EXPORT_SYMBOL(s390_epoch_delta_notifier);
+
 /*
  * Scheduler clock - returns current time in nanosec units.
  */
@@ -752,7 +755,7 @@ static void clock_sync_cpu(struct clock_sync_data *sync)
 static int etr_sync_clock(void *data)
 {
 	static int first;
-	unsigned long long clock, old_clock, delay, delta;
+	unsigned long long clock, old_clock, clock_delta, delay, delta;
 	struct clock_sync_data *etr_sync;
 	struct etr_aib *sync_port, *aib;
 	int port;
@@ -789,6 +792,9 @@ static int etr_sync_clock(void *data)
 		delay = (unsigned long long)
 			(aib->edf2.etv - sync_port->edf2.etv) << 32;
 		delta = adjust_time(old_clock, clock, delay);
+		clock_delta = clock - old_clock;
+		atomic_notifier_call_chain(&s390_epoch_delta_notifier, 0,
+					   &clock_delta);
 		etr_sync->fixup_cc = delta;
 		fixup_clock_comparator(delta);
 		/* Verify that the clock is properly set. */
@@ -1526,7 +1532,7 @@ void stp_island_check(void)
 static int stp_sync_clock(void *data)
 {
 	static int first;
-	unsigned long long old_clock, delta;
+	unsigned long long old_clock, delta, new_clock, clock_delta;
 	struct clock_sync_data *stp_sync;
 	int rc;
 
@@ -1551,7 +1557,11 @@ static int stp_sync_clock(void *data)
 		old_clock = get_tod_clock();
 		rc = chsc_sstpc(stp_page, STP_OP_SYNC, 0);
 		if (rc == 0) {
-			delta = adjust_time(old_clock, get_tod_clock(), 0);
+			new_clock = get_tod_clock();
+			delta = adjust_time(old_clock, new_clock, 0);
+			clock_delta = new_clock - old_clock;
+			atomic_notifier_call_chain(&s390_epoch_delta_notifier,
+						   0, &clock_delta);
 			fixup_clock_comparator(delta);
 			rc = chsc_sstpi(stp_page, &stp_info,
 					sizeof(struct stp_sstpi));
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index 5728c5bd44a8..bf05e7fc3e70 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -18,7 +18,10 @@
 #include <linux/cpu.h>
 #include <linux/smp.h>
 #include <linux/mm.h>
+#include <linux/nodemask.h>
+#include <linux/node.h>
 #include <asm/sysinfo.h>
+#include <asm/numa.h>
 
 #define PTF_HORIZONTAL	(0UL)
 #define PTF_VERTICAL	(1UL)
@@ -37,8 +40,10 @@ static struct sysinfo_15_1_x *tl_info;
 static int topology_enabled = 1;
 static DECLARE_WORK(topology_work, topology_work_fn);
 
-/* topology_lock protects the socket and book linked lists */
-static DEFINE_SPINLOCK(topology_lock);
+/*
+ * Socket/Book linked lists and per_cpu(cpu_topology) updates are
+ * protected by "sched_domains_mutex".
+ */
 static struct mask_info socket_info;
 static struct mask_info book_info;
 
@@ -188,7 +193,6 @@ static void tl_to_masks(struct sysinfo_15_1_x *info)
 {
 	struct cpuid cpu_id;
 
-	spin_lock_irq(&topology_lock);
 	get_cpu_id(&cpu_id);
 	clear_masks();
 	switch (cpu_id.machine) {
@@ -199,7 +203,6 @@ static void tl_to_masks(struct sysinfo_15_1_x *info)
 	default:
 		__tl_to_masks_generic(info);
 	}
-	spin_unlock_irq(&topology_lock);
 }
 
 static void topology_update_polarization_simple(void)
@@ -244,10 +247,8 @@ int topology_set_cpu_management(int fc)
 
 static void update_cpu_masks(void)
 {
-	unsigned long flags;
 	int cpu;
 
-	spin_lock_irqsave(&topology_lock, flags);
 	for_each_possible_cpu(cpu) {
 		per_cpu(cpu_topology, cpu).thread_mask = cpu_thread_map(cpu);
 		per_cpu(cpu_topology, cpu).core_mask = cpu_group_map(&socket_info, cpu);
@@ -259,7 +260,7 @@ static void update_cpu_masks(void)
 			per_cpu(cpu_topology, cpu).book_id = cpu;
 		}
 	}
-	spin_unlock_irqrestore(&topology_lock, flags);
+	numa_update_cpu_topology();
 }
 
 void store_topology(struct sysinfo_15_1_x *info)
@@ -274,21 +275,21 @@ int arch_update_cpu_topology(void)
 {
 	struct sysinfo_15_1_x *info = tl_info;
 	struct device *dev;
-	int cpu;
+	int cpu, rc = 0;
 
-	if (!MACHINE_HAS_TOPOLOGY) {
-		update_cpu_masks();
-		topology_update_polarization_simple();
-		return 0;
+	if (MACHINE_HAS_TOPOLOGY) {
+		rc = 1;
+		store_topology(info);
+		tl_to_masks(info);
 	}
-	store_topology(info);
-	tl_to_masks(info);
 	update_cpu_masks();
+	if (!MACHINE_HAS_TOPOLOGY)
+		topology_update_polarization_simple();
 	for_each_online_cpu(cpu) {
 		dev = get_cpu_device(cpu);
 		kobject_uevent(&dev->kobj, KOBJ_CHANGE);
 	}
-	return 1;
+	return rc;
 }
 
 static void topology_work_fn(struct work_struct *work)
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
index 7bea81d8a363..9861613fb35a 100644
--- a/arch/s390/kernel/traps.c
+++ b/arch/s390/kernel/traps.c
@@ -19,7 +19,7 @@
 #include <linux/sched.h>
 #include <linux/mm.h>
 #include <linux/slab.h>
-#include <asm/switch_to.h>
+#include <asm/fpu-internal.h>
 #include "entry.h"
 
 int show_unhandled_signals = 1;
@@ -151,7 +151,7 @@ DO_ERROR_INFO(special_op_exception, SIGILL, ILL_ILLOPN,
 DO_ERROR_INFO(transaction_exception, SIGILL, ILL_ILLOPN,
 	      "transaction constraint exception")
 
-static inline void do_fp_trap(struct pt_regs *regs, int fpc)
+static inline void do_fp_trap(struct pt_regs *regs, __u32 fpc)
 {
 	int si_code = 0;
 	/* FPC[2] is Data Exception Code */
@@ -227,7 +227,7 @@ DO_ERROR_INFO(specification_exception, SIGILL, ILL_ILLOPN,
 int alloc_vector_registers(struct task_struct *tsk)
 {
 	__vector128 *vxrs;
-	int i;
+	freg_t *fprs;
 
 	/* Allocate vector register save area. */
 	vxrs = kzalloc(sizeof(__vector128) * __NUM_VXRS,
@@ -236,15 +236,13 @@ int alloc_vector_registers(struct task_struct *tsk)
 		return -ENOMEM;
 	preempt_disable();
 	if (tsk == current)
-		save_fp_regs(tsk->thread.fp_regs.fprs);
+		save_fpu_regs();
 	/* Copy the 16 floating point registers */
-	for (i = 0; i < 16; i++)
-		*(freg_t *) &vxrs[i] = tsk->thread.fp_regs.fprs[i];
-	tsk->thread.vxrs = vxrs;
-	if (tsk == current) {
-		__ctl_set_bit(0, 17);
-		restore_vx_regs(vxrs);
-	}
+	convert_fp_to_vx(vxrs, tsk->thread.fpu.fprs);
+	fprs = tsk->thread.fpu.fprs;
+	tsk->thread.fpu.vxrs = vxrs;
+	tsk->thread.fpu.flags |= FPU_USE_VX;
+	kfree(fprs);
 	preempt_enable();
 	return 0;
 }
@@ -259,8 +257,8 @@ void vector_exception(struct pt_regs *regs)
 	}
 
 	/* get vector interrupt code from fpc */
-	asm volatile("stfpc %0" : "=Q" (current->thread.fp_regs.fpc));
-	vic = (current->thread.fp_regs.fpc & 0xf00) >> 8;
+	save_fpu_regs();
+	vic = (current->thread.fpu.fpc & 0xf00) >> 8;
 	switch (vic) {
 	case 1: /* invalid vector operation */
 		si_code = FPE_FLTINV;
@@ -297,22 +295,22 @@ void data_exception(struct pt_regs *regs)
 
 	location = get_trap_ip(regs);
 
-	asm volatile("stfpc %0" : "=Q" (current->thread.fp_regs.fpc));
+	save_fpu_regs();
 	/* Check for vector register enablement */
-	if (MACHINE_HAS_VX && !current->thread.vxrs &&
-	    (current->thread.fp_regs.fpc & FPC_DXC_MASK) == 0xfe00) {
+	if (MACHINE_HAS_VX && !is_vx_task(current) &&
+	    (current->thread.fpu.fpc & FPC_DXC_MASK) == 0xfe00) {
 		alloc_vector_registers(current);
 		/* Vector data exception is suppressing, rewind psw. */
 		regs->psw.addr = __rewind_psw(regs->psw, regs->int_code >> 16);
 		clear_pt_regs_flag(regs, PIF_PER_TRAP);
 		return;
 	}
-	if (current->thread.fp_regs.fpc & FPC_DXC_MASK)
+	if (current->thread.fpu.fpc & FPC_DXC_MASK)
 		signal = SIGFPE;
 	else
 		signal = SIGILL;
 	if (signal == SIGFPE)
-		do_fp_trap(regs, current->thread.fp_regs.fpc);
+		do_fp_trap(regs, current->thread.fpu.fpc);
 	else if (signal)
 		do_trap(regs, signal, ILL_ILLOPN, "data exception");
 }
diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile
index 8ad2b34ad151..ee8a18e50a25 100644
--- a/arch/s390/kernel/vdso32/Makefile
+++ b/arch/s390/kernel/vdso32/Makefile
@@ -13,7 +13,7 @@ KBUILD_AFLAGS_31 += -m31 -s
 KBUILD_CFLAGS_31 := $(filter-out -m64,$(KBUILD_CFLAGS))
 KBUILD_CFLAGS_31 += -m31 -fPIC -shared -fno-common -fno-builtin
 KBUILD_CFLAGS_31 += -nostdlib -Wl,-soname=linux-vdso32.so.1 \
-			$(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
+			$(call cc-ldoption, -Wl$(comma)--hash-style=both)
 
 $(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_31)
 $(targets:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_31)
diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile
index 2a8ddfd12a5b..c4b03f9ed228 100644
--- a/arch/s390/kernel/vdso64/Makefile
+++ b/arch/s390/kernel/vdso64/Makefile
@@ -13,7 +13,7 @@ KBUILD_AFLAGS_64 += -m64 -s
 KBUILD_CFLAGS_64 := $(filter-out -m64,$(KBUILD_CFLAGS))
 KBUILD_CFLAGS_64 += -m64 -fPIC -shared -fno-common -fno-builtin
 KBUILD_CFLAGS_64 += -nostdlib -Wl,-soname=linux-vdso64.so.1 \
-			$(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
+			$(call cc-ldoption, -Wl$(comma)--hash-style=both)
 
 $(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_64)
 $(targets:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_64)
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index e53d3595a7c8..b9ce650e9e99 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -28,6 +28,7 @@ static atomic64_t virt_timer_elapsed;
 static DEFINE_PER_CPU(u64, mt_cycles[32]);
 static DEFINE_PER_CPU(u64, mt_scaling_mult) = { 1 };
 static DEFINE_PER_CPU(u64, mt_scaling_div) = { 1 };
+static DEFINE_PER_CPU(u64, mt_scaling_jiffies);
 
 static inline u64 get_vtimer(void)
 {
@@ -85,7 +86,8 @@ static int do_account_vtime(struct task_struct *tsk, int hardirq_offset)
 	S390_lowcore.steal_timer += S390_lowcore.last_update_clock - clock;
 
 	/* Do MT utilization calculation */
-	if (smp_cpu_mtid) {
+	if (smp_cpu_mtid &&
+	    time_after64(jiffies_64, __this_cpu_read(mt_scaling_jiffies))) {
 		u64 cycles_new[32], *cycles_old;
 		u64 delta, mult, div;
 
@@ -105,6 +107,7 @@ static int do_account_vtime(struct task_struct *tsk, int hardirq_offset)
 				       sizeof(u64) * (smp_cpu_mtid + 1));
 			}
 		}
+		__this_cpu_write(mt_scaling_jiffies, jiffies_64);
 	}
 
 	user = S390_lowcore.user_timer - ti->user_timer;
@@ -376,4 +379,11 @@ void vtime_init(void)
 {
 	/* set initial cpu timer */
 	set_vtimer(VTIMER_MAX_SLICE);
+	/* Setup initial MT scaling values */
+	if (smp_cpu_mtid) {
+		__this_cpu_write(mt_scaling_jiffies, jiffies);
+		__this_cpu_write(mt_scaling_mult, 1);
+		__this_cpu_write(mt_scaling_div, 1);
+		stcctm5(smp_cpu_mtid + 1, this_cpu_ptr(mt_cycles));
+	}
 }
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index fc7ec95848c3..5fbfb88f8477 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -27,13 +27,13 @@ static int diag_release_pages(struct kvm_vcpu *vcpu)
 
 	start = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4];
 	end = vcpu->run->s.regs.gprs[vcpu->arch.sie_block->ipa & 0xf] + 4096;
+	vcpu->stat.diagnose_10++;
 
 	if (start & ~PAGE_MASK || end & ~PAGE_MASK || start >= end
 	    || start < 2 * PAGE_SIZE)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
 	VCPU_EVENT(vcpu, 5, "diag release pages %lX %lX", start, end);
-	vcpu->stat.diagnose_10++;
 
 	/*
 	 * We checked for start >= end above, so lets check for the
@@ -75,6 +75,9 @@ static int __diag_page_ref_service(struct kvm_vcpu *vcpu)
 	u16 rx = (vcpu->arch.sie_block->ipa & 0xf0) >> 4;
 	u16 ry = (vcpu->arch.sie_block->ipa & 0x0f);
 
+	VCPU_EVENT(vcpu, 3, "diag page reference parameter block at 0x%llx",
+		   vcpu->run->s.regs.gprs[rx]);
+	vcpu->stat.diagnose_258++;
 	if (vcpu->run->s.regs.gprs[rx] & 7)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 	rc = read_guest(vcpu, vcpu->run->s.regs.gprs[rx], rx, &parm, sizeof(parm));
@@ -85,6 +88,9 @@ static int __diag_page_ref_service(struct kvm_vcpu *vcpu)
 
 	switch (parm.subcode) {
 	case 0: /* TOKEN */
+		VCPU_EVENT(vcpu, 3, "pageref token addr 0x%llx "
+			   "select mask 0x%llx compare mask 0x%llx",
+			   parm.token_addr, parm.select_mask, parm.compare_mask);
 		if (vcpu->arch.pfault_token != KVM_S390_PFAULT_TOKEN_INVALID) {
 			/*
 			 * If the pagefault handshake is already activated,
@@ -114,6 +120,7 @@ static int __diag_page_ref_service(struct kvm_vcpu *vcpu)
 		 * the cancel, therefore to reduce code complexity, we assume
 		 * all outstanding tokens are already pending.
 		 */
+		VCPU_EVENT(vcpu, 3, "pageref cancel addr 0x%llx", parm.token_addr);
 		if (parm.token_addr || parm.select_mask ||
 		    parm.compare_mask || parm.zarch)
 			return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
@@ -174,7 +181,8 @@ static int __diag_ipl_functions(struct kvm_vcpu *vcpu)
 	unsigned int reg = vcpu->arch.sie_block->ipa & 0xf;
 	unsigned long subcode = vcpu->run->s.regs.gprs[reg] & 0xffff;
 
-	VCPU_EVENT(vcpu, 5, "diag ipl functions, subcode %lx", subcode);
+	VCPU_EVENT(vcpu, 3, "diag ipl functions, subcode %lx", subcode);
+	vcpu->stat.diagnose_308++;
 	switch (subcode) {
 	case 3:
 		vcpu->run->s390_reset_flags = KVM_S390_RESET_CLEAR;
@@ -202,6 +210,7 @@ static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu)
 {
 	int ret;
 
+	vcpu->stat.diagnose_500++;
 	/* No virtio-ccw notification? Get out quickly. */
 	if (!vcpu->kvm->arch.css_support ||
 	    (vcpu->run->s.regs.gprs[1] != KVM_S390_VIRTIO_CCW_NOTIFY))
diff --git a/arch/s390/kvm/guestdbg.c b/arch/s390/kvm/guestdbg.c
index e97b3455d7e6..47518a324d75 100644
--- a/arch/s390/kvm/guestdbg.c
+++ b/arch/s390/kvm/guestdbg.c
@@ -473,10 +473,45 @@ static void filter_guest_per_event(struct kvm_vcpu *vcpu)
 		vcpu->arch.sie_block->iprcc &= ~PGM_PER;
 }
 
+#define pssec(vcpu) (vcpu->arch.sie_block->gcr[1] & _ASCE_SPACE_SWITCH)
+#define hssec(vcpu) (vcpu->arch.sie_block->gcr[13] & _ASCE_SPACE_SWITCH)
+#define old_ssec(vcpu) ((vcpu->arch.sie_block->tecmc >> 31) & 0x1)
+#define old_as_is_home(vcpu) !(vcpu->arch.sie_block->tecmc & 0xffff)
+
 void kvm_s390_handle_per_event(struct kvm_vcpu *vcpu)
 {
+	int new_as;
+
 	if (debug_exit_required(vcpu))
 		vcpu->guest_debug |= KVM_GUESTDBG_EXIT_PENDING;
 
 	filter_guest_per_event(vcpu);
+
+	/*
+	 * Only RP, SAC, SACF, PT, PTI, PR, PC instructions can trigger
+	 * a space-switch event. PER events enforce space-switch events
+	 * for these instructions. So if no PER event for the guest is left,
+	 * we might have to filter the space-switch element out, too.
+	 */
+	if (vcpu->arch.sie_block->iprcc == PGM_SPACE_SWITCH) {
+		vcpu->arch.sie_block->iprcc = 0;
+		new_as = psw_bits(vcpu->arch.sie_block->gpsw).as;
+
+		/*
+		 * If the AS changed from / to home, we had RP, SAC or SACF
+		 * instruction. Check primary and home space-switch-event
+		 * controls. (theoretically home -> home produced no event)
+		 */
+		if (((new_as == PSW_AS_HOME) ^ old_as_is_home(vcpu)) &&
+		     (pssec(vcpu) || hssec(vcpu)))
+			vcpu->arch.sie_block->iprcc = PGM_SPACE_SWITCH;
+
+		/*
+		 * PT, PTI, PR, PC instruction operate on primary AS only. Check
+		 * if the primary-space-switch-event control was or got set.
+		 */
+		if (new_as == PSW_AS_PRIMARY && !old_as_is_home(vcpu) &&
+		    (pssec(vcpu) || old_ssec(vcpu)))
+			vcpu->arch.sie_block->iprcc = PGM_SPACE_SWITCH;
+	}
 }
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index c98d89708e99..b277d50dcf76 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -30,7 +30,6 @@
 #define IOINT_SCHID_MASK 0x0000ffff
 #define IOINT_SSID_MASK 0x00030000
 #define IOINT_CSSID_MASK 0x03fc0000
-#define IOINT_AI_MASK 0x04000000
 #define PFAULT_INIT 0x0600
 #define PFAULT_DONE 0x0680
 #define VIRTIO_PARAM 0x0d00
@@ -72,9 +71,13 @@ static int ckc_interrupts_enabled(struct kvm_vcpu *vcpu)
 
 static int ckc_irq_pending(struct kvm_vcpu *vcpu)
 {
+	preempt_disable();
 	if (!(vcpu->arch.sie_block->ckc <
-	      get_tod_clock_fast() + vcpu->arch.sie_block->epoch))
+	      get_tod_clock_fast() + vcpu->arch.sie_block->epoch)) {
+		preempt_enable();
 		return 0;
+	}
+	preempt_enable();
 	return ckc_interrupts_enabled(vcpu);
 }
 
@@ -311,8 +314,8 @@ static int __must_check __deliver_pfault_init(struct kvm_vcpu *vcpu)
 	li->irq.ext.ext_params2 = 0;
 	spin_unlock(&li->lock);
 
-	VCPU_EVENT(vcpu, 4, "interrupt: pfault init parm:%x,parm64:%llx",
-		   0, ext.ext_params2);
+	VCPU_EVENT(vcpu, 4, "deliver: pfault init token 0x%llx",
+		   ext.ext_params2);
 	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
 					 KVM_S390_INT_PFAULT_INIT,
 					 0, ext.ext_params2);
@@ -368,7 +371,7 @@ static int __must_check __deliver_machine_check(struct kvm_vcpu *vcpu)
 	spin_unlock(&fi->lock);
 
 	if (deliver) {
-		VCPU_EVENT(vcpu, 4, "interrupt: machine check mcic=%llx",
+		VCPU_EVENT(vcpu, 3, "deliver: machine check mcic 0x%llx",
 			   mchk.mcic);
 		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
 						 KVM_S390_MCHK,
@@ -403,7 +406,7 @@ static int __must_check __deliver_restart(struct kvm_vcpu *vcpu)
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
 	int rc;
 
-	VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu restart");
+	VCPU_EVENT(vcpu, 3, "%s", "deliver: cpu restart");
 	vcpu->stat.deliver_restart_signal++;
 	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_RESTART, 0, 0);
 
@@ -427,7 +430,6 @@ static int __must_check __deliver_set_prefix(struct kvm_vcpu *vcpu)
 	clear_bit(IRQ_PEND_SET_PREFIX, &li->pending_irqs);
 	spin_unlock(&li->lock);
 
-	VCPU_EVENT(vcpu, 4, "interrupt: set prefix to %x", prefix.address);
 	vcpu->stat.deliver_prefix_signal++;
 	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
 					 KVM_S390_SIGP_SET_PREFIX,
@@ -450,7 +452,7 @@ static int __must_check __deliver_emergency_signal(struct kvm_vcpu *vcpu)
 		clear_bit(IRQ_PEND_EXT_EMERGENCY, &li->pending_irqs);
 	spin_unlock(&li->lock);
 
-	VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp emerg");
+	VCPU_EVENT(vcpu, 4, "%s", "deliver: sigp emerg");
 	vcpu->stat.deliver_emergency_signal++;
 	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_EMERGENCY,
 					 cpu_addr, 0);
@@ -477,7 +479,7 @@ static int __must_check __deliver_external_call(struct kvm_vcpu *vcpu)
 	clear_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs);
 	spin_unlock(&li->lock);
 
-	VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp ext call");
+	VCPU_EVENT(vcpu, 4, "%s", "deliver: sigp ext call");
 	vcpu->stat.deliver_external_call++;
 	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
 					 KVM_S390_INT_EXTERNAL_CALL,
@@ -506,7 +508,7 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu)
 	memset(&li->irq.pgm, 0, sizeof(pgm_info));
 	spin_unlock(&li->lock);
 
-	VCPU_EVENT(vcpu, 4, "interrupt: pgm check code:%x, ilc:%x",
+	VCPU_EVENT(vcpu, 3, "deliver: program irq code 0x%x, ilc:%d",
 		   pgm_info.code, ilc);
 	vcpu->stat.deliver_program_int++;
 	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_PROGRAM_INT,
@@ -622,7 +624,7 @@ static int __must_check __deliver_service(struct kvm_vcpu *vcpu)
 	clear_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs);
 	spin_unlock(&fi->lock);
 
-	VCPU_EVENT(vcpu, 4, "interrupt: sclp parm:%x",
+	VCPU_EVENT(vcpu, 4, "deliver: sclp parameter 0x%x",
 		   ext.ext_params);
 	vcpu->stat.deliver_service_signal++;
 	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_SERVICE,
@@ -651,9 +653,6 @@ static int __must_check __deliver_pfault_done(struct kvm_vcpu *vcpu)
 					struct kvm_s390_interrupt_info,
 					list);
 	if (inti) {
-		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
-				KVM_S390_INT_PFAULT_DONE, 0,
-				inti->ext.ext_params2);
 		list_del(&inti->list);
 		fi->counters[FIRQ_CNTR_PFAULT] -= 1;
 	}
@@ -662,6 +661,12 @@ static int __must_check __deliver_pfault_done(struct kvm_vcpu *vcpu)
 	spin_unlock(&fi->lock);
 
 	if (inti) {
+		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
+						 KVM_S390_INT_PFAULT_DONE, 0,
+						 inti->ext.ext_params2);
+		VCPU_EVENT(vcpu, 4, "deliver: pfault done token 0x%llx",
+			   inti->ext.ext_params2);
+
 		rc  = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE,
 				(u16 *)__LC_EXT_INT_CODE);
 		rc |= put_guest_lc(vcpu, PFAULT_DONE,
@@ -691,7 +696,7 @@ static int __must_check __deliver_virtio(struct kvm_vcpu *vcpu)
 					list);
 	if (inti) {
 		VCPU_EVENT(vcpu, 4,
-			   "interrupt: virtio parm:%x,parm64:%llx",
+			   "deliver: virtio parm: 0x%x,parm64: 0x%llx",
 			   inti->ext.ext_params, inti->ext.ext_params2);
 		vcpu->stat.deliver_virtio_interrupt++;
 		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
@@ -741,7 +746,7 @@ static int __must_check __deliver_io(struct kvm_vcpu *vcpu,
 					struct kvm_s390_interrupt_info,
 					list);
 	if (inti) {
-		VCPU_EVENT(vcpu, 4, "interrupt: I/O %llx", inti->type);
+		VCPU_EVENT(vcpu, 4, "deliver: I/O 0x%llx", inti->type);
 		vcpu->stat.deliver_io_int++;
 		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
 				inti->type,
@@ -855,7 +860,9 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
 		goto no_timer;
 	}
 
+	preempt_disable();
 	now = get_tod_clock_fast() + vcpu->arch.sie_block->epoch;
+	preempt_enable();
 	sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now);
 
 	/* underflow */
@@ -864,7 +871,7 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
 
 	__set_cpu_idle(vcpu);
 	hrtimer_start(&vcpu->arch.ckc_timer, ktime_set (0, sltime) , HRTIMER_MODE_REL);
-	VCPU_EVENT(vcpu, 5, "enabled wait via clock comparator: %llx ns", sltime);
+	VCPU_EVENT(vcpu, 4, "enabled wait via clock comparator: %llu ns", sltime);
 no_timer:
 	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
 	kvm_vcpu_block(vcpu);
@@ -894,7 +901,9 @@ enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer)
 	u64 now, sltime;
 
 	vcpu = container_of(timer, struct kvm_vcpu, arch.ckc_timer);
+	preempt_disable();
 	now = get_tod_clock_fast() + vcpu->arch.sie_block->epoch;
+	preempt_enable();
 	sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now);
 
 	/*
@@ -968,6 +977,10 @@ static int __inject_prog(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
 {
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
 
+	VCPU_EVENT(vcpu, 3, "inject: program irq code 0x%x", irq->u.pgm.code);
+	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_PROGRAM_INT,
+				   irq->u.pgm.code, 0);
+
 	li->irq.pgm = irq->u.pgm;
 	set_bit(IRQ_PEND_PROG, &li->pending_irqs);
 	return 0;
@@ -978,9 +991,6 @@ int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code)
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
 	struct kvm_s390_irq irq;
 
-	VCPU_EVENT(vcpu, 3, "inject: program check %d (from kernel)", code);
-	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_PROGRAM_INT, code,
-				   0, 1);
 	spin_lock(&li->lock);
 	irq.u.pgm.code = code;
 	__inject_prog(vcpu, &irq);
@@ -996,10 +1006,6 @@ int kvm_s390_inject_prog_irq(struct kvm_vcpu *vcpu,
 	struct kvm_s390_irq irq;
 	int rc;
 
-	VCPU_EVENT(vcpu, 3, "inject: prog irq %d (from kernel)",
-		   pgm_info->code);
-	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_PROGRAM_INT,
-				   pgm_info->code, 0, 1);
 	spin_lock(&li->lock);
 	irq.u.pgm = *pgm_info;
 	rc = __inject_prog(vcpu, &irq);
@@ -1012,11 +1018,11 @@ static int __inject_pfault_init(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
 {
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
 
-	VCPU_EVENT(vcpu, 3, "inject: external irq params:%x, params2:%llx",
-		   irq->u.ext.ext_params, irq->u.ext.ext_params2);
+	VCPU_EVENT(vcpu, 4, "inject: pfault init parameter block at 0x%llx",
+		   irq->u.ext.ext_params2);
 	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_PFAULT_INIT,
 				   irq->u.ext.ext_params,
-				   irq->u.ext.ext_params2, 2);
+				   irq->u.ext.ext_params2);
 
 	li->irq.ext = irq->u.ext;
 	set_bit(IRQ_PEND_PFAULT_INIT, &li->pending_irqs);
@@ -1045,10 +1051,10 @@ static int __inject_extcall(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
 	struct kvm_s390_extcall_info *extcall = &li->irq.extcall;
 	uint16_t src_id = irq->u.extcall.code;
 
-	VCPU_EVENT(vcpu, 3, "inject: external call source-cpu:%u",
+	VCPU_EVENT(vcpu, 4, "inject: external call source-cpu:%u",
 		   src_id);
 	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_EXTERNAL_CALL,
-				   src_id, 0, 2);
+				   src_id, 0);
 
 	/* sending vcpu invalid */
 	if (src_id >= KVM_MAX_VCPUS ||
@@ -1070,10 +1076,10 @@ static int __inject_set_prefix(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
 	struct kvm_s390_prefix_info *prefix = &li->irq.prefix;
 
-	VCPU_EVENT(vcpu, 3, "inject: set prefix to %x (from user)",
+	VCPU_EVENT(vcpu, 3, "inject: set prefix to %x",
 		   irq->u.prefix.address);
 	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_SIGP_SET_PREFIX,
-				   irq->u.prefix.address, 0, 2);
+				   irq->u.prefix.address, 0);
 
 	if (!is_vcpu_stopped(vcpu))
 		return -EBUSY;
@@ -1090,7 +1096,7 @@ static int __inject_sigp_stop(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
 	struct kvm_s390_stop_info *stop = &li->irq.stop;
 	int rc = 0;
 
-	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_SIGP_STOP, 0, 0, 2);
+	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_SIGP_STOP, 0, 0);
 
 	if (irq->u.stop.flags & ~KVM_S390_STOP_SUPP_FLAGS)
 		return -EINVAL;
@@ -1114,8 +1120,8 @@ static int __inject_sigp_restart(struct kvm_vcpu *vcpu,
 {
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
 
-	VCPU_EVENT(vcpu, 3, "inject: restart type %llx", irq->type);
-	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_RESTART, 0, 0, 2);
+	VCPU_EVENT(vcpu, 3, "%s", "inject: restart int");
+	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_RESTART, 0, 0);
 
 	set_bit(IRQ_PEND_RESTART, &li->pending_irqs);
 	return 0;
@@ -1126,10 +1132,10 @@ static int __inject_sigp_emergency(struct kvm_vcpu *vcpu,
 {
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
 
-	VCPU_EVENT(vcpu, 3, "inject: emergency %u\n",
+	VCPU_EVENT(vcpu, 4, "inject: emergency from cpu %u",
 		   irq->u.emerg.code);
 	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_EMERGENCY,
-				   irq->u.emerg.code, 0, 2);
+				   irq->u.emerg.code, 0);
 
 	set_bit(irq->u.emerg.code, li->sigp_emerg_pending);
 	set_bit(IRQ_PEND_EXT_EMERGENCY, &li->pending_irqs);
@@ -1142,10 +1148,10 @@ static int __inject_mchk(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
 	struct kvm_s390_mchk_info *mchk = &li->irq.mchk;
 
-	VCPU_EVENT(vcpu, 5, "inject: machine check parm64:%llx",
+	VCPU_EVENT(vcpu, 3, "inject: machine check mcic 0x%llx",
 		   irq->u.mchk.mcic);
 	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_MCHK, 0,
-				   irq->u.mchk.mcic, 2);
+				   irq->u.mchk.mcic);
 
 	/*
 	 * Because repressible machine checks can be indicated along with
@@ -1172,9 +1178,9 @@ static int __inject_ckc(struct kvm_vcpu *vcpu)
 {
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
 
-	VCPU_EVENT(vcpu, 3, "inject: type %x", KVM_S390_INT_CLOCK_COMP);
+	VCPU_EVENT(vcpu, 3, "%s", "inject: clock comparator external");
 	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_CLOCK_COMP,
-				   0, 0, 2);
+				   0, 0);
 
 	set_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs);
 	atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
@@ -1185,9 +1191,9 @@ static int __inject_cpu_timer(struct kvm_vcpu *vcpu)
 {
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
 
-	VCPU_EVENT(vcpu, 3, "inject: type %x", KVM_S390_INT_CPU_TIMER);
+	VCPU_EVENT(vcpu, 3, "%s", "inject: cpu timer external");
 	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_CPU_TIMER,
-				   0, 0, 2);
+				   0, 0);
 
 	set_bit(IRQ_PEND_EXT_CPU_TIMER, &li->pending_irqs);
 	atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
@@ -1435,20 +1441,20 @@ int kvm_s390_inject_vm(struct kvm *kvm,
 		inti->ext.ext_params2 = s390int->parm64;
 		break;
 	case KVM_S390_INT_SERVICE:
-		VM_EVENT(kvm, 5, "inject: sclp parm:%x", s390int->parm);
+		VM_EVENT(kvm, 4, "inject: sclp parm:%x", s390int->parm);
 		inti->ext.ext_params = s390int->parm;
 		break;
 	case KVM_S390_INT_PFAULT_DONE:
 		inti->ext.ext_params2 = s390int->parm64;
 		break;
 	case KVM_S390_MCHK:
-		VM_EVENT(kvm, 5, "inject: machine check parm64:%llx",
+		VM_EVENT(kvm, 3, "inject: machine check mcic 0x%llx",
 			 s390int->parm64);
 		inti->mchk.cr14 = s390int->parm; /* upper bits are not used */
 		inti->mchk.mcic = s390int->parm64;
 		break;
 	case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
-		if (inti->type & IOINT_AI_MASK)
+		if (inti->type & KVM_S390_INT_IO_AI_MASK)
 			VM_EVENT(kvm, 5, "%s", "inject: I/O (AI)");
 		else
 			VM_EVENT(kvm, 5, "inject: I/O css %x ss %x schid %04x",
@@ -1535,8 +1541,6 @@ static int do_inject_vcpu(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
 
 	switch (irq->type) {
 	case KVM_S390_PROGRAM_INT:
-		VCPU_EVENT(vcpu, 3, "inject: program check %d (from user)",
-			   irq->u.pgm.code);
 		rc = __inject_prog(vcpu, irq);
 		break;
 	case KVM_S390_SIGP_SET_PREFIX:
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index f32f843a3631..98df53c01343 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -28,6 +28,7 @@
 #include <linux/vmalloc.h>
 #include <asm/asm-offsets.h>
 #include <asm/lowcore.h>
+#include <asm/etr.h>
 #include <asm/pgtable.h>
 #include <asm/nmi.h>
 #include <asm/switch_to.h>
@@ -108,6 +109,9 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 	{ "diagnose_10", VCPU_STAT(diagnose_10) },
 	{ "diagnose_44", VCPU_STAT(diagnose_44) },
 	{ "diagnose_9c", VCPU_STAT(diagnose_9c) },
+	{ "diagnose_258", VCPU_STAT(diagnose_258) },
+	{ "diagnose_308", VCPU_STAT(diagnose_308) },
+	{ "diagnose_500", VCPU_STAT(diagnose_500) },
 	{ NULL }
 };
 
@@ -124,6 +128,7 @@ unsigned long kvm_s390_fac_list_mask_size(void)
 }
 
 static struct gmap_notifier gmap_notifier;
+debug_info_t *kvm_s390_dbf;
 
 /* Section: not file related */
 int kvm_arch_hardware_enable(void)
@@ -134,24 +139,69 @@ int kvm_arch_hardware_enable(void)
 
 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address);
 
+/*
+ * This callback is executed during stop_machine(). All CPUs are therefore
+ * temporarily stopped. In order not to change guest behavior, we have to
+ * disable preemption whenever we touch the epoch of kvm and the VCPUs,
+ * so a CPU won't be stopped while calculating with the epoch.
+ */
+static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
+			  void *v)
+{
+	struct kvm *kvm;
+	struct kvm_vcpu *vcpu;
+	int i;
+	unsigned long long *delta = v;
+
+	list_for_each_entry(kvm, &vm_list, vm_list) {
+		kvm->arch.epoch -= *delta;
+		kvm_for_each_vcpu(i, vcpu, kvm) {
+			vcpu->arch.sie_block->epoch -= *delta;
+		}
+	}
+	return NOTIFY_OK;
+}
+
+static struct notifier_block kvm_clock_notifier = {
+	.notifier_call = kvm_clock_sync,
+};
+
 int kvm_arch_hardware_setup(void)
 {
 	gmap_notifier.notifier_call = kvm_gmap_notifier;
 	gmap_register_ipte_notifier(&gmap_notifier);
+	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
+				       &kvm_clock_notifier);
 	return 0;
 }
 
 void kvm_arch_hardware_unsetup(void)
 {
 	gmap_unregister_ipte_notifier(&gmap_notifier);
+	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
+					 &kvm_clock_notifier);
 }
 
 int kvm_arch_init(void *opaque)
 {
+	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
+	if (!kvm_s390_dbf)
+		return -ENOMEM;
+
+	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
+		debug_unregister(kvm_s390_dbf);
+		return -ENOMEM;
+	}
+
 	/* Register floating interrupt controller interface. */
 	return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
 }
 
+void kvm_arch_exit(void)
+{
+	debug_unregister(kvm_s390_dbf);
+}
+
 /* Section: device related */
 long kvm_arch_dev_ioctl(struct file *filp,
 			unsigned int ioctl, unsigned long arg)
@@ -281,10 +331,12 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
 
 	switch (cap->cap) {
 	case KVM_CAP_S390_IRQCHIP:
+		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
 		kvm->arch.use_irqchip = 1;
 		r = 0;
 		break;
 	case KVM_CAP_S390_USER_SIGP:
+		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
 		kvm->arch.user_sigp = 1;
 		r = 0;
 		break;
@@ -295,8 +347,11 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
 			r = 0;
 		} else
 			r = -EINVAL;
+		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
+			 r ? "(not available)" : "(success)");
 		break;
 	case KVM_CAP_S390_USER_STSI:
+		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
 		kvm->arch.user_stsi = 1;
 		r = 0;
 		break;
@@ -314,6 +369,8 @@ static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *att
 	switch (attr->attr) {
 	case KVM_S390_VM_MEM_LIMIT_SIZE:
 		ret = 0;
+		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
+			 kvm->arch.gmap->asce_end);
 		if (put_user(kvm->arch.gmap->asce_end, (u64 __user *)attr->addr))
 			ret = -EFAULT;
 		break;
@@ -330,7 +387,13 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att
 	unsigned int idx;
 	switch (attr->attr) {
 	case KVM_S390_VM_MEM_ENABLE_CMMA:
+		/* enable CMMA only for z10 and later (EDAT_1) */
+		ret = -EINVAL;
+		if (!MACHINE_IS_LPAR || !MACHINE_HAS_EDAT1)
+			break;
+
 		ret = -EBUSY;
+		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
 		mutex_lock(&kvm->lock);
 		if (atomic_read(&kvm->online_vcpus) == 0) {
 			kvm->arch.use_cmma = 1;
@@ -339,6 +402,11 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att
 		mutex_unlock(&kvm->lock);
 		break;
 	case KVM_S390_VM_MEM_CLR_CMMA:
+		ret = -EINVAL;
+		if (!kvm->arch.use_cmma)
+			break;
+
+		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
 		mutex_lock(&kvm->lock);
 		idx = srcu_read_lock(&kvm->srcu);
 		s390_reset_cmma(kvm->arch.gmap->mm);
@@ -374,6 +442,7 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att
 			}
 		}
 		mutex_unlock(&kvm->lock);
+		VM_EVENT(kvm, 3, "SET: max guest memory: %lu bytes", new_limit);
 		break;
 	}
 	default:
@@ -400,22 +469,26 @@ static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
 			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
 		kvm->arch.crypto.aes_kw = 1;
+		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
 		break;
 	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
 		get_random_bytes(
 			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
 		kvm->arch.crypto.dea_kw = 1;
+		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
 		break;
 	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
 		kvm->arch.crypto.aes_kw = 0;
 		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
+		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
 		break;
 	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
 		kvm->arch.crypto.dea_kw = 0;
 		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
+		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
 		break;
 	default:
 		mutex_unlock(&kvm->lock);
@@ -440,6 +513,7 @@ static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
 
 	if (gtod_high != 0)
 		return -EINVAL;
+	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x\n", gtod_high);
 
 	return 0;
 }
@@ -459,12 +533,15 @@ static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
 		return r;
 
 	mutex_lock(&kvm->lock);
+	preempt_disable();
 	kvm->arch.epoch = gtod - host_tod;
 	kvm_s390_vcpu_block_all(kvm);
 	kvm_for_each_vcpu(vcpu_idx, cur_vcpu, kvm)
 		cur_vcpu->arch.sie_block->epoch = kvm->arch.epoch;
 	kvm_s390_vcpu_unblock_all(kvm);
+	preempt_enable();
 	mutex_unlock(&kvm->lock);
+	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx\n", gtod);
 	return 0;
 }
 
@@ -496,6 +573,7 @@ static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
 	if (copy_to_user((void __user *)attr->addr, &gtod_high,
 					 sizeof(gtod_high)))
 		return -EFAULT;
+	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x\n", gtod_high);
 
 	return 0;
 }
@@ -509,9 +587,12 @@ static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
 	if (r)
 		return r;
 
+	preempt_disable();
 	gtod = host_tod + kvm->arch.epoch;
+	preempt_enable();
 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
 		return -EFAULT;
+	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx\n", gtod);
 
 	return 0;
 }
@@ -821,7 +902,9 @@ static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
 	}
 
 	/* Enable storage key handling for the guest */
-	s390_enable_skey();
+	r = s390_enable_skey();
+	if (r)
+		goto out;
 
 	for (i = 0; i < args->count; i++) {
 		hva = gfn_to_hva(kvm, args->start_gfn + i);
@@ -879,8 +962,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		if (kvm->arch.use_irqchip) {
 			/* Set up dummy routing. */
 			memset(&routing, 0, sizeof(routing));
-			kvm_set_irq_routing(kvm, &routing, 0, 0);
-			r = 0;
+			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
 		}
 		break;
 	}
@@ -1043,7 +1125,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 
 	sprintf(debug_name, "kvm-%u", current->pid);
 
-	kvm->arch.dbf = debug_register(debug_name, 8, 2, 8 * sizeof(long));
+	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
 	if (!kvm->arch.dbf)
 		goto out_err;
 
@@ -1086,7 +1168,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	mutex_init(&kvm->arch.ipte_mutex);
 
 	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
-	VM_EVENT(kvm, 3, "%s", "vm created");
+	VM_EVENT(kvm, 3, "vm created with type %lu", type);
 
 	if (type & KVM_VM_S390_UCONTROL) {
 		kvm->arch.gmap = NULL;
@@ -1103,6 +1185,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	kvm->arch.epoch = 0;
 
 	spin_lock_init(&kvm->arch.start_stop_lock);
+	KVM_EVENT(3, "vm 0x%p created by pid %u", kvm, current->pid);
 
 	return 0;
 out_err:
@@ -1110,6 +1193,7 @@ out_err:
 	free_page((unsigned long)kvm->arch.model.fac);
 	debug_unregister(kvm->arch.dbf);
 	free_page((unsigned long)(kvm->arch.sca));
+	KVM_EVENT(3, "creation of vm failed: %d", rc);
 	return rc;
 }
 
@@ -1131,7 +1215,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 	if (kvm_is_ucontrol(vcpu->kvm))
 		gmap_free(vcpu->arch.gmap);
 
-	if (kvm_s390_cmma_enabled(vcpu->kvm))
+	if (vcpu->kvm->arch.use_cmma)
 		kvm_s390_vcpu_unsetup_cmma(vcpu);
 	free_page((unsigned long)(vcpu->arch.sie_block));
 
@@ -1166,6 +1250,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
 		gmap_free(kvm->arch.gmap);
 	kvm_s390_destroy_adapters(kvm);
 	kvm_s390_clear_float_irqs(kvm);
+	KVM_EVENT(3, "vm 0x%p destroyed", kvm);
 }
 
 /* Section: vcpu related */
@@ -1198,21 +1283,54 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
+/*
+ * Backs up the current FP/VX register save area on a particular
+ * destination.  Used to switch between different register save
+ * areas.
+ */
+static inline void save_fpu_to(struct fpu *dst)
+{
+	dst->fpc = current->thread.fpu.fpc;
+	dst->flags = current->thread.fpu.flags;
+	dst->regs = current->thread.fpu.regs;
+}
+
+/*
+ * Switches the FP/VX register save area from which to lazy
+ * restore register contents.
+ */
+static inline void load_fpu_from(struct fpu *from)
+{
+	current->thread.fpu.fpc = from->fpc;
+	current->thread.fpu.flags = from->flags;
+	current->thread.fpu.regs = from->regs;
+}
+
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
-	save_fp_ctl(&vcpu->arch.host_fpregs.fpc);
-	if (test_kvm_facility(vcpu->kvm, 129))
-		save_vx_regs((__vector128 *)&vcpu->arch.host_vregs->vrs);
-	else
-		save_fp_regs(vcpu->arch.host_fpregs.fprs);
-	save_access_regs(vcpu->arch.host_acrs);
+	/* Save host register state */
+	save_fpu_regs();
+	save_fpu_to(&vcpu->arch.host_fpregs);
+
 	if (test_kvm_facility(vcpu->kvm, 129)) {
-		restore_fp_ctl(&vcpu->run->s.regs.fpc);
-		restore_vx_regs((__vector128 *)&vcpu->run->s.regs.vrs);
-	} else {
-		restore_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
-		restore_fp_regs(vcpu->arch.guest_fpregs.fprs);
-	}
+		current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
+		current->thread.fpu.flags = FPU_USE_VX;
+		/*
+		 * Use the register save area in the SIE-control block
+		 * for register restore and save in kvm_arch_vcpu_put()
+		 */
+		current->thread.fpu.vxrs =
+			(__vector128 *)&vcpu->run->s.regs.vrs;
+		/* Always enable the vector extension for KVM */
+		__ctl_set_vx();
+	} else
+		load_fpu_from(&vcpu->arch.guest_fpregs);
+
+	if (test_fp_ctl(current->thread.fpu.fpc))
+		/* User space provided an invalid FPC, let's clear it */
+		current->thread.fpu.fpc = 0;
+
+	save_access_regs(vcpu->arch.host_acrs);
 	restore_access_regs(vcpu->run->s.regs.acrs);
 	gmap_enable(vcpu->arch.gmap);
 	atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
@@ -1222,19 +1340,22 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 {
 	atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
 	gmap_disable(vcpu->arch.gmap);
-	if (test_kvm_facility(vcpu->kvm, 129)) {
-		save_fp_ctl(&vcpu->run->s.regs.fpc);
-		save_vx_regs((__vector128 *)&vcpu->run->s.regs.vrs);
-	} else {
-		save_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
-		save_fp_regs(vcpu->arch.guest_fpregs.fprs);
-	}
-	save_access_regs(vcpu->run->s.regs.acrs);
-	restore_fp_ctl(&vcpu->arch.host_fpregs.fpc);
+
+	save_fpu_regs();
+
 	if (test_kvm_facility(vcpu->kvm, 129))
-		restore_vx_regs((__vector128 *)&vcpu->arch.host_vregs->vrs);
+		/*
+		 * kvm_arch_vcpu_load() set up the register save area to
+		 * the &vcpu->run->s.regs.vrs and, thus, the vector registers
+		 * are already saved.  Only the floating-point control must be
+		 * copied.
+		 */
+		vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
 	else
-		restore_fp_regs(vcpu->arch.host_fpregs.fprs);
+		save_fpu_to(&vcpu->arch.guest_fpregs);
+	load_fpu_from(&vcpu->arch.host_fpregs);
+
+	save_access_regs(vcpu->run->s.regs.acrs);
 	restore_access_regs(vcpu->arch.host_acrs);
 }
 
@@ -1264,7 +1385,9 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
 {
 	mutex_lock(&vcpu->kvm->lock);
+	preempt_disable();
 	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
+	preempt_enable();
 	mutex_unlock(&vcpu->kvm->lock);
 	if (!kvm_is_ucontrol(vcpu->kvm))
 		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
@@ -1342,7 +1465,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 	}
 	vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
 
-	if (kvm_s390_cmma_enabled(vcpu->kvm)) {
+	if (vcpu->kvm->arch.use_cmma) {
 		rc = kvm_s390_vcpu_setup_cmma(vcpu);
 		if (rc)
 			return rc;
@@ -1377,7 +1500,6 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
 
 	vcpu->arch.sie_block = &sie_page->sie_block;
 	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
-	vcpu->arch.host_vregs = &sie_page->vregs;
 
 	vcpu->arch.sie_block->icpua = id;
 	if (!kvm_is_ucontrol(kvm)) {
@@ -1399,6 +1521,19 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
 	vcpu->arch.local_int.wq = &vcpu->wq;
 	vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
 
+	/*
+	 * Allocate a save area for floating-point registers.  If the vector
+	 * extension is available, register contents are saved in the SIE
+	 * control block.  The allocated save area is still required in
+	 * particular places, for example, in kvm_s390_vcpu_store_status().
+	 */
+	vcpu->arch.guest_fpregs.fprs = kzalloc(sizeof(freg_t) * __NUM_FPRS,
+					       GFP_KERNEL);
+	if (!vcpu->arch.guest_fpregs.fprs) {
+		rc = -ENOMEM;
+		goto out_free_sie_block;
+	}
+
 	rc = kvm_vcpu_init(vcpu, kvm, id);
 	if (rc)
 		goto out_free_sie_block;
@@ -1621,16 +1756,16 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 {
 	if (test_fp_ctl(fpu->fpc))
 		return -EINVAL;
-	memcpy(&vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
+	memcpy(vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
 	vcpu->arch.guest_fpregs.fpc = fpu->fpc;
-	restore_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
-	restore_fp_regs(vcpu->arch.guest_fpregs.fprs);
+	save_fpu_regs();
+	load_fpu_from(&vcpu->arch.guest_fpregs);
 	return 0;
 }
 
 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 {
-	memcpy(&fpu->fprs, &vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
+	memcpy(&fpu->fprs, vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
 	fpu->fpc = vcpu->arch.guest_fpregs.fpc;
 	return 0;
 }
@@ -1723,18 +1858,6 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
 	return rc;
 }
 
-bool kvm_s390_cmma_enabled(struct kvm *kvm)
-{
-	if (!MACHINE_IS_LPAR)
-		return false;
-	/* only enable for z10 and later */
-	if (!MACHINE_HAS_EDAT1)
-		return false;
-	if (!kvm->arch.use_cmma)
-		return false;
-	return true;
-}
-
 static bool ibs_enabled(struct kvm_vcpu *vcpu)
 {
 	return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
@@ -2193,8 +2316,21 @@ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
 	 * copying in vcpu load/put. Lets update our copies before we save
 	 * it into the save area
 	 */
-	save_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
-	save_fp_regs(vcpu->arch.guest_fpregs.fprs);
+	save_fpu_regs();
+	if (test_kvm_facility(vcpu->kvm, 129)) {
+		/*
+		 * If the vector extension is available, the vector registers
+		 * which overlaps with floating-point registers are saved in
+		 * the SIE-control block.  Hence, extract the floating-point
+		 * registers and the FPC value and store them in the
+		 * guest_fpregs structure.
+		 */
+		WARN_ON(!is_vx_task(current));	  /* XXX remove later */
+		vcpu->arch.guest_fpregs.fpc = current->thread.fpu.fpc;
+		convert_vx_to_fp(vcpu->arch.guest_fpregs.fprs,
+				 current->thread.fpu.vxrs);
+	} else
+		save_fpu_to(&vcpu->arch.guest_fpregs);
 	save_access_regs(vcpu->run->s.regs.acrs);
 
 	return kvm_s390_store_status_unloaded(vcpu, addr);
@@ -2221,10 +2357,13 @@ int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr)
 
 	/*
 	 * The guest VXRS are in the host VXRs due to the lazy
-	 * copying in vcpu load/put. Let's update our copies before we save
-	 * it into the save area.
+	 * copying in vcpu load/put. We can simply call save_fpu_regs()
+	 * to save the current register state because we are in the
+	 * middle of a load/put cycle.
+	 *
+	 * Let's update our copies before we save it into the save area.
 	 */
-	save_vx_regs((__vector128 *)&vcpu->run->s.regs.vrs);
+	save_fpu_regs();
 
 	return kvm_s390_store_adtl_status_unloaded(vcpu, addr);
 }
@@ -2340,6 +2479,7 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
 	case KVM_CAP_S390_CSS_SUPPORT:
 		if (!vcpu->kvm->arch.css_support) {
 			vcpu->kvm->arch.css_support = 1;
+			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
 			trace_kvm_s390_enable_css(vcpu->kvm);
 		}
 		r = 0;
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index c5704786e473..c446aabf60d3 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -27,6 +27,13 @@ typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu);
 #define TDB_FORMAT1		1
 #define IS_ITDB_VALID(vcpu)	((*(char *)vcpu->arch.sie_block->itdba == TDB_FORMAT1))
 
+extern debug_info_t *kvm_s390_dbf;
+#define KVM_EVENT(d_loglevel, d_string, d_args...)\
+do { \
+	debug_sprintf_event(kvm_s390_dbf, d_loglevel, d_string "\n", \
+	  d_args); \
+} while (0)
+
 #define VM_EVENT(d_kvm, d_loglevel, d_string, d_args...)\
 do { \
 	debug_sprintf_event(d_kvm->arch.dbf, d_loglevel, d_string "\n", \
@@ -65,6 +72,8 @@ static inline u32 kvm_s390_get_prefix(struct kvm_vcpu *vcpu)
 
 static inline void kvm_s390_set_prefix(struct kvm_vcpu *vcpu, u32 prefix)
 {
+	VCPU_EVENT(vcpu, 3, "set prefix of cpu %03u to 0x%x", vcpu->vcpu_id,
+		   prefix);
 	vcpu->arch.sie_block->prefix = prefix >> GUEST_PREFIX_SHIFT;
 	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
 	kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
@@ -217,8 +226,6 @@ void exit_sie(struct kvm_vcpu *vcpu);
 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu);
 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu);
 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu);
-/* is cmma enabled */
-bool kvm_s390_cmma_enabled(struct kvm *kvm);
 unsigned long kvm_s390_fac_list_mask_size(void);
 extern unsigned long kvm_s390_fac_list_mask[];
 
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index ad4242245771..4d21dc4d1a84 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -53,11 +53,14 @@ static int handle_set_clock(struct kvm_vcpu *vcpu)
 		kvm_s390_set_psw_cc(vcpu, 3);
 		return 0;
 	}
+	VCPU_EVENT(vcpu, 3, "SCK: setting guest TOD to 0x%llx", val);
 	val = (val - hostclk) & ~0x3fUL;
 
 	mutex_lock(&vcpu->kvm->lock);
+	preempt_disable();
 	kvm_for_each_vcpu(i, cpup, vcpu->kvm)
 		cpup->arch.sie_block->epoch = val;
+	preempt_enable();
 	mutex_unlock(&vcpu->kvm->lock);
 
 	kvm_s390_set_psw_cc(vcpu, 0);
@@ -98,8 +101,6 @@ static int handle_set_prefix(struct kvm_vcpu *vcpu)
 		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
 
 	kvm_s390_set_prefix(vcpu, address);
-
-	VCPU_EVENT(vcpu, 5, "setting prefix to %x", address);
 	trace_kvm_s390_handle_prefix(vcpu, 1, address);
 	return 0;
 }
@@ -129,7 +130,7 @@ static int handle_store_prefix(struct kvm_vcpu *vcpu)
 	if (rc)
 		return kvm_s390_inject_prog_cond(vcpu, rc);
 
-	VCPU_EVENT(vcpu, 5, "storing prefix to %x", address);
+	VCPU_EVENT(vcpu, 3, "STPX: storing prefix 0x%x into 0x%llx", address, operand2);
 	trace_kvm_s390_handle_prefix(vcpu, 0, address);
 	return 0;
 }
@@ -155,7 +156,7 @@ static int handle_store_cpu_address(struct kvm_vcpu *vcpu)
 	if (rc)
 		return kvm_s390_inject_prog_cond(vcpu, rc);
 
-	VCPU_EVENT(vcpu, 5, "storing cpu address to %llx", ga);
+	VCPU_EVENT(vcpu, 3, "STAP: storing cpu address (%u) to 0x%llx", vcpu_id, ga);
 	trace_kvm_s390_handle_stap(vcpu, ga);
 	return 0;
 }
@@ -167,6 +168,7 @@ static int __skey_check_enable(struct kvm_vcpu *vcpu)
 		return rc;
 
 	rc = s390_enable_skey();
+	VCPU_EVENT(vcpu, 3, "%s", "enabling storage keys for guest");
 	trace_kvm_s390_skey_related_inst(vcpu);
 	vcpu->arch.sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE | ICTL_RRBE);
 	return rc;
@@ -370,7 +372,7 @@ static int handle_stfl(struct kvm_vcpu *vcpu)
 			    &fac, sizeof(fac));
 	if (rc)
 		return rc;
-	VCPU_EVENT(vcpu, 5, "store facility list value %x", fac);
+	VCPU_EVENT(vcpu, 3, "STFL: store facility list 0x%x", fac);
 	trace_kvm_s390_handle_stfl(vcpu, fac);
 	return 0;
 }
@@ -468,7 +470,7 @@ static int handle_stidp(struct kvm_vcpu *vcpu)
 	if (rc)
 		return kvm_s390_inject_prog_cond(vcpu, rc);
 
-	VCPU_EVENT(vcpu, 5, "%s", "store cpu id");
+	VCPU_EVENT(vcpu, 3, "STIDP: store cpu id 0x%llx", stidp_data);
 	return 0;
 }
 
@@ -521,7 +523,7 @@ static int handle_stsi(struct kvm_vcpu *vcpu)
 	ar_t ar;
 
 	vcpu->stat.instruction_stsi++;
-	VCPU_EVENT(vcpu, 4, "stsi: fc: %x sel1: %x sel2: %x", fc, sel1, sel2);
+	VCPU_EVENT(vcpu, 3, "STSI: fc: %u sel1: %u sel2: %u", fc, sel1, sel2);
 
 	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
 		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
@@ -758,10 +760,10 @@ static int handle_essa(struct kvm_vcpu *vcpu)
 	struct gmap *gmap;
 	int i;
 
-	VCPU_EVENT(vcpu, 5, "cmma release %d pages", entries);
+	VCPU_EVENT(vcpu, 4, "ESSA: release %d pages", entries);
 	gmap = vcpu->arch.gmap;
 	vcpu->stat.instruction_essa++;
-	if (!kvm_s390_cmma_enabled(vcpu->kvm))
+	if (!vcpu->kvm->arch.use_cmma)
 		return kvm_s390_inject_program_int(vcpu, PGM_OPERATION);
 
 	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
@@ -829,7 +831,7 @@ int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu)
 	if (ga & 3)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
-	VCPU_EVENT(vcpu, 5, "lctl r1:%x, r3:%x, addr:%llx", reg1, reg3, ga);
+	VCPU_EVENT(vcpu, 4, "LCTL: r1:%d, r3:%d, addr: 0x%llx", reg1, reg3, ga);
 	trace_kvm_s390_handle_lctl(vcpu, 0, reg1, reg3, ga);
 
 	nr_regs = ((reg3 - reg1) & 0xf) + 1;
@@ -868,7 +870,7 @@ int kvm_s390_handle_stctl(struct kvm_vcpu *vcpu)
 	if (ga & 3)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
-	VCPU_EVENT(vcpu, 5, "stctl r1:%x, r3:%x, addr:%llx", reg1, reg3, ga);
+	VCPU_EVENT(vcpu, 4, "STCTL r1:%d, r3:%d, addr: 0x%llx", reg1, reg3, ga);
 	trace_kvm_s390_handle_stctl(vcpu, 0, reg1, reg3, ga);
 
 	reg = reg1;
@@ -902,7 +904,7 @@ static int handle_lctlg(struct kvm_vcpu *vcpu)
 	if (ga & 7)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
-	VCPU_EVENT(vcpu, 5, "lctlg r1:%x, r3:%x, addr:%llx", reg1, reg3, ga);
+	VCPU_EVENT(vcpu, 4, "LCTLG: r1:%d, r3:%d, addr: 0x%llx", reg1, reg3, ga);
 	trace_kvm_s390_handle_lctl(vcpu, 1, reg1, reg3, ga);
 
 	nr_regs = ((reg3 - reg1) & 0xf) + 1;
@@ -940,7 +942,7 @@ static int handle_stctg(struct kvm_vcpu *vcpu)
 	if (ga & 7)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
-	VCPU_EVENT(vcpu, 5, "stctg r1:%x, r3:%x, addr:%llx", reg1, reg3, ga);
+	VCPU_EVENT(vcpu, 4, "STCTG r1:%d, r3:%d, addr: 0x%llx", reg1, reg3, ga);
 	trace_kvm_s390_handle_stctl(vcpu, 1, reg1, reg3, ga);
 
 	reg = reg1;
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
index 72e58bd2bee7..da690b69f9fe 100644
--- a/arch/s390/kvm/sigp.c
+++ b/arch/s390/kvm/sigp.c
@@ -205,9 +205,6 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu,
 		*reg &= 0xffffffff00000000UL;
 		*reg |= SIGP_STATUS_INCORRECT_STATE;
 		return SIGP_CC_STATUS_STORED;
-	} else if (rc == 0) {
-		VCPU_EVENT(vcpu, 4, "set prefix of cpu %02x to %x",
-			   dst_vcpu->vcpu_id, irq.u.prefix.address);
 	}
 
 	return rc;
@@ -371,7 +368,8 @@ static int handle_sigp_dst(struct kvm_vcpu *vcpu, u8 order_code,
 	return rc;
 }
 
-static int handle_sigp_order_in_user_space(struct kvm_vcpu *vcpu, u8 order_code)
+static int handle_sigp_order_in_user_space(struct kvm_vcpu *vcpu, u8 order_code,
+					   u16 cpu_addr)
 {
 	if (!vcpu->kvm->arch.user_sigp)
 		return 0;
@@ -414,9 +412,8 @@ static int handle_sigp_order_in_user_space(struct kvm_vcpu *vcpu, u8 order_code)
 	default:
 		vcpu->stat.instruction_sigp_unknown++;
 	}
-
-	VCPU_EVENT(vcpu, 4, "sigp order %u: completely handled in user space",
-		   order_code);
+	VCPU_EVENT(vcpu, 3, "SIGP: order %u for CPU %d handled in userspace",
+		   order_code, cpu_addr);
 
 	return 1;
 }
@@ -435,7 +432,7 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
 		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
 
 	order_code = kvm_s390_get_base_disp_rs(vcpu, NULL);
-	if (handle_sigp_order_in_user_space(vcpu, order_code))
+	if (handle_sigp_order_in_user_space(vcpu, order_code, cpu_addr))
 		return -EOPNOTSUPP;
 
 	if (r1 % 2)
diff --git a/arch/s390/kvm/trace-s390.h b/arch/s390/kvm/trace-s390.h
index 3208d33a48cb..cc1d6c68356f 100644
--- a/arch/s390/kvm/trace-s390.h
+++ b/arch/s390/kvm/trace-s390.h
@@ -105,11 +105,22 @@ TRACE_EVENT(kvm_s390_vcpu_start_stop,
 	{KVM_S390_PROGRAM_INT, "program interrupt"},			\
 	{KVM_S390_SIGP_SET_PREFIX, "sigp set prefix"},			\
 	{KVM_S390_RESTART, "sigp restart"},				\
+	{KVM_S390_INT_PFAULT_INIT, "pfault init"},			\
+	{KVM_S390_INT_PFAULT_DONE, "pfault done"},			\
+	{KVM_S390_MCHK, "machine check"},				\
+	{KVM_S390_INT_CLOCK_COMP, "clock comparator"},			\
+	{KVM_S390_INT_CPU_TIMER, "cpu timer"},				\
 	{KVM_S390_INT_VIRTIO, "virtio interrupt"},			\
 	{KVM_S390_INT_SERVICE, "sclp interrupt"},			\
 	{KVM_S390_INT_EMERGENCY, "sigp emergency"},			\
 	{KVM_S390_INT_EXTERNAL_CALL, "sigp ext call"}
 
+#define get_irq_name(__type) \
+	(__type > KVM_S390_INT_IO_MAX ? \
+	__print_symbolic(__type, kvm_s390_int_type) : \
+		(__type & KVM_S390_INT_IO_AI_MASK ? \
+		 "adapter I/O interrupt" : "subchannel I/O interrupt"))
+
 TRACE_EVENT(kvm_s390_inject_vm,
 	    TP_PROTO(__u64 type, __u32 parm, __u64 parm64, int who),
 	    TP_ARGS(type, parm, parm64, who),
@@ -131,22 +142,19 @@ TRACE_EVENT(kvm_s390_inject_vm,
 	    TP_printk("inject%s: type:%x (%s) parm:%x parm64:%llx",
 		      (__entry->who == 1) ? " (from kernel)" :
 		      (__entry->who == 2) ? " (from user)" : "",
-		      __entry->inttype,
-		      __print_symbolic(__entry->inttype, kvm_s390_int_type),
+		      __entry->inttype, get_irq_name(__entry->inttype),
 		      __entry->parm, __entry->parm64)
 	);
 
 TRACE_EVENT(kvm_s390_inject_vcpu,
-	    TP_PROTO(unsigned int id, __u64 type, __u32 parm, __u64 parm64, \
-		     int who),
-	    TP_ARGS(id, type, parm, parm64, who),
+	    TP_PROTO(unsigned int id, __u64 type, __u32 parm, __u64 parm64),
+	    TP_ARGS(id, type, parm, parm64),
 
 	    TP_STRUCT__entry(
 		    __field(int, id)
 		    __field(__u32, inttype)
 		    __field(__u32, parm)
 		    __field(__u64, parm64)
-		    __field(int, who)
 		    ),
 
 	    TP_fast_assign(
@@ -154,15 +162,12 @@ TRACE_EVENT(kvm_s390_inject_vcpu,
 		    __entry->inttype = type & 0x00000000ffffffff;
 		    __entry->parm = parm;
 		    __entry->parm64 = parm64;
-		    __entry->who = who;
 		    ),
 
-	    TP_printk("inject%s (vcpu %d): type:%x (%s) parm:%x parm64:%llx",
-		      (__entry->who == 1) ? " (from kernel)" :
-		      (__entry->who == 2) ? " (from user)" : "",
+	    TP_printk("inject (vcpu %d): type:%x (%s) parm:%x parm64:%llx",
 		      __entry->id, __entry->inttype,
-		      __print_symbolic(__entry->inttype, kvm_s390_int_type),
-		      __entry->parm, __entry->parm64)
+		      get_irq_name(__entry->inttype), __entry->parm,
+		      __entry->parm64)
 	);
 
 /*
@@ -189,8 +194,8 @@ TRACE_EVENT(kvm_s390_deliver_interrupt,
 	    TP_printk("deliver interrupt (vcpu %d): type:%x (%s) "	\
 		      "data:%08llx %016llx",
 		      __entry->id, __entry->inttype,
-		      __print_symbolic(__entry->inttype, kvm_s390_int_type),
-		      __entry->data0, __entry->data1)
+		      get_irq_name(__entry->inttype), __entry->data0,
+		      __entry->data1)
 	);
 
 /*
diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c
index 16dc42d83f93..246a7eb4b680 100644
--- a/arch/s390/lib/delay.c
+++ b/arch/s390/lib/delay.c
@@ -26,6 +26,7 @@ void __delay(unsigned long loops)
          */
 	asm volatile("0: brct %0,0b" : : "d" ((loops/2) + 1));
 }
+EXPORT_SYMBOL(__delay);
 
 static void __udelay_disabled(unsigned long long usecs)
 {
diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c
index 4614d415bb58..0d002a746bec 100644
--- a/arch/s390/lib/uaccess.c
+++ b/arch/s390/lib/uaccess.c
@@ -370,22 +370,9 @@ long __strncpy_from_user(char *dst, const char __user *src, long size)
 }
 EXPORT_SYMBOL(__strncpy_from_user);
 
-/*
- * The "old" uaccess variant without mvcos can be enforced with the
- * uaccess_primary kernel parameter. This is mainly for debugging purposes.
- */
-static int uaccess_primary __initdata;
-
-static int __init parse_uaccess_pt(char *__unused)
-{
-	uaccess_primary = 1;
-	return 0;
-}
-early_param("uaccess_primary", parse_uaccess_pt);
-
 static int __init uaccess_init(void)
 {
-	if (!uaccess_primary && test_facility(27))
+	if (test_facility(27))
 		static_key_slow_inc(&have_mvcos);
 	return 0;
 }
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 4c8f5d7f9c23..f985856a538b 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -646,7 +646,7 @@ static void pfault_interrupt(struct ext_code ext_code,
 		return;
 	inc_irq_stat(IRQEXT_PFL);
 	/* Get the token (= pid of the affected task). */
-	pid = sizeof(void *) == 4 ? param32 : param64;
+	pid = param64;
 	rcu_read_lock();
 	tsk = find_task_by_pid_ns(pid, &init_pid_ns);
 	if (tsk)
diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c
index 1eb41bb3010c..12bbf0e8478f 100644
--- a/arch/s390/mm/gup.c
+++ b/arch/s390/mm/gup.c
@@ -30,6 +30,9 @@ static inline int gup_pte_range(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
 	do {
 		pte = *ptep;
 		barrier();
+		/* Similar to the PMD case, NUMA hinting must take slow path */
+		if (pte_protnone(pte))
+			return 0;
 		if ((pte_val(pte) & mask) != 0)
 			return 0;
 		VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
@@ -125,6 +128,13 @@ static inline int gup_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr,
 		if (pmd_none(pmd) || pmd_trans_splitting(pmd))
 			return 0;
 		if (unlikely(pmd_large(pmd))) {
+			/*
+			 * NUMA hinting faults need to be handled in the GUP
+			 * slowpath for accounting purposes and so that they
+			 * can be serialised against THP migration.
+			 */
+			if (pmd_protnone(pmd))
+				return 0;
 			if (!gup_huge_pmd(pmdp, pmd, addr, next,
 					  write, pages, nr))
 				return 0;
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 76e873748b56..2963b563621c 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -27,6 +27,7 @@
 #include <linux/initrd.h>
 #include <linux/export.h>
 #include <linux/gfp.h>
+#include <linux/memblock.h>
 #include <asm/processor.h>
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -138,7 +139,7 @@ void __init mem_init(void)
 	cpumask_set_cpu(0, mm_cpumask(&init_mm));
 	atomic_set(&init_mm.context.attach_count, 1);
 
-        max_mapnr = max_low_pfn;
+	set_max_mapnr(max_low_pfn);
         high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
 
 	/* Setup guest page hinting */
@@ -170,37 +171,36 @@ void __init free_initrd_mem(unsigned long start, unsigned long end)
 #ifdef CONFIG_MEMORY_HOTPLUG
 int arch_add_memory(int nid, u64 start, u64 size)
 {
-	unsigned long zone_start_pfn, zone_end_pfn, nr_pages;
+	unsigned long normal_end_pfn = PFN_DOWN(memblock_end_of_DRAM());
+	unsigned long dma_end_pfn = PFN_DOWN(MAX_DMA_ADDRESS);
 	unsigned long start_pfn = PFN_DOWN(start);
 	unsigned long size_pages = PFN_DOWN(size);
-	struct zone *zone;
-	int rc;
+	unsigned long nr_pages;
+	int rc, zone_enum;
 
 	rc = vmem_add_mapping(start, size);
 	if (rc)
 		return rc;
-	for_each_zone(zone) {
-		if (zone_idx(zone) != ZONE_MOVABLE) {
-			/* Add range within existing zone limits */
-			zone_start_pfn = zone->zone_start_pfn;
-			zone_end_pfn = zone->zone_start_pfn +
-				       zone->spanned_pages;
+
+	while (size_pages > 0) {
+		if (start_pfn < dma_end_pfn) {
+			nr_pages = (start_pfn + size_pages > dma_end_pfn) ?
+				   dma_end_pfn - start_pfn : size_pages;
+			zone_enum = ZONE_DMA;
+		} else if (start_pfn < normal_end_pfn) {
+			nr_pages = (start_pfn + size_pages > normal_end_pfn) ?
+				   normal_end_pfn - start_pfn : size_pages;
+			zone_enum = ZONE_NORMAL;
 		} else {
-			/* Add remaining range to ZONE_MOVABLE */
-			zone_start_pfn = start_pfn;
-			zone_end_pfn = start_pfn + size_pages;
+			nr_pages = size_pages;
+			zone_enum = ZONE_MOVABLE;
 		}
-		if (start_pfn < zone_start_pfn || start_pfn >= zone_end_pfn)
-			continue;
-		nr_pages = (start_pfn + size_pages > zone_end_pfn) ?
-			   zone_end_pfn - start_pfn : size_pages;
-		rc = __add_pages(nid, zone, start_pfn, nr_pages);
+		rc = __add_pages(nid, NODE_DATA(nid)->node_zones + zone_enum,
+				 start_pfn, size_pages);
 		if (rc)
 			break;
 		start_pfn += nr_pages;
 		size_pages -= nr_pages;
-		if (!size_pages)
-			break;
 	}
 	if (rc)
 		vmem_remove_mapping(start, size);
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index b33f66110ca9..54ef3bc01b43 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -10,11 +10,7 @@
 #include <linux/mm.h>
 #include <linux/swap.h>
 #include <linux/smp.h>
-#include <linux/highmem.h>
-#include <linux/pagemap.h>
 #include <linux/spinlock.h>
-#include <linux/module.h>
-#include <linux/quicklist.h>
 #include <linux/rcupdate.h>
 #include <linux/slab.h>
 #include <linux/swapops.h>
@@ -28,12 +24,9 @@
 #include <asm/tlbflush.h>
 #include <asm/mmu_context.h>
 
-#define ALLOC_ORDER	2
-#define FRAG_MASK	0x03
-
 unsigned long *crst_table_alloc(struct mm_struct *mm)
 {
-	struct page *page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
+	struct page *page = alloc_pages(GFP_KERNEL, 2);
 
 	if (!page)
 		return NULL;
@@ -42,7 +35,7 @@ unsigned long *crst_table_alloc(struct mm_struct *mm)
 
 void crst_table_free(struct mm_struct *mm, unsigned long *table)
 {
-	free_pages((unsigned long) table, ALLOC_ORDER);
+	free_pages((unsigned long) table, 2);
 }
 
 static void __crst_table_upgrade(void *arg)
@@ -176,7 +169,7 @@ struct gmap *gmap_alloc(struct mm_struct *mm, unsigned long limit)
 	INIT_RADIX_TREE(&gmap->host_to_guest, GFP_ATOMIC);
 	spin_lock_init(&gmap->guest_table_lock);
 	gmap->mm = mm;
-	page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
+	page = alloc_pages(GFP_KERNEL, 2);
 	if (!page)
 		goto out_free;
 	page->index = 0;
@@ -247,7 +240,7 @@ void gmap_free(struct gmap *gmap)
 
 	/* Free all segment & region tables. */
 	list_for_each_entry_safe(page, next, &gmap->crst_list, lru)
-		__free_pages(page, ALLOC_ORDER);
+		__free_pages(page, 2);
 	gmap_radix_tree_free(&gmap->guest_to_host);
 	gmap_radix_tree_free(&gmap->host_to_guest);
 	down_write(&gmap->mm->mmap_sem);
@@ -287,7 +280,7 @@ static int gmap_alloc_table(struct gmap *gmap, unsigned long *table,
 	unsigned long *new;
 
 	/* since we dont free the gmap table until gmap_free we can unlock */
-	page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
+	page = alloc_pages(GFP_KERNEL, 2);
 	if (!page)
 		return -ENOMEM;
 	new = (unsigned long *) page_to_phys(page);
@@ -302,7 +295,7 @@ static int gmap_alloc_table(struct gmap *gmap, unsigned long *table,
 	}
 	spin_unlock(&gmap->mm->page_table_lock);
 	if (page)
-		__free_pages(page, ALLOC_ORDER);
+		__free_pages(page, 2);
 	return 0;
 }
 
@@ -795,40 +788,6 @@ void gmap_do_ipte_notify(struct mm_struct *mm, unsigned long vmaddr, pte_t *pte)
 }
 EXPORT_SYMBOL_GPL(gmap_do_ipte_notify);
 
-static inline int page_table_with_pgste(struct page *page)
-{
-	return atomic_read(&page->_mapcount) == 0;
-}
-
-static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm)
-{
-	struct page *page;
-	unsigned long *table;
-
-	page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
-	if (!page)
-		return NULL;
-	if (!pgtable_page_ctor(page)) {
-		__free_page(page);
-		return NULL;
-	}
-	atomic_set(&page->_mapcount, 0);
-	table = (unsigned long *) page_to_phys(page);
-	clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
-	clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2);
-	return table;
-}
-
-static inline void page_table_free_pgste(unsigned long *table)
-{
-	struct page *page;
-
-	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
-	pgtable_page_dtor(page);
-	atomic_set(&page->_mapcount, -1);
-	__free_page(page);
-}
-
 int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
 			  unsigned long key, bool nq)
 {
@@ -957,20 +916,6 @@ __initcall(page_table_register_sysctl);
 
 #else /* CONFIG_PGSTE */
 
-static inline int page_table_with_pgste(struct page *page)
-{
-	return 0;
-}
-
-static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm)
-{
-	return NULL;
-}
-
-static inline void page_table_free_pgste(unsigned long *table)
-{
-}
-
 static inline void gmap_unlink(struct mm_struct *mm, unsigned long *table,
 			unsigned long vmaddr)
 {
@@ -994,44 +939,55 @@ static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits)
  */
 unsigned long *page_table_alloc(struct mm_struct *mm)
 {
-	unsigned long *uninitialized_var(table);
-	struct page *uninitialized_var(page);
+	unsigned long *table;
+	struct page *page;
 	unsigned int mask, bit;
 
-	if (mm_alloc_pgste(mm))
-		return page_table_alloc_pgste(mm);
-	/* Allocate fragments of a 4K page as 1K/2K page table */
-	spin_lock_bh(&mm->context.list_lock);
-	mask = FRAG_MASK;
-	if (!list_empty(&mm->context.pgtable_list)) {
-		page = list_first_entry(&mm->context.pgtable_list,
-					struct page, lru);
-		table = (unsigned long *) page_to_phys(page);
-		mask = atomic_read(&page->_mapcount);
-		mask = mask | (mask >> 4);
-	}
-	if ((mask & FRAG_MASK) == FRAG_MASK) {
-		spin_unlock_bh(&mm->context.list_lock);
-		page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
-		if (!page)
-			return NULL;
-		if (!pgtable_page_ctor(page)) {
-			__free_page(page);
-			return NULL;
+	/* Try to get a fragment of a 4K page as a 2K page table */
+	if (!mm_alloc_pgste(mm)) {
+		table = NULL;
+		spin_lock_bh(&mm->context.list_lock);
+		if (!list_empty(&mm->context.pgtable_list)) {
+			page = list_first_entry(&mm->context.pgtable_list,
+						struct page, lru);
+			mask = atomic_read(&page->_mapcount);
+			mask = (mask | (mask >> 4)) & 3;
+			if (mask != 3) {
+				table = (unsigned long *) page_to_phys(page);
+				bit = mask & 1;		/* =1 -> second 2K */
+				if (bit)
+					table += PTRS_PER_PTE;
+				atomic_xor_bits(&page->_mapcount, 1U << bit);
+				list_del(&page->lru);
+			}
 		}
+		spin_unlock_bh(&mm->context.list_lock);
+		if (table)
+			return table;
+	}
+	/* Allocate a fresh page */
+	page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
+	if (!page)
+		return NULL;
+	if (!pgtable_page_ctor(page)) {
+		__free_page(page);
+		return NULL;
+	}
+	/* Initialize page table */
+	table = (unsigned long *) page_to_phys(page);
+	if (mm_alloc_pgste(mm)) {
+		/* Return 4K page table with PGSTEs */
+		atomic_set(&page->_mapcount, 3);
+		clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
+		clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2);
+	} else {
+		/* Return the first 2K fragment of the page */
 		atomic_set(&page->_mapcount, 1);
-		table = (unsigned long *) page_to_phys(page);
 		clear_table(table, _PAGE_INVALID, PAGE_SIZE);
 		spin_lock_bh(&mm->context.list_lock);
 		list_add(&page->lru, &mm->context.pgtable_list);
-	} else {
-		for (bit = 1; mask & bit; bit <<= 1)
-			table += PTRS_PER_PTE;
-		mask = atomic_xor_bits(&page->_mapcount, bit);
-		if ((mask & FRAG_MASK) == FRAG_MASK)
-			list_del(&page->lru);
+		spin_unlock_bh(&mm->context.list_lock);
 	}
-	spin_unlock_bh(&mm->context.list_lock);
 	return table;
 }
 
@@ -1041,37 +997,23 @@ void page_table_free(struct mm_struct *mm, unsigned long *table)
 	unsigned int bit, mask;
 
 	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
-	if (page_table_with_pgste(page))
-		return page_table_free_pgste(table);
-	/* Free 1K/2K page table fragment of a 4K page */
-	bit = 1 << ((__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t)));
-	spin_lock_bh(&mm->context.list_lock);
-	if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
-		list_del(&page->lru);
-	mask = atomic_xor_bits(&page->_mapcount, bit);
-	if (mask & FRAG_MASK)
-		list_add(&page->lru, &mm->context.pgtable_list);
-	spin_unlock_bh(&mm->context.list_lock);
-	if (mask == 0) {
-		pgtable_page_dtor(page);
-		atomic_set(&page->_mapcount, -1);
-		__free_page(page);
+	if (!mm_alloc_pgste(mm)) {
+		/* Free 2K page table fragment of a 4K page */
+		bit = (__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t));
+		spin_lock_bh(&mm->context.list_lock);
+		mask = atomic_xor_bits(&page->_mapcount, 1U << bit);
+		if (mask & 3)
+			list_add(&page->lru, &mm->context.pgtable_list);
+		else
+			list_del(&page->lru);
+		spin_unlock_bh(&mm->context.list_lock);
+		if (mask != 0)
+			return;
 	}
-}
-
-static void __page_table_free_rcu(void *table, unsigned bit)
-{
-	struct page *page;
 
-	if (bit == FRAG_MASK)
-		return page_table_free_pgste(table);
-	/* Free 1K/2K page table fragment of a 4K page */
-	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
-	if (atomic_xor_bits(&page->_mapcount, bit) == 0) {
-		pgtable_page_dtor(page);
-		atomic_set(&page->_mapcount, -1);
-		__free_page(page);
-	}
+	pgtable_page_dtor(page);
+	atomic_set(&page->_mapcount, -1);
+	__free_page(page);
 }
 
 void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table,
@@ -1083,34 +1025,45 @@ void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table,
 
 	mm = tlb->mm;
 	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
-	if (page_table_with_pgste(page)) {
+	if (mm_alloc_pgste(mm)) {
 		gmap_unlink(mm, table, vmaddr);
-		table = (unsigned long *) (__pa(table) | FRAG_MASK);
+		table = (unsigned long *) (__pa(table) | 3);
 		tlb_remove_table(tlb, table);
 		return;
 	}
-	bit = 1 << ((__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t)));
+	bit = (__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t));
 	spin_lock_bh(&mm->context.list_lock);
-	if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
-		list_del(&page->lru);
-	mask = atomic_xor_bits(&page->_mapcount, bit | (bit << 4));
-	if (mask & FRAG_MASK)
+	mask = atomic_xor_bits(&page->_mapcount, 0x11U << bit);
+	if (mask & 3)
 		list_add_tail(&page->lru, &mm->context.pgtable_list);
+	else
+		list_del(&page->lru);
 	spin_unlock_bh(&mm->context.list_lock);
-	table = (unsigned long *) (__pa(table) | (bit << 4));
+	table = (unsigned long *) (__pa(table) | (1U << bit));
 	tlb_remove_table(tlb, table);
 }
 
 static void __tlb_remove_table(void *_table)
 {
-	const unsigned long mask = (FRAG_MASK << 4) | FRAG_MASK;
-	void *table = (void *)((unsigned long) _table & ~mask);
-	unsigned type = (unsigned long) _table & mask;
-
-	if (type)
-		__page_table_free_rcu(table, type);
-	else
-		free_pages((unsigned long) table, ALLOC_ORDER);
+	unsigned int mask = (unsigned long) _table & 3;
+	void *table = (void *)((unsigned long) _table ^ mask);
+	struct page *page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
+
+	switch (mask) {
+	case 0:		/* pmd or pud */
+		free_pages((unsigned long) table, 2);
+		break;
+	case 1:		/* lower 2K of a 4K page table */
+	case 2:		/* higher 2K of a 4K page table */
+		if (atomic_xor_bits(&page->_mapcount, mask << 4) != 0)
+			break;
+		/* fallthrough */
+	case 3:		/* 4K page table with pgstes */
+		pgtable_page_dtor(page);
+		atomic_set(&page->_mapcount, -1);
+		__free_page(page);
+		break;
+	}
 }
 
 static void tlb_remove_table_smp_sync(void *arg)
diff --git a/arch/s390/numa/Makefile b/arch/s390/numa/Makefile
new file mode 100644
index 000000000000..f94ecaffa71b
--- /dev/null
+++ b/arch/s390/numa/Makefile
@@ -0,0 +1,3 @@
+obj-y			+= numa.o
+obj-y			+= toptree.o
+obj-$(CONFIG_NUMA_EMU)	+= mode_emu.o
diff --git a/arch/s390/numa/mode_emu.c b/arch/s390/numa/mode_emu.c
new file mode 100644
index 000000000000..7de4e2f780d7
--- /dev/null
+++ b/arch/s390/numa/mode_emu.c
@@ -0,0 +1,530 @@
+/*
+ * NUMA support for s390
+ *
+ * NUMA emulation (aka fake NUMA) distributes the available memory to nodes
+ * without using real topology information about the physical memory of the
+ * machine.
+ *
+ * It distributes the available CPUs to nodes while respecting the original
+ * machine topology information. This is done by trying to avoid to separate
+ * CPUs which reside on the same book or even on the same MC.
+ *
+ * Because the current Linux scheduler code requires a stable cpu to node
+ * mapping, cores are pinned to nodes when the first CPU thread is set online.
+ *
+ * Copyright IBM Corp. 2015
+ */
+
+#define KMSG_COMPONENT "numa_emu"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/cpumask.h>
+#include <linux/memblock.h>
+#include <linux/node.h>
+#include <linux/memory.h>
+#include <linux/slab.h>
+#include <asm/smp.h>
+#include <asm/topology.h>
+#include "numa_mode.h"
+#include "toptree.h"
+
+/* Distances between the different system components */
+#define DIST_EMPTY	0
+#define DIST_CORE	1
+#define DIST_MC		2
+#define DIST_BOOK	3
+#define DIST_MAX	4
+
+/* Node distance reported to common code */
+#define EMU_NODE_DIST	10
+
+/* Node ID for free (not yet pinned) cores */
+#define NODE_ID_FREE	-1
+
+/* Different levels of toptree */
+enum toptree_level {CORE, MC, BOOK, NODE, TOPOLOGY};
+
+/* The two toptree IDs */
+enum {TOPTREE_ID_PHYS, TOPTREE_ID_NUMA};
+
+/* Number of NUMA nodes */
+static int emu_nodes = 1;
+/* NUMA stripe size */
+static unsigned long emu_size;
+
+/*
+ * Node to core pinning information updates are protected by
+ * "sched_domains_mutex".
+ */
+static struct {
+	s32 to_node_id[CONFIG_NR_CPUS];	/* Pinned core to node mapping */
+	int total;			/* Total number of pinned cores */
+	int per_node_target;		/* Cores per node without extra cores */
+	int per_node[MAX_NUMNODES];	/* Number of cores pinned to node */
+} *emu_cores;
+
+/*
+ * Pin a core to a node
+ */
+static void pin_core_to_node(int core_id, int node_id)
+{
+	if (emu_cores->to_node_id[core_id] == NODE_ID_FREE) {
+		emu_cores->per_node[node_id]++;
+		emu_cores->to_node_id[core_id] = node_id;
+		emu_cores->total++;
+	} else {
+		WARN_ON(emu_cores->to_node_id[core_id] != node_id);
+	}
+}
+
+/*
+ * Number of pinned cores of a node
+ */
+static int cores_pinned(struct toptree *node)
+{
+	return emu_cores->per_node[node->id];
+}
+
+/*
+ * ID of the node where the core is pinned (or NODE_ID_FREE)
+ */
+static int core_pinned_to_node_id(struct toptree *core)
+{
+	return emu_cores->to_node_id[core->id];
+}
+
+/*
+ * Number of cores in the tree that are not yet pinned
+ */
+static int cores_free(struct toptree *tree)
+{
+	struct toptree *core;
+	int count = 0;
+
+	toptree_for_each(core, tree, CORE) {
+		if (core_pinned_to_node_id(core) == NODE_ID_FREE)
+			count++;
+	}
+	return count;
+}
+
+/*
+ * Return node of core
+ */
+static struct toptree *core_node(struct toptree *core)
+{
+	return core->parent->parent->parent;
+}
+
+/*
+ * Return book of core
+ */
+static struct toptree *core_book(struct toptree *core)
+{
+	return core->parent->parent;
+}
+
+/*
+ * Return mc of core
+ */
+static struct toptree *core_mc(struct toptree *core)
+{
+	return core->parent;
+}
+
+/*
+ * Distance between two cores
+ */
+static int dist_core_to_core(struct toptree *core1, struct toptree *core2)
+{
+	if (core_book(core1)->id != core_book(core2)->id)
+		return DIST_BOOK;
+	if (core_mc(core1)->id != core_mc(core2)->id)
+		return DIST_MC;
+	/* Same core or sibling on same MC */
+	return DIST_CORE;
+}
+
+/*
+ * Distance of a node to a core
+ */
+static int dist_node_to_core(struct toptree *node, struct toptree *core)
+{
+	struct toptree *core_node;
+	int dist_min = DIST_MAX;
+
+	toptree_for_each(core_node, node, CORE)
+		dist_min = min(dist_min, dist_core_to_core(core_node, core));
+	return dist_min == DIST_MAX ? DIST_EMPTY : dist_min;
+}
+
+/*
+ * Unify will delete empty nodes, therefore recreate nodes.
+ */
+static void toptree_unify_tree(struct toptree *tree)
+{
+	int nid;
+
+	toptree_unify(tree);
+	for (nid = 0; nid < emu_nodes; nid++)
+		toptree_get_child(tree, nid);
+}
+
+/*
+ * Find the best/nearest node for a given core and ensure that no node
+ * gets more than "emu_cores->per_node_target + extra" cores.
+ */
+static struct toptree *node_for_core(struct toptree *numa, struct toptree *core,
+				     int extra)
+{
+	struct toptree *node, *node_best = NULL;
+	int dist_cur, dist_best, cores_target;
+
+	cores_target = emu_cores->per_node_target + extra;
+	dist_best = DIST_MAX;
+	node_best = NULL;
+	toptree_for_each(node, numa, NODE) {
+		/* Already pinned cores must use their nodes */
+		if (core_pinned_to_node_id(core) == node->id) {
+			node_best = node;
+			break;
+		}
+		/* Skip nodes that already have enough cores */
+		if (cores_pinned(node) >= cores_target)
+			continue;
+		dist_cur = dist_node_to_core(node, core);
+		if (dist_cur < dist_best) {
+			dist_best = dist_cur;
+			node_best = node;
+		}
+	}
+	return node_best;
+}
+
+/*
+ * Find the best node for each core with respect to "extra" core count
+ */
+static void toptree_to_numa_single(struct toptree *numa, struct toptree *phys,
+				   int extra)
+{
+	struct toptree *node, *core, *tmp;
+
+	toptree_for_each_safe(core, tmp, phys, CORE) {
+		node = node_for_core(numa, core, extra);
+		if (!node)
+			return;
+		toptree_move(core, node);
+		pin_core_to_node(core->id, node->id);
+	}
+}
+
+/*
+ * Move structures of given level to specified NUMA node
+ */
+static void move_level_to_numa_node(struct toptree *node, struct toptree *phys,
+				    enum toptree_level level, bool perfect)
+{
+	int cores_free, cores_target = emu_cores->per_node_target;
+	struct toptree *cur, *tmp;
+
+	toptree_for_each_safe(cur, tmp, phys, level) {
+		cores_free = cores_target - toptree_count(node, CORE);
+		if (perfect) {
+			if (cores_free == toptree_count(cur, CORE))
+				toptree_move(cur, node);
+		} else {
+			if (cores_free >= toptree_count(cur, CORE))
+				toptree_move(cur, node);
+		}
+	}
+}
+
+/*
+ * Move structures of a given level to NUMA nodes. If "perfect" is specified
+ * move only perfectly fitting structures. Otherwise move also smaller
+ * than needed structures.
+ */
+static void move_level_to_numa(struct toptree *numa, struct toptree *phys,
+			       enum toptree_level level, bool perfect)
+{
+	struct toptree *node;
+
+	toptree_for_each(node, numa, NODE)
+		move_level_to_numa_node(node, phys, level, perfect);
+}
+
+/*
+ * For the first run try to move the big structures
+ */
+static void toptree_to_numa_first(struct toptree *numa, struct toptree *phys)
+{
+	struct toptree *core;
+
+	/* Always try to move perfectly fitting structures first */
+	move_level_to_numa(numa, phys, BOOK, true);
+	move_level_to_numa(numa, phys, BOOK, false);
+	move_level_to_numa(numa, phys, MC, true);
+	move_level_to_numa(numa, phys, MC, false);
+	/* Now pin all the moved cores */
+	toptree_for_each(core, numa, CORE)
+		pin_core_to_node(core->id, core_node(core)->id);
+}
+
+/*
+ * Allocate new topology and create required nodes
+ */
+static struct toptree *toptree_new(int id, int nodes)
+{
+	struct toptree *tree;
+	int nid;
+
+	tree = toptree_alloc(TOPOLOGY, id);
+	if (!tree)
+		goto fail;
+	for (nid = 0; nid < nodes; nid++) {
+		if (!toptree_get_child(tree, nid))
+			goto fail;
+	}
+	return tree;
+fail:
+	panic("NUMA emulation could not allocate topology");
+}
+
+/*
+ * Allocate and initialize core to node mapping
+ */
+static void create_core_to_node_map(void)
+{
+	int i;
+
+	emu_cores = kzalloc(sizeof(*emu_cores), GFP_KERNEL);
+	if (emu_cores == NULL)
+		panic("Could not allocate cores to node memory");
+	for (i = 0; i < ARRAY_SIZE(emu_cores->to_node_id); i++)
+		emu_cores->to_node_id[i] = NODE_ID_FREE;
+}
+
+/*
+ * Move cores from physical topology into NUMA target topology
+ * and try to keep as much of the physical topology as possible.
+ */
+static struct toptree *toptree_to_numa(struct toptree *phys)
+{
+	static int first = 1;
+	struct toptree *numa;
+	int cores_total;
+
+	cores_total = emu_cores->total + cores_free(phys);
+	emu_cores->per_node_target = cores_total / emu_nodes;
+	numa = toptree_new(TOPTREE_ID_NUMA, emu_nodes);
+	if (first) {
+		toptree_to_numa_first(numa, phys);
+		first = 0;
+	}
+	toptree_to_numa_single(numa, phys, 0);
+	toptree_to_numa_single(numa, phys, 1);
+	toptree_unify_tree(numa);
+
+	WARN_ON(cpumask_weight(&phys->mask));
+	return numa;
+}
+
+/*
+ * Create a toptree out of the physical topology that we got from the hypervisor
+ */
+static struct toptree *toptree_from_topology(void)
+{
+	struct toptree *phys, *node, *book, *mc, *core;
+	struct cpu_topology_s390 *top;
+	int cpu;
+
+	phys = toptree_new(TOPTREE_ID_PHYS, 1);
+
+	for_each_online_cpu(cpu) {
+		top = &per_cpu(cpu_topology, cpu);
+		node = toptree_get_child(phys, 0);
+		book = toptree_get_child(node, top->book_id);
+		mc = toptree_get_child(book, top->socket_id);
+		core = toptree_get_child(mc, top->core_id);
+		if (!book || !mc || !core)
+			panic("NUMA emulation could not allocate memory");
+		cpumask_set_cpu(cpu, &core->mask);
+		toptree_update_mask(mc);
+	}
+	return phys;
+}
+
+/*
+ * Add toptree core to topology and create correct CPU masks
+ */
+static void topology_add_core(struct toptree *core)
+{
+	struct cpu_topology_s390 *top;
+	int cpu;
+
+	for_each_cpu(cpu, &core->mask) {
+		top = &per_cpu(cpu_topology, cpu);
+		cpumask_copy(&top->thread_mask, &core->mask);
+		cpumask_copy(&top->core_mask, &core_mc(core)->mask);
+		cpumask_copy(&top->book_mask, &core_book(core)->mask);
+		cpumask_set_cpu(cpu, node_to_cpumask_map[core_node(core)->id]);
+		top->node_id = core_node(core)->id;
+	}
+}
+
+/*
+ * Apply toptree to topology and create CPU masks
+ */
+static void toptree_to_topology(struct toptree *numa)
+{
+	struct toptree *core;
+	int i;
+
+	/* Clear all node masks */
+	for (i = 0; i < MAX_NUMNODES; i++)
+		cpumask_clear(node_to_cpumask_map[i]);
+
+	/* Rebuild all masks */
+	toptree_for_each(core, numa, CORE)
+		topology_add_core(core);
+}
+
+/*
+ * Show the node to core mapping
+ */
+static void print_node_to_core_map(void)
+{
+	int nid, cid;
+
+	if (!numa_debug_enabled)
+		return;
+	printk(KERN_DEBUG "NUMA node to core mapping\n");
+	for (nid = 0; nid < emu_nodes; nid++) {
+		printk(KERN_DEBUG "  node %3d: ", nid);
+		for (cid = 0; cid < ARRAY_SIZE(emu_cores->to_node_id); cid++) {
+			if (emu_cores->to_node_id[cid] == nid)
+				printk(KERN_CONT "%d ", cid);
+		}
+		printk(KERN_CONT "\n");
+	}
+}
+
+/*
+ * Transfer physical topology into a NUMA topology and modify CPU masks
+ * according to the NUMA topology.
+ *
+ * Must be called with "sched_domains_mutex" lock held.
+ */
+static void emu_update_cpu_topology(void)
+{
+	struct toptree *phys, *numa;
+
+	if (emu_cores == NULL)
+		create_core_to_node_map();
+	phys = toptree_from_topology();
+	numa = toptree_to_numa(phys);
+	toptree_free(phys);
+	toptree_to_topology(numa);
+	toptree_free(numa);
+	print_node_to_core_map();
+}
+
+/*
+ * If emu_size is not set, use CONFIG_EMU_SIZE. Then round to minimum
+ * alignment (needed for memory hotplug).
+ */
+static unsigned long emu_setup_size_adjust(unsigned long size)
+{
+	size = size ? : CONFIG_EMU_SIZE;
+	size = roundup(size, memory_block_size_bytes());
+	return size;
+}
+
+/*
+ * If we have not enough memory for the specified nodes, reduce the node count.
+ */
+static int emu_setup_nodes_adjust(int nodes)
+{
+	int nodes_max;
+
+	nodes_max = memblock.memory.total_size / emu_size;
+	nodes_max = max(nodes_max, 1);
+	if (nodes_max >= nodes)
+		return nodes;
+	pr_warn("Not enough memory for %d nodes, reducing node count\n", nodes);
+	return nodes_max;
+}
+
+/*
+ * Early emu setup
+ */
+static void emu_setup(void)
+{
+	emu_size = emu_setup_size_adjust(emu_size);
+	emu_nodes = emu_setup_nodes_adjust(emu_nodes);
+	pr_info("Creating %d nodes with memory stripe size %ld MB\n",
+		emu_nodes, emu_size >> 20);
+}
+
+/*
+ * Return node id for given page number
+ */
+static int emu_pfn_to_nid(unsigned long pfn)
+{
+	return (pfn / (emu_size >> PAGE_SHIFT)) % emu_nodes;
+}
+
+/*
+ * Return stripe size
+ */
+static unsigned long emu_align(void)
+{
+	return emu_size;
+}
+
+/*
+ * Return distance between two nodes
+ */
+static int emu_distance(int node1, int node2)
+{
+	return (node1 != node2) * EMU_NODE_DIST;
+}
+
+/*
+ * Define callbacks for generic s390 NUMA infrastructure
+ */
+const struct numa_mode numa_mode_emu = {
+	.name = "emu",
+	.setup = emu_setup,
+	.update_cpu_topology = emu_update_cpu_topology,
+	.__pfn_to_nid = emu_pfn_to_nid,
+	.align = emu_align,
+	.distance = emu_distance,
+};
+
+/*
+ * Kernel parameter: emu_nodes=<n>
+ */
+static int __init early_parse_emu_nodes(char *p)
+{
+	int count;
+
+	if (kstrtoint(p, 0, &count) != 0 || count <= 0)
+		return 0;
+	if (count <= 0)
+		return 0;
+	emu_nodes = min(count, MAX_NUMNODES);
+	return 0;
+}
+early_param("emu_nodes", early_parse_emu_nodes);
+
+/*
+ * Kernel parameter: emu_size=[<n>[k|M|G|T]]
+ */
+static int __init early_parse_emu_size(char *p)
+{
+	emu_size = memparse(p, NULL);
+	return 0;
+}
+early_param("emu_size", early_parse_emu_size);
diff --git a/arch/s390/numa/numa.c b/arch/s390/numa/numa.c
new file mode 100644
index 000000000000..09b1d2355bd9
--- /dev/null
+++ b/arch/s390/numa/numa.c
@@ -0,0 +1,184 @@
+/*
+ * NUMA support for s390
+ *
+ * Implement NUMA core code.
+ *
+ * Copyright IBM Corp. 2015
+ */
+
+#define KMSG_COMPONENT "numa"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/mmzone.h>
+#include <linux/cpumask.h>
+#include <linux/bootmem.h>
+#include <linux/memblock.h>
+#include <linux/slab.h>
+#include <linux/node.h>
+
+#include <asm/numa.h>
+#include "numa_mode.h"
+
+pg_data_t *node_data[MAX_NUMNODES];
+EXPORT_SYMBOL(node_data);
+
+cpumask_var_t node_to_cpumask_map[MAX_NUMNODES];
+EXPORT_SYMBOL(node_to_cpumask_map);
+
+const struct numa_mode numa_mode_plain = {
+	.name = "plain",
+};
+
+static const struct numa_mode *mode = &numa_mode_plain;
+
+int numa_pfn_to_nid(unsigned long pfn)
+{
+	return mode->__pfn_to_nid ? mode->__pfn_to_nid(pfn) : 0;
+}
+
+void numa_update_cpu_topology(void)
+{
+	if (mode->update_cpu_topology)
+		mode->update_cpu_topology();
+}
+
+int __node_distance(int a, int b)
+{
+	return mode->distance ? mode->distance(a, b) : 0;
+}
+
+int numa_debug_enabled;
+
+/*
+ * alloc_node_data() - Allocate node data
+ */
+static __init pg_data_t *alloc_node_data(void)
+{
+	pg_data_t *res;
+
+	res = (pg_data_t *) memblock_alloc(sizeof(pg_data_t), 1);
+	if (!res)
+		panic("Could not allocate memory for node data!\n");
+	memset(res, 0, sizeof(pg_data_t));
+	return res;
+}
+
+/*
+ * numa_setup_memory() - Assign bootmem to nodes
+ *
+ * The memory is first added to memblock without any respect to nodes.
+ * This is fixed before remaining memblock memory is handed over to the
+ * buddy allocator.
+ * An important side effect is that large bootmem allocations might easily
+ * cross node boundaries, which can be needed for large allocations with
+ * smaller memory stripes in each node (i.e. when using NUMA emulation).
+ *
+ * Memory defines nodes:
+ * Therefore this routine also sets the nodes online with memory.
+ */
+static void __init numa_setup_memory(void)
+{
+	unsigned long cur_base, align, end_of_dram;
+	int nid = 0;
+
+	end_of_dram = memblock_end_of_DRAM();
+	align = mode->align ? mode->align() : ULONG_MAX;
+
+	/*
+	 * Step through all available memory and assign it to the nodes
+	 * indicated by the mode implementation.
+	 * All nodes which are seen here will be set online.
+	 */
+	cur_base = 0;
+	do {
+		nid = numa_pfn_to_nid(PFN_DOWN(cur_base));
+		node_set_online(nid);
+		memblock_set_node(cur_base, align, &memblock.memory, nid);
+		cur_base += align;
+	} while (cur_base < end_of_dram);
+
+	/* Allocate and fill out node_data */
+	for (nid = 0; nid < MAX_NUMNODES; nid++)
+		NODE_DATA(nid) = alloc_node_data();
+
+	for_each_online_node(nid) {
+		unsigned long start_pfn, end_pfn;
+		unsigned long t_start, t_end;
+		int i;
+
+		start_pfn = ULONG_MAX;
+		end_pfn = 0;
+		for_each_mem_pfn_range(i, nid, &t_start, &t_end, NULL) {
+			if (t_start < start_pfn)
+				start_pfn = t_start;
+			if (t_end > end_pfn)
+				end_pfn = t_end;
+		}
+		NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn;
+		NODE_DATA(nid)->node_id = nid;
+	}
+}
+
+/*
+ * numa_setup() - Earliest initialization
+ *
+ * Assign the mode and call the mode's setup routine.
+ */
+void __init numa_setup(void)
+{
+	pr_info("NUMA mode: %s\n", mode->name);
+	if (mode->setup)
+		mode->setup();
+	numa_setup_memory();
+	memblock_dump_all();
+}
+
+
+/*
+ * numa_init_early() - Initialization initcall
+ *
+ * This runs when only one CPU is online and before the first
+ * topology update is called for by the scheduler.
+ */
+static int __init numa_init_early(void)
+{
+	/* Attach all possible CPUs to node 0 for now. */
+	cpumask_copy(node_to_cpumask_map[0], cpu_possible_mask);
+	return 0;
+}
+early_initcall(numa_init_early);
+
+/*
+ * numa_init_late() - Initialization initcall
+ *
+ * Register NUMA nodes.
+ */
+static int __init numa_init_late(void)
+{
+	int nid;
+
+	for_each_online_node(nid)
+		register_one_node(nid);
+	return 0;
+}
+device_initcall(numa_init_late);
+
+static int __init parse_debug(char *parm)
+{
+	numa_debug_enabled = 1;
+	return 0;
+}
+early_param("numa_debug", parse_debug);
+
+static int __init parse_numa(char *parm)
+{
+	if (strcmp(parm, numa_mode_plain.name) == 0)
+		mode = &numa_mode_plain;
+#ifdef CONFIG_NUMA_EMU
+	if (strcmp(parm, numa_mode_emu.name) == 0)
+		mode = &numa_mode_emu;
+#endif
+	return 0;
+}
+early_param("numa", parse_numa);
diff --git a/arch/s390/numa/numa_mode.h b/arch/s390/numa/numa_mode.h
new file mode 100644
index 000000000000..08953b0b1c7f
--- /dev/null
+++ b/arch/s390/numa/numa_mode.h
@@ -0,0 +1,24 @@
+/*
+ * NUMA support for s390
+ *
+ * Define declarations used for communication between NUMA mode
+ * implementations and NUMA core functionality.
+ *
+ * Copyright IBM Corp. 2015
+ */
+#ifndef __S390_NUMA_MODE_H
+#define __S390_NUMA_MODE_H
+
+struct numa_mode {
+	char *name;				/* Name of mode */
+	void (*setup)(void);			/* Initizalize mode */
+	void (*update_cpu_topology)(void);	/* Called by topology code */
+	int (*__pfn_to_nid)(unsigned long pfn);	/* PFN to node ID */
+	unsigned long (*align)(void);		/* Minimum node alignment */
+	int (*distance)(int a, int b);		/* Distance between two nodes */
+};
+
+extern const struct numa_mode numa_mode_plain;
+extern const struct numa_mode numa_mode_emu;
+
+#endif /* __S390_NUMA_MODE_H */
diff --git a/arch/s390/numa/toptree.c b/arch/s390/numa/toptree.c
new file mode 100644
index 000000000000..902d350d859a
--- /dev/null
+++ b/arch/s390/numa/toptree.c
@@ -0,0 +1,342 @@
+/*
+ * NUMA support for s390
+ *
+ * A tree structure used for machine topology mangling
+ *
+ * Copyright IBM Corp. 2015
+ */
+
+#include <linux/kernel.h>
+#include <linux/cpumask.h>
+#include <linux/list.h>
+#include <linux/list_sort.h>
+#include <linux/slab.h>
+#include <asm/numa.h>
+
+#include "toptree.h"
+
+/**
+ * toptree_alloc - Allocate and initialize a new tree node.
+ * @level: The node's vertical level; level 0 contains the leaves.
+ * @id: ID number, explicitly not unique beyond scope of node's siblings
+ *
+ * Allocate a new tree node and initialize it.
+ *
+ * RETURNS:
+ * Pointer to the new tree node or NULL on error
+ */
+struct toptree *toptree_alloc(int level, int id)
+{
+	struct toptree *res = kzalloc(sizeof(struct toptree), GFP_KERNEL);
+
+	if (!res)
+		return res;
+
+	INIT_LIST_HEAD(&res->children);
+	INIT_LIST_HEAD(&res->sibling);
+	cpumask_clear(&res->mask);
+	res->level = level;
+	res->id = id;
+	return res;
+}
+
+/**
+ * toptree_remove - Remove a tree node from a tree
+ * @cand: Pointer to the node to remove
+ *
+ * The node is detached from its parent node. The parent node's
+ * masks will be updated to reflect the loss of the child.
+ */
+static void toptree_remove(struct toptree *cand)
+{
+	struct toptree *oldparent;
+
+	list_del_init(&cand->sibling);
+	oldparent = cand->parent;
+	cand->parent = NULL;
+	toptree_update_mask(oldparent);
+}
+
+/**
+ * toptree_free - discard a tree node
+ * @cand: Pointer to the tree node to discard
+ *
+ * Checks if @cand is attached to a parent node. Detaches it
+ * cleanly using toptree_remove. Possible children are freed
+ * recursively. In the end @cand itself is freed.
+ */
+void toptree_free(struct toptree *cand)
+{
+	struct toptree *child, *tmp;
+
+	if (cand->parent)
+		toptree_remove(cand);
+	toptree_for_each_child_safe(child, tmp, cand)
+		toptree_free(child);
+	kfree(cand);
+}
+
+/**
+ * toptree_update_mask - Update node bitmasks
+ * @cand: Pointer to a tree node
+ *
+ * The node's cpumask will be updated by combining all children's
+ * masks. Then toptree_update_mask is called recursively for the
+ * parent if applicable.
+ *
+ * NOTE:
+ * This must not be called on leaves. If called on a leaf, its
+ * CPU mask is cleared and lost.
+ */
+void toptree_update_mask(struct toptree *cand)
+{
+	struct toptree *child;
+
+	cpumask_clear(&cand->mask);
+	list_for_each_entry(child, &cand->children, sibling)
+		cpumask_or(&cand->mask, &cand->mask, &child->mask);
+	if (cand->parent)
+		toptree_update_mask(cand->parent);
+}
+
+/**
+ * toptree_insert - Insert a tree node into tree
+ * @cand: Pointer to the node to insert
+ * @target: Pointer to the node to which @cand will added as a child
+ *
+ * Insert a tree node into a tree. Masks will be updated automatically.
+ *
+ * RETURNS:
+ * 0 on success, -1 if NULL is passed as argument or the node levels
+ * don't fit.
+ */
+static int toptree_insert(struct toptree *cand, struct toptree *target)
+{
+	if (!cand || !target)
+		return -1;
+	if (target->level != (cand->level + 1))
+		return -1;
+	list_add_tail(&cand->sibling, &target->children);
+	cand->parent = target;
+	toptree_update_mask(target);
+	return 0;
+}
+
+/**
+ * toptree_move_children - Move all child nodes of a node to a new place
+ * @cand: Pointer to the node whose children are to be moved
+ * @target: Pointer to the node to which @cand's children will be attached
+ *
+ * Take all child nodes of @cand and move them using toptree_move.
+ */
+static void toptree_move_children(struct toptree *cand, struct toptree *target)
+{
+	struct toptree *child, *tmp;
+
+	toptree_for_each_child_safe(child, tmp, cand)
+		toptree_move(child, target);
+}
+
+/**
+ * toptree_unify - Merge children with same ID
+ * @cand: Pointer to node whose direct children should be made unique
+ *
+ * When mangling the tree it is possible that a node has two or more children
+ * which have the same ID. This routine merges these children into one and
+ * moves all children of the merged nodes into the unified node.
+ */
+void toptree_unify(struct toptree *cand)
+{
+	struct toptree *child, *tmp, *cand_copy;
+
+	/* Threads cannot be split, cores are not split */
+	if (cand->level < 2)
+		return;
+
+	cand_copy = toptree_alloc(cand->level, 0);
+	toptree_for_each_child_safe(child, tmp, cand) {
+		struct toptree *tmpchild;
+
+		if (!cpumask_empty(&child->mask)) {
+			tmpchild = toptree_get_child(cand_copy, child->id);
+			toptree_move_children(child, tmpchild);
+		}
+		toptree_free(child);
+	}
+	toptree_move_children(cand_copy, cand);
+	toptree_free(cand_copy);
+
+	toptree_for_each_child(child, cand)
+		toptree_unify(child);
+}
+
+/**
+ * toptree_move - Move a node to another context
+ * @cand: Pointer to the node to move
+ * @target: Pointer to the node where @cand should go
+ *
+ * In the easiest case @cand is exactly on the level below @target
+ * and will be immediately moved to the target.
+ *
+ * If @target's level is not the direct parent level of @cand,
+ * nodes for the missing levels are created and put between
+ * @cand and @target. The "stacking" nodes' IDs are taken from
+ * @cand's parents.
+ *
+ * After this it is likely to have redundant nodes in the tree
+ * which are addressed by means of toptree_unify.
+ */
+void toptree_move(struct toptree *cand, struct toptree *target)
+{
+	struct toptree *stack_target, *real_insert_point, *ptr, *tmp;
+
+	if (cand->level + 1 == target->level) {
+		toptree_remove(cand);
+		toptree_insert(cand, target);
+		return;
+	}
+
+	real_insert_point = NULL;
+	ptr = cand;
+	stack_target = NULL;
+
+	do {
+		tmp = stack_target;
+		stack_target = toptree_alloc(ptr->level + 1,
+					     ptr->parent->id);
+		toptree_insert(tmp, stack_target);
+		if (!real_insert_point)
+			real_insert_point = stack_target;
+		ptr = ptr->parent;
+	} while (stack_target->level < (target->level - 1));
+
+	toptree_remove(cand);
+	toptree_insert(cand, real_insert_point);
+	toptree_insert(stack_target, target);
+}
+
+/**
+ * toptree_get_child - Access a tree node's child by its ID
+ * @cand: Pointer to tree node whose child is to access
+ * @id: The desired child's ID
+ *
+ * @cand's children are searched for a child with matching ID.
+ * If no match can be found, a new child with the desired ID
+ * is created and returned.
+ */
+struct toptree *toptree_get_child(struct toptree *cand, int id)
+{
+	struct toptree *child;
+
+	toptree_for_each_child(child, cand)
+		if (child->id == id)
+			return child;
+	child = toptree_alloc(cand->level-1, id);
+	toptree_insert(child, cand);
+	return child;
+}
+
+/**
+ * toptree_first - Find the first descendant on specified level
+ * @context: Pointer to tree node whose descendants are to be used
+ * @level: The level of interest
+ *
+ * RETURNS:
+ * @context's first descendant on the specified level, or NULL
+ * if there is no matching descendant
+ */
+struct toptree *toptree_first(struct toptree *context, int level)
+{
+	struct toptree *child, *tmp;
+
+	if (context->level == level)
+		return context;
+
+	if (!list_empty(&context->children)) {
+		list_for_each_entry(child, &context->children, sibling) {
+			tmp = toptree_first(child, level);
+			if (tmp)
+				return tmp;
+		}
+	}
+	return NULL;
+}
+
+/**
+ * toptree_next_sibling - Return next sibling
+ * @cur: Pointer to a tree node
+ *
+ * RETURNS:
+ * If @cur has a parent and is not the last in the parent's children list,
+ * the next sibling is returned. Or NULL when there are no siblings left.
+ */
+static struct toptree *toptree_next_sibling(struct toptree *cur)
+{
+	if (cur->parent == NULL)
+		return NULL;
+
+	if (cur == list_last_entry(&cur->parent->children,
+				   struct toptree, sibling))
+		return NULL;
+	return (struct toptree *) list_next_entry(cur, sibling);
+}
+
+/**
+ * toptree_next - Tree traversal function
+ * @cur: Pointer to current element
+ * @context: Pointer to the root node of the tree or subtree to
+ * be traversed.
+ * @level: The level of interest.
+ *
+ * RETURNS:
+ * Pointer to the next node on level @level
+ * or NULL when there is no next node.
+ */
+struct toptree *toptree_next(struct toptree *cur, struct toptree *context,
+			     int level)
+{
+	struct toptree *cur_context, *tmp;
+
+	if (!cur)
+		return NULL;
+
+	if (context->level == level)
+		return NULL;
+
+	tmp = toptree_next_sibling(cur);
+	if (tmp != NULL)
+		return tmp;
+
+	cur_context = cur;
+	while (cur_context->level < context->level - 1) {
+		/* Step up */
+		cur_context = cur_context->parent;
+		/* Step aside */
+		tmp = toptree_next_sibling(cur_context);
+		if (tmp != NULL) {
+			/* Step down */
+			tmp = toptree_first(tmp, level);
+			if (tmp != NULL)
+				return tmp;
+		}
+	}
+	return NULL;
+}
+
+/**
+ * toptree_count - Count descendants on specified level
+ * @context: Pointer to node whose descendants are to be considered
+ * @level: Only descendants on the specified level will be counted
+ *
+ * RETURNS:
+ * Number of descendants on the specified level
+ */
+int toptree_count(struct toptree *context, int level)
+{
+	struct toptree *cur;
+	int cnt = 0;
+
+	toptree_for_each(cur, context, level)
+		cnt++;
+	return cnt;
+}
diff --git a/arch/s390/numa/toptree.h b/arch/s390/numa/toptree.h
new file mode 100644
index 000000000000..bdf502027af4
--- /dev/null
+++ b/arch/s390/numa/toptree.h
@@ -0,0 +1,60 @@
+/*
+ * NUMA support for s390
+ *
+ * A tree structure used for machine topology mangling
+ *
+ * Copyright IBM Corp. 2015
+ */
+#ifndef S390_TOPTREE_H
+#define S390_TOPTREE_H
+
+#include <linux/cpumask.h>
+#include <linux/list.h>
+
+struct toptree {
+	int level;
+	int id;
+	cpumask_t mask;
+	struct toptree *parent;
+	struct list_head sibling;
+	struct list_head children;
+};
+
+struct toptree *toptree_alloc(int level, int id);
+void toptree_free(struct toptree *cand);
+void toptree_update_mask(struct toptree *cand);
+void toptree_unify(struct toptree *cand);
+struct toptree *toptree_get_child(struct toptree *cand, int id);
+void toptree_move(struct toptree *cand, struct toptree *target);
+int toptree_count(struct toptree *context, int level);
+
+struct toptree *toptree_first(struct toptree *context, int level);
+struct toptree *toptree_next(struct toptree *cur, struct toptree *context,
+			     int level);
+
+#define toptree_for_each_child(child, ptree)				\
+	list_for_each_entry(child,  &ptree->children, sibling)
+
+#define toptree_for_each_child_safe(child, ptmp, ptree)			\
+	list_for_each_entry_safe(child, ptmp, &ptree->children, sibling)
+
+#define toptree_is_last(ptree)					\
+	((ptree->parent == NULL) ||				\
+	 (ptree->parent->children.prev == &ptree->sibling))
+
+#define toptree_for_each(ptree, cont, ttype)		\
+	for (ptree = toptree_first(cont, ttype);	\
+	     ptree != NULL;				\
+	     ptree = toptree_next(ptree, cont, ttype))
+
+#define toptree_for_each_safe(ptree, tmp, cont, ttype)		\
+	for (ptree = toptree_first(cont, ttype),		\
+		     tmp = toptree_next(ptree, cont, ttype);	\
+	     ptree != NULL;					\
+	     ptree = tmp,					\
+		     tmp = toptree_next(ptree, cont, ttype))
+
+#define toptree_for_each_sibling(ptree, start)			\
+	toptree_for_each(ptree, start->parent, start->level)
+
+#endif /* S390_TOPTREE_H */
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 598f023cf8a6..17c04c7269e7 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -76,11 +76,6 @@ EXPORT_SYMBOL_GPL(zpci_iomap_start);
 
 static struct kmem_cache *zdev_fmb_cache;
 
-struct zpci_dev *get_zdev(struct pci_dev *pdev)
-{
-	return (struct zpci_dev *) pdev->sysdata;
-}
-
 struct zpci_dev *get_zdev_by_fid(u32 fid)
 {
 	struct zpci_dev *tmp, *zdev = NULL;
@@ -269,7 +264,7 @@ void __iomem *pci_iomap_range(struct pci_dev *pdev,
 			      unsigned long offset,
 			      unsigned long max)
 {
-	struct zpci_dev *zdev =	get_zdev(pdev);
+	struct zpci_dev *zdev =	to_zpci(pdev);
 	u64 addr;
 	int idx;
 
@@ -385,7 +380,7 @@ static void zpci_irq_handler(struct airq_struct *airq)
 
 int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
 {
-	struct zpci_dev *zdev = get_zdev(pdev);
+	struct zpci_dev *zdev = to_zpci(pdev);
 	unsigned int hwirq, msi_vecs;
 	unsigned long aisb;
 	struct msi_desc *msi;
@@ -460,7 +455,7 @@ out:
 
 void arch_teardown_msi_irqs(struct pci_dev *pdev)
 {
-	struct zpci_dev *zdev = get_zdev(pdev);
+	struct zpci_dev *zdev = to_zpci(pdev);
 	struct msi_desc *msi;
 	int rc;
 
@@ -637,7 +632,7 @@ static void zpci_cleanup_bus_resources(struct zpci_dev *zdev)
 	int i;
 
 	for (i = 0; i < PCI_BAR_COUNT; i++) {
-		if (!zdev->bars[i].size)
+		if (!zdev->bars[i].size || !zdev->bars[i].res)
 			continue;
 
 		zpci_free_iomap(zdev, zdev->bars[i].map_idx);
@@ -648,7 +643,7 @@ static void zpci_cleanup_bus_resources(struct zpci_dev *zdev)
 
 int pcibios_add_device(struct pci_dev *pdev)
 {
-	struct zpci_dev *zdev = get_zdev(pdev);
+	struct zpci_dev *zdev = to_zpci(pdev);
 	struct resource *res;
 	int i;
 
@@ -673,7 +668,7 @@ void pcibios_release_device(struct pci_dev *pdev)
 
 int pcibios_enable_device(struct pci_dev *pdev, int mask)
 {
-	struct zpci_dev *zdev = get_zdev(pdev);
+	struct zpci_dev *zdev = to_zpci(pdev);
 
 	zdev->pdev = pdev;
 	zpci_debug_init_device(zdev);
@@ -684,7 +679,7 @@ int pcibios_enable_device(struct pci_dev *pdev, int mask)
 
 void pcibios_disable_device(struct pci_dev *pdev)
 {
-	struct zpci_dev *zdev = get_zdev(pdev);
+	struct zpci_dev *zdev = to_zpci(pdev);
 
 	zpci_fmb_disable_device(zdev);
 	zpci_debug_exit_device(zdev);
@@ -695,7 +690,7 @@ void pcibios_disable_device(struct pci_dev *pdev)
 static int zpci_restore(struct device *dev)
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
-	struct zpci_dev *zdev = get_zdev(pdev);
+	struct zpci_dev *zdev = to_zpci(pdev);
 	int ret = 0;
 
 	if (zdev->state != ZPCI_FN_STATE_ONLINE)
@@ -717,7 +712,7 @@ out:
 static int zpci_freeze(struct device *dev)
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
-	struct zpci_dev *zdev = get_zdev(pdev);
+	struct zpci_dev *zdev = to_zpci(pdev);
 
 	if (zdev->state != ZPCI_FN_STATE_ONLINE)
 		return 0;
@@ -777,17 +772,22 @@ static int zpci_scan_bus(struct zpci_dev *zdev)
 
 	ret = zpci_setup_bus_resources(zdev, &resources);
 	if (ret)
-		return ret;
+		goto error;
 
 	zdev->bus = pci_scan_root_bus(NULL, ZPCI_BUS_NR, &pci_root_ops,
 				      zdev, &resources);
 	if (!zdev->bus) {
-		zpci_cleanup_bus_resources(zdev);
-		return -EIO;
+		ret = -EIO;
+		goto error;
 	}
 	zdev->bus->max_bus_speed = zdev->max_bus_speed;
 	pci_bus_add_devices(zdev->bus);
 	return 0;
+
+error:
+	zpci_cleanup_bus_resources(zdev);
+	pci_free_resource_list(&resources);
+	return ret;
 }
 
 int zpci_enable_device(struct zpci_dev *zdev)
diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c
index 6fd8d5836138..42b76580c8b8 100644
--- a/arch/s390/pci/pci_dma.c
+++ b/arch/s390/pci/pci_dma.c
@@ -277,7 +277,7 @@ static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page,
 				     enum dma_data_direction direction,
 				     struct dma_attrs *attrs)
 {
-	struct zpci_dev *zdev = get_zdev(to_pci_dev(dev));
+	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
 	unsigned long nr_pages, iommu_page_index;
 	unsigned long pa = page_to_phys(page) + offset;
 	int flags = ZPCI_PTE_VALID;
@@ -316,7 +316,7 @@ static void s390_dma_unmap_pages(struct device *dev, dma_addr_t dma_addr,
 				 size_t size, enum dma_data_direction direction,
 				 struct dma_attrs *attrs)
 {
-	struct zpci_dev *zdev = get_zdev(to_pci_dev(dev));
+	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
 	unsigned long iommu_page_index;
 	int npages;
 
@@ -337,7 +337,7 @@ static void *s390_dma_alloc(struct device *dev, size_t size,
 			    dma_addr_t *dma_handle, gfp_t flag,
 			    struct dma_attrs *attrs)
 {
-	struct zpci_dev *zdev = get_zdev(to_pci_dev(dev));
+	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
 	struct page *page;
 	unsigned long pa;
 	dma_addr_t map;
@@ -367,7 +367,7 @@ static void s390_dma_free(struct device *dev, size_t size,
 			  void *pa, dma_addr_t dma_handle,
 			  struct dma_attrs *attrs)
 {
-	struct zpci_dev *zdev = get_zdev(to_pci_dev(dev));
+	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
 
 	size = PAGE_ALIGN(size);
 	atomic64_sub(size / PAGE_SIZE, &zdev->allocated_pages);
diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c
index ed2394dd14e9..369a3e05d468 100644
--- a/arch/s390/pci/pci_event.c
+++ b/arch/s390/pci/pci_event.c
@@ -46,15 +46,13 @@ struct zpci_ccdf_avail {
 static void __zpci_event_error(struct zpci_ccdf_err *ccdf)
 {
 	struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
+	struct pci_dev *pdev = zdev ? zdev->pdev : NULL;
 
 	zpci_err("error CCDF:\n");
 	zpci_err_hex(ccdf, sizeof(*ccdf));
 
-	if (!zdev)
-		return;
-
 	pr_err("%s: Event 0x%x reports an error for PCI function 0x%x\n",
-	       pci_name(zdev->pdev), ccdf->pec, ccdf->fid);
+	       pdev ? pci_name(pdev) : "n/a", ccdf->pec, ccdf->fid);
 }
 
 void zpci_event_error(void *data)
@@ -89,7 +87,9 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
 		ret = zpci_enable_device(zdev);
 		if (ret)
 			break;
+		pci_lock_rescan_remove();
 		pci_rescan_bus(zdev->bus);
+		pci_unlock_rescan_remove();
 		break;
 	case 0x0302: /* Reserved -> Standby */
 		if (!zdev)
@@ -97,7 +97,7 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
 		break;
 	case 0x0303: /* Deconfiguration requested */
 		if (pdev)
-			pci_stop_and_remove_bus_device(pdev);
+			pci_stop_and_remove_bus_device_locked(pdev);
 
 		ret = zpci_disable_device(zdev);
 		if (ret)
@@ -114,7 +114,7 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
 			/* Give the driver a hint that the function is
 			 * already unusable. */
 			pdev->error_state = pci_channel_io_perm_failure;
-			pci_stop_and_remove_bus_device(pdev);
+			pci_stop_and_remove_bus_device_locked(pdev);
 		}
 
 		zdev->fh = ccdf->fh;
diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c
index 85267c058af8..dcc2634ccbe2 100644
--- a/arch/s390/pci/pci_insn.c
+++ b/arch/s390/pci/pci_insn.c
@@ -8,10 +8,23 @@
 #include <linux/errno.h>
 #include <linux/delay.h>
 #include <asm/pci_insn.h>
+#include <asm/pci_debug.h>
 #include <asm/processor.h>
 
 #define ZPCI_INSN_BUSY_DELAY	1	/* 1 microsecond */
 
+static inline void zpci_err_insn(u8 cc, u8 status, u64 req, u64 offset)
+{
+	struct {
+		u8 cc;
+		u8 status;
+		u64 req;
+		u64 offset;
+	} data = {cc, status, req, offset};
+
+	zpci_err_hex(&data, sizeof(data));
+}
+
 /* Modify PCI Function Controls */
 static inline u8 __mpcifc(u64 req, struct zpci_fib *fib, u8 *status)
 {
@@ -38,8 +51,8 @@ int zpci_mod_fc(u64 req, struct zpci_fib *fib)
 	} while (cc == 2);
 
 	if (cc)
-		printk_once(KERN_ERR "%s: error cc: %d  status: %d\n",
-			     __func__, cc, status);
+		zpci_err_insn(cc, status, req, 0);
+
 	return (cc) ? -EIO : 0;
 }
 
@@ -72,8 +85,8 @@ int zpci_refresh_trans(u64 fn, u64 addr, u64 range)
 	} while (cc == 2);
 
 	if (cc)
-		printk_once(KERN_ERR "%s: error cc: %d  status: %d  dma_addr: %Lx  size: %Lx\n",
-			    __func__, cc, status, addr, range);
+		zpci_err_insn(cc, status, addr, range);
+
 	return (cc) ? -EIO : 0;
 }
 
@@ -121,8 +134,8 @@ int zpci_load(u64 *data, u64 req, u64 offset)
 	} while (cc == 2);
 
 	if (cc)
-		printk_once(KERN_ERR "%s: error cc: %d  status: %d  req: %Lx  offset: %Lx\n",
-			    __func__, cc, status, req, offset);
+		zpci_err_insn(cc, status, req, offset);
+
 	return (cc > 0) ? -EIO : cc;
 }
 EXPORT_SYMBOL_GPL(zpci_load);
@@ -159,8 +172,8 @@ int zpci_store(u64 data, u64 req, u64 offset)
 	} while (cc == 2);
 
 	if (cc)
-		printk_once(KERN_ERR "%s: error cc: %d  status: %d  req: %Lx  offset: %Lx\n",
-			__func__, cc, status, req, offset);
+		zpci_err_insn(cc, status, req, offset);
+
 	return (cc > 0) ? -EIO : cc;
 }
 EXPORT_SYMBOL_GPL(zpci_store);
@@ -195,8 +208,8 @@ int zpci_store_block(const u64 *data, u64 req, u64 offset)
 	} while (cc == 2);
 
 	if (cc)
-		printk_once(KERN_ERR "%s: error cc: %d  status: %d  req: %Lx  offset: %Lx\n",
-			    __func__, cc, status, req, offset);
+		zpci_err_insn(cc, status, req, offset);
+
 	return (cc > 0) ? -EIO : cc;
 }
 EXPORT_SYMBOL_GPL(zpci_store_block);
diff --git a/arch/s390/pci/pci_sysfs.c b/arch/s390/pci/pci_sysfs.c
index fa3ce891e597..f37a5808883d 100644
--- a/arch/s390/pci/pci_sysfs.c
+++ b/arch/s390/pci/pci_sysfs.c
@@ -16,7 +16,7 @@
 static ssize_t name##_show(struct device *dev,				\
 			   struct device_attribute *attr, char *buf)	\
 {									\
-	struct zpci_dev *zdev = get_zdev(to_pci_dev(dev));		\
+	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));		\
 									\
 	return sprintf(buf, fmt, zdev->member);				\
 }									\
@@ -38,23 +38,30 @@ static ssize_t recover_store(struct device *dev, struct device_attribute *attr,
 			     const char *buf, size_t count)
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
-	struct zpci_dev *zdev = get_zdev(pdev);
+	struct zpci_dev *zdev = to_zpci(pdev);
 	int ret;
 
 	if (!device_remove_file_self(dev, attr))
 		return count;
 
+	pci_lock_rescan_remove();
 	pci_stop_and_remove_bus_device(pdev);
 	ret = zpci_disable_device(zdev);
 	if (ret)
-		return ret;
+		goto error;
 
 	ret = zpci_enable_device(zdev);
 	if (ret)
-		return ret;
+		goto error;
 
 	pci_rescan_bus(zdev->bus);
+	pci_unlock_rescan_remove();
+
 	return count;
+
+error:
+	pci_unlock_rescan_remove();
+	return ret;
 }
 static DEVICE_ATTR_WO(recover);
 
@@ -64,7 +71,7 @@ static ssize_t util_string_read(struct file *filp, struct kobject *kobj,
 {
 	struct device *dev = kobj_to_dev(kobj);
 	struct pci_dev *pdev = to_pci_dev(dev);
-	struct zpci_dev *zdev = get_zdev(pdev);
+	struct zpci_dev *zdev = to_zpci(pdev);
 
 	return memory_read_from_buffer(buf, count, &off, zdev->util_str,
 				       sizeof(zdev->util_str));
diff --git a/arch/sh/kernel/cpu/sh4/sq.c b/arch/sh/kernel/cpu/sh4/sq.c
index 0a47bd3e7bee..4ca78ed71ad2 100644
--- a/arch/sh/kernel/cpu/sh4/sq.c
+++ b/arch/sh/kernel/cpu/sh4/sq.c
@@ -355,13 +355,12 @@ static int sq_dev_add(struct device *dev, struct subsys_interface *sif)
 	return error;
 }
 
-static int sq_dev_remove(struct device *dev, struct subsys_interface *sif)
+static void sq_dev_remove(struct device *dev, struct subsys_interface *sif)
 {
 	unsigned int cpu = dev->id;
 	struct kobject *kobj = sq_kobject[cpu];
 
 	kobject_put(kobj);
-	return 0;
 }
 
 static struct subsys_interface sq_interface = {
diff --git a/arch/tile/kernel/sysfs.c b/arch/tile/kernel/sysfs.c
index a3ed12f8f83b..825867c53853 100644
--- a/arch/tile/kernel/sysfs.c
+++ b/arch/tile/kernel/sysfs.c
@@ -198,16 +198,13 @@ static int hv_stats_device_add(struct device *dev, struct subsys_interface *sif)
 	return err;
 }
 
-static int hv_stats_device_remove(struct device *dev,
-				  struct subsys_interface *sif)
+static void hv_stats_device_remove(struct device *dev,
+				   struct subsys_interface *sif)
 {
 	int cpu = dev->id;
 
-	if (!cpu_online(cpu))
-		return 0;
-
-	sysfs_remove_file(&dev->kobj, &dev_attr_hv_stats.attr);
-	return 0;
+	if (cpu_online(cpu))
+		sysfs_remove_file(&dev->kobj, &dev_attr_hv_stats.attr);
 }
 
 
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index 5a1844765a7a..a7e257d9cb90 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -140,6 +140,7 @@ sysexit_from_sys_call:
 	 */
 	andl	$~TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
 	movl	RIP(%rsp), %ecx		/* User %eip */
+	movq    RAX(%rsp), %rax
 	RESTORE_RSI_RDI
 	xorl	%edx, %edx		/* Do not leak kernel information */
 	xorq	%r8, %r8
@@ -219,7 +220,6 @@ sysexit_from_sys_call:
 1:	setbe	%al			/* 1 if error, 0 if not */
 	movzbl	%al, %edi		/* zero-extend that into %edi */
 	call	__audit_syscall_exit
-	movq	RAX(%rsp), %rax		/* reload syscall return value */
 	movl	$(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %edi
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
@@ -368,6 +368,7 @@ sysretl_from_sys_call:
 	RESTORE_RSI_RDI_RDX
 	movl	RIP(%rsp), %ecx
 	movl	EFLAGS(%rsp), %r11d
+	movq    RAX(%rsp), %rax
 	xorq	%r10, %r10
 	xorq	%r9, %r9
 	xorq	%r8, %r8
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 49ec9038ec14..c12e845f59e6 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -252,6 +252,11 @@ struct kvm_pio_request {
 	int size;
 };
 
+struct rsvd_bits_validate {
+	u64 rsvd_bits_mask[2][4];
+	u64 bad_mt_xwr;
+};
+
 /*
  * x86 supports 3 paging modes (4-level 64-bit, 3-level 64-bit, and 2-level
  * 32-bit).  The kvm_mmu structure abstracts the details of the current mmu
@@ -289,8 +294,15 @@ struct kvm_mmu {
 
 	u64 *pae_root;
 	u64 *lm_root;
-	u64 rsvd_bits_mask[2][4];
-	u64 bad_mt_xwr;
+
+	/*
+	 * check zero bits on shadow page table entries, these
+	 * bits include not only hardware reserved bits but also
+	 * the bits spte never used.
+	 */
+	struct rsvd_bits_validate shadow_zero_check;
+
+	struct rsvd_bits_validate guest_rsvd_check;
 
 	/*
 	 * Bitmap: bit set = last pte in walk
@@ -358,6 +370,11 @@ struct kvm_mtrr {
 	struct list_head head;
 };
 
+/* Hyper-V per vcpu emulation context */
+struct kvm_vcpu_hv {
+	u64 hv_vapic;
+};
+
 struct kvm_vcpu_arch {
 	/*
 	 * rip and regs accesses must go through
@@ -514,8 +531,7 @@ struct kvm_vcpu_arch {
 	/* used for guest single stepping over the given code position */
 	unsigned long singlestep_rip;
 
-	/* fields used by HYPER-V emulation */
-	u64 hv_vapic;
+	struct kvm_vcpu_hv hyperv;
 
 	cpumask_var_t wbinvd_dirty_mask;
 
@@ -586,6 +602,17 @@ struct kvm_apic_map {
 	struct kvm_lapic *logical_map[16][16];
 };
 
+/* Hyper-V emulation context */
+struct kvm_hv {
+	u64 hv_guest_os_id;
+	u64 hv_hypercall;
+	u64 hv_tsc_page;
+
+	/* Hyper-v based guest crash (NT kernel bugcheck) parameters */
+	u64 hv_crash_param[HV_X64_MSR_CRASH_PARAMS];
+	u64 hv_crash_ctl;
+};
+
 struct kvm_arch {
 	unsigned int n_used_mmu_pages;
 	unsigned int n_requested_mmu_pages;
@@ -645,16 +672,14 @@ struct kvm_arch {
 	/* reads protected by irq_srcu, writes by irq_lock */
 	struct hlist_head mask_notifier_list;
 
-	/* fields used by HYPER-V emulation */
-	u64 hv_guest_os_id;
-	u64 hv_hypercall;
-	u64 hv_tsc_page;
+	struct kvm_hv hyperv;
 
 	#ifdef CONFIG_KVM_MMU_AUDIT
 	int audit_point;
 	#endif
 
 	bool boot_vcpu_runs_old_kvmclock;
+	u32 bsp_vcpu_id;
 
 	u64 disabled_quirks;
 };
@@ -1203,5 +1228,7 @@ int __x86_set_memory_region(struct kvm *kvm,
 			    const struct kvm_userspace_memory_region *mem);
 int x86_set_memory_region(struct kvm *kvm,
 			  const struct kvm_userspace_memory_region *mem);
+bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu);
+bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu);
 
 #endif /* _ASM_X86_KVM_HOST_H */
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index c163215abb9a..aaf59b7da98a 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -7,6 +7,7 @@
 
 struct ms_hyperv_info {
 	u32 features;
+	u32 misc_features;
 	u32 hints;
 };
 
@@ -20,4 +21,8 @@ void hyperv_vector_handler(struct pt_regs *regs);
 void hv_setup_vmbus_irq(void (*handler)(void));
 void hv_remove_vmbus_irq(void);
 
+void hv_setup_kexec_handler(void (*handler)(void));
+void hv_remove_kexec_handler(void);
+void hv_setup_crash_handler(void (*handler)(struct pt_regs *regs));
+void hv_remove_crash_handler(void);
 #endif
diff --git a/arch/x86/include/asm/sigcontext.h b/arch/x86/include/asm/sigcontext.h
index 6fe6b182c998..9dfce4e0417d 100644
--- a/arch/x86/include/asm/sigcontext.h
+++ b/arch/x86/include/asm/sigcontext.h
@@ -57,9 +57,9 @@ struct sigcontext {
 	unsigned long ip;
 	unsigned long flags;
 	unsigned short cs;
-	unsigned short __pad2;	/* Was called gs, but was always zero. */
-	unsigned short __pad1;	/* Was called fs, but was always zero. */
-	unsigned short ss;
+	unsigned short gs;
+	unsigned short fs;
+	unsigned short __pad0;
 	unsigned long err;
 	unsigned long trapno;
 	unsigned long oldmask;
diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
index 751bf4b7bf11..d7f3b3b78ac3 100644
--- a/arch/x86/include/asm/switch_to.h
+++ b/arch/x86/include/asm/switch_to.h
@@ -79,12 +79,12 @@ do {									\
 #else /* CONFIG_X86_32 */
 
 /* frame pointer must be last for get_wchan */
-#define SAVE_CONTEXT    "pushq %%rbp ; movq %%rsi,%%rbp\n\t"
-#define RESTORE_CONTEXT "movq %%rbp,%%rsi ; popq %%rbp\t"
+#define SAVE_CONTEXT    "pushf ; pushq %%rbp ; movq %%rsi,%%rbp\n\t"
+#define RESTORE_CONTEXT "movq %%rbp,%%rsi ; popq %%rbp ; popf\t"
 
 #define __EXTRA_CLOBBER  \
 	, "rcx", "rbx", "rdx", "r8", "r9", "r10", "r11", \
-	  "r12", "r13", "r14", "r15", "flags"
+	  "r12", "r13", "r14", "r15"
 
 #ifdef CONFIG_CC_STACKPROTECTOR
 #define __switch_canary							  \
@@ -100,11 +100,7 @@ do {									\
 #define __switch_canary_iparam
 #endif	/* CC_STACKPROTECTOR */
 
-/*
- * There is no need to save or restore flags, because flags are always
- * clean in kernel mode, with the possible exception of IOPL.  Kernel IOPL
- * has no effect.
- */
+/* Save restore flags to clear handle leaking NT */
 #define switch_to(prev, next, last) \
 	asm volatile(SAVE_CONTEXT					  \
 	     "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */	  \
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index da772edd19ab..448b7ca61aee 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -47,6 +47,7 @@
 #define CPU_BASED_MOV_DR_EXITING                0x00800000
 #define CPU_BASED_UNCOND_IO_EXITING             0x01000000
 #define CPU_BASED_USE_IO_BITMAPS                0x02000000
+#define CPU_BASED_MONITOR_TRAP_FLAG             0x08000000
 #define CPU_BASED_USE_MSR_BITMAPS               0x10000000
 #define CPU_BASED_MONITOR_EXITING               0x20000000
 #define CPU_BASED_PAUSE_EXITING                 0x40000000
@@ -367,29 +368,29 @@ enum vmcs_field {
 #define TYPE_PHYSICAL_APIC_EVENT        (10 << 12)
 #define TYPE_PHYSICAL_APIC_INST         (15 << 12)
 
-/* segment AR */
-#define SEGMENT_AR_L_MASK (1 << 13)
-
-#define AR_TYPE_ACCESSES_MASK 1
-#define AR_TYPE_READABLE_MASK (1 << 1)
-#define AR_TYPE_WRITEABLE_MASK (1 << 2)
-#define AR_TYPE_CODE_MASK (1 << 3)
-#define AR_TYPE_MASK 0x0f
-#define AR_TYPE_BUSY_64_TSS 11
-#define AR_TYPE_BUSY_32_TSS 11
-#define AR_TYPE_BUSY_16_TSS 3
-#define AR_TYPE_LDT 2
-
-#define AR_UNUSABLE_MASK (1 << 16)
-#define AR_S_MASK (1 << 4)
-#define AR_P_MASK (1 << 7)
-#define AR_L_MASK (1 << 13)
-#define AR_DB_MASK (1 << 14)
-#define AR_G_MASK (1 << 15)
-#define AR_DPL_SHIFT 5
-#define AR_DPL(ar) (((ar) >> AR_DPL_SHIFT) & 3)
-
-#define AR_RESERVD_MASK 0xfffe0f00
+/* segment AR in VMCS -- these are different from what LAR reports */
+#define VMX_SEGMENT_AR_L_MASK (1 << 13)
+
+#define VMX_AR_TYPE_ACCESSES_MASK 1
+#define VMX_AR_TYPE_READABLE_MASK (1 << 1)
+#define VMX_AR_TYPE_WRITEABLE_MASK (1 << 2)
+#define VMX_AR_TYPE_CODE_MASK (1 << 3)
+#define VMX_AR_TYPE_MASK 0x0f
+#define VMX_AR_TYPE_BUSY_64_TSS 11
+#define VMX_AR_TYPE_BUSY_32_TSS 11
+#define VMX_AR_TYPE_BUSY_16_TSS 3
+#define VMX_AR_TYPE_LDT 2
+
+#define VMX_AR_UNUSABLE_MASK (1 << 16)
+#define VMX_AR_S_MASK (1 << 4)
+#define VMX_AR_P_MASK (1 << 7)
+#define VMX_AR_L_MASK (1 << 13)
+#define VMX_AR_DB_MASK (1 << 14)
+#define VMX_AR_G_MASK (1 << 15)
+#define VMX_AR_DPL_SHIFT 5
+#define VMX_AR_DPL(ar) (((ar) >> VMX_AR_DPL_SHIFT) & 3)
+
+#define VMX_AR_RESERVD_MASK 0xfffe0f00
 
 #define TSS_PRIVATE_MEMSLOT			(KVM_USER_MEM_SLOTS + 0)
 #define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT	(KVM_USER_MEM_SLOTS + 1)
diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
index f36d56bd7632..f0412c50c47b 100644
--- a/arch/x86/include/uapi/asm/hyperv.h
+++ b/arch/x86/include/uapi/asm/hyperv.h
@@ -27,6 +27,8 @@
 #define HV_X64_MSR_VP_RUNTIME_AVAILABLE		(1 << 0)
 /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/
 #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE	(1 << 1)
+/* Partition reference TSC MSR is available */
+#define HV_X64_MSR_REFERENCE_TSC_AVAILABLE              (1 << 9)
 
 /* A partition's reference time stamp counter (TSC) page */
 #define HV_X64_MSR_REFERENCE_TSC		0x40000021
diff --git a/arch/x86/include/uapi/asm/sigcontext.h b/arch/x86/include/uapi/asm/sigcontext.h
index 0e8a973de9ee..40836a9a7250 100644
--- a/arch/x86/include/uapi/asm/sigcontext.h
+++ b/arch/x86/include/uapi/asm/sigcontext.h
@@ -177,24 +177,9 @@ struct sigcontext {
 	__u64 rip;
 	__u64 eflags;		/* RFLAGS */
 	__u16 cs;
-
-	/*
-	 * Prior to 2.5.64 ("[PATCH] x86-64 updates for 2.5.64-bk3"),
-	 * Linux saved and restored fs and gs in these slots.  This
-	 * was counterproductive, as fsbase and gsbase were never
-	 * saved, so arch_prctl was presumably unreliable.
-	 *
-	 * If these slots are ever needed for any other purpose, there
-	 * is some risk that very old 64-bit binaries could get
-	 * confused.  I doubt that many such binaries still work,
-	 * though, since the same patch in 2.5.64 also removed the
-	 * 64-bit set_thread_area syscall, so it appears that there is
-	 * no TLS API that works in both pre- and post-2.5.64 kernels.
-	 */
-	__u16 __pad2;		/* Was gs. */
-	__u16 __pad1;		/* Was fs. */
-
-	__u16 ss;
+	__u16 gs;
+	__u16 fs;
+	__u16 __pad0;
 	__u64 err;
 	__u64 trapno;
 	__u64 oldmask;
diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h
index 1fe92181ee9e..37fee272618f 100644
--- a/arch/x86/include/uapi/asm/vmx.h
+++ b/arch/x86/include/uapi/asm/vmx.h
@@ -58,6 +58,7 @@
 #define EXIT_REASON_INVALID_STATE       33
 #define EXIT_REASON_MSR_LOAD_FAIL       34
 #define EXIT_REASON_MWAIT_INSTRUCTION   36
+#define EXIT_REASON_MONITOR_TRAP_FLAG   37
 #define EXIT_REASON_MONITOR_INSTRUCTION 39
 #define EXIT_REASON_PAUSE_INSTRUCTION   40
 #define EXIT_REASON_MCE_DURING_VMENTRY  41
@@ -106,6 +107,7 @@
 	{ EXIT_REASON_MSR_READ,              "MSR_READ" }, \
 	{ EXIT_REASON_MSR_WRITE,             "MSR_WRITE" }, \
 	{ EXIT_REASON_MWAIT_INSTRUCTION,     "MWAIT_INSTRUCTION" }, \
+	{ EXIT_REASON_MONITOR_TRAP_FLAG,     "MONITOR_TRAP_FLAG" }, \
 	{ EXIT_REASON_MONITOR_INSTRUCTION,   "MONITOR_INSTRUCTION" }, \
 	{ EXIT_REASON_PAUSE_INSTRUCTION,     "PAUSE_INSTRUCTION" }, \
 	{ EXIT_REASON_MCE_DURING_VMENTRY,    "MCE_DURING_VMENTRY" }, \
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index dcb52850a28f..cde732c1b495 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1424,7 +1424,7 @@ static inline void __x2apic_disable(void)
 {
 	u64 msr;
 
-	if (cpu_has_apic)
+	if (!cpu_has_apic)
 		return;
 
 	rdmsrl(MSR_IA32_APICBASE, msr);
@@ -1483,10 +1483,13 @@ void x2apic_setup(void)
 
 static __init void x2apic_disable(void)
 {
-	u32 x2apic_id;
+	u32 x2apic_id, state = x2apic_state;
 
-	if (x2apic_state != X2APIC_ON)
-		goto out;
+	x2apic_mode = 0;
+	x2apic_state = X2APIC_DISABLED;
+
+	if (state != X2APIC_ON)
+		return;
 
 	x2apic_id = read_apic_id();
 	if (x2apic_id >= 255)
@@ -1494,9 +1497,6 @@ static __init void x2apic_disable(void)
 
 	__x2apic_disable();
 	register_lapic_address(mp_lapic_addr);
-out:
-	x2apic_state = X2APIC_DISABLED;
-	x2apic_mode = 0;
 }
 
 static __init void x2apic_enable(void)
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index f813261d9740..2683f36e4e0a 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -322,7 +322,7 @@ static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq,
 		irq_data->chip = &lapic_controller;
 		irq_data->chip_data = data;
 		irq_data->hwirq = virq + i;
-		err = assign_irq_vector_policy(virq, irq_data->node, data,
+		err = assign_irq_vector_policy(virq + i, irq_data->node, data,
 					       info);
 		if (err)
 			goto error;
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index 6236a54a63f4..3c986390058a 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -377,17 +377,16 @@ static int mc_device_add(struct device *dev, struct subsys_interface *sif)
 	return err;
 }
 
-static int mc_device_remove(struct device *dev, struct subsys_interface *sif)
+static void mc_device_remove(struct device *dev, struct subsys_interface *sif)
 {
 	int cpu = dev->id;
 
 	if (!cpu_online(cpu))
-		return 0;
+		return;
 
 	pr_debug("CPU%d removed\n", cpu);
 	microcode_fini_cpu(cpu);
 	sysfs_remove_group(&dev->kobj, &mc_attr_group);
-	return 0;
 }
 
 static struct subsys_interface mc_cpu_interface = {
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index aad4bd84b475..f794bfa3c138 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -18,6 +18,7 @@
 #include <linux/efi.h>
 #include <linux/interrupt.h>
 #include <linux/irq.h>
+#include <linux/kexec.h>
 #include <asm/processor.h>
 #include <asm/hypervisor.h>
 #include <asm/hyperv.h>
@@ -28,10 +29,14 @@
 #include <asm/i8259.h>
 #include <asm/apic.h>
 #include <asm/timer.h>
+#include <asm/reboot.h>
 
 struct ms_hyperv_info ms_hyperv;
 EXPORT_SYMBOL_GPL(ms_hyperv);
 
+static void (*hv_kexec_handler)(void);
+static void (*hv_crash_handler)(struct pt_regs *regs);
+
 #if IS_ENABLED(CONFIG_HYPERV)
 static void (*vmbus_handler)(void);
 
@@ -67,8 +72,47 @@ void hv_remove_vmbus_irq(void)
 }
 EXPORT_SYMBOL_GPL(hv_setup_vmbus_irq);
 EXPORT_SYMBOL_GPL(hv_remove_vmbus_irq);
+
+void hv_setup_kexec_handler(void (*handler)(void))
+{
+	hv_kexec_handler = handler;
+}
+EXPORT_SYMBOL_GPL(hv_setup_kexec_handler);
+
+void hv_remove_kexec_handler(void)
+{
+	hv_kexec_handler = NULL;
+}
+EXPORT_SYMBOL_GPL(hv_remove_kexec_handler);
+
+void hv_setup_crash_handler(void (*handler)(struct pt_regs *regs))
+{
+	hv_crash_handler = handler;
+}
+EXPORT_SYMBOL_GPL(hv_setup_crash_handler);
+
+void hv_remove_crash_handler(void)
+{
+	hv_crash_handler = NULL;
+}
+EXPORT_SYMBOL_GPL(hv_remove_crash_handler);
 #endif
 
+static void hv_machine_shutdown(void)
+{
+	if (kexec_in_progress && hv_kexec_handler)
+		hv_kexec_handler();
+	native_machine_shutdown();
+}
+
+static void hv_machine_crash_shutdown(struct pt_regs *regs)
+{
+	if (hv_crash_handler)
+		hv_crash_handler(regs);
+	native_machine_crash_shutdown(regs);
+}
+
+
 static uint32_t  __init ms_hyperv_platform(void)
 {
 	u32 eax;
@@ -114,6 +158,7 @@ static void __init ms_hyperv_init_platform(void)
 	 * Extract the features and hints
 	 */
 	ms_hyperv.features = cpuid_eax(HYPERV_CPUID_FEATURES);
+	ms_hyperv.misc_features = cpuid_edx(HYPERV_CPUID_FEATURES);
 	ms_hyperv.hints    = cpuid_eax(HYPERV_CPUID_ENLIGHTMENT_INFO);
 
 	printk(KERN_INFO "HyperV: features 0x%x, hints 0x%x\n",
@@ -141,6 +186,8 @@ static void __init ms_hyperv_init_platform(void)
 	no_timer_check = 1;
 #endif
 
+	machine_ops.shutdown = hv_machine_shutdown;
+	machine_ops.crash_shutdown = hv_machine_crash_shutdown;
 }
 
 const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = {
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index b9826a981fb2..6326ae24e4d5 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -2534,7 +2534,7 @@ static int intel_pmu_cpu_prepare(int cpu)
 	if (x86_pmu.extra_regs || x86_pmu.lbr_sel_map) {
 		cpuc->shared_regs = allocate_shared_regs(cpu);
 		if (!cpuc->shared_regs)
-			return NOTIFY_BAD;
+			goto err;
 	}
 
 	if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) {
@@ -2542,18 +2542,27 @@ static int intel_pmu_cpu_prepare(int cpu)
 
 		cpuc->constraint_list = kzalloc(sz, GFP_KERNEL);
 		if (!cpuc->constraint_list)
-			return NOTIFY_BAD;
+			goto err_shared_regs;
 
 		cpuc->excl_cntrs = allocate_excl_cntrs(cpu);
-		if (!cpuc->excl_cntrs) {
-			kfree(cpuc->constraint_list);
-			kfree(cpuc->shared_regs);
-			return NOTIFY_BAD;
-		}
+		if (!cpuc->excl_cntrs)
+			goto err_constraint_list;
+
 		cpuc->excl_thread_id = 0;
 	}
 
 	return NOTIFY_OK;
+
+err_constraint_list:
+	kfree(cpuc->constraint_list);
+	cpuc->constraint_list = NULL;
+
+err_shared_regs:
+	kfree(cpuc->shared_regs);
+	cpuc->shared_regs = NULL;
+
+err:
+	return NOTIFY_BAD;
 }
 
 static void intel_pmu_cpu_starting(int cpu)
diff --git a/arch/x86/kernel/cpu/perf_event_intel_cqm.c b/arch/x86/kernel/cpu/perf_event_intel_cqm.c
index 63eb68b73589..377e8f8ed391 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_cqm.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_cqm.c
@@ -1255,7 +1255,7 @@ static inline void cqm_pick_event_reader(int cpu)
 	cpumask_set_cpu(cpu, &cqm_cpumask);
 }
 
-static void intel_cqm_cpu_prepare(unsigned int cpu)
+static void intel_cqm_cpu_starting(unsigned int cpu)
 {
 	struct intel_pqr_state *state = &per_cpu(pqr_state, cpu);
 	struct cpuinfo_x86 *c = &cpu_data(cpu);
@@ -1296,13 +1296,11 @@ static int intel_cqm_cpu_notifier(struct notifier_block *nb,
 	unsigned int cpu  = (unsigned long)hcpu;
 
 	switch (action & ~CPU_TASKS_FROZEN) {
-	case CPU_UP_PREPARE:
-		intel_cqm_cpu_prepare(cpu);
-		break;
 	case CPU_DOWN_PREPARE:
 		intel_cqm_cpu_exit(cpu);
 		break;
 	case CPU_STARTING:
+		intel_cqm_cpu_starting(cpu);
 		cqm_pick_event_reader(cpu);
 		break;
 	}
@@ -1373,7 +1371,7 @@ static int __init intel_cqm_init(void)
 		goto out;
 
 	for_each_online_cpu(i) {
-		intel_cqm_cpu_prepare(i);
+		intel_cqm_cpu_starting(i);
 		cqm_pick_event_reader(i);
 	}
 
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 79de954626fd..d25097c3fc1d 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -270,7 +270,7 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
 	dst_fpu->fpregs_active = 0;
 	dst_fpu->last_cpu = -1;
 
-	if (src_fpu->fpstate_active)
+	if (src_fpu->fpstate_active && cpu_has_fpu)
 		fpu_copy(dst_fpu, src_fpu);
 
 	return 0;
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index 1e173f6285c7..d14e9ac3235a 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -40,7 +40,12 @@ static void fpu__init_cpu_generic(void)
 	write_cr0(cr0);
 
 	/* Flush out any pending x87 state: */
-	asm volatile ("fninit");
+#ifdef CONFIG_MATH_EMULATION
+	if (!cpu_has_fpu)
+		fpstate_init_soft(&current->thread.fpu.state.soft);
+	else
+#endif
+		asm volatile ("fninit");
 }
 
 /*
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 397688beed4b..c27cad726765 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -408,6 +408,7 @@ static int prefer_mwait_c1_over_halt(const struct cpuinfo_x86 *c)
 static void mwait_idle(void)
 {
 	if (!current_set_polling_and_test()) {
+		trace_cpu_idle_rcuidle(1, smp_processor_id());
 		if (this_cpu_has(X86_BUG_CLFLUSH_MONITOR)) {
 			smp_mb(); /* quirk */
 			clflush((void *)&current_thread_info()->flags);
@@ -419,6 +420,7 @@ static void mwait_idle(void)
 			__sti_mwait(0, 0);
 		else
 			local_irq_enable();
+		trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
 	} else {
 		local_irq_enable();
 	}
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 206996c1669d..71820c42b6ce 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -93,8 +93,15 @@ int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc)
 		COPY(r15);
 #endif /* CONFIG_X86_64 */
 
+#ifdef CONFIG_X86_32
 		COPY_SEG_CPL3(cs);
 		COPY_SEG_CPL3(ss);
+#else /* !CONFIG_X86_32 */
+		/* Kernel saves and restores only the CS segment register on signals,
+		 * which is the bare minimum needed to allow mixed 32/64-bit code.
+		 * App's signal handler can save/restore other segments if needed. */
+		COPY_SEG_CPL3(cs);
+#endif /* CONFIG_X86_32 */
 
 		get_user_ex(tmpflags, &sc->flags);
 		regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS);
@@ -154,9 +161,8 @@ int setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
 #else /* !CONFIG_X86_32 */
 		put_user_ex(regs->flags, &sc->flags);
 		put_user_ex(regs->cs, &sc->cs);
-		put_user_ex(0, &sc->__pad2);
-		put_user_ex(0, &sc->__pad1);
-		put_user_ex(regs->ss, &sc->ss);
+		put_user_ex(0, &sc->gs);
+		put_user_ex(0, &sc->fs);
 #endif /* CONFIG_X86_32 */
 
 		put_user_ex(fpstate, &sc->fpstate);
@@ -451,19 +457,9 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig,
 
 	regs->sp = (unsigned long)frame;
 
-	/*
-	 * Set up the CS and SS registers to run signal handlers in
-	 * 64-bit mode, even if the handler happens to be interrupting
-	 * 32-bit or 16-bit code.
-	 *
-	 * SS is subtle.  In 64-bit mode, we don't need any particular
-	 * SS descriptor, but we do need SS to be valid.  It's possible
-	 * that the old SS is entirely bogus -- this can happen if the
-	 * signal we're trying to deliver is #GP or #SS caused by a bad
-	 * SS value.
-	 */
+	/* Set up the CS register to run signal handlers in 64-bit mode,
+	   even if the handler happens to be interrupting 32-bit code. */
 	regs->cs = __USER_CS;
-	regs->ss = __USER_DS;
 
 	return 0;
 }
diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c
index 6273324186ac..0ccb53a9fcd9 100644
--- a/arch/x86/kernel/step.c
+++ b/arch/x86/kernel/step.c
@@ -28,11 +28,11 @@ unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *re
 		struct desc_struct *desc;
 		unsigned long base;
 
-		seg &= ~7UL;
+		seg >>= 3;
 
 		mutex_lock(&child->mm->context.lock);
 		if (unlikely(!child->mm->context.ldt ||
-			     (seg >> 3) >= child->mm->context.ldt->size))
+			     seg >= child->mm->context.ldt->size))
 			addr = -1L; /* bogus selector, access would fault */
 		else {
 			desc = &child->mm->context.ldt->entries[seg];
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index 67d215cb8953..a1ff508bb423 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -12,7 +12,9 @@ kvm-y			+= $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \
 kvm-$(CONFIG_KVM_ASYNC_PF)	+= $(KVM)/async_pf.o
 
 kvm-y			+= x86.o mmu.o emulate.o i8259.o irq.o lapic.o \
-			   i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o
+			   i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o \
+			   hyperv.o
+
 kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT)	+= assigned-dev.o iommu.o
 kvm-intel-y		+= vmx.o pmu_intel.o
 kvm-amd-y		+= svm.o pmu_amd.o
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
new file mode 100644
index 000000000000..a8160d2ae362
--- /dev/null
+++ b/arch/x86/kvm/hyperv.c
@@ -0,0 +1,377 @@
+/*
+ * KVM Microsoft Hyper-V emulation
+ *
+ * derived from arch/x86/kvm/x86.c
+ *
+ * Copyright (C) 2006 Qumranet, Inc.
+ * Copyright (C) 2008 Qumranet, Inc.
+ * Copyright IBM Corporation, 2008
+ * Copyright 2010 Red Hat, Inc. and/or its affiliates.
+ * Copyright (C) 2015 Andrey Smetanin <asmetanin@virtuozzo.com>
+ *
+ * Authors:
+ *   Avi Kivity   <avi@qumranet.com>
+ *   Yaniv Kamay  <yaniv@qumranet.com>
+ *   Amit Shah    <amit.shah@qumranet.com>
+ *   Ben-Ami Yassour <benami@il.ibm.com>
+ *   Andrey Smetanin <asmetanin@virtuozzo.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#include "x86.h"
+#include "lapic.h"
+#include "hyperv.h"
+
+#include <linux/kvm_host.h>
+#include <trace/events/kvm.h>
+
+#include "trace.h"
+
+static bool kvm_hv_msr_partition_wide(u32 msr)
+{
+	bool r = false;
+
+	switch (msr) {
+	case HV_X64_MSR_GUEST_OS_ID:
+	case HV_X64_MSR_HYPERCALL:
+	case HV_X64_MSR_REFERENCE_TSC:
+	case HV_X64_MSR_TIME_REF_COUNT:
+	case HV_X64_MSR_CRASH_CTL:
+	case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
+		r = true;
+		break;
+	}
+
+	return r;
+}
+
+static int kvm_hv_msr_get_crash_data(struct kvm_vcpu *vcpu,
+				     u32 index, u64 *pdata)
+{
+	struct kvm_hv *hv = &vcpu->kvm->arch.hyperv;
+
+	if (WARN_ON_ONCE(index >= ARRAY_SIZE(hv->hv_crash_param)))
+		return -EINVAL;
+
+	*pdata = hv->hv_crash_param[index];
+	return 0;
+}
+
+static int kvm_hv_msr_get_crash_ctl(struct kvm_vcpu *vcpu, u64 *pdata)
+{
+	struct kvm_hv *hv = &vcpu->kvm->arch.hyperv;
+
+	*pdata = hv->hv_crash_ctl;
+	return 0;
+}
+
+static int kvm_hv_msr_set_crash_ctl(struct kvm_vcpu *vcpu, u64 data, bool host)
+{
+	struct kvm_hv *hv = &vcpu->kvm->arch.hyperv;
+
+	if (host)
+		hv->hv_crash_ctl = data & HV_X64_MSR_CRASH_CTL_NOTIFY;
+
+	if (!host && (data & HV_X64_MSR_CRASH_CTL_NOTIFY)) {
+
+		vcpu_debug(vcpu, "hv crash (0x%llx 0x%llx 0x%llx 0x%llx 0x%llx)\n",
+			  hv->hv_crash_param[0],
+			  hv->hv_crash_param[1],
+			  hv->hv_crash_param[2],
+			  hv->hv_crash_param[3],
+			  hv->hv_crash_param[4]);
+
+		/* Send notification about crash to user space */
+		kvm_make_request(KVM_REQ_HV_CRASH, vcpu);
+	}
+
+	return 0;
+}
+
+static int kvm_hv_msr_set_crash_data(struct kvm_vcpu *vcpu,
+				     u32 index, u64 data)
+{
+	struct kvm_hv *hv = &vcpu->kvm->arch.hyperv;
+
+	if (WARN_ON_ONCE(index >= ARRAY_SIZE(hv->hv_crash_param)))
+		return -EINVAL;
+
+	hv->hv_crash_param[index] = data;
+	return 0;
+}
+
+static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
+			     bool host)
+{
+	struct kvm *kvm = vcpu->kvm;
+	struct kvm_hv *hv = &kvm->arch.hyperv;
+
+	switch (msr) {
+	case HV_X64_MSR_GUEST_OS_ID:
+		hv->hv_guest_os_id = data;
+		/* setting guest os id to zero disables hypercall page */
+		if (!hv->hv_guest_os_id)
+			hv->hv_hypercall &= ~HV_X64_MSR_HYPERCALL_ENABLE;
+		break;
+	case HV_X64_MSR_HYPERCALL: {
+		u64 gfn;
+		unsigned long addr;
+		u8 instructions[4];
+
+		/* if guest os id is not set hypercall should remain disabled */
+		if (!hv->hv_guest_os_id)
+			break;
+		if (!(data & HV_X64_MSR_HYPERCALL_ENABLE)) {
+			hv->hv_hypercall = data;
+			break;
+		}
+		gfn = data >> HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT;
+		addr = gfn_to_hva(kvm, gfn);
+		if (kvm_is_error_hva(addr))
+			return 1;
+		kvm_x86_ops->patch_hypercall(vcpu, instructions);
+		((unsigned char *)instructions)[3] = 0xc3; /* ret */
+		if (__copy_to_user((void __user *)addr, instructions, 4))
+			return 1;
+		hv->hv_hypercall = data;
+		mark_page_dirty(kvm, gfn);
+		break;
+	}
+	case HV_X64_MSR_REFERENCE_TSC: {
+		u64 gfn;
+		HV_REFERENCE_TSC_PAGE tsc_ref;
+
+		memset(&tsc_ref, 0, sizeof(tsc_ref));
+		hv->hv_tsc_page = data;
+		if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE))
+			break;
+		gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
+		if (kvm_write_guest(
+				kvm,
+				gfn << HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT,
+				&tsc_ref, sizeof(tsc_ref)))
+			return 1;
+		mark_page_dirty(kvm, gfn);
+		break;
+	}
+	case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
+		return kvm_hv_msr_set_crash_data(vcpu,
+						 msr - HV_X64_MSR_CRASH_P0,
+						 data);
+	case HV_X64_MSR_CRASH_CTL:
+		return kvm_hv_msr_set_crash_ctl(vcpu, data, host);
+	default:
+		vcpu_unimpl(vcpu, "Hyper-V uhandled wrmsr: 0x%x data 0x%llx\n",
+			    msr, data);
+		return 1;
+	}
+	return 0;
+}
+
+static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
+{
+	struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv;
+
+	switch (msr) {
+	case HV_X64_MSR_APIC_ASSIST_PAGE: {
+		u64 gfn;
+		unsigned long addr;
+
+		if (!(data & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE)) {
+			hv->hv_vapic = data;
+			if (kvm_lapic_enable_pv_eoi(vcpu, 0))
+				return 1;
+			break;
+		}
+		gfn = data >> HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT;
+		addr = kvm_vcpu_gfn_to_hva(vcpu, gfn);
+		if (kvm_is_error_hva(addr))
+			return 1;
+		if (__clear_user((void __user *)addr, PAGE_SIZE))
+			return 1;
+		hv->hv_vapic = data;
+		kvm_vcpu_mark_page_dirty(vcpu, gfn);
+		if (kvm_lapic_enable_pv_eoi(vcpu,
+					    gfn_to_gpa(gfn) | KVM_MSR_ENABLED))
+			return 1;
+		break;
+	}
+	case HV_X64_MSR_EOI:
+		return kvm_hv_vapic_msr_write(vcpu, APIC_EOI, data);
+	case HV_X64_MSR_ICR:
+		return kvm_hv_vapic_msr_write(vcpu, APIC_ICR, data);
+	case HV_X64_MSR_TPR:
+		return kvm_hv_vapic_msr_write(vcpu, APIC_TASKPRI, data);
+	default:
+		vcpu_unimpl(vcpu, "Hyper-V uhandled wrmsr: 0x%x data 0x%llx\n",
+			    msr, data);
+		return 1;
+	}
+
+	return 0;
+}
+
+static int kvm_hv_get_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
+{
+	u64 data = 0;
+	struct kvm *kvm = vcpu->kvm;
+	struct kvm_hv *hv = &kvm->arch.hyperv;
+
+	switch (msr) {
+	case HV_X64_MSR_GUEST_OS_ID:
+		data = hv->hv_guest_os_id;
+		break;
+	case HV_X64_MSR_HYPERCALL:
+		data = hv->hv_hypercall;
+		break;
+	case HV_X64_MSR_TIME_REF_COUNT: {
+		data =
+		     div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100);
+		break;
+	}
+	case HV_X64_MSR_REFERENCE_TSC:
+		data = hv->hv_tsc_page;
+		break;
+	case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
+		return kvm_hv_msr_get_crash_data(vcpu,
+						 msr - HV_X64_MSR_CRASH_P0,
+						 pdata);
+	case HV_X64_MSR_CRASH_CTL:
+		return kvm_hv_msr_get_crash_ctl(vcpu, pdata);
+	default:
+		vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
+		return 1;
+	}
+
+	*pdata = data;
+	return 0;
+}
+
+static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
+{
+	u64 data = 0;
+	struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv;
+
+	switch (msr) {
+	case HV_X64_MSR_VP_INDEX: {
+		int r;
+		struct kvm_vcpu *v;
+
+		kvm_for_each_vcpu(r, v, vcpu->kvm) {
+			if (v == vcpu) {
+				data = r;
+				break;
+			}
+		}
+		break;
+	}
+	case HV_X64_MSR_EOI:
+		return kvm_hv_vapic_msr_read(vcpu, APIC_EOI, pdata);
+	case HV_X64_MSR_ICR:
+		return kvm_hv_vapic_msr_read(vcpu, APIC_ICR, pdata);
+	case HV_X64_MSR_TPR:
+		return kvm_hv_vapic_msr_read(vcpu, APIC_TASKPRI, pdata);
+	case HV_X64_MSR_APIC_ASSIST_PAGE:
+		data = hv->hv_vapic;
+		break;
+	default:
+		vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
+		return 1;
+	}
+	*pdata = data;
+	return 0;
+}
+
+int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
+{
+	if (kvm_hv_msr_partition_wide(msr)) {
+		int r;
+
+		mutex_lock(&vcpu->kvm->lock);
+		r = kvm_hv_set_msr_pw(vcpu, msr, data, host);
+		mutex_unlock(&vcpu->kvm->lock);
+		return r;
+	} else
+		return kvm_hv_set_msr(vcpu, msr, data);
+}
+
+int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
+{
+	if (kvm_hv_msr_partition_wide(msr)) {
+		int r;
+
+		mutex_lock(&vcpu->kvm->lock);
+		r = kvm_hv_get_msr_pw(vcpu, msr, pdata);
+		mutex_unlock(&vcpu->kvm->lock);
+		return r;
+	} else
+		return kvm_hv_get_msr(vcpu, msr, pdata);
+}
+
+bool kvm_hv_hypercall_enabled(struct kvm *kvm)
+{
+	return kvm->arch.hyperv.hv_hypercall & HV_X64_MSR_HYPERCALL_ENABLE;
+}
+
+int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
+{
+	u64 param, ingpa, outgpa, ret;
+	uint16_t code, rep_idx, rep_cnt, res = HV_STATUS_SUCCESS, rep_done = 0;
+	bool fast, longmode;
+
+	/*
+	 * hypercall generates UD from non zero cpl and real mode
+	 * per HYPER-V spec
+	 */
+	if (kvm_x86_ops->get_cpl(vcpu) != 0 || !is_protmode(vcpu)) {
+		kvm_queue_exception(vcpu, UD_VECTOR);
+		return 0;
+	}
+
+	longmode = is_64_bit_mode(vcpu);
+
+	if (!longmode) {
+		param = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDX) << 32) |
+			(kvm_register_read(vcpu, VCPU_REGS_RAX) & 0xffffffff);
+		ingpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RBX) << 32) |
+			(kvm_register_read(vcpu, VCPU_REGS_RCX) & 0xffffffff);
+		outgpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDI) << 32) |
+			(kvm_register_read(vcpu, VCPU_REGS_RSI) & 0xffffffff);
+	}
+#ifdef CONFIG_X86_64
+	else {
+		param = kvm_register_read(vcpu, VCPU_REGS_RCX);
+		ingpa = kvm_register_read(vcpu, VCPU_REGS_RDX);
+		outgpa = kvm_register_read(vcpu, VCPU_REGS_R8);
+	}
+#endif
+
+	code = param & 0xffff;
+	fast = (param >> 16) & 0x1;
+	rep_cnt = (param >> 32) & 0xfff;
+	rep_idx = (param >> 48) & 0xfff;
+
+	trace_kvm_hv_hypercall(code, fast, rep_cnt, rep_idx, ingpa, outgpa);
+
+	switch (code) {
+	case HV_X64_HV_NOTIFY_LONG_SPIN_WAIT:
+		kvm_vcpu_on_spin(vcpu);
+		break;
+	default:
+		res = HV_STATUS_INVALID_HYPERCALL_CODE;
+		break;
+	}
+
+	ret = res | (((u64)rep_done & 0xfff) << 32);
+	if (longmode) {
+		kvm_register_write(vcpu, VCPU_REGS_RAX, ret);
+	} else {
+		kvm_register_write(vcpu, VCPU_REGS_RDX, ret >> 32);
+		kvm_register_write(vcpu, VCPU_REGS_RAX, ret & 0xffffffff);
+	}
+
+	return 1;
+}
diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h
new file mode 100644
index 000000000000..c7bce559f67b
--- /dev/null
+++ b/arch/x86/kvm/hyperv.h
@@ -0,0 +1,32 @@
+/*
+ * KVM Microsoft Hyper-V emulation
+ *
+ * derived from arch/x86/kvm/x86.c
+ *
+ * Copyright (C) 2006 Qumranet, Inc.
+ * Copyright (C) 2008 Qumranet, Inc.
+ * Copyright IBM Corporation, 2008
+ * Copyright 2010 Red Hat, Inc. and/or its affiliates.
+ * Copyright (C) 2015 Andrey Smetanin <asmetanin@virtuozzo.com>
+ *
+ * Authors:
+ *   Avi Kivity   <avi@qumranet.com>
+ *   Yaniv Kamay  <yaniv@qumranet.com>
+ *   Amit Shah    <amit.shah@qumranet.com>
+ *   Ben-Ami Yassour <benami@il.ibm.com>
+ *   Andrey Smetanin <asmetanin@virtuozzo.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef __ARCH_X86_KVM_HYPERV_H__
+#define __ARCH_X86_KVM_HYPERV_H__
+
+int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host);
+int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata);
+bool kvm_hv_hypercall_enabled(struct kvm *kvm);
+int kvm_hv_hypercall(struct kvm_vcpu *vcpu);
+
+#endif
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index fef922ff2635..7cc2360f1848 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -651,15 +651,10 @@ fail_unlock:
 	return NULL;
 }
 
-void kvm_destroy_pic(struct kvm *kvm)
+void kvm_destroy_pic(struct kvm_pic *vpic)
 {
-	struct kvm_pic *vpic = kvm->arch.vpic;
-
-	if (vpic) {
-		kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &vpic->dev_master);
-		kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &vpic->dev_slave);
-		kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &vpic->dev_eclr);
-		kvm->arch.vpic = NULL;
-		kfree(vpic);
-	}
+	kvm_io_bus_unregister_dev(vpic->kvm, KVM_PIO_BUS, &vpic->dev_master);
+	kvm_io_bus_unregister_dev(vpic->kvm, KVM_PIO_BUS, &vpic->dev_slave);
+	kvm_io_bus_unregister_dev(vpic->kvm, KVM_PIO_BUS, &vpic->dev_eclr);
+	kfree(vpic);
 }
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index ad68c73008c5..3d782a2c336a 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -74,7 +74,7 @@ struct kvm_pic {
 };
 
 struct kvm_pic *kvm_create_pic(struct kvm *kvm);
-void kvm_destroy_pic(struct kvm *kvm);
+void kvm_destroy_pic(struct kvm_pic *vpic);
 int kvm_pic_read_irq(struct kvm *kvm);
 void kvm_pic_update_irq(struct kvm_pic *s);
 
@@ -85,11 +85,11 @@ static inline struct kvm_pic *pic_irqchip(struct kvm *kvm)
 
 static inline int irqchip_in_kernel(struct kvm *kvm)
 {
-	int ret;
+	struct kvm_pic *vpic = pic_irqchip(kvm);
 
-	ret = (pic_irqchip(kvm) != NULL);
+	/* Read vpic before kvm->irq_routing.  */
 	smp_rmb();
-	return ret;
+	return vpic != NULL;
 }
 
 void kvm_pic_reset(struct kvm_kpic_state *s);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 2a5ca97c263b..9a3e342e3cda 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1900,8 +1900,9 @@ void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu)
 	if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
 		return;
 
-	kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data,
-				sizeof(u32));
+	if (kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data,
+				  sizeof(u32)))
+		return;
 
 	apic_set_tpr(vcpu->arch.apic, data & 0xff);
 }
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 71952748222a..764037991d26 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -91,7 +91,7 @@ int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data);
 
 static inline bool kvm_hv_vapic_assist_page_enabled(struct kvm_vcpu *vcpu)
 {
-	return vcpu->arch.hv_vapic & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE;
+	return vcpu->arch.hyperv.hv_vapic & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE;
 }
 
 int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 44171462bd2a..fb16a8ea3dee 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -357,12 +357,6 @@ static u64 __get_spte_lockless(u64 *sptep)
 {
 	return ACCESS_ONCE(*sptep);
 }
-
-static bool __check_direct_spte_mmio_pf(u64 spte)
-{
-	/* It is valid if the spte is zapped. */
-	return spte == 0ull;
-}
 #else
 union split_spte {
 	struct {
@@ -478,23 +472,6 @@ retry:
 
 	return spte.spte;
 }
-
-static bool __check_direct_spte_mmio_pf(u64 spte)
-{
-	union split_spte sspte = (union split_spte)spte;
-	u32 high_mmio_mask = shadow_mmio_mask >> 32;
-
-	/* It is valid if the spte is zapped. */
-	if (spte == 0ull)
-		return true;
-
-	/* It is valid if the spte is being zapped. */
-	if (sspte.spte_low == 0ull &&
-	    (sspte.spte_high & high_mmio_mask) == high_mmio_mask)
-		return true;
-
-	return false;
-}
 #endif
 
 static bool spte_is_locklessly_modifiable(u64 spte)
@@ -3291,54 +3268,89 @@ static gpa_t nonpaging_gva_to_gpa_nested(struct kvm_vcpu *vcpu, gva_t vaddr,
 	return vcpu->arch.nested_mmu.translate_gpa(vcpu, vaddr, access, exception);
 }
 
-static bool quickly_check_mmio_pf(struct kvm_vcpu *vcpu, u64 addr, bool direct)
+static bool
+__is_rsvd_bits_set(struct rsvd_bits_validate *rsvd_check, u64 pte, int level)
 {
-	if (direct)
-		return vcpu_match_mmio_gpa(vcpu, addr);
+	int bit7 = (pte >> 7) & 1, low6 = pte & 0x3f;
 
-	return vcpu_match_mmio_gva(vcpu, addr);
+	return (pte & rsvd_check->rsvd_bits_mask[bit7][level-1]) |
+		((rsvd_check->bad_mt_xwr & (1ull << low6)) != 0);
 }
 
+static bool is_rsvd_bits_set(struct kvm_mmu *mmu, u64 gpte, int level)
+{
+	return __is_rsvd_bits_set(&mmu->guest_rsvd_check, gpte, level);
+}
 
-/*
- * On direct hosts, the last spte is only allows two states
- * for mmio page fault:
- *   - It is the mmio spte
- *   - It is zapped or it is being zapped.
- *
- * This function completely checks the spte when the last spte
- * is not the mmio spte.
- */
-static bool check_direct_spte_mmio_pf(u64 spte)
+static bool is_shadow_zero_bits_set(struct kvm_mmu *mmu, u64 spte, int level)
 {
-	return __check_direct_spte_mmio_pf(spte);
+	return __is_rsvd_bits_set(&mmu->shadow_zero_check, spte, level);
 }
 
-static u64 walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr)
+static bool quickly_check_mmio_pf(struct kvm_vcpu *vcpu, u64 addr, bool direct)
+{
+	if (direct)
+		return vcpu_match_mmio_gpa(vcpu, addr);
+
+	return vcpu_match_mmio_gva(vcpu, addr);
+}
+
+/* return true if reserved bit is detected on spte. */
+static bool
+walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
 {
 	struct kvm_shadow_walk_iterator iterator;
-	u64 spte = 0ull;
+	u64 sptes[PT64_ROOT_LEVEL], spte = 0ull;
+	int root, leaf;
+	bool reserved = false;
 
 	if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
-		return spte;
+		goto exit;
 
 	walk_shadow_page_lockless_begin(vcpu);
-	for_each_shadow_entry_lockless(vcpu, addr, iterator, spte)
+
+	for (shadow_walk_init(&iterator, vcpu, addr), root = iterator.level;
+	     shadow_walk_okay(&iterator);
+	     __shadow_walk_next(&iterator, spte)) {
+		leaf = iterator.level;
+		spte = mmu_spte_get_lockless(iterator.sptep);
+
+		sptes[leaf - 1] = spte;
+
 		if (!is_shadow_present_pte(spte))
 			break;
+
+		reserved |= is_shadow_zero_bits_set(&vcpu->arch.mmu, spte,
+						    leaf);
+	}
+
 	walk_shadow_page_lockless_end(vcpu);
 
-	return spte;
+	if (reserved) {
+		pr_err("%s: detect reserved bits on spte, addr 0x%llx, dump hierarchy:\n",
+		       __func__, addr);
+		while (root >= leaf) {
+			pr_err("------ spte 0x%llx level %d.\n",
+			       sptes[root - 1], root);
+			root--;
+		}
+	}
+exit:
+	*sptep = spte;
+	return reserved;
 }
 
 int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct)
 {
 	u64 spte;
+	bool reserved;
 
 	if (quickly_check_mmio_pf(vcpu, addr, direct))
 		return RET_MMIO_PF_EMULATE;
 
-	spte = walk_shadow_page_get_mmio_spte(vcpu, addr);
+	reserved = walk_shadow_page_get_mmio_spte(vcpu, addr, &spte);
+	if (unlikely(reserved))
+		return RET_MMIO_PF_BUG;
 
 	if (is_mmio_spte(spte)) {
 		gfn_t gfn = get_mmio_spte_gfn(spte);
@@ -3356,13 +3368,6 @@ int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct)
 	}
 
 	/*
-	 * It's ok if the gva is remapped by other cpus on shadow guest,
-	 * it's a BUG if the gfn is not a mmio page.
-	 */
-	if (direct && !check_direct_spte_mmio_pf(spte))
-		return RET_MMIO_PF_BUG;
-
-	/*
 	 * If the page table is zapped by other cpus, let CPU fault again on
 	 * the address.
 	 */
@@ -3604,19 +3609,21 @@ static inline bool is_last_gpte(struct kvm_mmu *mmu, unsigned level, unsigned gp
 #include "paging_tmpl.h"
 #undef PTTYPE
 
-static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
-				  struct kvm_mmu *context)
+static void
+__reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
+			struct rsvd_bits_validate *rsvd_check,
+			int maxphyaddr, int level, bool nx, bool gbpages,
+			bool pse)
 {
-	int maxphyaddr = cpuid_maxphyaddr(vcpu);
 	u64 exb_bit_rsvd = 0;
 	u64 gbpages_bit_rsvd = 0;
 	u64 nonleaf_bit8_rsvd = 0;
 
-	context->bad_mt_xwr = 0;
+	rsvd_check->bad_mt_xwr = 0;
 
-	if (!context->nx)
+	if (!nx)
 		exb_bit_rsvd = rsvd_bits(63, 63);
-	if (!guest_cpuid_has_gbpages(vcpu))
+	if (!gbpages)
 		gbpages_bit_rsvd = rsvd_bits(7, 7);
 
 	/*
@@ -3626,80 +3633,95 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
 	if (guest_cpuid_is_amd(vcpu))
 		nonleaf_bit8_rsvd = rsvd_bits(8, 8);
 
-	switch (context->root_level) {
+	switch (level) {
 	case PT32_ROOT_LEVEL:
 		/* no rsvd bits for 2 level 4K page table entries */
-		context->rsvd_bits_mask[0][1] = 0;
-		context->rsvd_bits_mask[0][0] = 0;
-		context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0];
+		rsvd_check->rsvd_bits_mask[0][1] = 0;
+		rsvd_check->rsvd_bits_mask[0][0] = 0;
+		rsvd_check->rsvd_bits_mask[1][0] =
+			rsvd_check->rsvd_bits_mask[0][0];
 
-		if (!is_pse(vcpu)) {
-			context->rsvd_bits_mask[1][1] = 0;
+		if (!pse) {
+			rsvd_check->rsvd_bits_mask[1][1] = 0;
 			break;
 		}
 
 		if (is_cpuid_PSE36())
 			/* 36bits PSE 4MB page */
-			context->rsvd_bits_mask[1][1] = rsvd_bits(17, 21);
+			rsvd_check->rsvd_bits_mask[1][1] = rsvd_bits(17, 21);
 		else
 			/* 32 bits PSE 4MB page */
-			context->rsvd_bits_mask[1][1] = rsvd_bits(13, 21);
+			rsvd_check->rsvd_bits_mask[1][1] = rsvd_bits(13, 21);
 		break;
 	case PT32E_ROOT_LEVEL:
-		context->rsvd_bits_mask[0][2] =
+		rsvd_check->rsvd_bits_mask[0][2] =
 			rsvd_bits(maxphyaddr, 63) |
 			rsvd_bits(5, 8) | rsvd_bits(1, 2);	/* PDPTE */
-		context->rsvd_bits_mask[0][1] = exb_bit_rsvd |
+		rsvd_check->rsvd_bits_mask[0][1] = exb_bit_rsvd |
 			rsvd_bits(maxphyaddr, 62);	/* PDE */
-		context->rsvd_bits_mask[0][0] = exb_bit_rsvd |
+		rsvd_check->rsvd_bits_mask[0][0] = exb_bit_rsvd |
 			rsvd_bits(maxphyaddr, 62); 	/* PTE */
-		context->rsvd_bits_mask[1][1] = exb_bit_rsvd |
+		rsvd_check->rsvd_bits_mask[1][1] = exb_bit_rsvd |
 			rsvd_bits(maxphyaddr, 62) |
 			rsvd_bits(13, 20);		/* large page */
-		context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0];
+		rsvd_check->rsvd_bits_mask[1][0] =
+			rsvd_check->rsvd_bits_mask[0][0];
 		break;
 	case PT64_ROOT_LEVEL:
-		context->rsvd_bits_mask[0][3] = exb_bit_rsvd |
-			nonleaf_bit8_rsvd | rsvd_bits(7, 7) | rsvd_bits(maxphyaddr, 51);
-		context->rsvd_bits_mask[0][2] = exb_bit_rsvd |
-			nonleaf_bit8_rsvd | gbpages_bit_rsvd | rsvd_bits(maxphyaddr, 51);
-		context->rsvd_bits_mask[0][1] = exb_bit_rsvd |
+		rsvd_check->rsvd_bits_mask[0][3] = exb_bit_rsvd |
+			nonleaf_bit8_rsvd | rsvd_bits(7, 7) |
+			rsvd_bits(maxphyaddr, 51);
+		rsvd_check->rsvd_bits_mask[0][2] = exb_bit_rsvd |
+			nonleaf_bit8_rsvd | gbpages_bit_rsvd |
 			rsvd_bits(maxphyaddr, 51);
-		context->rsvd_bits_mask[0][0] = exb_bit_rsvd |
+		rsvd_check->rsvd_bits_mask[0][1] = exb_bit_rsvd |
 			rsvd_bits(maxphyaddr, 51);
-		context->rsvd_bits_mask[1][3] = context->rsvd_bits_mask[0][3];
-		context->rsvd_bits_mask[1][2] = exb_bit_rsvd |
+		rsvd_check->rsvd_bits_mask[0][0] = exb_bit_rsvd |
+			rsvd_bits(maxphyaddr, 51);
+		rsvd_check->rsvd_bits_mask[1][3] =
+			rsvd_check->rsvd_bits_mask[0][3];
+		rsvd_check->rsvd_bits_mask[1][2] = exb_bit_rsvd |
 			gbpages_bit_rsvd | rsvd_bits(maxphyaddr, 51) |
 			rsvd_bits(13, 29);
-		context->rsvd_bits_mask[1][1] = exb_bit_rsvd |
+		rsvd_check->rsvd_bits_mask[1][1] = exb_bit_rsvd |
 			rsvd_bits(maxphyaddr, 51) |
 			rsvd_bits(13, 20);		/* large page */
-		context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0];
+		rsvd_check->rsvd_bits_mask[1][0] =
+			rsvd_check->rsvd_bits_mask[0][0];
 		break;
 	}
 }
 
-static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu,
-		struct kvm_mmu *context, bool execonly)
+static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
+				  struct kvm_mmu *context)
+{
+	__reset_rsvds_bits_mask(vcpu, &context->guest_rsvd_check,
+				cpuid_maxphyaddr(vcpu), context->root_level,
+				context->nx, guest_cpuid_has_gbpages(vcpu),
+				is_pse(vcpu));
+}
+
+static void
+__reset_rsvds_bits_mask_ept(struct rsvd_bits_validate *rsvd_check,
+			    int maxphyaddr, bool execonly)
 {
-	int maxphyaddr = cpuid_maxphyaddr(vcpu);
 	int pte;
 
-	context->rsvd_bits_mask[0][3] =
+	rsvd_check->rsvd_bits_mask[0][3] =
 		rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 7);
-	context->rsvd_bits_mask[0][2] =
+	rsvd_check->rsvd_bits_mask[0][2] =
 		rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 6);
-	context->rsvd_bits_mask[0][1] =
+	rsvd_check->rsvd_bits_mask[0][1] =
 		rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 6);
-	context->rsvd_bits_mask[0][0] = rsvd_bits(maxphyaddr, 51);
+	rsvd_check->rsvd_bits_mask[0][0] = rsvd_bits(maxphyaddr, 51);
 
 	/* large page */
-	context->rsvd_bits_mask[1][3] = context->rsvd_bits_mask[0][3];
-	context->rsvd_bits_mask[1][2] =
+	rsvd_check->rsvd_bits_mask[1][3] = rsvd_check->rsvd_bits_mask[0][3];
+	rsvd_check->rsvd_bits_mask[1][2] =
 		rsvd_bits(maxphyaddr, 51) | rsvd_bits(12, 29);
-	context->rsvd_bits_mask[1][1] =
+	rsvd_check->rsvd_bits_mask[1][1] =
 		rsvd_bits(maxphyaddr, 51) | rsvd_bits(12, 20);
-	context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0];
+	rsvd_check->rsvd_bits_mask[1][0] = rsvd_check->rsvd_bits_mask[0][0];
 
 	for (pte = 0; pte < 64; pte++) {
 		int rwx_bits = pte & 7;
@@ -3707,10 +3729,64 @@ static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu,
 		if (mt == 0x2 || mt == 0x3 || mt == 0x7 ||
 				rwx_bits == 0x2 || rwx_bits == 0x6 ||
 				(rwx_bits == 0x4 && !execonly))
-			context->bad_mt_xwr |= (1ull << pte);
+			rsvd_check->bad_mt_xwr |= (1ull << pte);
 	}
 }
 
+static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu,
+		struct kvm_mmu *context, bool execonly)
+{
+	__reset_rsvds_bits_mask_ept(&context->guest_rsvd_check,
+				    cpuid_maxphyaddr(vcpu), execonly);
+}
+
+/*
+ * the page table on host is the shadow page table for the page
+ * table in guest or amd nested guest, its mmu features completely
+ * follow the features in guest.
+ */
+void
+reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context)
+{
+	__reset_rsvds_bits_mask(vcpu, &context->shadow_zero_check,
+				boot_cpu_data.x86_phys_bits,
+				context->shadow_root_level, context->nx,
+				guest_cpuid_has_gbpages(vcpu), is_pse(vcpu));
+}
+EXPORT_SYMBOL_GPL(reset_shadow_zero_bits_mask);
+
+/*
+ * the direct page table on host, use as much mmu features as
+ * possible, however, kvm currently does not do execution-protection.
+ */
+static void
+reset_tdp_shadow_zero_bits_mask(struct kvm_vcpu *vcpu,
+				struct kvm_mmu *context)
+{
+	if (guest_cpuid_is_amd(vcpu))
+		__reset_rsvds_bits_mask(vcpu, &context->shadow_zero_check,
+					boot_cpu_data.x86_phys_bits,
+					context->shadow_root_level, false,
+					cpu_has_gbpages, true);
+	else
+		__reset_rsvds_bits_mask_ept(&context->shadow_zero_check,
+					    boot_cpu_data.x86_phys_bits,
+					    false);
+
+}
+
+/*
+ * as the comments in reset_shadow_zero_bits_mask() except it
+ * is the shadow page table for intel nested guest.
+ */
+static void
+reset_ept_shadow_zero_bits_mask(struct kvm_vcpu *vcpu,
+				struct kvm_mmu *context, bool execonly)
+{
+	__reset_rsvds_bits_mask_ept(&context->shadow_zero_check,
+				    boot_cpu_data.x86_phys_bits, execonly);
+}
+
 static void update_permission_bitmask(struct kvm_vcpu *vcpu,
 				      struct kvm_mmu *mmu, bool ept)
 {
@@ -3889,6 +3965,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
 
 	update_permission_bitmask(vcpu, context, false);
 	update_last_pte_bitmap(vcpu, context);
+	reset_tdp_shadow_zero_bits_mask(vcpu, context);
 }
 
 void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu)
@@ -3916,6 +3993,7 @@ void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu)
 	context->base_role.smap_andnot_wp
 		= smap && !is_write_protection(vcpu);
 	context->base_role.smm = is_smm(vcpu);
+	reset_shadow_zero_bits_mask(vcpu, context);
 }
 EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu);
 
@@ -3939,6 +4017,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly)
 
 	update_permission_bitmask(vcpu, context, true);
 	reset_rsvds_bits_mask_ept(vcpu, context, execonly);
+	reset_ept_shadow_zero_bits_mask(vcpu, context, execonly);
 }
 EXPORT_SYMBOL_GPL(kvm_init_shadow_ept_mmu);
 
@@ -4860,28 +4939,6 @@ unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm)
 	return nr_mmu_pages;
 }
 
-int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4])
-{
-	struct kvm_shadow_walk_iterator iterator;
-	u64 spte;
-	int nr_sptes = 0;
-
-	if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
-		return nr_sptes;
-
-	walk_shadow_page_lockless_begin(vcpu);
-	for_each_shadow_entry_lockless(vcpu, addr, iterator, spte) {
-		sptes[iterator.level-1] = spte;
-		nr_sptes++;
-		if (!is_shadow_present_pte(spte))
-			break;
-	}
-	walk_shadow_page_lockless_end(vcpu);
-
-	return nr_sptes;
-}
-EXPORT_SYMBOL_GPL(kvm_mmu_get_spte_hierarchy);
-
 void kvm_mmu_destroy(struct kvm_vcpu *vcpu)
 {
 	kvm_mmu_unload(vcpu);
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 398d21c0f6dd..e4202e41d535 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -50,9 +50,11 @@ static inline u64 rsvd_bits(int s, int e)
 	return ((1ULL << (e - s + 1)) - 1) << s;
 }
 
-int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4]);
 void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask);
 
+void
+reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context);
+
 /*
  * Return values of handle_mmio_page_fault_common:
  * RET_MMIO_PF_EMULATE: it is a real mmio page fault, emulate the instruction
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 0f67d7e24800..736e6ab8784d 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -128,14 +128,6 @@ static inline void FNAME(protect_clean_gpte)(unsigned *access, unsigned gpte)
 	*access &= mask;
 }
 
-static bool FNAME(is_rsvd_bits_set)(struct kvm_mmu *mmu, u64 gpte, int level)
-{
-	int bit7 = (gpte >> 7) & 1, low6 = gpte & 0x3f;
-
-	return (gpte & mmu->rsvd_bits_mask[bit7][level-1]) |
-		((mmu->bad_mt_xwr & (1ull << low6)) != 0);
-}
-
 static inline int FNAME(is_present_gpte)(unsigned long pte)
 {
 #if PTTYPE != PTTYPE_EPT
@@ -172,7 +164,7 @@ static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu,
 				  struct kvm_mmu_page *sp, u64 *spte,
 				  u64 gpte)
 {
-	if (FNAME(is_rsvd_bits_set)(&vcpu->arch.mmu, gpte, PT_PAGE_TABLE_LEVEL))
+	if (is_rsvd_bits_set(&vcpu->arch.mmu, gpte, PT_PAGE_TABLE_LEVEL))
 		goto no_present;
 
 	if (!FNAME(is_present_gpte)(gpte))
@@ -353,8 +345,7 @@ retry_walk:
 		if (unlikely(!FNAME(is_present_gpte)(pte)))
 			goto error;
 
-		if (unlikely(FNAME(is_rsvd_bits_set)(mmu, pte,
-					             walker->level))) {
+		if (unlikely(is_rsvd_bits_set(mmu, pte, walker->level))) {
 			errcode |= PFERR_RSVD_MASK | PFERR_PRESENT_MASK;
 			goto error;
 		}
diff --git a/arch/x86/kvm/pmu_amd.c b/arch/x86/kvm/pmu_amd.c
index 886aa25a7131..39b91127ef07 100644
--- a/arch/x86/kvm/pmu_amd.c
+++ b/arch/x86/kvm/pmu_amd.c
@@ -133,8 +133,6 @@ static int amd_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	/* MSR_K7_PERFCTRn */
 	pmc = get_gp_pmc(pmu, msr, MSR_K7_PERFCTR0);
 	if (pmc) {
-		if (!msr_info->host_initiated)
-			data = (s64)data;
 		pmc->counter += data - pmc_read_counter(pmc);
 		return 0;
 	}
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 8e0c0844c6b9..74d825716f4f 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1173,6 +1173,10 @@ static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
 	if (!is_mmio && !kvm_arch_has_assigned_device(vcpu->kvm))
 		return 0;
 
+	if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED) &&
+	    kvm_read_cr0(vcpu) & X86_CR0_CD)
+		return _PAGE_NOCACHE;
+
 	mtrr = kvm_mtrr_get_guest_memory_type(vcpu, gfn);
 	return mtrr2protval[mtrr];
 }
@@ -1667,13 +1671,10 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 
 	if (!vcpu->fpu_active)
 		cr0 |= X86_CR0_TS;
-	/*
-	 * re-enable caching here because the QEMU bios
-	 * does not do it - this results in some delay at
-	 * reboot
-	 */
-	if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
-		cr0 &= ~(X86_CR0_CD | X86_CR0_NW);
+
+	/* These are emulated via page tables.  */
+	cr0 &= ~(X86_CR0_CD | X86_CR0_NW);
+
 	svm->vmcb->save.cr0 = cr0;
 	mark_dirty(svm->vmcb, VMCB_CR);
 	update_cr0_intercept(svm);
@@ -2106,6 +2107,7 @@ static void nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
 	vcpu->arch.mmu.get_pdptr         = nested_svm_get_tdp_pdptr;
 	vcpu->arch.mmu.inject_page_fault = nested_svm_inject_npf_exit;
 	vcpu->arch.mmu.shadow_root_level = get_npt_level();
+	reset_shadow_zero_bits_mask(vcpu, &vcpu->arch.mmu);
 	vcpu->arch.walk_mmu              = &vcpu->arch.nested_mmu;
 }
 
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 83b7b5cd75d5..da1590ea43fc 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2443,10 +2443,10 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
 		CPU_BASED_CR8_LOAD_EXITING | CPU_BASED_CR8_STORE_EXITING |
 #endif
 		CPU_BASED_MOV_DR_EXITING | CPU_BASED_UNCOND_IO_EXITING |
-		CPU_BASED_USE_IO_BITMAPS | CPU_BASED_MONITOR_EXITING |
-		CPU_BASED_RDPMC_EXITING | CPU_BASED_RDTSC_EXITING |
-		CPU_BASED_PAUSE_EXITING | CPU_BASED_TPR_SHADOW |
-		CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
+		CPU_BASED_USE_IO_BITMAPS | CPU_BASED_MONITOR_TRAP_FLAG |
+		CPU_BASED_MONITOR_EXITING | CPU_BASED_RDPMC_EXITING |
+		CPU_BASED_RDTSC_EXITING | CPU_BASED_PAUSE_EXITING |
+		CPU_BASED_TPR_SHADOW | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
 	/*
 	 * We can allow some features even when not supported by the
 	 * hardware. For example, L1 can specify an MSR bitmap - and we
@@ -3423,12 +3423,12 @@ static void enter_lmode(struct kvm_vcpu *vcpu)
 	vmx_segment_cache_clear(to_vmx(vcpu));
 
 	guest_tr_ar = vmcs_read32(GUEST_TR_AR_BYTES);
-	if ((guest_tr_ar & AR_TYPE_MASK) != AR_TYPE_BUSY_64_TSS) {
+	if ((guest_tr_ar & VMX_AR_TYPE_MASK) != VMX_AR_TYPE_BUSY_64_TSS) {
 		pr_debug_ratelimited("%s: tss fixup for long mode. \n",
 				     __func__);
 		vmcs_write32(GUEST_TR_AR_BYTES,
-			     (guest_tr_ar & ~AR_TYPE_MASK)
-			     | AR_TYPE_BUSY_64_TSS);
+			     (guest_tr_ar & ~VMX_AR_TYPE_MASK)
+			     | VMX_AR_TYPE_BUSY_64_TSS);
 	}
 	vmx_set_efer(vcpu, vcpu->arch.efer | EFER_LMA);
 }
@@ -3719,7 +3719,7 @@ static int vmx_get_cpl(struct kvm_vcpu *vcpu)
 		return 0;
 	else {
 		int ar = vmx_read_guest_seg_ar(vmx, VCPU_SREG_SS);
-		return AR_DPL(ar);
+		return VMX_AR_DPL(ar);
 	}
 }
 
@@ -3847,11 +3847,11 @@ static bool code_segment_valid(struct kvm_vcpu *vcpu)
 
 	if (cs.unusable)
 		return false;
-	if (~cs.type & (AR_TYPE_CODE_MASK|AR_TYPE_ACCESSES_MASK))
+	if (~cs.type & (VMX_AR_TYPE_CODE_MASK|VMX_AR_TYPE_ACCESSES_MASK))
 		return false;
 	if (!cs.s)
 		return false;
-	if (cs.type & AR_TYPE_WRITEABLE_MASK) {
+	if (cs.type & VMX_AR_TYPE_WRITEABLE_MASK) {
 		if (cs.dpl > cs_rpl)
 			return false;
 	} else {
@@ -3901,7 +3901,7 @@ static bool data_segment_valid(struct kvm_vcpu *vcpu, int seg)
 		return false;
 	if (!var.present)
 		return false;
-	if (~var.type & (AR_TYPE_CODE_MASK|AR_TYPE_WRITEABLE_MASK)) {
+	if (~var.type & (VMX_AR_TYPE_CODE_MASK|VMX_AR_TYPE_WRITEABLE_MASK)) {
 		if (var.dpl < rpl) /* DPL < RPL */
 			return false;
 	}
@@ -5759,73 +5759,9 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
 	return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL, 0);
 }
 
-static u64 ept_rsvd_mask(u64 spte, int level)
-{
-	int i;
-	u64 mask = 0;
-
-	for (i = 51; i > boot_cpu_data.x86_phys_bits; i--)
-		mask |= (1ULL << i);
-
-	if (level == 4)
-		/* bits 7:3 reserved */
-		mask |= 0xf8;
-	else if (spte & (1ULL << 7))
-		/*
-		 * 1GB/2MB page, bits 29:12 or 20:12 reserved respectively,
-		 * level == 1 if the hypervisor is using the ignored bit 7.
-		 */
-		mask |= (PAGE_SIZE << ((level - 1) * 9)) - PAGE_SIZE;
-	else if (level > 1)
-		/* bits 6:3 reserved */
-		mask |= 0x78;
-
-	return mask;
-}
-
-static void ept_misconfig_inspect_spte(struct kvm_vcpu *vcpu, u64 spte,
-				       int level)
-{
-	printk(KERN_ERR "%s: spte 0x%llx level %d\n", __func__, spte, level);
-
-	/* 010b (write-only) */
-	WARN_ON((spte & 0x7) == 0x2);
-
-	/* 110b (write/execute) */
-	WARN_ON((spte & 0x7) == 0x6);
-
-	/* 100b (execute-only) and value not supported by logical processor */
-	if (!cpu_has_vmx_ept_execute_only())
-		WARN_ON((spte & 0x7) == 0x4);
-
-	/* not 000b */
-	if ((spte & 0x7)) {
-		u64 rsvd_bits = spte & ept_rsvd_mask(spte, level);
-
-		if (rsvd_bits != 0) {
-			printk(KERN_ERR "%s: rsvd_bits = 0x%llx\n",
-					 __func__, rsvd_bits);
-			WARN_ON(1);
-		}
-
-		/* bits 5:3 are _not_ reserved for large page or leaf page */
-		if ((rsvd_bits & 0x38) == 0) {
-			u64 ept_mem_type = (spte & 0x38) >> 3;
-
-			if (ept_mem_type == 2 || ept_mem_type == 3 ||
-			    ept_mem_type == 7) {
-				printk(KERN_ERR "%s: ept_mem_type=0x%llx\n",
-						__func__, ept_mem_type);
-				WARN_ON(1);
-			}
-		}
-	}
-}
-
 static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
 {
-	u64 sptes[4];
-	int nr_sptes, i, ret;
+	int ret;
 	gpa_t gpa;
 
 	gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
@@ -5846,13 +5782,7 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
 		return 1;
 
 	/* It is the real ept misconfig */
-	printk(KERN_ERR "EPT: Misconfiguration.\n");
-	printk(KERN_ERR "EPT: GPA: 0x%llx\n", gpa);
-
-	nr_sptes = kvm_mmu_get_spte_hierarchy(vcpu, gpa, sptes);
-
-	for (i = PT64_ROOT_LEVEL; i > PT64_ROOT_LEVEL - nr_sptes; --i)
-		ept_misconfig_inspect_spte(vcpu, sptes[i-1], i);
+	WARN_ON(1);
 
 	vcpu->run->exit_reason = KVM_EXIT_UNKNOWN;
 	vcpu->run->hw.hardware_exit_reason = EXIT_REASON_EPT_MISCONFIG;
@@ -6246,6 +6176,11 @@ static int handle_mwait(struct kvm_vcpu *vcpu)
 	return handle_nop(vcpu);
 }
 
+static int handle_monitor_trap(struct kvm_vcpu *vcpu)
+{
+	return 1;
+}
+
 static int handle_monitor(struct kvm_vcpu *vcpu)
 {
 	printk_once(KERN_WARNING "kvm: MONITOR instruction emulated as NOP!\n");
@@ -6408,8 +6343,12 @@ static enum hrtimer_restart vmx_preemption_timer_fn(struct hrtimer *timer)
  */
 static int get_vmx_mem_address(struct kvm_vcpu *vcpu,
 				 unsigned long exit_qualification,
-				 u32 vmx_instruction_info, gva_t *ret)
+				 u32 vmx_instruction_info, bool wr, gva_t *ret)
 {
+	gva_t off;
+	bool exn;
+	struct kvm_segment s;
+
 	/*
 	 * According to Vol. 3B, "Information for VM Exits Due to Instruction
 	 * Execution", on an exit, vmx_instruction_info holds most of the
@@ -6434,22 +6373,63 @@ static int get_vmx_mem_address(struct kvm_vcpu *vcpu,
 
 	/* Addr = segment_base + offset */
 	/* offset = base + [index * scale] + displacement */
-	*ret = vmx_get_segment_base(vcpu, seg_reg);
+	off = exit_qualification; /* holds the displacement */
 	if (base_is_valid)
-		*ret += kvm_register_read(vcpu, base_reg);
+		off += kvm_register_read(vcpu, base_reg);
 	if (index_is_valid)
-		*ret += kvm_register_read(vcpu, index_reg)<<scaling;
-	*ret += exit_qualification; /* holds the displacement */
+		off += kvm_register_read(vcpu, index_reg)<<scaling;
+	vmx_get_segment(vcpu, &s, seg_reg);
+	*ret = s.base + off;
 
 	if (addr_size == 1) /* 32 bit */
 		*ret &= 0xffffffff;
 
-	/*
-	 * TODO: throw #GP (and return 1) in various cases that the VM*
-	 * instructions require it - e.g., offset beyond segment limit,
-	 * unusable or unreadable/unwritable segment, non-canonical 64-bit
-	 * address, and so on. Currently these are not checked.
-	 */
+	/* Checks for #GP/#SS exceptions. */
+	exn = false;
+	if (is_protmode(vcpu)) {
+		/* Protected mode: apply checks for segment validity in the
+		 * following order:
+		 * - segment type check (#GP(0) may be thrown)
+		 * - usability check (#GP(0)/#SS(0))
+		 * - limit check (#GP(0)/#SS(0))
+		 */
+		if (wr)
+			/* #GP(0) if the destination operand is located in a
+			 * read-only data segment or any code segment.
+			 */
+			exn = ((s.type & 0xa) == 0 || (s.type & 8));
+		else
+			/* #GP(0) if the source operand is located in an
+			 * execute-only code segment
+			 */
+			exn = ((s.type & 0xa) == 8);
+	}
+	if (exn) {
+		kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
+		return 1;
+	}
+	if (is_long_mode(vcpu)) {
+		/* Long mode: #GP(0)/#SS(0) if the memory address is in a
+		 * non-canonical form. This is an only check for long mode.
+		 */
+		exn = is_noncanonical_address(*ret);
+	} else if (is_protmode(vcpu)) {
+		/* Protected mode: #GP(0)/#SS(0) if the segment is unusable.
+		 */
+		exn = (s.unusable != 0);
+		/* Protected mode: #GP(0)/#SS(0) if the memory
+		 * operand is outside the segment limit.
+		 */
+		exn = exn || (off + sizeof(u64) > s.limit);
+	}
+	if (exn) {
+		kvm_queue_exception_e(vcpu,
+				      seg_reg == VCPU_SREG_SS ?
+						SS_VECTOR : GP_VECTOR,
+				      0);
+		return 1;
+	}
+
 	return 0;
 }
 
@@ -6471,7 +6451,7 @@ static int nested_vmx_check_vmptr(struct kvm_vcpu *vcpu, int exit_reason,
 	int maxphyaddr = cpuid_maxphyaddr(vcpu);
 
 	if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
-			vmcs_read32(VMX_INSTRUCTION_INFO), &gva))
+			vmcs_read32(VMX_INSTRUCTION_INFO), false, &gva))
 		return 1;
 
 	if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &vmptr,
@@ -6999,7 +6979,7 @@ static int handle_vmread(struct kvm_vcpu *vcpu)
 			field_value);
 	} else {
 		if (get_vmx_mem_address(vcpu, exit_qualification,
-				vmx_instruction_info, &gva))
+				vmx_instruction_info, true, &gva))
 			return 1;
 		/* _system ok, as nested_vmx_check_permission verified cpl=0 */
 		kvm_write_guest_virt_system(&vcpu->arch.emulate_ctxt, gva,
@@ -7036,7 +7016,7 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
 			(((vmx_instruction_info) >> 3) & 0xf));
 	else {
 		if (get_vmx_mem_address(vcpu, exit_qualification,
-				vmx_instruction_info, &gva))
+				vmx_instruction_info, false, &gva))
 			return 1;
 		if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva,
 			   &field_value, (is_64_bit_mode(vcpu) ? 8 : 4), &e)) {
@@ -7128,7 +7108,7 @@ static int handle_vmptrst(struct kvm_vcpu *vcpu)
 		return 1;
 
 	if (get_vmx_mem_address(vcpu, exit_qualification,
-			vmx_instruction_info, &vmcs_gva))
+			vmx_instruction_info, true, &vmcs_gva))
 		return 1;
 	/* ok to use *_system, as nested_vmx_check_permission verified cpl=0 */
 	if (kvm_write_guest_virt_system(&vcpu->arch.emulate_ctxt, vmcs_gva,
@@ -7184,7 +7164,7 @@ static int handle_invept(struct kvm_vcpu *vcpu)
 	 * operand is read even if it isn't needed (e.g., for type==global)
 	 */
 	if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
-			vmx_instruction_info, &gva))
+			vmx_instruction_info, false, &gva))
 		return 1;
 	if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &operand,
 				sizeof(operand), &e)) {
@@ -7282,6 +7262,7 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
 	[EXIT_REASON_EPT_MISCONFIG]           = handle_ept_misconfig,
 	[EXIT_REASON_PAUSE_INSTRUCTION]       = handle_pause,
 	[EXIT_REASON_MWAIT_INSTRUCTION]	      = handle_mwait,
+	[EXIT_REASON_MONITOR_TRAP_FLAG]       = handle_monitor_trap,
 	[EXIT_REASON_MONITOR_INSTRUCTION]     = handle_monitor,
 	[EXIT_REASON_INVEPT]                  = handle_invept,
 	[EXIT_REASON_INVVPID]                 = handle_invvpid,
@@ -7542,6 +7523,8 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
 		return true;
 	case EXIT_REASON_MWAIT_INSTRUCTION:
 		return nested_cpu_has(vmcs12, CPU_BASED_MWAIT_EXITING);
+	case EXIT_REASON_MONITOR_TRAP_FLAG:
+		return nested_cpu_has(vmcs12, CPU_BASED_MONITOR_TRAP_FLAG);
 	case EXIT_REASON_MONITOR_INSTRUCTION:
 		return nested_cpu_has(vmcs12, CPU_BASED_MONITOR_EXITING);
 	case EXIT_REASON_PAUSE_INSTRUCTION:
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 5ef2560075bf..4bbc2a1676c9 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -29,6 +29,7 @@
 #include "cpuid.h"
 #include "assigned-dev.h"
 #include "pmu.h"
+#include "hyperv.h"
 
 #include <linux/clocksource.h>
 #include <linux/interrupt.h>
@@ -221,11 +222,9 @@ static void shared_msr_update(unsigned slot, u32 msr)
 void kvm_define_shared_msr(unsigned slot, u32 msr)
 {
 	BUG_ON(slot >= KVM_NR_SHARED_MSRS);
+	shared_msrs_global.msrs[slot] = msr;
 	if (slot >= shared_msrs_global.nr)
 		shared_msrs_global.nr = slot + 1;
-	shared_msrs_global.msrs[slot] = msr;
-	/* we need ensured the shared_msr_global have been updated */
-	smp_wmb();
 }
 EXPORT_SYMBOL_GPL(kvm_define_shared_msr);
 
@@ -526,7 +525,8 @@ int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3)
 	}
 	for (i = 0; i < ARRAY_SIZE(pdpte); ++i) {
 		if (is_present_gpte(pdpte[i]) &&
-		    (pdpte[i] & vcpu->arch.mmu.rsvd_bits_mask[0][2])) {
+		    (pdpte[i] &
+		     vcpu->arch.mmu.guest_rsvd_check.rsvd_bits_mask[0][2])) {
 			ret = 0;
 			goto out;
 		}
@@ -949,6 +949,8 @@ static u32 emulated_msrs[] = {
 	MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
 	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
 	HV_X64_MSR_TIME_REF_COUNT, HV_X64_MSR_REFERENCE_TSC,
+	HV_X64_MSR_CRASH_P0, HV_X64_MSR_CRASH_P1, HV_X64_MSR_CRASH_P2,
+	HV_X64_MSR_CRASH_P3, HV_X64_MSR_CRASH_P4, HV_X64_MSR_CRASH_CTL,
 	HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
 	MSR_KVM_PV_EOI_EN,
 
@@ -1217,11 +1219,6 @@ static void kvm_get_time_scale(uint32_t scaled_khz, uint32_t base_khz,
 		 __func__, base_khz, scaled_khz, shift, *pmultiplier);
 }
 
-static inline u64 get_kernel_ns(void)
-{
-	return ktime_get_boot_ns();
-}
-
 #ifdef CONFIG_X86_64
 static atomic_t kvm_guest_has_master_clock = ATOMIC_INIT(0);
 #endif
@@ -1869,123 +1866,6 @@ out:
 	return r;
 }
 
-static bool kvm_hv_hypercall_enabled(struct kvm *kvm)
-{
-	return kvm->arch.hv_hypercall & HV_X64_MSR_HYPERCALL_ENABLE;
-}
-
-static bool kvm_hv_msr_partition_wide(u32 msr)
-{
-	bool r = false;
-	switch (msr) {
-	case HV_X64_MSR_GUEST_OS_ID:
-	case HV_X64_MSR_HYPERCALL:
-	case HV_X64_MSR_REFERENCE_TSC:
-	case HV_X64_MSR_TIME_REF_COUNT:
-		r = true;
-		break;
-	}
-
-	return r;
-}
-
-static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
-{
-	struct kvm *kvm = vcpu->kvm;
-
-	switch (msr) {
-	case HV_X64_MSR_GUEST_OS_ID:
-		kvm->arch.hv_guest_os_id = data;
-		/* setting guest os id to zero disables hypercall page */
-		if (!kvm->arch.hv_guest_os_id)
-			kvm->arch.hv_hypercall &= ~HV_X64_MSR_HYPERCALL_ENABLE;
-		break;
-	case HV_X64_MSR_HYPERCALL: {
-		u64 gfn;
-		unsigned long addr;
-		u8 instructions[4];
-
-		/* if guest os id is not set hypercall should remain disabled */
-		if (!kvm->arch.hv_guest_os_id)
-			break;
-		if (!(data & HV_X64_MSR_HYPERCALL_ENABLE)) {
-			kvm->arch.hv_hypercall = data;
-			break;
-		}
-		gfn = data >> HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT;
-		addr = gfn_to_hva(kvm, gfn);
-		if (kvm_is_error_hva(addr))
-			return 1;
-		kvm_x86_ops->patch_hypercall(vcpu, instructions);
-		((unsigned char *)instructions)[3] = 0xc3; /* ret */
-		if (__copy_to_user((void __user *)addr, instructions, 4))
-			return 1;
-		kvm->arch.hv_hypercall = data;
-		mark_page_dirty(kvm, gfn);
-		break;
-	}
-	case HV_X64_MSR_REFERENCE_TSC: {
-		u64 gfn;
-		HV_REFERENCE_TSC_PAGE tsc_ref;
-		memset(&tsc_ref, 0, sizeof(tsc_ref));
-		kvm->arch.hv_tsc_page = data;
-		if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE))
-			break;
-		gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
-		if (kvm_write_guest(kvm, gfn << HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT,
-			&tsc_ref, sizeof(tsc_ref)))
-			return 1;
-		mark_page_dirty(kvm, gfn);
-		break;
-	}
-	default:
-		vcpu_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x "
-			    "data 0x%llx\n", msr, data);
-		return 1;
-	}
-	return 0;
-}
-
-static int set_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 data)
-{
-	switch (msr) {
-	case HV_X64_MSR_APIC_ASSIST_PAGE: {
-		u64 gfn;
-		unsigned long addr;
-
-		if (!(data & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE)) {
-			vcpu->arch.hv_vapic = data;
-			if (kvm_lapic_enable_pv_eoi(vcpu, 0))
-				return 1;
-			break;
-		}
-		gfn = data >> HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT;
-		addr = kvm_vcpu_gfn_to_hva(vcpu, gfn);
-		if (kvm_is_error_hva(addr))
-			return 1;
-		if (__clear_user((void __user *)addr, PAGE_SIZE))
-			return 1;
-		vcpu->arch.hv_vapic = data;
-		kvm_vcpu_mark_page_dirty(vcpu, gfn);
-		if (kvm_lapic_enable_pv_eoi(vcpu, gfn_to_gpa(gfn) | KVM_MSR_ENABLED))
-			return 1;
-		break;
-	}
-	case HV_X64_MSR_EOI:
-		return kvm_hv_vapic_msr_write(vcpu, APIC_EOI, data);
-	case HV_X64_MSR_ICR:
-		return kvm_hv_vapic_msr_write(vcpu, APIC_ICR, data);
-	case HV_X64_MSR_TPR:
-		return kvm_hv_vapic_msr_write(vcpu, APIC_TASKPRI, data);
-	default:
-		vcpu_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x "
-			    "data 0x%llx\n", msr, data);
-		return 1;
-	}
-
-	return 0;
-}
-
 static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
 {
 	gpa_t gpa = data & ~0x3f;
@@ -2105,7 +1985,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		if (guest_cpuid_has_tsc_adjust(vcpu)) {
 			if (!msr_info->host_initiated) {
 				s64 adj = data - vcpu->arch.ia32_tsc_adjust_msr;
-				kvm_x86_ops->adjust_tsc_offset(vcpu, adj, true);
+				adjust_tsc_offset_guest(vcpu, adj);
 			}
 			vcpu->arch.ia32_tsc_adjust_msr = data;
 		}
@@ -2224,15 +2104,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		 */
 		break;
 	case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
-		if (kvm_hv_msr_partition_wide(msr)) {
-			int r;
-			mutex_lock(&vcpu->kvm->lock);
-			r = set_msr_hyperv_pw(vcpu, msr, data);
-			mutex_unlock(&vcpu->kvm->lock);
-			return r;
-		} else
-			return set_msr_hyperv(vcpu, msr, data);
-		break;
+	case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
+	case HV_X64_MSR_CRASH_CTL:
+		return kvm_hv_set_msr_common(vcpu, msr, data,
+					     msr_info->host_initiated);
 	case MSR_IA32_BBL_CR_CTL3:
 		/* Drop writes to this legacy MSR -- see rdmsr
 		 * counterpart for further detail.
@@ -2315,68 +2190,6 @@ static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 	return 0;
 }
 
-static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
-{
-	u64 data = 0;
-	struct kvm *kvm = vcpu->kvm;
-
-	switch (msr) {
-	case HV_X64_MSR_GUEST_OS_ID:
-		data = kvm->arch.hv_guest_os_id;
-		break;
-	case HV_X64_MSR_HYPERCALL:
-		data = kvm->arch.hv_hypercall;
-		break;
-	case HV_X64_MSR_TIME_REF_COUNT: {
-		data =
-		     div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100);
-		break;
-	}
-	case HV_X64_MSR_REFERENCE_TSC:
-		data = kvm->arch.hv_tsc_page;
-		break;
-	default:
-		vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
-		return 1;
-	}
-
-	*pdata = data;
-	return 0;
-}
-
-static int get_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
-{
-	u64 data = 0;
-
-	switch (msr) {
-	case HV_X64_MSR_VP_INDEX: {
-		int r;
-		struct kvm_vcpu *v;
-		kvm_for_each_vcpu(r, v, vcpu->kvm) {
-			if (v == vcpu) {
-				data = r;
-				break;
-			}
-		}
-		break;
-	}
-	case HV_X64_MSR_EOI:
-		return kvm_hv_vapic_msr_read(vcpu, APIC_EOI, pdata);
-	case HV_X64_MSR_ICR:
-		return kvm_hv_vapic_msr_read(vcpu, APIC_ICR, pdata);
-	case HV_X64_MSR_TPR:
-		return kvm_hv_vapic_msr_read(vcpu, APIC_TASKPRI, pdata);
-	case HV_X64_MSR_APIC_ASSIST_PAGE:
-		data = vcpu->arch.hv_vapic;
-		break;
-	default:
-		vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
-		return 1;
-	}
-	*pdata = data;
-	return 0;
-}
-
 int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 {
 	switch (msr_info->index) {
@@ -2493,14 +2306,10 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		msr_info->data = 0x20000000;
 		break;
 	case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
-		if (kvm_hv_msr_partition_wide(msr_info->index)) {
-			int r;
-			mutex_lock(&vcpu->kvm->lock);
-			r = get_msr_hyperv_pw(vcpu, msr_info->index, &msr_info->data);
-			mutex_unlock(&vcpu->kvm->lock);
-			return r;
-		} else
-			return get_msr_hyperv(vcpu, msr_info->index, &msr_info->data);
+	case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
+	case HV_X64_MSR_CRASH_CTL:
+		return kvm_hv_get_msr_common(vcpu,
+					     msr_info->index, &msr_info->data);
 		break;
 	case MSR_IA32_BBL_CR_CTL3:
 		/* This legacy MSR exists but isn't fully documented in current
@@ -2651,6 +2460,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_TSC_DEADLINE_TIMER:
 	case KVM_CAP_ENABLE_CAP_VM:
 	case KVM_CAP_DISABLE_QUIRKS:
+	case KVM_CAP_SET_BOOT_CPU_ID:
 #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
 	case KVM_CAP_ASSIGN_DEV_IRQ:
 	case KVM_CAP_PCI_2_3:
@@ -3817,30 +3627,25 @@ long kvm_arch_vm_ioctl(struct file *filp,
 			r = kvm_ioapic_init(kvm);
 			if (r) {
 				mutex_lock(&kvm->slots_lock);
-				kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS,
-							  &vpic->dev_master);
-				kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS,
-							  &vpic->dev_slave);
-				kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS,
-							  &vpic->dev_eclr);
+				kvm_destroy_pic(vpic);
 				mutex_unlock(&kvm->slots_lock);
-				kfree(vpic);
 				goto create_irqchip_unlock;
 			}
 		} else
 			goto create_irqchip_unlock;
-		smp_wmb();
-		kvm->arch.vpic = vpic;
-		smp_wmb();
 		r = kvm_setup_default_irq_routing(kvm);
 		if (r) {
 			mutex_lock(&kvm->slots_lock);
 			mutex_lock(&kvm->irq_lock);
 			kvm_ioapic_destroy(kvm);
-			kvm_destroy_pic(kvm);
+			kvm_destroy_pic(vpic);
 			mutex_unlock(&kvm->irq_lock);
 			mutex_unlock(&kvm->slots_lock);
+			goto create_irqchip_unlock;
 		}
+		/* Write kvm->irq_routing before kvm->arch.vpic.  */
+		smp_wmb();
+		kvm->arch.vpic = vpic;
 	create_irqchip_unlock:
 		mutex_unlock(&kvm->lock);
 		break;
@@ -3967,6 +3772,15 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		r = kvm_vm_ioctl_reinject(kvm, &control);
 		break;
 	}
+	case KVM_SET_BOOT_CPU_ID:
+		r = 0;
+		mutex_lock(&kvm->lock);
+		if (atomic_read(&kvm->online_vcpus) != 0)
+			r = -EBUSY;
+		else
+			kvm->arch.bsp_vcpu_id = arg;
+		mutex_unlock(&kvm->lock);
+		break;
 	case KVM_XEN_HVM_CONFIG: {
 		r = -EFAULT;
 		if (copy_from_user(&kvm->arch.xen_hvm_config, argp,
@@ -5882,66 +5696,6 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_emulate_halt);
 
-int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
-{
-	u64 param, ingpa, outgpa, ret;
-	uint16_t code, rep_idx, rep_cnt, res = HV_STATUS_SUCCESS, rep_done = 0;
-	bool fast, longmode;
-
-	/*
-	 * hypercall generates UD from non zero cpl and real mode
-	 * per HYPER-V spec
-	 */
-	if (kvm_x86_ops->get_cpl(vcpu) != 0 || !is_protmode(vcpu)) {
-		kvm_queue_exception(vcpu, UD_VECTOR);
-		return 0;
-	}
-
-	longmode = is_64_bit_mode(vcpu);
-
-	if (!longmode) {
-		param = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDX) << 32) |
-			(kvm_register_read(vcpu, VCPU_REGS_RAX) & 0xffffffff);
-		ingpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RBX) << 32) |
-			(kvm_register_read(vcpu, VCPU_REGS_RCX) & 0xffffffff);
-		outgpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDI) << 32) |
-			(kvm_register_read(vcpu, VCPU_REGS_RSI) & 0xffffffff);
-	}
-#ifdef CONFIG_X86_64
-	else {
-		param = kvm_register_read(vcpu, VCPU_REGS_RCX);
-		ingpa = kvm_register_read(vcpu, VCPU_REGS_RDX);
-		outgpa = kvm_register_read(vcpu, VCPU_REGS_R8);
-	}
-#endif
-
-	code = param & 0xffff;
-	fast = (param >> 16) & 0x1;
-	rep_cnt = (param >> 32) & 0xfff;
-	rep_idx = (param >> 48) & 0xfff;
-
-	trace_kvm_hv_hypercall(code, fast, rep_cnt, rep_idx, ingpa, outgpa);
-
-	switch (code) {
-	case HV_X64_HV_NOTIFY_LONG_SPIN_WAIT:
-		kvm_vcpu_on_spin(vcpu);
-		break;
-	default:
-		res = HV_STATUS_INVALID_HYPERCALL_CODE;
-		break;
-	}
-
-	ret = res | (((u64)rep_done & 0xfff) << 32);
-	if (longmode) {
-		kvm_register_write(vcpu, VCPU_REGS_RAX, ret);
-	} else {
-		kvm_register_write(vcpu, VCPU_REGS_RDX, ret >> 32);
-		kvm_register_write(vcpu, VCPU_REGS_RAX, ret & 0xffffffff);
-	}
-
-	return 1;
-}
-
 /*
  * kvm_pv_kick_cpu_op:  Kick a vcpu.
  *
@@ -6327,6 +6081,7 @@ static void process_smi_save_state_64(struct kvm_vcpu *vcpu, char *buf)
 static void process_smi(struct kvm_vcpu *vcpu)
 {
 	struct kvm_segment cs, ds;
+	struct desc_ptr dt;
 	char buf[512];
 	u32 cr0;
 
@@ -6359,6 +6114,10 @@ static void process_smi(struct kvm_vcpu *vcpu)
 
 	kvm_x86_ops->set_cr4(vcpu, 0);
 
+	/* Undocumented: IDT limit is set to zero on entry to SMM.  */
+	dt.address = dt.size = 0;
+	kvm_x86_ops->set_idt(vcpu, &dt);
+
 	__kvm_set_dr(vcpu, 7, DR7_FIXED_1);
 
 	cs.selector = (vcpu->arch.smbase >> 4) & 0xffff;
@@ -6513,6 +6272,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 			vcpu_scan_ioapic(vcpu);
 		if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu))
 			kvm_vcpu_reload_apic_access_page(vcpu);
+		if (kvm_check_request(KVM_REQ_HV_CRASH, vcpu)) {
+			vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
+			vcpu->run->system_event.type = KVM_SYSTEM_EVENT_CRASH;
+			r = 0;
+			goto out;
+		}
 	}
 
 	if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
@@ -7535,6 +7300,17 @@ void kvm_arch_check_processor_compat(void *rtn)
 	kvm_x86_ops->check_processor_compatibility(rtn);
 }
 
+bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu)
+{
+	return vcpu->kvm->arch.bsp_vcpu_id == vcpu->vcpu_id;
+}
+EXPORT_SYMBOL_GPL(kvm_vcpu_is_reset_bsp);
+
+bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu)
+{
+	return (vcpu->arch.apic_base & MSR_IA32_APICBASE_BSP) != 0;
+}
+
 bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu)
 {
 	return irqchip_in_kernel(vcpu->kvm) == (vcpu->arch.apic != NULL);
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 0ca2f3e4803c..2f822cd886c2 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -147,6 +147,11 @@ static inline void kvm_register_writel(struct kvm_vcpu *vcpu,
 	return kvm_register_write(vcpu, reg, val);
 }
 
+static inline u64 get_kernel_ns(void)
+{
+	return ktime_get_boot_ns();
+}
+
 static inline bool kvm_check_has_quirk(struct kvm *kvm, u64 quirk)
 {
 	return !(kvm->arch.disabled_quirks & quirk);
diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c
index f37e84ab49f3..3d8f2e421466 100644
--- a/arch/x86/math-emu/fpu_entry.c
+++ b/arch/x86/math-emu/fpu_entry.c
@@ -29,7 +29,6 @@
 
 #include <asm/uaccess.h>
 #include <asm/traps.h>
-#include <asm/desc.h>
 #include <asm/user.h>
 #include <asm/fpu/internal.h>
 
@@ -181,7 +180,7 @@ void math_emulate(struct math_emu_info *info)
 			math_abort(FPU_info, SIGILL);
 		}
 
-		code_descriptor = LDT_DESCRIPTOR(FPU_CS);
+		code_descriptor = FPU_get_ldt_descriptor(FPU_CS);
 		if (SEG_D_SIZE(code_descriptor)) {
 			/* The above test may be wrong, the book is not clear */
 			/* Segmented 32 bit protected mode */
diff --git a/arch/x86/math-emu/fpu_system.h b/arch/x86/math-emu/fpu_system.h
index 9ccecb61a4fa..5e044d506b7a 100644
--- a/arch/x86/math-emu/fpu_system.h
+++ b/arch/x86/math-emu/fpu_system.h
@@ -16,9 +16,24 @@
 #include <linux/kernel.h>
 #include <linux/mm.h>
 
-/* s is always from a cpu register, and the cpu does bounds checking
- * during register load --> no further bounds checks needed */
-#define LDT_DESCRIPTOR(s)	(((struct desc_struct *)current->mm->context.ldt)[(s) >> 3])
+#include <asm/desc.h>
+#include <asm/mmu_context.h>
+
+static inline struct desc_struct FPU_get_ldt_descriptor(unsigned seg)
+{
+	static struct desc_struct zero_desc;
+	struct desc_struct ret = zero_desc;
+
+#ifdef CONFIG_MODIFY_LDT_SYSCALL
+	seg >>= 3;
+	mutex_lock(&current->mm->context.lock);
+	if (current->mm->context.ldt && seg < current->mm->context.ldt->size)
+		ret = current->mm->context.ldt->entries[seg];
+	mutex_unlock(&current->mm->context.lock);
+#endif
+	return ret;
+}
+
 #define SEG_D_SIZE(x)		((x).b & (3 << 21))
 #define SEG_G_BIT(x)		((x).b & (1 << 23))
 #define SEG_GRANULARITY(x)	(((x).b & (1 << 23)) ? 4096 : 1)
diff --git a/arch/x86/math-emu/get_address.c b/arch/x86/math-emu/get_address.c
index 6ef5e99380f9..8300db71c2a6 100644
--- a/arch/x86/math-emu/get_address.c
+++ b/arch/x86/math-emu/get_address.c
@@ -20,7 +20,6 @@
 #include <linux/stddef.h>
 
 #include <asm/uaccess.h>
-#include <asm/desc.h>
 
 #include "fpu_system.h"
 #include "exception.h"
@@ -158,7 +157,7 @@ static long pm_address(u_char FPU_modrm, u_char segment,
 		addr->selector = PM_REG_(segment);
 	}
 
-	descriptor = LDT_DESCRIPTOR(PM_REG_(segment));
+	descriptor = FPU_get_ldt_descriptor(addr->selector);
 	base_address = SEG_BASE_ADDR(descriptor);
 	address = base_address + offset;
 	limit = base_address
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index e88fda867a33..484145368a24 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -8,7 +8,7 @@ config XEN
 	select PARAVIRT_CLOCK
 	select XEN_HAVE_PVMMU
 	depends on X86_64 || (X86_32 && X86_PAE)
-	depends on X86_TSC
+	depends on X86_LOCAL_APIC && X86_TSC
 	help
 	  This is the Linux Xen port.  Enabling this will allow the
 	  kernel to boot in a paravirtualized environment under the
@@ -17,7 +17,7 @@ config XEN
 config XEN_DOM0
 	def_bool y
 	depends on XEN && PCI_XEN && SWIOTLB_XEN
-	depends on X86_LOCAL_APIC && X86_IO_APIC && ACPI && PCI
+	depends on X86_IO_APIC && ACPI && PCI
 
 config XEN_PVHVM
 	def_bool y
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index 7322755f337a..4b6e29ac0968 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -13,13 +13,13 @@ CFLAGS_mmu.o			:= $(nostackp)
 obj-y		:= enlighten.o setup.o multicalls.o mmu.o irq.o \
 			time.o xen-asm.o xen-asm_$(BITS).o \
 			grant-table.o suspend.o platform-pci-unplug.o \
-			p2m.o
+			p2m.o apic.o
 
 obj-$(CONFIG_EVENT_TRACING) += trace.o
 
 obj-$(CONFIG_SMP)		+= smp.o
 obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o
 obj-$(CONFIG_XEN_DEBUG_FS)	+= debugfs.o
-obj-$(CONFIG_XEN_DOM0)		+= apic.o vga.o
+obj-$(CONFIG_XEN_DOM0)		+= vga.o
 obj-$(CONFIG_SWIOTLB_XEN)	+= pci-swiotlb-xen.o
 obj-$(CONFIG_XEN_EFI)		+= efi.o
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index c20fe29e65f4..2292721b1d10 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -101,17 +101,15 @@ struct dom0_vga_console_info;
 
 #ifdef CONFIG_XEN_DOM0
 void __init xen_init_vga(const struct dom0_vga_console_info *, size_t size);
-void __init xen_init_apic(void);
 #else
 static inline void __init xen_init_vga(const struct dom0_vga_console_info *info,
 				       size_t size)
 {
 }
-static inline void __init xen_init_apic(void)
-{
-}
 #endif
 
+void __init xen_init_apic(void);
+
 #ifdef CONFIG_XEN_EFI
 extern void xen_efi_init(void);
 #else
diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
index e5b872ba2484..3bd3504a6cc7 100644
--- a/arch/xtensa/Kconfig
+++ b/arch/xtensa/Kconfig
@@ -14,12 +14,15 @@ config XTENSA
 	select GENERIC_IRQ_SHOW
 	select GENERIC_PCI_IOMAP
 	select GENERIC_SCHED_CLOCK
+	select HAVE_DMA_API_DEBUG
+	select HAVE_DMA_ATTRS
 	select HAVE_FUNCTION_TRACER
 	select HAVE_IRQ_TIME_ACCOUNTING
 	select HAVE_OPROFILE
 	select HAVE_PERF_EVENTS
 	select IRQ_DOMAIN
 	select MODULES_USE_ELF_RELA
+	select PERF_USE_VMALLOC
 	select VIRT_TO_BUS
 	help
 	  Xtensa processors are 32-bit RISC machines designed by Tensilica
@@ -61,9 +64,7 @@ config TRACE_IRQFLAGS_SUPPORT
 	def_bool y
 
 config MMU
-	bool
-	default n if !XTENSA_VARIANT_CUSTOM
-	default XTENSA_VARIANT_MMU if XTENSA_VARIANT_CUSTOM
+	def_bool n
 
 config VARIANT_IRQ_SWITCH
 	def_bool n
@@ -71,9 +72,6 @@ config VARIANT_IRQ_SWITCH
 config HAVE_XTENSA_GPIO32
 	def_bool n
 
-config MAY_HAVE_SMP
-	def_bool n
-
 menu "Processor type and features"
 
 choice
@@ -100,7 +98,6 @@ config XTENSA_VARIANT_DC233C
 
 config XTENSA_VARIANT_CUSTOM
 	bool "Custom Xtensa processor configuration"
-	select MAY_HAVE_SMP
 	select HAVE_XTENSA_GPIO32
 	help
 	  Select this variant to use a custom Xtensa processor configuration.
@@ -126,10 +123,21 @@ config XTENSA_VARIANT_MMU
 	bool "Core variant has a Full MMU (TLB, Pages, Protection, etc)"
 	depends on XTENSA_VARIANT_CUSTOM
 	default y
+	select MMU
 	help
 	  Build a Conventional Kernel with full MMU support,
 	  ie: it supports a TLB with auto-loading, page protection.
 
+config XTENSA_VARIANT_HAVE_PERF_EVENTS
+	bool "Core variant has Performance Monitor Module"
+	depends on XTENSA_VARIANT_CUSTOM
+	default n
+	help
+	  Enable if core variant has Performance Monitor Module with
+	  External Registers Interface.
+
+	  If unsure, say N.
+
 config XTENSA_UNALIGNED_USER
 	bool "Unaligned memory access in use space"
 	help
@@ -143,7 +151,7 @@ source "kernel/Kconfig.preempt"
 
 config HAVE_SMP
 	bool "System Supports SMP (MX)"
-	depends on MAY_HAVE_SMP
+	depends on XTENSA_VARIANT_CUSTOM
 	select XTENSA_MX
 	help
 	  This option is use to indicate that the system-on-a-chip (SOC)
diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild
index 5b478accd5fc..63c223dff5f1 100644
--- a/arch/xtensa/include/asm/Kbuild
+++ b/arch/xtensa/include/asm/Kbuild
@@ -2,7 +2,6 @@ generic-y += bitsperlong.h
 generic-y += bug.h
 generic-y += clkdev.h
 generic-y += cputime.h
-generic-y += device.h
 generic-y += div64.h
 generic-y += emergency-restart.h
 generic-y += errno.h
diff --git a/arch/xtensa/include/asm/atomic.h b/arch/xtensa/include/asm/atomic.h
index 00b7d46b35b8..ebcd1f6fc8cb 100644
--- a/arch/xtensa/include/asm/atomic.h
+++ b/arch/xtensa/include/asm/atomic.h
@@ -29,7 +29,7 @@
  *
  * Locking interrupts looks like this:
  *
- *    rsil a15, LOCKLEVEL
+ *    rsil a15, TOPLEVEL
  *    <code>
  *    wsr  a15, PS
  *    rsync
@@ -106,7 +106,7 @@ static inline void atomic_##op(int i, atomic_t * v)			\
 	unsigned int vval;						\
 									\
 	__asm__ __volatile__(						\
-			"       rsil    a15, "__stringify(LOCKLEVEL)"\n"\
+			"       rsil    a15, "__stringify(TOPLEVEL)"\n"\
 			"       l32i    %0, %2, 0\n"			\
 			"       " #op " %0, %0, %1\n"			\
 			"       s32i    %0, %2, 0\n"			\
@@ -124,7 +124,7 @@ static inline int atomic_##op##_return(int i, atomic_t * v)		\
 	unsigned int vval;						\
 									\
 	__asm__ __volatile__(						\
-			"       rsil    a15,"__stringify(LOCKLEVEL)"\n"	\
+			"       rsil    a15,"__stringify(TOPLEVEL)"\n"	\
 			"       l32i    %0, %2, 0\n"			\
 			"       " #op " %0, %0, %1\n"			\
 			"       s32i    %0, %2, 0\n"			\
@@ -272,7 +272,7 @@ static inline void atomic_clear_mask(unsigned int mask, atomic_t *v)
 	unsigned int vval;
 
 	__asm__ __volatile__(
-			"       rsil    a15,"__stringify(LOCKLEVEL)"\n"
+			"       rsil    a15,"__stringify(TOPLEVEL)"\n"
 			"       l32i    %0, %2, 0\n"
 			"       xor     %1, %4, %3\n"
 			"       and     %0, %0, %4\n"
@@ -306,7 +306,7 @@ static inline void atomic_set_mask(unsigned int mask, atomic_t *v)
 	unsigned int vval;
 
 	__asm__ __volatile__(
-			"       rsil    a15,"__stringify(LOCKLEVEL)"\n"
+			"       rsil    a15,"__stringify(TOPLEVEL)"\n"
 			"       l32i    %0, %2, 0\n"
 			"       or      %0, %0, %1\n"
 			"       s32i    %0, %2, 0\n"
diff --git a/arch/xtensa/include/asm/cmpxchg.h b/arch/xtensa/include/asm/cmpxchg.h
index 370b26f38414..201e9009efd8 100644
--- a/arch/xtensa/include/asm/cmpxchg.h
+++ b/arch/xtensa/include/asm/cmpxchg.h
@@ -34,7 +34,7 @@ __cmpxchg_u32(volatile int *p, int old, int new)
 	return new;
 #else
 	__asm__ __volatile__(
-			"       rsil    a15, "__stringify(LOCKLEVEL)"\n"
+			"       rsil    a15, "__stringify(TOPLEVEL)"\n"
 			"       l32i    %0, %1, 0\n"
 			"       bne     %0, %2, 1f\n"
 			"       s32i    %3, %1, 0\n"
@@ -123,7 +123,7 @@ static inline unsigned long xchg_u32(volatile int * m, unsigned long val)
 #else
 	unsigned long tmp;
 	__asm__ __volatile__(
-			"       rsil    a15, "__stringify(LOCKLEVEL)"\n"
+			"       rsil    a15, "__stringify(TOPLEVEL)"\n"
 			"       l32i    %0, %1, 0\n"
 			"       s32i    %2, %1, 0\n"
 			"       wsr     a15, ps\n"
diff --git a/arch/xtensa/include/asm/device.h b/arch/xtensa/include/asm/device.h
new file mode 100644
index 000000000000..fe1f5c878493
--- /dev/null
+++ b/arch/xtensa/include/asm/device.h
@@ -0,0 +1,19 @@
+/*
+ * Arch specific extensions to struct device
+ *
+ * This file is released under the GPLv2
+ */
+#ifndef _ASM_XTENSA_DEVICE_H
+#define _ASM_XTENSA_DEVICE_H
+
+struct dma_map_ops;
+
+struct dev_archdata {
+	/* DMA operations on that device */
+	struct dma_map_ops *dma_ops;
+};
+
+struct pdev_archdata {
+};
+
+#endif /* _ASM_XTENSA_DEVICE_H */
diff --git a/arch/xtensa/include/asm/dma-mapping.h b/arch/xtensa/include/asm/dma-mapping.h
index 1f5f6dc09736..f01cb3044e50 100644
--- a/arch/xtensa/include/asm/dma-mapping.h
+++ b/arch/xtensa/include/asm/dma-mapping.h
@@ -1,11 +1,10 @@
 /*
- * include/asm-xtensa/dma-mapping.h
- *
  * This file is subject to the terms and conditions of the GNU General Public
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
  * Copyright (C) 2003 - 2005 Tensilica Inc.
+ * Copyright (C) 2015 Cadence Design Systems Inc.
  */
 
 #ifndef _XTENSA_DMA_MAPPING_H
@@ -13,142 +12,67 @@
 
 #include <asm/cache.h>
 #include <asm/io.h>
+
+#include <asm-generic/dma-coherent.h>
+
 #include <linux/mm.h>
 #include <linux/scatterlist.h>
 
 #define DMA_ERROR_CODE		(~(dma_addr_t)0x0)
 
-/*
- * DMA-consistent mapping functions.
- */
-
-extern void *consistent_alloc(int, size_t, dma_addr_t, unsigned long);
-extern void consistent_free(void*, size_t, dma_addr_t);
-extern void consistent_sync(void*, size_t, int);
-
-#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
-#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
-
-void *dma_alloc_coherent(struct device *dev, size_t size,
-			   dma_addr_t *dma_handle, gfp_t flag);
+extern struct dma_map_ops xtensa_dma_map_ops;
 
-void dma_free_coherent(struct device *dev, size_t size,
-			 void *vaddr, dma_addr_t dma_handle);
-
-static inline dma_addr_t
-dma_map_single(struct device *dev, void *ptr, size_t size,
-	       enum dma_data_direction direction)
-{
-	BUG_ON(direction == DMA_NONE);
-	consistent_sync(ptr, size, direction);
-	return virt_to_phys(ptr);
-}
-
-static inline void
-dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
-		 enum dma_data_direction direction)
+static inline struct dma_map_ops *get_dma_ops(struct device *dev)
 {
-	BUG_ON(direction == DMA_NONE);
+	if (dev && dev->archdata.dma_ops)
+		return dev->archdata.dma_ops;
+	else
+		return &xtensa_dma_map_ops;
 }
 
-static inline int
-dma_map_sg(struct device *dev, struct scatterlist *sglist, int nents,
-	   enum dma_data_direction direction)
-{
-	int i;
-	struct scatterlist *sg;
-
-	BUG_ON(direction == DMA_NONE);
-
-	for_each_sg(sglist, sg, nents, i) {
-		BUG_ON(!sg_page(sg));
+#include <asm-generic/dma-mapping-common.h>
 
-		sg->dma_address = sg_phys(sg);
-		consistent_sync(sg_virt(sg), sg->length, direction);
-	}
+#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_attrs(d, s, h, f, NULL)
+#define dma_free_noncoherent(d, s, v, h) dma_free_attrs(d, s, v, h, NULL)
+#define dma_alloc_coherent(d, s, h, f) dma_alloc_attrs(d, s, h, f, NULL)
+#define dma_free_coherent(d, s, c, h) dma_free_attrs(d, s, c, h, NULL)
 
-	return nents;
-}
-
-static inline dma_addr_t
-dma_map_page(struct device *dev, struct page *page, unsigned long offset,
-	     size_t size, enum dma_data_direction direction)
-{
-	BUG_ON(direction == DMA_NONE);
-	return (dma_addr_t)(page_to_pfn(page)) * PAGE_SIZE + offset;
-}
-
-static inline void
-dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size,
-	       enum dma_data_direction direction)
+static inline void *dma_alloc_attrs(struct device *dev, size_t size,
+				    dma_addr_t *dma_handle, gfp_t gfp,
+				    struct dma_attrs *attrs)
 {
-	BUG_ON(direction == DMA_NONE);
-}
+	void *ret;
+	struct dma_map_ops *ops = get_dma_ops(dev);
 
+	if (dma_alloc_from_coherent(dev, size, dma_handle, &ret))
+		return ret;
 
-static inline void
-dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nhwentries,
-	     enum dma_data_direction direction)
-{
-	BUG_ON(direction == DMA_NONE);
-}
-
-static inline void
-dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size,
-		enum dma_data_direction direction)
-{
-	consistent_sync((void *)bus_to_virt(dma_handle), size, direction);
-}
+	ret = ops->alloc(dev, size, dma_handle, gfp, attrs);
+	debug_dma_alloc_coherent(dev, size, *dma_handle, ret);
 
-static inline void
-dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle,
-		           size_t size, enum dma_data_direction direction)
-{
-	consistent_sync((void *)bus_to_virt(dma_handle), size, direction);
+	return ret;
 }
 
-static inline void
-dma_sync_single_range_for_cpu(struct device *dev, dma_addr_t dma_handle,
-		      unsigned long offset, size_t size,
-		      enum dma_data_direction direction)
-{
-
-	consistent_sync((void *)bus_to_virt(dma_handle)+offset,size,direction);
-}
-
-static inline void
-dma_sync_single_range_for_device(struct device *dev, dma_addr_t dma_handle,
-		      unsigned long offset, size_t size,
-		      enum dma_data_direction direction)
+static inline void dma_free_attrs(struct device *dev, size_t size,
+				  void *vaddr, dma_addr_t dma_handle,
+				  struct dma_attrs *attrs)
 {
+	struct dma_map_ops *ops = get_dma_ops(dev);
 
-	consistent_sync((void *)bus_to_virt(dma_handle)+offset,size,direction);
-}
-static inline void
-dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sglist, int nelems,
-		 enum dma_data_direction dir)
-{
-	int i;
-	struct scatterlist *sg;
+	if (dma_release_from_coherent(dev, get_order(size), vaddr))
+		return;
 
-	for_each_sg(sglist, sg, nelems, i)
-		consistent_sync(sg_virt(sg), sg->length, dir);
+	ops->free(dev, size, vaddr, dma_handle, attrs);
+	debug_dma_free_coherent(dev, size, vaddr, dma_handle);
 }
 
-static inline void
-dma_sync_sg_for_device(struct device *dev, struct scatterlist *sglist,
-		       int nelems, enum dma_data_direction dir)
-{
-	int i;
-	struct scatterlist *sg;
-
-	for_each_sg(sglist, sg, nelems, i)
-		consistent_sync(sg_virt(sg), sg->length, dir);
-}
 static inline int
 dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
 {
-	return 0;
+	struct dma_map_ops *ops = get_dma_ops(dev);
+
+	debug_dma_mapping_error(dev, dma_addr);
+	return ops->mapping_error(dev, dma_addr);
 }
 
 static inline int
@@ -168,39 +92,7 @@ dma_set_mask(struct device *dev, u64 mask)
 	return 0;
 }
 
-static inline void
-dma_cache_sync(struct device *dev, void *vaddr, size_t size,
-	       enum dma_data_direction direction)
-{
-	consistent_sync(vaddr, size, direction);
-}
-
-/* Not supported for now */
-static inline int dma_mmap_coherent(struct device *dev,
-				    struct vm_area_struct *vma, void *cpu_addr,
-				    dma_addr_t dma_addr, size_t size)
-{
-	return -EINVAL;
-}
-
-static inline int dma_get_sgtable(struct device *dev, struct sg_table *sgt,
-				  void *cpu_addr, dma_addr_t dma_addr,
-				  size_t size)
-{
-	return -EINVAL;
-}
-
-static inline void *dma_alloc_attrs(struct device *dev, size_t size,
-				    dma_addr_t *dma_handle, gfp_t flag,
-				    struct dma_attrs *attrs)
-{
-	return NULL;
-}
-
-static inline void dma_free_attrs(struct device *dev, size_t size,
-				  void *vaddr, dma_addr_t dma_handle,
-				  struct dma_attrs *attrs)
-{
-}
+void dma_cache_sync(struct device *dev, void *vaddr, size_t size,
+		    enum dma_data_direction direction);
 
 #endif	/* _XTENSA_DMA_MAPPING_H */
diff --git a/arch/xtensa/include/asm/irqflags.h b/arch/xtensa/include/asm/irqflags.h
index ea36674c6ec5..8e090c709046 100644
--- a/arch/xtensa/include/asm/irqflags.h
+++ b/arch/xtensa/include/asm/irqflags.h
@@ -6,6 +6,7 @@
  * for more details.
  *
  * Copyright (C) 2001 - 2005 Tensilica Inc.
+ * Copyright (C) 2015 Cadence Design Systems Inc.
  */
 
 #ifndef _XTENSA_IRQFLAGS_H
@@ -23,8 +24,27 @@ static inline unsigned long arch_local_save_flags(void)
 static inline unsigned long arch_local_irq_save(void)
 {
 	unsigned long flags;
-	asm volatile("rsil %0, "__stringify(LOCKLEVEL)
+#if XTENSA_FAKE_NMI
+#if defined(CONFIG_DEBUG_KERNEL) && (LOCKLEVEL | TOPLEVEL) >= XCHAL_DEBUGLEVEL
+	unsigned long tmp;
+
+	asm volatile("rsr	%0, ps\t\n"
+		     "extui	%1, %0, 0, 4\t\n"
+		     "bgei	%1, "__stringify(LOCKLEVEL)", 1f\t\n"
+		     "rsil	%0, "__stringify(LOCKLEVEL)"\n"
+		     "1:"
+		     : "=a" (flags), "=a" (tmp) :: "memory");
+#else
+	asm volatile("rsr	%0, ps\t\n"
+		     "or	%0, %0, %1\t\n"
+		     "xsr	%0, ps\t\n"
+		     "rsync"
+		     : "=&a" (flags) : "a" (LOCKLEVEL) : "memory");
+#endif
+#else
+	asm volatile("rsil	%0, "__stringify(LOCKLEVEL)
 		     : "=a" (flags) :: "memory");
+#endif
 	return flags;
 }
 
diff --git a/arch/xtensa/include/asm/processor.h b/arch/xtensa/include/asm/processor.h
index b61bdf0eea25..83e2e4bc01ba 100644
--- a/arch/xtensa/include/asm/processor.h
+++ b/arch/xtensa/include/asm/processor.h
@@ -1,11 +1,10 @@
 /*
- * include/asm-xtensa/processor.h
- *
  * This file is subject to the terms and conditions of the GNU General Public
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
  * Copyright (C) 2001 - 2008 Tensilica Inc.
+ * Copyright (C) 2015 Cadence Design Systems Inc.
  */
 
 #ifndef _XTENSA_PROCESSOR_H
@@ -45,6 +44,14 @@
 #define STACK_TOP_MAX	STACK_TOP
 
 /*
+ * General exception cause assigned to fake NMI. Fake NMI needs to be handled
+ * differently from other interrupts, but it uses common kernel entry/exit
+ * code.
+ */
+
+#define EXCCAUSE_MAPPED_NMI	62
+
+/*
  * General exception cause assigned to debug exceptions. Debug exceptions go
  * to their own vector, rather than the general exception vectors (user,
  * kernel, double); and their specific causes are reported via DEBUGCAUSE
@@ -65,10 +72,30 @@
 
 #define VALID_DOUBLE_EXCEPTION_ADDRESS	64
 
+#define XTENSA_INT_LEVEL(intno) _XTENSA_INT_LEVEL(intno)
+#define _XTENSA_INT_LEVEL(intno) XCHAL_INT##intno##_LEVEL
+
+#define XTENSA_INTLEVEL_MASK(level) _XTENSA_INTLEVEL_MASK(level)
+#define _XTENSA_INTLEVEL_MASK(level) (XCHAL_INTLEVEL##level##_MASK)
+
+#define IS_POW2(v) (((v) & ((v) - 1)) == 0)
+
+#define PROFILING_INTLEVEL XTENSA_INT_LEVEL(XCHAL_PROFILING_INTERRUPT)
+
 /* LOCKLEVEL defines the interrupt level that masks all
  * general-purpose interrupts.
  */
+#if defined(CONFIG_XTENSA_VARIANT_HAVE_PERF_EVENTS) && \
+	defined(XCHAL_PROFILING_INTERRUPT) && \
+	PROFILING_INTLEVEL == XCHAL_EXCM_LEVEL && \
+	XCHAL_EXCM_LEVEL > 1 && \
+	IS_POW2(XTENSA_INTLEVEL_MASK(PROFILING_INTLEVEL))
+#define LOCKLEVEL (XCHAL_EXCM_LEVEL - 1)
+#else
 #define LOCKLEVEL XCHAL_EXCM_LEVEL
+#endif
+#define TOPLEVEL XCHAL_EXCM_LEVEL
+#define XTENSA_FAKE_NMI (LOCKLEVEL < TOPLEVEL)
 
 /* WSBITS and WBBITS are the width of the WINDOWSTART and WINDOWBASE
  * registers
diff --git a/arch/xtensa/include/asm/stacktrace.h b/arch/xtensa/include/asm/stacktrace.h
index 6a05fcb0a20d..fe06e8ed162b 100644
--- a/arch/xtensa/include/asm/stacktrace.h
+++ b/arch/xtensa/include/asm/stacktrace.h
@@ -33,4 +33,12 @@ void walk_stackframe(unsigned long *sp,
 		int (*fn)(struct stackframe *frame, void *data),
 		void *data);
 
+void xtensa_backtrace_kernel(struct pt_regs *regs, unsigned int depth,
+			     int (*kfn)(struct stackframe *frame, void *data),
+			     int (*ufn)(struct stackframe *frame, void *data),
+			     void *data);
+void xtensa_backtrace_user(struct pt_regs *regs, unsigned int depth,
+			   int (*ufn)(struct stackframe *frame, void *data),
+			   void *data);
+
 #endif /* _XTENSA_STACKTRACE_H */
diff --git a/arch/xtensa/include/asm/traps.h b/arch/xtensa/include/asm/traps.h
index 677bfcf4ee5d..28f33a8b7f5f 100644
--- a/arch/xtensa/include/asm/traps.h
+++ b/arch/xtensa/include/asm/traps.h
@@ -25,30 +25,39 @@ static inline void spill_registers(void)
 {
 #if XCHAL_NUM_AREGS > 16
 	__asm__ __volatile__ (
-		"	call12	1f\n"
+		"	call8	1f\n"
 		"	_j	2f\n"
 		"	retw\n"
 		"	.align	4\n"
 		"1:\n"
+#if XCHAL_NUM_AREGS == 32
+		"	_entry	a1, 32\n"
+		"	addi	a8, a0, 3\n"
+		"	_entry	a1, 16\n"
+		"	mov	a12, a12\n"
+		"	retw\n"
+#else
 		"	_entry	a1, 48\n"
-		"	addi	a12, a0, 3\n"
-#if XCHAL_NUM_AREGS > 32
-		"	.rept	(" __stringify(XCHAL_NUM_AREGS) " - 32) / 12\n"
+		"	call12	1f\n"
+		"	retw\n"
+		"	.align	4\n"
+		"1:\n"
+		"	.rept	(" __stringify(XCHAL_NUM_AREGS) " - 16) / 12\n"
 		"	_entry	a1, 48\n"
 		"	mov	a12, a0\n"
 		"	.endr\n"
-#endif
-		"	_entry	a1, 48\n"
+		"	_entry	a1, 16\n"
 #if XCHAL_NUM_AREGS % 12 == 0
-		"	mov	a8, a8\n"
-#elif XCHAL_NUM_AREGS % 12 == 4
 		"	mov	a12, a12\n"
-#elif XCHAL_NUM_AREGS % 12 == 8
+#elif XCHAL_NUM_AREGS % 12 == 4
 		"	mov	a4, a4\n"
+#elif XCHAL_NUM_AREGS % 12 == 8
+		"	mov	a8, a8\n"
 #endif
 		"	retw\n"
+#endif
 		"2:\n"
-		: : : "a12", "a13", "memory");
+		: : : "a8", "a9", "memory");
 #else
 	__asm__ __volatile__ (
 		"	mov	a12, a12\n"
diff --git a/arch/xtensa/kernel/Makefile b/arch/xtensa/kernel/Makefile
index d3a0f0fd56dd..50137bc9e150 100644
--- a/arch/xtensa/kernel/Makefile
+++ b/arch/xtensa/kernel/Makefile
@@ -13,6 +13,7 @@ obj-$(CONFIG_PCI) += pci.o
 obj-$(CONFIG_MODULES) += xtensa_ksyms.o module.o
 obj-$(CONFIG_FUNCTION_TRACER) += mcount.o
 obj-$(CONFIG_SMP) += smp.o mxhead.o
+obj-$(CONFIG_XTENSA_VARIANT_HAVE_PERF_EVENTS) += perf_event.o
 
 AFLAGS_head.o += -mtext-section-literals
 
@@ -27,10 +28,11 @@ AFLAGS_head.o += -mtext-section-literals
 #
 # Replicate rules in scripts/Makefile.build
 
-sed-y = -e 's/\*(\(\.[a-z]*it\|\.ref\|\)\.text)/*(\1.literal \1.text)/g' \
-	-e 's/\.text\.unlikely/.literal.unlikely .text.unlikely/g'	 \
-	-e 's/\*(\(\.text .*\))/*(.literal \1)/g'			 \
-	-e 's/\*(\(\.text\.[a-z]*\))/*(\1.literal \1)/g'
+sed-y = -e ':a; s/\*(\([^)]*\)\.text\.unlikely/*(\1.literal.unlikely .{text}.unlikely/; ta; ' \
+	-e ':b; s/\*(\([^)]*\)\.text\(\.[a-z]*\)/*(\1.{text}\2.literal .{text}\2/; tb; ' \
+	-e ':c; s/\*(\([^)]*\)\(\.[a-z]*it\|\.ref\)\.text/*(\1\2.literal \2.{text}/; tc; ' \
+	-e ':d; s/\*(\([^)]\+ \|\)\.text/*(\1.literal .{text}/; td; ' \
+	-e 's/\.{text}/.text/g'
 
 quiet_cmd__cpp_lds_S = LDS     $@
 cmd__cpp_lds_S = $(CPP) $(cpp_flags) -P -C -Uxtensa -D__ASSEMBLY__ $<    \
diff --git a/arch/xtensa/kernel/entry.S b/arch/xtensa/kernel/entry.S
index 82bbfa5a05b3..504130357597 100644
--- a/arch/xtensa/kernel/entry.S
+++ b/arch/xtensa/kernel/entry.S
@@ -1,6 +1,4 @@
 /*
- * arch/xtensa/kernel/entry.S
- *
  * Low-level exception handling
  *
  * This file is subject to the terms and conditions of the GNU General Public
@@ -8,6 +6,7 @@
  * for more details.
  *
  * Copyright (C) 2004 - 2008 by Tensilica Inc.
+ * Copyright (C) 2015 Cadence Design Systems Inc.
  *
  * Chris Zankel <chris@zankel.net>
  *
@@ -75,6 +74,27 @@
 #endif
 	.endm
 
+
+	.macro	irq_save flags tmp
+#if XTENSA_FAKE_NMI
+#if defined(CONFIG_DEBUG_KERNEL) && (LOCKLEVEL | TOPLEVEL) >= XCHAL_DEBUGLEVEL
+	rsr	\flags, ps
+	extui	\tmp, \flags, PS_INTLEVEL_SHIFT, PS_INTLEVEL_WIDTH
+	bgei	\tmp, LOCKLEVEL, 99f
+	rsil	\tmp, LOCKLEVEL
+99:
+#else
+	movi	\tmp, LOCKLEVEL
+	rsr	\flags, ps
+	or	\flags, \flags, \tmp
+	xsr	\flags, ps
+	rsync
+#endif
+#else
+	rsil	\flags, LOCKLEVEL
+#endif
+	.endm
+
 /* ----------------- DEFAULT FIRST LEVEL EXCEPTION HANDLERS ----------------- */
 
 /*
@@ -122,6 +142,7 @@ _user_exception:
 	/* Save SAR and turn off single stepping */
 
 	movi	a2, 0
+	wsr	a2, depc		# terminate user stack trace with 0
 	rsr	a3, sar
 	xsr	a2, icountlevel
 	s32i	a3, a1, PT_SAR
@@ -301,7 +322,18 @@ _kernel_exception:
 	s32i	a14, a1, PT_AREG14
 	s32i	a15, a1, PT_AREG15
 
+	_bnei	a2, 1, 1f
+
+	/* Copy spill slots of a0 and a1 to imitate movsp
+	 * in order to keep exception stack continuous
+	 */
+	l32i	a3, a1, PT_SIZE
+	l32i	a0, a1, PT_SIZE + 4
+	s32e	a3, a1, -16
+	s32e	a0, a1, -12
 1:
+	l32i	a0, a1, PT_AREG0	# restore saved a0
+	wsr	a0, depc
 
 #ifdef KERNEL_STACK_OVERFLOW_CHECK
 
@@ -340,75 +372,88 @@ common_exception:
 
 	/* It is now save to restore the EXC_TABLE_FIXUP variable. */
 
-	rsr	a0, exccause
+	rsr	a2, exccause
 	movi	a3, 0
-	rsr	a2, excsave1
-	s32i	a0, a1, PT_EXCCAUSE
-	s32i	a3, a2, EXC_TABLE_FIXUP
-
-	/* All unrecoverable states are saved on stack, now, and a1 is valid,
-	 * so we can allow exceptions and interrupts (*) again.
-	 * Set PS(EXCM = 0, UM = 0, RING = 0, OWB = 0, WOE = 1, INTLEVEL = X)
+	rsr	a0, excsave1
+	s32i	a2, a1, PT_EXCCAUSE
+	s32i	a3, a0, EXC_TABLE_FIXUP
+
+	/* All unrecoverable states are saved on stack, now, and a1 is valid.
+	 * Now we can allow exceptions again. In case we've got an interrupt
+	 * PS.INTLEVEL is set to LOCKLEVEL disabling furhter interrupts,
+	 * otherwise it's left unchanged.
 	 *
-	 * (*) We only allow interrupts if they were previously enabled and
-	 *     we're not handling an IRQ
+	 * Set PS(EXCM = 0, UM = 0, RING = 0, OWB = 0, WOE = 1, INTLEVEL = X)
 	 */
 
 	rsr	a3, ps
-	addi	a0, a0, -EXCCAUSE_LEVEL1_INTERRUPT
-	movi	a2, LOCKLEVEL
+	s32i	a3, a1, PT_PS		# save ps
+
+#if XTENSA_FAKE_NMI
+	/* Correct PS needs to be saved in the PT_PS:
+	 * - in case of exception or level-1 interrupt it's in the PS,
+	 *   and is already saved.
+	 * - in case of medium level interrupt it's in the excsave2.
+	 */
+	movi	a0, EXCCAUSE_MAPPED_NMI
+	extui	a3, a3, PS_INTLEVEL_SHIFT, PS_INTLEVEL_WIDTH
+	beq	a2, a0, .Lmedium_level_irq
+	bnei	a2, EXCCAUSE_LEVEL1_INTERRUPT, .Lexception
+	beqz	a3, .Llevel1_irq	# level-1 IRQ sets ps.intlevel to 0
+
+.Lmedium_level_irq:
+	rsr	a0, excsave2
+	s32i	a0, a1, PT_PS		# save medium-level interrupt ps
+	bgei	a3, LOCKLEVEL, .Lexception
+
+.Llevel1_irq:
+	movi	a3, LOCKLEVEL
+
+.Lexception:
+	movi	a0, 1 << PS_WOE_BIT
+	or	a3, a3, a0
+#else
+	addi	a2, a2, -EXCCAUSE_LEVEL1_INTERRUPT
+	movi	a0, LOCKLEVEL
 	extui	a3, a3, PS_INTLEVEL_SHIFT, PS_INTLEVEL_WIDTH
 					# a3 = PS.INTLEVEL
-	moveqz	a3, a2, a0		# a3 = LOCKLEVEL iff interrupt
+	moveqz	a3, a0, a2		# a3 = LOCKLEVEL iff interrupt
 	movi	a2, 1 << PS_WOE_BIT
 	or	a3, a3, a2
-	rsr	a0, exccause
-	xsr	a3, ps
+	rsr	a2, exccause
+#endif
 
-	s32i	a3, a1, PT_PS		# save ps
+	/* restore return address (or 0 if return to userspace) */
+	rsr	a0, depc
+	wsr	a3, ps
+	rsync				# PS.WOE => rsync => overflow
 
 	/* Save lbeg, lend */
 
-	rsr	a2, lbeg
+	rsr	a4, lbeg
 	rsr	a3, lend
-	s32i	a2, a1, PT_LBEG
+	s32i	a4, a1, PT_LBEG
 	s32i	a3, a1, PT_LEND
 
 	/* Save SCOMPARE1 */
 
 #if XCHAL_HAVE_S32C1I
-	rsr     a2, scompare1
-	s32i    a2, a1, PT_SCOMPARE1
+	rsr     a3, scompare1
+	s32i    a3, a1, PT_SCOMPARE1
 #endif
 
 	/* Save optional registers. */
 
-	save_xtregs_opt a1 a2 a4 a5 a6 a7 PT_XTREGS_OPT
+	save_xtregs_opt a1 a3 a4 a5 a6 a7 PT_XTREGS_OPT
 	
-#ifdef CONFIG_TRACE_IRQFLAGS
-	l32i	a4, a1, PT_DEPC
-	/* Double exception means we came here with an exception
-	 * while PS.EXCM was set, i.e. interrupts disabled.
-	 */
-	bgeui	a4, VALID_DOUBLE_EXCEPTION_ADDRESS, 1f
-	l32i	a4, a1, PT_EXCCAUSE
-	bnei	a4, EXCCAUSE_LEVEL1_INTERRUPT, 1f
-	/* We came here with an interrupt means interrupts were enabled
-	 * and we've just disabled them.
-	 */
-	movi	a4, trace_hardirqs_off
-	callx4	a4
-1:
-#endif
-
 	/* Go to second-level dispatcher. Set up parameters to pass to the
 	 * exception handler and call the exception handler.
 	 */
 
 	rsr	a4, excsave1
 	mov	a6, a1			# pass stack frame
-	mov	a7, a0			# pass EXCCAUSE
-	addx4	a4, a0, a4
+	mov	a7, a2			# pass EXCCAUSE
+	addx4	a4, a2, a4
 	l32i	a4, a4, EXC_TABLE_DEFAULT		# load handler
 
 	/* Call the second-level handler */
@@ -419,8 +464,17 @@ common_exception:
 	.global common_exception_return
 common_exception_return:
 
+#if XTENSA_FAKE_NMI
+	l32i	a2, a1, PT_EXCCAUSE
+	movi	a3, EXCCAUSE_MAPPED_NMI
+	beq	a2, a3, .LNMIexit
+#endif
 1:
-	rsil	a2, LOCKLEVEL
+	irq_save a2, a3
+#ifdef CONFIG_TRACE_IRQFLAGS
+	movi	a4, trace_hardirqs_off
+	callx4	a4
+#endif
 
 	/* Jump if we are returning from kernel exceptions. */
 
@@ -445,6 +499,10 @@ common_exception_return:
 
 	/* Call do_signal() */
 
+#ifdef CONFIG_TRACE_IRQFLAGS
+	movi	a4, trace_hardirqs_on
+	callx4	a4
+#endif
 	rsil	a2, 0
 	movi	a4, do_notify_resume	# int do_notify_resume(struct pt_regs*)
 	mov	a6, a1
@@ -453,6 +511,10 @@ common_exception_return:
 
 3:	/* Reschedule */
 
+#ifdef CONFIG_TRACE_IRQFLAGS
+	movi	a4, trace_hardirqs_on
+	callx4	a4
+#endif
 	rsil	a2, 0
 	movi	a4, schedule	# void schedule (void)
 	callx4	a4
@@ -471,6 +533,12 @@ common_exception_return:
 	j	1b
 #endif
 
+#if XTENSA_FAKE_NMI
+.LNMIexit:
+	l32i	a3, a1, PT_PS
+	_bbci.l	a3, PS_UM_BIT, 4f
+#endif
+
 5:
 #ifdef CONFIG_DEBUG_TLB_SANITY
 	l32i	a4, a1, PT_DEPC
@@ -481,16 +549,8 @@ common_exception_return:
 6:
 4:
 #ifdef CONFIG_TRACE_IRQFLAGS
-	l32i	a4, a1, PT_DEPC
-	/* Double exception means we came here with an exception
-	 * while PS.EXCM was set, i.e. interrupts disabled.
-	 */
-	bgeui	a4, VALID_DOUBLE_EXCEPTION_ADDRESS, 1f
-	l32i	a4, a1, PT_EXCCAUSE
-	bnei	a4, EXCCAUSE_LEVEL1_INTERRUPT, 1f
-	/* We came here with an interrupt means interrupts were enabled
-	 * and we'll reenable them on return.
-	 */
+	extui	a4, a3, PS_INTLEVEL_SHIFT, PS_INTLEVEL_WIDTH
+	bgei	a4, LOCKLEVEL, 1f
 	movi	a4, trace_hardirqs_on
 	callx4	a4
 1:
@@ -568,12 +628,13 @@ user_exception_exit:
 	 *	 (if we have restored WSBITS-1 frames).
 	 */
 
+2:
 #if XCHAL_HAVE_THREADPTR
 	l32i	a3, a1, PT_THREADPTR
 	wur	a3, threadptr
 #endif
 
-2:	j	common_exception_exit
+	j	common_exception_exit
 
 	/* This is the kernel exception exit.
 	 * We avoided to do a MOVSP when we entered the exception, but we
@@ -1561,6 +1622,13 @@ ENTRY(fast_second_level_miss)
 	rfde
 
 9:	l32i	a0, a1, TASK_ACTIVE_MM	# unlikely case mm == 0
+	bnez	a0, 8b
+
+	/* Even more unlikely case active_mm == 0.
+	 * We can get here with NMI in the middle of context_switch that
+	 * touches vmalloc area.
+	 */
+	movi	a0, init_mm
 	j	8b
 
 #if (DCACHE_WAY_SIZE > PAGE_SIZE)
@@ -1820,7 +1888,7 @@ ENDPROC(system_call)
 	mov	a12, a0
 	.endr
 #endif
-	_entry	a1, 48
+	_entry	a1, 16
 #if XCHAL_NUM_AREGS % 12 == 0
 	mov	a8, a8
 #elif XCHAL_NUM_AREGS % 12 == 4
@@ -1844,7 +1912,7 @@ ENDPROC(system_call)
 
 ENTRY(_switch_to)
 
-	entry	a1, 16
+	entry	a1, 48
 
 	mov	a11, a3			# and 'next' (a3)
 
@@ -1864,10 +1932,8 @@ ENTRY(_switch_to)
 
 	/* Disable ints while we manipulate the stack pointer. */
 
-	rsil	a14, LOCKLEVEL
-	rsr	a3, excsave1
+	irq_save a14, a3
 	rsync
-	s32i	a3, a3, EXC_TABLE_FIXUP	/* enter critical section */
 
 	/* Switch CPENABLE */
 
@@ -1888,9 +1954,7 @@ ENTRY(_switch_to)
 	 */
 
 	rsr	a3, excsave1		# exc_table
-	movi	a6, 0
 	addi	a7, a5, PT_REGS_OFFSET
-	s32i	a6, a3, EXC_TABLE_FIXUP
 	s32i	a7, a3, EXC_TABLE_KSTK
 
 	/* restore context of the task 'next' */
diff --git a/arch/xtensa/kernel/irq.c b/arch/xtensa/kernel/irq.c
index 3eee94f621eb..6df31cacc4b8 100644
--- a/arch/xtensa/kernel/irq.c
+++ b/arch/xtensa/kernel/irq.c
@@ -28,7 +28,7 @@
 #include <asm/uaccess.h>
 #include <asm/platform.h>
 
-atomic_t irq_err_count;
+DECLARE_PER_CPU(unsigned long, nmi_count);
 
 asmlinkage void do_IRQ(int hwirq, struct pt_regs *regs)
 {
@@ -57,11 +57,16 @@ asmlinkage void do_IRQ(int hwirq, struct pt_regs *regs)
 
 int arch_show_interrupts(struct seq_file *p, int prec)
 {
+	unsigned cpu __maybe_unused;
 #ifdef CONFIG_SMP
 	show_ipi_list(p, prec);
 #endif
-	seq_printf(p, "%*s: ", prec, "ERR");
-	seq_printf(p, "%10u\n", atomic_read(&irq_err_count));
+#if XTENSA_FAKE_NMI
+	seq_printf(p, "%*s:", prec, "NMI");
+	for_each_online_cpu(cpu)
+		seq_printf(p, " %10lu", per_cpu(nmi_count, cpu));
+	seq_puts(p, "   Non-maskable interrupts\n");
+#endif
 	return 0;
 }
 
@@ -106,6 +111,12 @@ int xtensa_irq_map(struct irq_domain *d, unsigned int irq,
 		irq_set_chip_and_handler_name(irq, irq_chip,
 				handle_percpu_irq, "timer");
 		irq_clear_status_flags(irq, IRQ_LEVEL);
+#ifdef XCHAL_INTTYPE_MASK_PROFILING
+	} else if (mask & XCHAL_INTTYPE_MASK_PROFILING) {
+		irq_set_chip_and_handler_name(irq, irq_chip,
+				handle_percpu_irq, "profiling");
+		irq_set_status_flags(irq, IRQ_LEVEL);
+#endif
 	} else {/* XCHAL_INTTYPE_MASK_WRITE_ERROR */
 		/* XCHAL_INTTYPE_MASK_NMI */
 		irq_set_chip_and_handler_name(irq, irq_chip,
diff --git a/arch/xtensa/kernel/pci-dma.c b/arch/xtensa/kernel/pci-dma.c
index e8b76b8e4b29..fb75ebf1463a 100644
--- a/arch/xtensa/kernel/pci-dma.c
+++ b/arch/xtensa/kernel/pci-dma.c
@@ -1,6 +1,4 @@
 /*
- * arch/xtensa/kernel/pci-dma.c
- *
  * DMA coherent memory allocation.
  *
  * This program is free software; you can redistribute  it and/or modify it
@@ -9,6 +7,7 @@
  * option) any later version.
  *
  * Copyright (C) 2002 - 2005 Tensilica Inc.
+ * Copyright (C) 2015 Cadence Design Systems Inc.
  *
  * Based on version for i386.
  *
@@ -25,13 +24,107 @@
 #include <asm/io.h>
 #include <asm/cacheflush.h>
 
+void dma_cache_sync(struct device *dev, void *vaddr, size_t size,
+		    enum dma_data_direction dir)
+{
+	switch (dir) {
+	case DMA_BIDIRECTIONAL:
+		__flush_invalidate_dcache_range((unsigned long)vaddr, size);
+		break;
+
+	case DMA_FROM_DEVICE:
+		__invalidate_dcache_range((unsigned long)vaddr, size);
+		break;
+
+	case DMA_TO_DEVICE:
+		__flush_dcache_range((unsigned long)vaddr, size);
+		break;
+
+	case DMA_NONE:
+		BUG();
+		break;
+	}
+}
+EXPORT_SYMBOL(dma_cache_sync);
+
+static void xtensa_sync_single_for_cpu(struct device *dev,
+				       dma_addr_t dma_handle, size_t size,
+				       enum dma_data_direction dir)
+{
+	void *vaddr;
+
+	switch (dir) {
+	case DMA_BIDIRECTIONAL:
+	case DMA_FROM_DEVICE:
+		vaddr = bus_to_virt(dma_handle);
+		__invalidate_dcache_range((unsigned long)vaddr, size);
+		break;
+
+	case DMA_NONE:
+		BUG();
+		break;
+
+	default:
+		break;
+	}
+}
+
+static void xtensa_sync_single_for_device(struct device *dev,
+					  dma_addr_t dma_handle, size_t size,
+					  enum dma_data_direction dir)
+{
+	void *vaddr;
+
+	switch (dir) {
+	case DMA_BIDIRECTIONAL:
+	case DMA_TO_DEVICE:
+		vaddr = bus_to_virt(dma_handle);
+		__flush_dcache_range((unsigned long)vaddr, size);
+		break;
+
+	case DMA_NONE:
+		BUG();
+		break;
+
+	default:
+		break;
+	}
+}
+
+static void xtensa_sync_sg_for_cpu(struct device *dev,
+				   struct scatterlist *sg, int nents,
+				   enum dma_data_direction dir)
+{
+	struct scatterlist *s;
+	int i;
+
+	for_each_sg(sg, s, nents, i) {
+		xtensa_sync_single_for_cpu(dev, sg_dma_address(s),
+					   sg_dma_len(s), dir);
+	}
+}
+
+static void xtensa_sync_sg_for_device(struct device *dev,
+				      struct scatterlist *sg, int nents,
+				      enum dma_data_direction dir)
+{
+	struct scatterlist *s;
+	int i;
+
+	for_each_sg(sg, s, nents, i) {
+		xtensa_sync_single_for_device(dev, sg_dma_address(s),
+					      sg_dma_len(s), dir);
+	}
+}
+
 /*
  * Note: We assume that the full memory space is always mapped to 'kseg'
  *	 Otherwise we have to use page attributes (not implemented).
  */
 
-void *
-dma_alloc_coherent(struct device *dev,size_t size,dma_addr_t *handle,gfp_t flag)
+static void *xtensa_dma_alloc(struct device *dev, size_t size,
+			      dma_addr_t *handle, gfp_t flag,
+			      struct dma_attrs *attrs)
 {
 	unsigned long ret;
 	unsigned long uncached = 0;
@@ -52,20 +145,15 @@ dma_alloc_coherent(struct device *dev,size_t size,dma_addr_t *handle,gfp_t flag)
 	BUG_ON(ret < XCHAL_KSEG_CACHED_VADDR ||
 	       ret > XCHAL_KSEG_CACHED_VADDR + XCHAL_KSEG_SIZE - 1);
 
+	uncached = ret + XCHAL_KSEG_BYPASS_VADDR - XCHAL_KSEG_CACHED_VADDR;
+	*handle = virt_to_bus((void *)ret);
+	__invalidate_dcache_range(ret, size);
 
-	if (ret != 0) {
-		memset((void*) ret, 0, size);
-		uncached = ret+XCHAL_KSEG_BYPASS_VADDR-XCHAL_KSEG_CACHED_VADDR;
-		*handle = virt_to_bus((void*)ret);
-		__flush_invalidate_dcache_range(ret, size);
-	}
-
-	return (void*)uncached;
+	return (void *)uncached;
 }
-EXPORT_SYMBOL(dma_alloc_coherent);
 
-void dma_free_coherent(struct device *hwdev, size_t size,
-			 void *vaddr, dma_addr_t dma_handle)
+static void xtensa_dma_free(struct device *hwdev, size_t size, void *vaddr,
+			    dma_addr_t dma_handle, struct dma_attrs *attrs)
 {
 	unsigned long addr = (unsigned long)vaddr +
 		XCHAL_KSEG_CACHED_VADDR - XCHAL_KSEG_BYPASS_VADDR;
@@ -75,24 +163,79 @@ void dma_free_coherent(struct device *hwdev, size_t size,
 
 	free_pages(addr, get_order(size));
 }
-EXPORT_SYMBOL(dma_free_coherent);
 
+static dma_addr_t xtensa_map_page(struct device *dev, struct page *page,
+				  unsigned long offset, size_t size,
+				  enum dma_data_direction dir,
+				  struct dma_attrs *attrs)
+{
+	dma_addr_t dma_handle = page_to_phys(page) + offset;
+
+	BUG_ON(PageHighMem(page));
+	xtensa_sync_single_for_device(dev, dma_handle, size, dir);
+	return dma_handle;
+}
 
-void consistent_sync(void *vaddr, size_t size, int direction)
+static void xtensa_unmap_page(struct device *dev, dma_addr_t dma_handle,
+			      size_t size, enum dma_data_direction dir,
+			      struct dma_attrs *attrs)
 {
-	switch (direction) {
-	case PCI_DMA_NONE:
-		BUG();
-	case PCI_DMA_FROMDEVICE:        /* invalidate only */
-		__invalidate_dcache_range((unsigned long)vaddr,
-				          (unsigned long)size);
-		break;
+	xtensa_sync_single_for_cpu(dev, dma_handle, size, dir);
+}
 
-	case PCI_DMA_TODEVICE:          /* writeback only */
-	case PCI_DMA_BIDIRECTIONAL:     /* writeback and invalidate */
-		__flush_invalidate_dcache_range((unsigned long)vaddr,
-				    		(unsigned long)size);
-		break;
+static int xtensa_map_sg(struct device *dev, struct scatterlist *sg,
+			 int nents, enum dma_data_direction dir,
+			 struct dma_attrs *attrs)
+{
+	struct scatterlist *s;
+	int i;
+
+	for_each_sg(sg, s, nents, i) {
+		s->dma_address = xtensa_map_page(dev, sg_page(s), s->offset,
+						 s->length, dir, attrs);
+	}
+	return nents;
+}
+
+static void xtensa_unmap_sg(struct device *dev,
+			    struct scatterlist *sg, int nents,
+			    enum dma_data_direction dir,
+			    struct dma_attrs *attrs)
+{
+	struct scatterlist *s;
+	int i;
+
+	for_each_sg(sg, s, nents, i) {
+		xtensa_unmap_page(dev, sg_dma_address(s),
+				  sg_dma_len(s), dir, attrs);
 	}
 }
-EXPORT_SYMBOL(consistent_sync);
+
+int xtensa_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
+{
+	return 0;
+}
+
+struct dma_map_ops xtensa_dma_map_ops = {
+	.alloc = xtensa_dma_alloc,
+	.free = xtensa_dma_free,
+	.map_page = xtensa_map_page,
+	.unmap_page = xtensa_unmap_page,
+	.map_sg = xtensa_map_sg,
+	.unmap_sg = xtensa_unmap_sg,
+	.sync_single_for_cpu = xtensa_sync_single_for_cpu,
+	.sync_single_for_device = xtensa_sync_single_for_device,
+	.sync_sg_for_cpu = xtensa_sync_sg_for_cpu,
+	.sync_sg_for_device = xtensa_sync_sg_for_device,
+	.mapping_error = xtensa_dma_mapping_error,
+};
+EXPORT_SYMBOL(xtensa_dma_map_ops);
+
+#define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16)
+
+static int __init xtensa_dma_init(void)
+{
+	dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
+	return 0;
+}
+fs_initcall(xtensa_dma_init);
diff --git a/arch/xtensa/kernel/perf_event.c b/arch/xtensa/kernel/perf_event.c
new file mode 100644
index 000000000000..54f01188c29c
--- /dev/null
+++ b/arch/xtensa/kernel/perf_event.c
@@ -0,0 +1,454 @@
+/*
+ * Xtensa Performance Monitor Module driver
+ * See Tensilica Debug User's Guide for PMU registers documentation.
+ *
+ * Copyright (C) 2015 Cadence Design Systems Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/irqdomain.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/perf_event.h>
+#include <linux/platform_device.h>
+
+#include <asm/processor.h>
+#include <asm/stacktrace.h>
+
+/* Global control/status for all perf counters */
+#define XTENSA_PMU_PMG			0x1000
+/* Perf counter values */
+#define XTENSA_PMU_PM(i)		(0x1080 + (i) * 4)
+/* Perf counter control registers */
+#define XTENSA_PMU_PMCTRL(i)		(0x1100 + (i) * 4)
+/* Perf counter status registers */
+#define XTENSA_PMU_PMSTAT(i)		(0x1180 + (i) * 4)
+
+#define XTENSA_PMU_PMG_PMEN		0x1
+
+#define XTENSA_PMU_COUNTER_MASK		0xffffffffULL
+#define XTENSA_PMU_COUNTER_MAX		0x7fffffff
+
+#define XTENSA_PMU_PMCTRL_INTEN		0x00000001
+#define XTENSA_PMU_PMCTRL_KRNLCNT	0x00000008
+#define XTENSA_PMU_PMCTRL_TRACELEVEL	0x000000f0
+#define XTENSA_PMU_PMCTRL_SELECT_SHIFT	8
+#define XTENSA_PMU_PMCTRL_SELECT	0x00001f00
+#define XTENSA_PMU_PMCTRL_MASK_SHIFT	16
+#define XTENSA_PMU_PMCTRL_MASK		0xffff0000
+
+#define XTENSA_PMU_MASK(select, mask) \
+	(((select) << XTENSA_PMU_PMCTRL_SELECT_SHIFT) | \
+	 ((mask) << XTENSA_PMU_PMCTRL_MASK_SHIFT) | \
+	 XTENSA_PMU_PMCTRL_TRACELEVEL | \
+	 XTENSA_PMU_PMCTRL_INTEN)
+
+#define XTENSA_PMU_PMSTAT_OVFL		0x00000001
+#define XTENSA_PMU_PMSTAT_INTASRT	0x00000010
+
+struct xtensa_pmu_events {
+	/* Array of events currently on this core */
+	struct perf_event *event[XCHAL_NUM_PERF_COUNTERS];
+	/* Bitmap of used hardware counters */
+	unsigned long used_mask[BITS_TO_LONGS(XCHAL_NUM_PERF_COUNTERS)];
+};
+static DEFINE_PER_CPU(struct xtensa_pmu_events, xtensa_pmu_events);
+
+static const u32 xtensa_hw_ctl[] = {
+	[PERF_COUNT_HW_CPU_CYCLES]		= XTENSA_PMU_MASK(0, 0x1),
+	[PERF_COUNT_HW_INSTRUCTIONS]		= XTENSA_PMU_MASK(2, 0xffff),
+	[PERF_COUNT_HW_CACHE_REFERENCES]	= XTENSA_PMU_MASK(10, 0x1),
+	[PERF_COUNT_HW_CACHE_MISSES]		= XTENSA_PMU_MASK(12, 0x1),
+	/* Taken and non-taken branches + taken loop ends */
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= XTENSA_PMU_MASK(2, 0x490),
+	/* Instruction-related + other global stall cycles */
+	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND]	= XTENSA_PMU_MASK(4, 0x1ff),
+	/* Data-related global stall cycles */
+	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND]	= XTENSA_PMU_MASK(3, 0x1ff),
+};
+
+#define C(_x) PERF_COUNT_HW_CACHE_##_x
+
+static const u32 xtensa_cache_ctl[][C(OP_MAX)][C(RESULT_MAX)] = {
+	[C(L1D)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= XTENSA_PMU_MASK(10, 0x1),
+			[C(RESULT_MISS)]	= XTENSA_PMU_MASK(10, 0x2),
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= XTENSA_PMU_MASK(11, 0x1),
+			[C(RESULT_MISS)]	= XTENSA_PMU_MASK(11, 0x2),
+		},
+	},
+	[C(L1I)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= XTENSA_PMU_MASK(8, 0x1),
+			[C(RESULT_MISS)]	= XTENSA_PMU_MASK(8, 0x2),
+		},
+	},
+	[C(DTLB)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= XTENSA_PMU_MASK(9, 0x1),
+			[C(RESULT_MISS)]	= XTENSA_PMU_MASK(9, 0x8),
+		},
+	},
+	[C(ITLB)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= XTENSA_PMU_MASK(7, 0x1),
+			[C(RESULT_MISS)]	= XTENSA_PMU_MASK(7, 0x8),
+		},
+	},
+};
+
+static int xtensa_pmu_cache_event(u64 config)
+{
+	unsigned int cache_type, cache_op, cache_result;
+	int ret;
+
+	cache_type = (config >>  0) & 0xff;
+	cache_op = (config >>  8) & 0xff;
+	cache_result = (config >> 16) & 0xff;
+
+	if (cache_type >= ARRAY_SIZE(xtensa_cache_ctl) ||
+	    cache_op >= C(OP_MAX) ||
+	    cache_result >= C(RESULT_MAX))
+		return -EINVAL;
+
+	ret = xtensa_cache_ctl[cache_type][cache_op][cache_result];
+
+	if (ret == 0)
+		return -EINVAL;
+
+	return ret;
+}
+
+static inline uint32_t xtensa_pmu_read_counter(int idx)
+{
+	return get_er(XTENSA_PMU_PM(idx));
+}
+
+static inline void xtensa_pmu_write_counter(int idx, uint32_t v)
+{
+	set_er(v, XTENSA_PMU_PM(idx));
+}
+
+static void xtensa_perf_event_update(struct perf_event *event,
+				     struct hw_perf_event *hwc, int idx)
+{
+	uint64_t prev_raw_count, new_raw_count;
+	int64_t delta;
+
+	do {
+		prev_raw_count = local64_read(&hwc->prev_count);
+		new_raw_count = xtensa_pmu_read_counter(event->hw.idx);
+	} while (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
+				 new_raw_count) != prev_raw_count);
+
+	delta = (new_raw_count - prev_raw_count) & XTENSA_PMU_COUNTER_MASK;
+
+	local64_add(delta, &event->count);
+	local64_sub(delta, &hwc->period_left);
+}
+
+static bool xtensa_perf_event_set_period(struct perf_event *event,
+					 struct hw_perf_event *hwc, int idx)
+{
+	bool rc = false;
+	s64 left;
+
+	if (!is_sampling_event(event)) {
+		left = XTENSA_PMU_COUNTER_MAX;
+	} else {
+		s64 period = hwc->sample_period;
+
+		left = local64_read(&hwc->period_left);
+		if (left <= -period) {
+			left = period;
+			local64_set(&hwc->period_left, left);
+			hwc->last_period = period;
+			rc = true;
+		} else if (left <= 0) {
+			left += period;
+			local64_set(&hwc->period_left, left);
+			hwc->last_period = period;
+			rc = true;
+		}
+		if (left > XTENSA_PMU_COUNTER_MAX)
+			left = XTENSA_PMU_COUNTER_MAX;
+	}
+
+	local64_set(&hwc->prev_count, -left);
+	xtensa_pmu_write_counter(idx, -left);
+	perf_event_update_userpage(event);
+
+	return rc;
+}
+
+static void xtensa_pmu_enable(struct pmu *pmu)
+{
+	set_er(get_er(XTENSA_PMU_PMG) | XTENSA_PMU_PMG_PMEN, XTENSA_PMU_PMG);
+}
+
+static void xtensa_pmu_disable(struct pmu *pmu)
+{
+	set_er(get_er(XTENSA_PMU_PMG) & ~XTENSA_PMU_PMG_PMEN, XTENSA_PMU_PMG);
+}
+
+static int xtensa_pmu_event_init(struct perf_event *event)
+{
+	int ret;
+
+	switch (event->attr.type) {
+	case PERF_TYPE_HARDWARE:
+		if (event->attr.config >= ARRAY_SIZE(xtensa_hw_ctl) ||
+		    xtensa_hw_ctl[event->attr.config] == 0)
+			return -EINVAL;
+		event->hw.config = xtensa_hw_ctl[event->attr.config];
+		return 0;
+
+	case PERF_TYPE_HW_CACHE:
+		ret = xtensa_pmu_cache_event(event->attr.config);
+		if (ret < 0)
+			return ret;
+		event->hw.config = ret;
+		return 0;
+
+	case PERF_TYPE_RAW:
+		/* Not 'previous counter' select */
+		if ((event->attr.config & XTENSA_PMU_PMCTRL_SELECT) ==
+		    (1 << XTENSA_PMU_PMCTRL_SELECT_SHIFT))
+			return -EINVAL;
+		event->hw.config = (event->attr.config &
+				    (XTENSA_PMU_PMCTRL_KRNLCNT |
+				     XTENSA_PMU_PMCTRL_TRACELEVEL |
+				     XTENSA_PMU_PMCTRL_SELECT |
+				     XTENSA_PMU_PMCTRL_MASK)) |
+			XTENSA_PMU_PMCTRL_INTEN;
+		return 0;
+
+	default:
+		return -ENOENT;
+	}
+}
+
+/*
+ * Starts/Stops a counter present on the PMU. The PMI handler
+ * should stop the counter when perf_event_overflow() returns
+ * !0. ->start() will be used to continue.
+ */
+static void xtensa_pmu_start(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+
+	if (WARN_ON_ONCE(idx == -1))
+		return;
+
+	if (flags & PERF_EF_RELOAD) {
+		WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
+		xtensa_perf_event_set_period(event, hwc, idx);
+	}
+
+	hwc->state = 0;
+
+	set_er(hwc->config, XTENSA_PMU_PMCTRL(idx));
+}
+
+static void xtensa_pmu_stop(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+
+	if (!(hwc->state & PERF_HES_STOPPED)) {
+		set_er(0, XTENSA_PMU_PMCTRL(idx));
+		set_er(get_er(XTENSA_PMU_PMSTAT(idx)),
+		       XTENSA_PMU_PMSTAT(idx));
+		hwc->state |= PERF_HES_STOPPED;
+	}
+
+	if ((flags & PERF_EF_UPDATE) &&
+	    !(event->hw.state & PERF_HES_UPTODATE)) {
+		xtensa_perf_event_update(event, &event->hw, idx);
+		event->hw.state |= PERF_HES_UPTODATE;
+	}
+}
+
+/*
+ * Adds/Removes a counter to/from the PMU, can be done inside
+ * a transaction, see the ->*_txn() methods.
+ */
+static int xtensa_pmu_add(struct perf_event *event, int flags)
+{
+	struct xtensa_pmu_events *ev = this_cpu_ptr(&xtensa_pmu_events);
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+
+	if (__test_and_set_bit(idx, ev->used_mask)) {
+		idx = find_first_zero_bit(ev->used_mask,
+					  XCHAL_NUM_PERF_COUNTERS);
+		if (idx == XCHAL_NUM_PERF_COUNTERS)
+			return -EAGAIN;
+
+		__set_bit(idx, ev->used_mask);
+		hwc->idx = idx;
+	}
+	ev->event[idx] = event;
+
+	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+
+	if (flags & PERF_EF_START)
+		xtensa_pmu_start(event, PERF_EF_RELOAD);
+
+	perf_event_update_userpage(event);
+	return 0;
+}
+
+static void xtensa_pmu_del(struct perf_event *event, int flags)
+{
+	struct xtensa_pmu_events *ev = this_cpu_ptr(&xtensa_pmu_events);
+
+	xtensa_pmu_stop(event, PERF_EF_UPDATE);
+	__clear_bit(event->hw.idx, ev->used_mask);
+	perf_event_update_userpage(event);
+}
+
+static void xtensa_pmu_read(struct perf_event *event)
+{
+	xtensa_perf_event_update(event, &event->hw, event->hw.idx);
+}
+
+static int callchain_trace(struct stackframe *frame, void *data)
+{
+	struct perf_callchain_entry *entry = data;
+
+	perf_callchain_store(entry, frame->pc);
+	return 0;
+}
+
+void perf_callchain_kernel(struct perf_callchain_entry *entry,
+			   struct pt_regs *regs)
+{
+	xtensa_backtrace_kernel(regs, PERF_MAX_STACK_DEPTH,
+				callchain_trace, NULL, entry);
+}
+
+void perf_callchain_user(struct perf_callchain_entry *entry,
+			 struct pt_regs *regs)
+{
+	xtensa_backtrace_user(regs, PERF_MAX_STACK_DEPTH,
+			      callchain_trace, entry);
+}
+
+void perf_event_print_debug(void)
+{
+	unsigned long flags;
+	unsigned i;
+
+	local_irq_save(flags);
+	pr_info("CPU#%d: PMG: 0x%08lx\n", smp_processor_id(),
+		get_er(XTENSA_PMU_PMG));
+	for (i = 0; i < XCHAL_NUM_PERF_COUNTERS; ++i)
+		pr_info("PM%d: 0x%08lx, PMCTRL%d: 0x%08lx, PMSTAT%d: 0x%08lx\n",
+			i, get_er(XTENSA_PMU_PM(i)),
+			i, get_er(XTENSA_PMU_PMCTRL(i)),
+			i, get_er(XTENSA_PMU_PMSTAT(i)));
+	local_irq_restore(flags);
+}
+
+irqreturn_t xtensa_pmu_irq_handler(int irq, void *dev_id)
+{
+	irqreturn_t rc = IRQ_NONE;
+	struct xtensa_pmu_events *ev = this_cpu_ptr(&xtensa_pmu_events);
+	unsigned i;
+
+	for (i = find_first_bit(ev->used_mask, XCHAL_NUM_PERF_COUNTERS);
+	     i < XCHAL_NUM_PERF_COUNTERS;
+	     i = find_next_bit(ev->used_mask, XCHAL_NUM_PERF_COUNTERS, i + 1)) {
+		uint32_t v = get_er(XTENSA_PMU_PMSTAT(i));
+		struct perf_event *event = ev->event[i];
+		struct hw_perf_event *hwc = &event->hw;
+		u64 last_period;
+
+		if (!(v & XTENSA_PMU_PMSTAT_OVFL))
+			continue;
+
+		set_er(v, XTENSA_PMU_PMSTAT(i));
+		xtensa_perf_event_update(event, hwc, i);
+		last_period = hwc->last_period;
+		if (xtensa_perf_event_set_period(event, hwc, i)) {
+			struct perf_sample_data data;
+			struct pt_regs *regs = get_irq_regs();
+
+			perf_sample_data_init(&data, 0, last_period);
+			if (perf_event_overflow(event, &data, regs))
+				xtensa_pmu_stop(event, 0);
+		}
+
+		rc = IRQ_HANDLED;
+	}
+	return rc;
+}
+
+static struct pmu xtensa_pmu = {
+	.pmu_enable = xtensa_pmu_enable,
+	.pmu_disable = xtensa_pmu_disable,
+	.event_init = xtensa_pmu_event_init,
+	.add = xtensa_pmu_add,
+	.del = xtensa_pmu_del,
+	.start = xtensa_pmu_start,
+	.stop = xtensa_pmu_stop,
+	.read = xtensa_pmu_read,
+};
+
+static void xtensa_pmu_setup(void)
+{
+	unsigned i;
+
+	set_er(0, XTENSA_PMU_PMG);
+	for (i = 0; i < XCHAL_NUM_PERF_COUNTERS; ++i) {
+		set_er(0, XTENSA_PMU_PMCTRL(i));
+		set_er(get_er(XTENSA_PMU_PMSTAT(i)), XTENSA_PMU_PMSTAT(i));
+	}
+}
+
+static int xtensa_pmu_notifier(struct notifier_block *self,
+			       unsigned long action, void *data)
+{
+	switch (action & ~CPU_TASKS_FROZEN) {
+	case CPU_STARTING:
+		xtensa_pmu_setup();
+		break;
+
+	default:
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+static int __init xtensa_pmu_init(void)
+{
+	int ret;
+	int irq = irq_create_mapping(NULL, XCHAL_PROFILING_INTERRUPT);
+
+	perf_cpu_notifier(xtensa_pmu_notifier);
+#if XTENSA_FAKE_NMI
+	enable_irq(irq);
+#else
+	ret = request_irq(irq, xtensa_pmu_irq_handler, IRQF_PERCPU,
+			  "pmu", NULL);
+	if (ret < 0)
+		return ret;
+#endif
+
+	ret = perf_pmu_register(&xtensa_pmu, "cpu", PERF_TYPE_RAW);
+	if (ret)
+		free_irq(irq, NULL);
+
+	return ret;
+}
+early_initcall(xtensa_pmu_init);
diff --git a/arch/xtensa/kernel/stacktrace.c b/arch/xtensa/kernel/stacktrace.c
index 7d2c317bd98b..7538d802b65a 100644
--- a/arch/xtensa/kernel/stacktrace.c
+++ b/arch/xtensa/kernel/stacktrace.c
@@ -1,11 +1,12 @@
 /*
- * arch/xtensa/kernel/stacktrace.c
+ * Kernel and userspace stack tracing.
  *
  * This file is subject to the terms and conditions of the GNU General Public
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
  * Copyright (C) 2001 - 2013 Tensilica Inc.
+ * Copyright (C) 2015 Cadence Design Systems Inc.
  */
 #include <linux/export.h>
 #include <linux/sched.h>
@@ -13,6 +14,170 @@
 
 #include <asm/stacktrace.h>
 #include <asm/traps.h>
+#include <asm/uaccess.h>
+
+#if IS_ENABLED(CONFIG_OPROFILE) || IS_ENABLED(CONFIG_PERF_EVENTS)
+
+/* Address of common_exception_return, used to check the
+ * transition from kernel to user space.
+ */
+extern int common_exception_return;
+
+/* A struct that maps to the part of the frame containing the a0 and
+ * a1 registers.
+ */
+struct frame_start {
+	unsigned long a0;
+	unsigned long a1;
+};
+
+void xtensa_backtrace_user(struct pt_regs *regs, unsigned int depth,
+			   int (*ufn)(struct stackframe *frame, void *data),
+			   void *data)
+{
+	unsigned long windowstart = regs->windowstart;
+	unsigned long windowbase = regs->windowbase;
+	unsigned long a0 = regs->areg[0];
+	unsigned long a1 = regs->areg[1];
+	unsigned long pc = regs->pc;
+	struct stackframe frame;
+	int index;
+
+	if (!depth--)
+		return;
+
+	frame.pc = pc;
+	frame.sp = a1;
+
+	if (pc == 0 || pc >= TASK_SIZE || ufn(&frame, data))
+		return;
+
+	/* Two steps:
+	 *
+	 * 1. Look through the register window for the
+	 * previous PCs in the call trace.
+	 *
+	 * 2. Look on the stack.
+	 */
+
+	/* Step 1.  */
+	/* Rotate WINDOWSTART to move the bit corresponding to
+	 * the current window to the bit #0.
+	 */
+	windowstart = (windowstart << WSBITS | windowstart) >> windowbase;
+
+	/* Look for bits that are set, they correspond to
+	 * valid windows.
+	 */
+	for (index = WSBITS - 1; (index > 0) && depth; depth--, index--)
+		if (windowstart & (1 << index)) {
+			/* Get the PC from a0 and a1. */
+			pc = MAKE_PC_FROM_RA(a0, pc);
+			/* Read a0 and a1 from the
+			 * corresponding position in AREGs.
+			 */
+			a0 = regs->areg[index * 4];
+			a1 = regs->areg[index * 4 + 1];
+
+			frame.pc = pc;
+			frame.sp = a1;
+
+			if (pc == 0 || pc >= TASK_SIZE || ufn(&frame, data))
+				return;
+		}
+
+	/* Step 2. */
+	/* We are done with the register window, we need to
+	 * look through the stack.
+	 */
+	if (!depth)
+		return;
+
+	/* Start from the a1 register. */
+	/* a1 = regs->areg[1]; */
+	while (a0 != 0 && depth--) {
+		struct frame_start frame_start;
+		/* Get the location for a1, a0 for the
+		 * previous frame from the current a1.
+		 */
+		unsigned long *psp = (unsigned long *)a1;
+
+		psp -= 4;
+
+		/* Check if the region is OK to access. */
+		if (!access_ok(VERIFY_READ, psp, sizeof(frame_start)))
+			return;
+		/* Copy a1, a0 from user space stack frame. */
+		if (__copy_from_user_inatomic(&frame_start, psp,
+					      sizeof(frame_start)))
+			return;
+
+		pc = MAKE_PC_FROM_RA(a0, pc);
+		a0 = frame_start.a0;
+		a1 = frame_start.a1;
+
+		frame.pc = pc;
+		frame.sp = a1;
+
+		if (pc == 0 || pc >= TASK_SIZE || ufn(&frame, data))
+			return;
+	}
+}
+EXPORT_SYMBOL(xtensa_backtrace_user);
+
+void xtensa_backtrace_kernel(struct pt_regs *regs, unsigned int depth,
+			     int (*kfn)(struct stackframe *frame, void *data),
+			     int (*ufn)(struct stackframe *frame, void *data),
+			     void *data)
+{
+	unsigned long pc = regs->depc > VALID_DOUBLE_EXCEPTION_ADDRESS ?
+		regs->depc : regs->pc;
+	unsigned long sp_start, sp_end;
+	unsigned long a0 = regs->areg[0];
+	unsigned long a1 = regs->areg[1];
+
+	sp_start = a1 & ~(THREAD_SIZE - 1);
+	sp_end = sp_start + THREAD_SIZE;
+
+	/* Spill the register window to the stack first. */
+	spill_registers();
+
+	/* Read the stack frames one by one and create the PC
+	 * from the a0 and a1 registers saved there.
+	 */
+	while (a1 > sp_start && a1 < sp_end && depth--) {
+		struct stackframe frame;
+		unsigned long *psp = (unsigned long *)a1;
+
+		frame.pc = pc;
+		frame.sp = a1;
+
+		if (kernel_text_address(pc) && kfn(&frame, data))
+			return;
+
+		if (pc == (unsigned long)&common_exception_return) {
+			regs = (struct pt_regs *)a1;
+			if (user_mode(regs)) {
+				if (ufn == NULL)
+					return;
+				xtensa_backtrace_user(regs, depth, ufn, data);
+				return;
+			}
+			a0 = regs->areg[0];
+			a1 = regs->areg[1];
+			continue;
+		}
+
+		sp_start = a1;
+
+		pc = MAKE_PC_FROM_RA(a0, pc);
+		a0 = *(psp - 4);
+		a1 = *(psp - 3);
+	}
+}
+EXPORT_SYMBOL(xtensa_backtrace_kernel);
+
+#endif
 
 void walk_stackframe(unsigned long *sp,
 		int (*fn)(struct stackframe *frame, void *data),
diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c
index 9d2f45f010ef..42d441f7898b 100644
--- a/arch/xtensa/kernel/traps.c
+++ b/arch/xtensa/kernel/traps.c
@@ -62,6 +62,7 @@ extern void fast_coprocessor(void);
 
 extern void do_illegal_instruction (struct pt_regs*);
 extern void do_interrupt (struct pt_regs*);
+extern void do_nmi(struct pt_regs *);
 extern void do_unaligned_user (struct pt_regs*);
 extern void do_multihit (struct pt_regs*, unsigned long);
 extern void do_page_fault (struct pt_regs*, unsigned long);
@@ -146,6 +147,9 @@ COPROCESSOR(6),
 #if XTENSA_HAVE_COPROCESSOR(7)
 COPROCESSOR(7),
 #endif
+#if XTENSA_FAKE_NMI
+{ EXCCAUSE_MAPPED_NMI,			0,		do_nmi },
+#endif
 { EXCCAUSE_MAPPED_DEBUG,		0,		do_debug },
 { -1, -1, 0 }
 
@@ -199,6 +203,28 @@ void do_multihit(struct pt_regs *regs, unsigned long exccause)
 
 extern void do_IRQ(int, struct pt_regs *);
 
+#if XTENSA_FAKE_NMI
+
+irqreturn_t xtensa_pmu_irq_handler(int irq, void *dev_id);
+
+DEFINE_PER_CPU(unsigned long, nmi_count);
+
+void do_nmi(struct pt_regs *regs)
+{
+	struct pt_regs *old_regs;
+
+	if ((regs->ps & PS_INTLEVEL_MASK) < LOCKLEVEL)
+		trace_hardirqs_off();
+
+	old_regs = set_irq_regs(regs);
+	nmi_enter();
+	++*this_cpu_ptr(&nmi_count);
+	xtensa_pmu_irq_handler(0, NULL);
+	nmi_exit();
+	set_irq_regs(old_regs);
+}
+#endif
+
 void do_interrupt(struct pt_regs *regs)
 {
 	static const unsigned int_level_mask[] = {
@@ -211,8 +237,11 @@ void do_interrupt(struct pt_regs *regs)
 		XCHAL_INTLEVEL6_MASK,
 		XCHAL_INTLEVEL7_MASK,
 	};
-	struct pt_regs *old_regs = set_irq_regs(regs);
+	struct pt_regs *old_regs;
+
+	trace_hardirqs_off();
 
+	old_regs = set_irq_regs(regs);
 	irq_enter();
 
 	for (;;) {
diff --git a/arch/xtensa/kernel/vectors.S b/arch/xtensa/kernel/vectors.S
index 1b397a902292..abcdb527f18a 100644
--- a/arch/xtensa/kernel/vectors.S
+++ b/arch/xtensa/kernel/vectors.S
@@ -627,7 +627,11 @@ ENTRY(_Level\level\()InterruptVector)
 	wsr	a0, excsave2
 	rsr	a0, epc\level
 	wsr	a0, epc1
+	.if	\level <= LOCKLEVEL
 	movi	a0, EXCCAUSE_LEVEL1_INTERRUPT
+	.else
+	movi	a0, EXCCAUSE_MAPPED_NMI
+	.endif
 	wsr	a0, exccause
 	rsr	a0, eps\level
 					# branch to user or kernel vector
@@ -682,11 +686,13 @@ ENDPROC(_WindowOverflow4)
 	.align 4
 _SimulateUserKernelVectorException:
 	addi	a0, a0, (1 << PS_EXCM_BIT)
+#if !XTENSA_FAKE_NMI
 	wsr	a0, ps
+#endif
 	bbsi.l	a0, PS_UM_BIT, 1f	# branch if user mode
-	rsr	a0, excsave2		# restore a0
+	xsr	a0, excsave2		# restore a0
 	j	_KernelExceptionVector	# simulate kernel vector exception
-1:	rsr	a0, excsave2		# restore a0
+1:	xsr	a0, excsave2		# restore a0
 	j	_UserExceptionVector	# simulate user vector exception
 #endif
 
diff --git a/arch/xtensa/mm/fault.c b/arch/xtensa/mm/fault.c
index 83a44a33cfa1..c9784c1b18d8 100644
--- a/arch/xtensa/mm/fault.c
+++ b/arch/xtensa/mm/fault.c
@@ -15,6 +15,7 @@
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/hardirq.h>
+#include <linux/perf_event.h>
 #include <linux/uaccess.h>
 #include <asm/mmu_context.h>
 #include <asm/cacheflush.h>
@@ -142,6 +143,12 @@ good_area:
 	}
 
 	up_read(&mm->mmap_sem);
+	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
+	if (flags & VM_FAULT_MAJOR)
+		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address);
+	else if (flags & VM_FAULT_MINOR)
+		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address);
+
 	return;
 
 	/* Something tried to access memory that isn't in our memory map..
diff --git a/arch/xtensa/oprofile/backtrace.c b/arch/xtensa/oprofile/backtrace.c
index 5f03a593d84f..8f952034e161 100644
--- a/arch/xtensa/oprofile/backtrace.c
+++ b/arch/xtensa/oprofile/backtrace.c
@@ -2,168 +2,26 @@
  * @file backtrace.c
  *
  * @remark Copyright 2008 Tensilica Inc.
+ * Copyright (C) 2015 Cadence Design Systems Inc.
  * @remark Read the file COPYING
  *
  */
 
 #include <linux/oprofile.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
 #include <asm/ptrace.h>
-#include <asm/uaccess.h>
-#include <asm/traps.h>
+#include <asm/stacktrace.h>
 
-/* Address of common_exception_return, used to check the
- * transition from kernel to user space.
- */
-extern int common_exception_return;
-
-/* A struct that maps to the part of the frame containing the a0 and
- * a1 registers.
- */
-struct frame_start {
-	unsigned long a0;
-	unsigned long a1;
-};
-
-static void xtensa_backtrace_user(struct pt_regs *regs, unsigned int depth)
-{
-	unsigned long windowstart = regs->windowstart;
-	unsigned long windowbase = regs->windowbase;
-	unsigned long a0 = regs->areg[0];
-	unsigned long a1 = regs->areg[1];
-	unsigned long pc = MAKE_PC_FROM_RA(a0, regs->pc);
-	int index;
-
-	/* First add the current PC to the trace. */
-	if (pc != 0 && pc <= TASK_SIZE)
-		oprofile_add_trace(pc);
-	else
-		return;
-
-	/* Two steps:
-	 *
-	 * 1. Look through the register window for the
-	 * previous PCs in the call trace.
-	 *
-	 * 2. Look on the stack.
-	 */
-
-	/* Step 1.  */
-	/* Rotate WINDOWSTART to move the bit corresponding to
-	 * the current window to the bit #0.
-	 */
-	windowstart = (windowstart << WSBITS | windowstart) >> windowbase;
-
-	/* Look for bits that are set, they correspond to
-	 * valid windows.
-	 */
-	for (index = WSBITS - 1; (index > 0) && depth; depth--, index--)
-		if (windowstart & (1 << index)) {
-			/* Read a0 and a1 from the
-			 * corresponding position in AREGs.
-			 */
-			a0 = regs->areg[index * 4];
-			a1 = regs->areg[index * 4 + 1];
-			/* Get the PC from a0 and a1. */
-			pc = MAKE_PC_FROM_RA(a0, pc);
-
-			/* Add the PC to the trace. */
-			if (pc != 0 && pc <= TASK_SIZE)
-				oprofile_add_trace(pc);
-			else
-				return;
-		}
-
-	/* Step 2. */
-	/* We are done with the register window, we need to
-	 * look through the stack.
-	 */
-	if (depth > 0) {
-		/* Start from the a1 register. */
-		/* a1 = regs->areg[1]; */
-		while (a0 != 0 && depth--) {
-
-			struct frame_start frame_start;
-			/* Get the location for a1, a0 for the
-			 * previous frame from the current a1.
-			 */
-			unsigned long *psp = (unsigned long *)a1;
-			psp -= 4;
-
-			/* Check if the region is OK to access. */
-			if (!access_ok(VERIFY_READ, psp, sizeof(frame_start)))
-				return;
-			/* Copy a1, a0 from user space stack frame. */
-			if (__copy_from_user_inatomic(&frame_start, psp,
-						sizeof(frame_start)))
-				return;
-
-			a0 = frame_start.a0;
-			a1 = frame_start.a1;
-			pc = MAKE_PC_FROM_RA(a0, pc);
-
-			if (pc != 0 && pc <= TASK_SIZE)
-				oprofile_add_trace(pc);
-			else
-				return;
-		}
-	}
-}
-
-static void xtensa_backtrace_kernel(struct pt_regs *regs, unsigned int depth)
+static int xtensa_backtrace_cb(struct stackframe *frame, void *data)
 {
-	unsigned long pc = regs->pc;
-	unsigned long *psp;
-	unsigned long sp_start, sp_end;
-	unsigned long a0 = regs->areg[0];
-	unsigned long a1 = regs->areg[1];
-
-	sp_start = a1 & ~(THREAD_SIZE-1);
-	sp_end = sp_start + THREAD_SIZE;
-
-	/* Spill the register window to the stack first. */
-	spill_registers();
-
-	/* Read the stack frames one by one and create the PC
-	 * from the a0 and a1 registers saved there.
-	 */
-	while (a1 > sp_start && a1 < sp_end && depth--) {
-		pc = MAKE_PC_FROM_RA(a0, pc);
-
-		/* Add the PC to the trace. */
-		oprofile_add_trace(pc);
-		if (pc == (unsigned long) &common_exception_return) {
-			regs = (struct pt_regs *)a1;
-			if (user_mode(regs)) {
-				pc = regs->pc;
-				if (pc != 0 && pc <= TASK_SIZE)
-					oprofile_add_trace(pc);
-				else
-					return;
-				return xtensa_backtrace_user(regs, depth);
-			}
-			a0 = regs->areg[0];
-			a1 = regs->areg[1];
-			continue;
-		}
-
-		psp = (unsigned long *)a1;
-
-		a0 = *(psp - 4);
-		a1 = *(psp - 3);
-
-		if (a1 <= (unsigned long)psp)
-			return;
-
-	}
-	return;
+	oprofile_add_trace(frame->pc);
+	return 0;
 }
 
 void xtensa_backtrace(struct pt_regs * const regs, unsigned int depth)
 {
 	if (user_mode(regs))
-		xtensa_backtrace_user(regs, depth);
+		xtensa_backtrace_user(regs, depth, xtensa_backtrace_cb, NULL);
 	else
-		xtensa_backtrace_kernel(regs, depth);
+		xtensa_backtrace_kernel(regs, depth, xtensa_backtrace_cb,
+					xtensa_backtrace_cb, NULL);
 }
diff --git a/arch/xtensa/platforms/iss/network.c b/arch/xtensa/platforms/iss/network.c
index 8ab021b1f141..976a38594537 100644
--- a/arch/xtensa/platforms/iss/network.c
+++ b/arch/xtensa/platforms/iss/network.c
@@ -105,13 +105,17 @@ static char *split_if_spec(char *str, ...)
 
 	va_start(ap, str);
 	while ((arg = va_arg(ap, char**)) != NULL) {
-		if (*str == '\0')
+		if (*str == '\0') {
+			va_end(ap);
 			return NULL;
+		}
 		end = strchr(str, ',');
 		if (end != str)
 			*arg = str;
-		if (end == NULL)
+		if (end == NULL) {
+			va_end(ap);
 			return NULL;
+		}
 		*end++ = '\0';
 		str = end;
 	}