Diffstat (limited to 'arch')
344 files changed, 8027 insertions, 5182 deletions
diff --git a/arch/alpha/oprofile/common.c b/arch/alpha/oprofile/common.c
index b8ce18f485d3..310a4ce1dccc 100644
--- a/arch/alpha/oprofile/common.c
+++ b/arch/alpha/oprofile/common.c
@@ -106,7 +106,7 @@ op_axp_stop(void)
 }
 
 static int
-op_axp_create_files(struct super_block *sb, struct dentry *root)
+op_axp_create_files(struct dentry *root)
 {
 	int i;
 
@@ -115,23 +115,23 @@ op_axp_create_files(struct super_block *sb, struct dentry *root)
 		char buf[4];
 
 		snprintf(buf, sizeof buf, "%d", i);
-		dir = oprofilefs_mkdir(sb, root, buf);
+		dir = oprofilefs_mkdir(root, buf);
 
-		oprofilefs_create_ulong(sb, dir, "enabled", &ctr[i].enabled);
-		oprofilefs_create_ulong(sb, dir, "event", &ctr[i].event);
-		oprofilefs_create_ulong(sb, dir, "count", &ctr[i].count);
+		oprofilefs_create_ulong(dir, "enabled", &ctr[i].enabled);
+		oprofilefs_create_ulong(dir, "event", &ctr[i].event);
+		oprofilefs_create_ulong(dir, "count", &ctr[i].count);
 		/* Dummies. */
-		oprofilefs_create_ulong(sb, dir, "kernel", &ctr[i].kernel);
-		oprofilefs_create_ulong(sb, dir, "user", &ctr[i].user);
-		oprofilefs_create_ulong(sb, dir, "unit_mask", &ctr[i].unit_mask);
+		oprofilefs_create_ulong(dir, "kernel", &ctr[i].kernel);
+		oprofilefs_create_ulong(dir, "user", &ctr[i].user);
+		oprofilefs_create_ulong(dir, "unit_mask", &ctr[i].unit_mask);
 	}
 
 	if (model->can_set_proc_mode) {
-		oprofilefs_create_ulong(sb, root, "enable_pal",
+		oprofilefs_create_ulong(root, "enable_pal",
 					&sys.enable_pal);
-		oprofilefs_create_ulong(sb, root, "enable_kernel",
+		oprofilefs_create_ulong(root, "enable_kernel",
 					&sys.enable_kernel);
-		oprofilefs_create_ulong(sb, root, "enable_user",
+		oprofilefs_create_ulong(root, "enable_user",
 					&sys.enable_user);
 	}
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 43594d5116ef..cd5c1c97b043 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -2064,8 +2064,7 @@ config KEXEC
 	  It is an ongoing process to be certain the hardware in a machine
 	  is properly shutdown, so do not be surprised if this code does not
-	  initially work for you. It may help to enable device hotplugging
-	  support.
+	  initially work for you.
 
 config ATAGS_PROC
 	bool "Export atags in procfs"
diff --git a/arch/arm/boot/dts/am335x-bone.dts b/arch/arm/boot/dts/am335x-bone.dts
index 444b4ede0d60..d318987d44a1 100644
--- a/arch/arm/boot/dts/am335x-bone.dts
+++ b/arch/arm/boot/dts/am335x-bone.dts
@@ -120,6 +120,35 @@
 		status = "okay";
 	};
 
+	musb: usb@47400000 {
+		status = "okay";
+
+		control@44e10000 {
+			status = "okay";
+		};
+
+		usb-phy@47401300 {
+			status = "okay";
+		};
+
+		usb-phy@47401b00 {
+			status = "okay";
+		};
+
+		usb@47401000 {
+			status = "okay";
+		};
+
+		usb@47401800 {
+			status = "okay";
+			dr_mode = "host";
+		};
+
+		dma-controller@07402000 {
+			status = "okay";
+		};
+	};
+
 	i2c0: i2c@44e0b000 {
 		pinctrl-names = "default";
 		pinctrl-0 = <&i2c0_pins>;
diff --git a/arch/arm/boot/dts/am335x-evm.dts b/arch/arm/boot/dts/am335x-evm.dts
index 3aee1a43782d..e8ec8756e498 100644
--- a/arch/arm/boot/dts/am335x-evm.dts
+++ b/arch/arm/boot/dts/am335x-evm.dts
@@ -171,6 +171,35 @@
 		};
 	};
 
+	musb: usb@47400000 {
+		status = "okay";
+
+		control@44e10000 {
+			status = "okay";
+		};
+
+		usb-phy@47401300 {
+			status = "okay";
+		};
+
+		usb-phy@47401b00 {
+			status = "okay";
+		};
+
+		usb@47401000 {
+			status = "okay";
+		};
+
+		usb@47401800 {
+			status = "okay";
+			dr_mode = "host";
+		};
+
+		dma-controller@07402000 {
+			status = "okay";
+		};
+	};
+
 	i2c1: i2c@4802a000 {
 		pinctrl-names = "default";
 		pinctrl-0 = <&i2c1_pins>;
diff --git a/arch/arm/boot/dts/am335x-evmsk.dts b/arch/arm/boot/dts/am335x-evmsk.dts
index 0c8ad173d2b0..4f339fa91c57 100644
--- a/arch/arm/boot/dts/am335x-evmsk.dts
+++ b/arch/arm/boot/dts/am335x-evmsk.dts
@@ -14,6 +14,7 @@
 /dts-v1/;
 
 #include "am33xx.dtsi"
+#include <dt-bindings/pwm/pwm.h>
 
 / {
 	model = "TI AM335x EVM-SK";
@@ -207,6 +208,22 @@
 		};
 	};
 
+	musb: usb@47400000 {
+		status = "okay";
+
+		control@44e10000 {
+			status = "okay";
+		};
+
+		usb-phy@47401300 {
+			status = "okay";
+		};
+
+		usb@47401000 {
+			status = "okay";
+		};
+	};
+
 	epwmss2: epwmss@48304000 {
 		status = "okay";
@@ -298,7 +315,7 @@
 	backlight {
 		compatible = "pwm-backlight";
-		pwms = <&ecap2 0 50000 1>;
+		pwms = <&ecap2 0 50000 PWM_POLARITY_INVERTED>;
 		brightness-levels = <0 58 61 66 75 90 125 170 255>;
 		default-brightness-level = <8>;
 	};
diff --git a/arch/arm/boot/dts/am33xx.dtsi b/arch/arm/boot/dts/am33xx.dtsi
index 38b446ba1ce1..f9c5da9c7fe1 100644
--- a/arch/arm/boot/dts/am33xx.dtsi
+++ b/arch/arm/boot/dts/am33xx.dtsi
@@ -26,6 +26,10 @@
 		serial5 = &uart5;
 		d_can0 = &dcan0;
 		d_can1 = &dcan1;
+		usb0 = &usb0;
+		usb1 = &usb1;
+		phy0 = &usb0_phy;
+		phy1 = &usb1_phy;
 	};
 
 	cpus {
@@ -333,21 +337,132 @@
 			status = "disabled";
 		};
 
-		usb@47400000 {
-			compatible = "ti,musb-am33xx";
-			reg = <0x47400000 0x1000	/* usbss */
-				0x47401000 0x800	/* musb instance 0 */
-				0x47401800 0x800>;	/* musb instance 1 */
-			interrupts = <17	/* usbss */
-				18	/* musb instance 0 */
-				19>;	/* musb instance 1 */
-			multipoint = <1>;
-			num-eps = <16>;
-			ram-bits = <12>;
-			port0-mode = <3>;
-			port1-mode = <3>;
-			power = <250>;
+		usb: usb@47400000 {
+			compatible = "ti,am33xx-usb";
+			reg = <0x47400000 0x1000>;
+			ranges;
+			#address-cells = <1>;
+			#size-cells = <1>;
 			ti,hwmods = "usb_otg_hs";
+			status = "disabled";
+
+			ctrl_mod: control@44e10000 {
+				compatible = "ti,am335x-usb-ctrl-module";
+				reg = <0x44e10620 0x10
+					0x44e10648 0x4>;
+				reg-names = "phy_ctrl", "wakeup";
+				status = "disabled";
+			};
+
+			usb0_phy: usb-phy@47401300 {
+				compatible = "ti,am335x-usb-phy";
+				reg = <0x47401300 0x100>;
+				reg-names = "phy";
+				status = "disabled";
+				ti,ctrl_mod = <&ctrl_mod>;
+			};
+
+			usb0: usb@47401000 {
+				compatible = "ti,musb-am33xx";
+				status = "disabled";
+				reg = <0x47401400 0x400
+					0x47401000 0x200>;
+				reg-names = "mc", "control";
+
+				interrupts = <18>;
+				interrupt-names = "mc";
+				dr_mode = "otg";
+				mentor,multipoint = <1>;
+				mentor,num-eps = <16>;
+				mentor,ram-bits = <12>;
+				mentor,power = <500>;
+				phys = <&usb0_phy>;
+
+				dmas = <&cppi41dma 0 0 &cppi41dma 1 0
+					&cppi41dma 2 0 &cppi41dma 3 0
+					&cppi41dma 4 0 &cppi41dma 5 0
+					&cppi41dma 6 0 &cppi41dma 7 0
+					&cppi41dma 8 0 &cppi41dma 9 0
+					&cppi41dma 10 0 &cppi41dma 11 0
+					&cppi41dma 12 0 &cppi41dma 13 0
+					&cppi41dma 14 0 &cppi41dma 0 1
+					&cppi41dma 1 1 &cppi41dma 2 1
+					&cppi41dma 3 1 &cppi41dma 4 1
+					&cppi41dma 5 1 &cppi41dma 6 1
+					&cppi41dma 7 1 &cppi41dma 8 1
+					&cppi41dma 9 1 &cppi41dma 10 1
+					&cppi41dma 11 1 &cppi41dma 12 1
+					&cppi41dma 13 1 &cppi41dma 14 1>;
+				dma-names =
+					"rx1", "rx2", "rx3", "rx4", "rx5", "rx6", "rx7",
+					"rx8", "rx9", "rx10", "rx11", "rx12", "rx13",
+					"rx14", "rx15",
+					"tx1", "tx2", "tx3", "tx4", "tx5", "tx6", "tx7",
+					"tx8", "tx9", "tx10", "tx11", "tx12", "tx13",
+					"tx14", "tx15";
+			};
+
+			usb1_phy: usb-phy@47401b00 {
+				compatible = "ti,am335x-usb-phy";
+				reg = <0x47401b00 0x100>;
+				reg-names = "phy";
+				status = "disabled";
+				ti,ctrl_mod = <&ctrl_mod>;
+			};
+
+			usb1: usb@47401800 {
+				compatible = "ti,musb-am33xx";
+				status = "disabled";
+				reg = <0x47401c00 0x400
+					0x47401800 0x200>;
+				reg-names = "mc", "control";
+				interrupts = <19>;
+				interrupt-names = "mc";
+				dr_mode = "otg";
+				mentor,multipoint = <1>;
+				mentor,num-eps = <16>;
+				mentor,ram-bits = <12>;
+				mentor,power = <500>;
+				phys = <&usb1_phy>;
+
+				dmas = <&cppi41dma 15 0 &cppi41dma 16 0
+					&cppi41dma 17 0 &cppi41dma 18 0
+					&cppi41dma 19 0 &cppi41dma 20 0
+					&cppi41dma 21 0 &cppi41dma 22 0
+					&cppi41dma 23 0 &cppi41dma 24 0
+					&cppi41dma 25 0 &cppi41dma 26 0
+					&cppi41dma 27 0 &cppi41dma 28 0
+					&cppi41dma 29 0 &cppi41dma 15 1
+					&cppi41dma 16 1 &cppi41dma 17 1
+					&cppi41dma 18 1 &cppi41dma 19 1
+					&cppi41dma 20 1 &cppi41dma 21 1
+					&cppi41dma 22 1 &cppi41dma 23 1
+					&cppi41dma 24 1 &cppi41dma 25 1
+					&cppi41dma 26 1 &cppi41dma 27 1
+					&cppi41dma 28 1 &cppi41dma 29 1>;
+				dma-names =
+					"rx1", "rx2", "rx3", "rx4", "rx5", "rx6", "rx7",
+					"rx8", "rx9", "rx10", "rx11", "rx12", "rx13",
+					"rx14", "rx15",
+					"tx1", "tx2", "tx3", "tx4", "tx5", "tx6", "tx7",
+					"tx8", "tx9", "tx10", "tx11", "tx12", "tx13",
+					"tx14", "tx15";
+			};
+
+			cppi41dma: dma-controller@07402000 {
+				compatible = "ti,am3359-cppi41";
+				reg = <0x47400000 0x1000
+					0x47402000 0x1000
+					0x47403000 0x1000
+					0x47404000 0x4000>;
+				reg-names = "glue", "controller", "scheduler", "queuemgr";
+				interrupts = <17>;
+				interrupt-names = "glue";
+				#dma-cells = <2>;
+				#dma-channels = <30>;
+				#dma-requests = <256>;
+				status = "disabled";
+			};
 		};
 
 		epwmss0: epwmss@48300000 {
diff --git a/arch/arm/boot/dts/atlas6.dtsi b/arch/arm/boot/dts/atlas6.dtsi
index a0f2721ea583..8678e0c11119 100644
--- a/arch/arm/boot/dts/atlas6.dtsi
+++ b/arch/arm/boot/dts/atlas6.dtsi
@@ -329,6 +329,12 @@
 				sirf,function = "uart0";
 			};
 		};
+		uart0_noflow_pins_a: uart0@1 {
+			uart {
+				sirf,pins = "uart0_nostreamctrlgrp";
+				sirf,function = "uart0_nostreamctrl";
+			};
+		};
 		uart1_pins_a: uart1@0 {
 			uart {
 				sirf,pins = "uart1grp";
diff --git a/arch/arm/boot/dts/exynos5250.dtsi b/arch/arm/boot/dts/exynos5250.dtsi
index ef57277fc38f..376090f07231 100644
--- a/arch/arm/boot/dts/exynos5250.dtsi
+++ b/arch/arm/boot/dts/exynos5250.dtsi
@@ -405,7 +405,7 @@
 	};
 
 	i2s0: i2s@03830000 {
-		compatible = "samsung,i2s-v5";
+		compatible = "samsung,s5pv210-i2s";
 		reg = <0x03830000 0x100>;
 		dmas = <&pdma0 10
 			&pdma0 9
@@ -415,16 +415,13 @@
 			<&clock_audss EXYNOS_I2S_BUS>,
 			<&clock_audss EXYNOS_SCLK_I2S>;
 		clock-names = "iis", "i2s_opclk0", "i2s_opclk1";
-		samsung,supports-6ch;
-		samsung,supports-rstclr;
-		samsung,supports-secdai;
 		samsung,idma-addr = <0x03000000>;
 		pinctrl-names = "default";
 		pinctrl-0 = <&i2s0_bus>;
 	};
 
 	i2s1: i2s@12D60000 {
-		compatible = "samsung,i2s-v5";
+		compatible = "samsung,s3c6410-i2s";
 		reg = <0x12D60000 0x100>;
 		dmas = <&pdma1 12
 			&pdma1 11>;
@@ -436,7 +433,7 @@
 	};
 
 	i2s2: i2s@12D70000 {
-		compatible = "samsung,i2s-v5";
+		compatible = "samsung,s3c6410-i2s";
 		reg = <0x12D70000 0x100>;
 		dmas = <&pdma0 12
 			&pdma0 11>;
diff --git a/arch/arm/boot/dts/exynos5440.dtsi b/arch/arm/boot/dts/exynos5440.dtsi
index ff7f5d855845..586134e2a382 100644
--- a/arch/arm/boot/dts/exynos5440.dtsi
+++ b/arch/arm/boot/dts/exynos5440.dtsi
@@ -248,6 +248,7 @@
 		#interrupt-cells = <1>;
 		interrupt-map-mask = <0 0 0 0>;
 		interrupt-map = <0x0 0 &gic 53>;
+		num-lanes = <4>;
 	};
 
 	pcie@2a0000 {
@@ -267,5 +268,6 @@
 		#interrupt-cells = <1>;
 		interrupt-map-mask = <0 0 0 0>;
 		interrupt-map = <0x0 0 &gic 56>;
+		num-lanes = <4>;
 	};
 };
diff --git a/arch/arm/boot/dts/imx28-evk.dts b/arch/arm/boot/dts/imx28-evk.dts
index e035f4664b97..15715d921d14 100644
--- a/arch/arm/boot/dts/imx28-evk.dts
+++ b/arch/arm/boot/dts/imx28-evk.dts
@@ -220,6 +220,7 @@
 			auart0: serial@8006a000 {
 				pinctrl-names = "default";
 				pinctrl-0 = <&auart0_pins_a>;
+				fsl,uart-has-rtscts;
 				status = "okay";
 			};
diff --git a/arch/arm/boot/dts/msm8660-surf.dts b/arch/arm/boot/dts/msm8660-surf.dts
index cdc010e0f93e..386d42870215 100644
--- a/arch/arm/boot/dts/msm8660-surf.dts
+++ b/arch/arm/boot/dts/msm8660-surf.dts
@@ -38,7 +38,7 @@
 	};
 
 	serial@19c40000 {
-		compatible = "qcom,msm-hsuart", "qcom,msm-uart";
+		compatible = "qcom,msm-uartdm-v1.3", "qcom,msm-uartdm";
 		reg = <0x19c40000 0x1000>,
 		      <0x19c00000 0x1000>;
 		interrupts = <0 195 0x0>;
diff --git a/arch/arm/boot/dts/msm8960-cdp.dts b/arch/arm/boot/dts/msm8960-cdp.dts
index 9c1167b0459b..93e9f7e0b7ad 100644
--- a/arch/arm/boot/dts/msm8960-cdp.dts
+++ b/arch/arm/boot/dts/msm8960-cdp.dts
@@ -38,7 +38,7 @@
 	};
 
 	serial@16440000 {
-		compatible = "qcom,msm-hsuart", "qcom,msm-uart";
+		compatible = "qcom,msm-uartdm-v1.3", "qcom,msm-uartdm";
 		reg = <0x16440000 0x1000>,
 		      <0x16400000 0x1000>;
 		interrupts = <0 154 0x0>;
diff --git a/arch/arm/boot/dts/omap5.dtsi b/arch/arm/boot/dts/omap5.dtsi
index e643620417a9..07be2cd7b318 100644
--- a/arch/arm/boot/dts/omap5.dtsi
+++ b/arch/arm/boot/dts/omap5.dtsi
@@ -644,7 +644,7 @@
 			utmi-mode = <2>;
 			ranges;
 			dwc3@4a030000 {
-				compatible = "synopsys,dwc3";
+				compatible = "snps,dwc3";
 				reg = <0x4a030000 0x1000>;
 				interrupts = <GIC_SPI 92 IRQ_TYPE_LEVEL_HIGH>;
 				usb-phy = <&usb2_phy>, <&usb3_phy>;
diff --git a/arch/arm/boot/dts/tegra20-seaboard.dts b/arch/arm/boot/dts/tegra20-seaboard.dts
index 40e6fb280333..c8242533268f 100644
--- a/arch/arm/boot/dts/tegra20-seaboard.dts
+++ b/arch/arm/boot/dts/tegra20-seaboard.dts
@@ -566,7 +566,6 @@
 	usb@c5000000 {
 		status = "okay";
-		nvidia,vbus-gpio = <&gpio TEGRA_GPIO(D, 0) GPIO_ACTIVE_HIGH>;
 		dr_mode = "otg";
 	};
diff --git a/arch/arm/boot/dts/tegra20-trimslice.dts b/arch/arm/boot/dts/tegra20-trimslice.dts
index 37c93d3c4812..1e9d33adb925 100644
--- a/arch/arm/boot/dts/tegra20-trimslice.dts
+++ b/arch/arm/boot/dts/tegra20-trimslice.dts
@@ -312,7 +312,6 @@
 	usb@c5000000 {
 		status = "okay";
-		nvidia,vbus-gpio = <&gpio TEGRA_GPIO(V, 2) GPIO_ACTIVE_HIGH>;
 	};
 
 	usb-phy@c5000000 {
diff --git a/arch/arm/boot/dts/tegra20-whistler.dts b/arch/arm/boot/dts/tegra20-whistler.dts
index a3d0ebad78a1..c703197dca6e 100644
--- a/arch/arm/boot/dts/tegra20-whistler.dts
+++ b/arch/arm/boot/dts/tegra20-whistler.dts
@@ -509,7 +509,6 @@
 	usb@c5000000 {
 		status = "okay";
-		nvidia,vbus-gpio = <&tca6416 0 GPIO_ACTIVE_HIGH>;
 	};
 
 	usb-phy@c5000000 {
@@ -519,7 +518,6 @@
 	usb@c5008000 {
 		status = "okay";
-		nvidia,vbus-gpio = <&tca6416 1 GPIO_ACTIVE_HIGH>;
 	};
 
 	usb-phy@c5008000 {
diff --git a/arch/arm/boot/dts/tegra20.dtsi b/arch/arm/boot/dts/tegra20.dtsi
index 9653fd8288d2..e4570834512e 100644
--- a/arch/arm/boot/dts/tegra20.dtsi
+++ b/arch/arm/boot/dts/tegra20.dtsi
@@ -477,13 +477,13 @@
 			 <&tegra_car TEGRA20_CLK_USBD>;
 		clock-names = "reg", "pll_u", "timer", "utmi-pads";
 		nvidia,has-legacy-mode;
-		hssync_start_delay = <9>;
-		idle_wait_delay = <17>;
-		elastic_limit = <16>;
-		term_range_adj = <6>;
-		xcvr_setup = <9>;
-		xcvr_lsfslew = <1>;
-		xcvr_lsrslew = <1>;
+		nvidia,hssync-start-delay = <9>;
+		nvidia,idle-wait-delay = <17>;
+		nvidia,elastic-limit = <16>;
+		nvidia,term-range-adj = <6>;
+		nvidia,xcvr-setup = <9>;
+		nvidia,xcvr-lsfslew = <1>;
+		nvidia,xcvr-lsrslew = <1>;
 		status = "disabled";
 	};
@@ -527,13 +527,13 @@
 			 <&tegra_car TEGRA20_CLK_CLK_M>,
 			 <&tegra_car TEGRA20_CLK_USBD>;
 		clock-names = "reg", "pll_u", "timer", "utmi-pads";
-		hssync_start_delay = <9>;
-		idle_wait_delay = <17>;
-		elastic_limit = <16>;
-		term_range_adj = <6>;
-		xcvr_setup = <9>;
-		xcvr_lsfslew = <2>;
-		xcvr_lsrslew = <2>;
+		nvidia,hssync-start-delay = <9>;
+		nvidia,idle-wait-delay = <17>;
+		nvidia,elastic-limit = <16>;
+		nvidia,term-range-adj = <6>;
+		nvidia,xcvr-setup = <9>;
+		nvidia,xcvr-lsfslew = <2>;
+		nvidia,xcvr-lsrslew = <2>;
 		status = "disabled";
 	};
diff --git a/arch/arm/boot/dts/wm8850-w70v2.dts b/arch/arm/boot/dts/wm8850-w70v2.dts
index 90e913fb64be..7a563d2523b0 100644
--- a/arch/arm/boot/dts/wm8850-w70v2.dts
+++ b/arch/arm/boot/dts/wm8850-w70v2.dts
@@ -11,13 +11,14 @@
 
 /dts-v1/;
 /include/ "wm8850.dtsi"
+#include <dt-bindings/pwm/pwm.h>
 
 / {
 	model = "Wondermedia WM8850-W70v2 Tablet";
 
 	backlight {
 		compatible = "pwm-backlight";
-		pwms = <&pwm 0 50000 1>;	/* duty inverted */
+		pwms = <&pwm 0 50000 PWM_POLARITY_INVERTED>;
 
 		brightness-levels = <0 40 60 80 100 130 190 255>;
 		default-brightness-level = <5>;
diff --git a/arch/arm/configs/bockw_defconfig b/arch/arm/configs/bockw_defconfig
index 845f5cdf62b5..e7e94948d194 100644
--- a/arch/arm/configs/bockw_defconfig
+++ b/arch/arm/configs/bockw_defconfig
@@ -82,6 +82,13 @@ CONFIG_SERIAL_SH_SCI_CONSOLE=y
 # CONFIG_HWMON is not set
 CONFIG_I2C=y
 CONFIG_I2C_RCAR=y
+CONFIG_MEDIA_SUPPORT=y
+CONFIG_MEDIA_CAMERA_SUPPORT=y
+CONFIG_V4L_PLATFORM_DRIVERS=y
+CONFIG_SOC_CAMERA=y
+CONFIG_VIDEO_RCAR_VIN=y
+# CONFIG_MEDIA_SUBDRV_AUTOSELECT is not set
+CONFIG_VIDEO_ML86V7667=y
 CONFIG_SPI=y
 CONFIG_SPI_SH_HSPI=y
 CONFIG_USB=y
diff --git a/arch/arm/configs/keystone_defconfig b/arch/arm/configs/keystone_defconfig
index 62e968cac9dc..1f36b823905f 100644
--- a/arch/arm/configs/keystone_defconfig
+++ b/arch/arm/configs/keystone_defconfig
@@ -104,6 +104,7 @@ CONFIG_IP_SCTP=y
 CONFIG_VLAN_8021Q=y
 CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 CONFIG_CMA=y
+CONFIG_DMA_CMA=y
 CONFIG_MTD=y
 CONFIG_MTD_CMDLINE_PARTS=y
 CONFIG_MTD_BLOCK=y
diff --git a/arch/arm/configs/marzen_defconfig b/arch/arm/configs/marzen_defconfig
index 494e70aeb9e1..c50e52be4463 100644
--- a/arch/arm/configs/marzen_defconfig
+++ b/arch/arm/configs/marzen_defconfig
@@ -84,6 +84,13 @@ CONFIG_GPIO_RCAR=y
 CONFIG_THERMAL=y
 CONFIG_RCAR_THERMAL=y
 CONFIG_SSB=y
+CONFIG_MEDIA_SUPPORT=y
+CONFIG_MEDIA_CAMERA_SUPPORT=y
+CONFIG_V4L_PLATFORM_DRIVERS=y
+CONFIG_SOC_CAMERA=y
+CONFIG_VIDEO_RCAR_VIN=y
+# CONFIG_MEDIA_SUBDRV_AUTOSELECT is not set
+CONFIG_VIDEO_ADV7180=y
 CONFIG_USB=y
 CONFIG_USB_RCAR_PHY=y
 CONFIG_MMC=y
diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig
index 5339e6a4d639..056b27aafbe6 100644
--- a/arch/arm/configs/omap2plus_defconfig
+++ b/arch/arm/configs/omap2plus_defconfig
@@ -78,6 +78,7 @@ CONFIG_MAC80211_RC_PID=y
 CONFIG_MAC80211_RC_DEFAULT_PID=y
 CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 CONFIG_CMA=y
+CONFIG_DMA_CMA=y
 CONFIG_CONNECTOR=y
 CONFIG_DEVTMPFS=y
 CONFIG_DEVTMPFS_MOUNT=y
@@ -185,13 +186,11 @@ CONFIG_OMAP2_DSS_RFBI=y
 CONFIG_OMAP2_DSS_SDI=y
 CONFIG_OMAP2_DSS_DSI=y
 CONFIG_FB_OMAP2=m
-CONFIG_PANEL_GENERIC_DPI=m
-CONFIG_PANEL_TFP410=m
-CONFIG_PANEL_SHARP_LS037V7DW01=m
-CONFIG_PANEL_NEC_NL8048HL11_01B=m
-CONFIG_PANEL_TAAL=m
-CONFIG_PANEL_TPO_TD043MTEA1=m
-CONFIG_PANEL_ACX565AKM=m
+CONFIG_DISPLAY_ENCODER_TFP410=m
+CONFIG_DISPLAY_ENCODER_TPD12S015=m
+CONFIG_DISPLAY_CONNECTOR_DVI=m
+CONFIG_DISPLAY_CONNECTOR_HDMI=m
+CONFIG_DISPLAY_PANEL_DPI=m
 CONFIG_BACKLIGHT_LCD_SUPPORT=y
 CONFIG_LCD_CLASS_DEVICE=y
 CONFIG_LCD_PLATFORM=y
diff --git a/arch/arm/configs/tegra_defconfig b/arch/arm/configs/tegra_defconfig
index 1effb43dab80..92d0a149aeb5 100644
--- a/arch/arm/configs/tegra_defconfig
+++ b/arch/arm/configs/tegra_defconfig
@@ -79,6 +79,7 @@ CONFIG_DEVTMPFS=y
 CONFIG_DEVTMPFS_MOUNT=y
 # CONFIG_FIRMWARE_IN_KERNEL is not set
 CONFIG_CMA=y
+CONFIG_DMA_CMA=y
 CONFIG_MTD=y
 CONFIG_MTD_M25P80=y
 CONFIG_PROC_DEVICETREE=y
diff --git a/arch/arm/include/asm/arch_timer.h b/arch/arm/include/asm/arch_timer.h
index e406d575c94f..5665134bfa3e 100644
--- a/arch/arm/include/asm/arch_timer.h
+++ b/arch/arm/include/asm/arch_timer.h
@@ -17,7 +17,8 @@ int arch_timer_arch_init(void);
  * nicely work out which register we want, and chuck away the rest of
  * the code. At least it does so with a recent GCC (4.6.3).
  */
-static inline void arch_timer_reg_write(const int access, const int reg, u32 val)
+static __always_inline
+void arch_timer_reg_write_cp15(int access, enum arch_timer_reg reg, u32 val)
 {
 	if (access == ARCH_TIMER_PHYS_ACCESS) {
 		switch (reg) {
@@ -28,9 +29,7 @@ static inline void arch_timer_reg_write(const int access, const int reg, u32 val
 			asm volatile("mcr p15, 0, %0, c14, c2, 0" : : "r" (val));
 			break;
 		}
-	}
-
-	if (access == ARCH_TIMER_VIRT_ACCESS) {
+	} else if (access == ARCH_TIMER_VIRT_ACCESS) {
 		switch (reg) {
 		case ARCH_TIMER_REG_CTRL:
 			asm volatile("mcr p15, 0, %0, c14, c3, 1" : : "r" (val));
@@ -44,7 +43,8 @@ static inline void arch_timer_reg_write(const int access, const int reg, u32 val
 	isb();
 }
 
-static inline u32 arch_timer_reg_read(const int access, const int reg)
+static __always_inline
+u32 arch_timer_reg_read_cp15(int access, enum arch_timer_reg reg)
 {
 	u32 val = 0;
 
@@ -57,9 +57,7 @@ static inline u32 arch_timer_reg_read(const int access, const int reg)
 			asm volatile("mrc p15, 0, %0, c14, c2, 0" : "=r" (val));
 			break;
 		}
-	}
-
-	if (access == ARCH_TIMER_VIRT_ACCESS) {
+	} else if (access == ARCH_TIMER_VIRT_ACCESS) {
 		switch (reg) {
 		case ARCH_TIMER_REG_CTRL:
 			asm volatile("mrc p15, 0, %0, c14, c3, 1" : "=r" (val));
diff --git a/arch/arm/include/asm/dma-contiguous.h b/arch/arm/include/asm/dma-contiguous.h
index 3ed37b4d93da..e072bb2ba1b1 100644
--- a/arch/arm/include/asm/dma-contiguous.h
+++ b/arch/arm/include/asm/dma-contiguous.h
@@ -2,7 +2,7 @@
 #define ASMARM_DMA_CONTIGUOUS_H
 
 #ifdef __KERNEL__
-#ifdef CONFIG_CMA
+#ifdef CONFIG_DMA_CMA
 
 #include <linux/types.h>
 #include <asm-generic/dma-contiguous.h>
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index 472ac7091003..9b28c41f4ba9 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -64,7 +64,7 @@ void kvm_clear_hyp_idmap(void);
 
 static inline void kvm_set_pte(pte_t *pte, pte_t new_pte)
 {
-	pte_val(*pte) = new_pte;
+	*pte = new_pte;
 	/*
 	 * flush_pmd_entry just takes a void pointer and cleans the necessary
 	 * cache entries, so we can reuse the function for ptes.
diff --git a/arch/arm/kernel/bios32.c b/arch/arm/kernel/bios32.c
index 261fcc826169..88e14d74b6de 100644
--- a/arch/arm/kernel/bios32.c
+++ b/arch/arm/kernel/bios32.c
@@ -525,11 +525,6 @@ void pci_common_init_dev(struct device *parent, struct hw_pci *hw)
 		 * Assign resources.
 		 */
 		pci_bus_assign_resources(bus);
-
-		/*
-		 * Enable bridges
-		 */
-		pci_enable_bridges(bus);
 	}
 
 	/*
diff --git a/arch/arm/kernel/devtree.c b/arch/arm/kernel/devtree.c
index 5859c8bc727c..2ee8a17d2b01 100644
--- a/arch/arm/kernel/devtree.c
+++ b/arch/arm/kernel/devtree.c
@@ -169,6 +169,11 @@ void __init arm_dt_init_cpu_maps(void)
 	}
 }
 
+bool arch_match_cpu_phys_id(int cpu, u64 phys_id)
+{
+	return (phys_id & MPIDR_HWID_BITMASK) == cpu_logical_map(cpu);
+}
+
 /**
  * setup_machine_fdt - Machine setup when an dtb was passed to the kernel
  * @dt_phys: physical address of dt blob
diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c
index c5a59546a256..85a87370f144 100644
--- a/arch/arm/kernel/topology.c
+++ b/arch/arm/kernel/topology.c
@@ -74,12 +74,8 @@ struct cpu_efficiency table_efficiency[] = {
 	{NULL, },
 };
 
-struct cpu_capacity {
-	unsigned long hwid;
-	unsigned long capacity;
-};
-
-struct cpu_capacity *cpu_capacity;
+unsigned long *__cpu_capacity;
+#define cpu_capacity(cpu)	__cpu_capacity[cpu]
 
 unsigned long middle_capacity = 1;
 
@@ -100,15 +96,19 @@ static void __init parse_dt_topology(void)
 	unsigned long capacity = 0;
 	int alloc_size, cpu = 0;
 
-	alloc_size = nr_cpu_ids * sizeof(struct cpu_capacity);
-	cpu_capacity = kzalloc(alloc_size, GFP_NOWAIT);
+	alloc_size = nr_cpu_ids * sizeof(*__cpu_capacity);
+	__cpu_capacity = kzalloc(alloc_size, GFP_NOWAIT);
 
-	while ((cn = of_find_node_by_type(cn, "cpu"))) {
-		const u32 *rate, *reg;
+	for_each_possible_cpu(cpu) {
+		const u32 *rate;
 		int len;
 
-		if (cpu >= num_possible_cpus())
-			break;
+		/* too early to use cpu->of_node */
+		cn = of_get_cpu_node(cpu, NULL);
+		if (!cn) {
+			pr_err("missing device node for CPU %d\n", cpu);
+			continue;
+		}
 
 		for (cpu_eff = table_efficiency; cpu_eff->compatible; cpu_eff++)
 			if (of_device_is_compatible(cn, cpu_eff->compatible))
@@ -124,12 +124,6 @@ static void __init parse_dt_topology(void)
 			continue;
 		}
 
-		reg = of_get_property(cn, "reg", &len);
-		if (!reg || len != 4) {
-			pr_err("%s missing reg property\n", cn->full_name);
-			continue;
-		}
-
 		capacity = ((be32_to_cpup(rate)) >> 20) * cpu_eff->efficiency;
 
 		/* Save min capacity of the system */
@@ -140,13 +134,9 @@ static void __init parse_dt_topology(void)
 		if (capacity > max_capacity)
 			max_capacity = capacity;
 
-		cpu_capacity[cpu].capacity = capacity;
-		cpu_capacity[cpu++].hwid = be32_to_cpup(reg);
+		cpu_capacity(cpu) = capacity;
 	}
 
-	if (cpu < num_possible_cpus())
-		cpu_capacity[cpu].hwid = (unsigned long)(-1);
-
 	/* If min and max capacities are equals, we bypass the update of the
 	 * cpu_scale because all CPUs have the same capacity. Otherwise, we
 	 * compute a middle_capacity factor that will ensure that the capacity
@@ -154,9 +144,7 @@ static void __init parse_dt_topology(void)
 	 * SCHED_POWER_SCALE, which is the default value, but with the
 	 * constraint explained near table_efficiency[].
 	 */
-	if (min_capacity == max_capacity)
-		cpu_capacity[0].hwid = (unsigned long)(-1);
-	else if (4*max_capacity < (3*(max_capacity + min_capacity)))
+	if (4*max_capacity < (3*(max_capacity + min_capacity)))
 		middle_capacity = (min_capacity + max_capacity)
 				>> (SCHED_POWER_SHIFT+1);
 	else
@@ -170,23 +158,12 @@ static void __init parse_dt_topology(void)
  * boot. The update of all CPUs is in O(n^2) for heteregeneous system but the
  * function returns directly for SMP system.
  */
-void update_cpu_power(unsigned int cpu, unsigned long hwid)
+void update_cpu_power(unsigned int cpu)
 {
-	unsigned int idx = 0;
-
-	/* look for the cpu's hwid in the cpu capacity table */
-	for (idx = 0; idx < num_possible_cpus(); idx++) {
-		if (cpu_capacity[idx].hwid == hwid)
-			break;
-
-		if (cpu_capacity[idx].hwid == -1)
-			return;
-	}
-
-	if (idx == num_possible_cpus())
+	if (!cpu_capacity(cpu))
 		return;
 
-	set_power_scale(cpu, cpu_capacity[idx].capacity / middle_capacity);
+	set_power_scale(cpu, cpu_capacity(cpu) / middle_capacity);
 
 	printk(KERN_INFO "CPU%u: update cpu_power %lu\n",
 		cpu, arch_scale_freq_power(NULL, cpu));
@@ -194,7 +171,7 @@ void update_cpu_power(unsigned int cpu, unsigned long hwid)
 
 #else
 static inline void parse_dt_topology(void) {}
-static inline void update_cpu_power(unsigned int cpuid, unsigned int mpidr) {}
+static inline void update_cpu_power(unsigned int cpuid) {}
 #endif
 
 /*
@@ -281,7 +258,7 @@ void store_cpu_topology(unsigned int cpuid)
 
 	update_siblings_masks(cpuid);
 
-	update_cpu_power(cpuid, mpidr & MPIDR_HWID_BITMASK);
+	update_cpu_power(cpuid);
 
 	printk(KERN_INFO "CPU%u: thread %d, cpu %d, socket %d, mpidr %x\n",
 		cpuid, cpu_topology[cpuid].thread_id,
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 741f66a2edbd..9c697db2787e 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -219,6 +219,10 @@ long kvm_arch_dev_ioctl(struct file *filp,
 	return -EINVAL;
 }
 
+void kvm_arch_memslots_updated(struct kvm *kvm)
+{
+}
+
 int kvm_arch_prepare_memory_region(struct kvm *kvm,
 				   struct kvm_memory_slot *memslot,
 				   struct kvm_userspace_memory_region *mem,
diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S
index 16cd4ba5d7fd..85dd84b10687 100644
--- a/arch/arm/kvm/interrupts.S
+++ b/arch/arm/kvm/interrupts.S
@@ -492,10 +492,10 @@ __kvm_hyp_code_end:
 	.section ".rodata"
 
 und_die_str:
-	.ascii	"unexpected undefined exception in Hyp mode at: %#08x"
+	.ascii	"unexpected undefined exception in Hyp mode at: %#08x\n"
 pabt_die_str:
-	.ascii	"unexpected prefetch abort in Hyp mode at: %#08x"
+	.ascii	"unexpected prefetch abort in Hyp mode at: %#08x\n"
 dabt_die_str:
-	.ascii	"unexpected data abort in Hyp mode at: %#08x"
+	.ascii	"unexpected data abort in Hyp mode at: %#08x\n"
 svc_die_str:
-	.ascii	"unexpected HVC/SVC trap in Hyp mode at: %#08x"
+	.ascii	"unexpected HVC/SVC trap in Hyp mode at: %#08x\n"
diff --git a/arch/arm/kvm/reset.c b/arch/arm/kvm/reset.c
index b7840e7aa452..71e08baee209 100644
--- a/arch/arm/kvm/reset.c
+++ b/arch/arm/kvm/reset.c
@@ -40,7 +40,7 @@ static struct kvm_regs a15_regs_reset = {
 };
 
 static const struct kvm_irq_level a15_vtimer_irq = {
-	.irq = 27,
+	{ .irq = 27 },
 	.level = 1,
 };
diff --git a/arch/arm/kvm/trace.h b/arch/arm/kvm/trace.h
index a8e73ed5ad5b..b1d640f78623 100644
--- a/arch/arm/kvm/trace.h
+++ b/arch/arm/kvm/trace.h
@@ -59,10 +59,9 @@ TRACE_EVENT(kvm_guest_fault,
 		__entry->ipa			= ipa;
 	),
 
-	TP_printk("guest fault at PC %#08lx (hxfar %#08lx, "
-		  "ipa %#16llx, hsr %#08lx",
-		  __entry->vcpu_pc, __entry->hxfar,
-		  __entry->ipa, __entry->hsr)
+	TP_printk("ipa %#llx, hsr %#08lx, hxfar %#08lx, pc %#08lx",
+		  __entry->ipa, __entry->hsr,
+		  __entry->hxfar, __entry->vcpu_pc)
 );
 
 TRACE_EVENT(kvm_irq_line,
diff --git a/arch/arm/mach-at91/include/mach/at91_adc.h b/arch/arm/mach-at91/include/mach/at91_adc.h
index 8e7ed5c90817..048a57f76bd3 100644
--- a/arch/arm/mach-at91/include/mach/at91_adc.h
+++ b/arch/arm/mach-at91/include/mach/at91_adc.h
@@ -28,9 +28,12 @@
 #define			AT91_ADC_TRGSEL_EXTERNAL	(6 << 1)
 #define		AT91_ADC_LOWRES		(1 << 4)	/* Low Resolution */
 #define		AT91_ADC_SLEEP		(1 << 5)	/* Sleep Mode */
-#define		AT91_ADC_PRESCAL	(0x3f << 8)	/* Prescalar Rate Selection */
+#define		AT91_ADC_PRESCAL_9260	(0x3f << 8)	/* Prescalar Rate Selection */
+#define		AT91_ADC_PRESCAL_9G45	(0xff << 8)
 #define			AT91_ADC_PRESCAL_(x)	((x) << 8)
-#define		AT91_ADC_STARTUP	(0x1f << 16)	/* Startup Up Time */
+#define		AT91_ADC_STARTUP_9260	(0x1f << 16)	/* Startup Up Time */
+#define		AT91_ADC_STARTUP_9G45	(0x7f << 16)
+#define		AT91_ADC_STARTUP_9X5	(0xf << 16)
 #define			AT91_ADC_STARTUP_(x)	((x) << 16)
 #define		AT91_ADC_SHTIM		(0xf << 24)	/* Sample & Hold Time */
 #define			AT91_ADC_SHTIM_(x)	((x) << 24)
@@ -48,6 +51,9 @@
 #define		AT91_ADC_ENDRX		(1 << 18)	/* End of RX Buffer */
 #define		AT91_ADC_RXFUFF		(1 << 19)	/* RX Buffer Full */
 
+#define AT91_ADC_SR_9X5		0x30	/* Status Register for 9x5 */
+#define		AT91_ADC_SR_DRDY_9X5	(1 << 24)	/* Data Ready */
+
 #define AT91_ADC_LCDR		0x20	/* Last Converted Data Register */
 #define		AT91_ADC_LDATA		(0x3ff)
 
@@ -58,4 +64,10 @@
 #define AT91_ADC_CHR(n)		(0x30 + ((n) * 4))	/* Channel Data Register N */
 #define		AT91_ADC_DATA		(0x3ff)
 
+#define AT91_ADC_CDR0_9X5	(0x50)	/* Channel Data Register 0 for 9X5 */
+
+#define AT91_ADC_TRGR_9260	AT91_ADC_MR
+#define AT91_ADC_TRGR_9G45	0x08
+#define AT91_ADC_TRGR_9X5	0xC0
+
 #endif
diff --git a/arch/arm/mach-davinci/board-da850-evm.c b/arch/arm/mach-davinci/board-da850-evm.c
index bea6793a7ede..9f09f45835f8 100644
--- a/arch/arm/mach-davinci/board-da850-evm.c
+++ b/arch/arm/mach-davinci/board-da850-evm.c
@@ -1249,12 +1249,10 @@ static struct vpif_capture_config da850_vpif_capture_config = {
 static struct adv7343_platform_data adv7343_pdata = {
 	.mode_config = {
-		.dac_3 = 1,
-		.dac_2 = 1,
-		.dac_1 = 1,
+		.dac = { 1, 1, 1 },
 	},
 	.sd_config = {
-		.sd_dac_out1 = 1,
+		.sd_dac_out = { 1 },
 	},
 };
diff --git a/arch/arm/mach-davinci/cpuidle.c b/arch/arm/mach-davinci/cpuidle.c
index 36aef3a7dedb..f1ac1c94ac0f 100644
--- a/arch/arm/mach-davinci/cpuidle.c
+++ b/arch/arm/mach-davinci/cpuidle.c
@@ -65,7 +65,7 @@ static struct cpuidle_driver davinci_idle_driver = {
 	.states[1]		= {
 		.enter			= davinci_enter_idle,
 		.exit_latency		= 10,
-		.target_residency	= 100000,
+		.target_residency	= 10000,
 		.flags			= CPUIDLE_FLAG_TIME_VALID,
 		.name			= "DDR SR",
 		.desc			= "WFI and DDR Self Refresh",
diff --git a/arch/arm/mach-dove/common.c b/arch/arm/mach-dove/common.c
index 00247c771313..304f069ebf50 100644
--- a/arch/arm/mach-dove/common.c
+++ b/arch/arm/mach-dove/common.c
@@ -108,8 +108,8 @@ static void __init dove_clk_init(void)
 	orion_clkdev_add(NULL, "sdhci-dove.1", sdio1);
 	orion_clkdev_add(NULL, "orion_nand", nand);
 	orion_clkdev_add(NULL, "cafe1000-ccic.0", camera);
-	orion_clkdev_add(NULL, "kirkwood-i2s.0", i2s0);
-	orion_clkdev_add(NULL, "kirkwood-i2s.1", i2s1);
+	orion_clkdev_add(NULL, "mvebu-audio.0", i2s0);
+	orion_clkdev_add(NULL, "mvebu-audio.1", i2s1);
 	orion_clkdev_add(NULL, "mv_crypto", crypto);
 	orion_clkdev_add(NULL, "dove-ac97", ac97);
 	orion_clkdev_add(NULL, "dove-pdma", pdma);
diff --git a/arch/arm/mach-imx/mach-imx6q.c b/arch/arm/mach-imx/mach-imx6q.c
index 7be13f8e69a0..a02f275a198d 100644
--- a/arch/arm/mach-imx/mach-imx6q.c
+++ b/arch/arm/mach-imx/mach-imx6q.c
@@ -254,13 +254,12 @@ static void __init imx6q_opp_init(struct device *cpu_dev)
 {
 	struct device_node *np;
 
-	np = of_find_node_by_path("/cpus/cpu@0");
+	np = of_node_get(cpu_dev->of_node);
 	if (!np) {
 		pr_warn("failed to find cpu0 node\n");
 		return;
 	}
 
-	cpu_dev->of_node = np;
 	if (of_init_opp_table(cpu_dev)) {
 		pr_warn("failed to init OPP table\n");
 		goto put_node;
 	}
diff --git a/arch/arm/mach-kirkwood/common.c b/arch/arm/mach-kirkwood/common.c
index e9238b5567ee..1663de090984 100644
--- a/arch/arm/mach-kirkwood/common.c
+++ b/arch/arm/mach-kirkwood/common.c
@@ -264,7 +264,7 @@ void __init kirkwood_clk_init(void)
 	orion_clkdev_add(NULL, MV_XOR_NAME ".1", xor1);
 	orion_clkdev_add("0", "pcie", pex0);
 	orion_clkdev_add("1", "pcie", pex1);
-	orion_clkdev_add(NULL, "kirkwood-i2s", audio);
+	orion_clkdev_add(NULL, "mvebu-audio", audio);
 	orion_clkdev_add(NULL, MV64XXX_I2C_CTLR_NAME ".0", runit);
 	orion_clkdev_add(NULL, MV64XXX_I2C_CTLR_NAME ".1", runit);
 
@@ -560,7 +560,7 @@ void __init kirkwood_timer_init(void)
 /*****************************************************************************
  * Audio
 ****************************************************************************/
-static struct resource kirkwood_i2s_resources[] = {
+static struct resource kirkwood_audio_resources[] = {
 	[0] = {
 		.start  = AUDIO_PHYS_BASE,
 		.end    = AUDIO_PHYS_BASE + SZ_16K - 1,
@@ -573,29 +573,23 @@ static struct resource kirkwood_i2s_resources[] = {
 	},
 };
 
-static struct kirkwood_asoc_platform_data kirkwood_i2s_data = {
+static struct kirkwood_asoc_platform_data kirkwood_audio_data = {
 	.burst       = 128,
 };
 
-static struct platform_device kirkwood_i2s_device = {
-	.name		= "kirkwood-i2s",
+static struct platform_device kirkwood_audio_device = {
	.name		= "mvebu-audio",
 	.id		= -1,
-	.num_resources	= ARRAY_SIZE(kirkwood_i2s_resources),
-	.resource	= kirkwood_i2s_resources,
+	.num_resources	= ARRAY_SIZE(kirkwood_audio_resources),
+	.resource	= kirkwood_audio_resources,
 	.dev		= {
-		.platform_data	= &kirkwood_i2s_data,
+		.platform_data	= &kirkwood_audio_data,
 	},
 };
 
-static struct platform_device kirkwood_pcm_device = {
-	.name		= "kirkwood-pcm-audio",
-	.id		= -1,
-};
-
 void __init kirkwood_audio_init(void)
 {
-	platform_device_register(&kirkwood_i2s_device);
-	platform_device_register(&kirkwood_pcm_device);
+	platform_device_register(&kirkwood_audio_device);
 }
 
 /*****************************************************************************
diff --git a/arch/arm/mach-msm/devices-msm7x00.c b/arch/arm/mach-msm/devices-msm7x00.c
index 6d50fb964863..d83404d4b328 100644
--- a/arch/arm/mach-msm/devices-msm7x00.c
+++ b/arch/arm/mach-msm/devices-msm7x00.c
@@ -456,9 +456,9 @@ static struct clk_pcom_desc msm_clocks_7x01a[] = {
 	CLK_PCOM("tsif_ref_clk",	TSIF_REF_CLK,	NULL, 0),
 	CLK_PCOM("tv_dac_clk",	TV_DAC_CLK,	NULL, 0),
 	CLK_PCOM("tv_enc_clk",	TV_ENC_CLK,	NULL, 0),
-	CLK_PCOM("uart_clk",	UART1_CLK,	"msm_serial.0", OFF),
-	CLK_PCOM("uart_clk",	UART2_CLK,	"msm_serial.1", 0),
-	CLK_PCOM("uart_clk",	UART3_CLK,	"msm_serial.2", OFF),
+	CLK_PCOM("core",	UART1_CLK,	"msm_serial.0", OFF),
+	CLK_PCOM("core",	UART2_CLK,	"msm_serial.1", 0),
+	CLK_PCOM("core",	UART3_CLK,	"msm_serial.2", OFF),
 	CLK_PCOM("uart1dm_clk",	UART1DM_CLK,	NULL, OFF),
 	CLK_PCOM("uart2dm_clk",	UART2DM_CLK,	NULL, 0),
 	CLK_PCOM("usb_hs_clk",	USB_HS_CLK,	"msm_hsusb", OFF),
diff --git a/arch/arm/mach-msm/devices-msm7x30.c b/arch/arm/mach-msm/devices-msm7x30.c
index d4db75acff56..14e286948f69 100644
--- a/arch/arm/mach-msm/devices-msm7x30.c
+++ b/arch/arm/mach-msm/devices-msm7x30.c
@@ -211,7 +211,7 @@ static struct clk_pcom_desc msm_clocks_7x30[] = {
 	CLK_PCOM("spi_pclk",	SPI_P_CLK,	NULL, 0),
 	CLK_PCOM("tv_dac_clk",	TV_DAC_CLK,	NULL, 0),
 	CLK_PCOM("tv_enc_clk",	TV_ENC_CLK,	NULL, 0),
-	CLK_PCOM("uart_clk",	UART2_CLK,	"msm_serial.1", 0),
+	CLK_PCOM("core",	UART2_CLK,	"msm_serial.1", 0),
 	CLK_PCOM("usb_phy_clk",	USB_PHY_CLK,	NULL, 0),
 	CLK_PCOM("usb_hs_clk",	USB_HS_CLK,	NULL, OFF),
CLK_PCOM("usb_hs_pclk", USB_HS_P_CLK, NULL, OFF), diff --git a/arch/arm/mach-msm/devices-qsd8x50.c b/arch/arm/mach-msm/devices-qsd8x50.c index f5518112284b..2ed89b25d304 100644 --- a/arch/arm/mach-msm/devices-qsd8x50.c +++ b/arch/arm/mach-msm/devices-qsd8x50.c @@ -358,9 +358,9 @@ static struct clk_pcom_desc msm_clocks_8x50[] = { CLK_PCOM("tsif_ref_clk", TSIF_REF_CLK, NULL, 0), CLK_PCOM("tv_dac_clk", TV_DAC_CLK, NULL, 0), CLK_PCOM("tv_enc_clk", TV_ENC_CLK, NULL, 0), - CLK_PCOM("uart_clk", UART1_CLK, NULL, OFF), - CLK_PCOM("uart_clk", UART2_CLK, NULL, 0), - CLK_PCOM("uart_clk", UART3_CLK, "msm_serial.2", OFF), + CLK_PCOM("core", UART1_CLK, NULL, OFF), + CLK_PCOM("core", UART2_CLK, NULL, 0), + CLK_PCOM("core", UART3_CLK, "msm_serial.2", OFF), CLK_PCOM("uartdm_clk", UART1DM_CLK, NULL, OFF), CLK_PCOM("uartdm_clk", UART2DM_CLK, NULL, 0), CLK_PCOM("usb_hs_clk", USB_HS_CLK, NULL, OFF), diff --git a/arch/arm/mach-mvebu/platsmp.c b/arch/arm/mach-mvebu/platsmp.c index ce81d3031405..594b63db4215 100644 --- a/arch/arm/mach-mvebu/platsmp.c +++ b/arch/arm/mach-mvebu/platsmp.c @@ -29,45 +29,40 @@ #include "pmsu.h" #include "coherency.h" +static struct clk *__init get_cpu_clk(int cpu) +{ + struct clk *cpu_clk; + struct device_node *np = of_get_cpu_node(cpu, NULL); + + if (WARN(!np, "missing cpu node\n")) + return NULL; + cpu_clk = of_clk_get(np, 0); + if (WARN_ON(IS_ERR(cpu_clk))) + return NULL; + return cpu_clk; +} + void __init set_secondary_cpus_clock(void) { - int thiscpu; + int thiscpu, cpu; unsigned long rate; - struct clk *cpu_clk = NULL; - struct device_node *np = NULL; + struct clk *cpu_clk; thiscpu = smp_processor_id(); - for_each_node_by_type(np, "cpu") { - int err; - int cpu; - - err = of_property_read_u32(np, "reg", &cpu); - if (WARN_ON(err)) - return; - - if (cpu == thiscpu) { - cpu_clk = of_clk_get(np, 0); - break; - } - } - if (WARN_ON(IS_ERR(cpu_clk))) + cpu_clk = get_cpu_clk(thiscpu); + if (!cpu_clk) return; clk_prepare_enable(cpu_clk); rate = clk_get_rate(cpu_clk); /* set all the other CPU clk to the same rate than the boot CPU */ - for_each_node_by_type(np, "cpu") { - int err; - int cpu; - - err = of_property_read_u32(np, "reg", &cpu); - if (WARN_ON(err)) + for_each_possible_cpu(cpu) { + if (cpu == thiscpu) + continue; + cpu_clk = get_cpu_clk(cpu); + if (!cpu_clk) return; - - if (cpu != thiscpu) { - cpu_clk = of_clk_get(np, 0); - clk_set_rate(cpu_clk, rate); - } + clk_set_rate(cpu_clk, rate); } } diff --git a/arch/arm/mach-omap2/board-2430sdp.c b/arch/arm/mach-omap2/board-2430sdp.c index 244d8a5aa54b..c711ad6ac067 100644 --- a/arch/arm/mach-omap2/board-2430sdp.c +++ b/arch/arm/mach-omap2/board-2430sdp.c @@ -100,39 +100,52 @@ static struct platform_device sdp2430_flash_device = { .resource = &sdp2430_flash_resource, }; -static struct platform_device *sdp2430_devices[] __initdata = { - &sdp2430_flash_device, -}; - /* LCD */ #define SDP2430_LCD_PANEL_BACKLIGHT_GPIO 91 #define SDP2430_LCD_PANEL_ENABLE_GPIO 154 -static struct panel_generic_dpi_data sdp2430_panel_data = { - .name = "nec_nl2432dr22-11b", - .num_gpios = 2, - .gpios = { - SDP2430_LCD_PANEL_ENABLE_GPIO, - SDP2430_LCD_PANEL_BACKLIGHT_GPIO, - }, +static const struct display_timing sdp2430_lcd_videomode = { + .pixelclock = { 0, 5400000, 0 }, + + .hactive = { 0, 240, 0 }, + .hfront_porch = { 0, 3, 0 }, + .hback_porch = { 0, 39, 0 }, + .hsync_len = { 0, 3, 0 }, + + .vactive = { 0, 320, 0 }, + .vfront_porch = { 0, 2, 0 }, + .vback_porch = { 0, 7, 0 }, + .vsync_len = { 0, 1, 0 }, + + .flags = DISPLAY_FLAGS_HSYNC_LOW | 
DISPLAY_FLAGS_VSYNC_LOW | + DISPLAY_FLAGS_DE_HIGH | DISPLAY_FLAGS_PIXDATA_POSEDGE, }; -static struct omap_dss_device sdp2430_lcd_device = { - .name = "lcd", - .driver_name = "generic_dpi_panel", - .type = OMAP_DISPLAY_TYPE_DPI, - .phy.dpi.data_lines = 16, - .data = &sdp2430_panel_data, +static struct panel_dpi_platform_data sdp2430_lcd_pdata = { + .name = "lcd", + .source = "dpi.0", + + .data_lines = 16, + + .display_timing = &sdp2430_lcd_videomode, + + .enable_gpio = SDP2430_LCD_PANEL_ENABLE_GPIO, + .backlight_gpio = SDP2430_LCD_PANEL_BACKLIGHT_GPIO, }; -static struct omap_dss_device *sdp2430_dss_devices[] = { - &sdp2430_lcd_device, +static struct platform_device sdp2430_lcd_device = { + .name = "panel-dpi", + .id = 0, + .dev.platform_data = &sdp2430_lcd_pdata, }; static struct omap_dss_board_info sdp2430_dss_data = { - .num_devices = ARRAY_SIZE(sdp2430_dss_devices), - .devices = sdp2430_dss_devices, - .default_device = &sdp2430_lcd_device, + .default_display_name = "lcd", +}; + +static struct platform_device *sdp2430_devices[] __initdata = { + &sdp2430_flash_device, + &sdp2430_lcd_device, }; #if IS_ENABLED(CONFIG_SMC91X) diff --git a/arch/arm/mach-omap2/board-3430sdp.c b/arch/arm/mach-omap2/board-3430sdp.c index 23b004afa3f8..d95d0ef1354a 100644 --- a/arch/arm/mach-omap2/board-3430sdp.c +++ b/arch/arm/mach-omap2/board-3430sdp.c @@ -126,53 +126,65 @@ static void __init sdp3430_display_init(void) } -static struct panel_sharp_ls037v7dw01_data sdp3430_lcd_data = { - .resb_gpio = SDP3430_LCD_PANEL_ENABLE_GPIO, - .ini_gpio = -1, - .mo_gpio = -1, - .lr_gpio = -1, - .ud_gpio = -1, +static struct panel_sharp_ls037v7dw01_platform_data sdp3430_lcd_pdata = { + .name = "lcd", + .source = "dpi.0", + + .data_lines = 16, + + .resb_gpio = SDP3430_LCD_PANEL_ENABLE_GPIO, + .ini_gpio = -1, + .mo_gpio = -1, + .lr_gpio = -1, + .ud_gpio = -1, +}; + +static struct platform_device sdp3430_lcd_device = { + .name = "panel-sharp-ls037v7dw01", + .id = 0, + .dev.platform_data = &sdp3430_lcd_pdata, }; -static struct omap_dss_device sdp3430_lcd_device = { - .name = "lcd", - .driver_name = "sharp_ls_panel", - .type = OMAP_DISPLAY_TYPE_DPI, - .phy.dpi.data_lines = 16, - .data = &sdp3430_lcd_data, +static struct connector_dvi_platform_data sdp3430_dvi_connector_pdata = { + .name = "dvi", + .source = "tfp410.0", + .i2c_bus_num = -1, }; -static struct tfp410_platform_data dvi_panel = { - .power_down_gpio = -1, - .i2c_bus_num = -1, +static struct platform_device sdp3430_dvi_connector_device = { + .name = "connector-dvi", + .id = 0, + .dev.platform_data = &sdp3430_dvi_connector_pdata, }; -static struct omap_dss_device sdp3430_dvi_device = { - .name = "dvi", - .type = OMAP_DISPLAY_TYPE_DPI, - .driver_name = "tfp410", - .data = &dvi_panel, - .phy.dpi.data_lines = 24, +static struct encoder_tfp410_platform_data sdp3430_tfp410_pdata = { + .name = "tfp410.0", + .source = "dpi.0", + .data_lines = 24, + .power_down_gpio = -1, }; -static struct omap_dss_device sdp3430_tv_device = { - .name = "tv", - .driver_name = "venc", - .type = OMAP_DISPLAY_TYPE_VENC, - .phy.venc.type = OMAP_DSS_VENC_TYPE_SVIDEO, +static struct platform_device sdp3430_tfp410_device = { + .name = "tfp410", + .id = 0, + .dev.platform_data = &sdp3430_tfp410_pdata, }; +static struct connector_atv_platform_data sdp3430_tv_pdata = { + .name = "tv", + .source = "venc.0", + .connector_type = OMAP_DSS_VENC_TYPE_SVIDEO, + .invert_polarity = false, +}; -static struct omap_dss_device *sdp3430_dss_devices[] = { - &sdp3430_lcd_device, - &sdp3430_dvi_device, - 
&sdp3430_tv_device, +static struct platform_device sdp3430_tv_connector_device = { + .name = "connector-analog-tv", + .id = 0, + .dev.platform_data = &sdp3430_tv_pdata, }; static struct omap_dss_board_info sdp3430_dss_data = { - .num_devices = ARRAY_SIZE(sdp3430_dss_devices), - .devices = sdp3430_dss_devices, - .default_device = &sdp3430_lcd_device, + .default_display_name = "lcd", }; static struct omap2_hsmmc_info mmc[] = { @@ -583,6 +595,11 @@ static void __init omap_3430sdp_init(void) omap_hsmmc_init(mmc); omap3430_i2c_init(); omap_display_init(&sdp3430_dss_data); + platform_device_register(&sdp3430_lcd_device); + platform_device_register(&sdp3430_tfp410_device); + platform_device_register(&sdp3430_dvi_connector_device); + platform_device_register(&sdp3430_tv_connector_device); + if (omap_rev() > OMAP3430_REV_ES1_0) gpio_pendown = SDP3430_TS_GPIO_IRQ_SDPV2; else diff --git a/arch/arm/mach-omap2/board-am3517evm.c b/arch/arm/mach-omap2/board-am3517evm.c index d63f14b534b5..8cc2c9e9fb03 100644 --- a/arch/arm/mach-omap2/board-am3517evm.c +++ b/arch/arm/mach-omap2/board-am3517evm.c @@ -120,56 +120,95 @@ static int __init am3517_evm_i2c_init(void) return 0; } -static struct panel_generic_dpi_data lcd_panel = { - .name = "sharp_lq", - .num_gpios = 3, - .gpios = { - LCD_PANEL_PWR, - LCD_PANEL_BKLIGHT_PWR, - LCD_PANEL_PWM, - }, +static const struct display_timing am3517_evm_lcd_videomode = { + .pixelclock = { 0, 9000000, 0 }, + + .hactive = { 0, 480, 0 }, + .hfront_porch = { 0, 3, 0 }, + .hback_porch = { 0, 2, 0 }, + .hsync_len = { 0, 42, 0 }, + + .vactive = { 0, 272, 0 }, + .vfront_porch = { 0, 3, 0 }, + .vback_porch = { 0, 2, 0 }, + .vsync_len = { 0, 11, 0 }, + + .flags = DISPLAY_FLAGS_HSYNC_LOW | DISPLAY_FLAGS_VSYNC_LOW | + DISPLAY_FLAGS_DE_LOW | DISPLAY_FLAGS_PIXDATA_POSEDGE, +}; + +static struct panel_dpi_platform_data am3517_evm_lcd_pdata = { + .name = "lcd", + .source = "dpi.0", + + .data_lines = 16, + + .display_timing = &am3517_evm_lcd_videomode, + + .enable_gpio = LCD_PANEL_PWR, + .backlight_gpio = LCD_PANEL_BKLIGHT_PWR, +}; + +static struct platform_device am3517_evm_lcd_device = { + .name = "panel-dpi", + .id = 0, + .dev.platform_data = &am3517_evm_lcd_pdata, }; -static struct omap_dss_device am3517_evm_lcd_device = { - .type = OMAP_DISPLAY_TYPE_DPI, - .name = "lcd", - .driver_name = "generic_dpi_panel", - .data = &lcd_panel, - .phy.dpi.data_lines = 16, +static struct connector_dvi_platform_data am3517_evm_dvi_connector_pdata = { + .name = "dvi", + .source = "tfp410.0", + .i2c_bus_num = -1, }; -static struct omap_dss_device am3517_evm_tv_device = { - .type = OMAP_DISPLAY_TYPE_VENC, - .name = "tv", - .driver_name = "venc", - .phy.venc.type = OMAP_DSS_VENC_TYPE_SVIDEO, +static struct platform_device am3517_evm_dvi_connector_device = { + .name = "connector-dvi", + .id = 0, + .dev.platform_data = &am3517_evm_dvi_connector_pdata, }; -static struct tfp410_platform_data dvi_panel = { - .power_down_gpio = -1, - .i2c_bus_num = -1, +static struct encoder_tfp410_platform_data am3517_evm_tfp410_pdata = { + .name = "tfp410.0", + .source = "dpi.0", + .data_lines = 24, + .power_down_gpio = -1, }; -static struct omap_dss_device am3517_evm_dvi_device = { - .type = OMAP_DISPLAY_TYPE_DPI, - .name = "dvi", - .driver_name = "tfp410", - .data = &dvi_panel, - .phy.dpi.data_lines = 24, +static struct platform_device am3517_evm_tfp410_device = { + .name = "tfp410", + .id = 0, + .dev.platform_data = &am3517_evm_tfp410_pdata, }; -static struct omap_dss_device *am3517_evm_dss_devices[] = { - 
&am3517_evm_lcd_device, - &am3517_evm_tv_device, - &am3517_evm_dvi_device, +static struct connector_atv_platform_data am3517_evm_tv_pdata = { + .name = "tv", + .source = "venc.0", + .connector_type = OMAP_DSS_VENC_TYPE_SVIDEO, + .invert_polarity = false, +}; + +static struct platform_device am3517_evm_tv_connector_device = { + .name = "connector-analog-tv", + .id = 0, + .dev.platform_data = &am3517_evm_tv_pdata, }; static struct omap_dss_board_info am3517_evm_dss_data = { - .num_devices = ARRAY_SIZE(am3517_evm_dss_devices), - .devices = am3517_evm_dss_devices, - .default_device = &am3517_evm_lcd_device, + .default_display_name = "lcd", }; +static void __init am3517_evm_display_init(void) +{ + gpio_request_one(LCD_PANEL_PWM, GPIOF_OUT_INIT_HIGH, "lcd panel pwm"); + + omap_display_init(&am3517_evm_dss_data); + + platform_device_register(&am3517_evm_tfp410_device); + platform_device_register(&am3517_evm_dvi_connector_device); + platform_device_register(&am3517_evm_lcd_device); + platform_device_register(&am3517_evm_tv_connector_device); +} + /* * Board initialization */ @@ -295,7 +334,9 @@ static void __init am3517_evm_init(void) omap3_mux_init(board_mux, OMAP_PACKAGE_CBB); am3517_evm_i2c_init(); - omap_display_init(&am3517_evm_dss_data); + + am3517_evm_display_init(); + omap_serial_init(); omap_sdrc_init(NULL, NULL); diff --git a/arch/arm/mach-omap2/board-cm-t35.c b/arch/arm/mach-omap2/board-cm-t35.c index d4622ed26252..33d159e2386e 100644 --- a/arch/arm/mach-omap2/board-cm-t35.c +++ b/arch/arm/mach-omap2/board-cm-t35.c @@ -190,52 +190,81 @@ static inline void cm_t35_init_nand(void) {} #define CM_T35_LCD_BL_GPIO 58 #define CM_T35_DVI_EN_GPIO 54 -static struct panel_generic_dpi_data lcd_panel = { - .name = "toppoly_tdo35s", - .num_gpios = 1, - .gpios = { - CM_T35_LCD_BL_GPIO, - }, +static const struct display_timing cm_t35_lcd_videomode = { + .pixelclock = { 0, 26000000, 0 }, + + .hactive = { 0, 480, 0 }, + .hfront_porch = { 0, 104, 0 }, + .hback_porch = { 0, 8, 0 }, + .hsync_len = { 0, 8, 0 }, + + .vactive = { 0, 640, 0 }, + .vfront_porch = { 0, 4, 0 }, + .vback_porch = { 0, 2, 0 }, + .vsync_len = { 0, 2, 0 }, + + .flags = DISPLAY_FLAGS_HSYNC_LOW | DISPLAY_FLAGS_VSYNC_LOW | + DISPLAY_FLAGS_DE_HIGH | DISPLAY_FLAGS_PIXDATA_NEGEDGE, +}; + +static struct panel_dpi_platform_data cm_t35_lcd_pdata = { + .name = "lcd", + .source = "dpi.0", + + .data_lines = 18, + + .display_timing = &cm_t35_lcd_videomode, + + .enable_gpio = -1, + .backlight_gpio = CM_T35_LCD_BL_GPIO, +}; + +static struct platform_device cm_t35_lcd_device = { + .name = "panel-dpi", + .id = 0, + .dev.platform_data = &cm_t35_lcd_pdata, }; -static struct omap_dss_device cm_t35_lcd_device = { - .name = "lcd", - .type = OMAP_DISPLAY_TYPE_DPI, - .driver_name = "generic_dpi_panel", - .data = &lcd_panel, - .phy.dpi.data_lines = 18, +static struct connector_dvi_platform_data cm_t35_dvi_connector_pdata = { + .name = "dvi", + .source = "tfp410.0", + .i2c_bus_num = -1, }; -static struct tfp410_platform_data dvi_panel = { - .power_down_gpio = CM_T35_DVI_EN_GPIO, - .i2c_bus_num = -1, +static struct platform_device cm_t35_dvi_connector_device = { + .name = "connector-dvi", + .id = 0, + .dev.platform_data = &cm_t35_dvi_connector_pdata, }; -static struct omap_dss_device cm_t35_dvi_device = { - .name = "dvi", - .type = OMAP_DISPLAY_TYPE_DPI, - .driver_name = "tfp410", - .data = &dvi_panel, - .phy.dpi.data_lines = 24, +static struct encoder_tfp410_platform_data cm_t35_tfp410_pdata = { + .name = "tfp410.0", + .source = "dpi.0", + .data_lines = 24, + 
.power_down_gpio = CM_T35_DVI_EN_GPIO, }; -static struct omap_dss_device cm_t35_tv_device = { - .name = "tv", - .driver_name = "venc", - .type = OMAP_DISPLAY_TYPE_VENC, - .phy.venc.type = OMAP_DSS_VENC_TYPE_SVIDEO, +static struct platform_device cm_t35_tfp410_device = { + .name = "tfp410", + .id = 0, + .dev.platform_data = &cm_t35_tfp410_pdata, }; -static struct omap_dss_device *cm_t35_dss_devices[] = { - &cm_t35_lcd_device, - &cm_t35_dvi_device, - &cm_t35_tv_device, +static struct connector_atv_platform_data cm_t35_tv_pdata = { + .name = "tv", + .source = "venc.0", + .connector_type = OMAP_DSS_VENC_TYPE_SVIDEO, + .invert_polarity = false, +}; + +static struct platform_device cm_t35_tv_connector_device = { + .name = "connector-analog-tv", + .id = 0, + .dev.platform_data = &cm_t35_tv_pdata, }; static struct omap_dss_board_info cm_t35_dss_data = { - .num_devices = ARRAY_SIZE(cm_t35_dss_devices), - .devices = cm_t35_dss_devices, - .default_device = &cm_t35_dvi_device, + .default_display_name = "dvi", }; static struct omap2_mcspi_device_config tdo24m_mcspi_config = { @@ -280,6 +309,11 @@ static void __init cm_t35_init_display(void) pr_err("CM-T35: failed to register DSS device\n"); gpio_free(CM_T35_LCD_EN_GPIO); } + + platform_device_register(&cm_t35_tfp410_device); + platform_device_register(&cm_t35_dvi_connector_device); + platform_device_register(&cm_t35_lcd_device); + platform_device_register(&cm_t35_tv_connector_device); } static struct regulator_consumer_supply cm_t35_vmmc1_supply[] = { diff --git a/arch/arm/mach-omap2/board-devkit8000.c b/arch/arm/mach-omap2/board-devkit8000.c index f1d91ba5d1ac..cdc4fb9960a9 100644 --- a/arch/arm/mach-omap2/board-devkit8000.c +++ b/arch/arm/mach-omap2/board-devkit8000.c @@ -112,50 +112,81 @@ static struct regulator_consumer_supply devkit8000_vio_supply[] = { REGULATOR_SUPPLY("vcc", "spi2.0"), }; -static struct panel_generic_dpi_data lcd_panel = { - .name = "innolux_at070tn83", - /* gpios filled in code */ +static const struct display_timing devkit8000_lcd_videomode = { + .pixelclock = { 0, 40000000, 0 }, + + .hactive = { 0, 800, 0 }, + .hfront_porch = { 0, 1, 0 }, + .hback_porch = { 0, 1, 0 }, + .hsync_len = { 0, 48, 0 }, + + .vactive = { 0, 480, 0 }, + .vfront_porch = { 0, 12, 0 }, + .vback_porch = { 0, 25, 0 }, + .vsync_len = { 0, 3, 0 }, + + .flags = DISPLAY_FLAGS_HSYNC_LOW | DISPLAY_FLAGS_VSYNC_LOW | + DISPLAY_FLAGS_DE_HIGH | DISPLAY_FLAGS_PIXDATA_POSEDGE, }; -static struct omap_dss_device devkit8000_lcd_device = { +static struct panel_dpi_platform_data devkit8000_lcd_pdata = { .name = "lcd", - .type = OMAP_DISPLAY_TYPE_DPI, - .driver_name = "generic_dpi_panel", - .data = &lcd_panel, - .phy.dpi.data_lines = 24, + .source = "dpi.0", + + .data_lines = 24, + + .display_timing = &devkit8000_lcd_videomode, + + .enable_gpio = -1, /* filled in code */ + .backlight_gpio = -1, }; -static struct tfp410_platform_data dvi_panel = { - .power_down_gpio = -1, - .i2c_bus_num = 1, +static struct platform_device devkit8000_lcd_device = { + .name = "panel-dpi", + .id = 0, + .dev.platform_data = &devkit8000_lcd_pdata, }; -static struct omap_dss_device devkit8000_dvi_device = { +static struct connector_dvi_platform_data devkit8000_dvi_connector_pdata = { .name = "dvi", - .type = OMAP_DISPLAY_TYPE_DPI, - .driver_name = "tfp410", - .data = &dvi_panel, - .phy.dpi.data_lines = 24, + .source = "tfp410.0", + .i2c_bus_num = 1, }; -static struct omap_dss_device devkit8000_tv_device = { - .name = "tv", - .driver_name = "venc", - .type = OMAP_DISPLAY_TYPE_VENC, - .phy.venc.type 
= OMAP_DSS_VENC_TYPE_SVIDEO, +static struct platform_device devkit8000_dvi_connector_device = { + .name = "connector-dvi", + .id = 0, + .dev.platform_data = &devkit8000_dvi_connector_pdata, }; +static struct encoder_tfp410_platform_data devkit8000_tfp410_pdata = { + .name = "tfp410.0", + .source = "dpi.0", + .data_lines = 24, + .power_down_gpio = -1, /* filled in code */ +}; -static struct omap_dss_device *devkit8000_dss_devices[] = { - &devkit8000_lcd_device, - &devkit8000_dvi_device, - &devkit8000_tv_device, +static struct platform_device devkit8000_tfp410_device = { + .name = "tfp410", + .id = 0, + .dev.platform_data = &devkit8000_tfp410_pdata, +}; + +static struct connector_atv_platform_data devkit8000_tv_pdata = { + .name = "tv", + .source = "venc.0", + .connector_type = OMAP_DSS_VENC_TYPE_SVIDEO, + .invert_polarity = false, +}; + +static struct platform_device devkit8000_tv_connector_device = { + .name = "connector-analog-tv", + .id = 0, + .dev.platform_data = &devkit8000_tv_pdata, }; static struct omap_dss_board_info devkit8000_dss_data = { - .num_devices = ARRAY_SIZE(devkit8000_dss_devices), - .devices = devkit8000_dss_devices, - .default_device = &devkit8000_lcd_device, + .default_display_name = "lcd", }; static uint32_t board_keymap[] = { @@ -204,11 +235,10 @@ static int devkit8000_twl_gpio_setup(struct device *dev, gpio_leds[2].gpio = gpio + TWL4030_GPIO_MAX + 1; /* TWL4030_GPIO_MAX + 0 is "LCD_PWREN" (out, active high) */ - lcd_panel.num_gpios = 1; - lcd_panel.gpios[0] = gpio + TWL4030_GPIO_MAX + 0; + devkit8000_lcd_pdata.enable_gpio = gpio + TWL4030_GPIO_MAX + 0; /* gpio + 7 is "DVI_PD" (out, active low) */ - dvi_panel.power_down_gpio = gpio + 7; + devkit8000_tfp410_pdata.power_down_gpio = gpio + 7; return 0; } @@ -413,6 +443,10 @@ static struct platform_device *devkit8000_devices[] __initdata = { &leds_gpio, &keys_gpio, &omap_dm9000_dev, + &devkit8000_lcd_device, + &devkit8000_tfp410_device, + &devkit8000_dvi_connector_device, + &devkit8000_tv_connector_device, }; static struct usbhs_omap_platform_data usbhs_bdata __initdata = { diff --git a/arch/arm/mach-omap2/board-h4.c b/arch/arm/mach-omap2/board-h4.c index 69c0acf5aa63..87e41a8b8d46 100644 --- a/arch/arm/mach-omap2/board-h4.c +++ b/arch/arm/mach-omap2/board-h4.c @@ -194,30 +194,48 @@ static struct platform_device h4_flash_device = { .resource = &h4_flash_resource, }; -static struct platform_device *h4_devices[] __initdata = { - &h4_flash_device, +static const struct display_timing cm_t35_lcd_videomode = { + .pixelclock = { 0, 6250000, 0 }, + + .hactive = { 0, 240, 0 }, + .hfront_porch = { 0, 15, 0 }, + .hback_porch = { 0, 60, 0 }, + .hsync_len = { 0, 15, 0 }, + + .vactive = { 0, 320, 0 }, + .vfront_porch = { 0, 1, 0 }, + .vback_porch = { 0, 1, 0 }, + .vsync_len = { 0, 1, 0 }, + + .flags = DISPLAY_FLAGS_HSYNC_HIGH | DISPLAY_FLAGS_VSYNC_HIGH | + DISPLAY_FLAGS_DE_HIGH | DISPLAY_FLAGS_PIXDATA_POSEDGE, }; -static struct panel_generic_dpi_data h4_panel_data = { - .name = "h4", +static struct panel_dpi_platform_data cm_t35_lcd_pdata = { + .name = "lcd", + .source = "dpi.0", + + .data_lines = 16, + + .display_timing = &cm_t35_lcd_videomode, + + .enable_gpio = -1, + .backlight_gpio = -1, }; -static struct omap_dss_device h4_lcd_device = { - .name = "lcd", - .driver_name = "generic_dpi_panel", - .type = OMAP_DISPLAY_TYPE_DPI, - .phy.dpi.data_lines = 16, - .data = &h4_panel_data, +static struct platform_device cm_t35_lcd_device = { + .name = "panel-dpi", + .id = 0, + .dev.platform_data = &cm_t35_lcd_pdata, }; -static struct 
omap_dss_device *h4_dss_devices[] = { - &h4_lcd_device, +static struct platform_device *h4_devices[] __initdata = { + &h4_flash_device, + &cm_t35_lcd_device, }; static struct omap_dss_board_info h4_dss_data = { - .num_devices = ARRAY_SIZE(h4_dss_devices), - .devices = h4_dss_devices, - .default_device = &h4_lcd_device, + .default_display_name = "lcd", }; /* 2420 Sysboot setup (2430 is different) */ diff --git a/arch/arm/mach-omap2/board-igep0020.c b/arch/arm/mach-omap2/board-igep0020.c index 87e65dde8e13..06dbb2d3d38b 100644 --- a/arch/arm/mach-omap2/board-igep0020.c +++ b/arch/arm/mach-omap2/board-igep0020.c @@ -429,31 +429,39 @@ static struct twl4030_gpio_platform_data igep_twl4030_gpio_pdata = { .setup = igep_twl_gpio_setup, }; -static struct tfp410_platform_data dvi_panel = { - .i2c_bus_num = 3, - .power_down_gpio = IGEP2_GPIO_DVI_PUP, +static struct connector_dvi_platform_data omap3stalker_dvi_connector_pdata = { + .name = "dvi", + .source = "tfp410.0", + .i2c_bus_num = 3, }; -static struct omap_dss_device igep2_dvi_device = { - .type = OMAP_DISPLAY_TYPE_DPI, - .name = "dvi", - .driver_name = "tfp410", - .data = &dvi_panel, - .phy.dpi.data_lines = 24, +static struct platform_device omap3stalker_dvi_connector_device = { + .name = "connector-dvi", + .id = 0, + .dev.platform_data = &omap3stalker_dvi_connector_pdata, }; -static struct omap_dss_device *igep2_dss_devices[] = { - &igep2_dvi_device +static struct encoder_tfp410_platform_data omap3stalker_tfp410_pdata = { + .name = "tfp410.0", + .source = "dpi.0", + .data_lines = 24, + .power_down_gpio = IGEP2_GPIO_DVI_PUP, +}; + +static struct platform_device omap3stalker_tfp410_device = { + .name = "tfp410", + .id = 0, + .dev.platform_data = &omap3stalker_tfp410_pdata, }; static struct omap_dss_board_info igep2_dss_data = { - .num_devices = ARRAY_SIZE(igep2_dss_devices), - .devices = igep2_dss_devices, - .default_device = &igep2_dvi_device, + .default_display_name = "dvi", }; static struct platform_device *igep_devices[] __initdata = { &igep_vwlan_device, + &omap3stalker_tfp410_device, + &omap3stalker_dvi_connector_device, }; static int igep2_keymap[] = { diff --git a/arch/arm/mach-omap2/board-ldp.c b/arch/arm/mach-omap2/board-ldp.c index 62e4f701b63b..dd8da2c5399f 100644 --- a/arch/arm/mach-omap2/board-ldp.c +++ b/arch/arm/mach-omap2/board-ldp.c @@ -184,45 +184,70 @@ static inline void __init ldp_init_smsc911x(void) #define LCD_PANEL_RESET_GPIO 55 #define LCD_PANEL_QVGA_GPIO 56 -static struct panel_generic_dpi_data ldp_panel_data = { - .name = "nec_nl2432dr22-11b", - .num_gpios = 4, - /* gpios filled in code */ +static const struct display_timing ldp_lcd_videomode = { + .pixelclock = { 0, 5400000, 0 }, + + .hactive = { 0, 240, 0 }, + .hfront_porch = { 0, 3, 0 }, + .hback_porch = { 0, 39, 0 }, + .hsync_len = { 0, 3, 0 }, + + .vactive = { 0, 320, 0 }, + .vfront_porch = { 0, 2, 0 }, + .vback_porch = { 0, 7, 0 }, + .vsync_len = { 0, 1, 0 }, + + .flags = DISPLAY_FLAGS_HSYNC_LOW | DISPLAY_FLAGS_VSYNC_LOW | + DISPLAY_FLAGS_DE_HIGH | DISPLAY_FLAGS_PIXDATA_POSEDGE, }; -static struct omap_dss_device ldp_lcd_device = { - .name = "lcd", - .driver_name = "generic_dpi_panel", - .type = OMAP_DISPLAY_TYPE_DPI, - .phy.dpi.data_lines = 18, - .data = &ldp_panel_data, +static struct panel_dpi_platform_data ldp_lcd_pdata = { + .name = "lcd", + .source = "dpi.0", + + .data_lines = 18, + + .display_timing = &ldp_lcd_videomode, + + .enable_gpio = -1, /* filled in code */ + .backlight_gpio = -1, /* filled in code */ }; -static struct omap_dss_device 
*ldp_dss_devices[] = { - &ldp_lcd_device, +static struct platform_device ldp_lcd_device = { + .name = "panel-dpi", + .id = 0, + .dev.platform_data = &ldp_lcd_pdata, }; static struct omap_dss_board_info ldp_dss_data = { - .num_devices = ARRAY_SIZE(ldp_dss_devices), - .devices = ldp_dss_devices, - .default_device = &ldp_lcd_device, + .default_display_name = "lcd", }; static void __init ldp_display_init(void) { - ldp_panel_data.gpios[2] = LCD_PANEL_RESET_GPIO; - ldp_panel_data.gpios[3] = LCD_PANEL_QVGA_GPIO; + int r; + + static struct gpio gpios[] __initdata = { + {LCD_PANEL_RESET_GPIO, GPIOF_OUT_INIT_HIGH, "LCD RESET"}, + {LCD_PANEL_QVGA_GPIO, GPIOF_OUT_INIT_HIGH, "LCD QVGA"}, + }; + + r = gpio_request_array(gpios, ARRAY_SIZE(gpios)); + if (r) { + pr_err("Cannot request LCD GPIOs, error %d\n", r); + return; + } omap_display_init(&ldp_dss_data); } static int ldp_twl_gpio_setup(struct device *dev, unsigned gpio, unsigned ngpio) { - ldp_panel_data.gpios[0] = gpio + 7; - ldp_panel_data.gpio_invert[0] = true; + /* LCD enable GPIO */ + ldp_lcd_pdata.enable_gpio = gpio + 7; - ldp_panel_data.gpios[1] = gpio + 15; - ldp_panel_data.gpio_invert[1] = true; + /* Backlight enable GPIO */ + ldp_lcd_pdata.backlight_gpio = gpio + 15; return 0; } @@ -322,6 +347,7 @@ static struct omap2_hsmmc_info mmc[] __initdata = { static struct platform_device *ldp_devices[] __initdata = { &ldp_gpio_keys_device, + &ldp_lcd_device, }; #ifdef CONFIG_OMAP_MUX diff --git a/arch/arm/mach-omap2/board-omap3beagle.c b/arch/arm/mach-omap2/board-omap3beagle.c index 04c116555412..f26918467efc 100644 --- a/arch/arm/mach-omap2/board-omap3beagle.c +++ b/arch/arm/mach-omap2/board-omap3beagle.c @@ -33,7 +33,7 @@ #include <linux/mtd/nand.h> #include <linux/mmc/host.h> #include <linux/usb/phy.h> -#include <linux/usb/nop-usb-xceiv.h> +#include <linux/usb/usb_phy_gen_xceiv.h> #include <linux/regulator/machine.h> #include <linux/i2c/twl.h> @@ -225,35 +225,46 @@ static struct mtd_partition omap3beagle_nand_partitions[] = { /* DSS */ -static struct tfp410_platform_data dvi_panel = { - .i2c_bus_num = 3, - .power_down_gpio = -1, +static struct connector_dvi_platform_data beagle_dvi_connector_pdata = { + .name = "dvi", + .source = "tfp410.0", + .i2c_bus_num = 3, }; -static struct omap_dss_device beagle_dvi_device = { - .type = OMAP_DISPLAY_TYPE_DPI, - .name = "dvi", - .driver_name = "tfp410", - .data = &dvi_panel, - .phy.dpi.data_lines = 24, +static struct platform_device beagle_dvi_connector_device = { + .name = "connector-dvi", + .id = 0, + .dev.platform_data = &beagle_dvi_connector_pdata, }; -static struct omap_dss_device beagle_tv_device = { +static struct encoder_tfp410_platform_data beagle_tfp410_pdata = { + .name = "tfp410.0", + .source = "dpi.0", + .data_lines = 24, + .power_down_gpio = -1, +}; + +static struct platform_device beagle_tfp410_device = { + .name = "tfp410", + .id = 0, + .dev.platform_data = &beagle_tfp410_pdata, +}; + +static struct connector_atv_platform_data beagle_tv_pdata = { .name = "tv", - .driver_name = "venc", - .type = OMAP_DISPLAY_TYPE_VENC, - .phy.venc.type = OMAP_DSS_VENC_TYPE_SVIDEO, + .source = "venc.0", + .connector_type = OMAP_DSS_VENC_TYPE_SVIDEO, + .invert_polarity = false, }; -static struct omap_dss_device *beagle_dss_devices[] = { - &beagle_dvi_device, - &beagle_tv_device, +static struct platform_device beagle_tv_connector_device = { + .name = "connector-analog-tv", + .id = 0, + .dev.platform_data = &beagle_tv_pdata, }; static struct omap_dss_board_info beagle_dss_data = { - .num_devices = 
ARRAY_SIZE(beagle_dss_devices), - .devices = beagle_dss_devices, - .default_device = &beagle_dvi_device, + .default_display_name = "dvi", }; #include "sdram-micron-mt46h32m32lf-6.h" @@ -279,7 +290,7 @@ static struct regulator_consumer_supply beagle_vsim_supply[] = { static struct gpio_led gpio_leds[]; /* PHY's VCC regulator might be added later, so flag that we need it */ -static struct nop_usb_xceiv_platform_data hsusb2_phy_data = { +static struct usb_phy_gen_xceiv_platform_data hsusb2_phy_data = { .needs_vcc = true, }; @@ -332,7 +343,11 @@ static int beagle_twl_gpio_setup(struct device *dev, if (gpio_request_one(gpio + 1, GPIOF_IN, "EHCI_nOC")) pr_err("%s: unable to configure EHCI_nOC\n", __func__); } - dvi_panel.power_down_gpio = beagle_config.dvi_pd_gpio; + beagle_tfp410_pdata.power_down_gpio = beagle_config.dvi_pd_gpio; + + platform_device_register(&beagle_tfp410_device); + platform_device_register(&beagle_dvi_connector_device); + platform_device_register(&beagle_tv_connector_device); /* TWL4030_GPIO_MAX i.e. LED_GPO controls HS USB Port 2 power */ phy_data[0].vcc_gpio = gpio + TWL4030_GPIO_MAX; @@ -547,6 +562,7 @@ static void __init omap3_beagle_init(void) if (gpio_is_valid(beagle_config.dvi_pd_gpio)) omap_mux_init_gpio(beagle_config.dvi_pd_gpio, OMAP_PIN_OUTPUT); omap_display_init(&beagle_dss_data); + omap_serial_init(); omap_sdrc_init(mt46h32m32lf6_sdrc_params, mt46h32m32lf6_sdrc_params); diff --git a/arch/arm/mach-omap2/board-omap3evm.c b/arch/arm/mach-omap2/board-omap3evm.c index 8c026269baca..18143873346c 100644 --- a/arch/arm/mach-omap2/board-omap3evm.c +++ b/arch/arm/mach-omap2/board-omap3evm.c @@ -33,7 +33,7 @@ #include <linux/i2c/twl.h> #include <linux/usb/otg.h> #include <linux/usb/musb.h> -#include <linux/usb/nop-usb-xceiv.h> +#include <linux/usb/usb_phy_gen_xceiv.h> #include <linux/smsc911x.h> #include <linux/wl12xx.h> @@ -166,14 +166,6 @@ static inline void __init omap3evm_init_smsc911x(void) { return; } */ #define OMAP3EVM_DVI_PANEL_EN_GPIO 199 -static struct panel_sharp_ls037v7dw01_data omap3_evm_lcd_data = { - .resb_gpio = OMAP3EVM_LCD_PANEL_RESB, - .ini_gpio = OMAP3EVM_LCD_PANEL_INI, - .mo_gpio = OMAP3EVM_LCD_PANEL_QVGA, - .lr_gpio = OMAP3EVM_LCD_PANEL_LR, - .ud_gpio = OMAP3EVM_LCD_PANEL_UD, -}; - #ifdef CONFIG_BROKEN static void __init omap3_evm_display_init(void) { @@ -196,44 +188,65 @@ static void __init omap3_evm_display_init(void) } #endif -static struct omap_dss_device omap3_evm_lcd_device = { - .name = "lcd", - .driver_name = "sharp_ls_panel", - .type = OMAP_DISPLAY_TYPE_DPI, - .phy.dpi.data_lines = 18, - .data = &omap3_evm_lcd_data, +static struct panel_sharp_ls037v7dw01_platform_data omap3_evm_lcd_pdata = { + .name = "lcd", + .source = "dpi.0", + + .data_lines = 18, + + .resb_gpio = OMAP3EVM_LCD_PANEL_RESB, + .ini_gpio = OMAP3EVM_LCD_PANEL_INI, + .mo_gpio = OMAP3EVM_LCD_PANEL_QVGA, + .lr_gpio = OMAP3EVM_LCD_PANEL_LR, + .ud_gpio = OMAP3EVM_LCD_PANEL_UD, +}; + +static struct platform_device omap3_evm_lcd_device = { + .name = "panel-sharp-ls037v7dw01", + .id = 0, + .dev.platform_data = &omap3_evm_lcd_pdata, +}; + +static struct connector_dvi_platform_data omap3_evm_dvi_connector_pdata = { + .name = "dvi", + .source = "tfp410.0", + .i2c_bus_num = -1, +}; + +static struct platform_device omap3_evm_dvi_connector_device = { + .name = "connector-dvi", + .id = 0, + .dev.platform_data = &omap3_evm_dvi_connector_pdata, }; -static struct omap_dss_device omap3_evm_tv_device = { - .name = "tv", - .driver_name = "venc", - .type = OMAP_DISPLAY_TYPE_VENC, - .phy.venc.type = 
OMAP_DSS_VENC_TYPE_SVIDEO, +static struct encoder_tfp410_platform_data omap3_evm_tfp410_pdata = { + .name = "tfp410.0", + .source = "dpi.0", + .data_lines = 24, + .power_down_gpio = OMAP3EVM_DVI_PANEL_EN_GPIO, }; -static struct tfp410_platform_data dvi_panel = { - .power_down_gpio = OMAP3EVM_DVI_PANEL_EN_GPIO, - .i2c_bus_num = -1, +static struct platform_device omap3_evm_tfp410_device = { + .name = "tfp410", + .id = 0, + .dev.platform_data = &omap3_evm_tfp410_pdata, }; -static struct omap_dss_device omap3_evm_dvi_device = { - .name = "dvi", - .type = OMAP_DISPLAY_TYPE_DPI, - .driver_name = "tfp410", - .data = &dvi_panel, - .phy.dpi.data_lines = 24, +static struct connector_atv_platform_data omap3_evm_tv_pdata = { + .name = "tv", + .source = "venc.0", + .connector_type = OMAP_DSS_VENC_TYPE_SVIDEO, + .invert_polarity = false, }; -static struct omap_dss_device *omap3_evm_dss_devices[] = { - &omap3_evm_lcd_device, - &omap3_evm_tv_device, - &omap3_evm_dvi_device, +static struct platform_device omap3_evm_tv_connector_device = { + .name = "connector-analog-tv", + .id = 0, + .dev.platform_data = &omap3_evm_tv_pdata, }; static struct omap_dss_board_info omap3_evm_dss_data = { - .num_devices = ARRAY_SIZE(omap3_evm_dss_devices), - .devices = omap3_evm_dss_devices, - .default_device = &omap3_evm_lcd_device, + .default_display_name = "lcd", }; static struct regulator_consumer_supply omap3evm_vmmc1_supply[] = { @@ -468,7 +481,7 @@ struct wl12xx_platform_data omap3evm_wlan_data __initdata = { static struct regulator_consumer_supply omap3evm_vaux2_supplies[] = { REGULATOR_SUPPLY("VDD_CSIPHY1", "omap3isp"), /* OMAP ISP */ REGULATOR_SUPPLY("VDD_CSIPHY2", "omap3isp"), /* OMAP ISP */ - REGULATOR_SUPPLY("vcc", "nop_usb_xceiv.2"), /* hsusb port 2 */ + REGULATOR_SUPPLY("vcc", "usb_phy_gen_xceiv.2"), /* hsusb port 2 */ REGULATOR_SUPPLY("vaux2", NULL), }; @@ -678,6 +691,10 @@ static void __init omap3_evm_init(void) omap3_evm_i2c_init(); omap_display_init(&omap3_evm_dss_data); + platform_device_register(&omap3_evm_lcd_device); + platform_device_register(&omap3_evm_tfp410_device); + platform_device_register(&omap3_evm_dvi_connector_device); + platform_device_register(&omap3_evm_tv_connector_device); omap_serial_init(); omap_sdrc_init(mt46h32m32lf6_sdrc_params, NULL); diff --git a/arch/arm/mach-omap2/board-omap3pandora.c b/arch/arm/mach-omap2/board-omap3pandora.c index b1547a0edfcd..de1bc6bbe585 100644 --- a/arch/arm/mach-omap2/board-omap3pandora.c +++ b/arch/arm/mach-omap2/board-omap3pandora.c @@ -231,34 +231,21 @@ static struct twl4030_keypad_data pandora_kp_data = { .rep = 1, }; -static struct panel_tpo_td043_data lcd_data = { - .nreset_gpio = 157, -}; - -static struct omap_dss_device pandora_lcd_device = { - .name = "lcd", - .driver_name = "tpo_td043mtea1_panel", - .type = OMAP_DISPLAY_TYPE_DPI, - .phy.dpi.data_lines = 24, - .data = &lcd_data, -}; - -static struct omap_dss_device pandora_tv_device = { - .name = "tv", - .driver_name = "venc", - .type = OMAP_DISPLAY_TYPE_VENC, - .phy.venc.type = OMAP_DSS_VENC_TYPE_SVIDEO, +static struct connector_atv_platform_data pandora_tv_pdata = { + .name = "tv", + .source = "venc.0", + .connector_type = OMAP_DSS_VENC_TYPE_SVIDEO, + .invert_polarity = false, }; -static struct omap_dss_device *pandora_dss_devices[] = { - &pandora_lcd_device, - &pandora_tv_device, +static struct platform_device pandora_tv_connector_device = { + .name = "connector-analog-tv", + .id = 0, + .dev.platform_data = &pandora_tv_pdata, }; static struct omap_dss_board_info pandora_dss_data = { - 
.num_devices = ARRAY_SIZE(pandora_dss_devices), - .devices = pandora_dss_devices, - .default_device = &pandora_lcd_device, + .default_display_name = "lcd", }; static void pandora_wl1251_init_card(struct mmc_card *card) @@ -348,11 +335,11 @@ static struct regulator_consumer_supply pandora_vdds_supplies[] = { }; static struct regulator_consumer_supply pandora_vcc_lcd_supply[] = { - REGULATOR_SUPPLY("vcc", "display0"), + REGULATOR_SUPPLY("vcc", "spi1.1"), }; static struct regulator_consumer_supply pandora_usb_phy_supply[] = { - REGULATOR_SUPPLY("vcc", "nop_usb_xceiv.2"), /* hsusb port 2 */ + REGULATOR_SUPPLY("vcc", "usb_phy_gen_xceiv.2"), /* hsusb port 2 */ }; /* ads7846 on SPI and 2 nub controllers on I2C */ @@ -529,13 +516,21 @@ static int __init omap3pandora_i2c_init(void) return 0; } +static struct panel_tpo_td043mtea1_platform_data pandora_lcd_pdata = { + .name = "lcd", + .source = "dpi.0", + + .data_lines = 24, + .nreset_gpio = 157, +}; + static struct spi_board_info omap3pandora_spi_board_info[] __initdata = { { - .modalias = "tpo_td043mtea1_panel_spi", + .modalias = "panel-tpo-td043mtea1", .bus_num = 1, .chip_select = 1, .max_speed_hz = 375000, - .platform_data = &pandora_lcd_device, + .platform_data = &pandora_lcd_pdata, } }; @@ -580,6 +575,7 @@ static struct platform_device *omap3pandora_devices[] __initdata = { &pandora_keys_gpio, &pandora_vwlan_device, &pandora_backlight, + &pandora_tv_connector_device, }; static struct usbhs_omap_platform_data usbhs_bdata __initdata = { diff --git a/arch/arm/mach-omap2/board-omap3stalker.c b/arch/arm/mach-omap2/board-omap3stalker.c index d37e6b187ae4..ba8342fef799 100644 --- a/arch/arm/mach-omap2/board-omap3stalker.c +++ b/arch/arm/mach-omap2/board-omap3stalker.c @@ -93,40 +93,50 @@ static void __init omap3_stalker_display_init(void) { return; } +static struct connector_dvi_platform_data omap3stalker_dvi_connector_pdata = { + .name = "dvi", + .source = "tfp410.0", + .i2c_bus_num = -1, +}; -static struct omap_dss_device omap3_stalker_tv_device = { - .name = "tv", - .driver_name = "venc", - .type = OMAP_DISPLAY_TYPE_VENC, -#if defined(CONFIG_OMAP2_VENC_OUT_TYPE_SVIDEO) - .phy.venc.type = OMAP_DSS_VENC_TYPE_SVIDEO, -#elif defined(CONFIG_OMAP2_VENC_OUT_TYPE_COMPOSITE) - .u.venc.type = OMAP_DSS_VENC_TYPE_COMPOSITE, -#endif +static struct platform_device omap3stalker_dvi_connector_device = { + .name = "connector-dvi", + .id = 0, + .dev.platform_data = &omap3stalker_dvi_connector_pdata, }; -static struct tfp410_platform_data dvi_panel = { - .power_down_gpio = DSS_ENABLE_GPIO, - .i2c_bus_num = -1, +static struct encoder_tfp410_platform_data omap3stalker_tfp410_pdata = { + .name = "tfp410.0", + .source = "dpi.0", + .data_lines = 24, + .power_down_gpio = DSS_ENABLE_GPIO, }; -static struct omap_dss_device omap3_stalker_dvi_device = { - .name = "dvi", - .type = OMAP_DISPLAY_TYPE_DPI, - .driver_name = "tfp410", - .data = &dvi_panel, - .phy.dpi.data_lines = 24, +static struct platform_device omap3stalker_tfp410_device = { + .name = "tfp410", + .id = 0, + .dev.platform_data = &omap3stalker_tfp410_pdata, +}; + +static struct connector_atv_platform_data omap3stalker_tv_pdata = { + .name = "tv", + .source = "venc.0", +#if defined(CONFIG_OMAP2_VENC_OUT_TYPE_SVIDEO) + .connector_type = OMAP_DSS_VENC_TYPE_SVIDEO, +#elif defined(CONFIG_OMAP2_VENC_OUT_TYPE_COMPOSITE) + .connector_type = OMAP_DSS_VENC_TYPE_COMPOSITE, +#endif + .invert_polarity = false, }; -static struct omap_dss_device *omap3_stalker_dss_devices[] = { - &omap3_stalker_tv_device, - 
&omap3_stalker_dvi_device, +static struct platform_device omap3stalker_tv_connector_device = { + .name = "connector-analog-tv", + .id = 0, + .dev.platform_data = &omap3stalker_tv_pdata, }; static struct omap_dss_board_info omap3_stalker_dss_data = { - .num_devices = ARRAY_SIZE(omap3_stalker_dss_devices), - .devices = omap3_stalker_dss_devices, - .default_device = &omap3_stalker_dvi_device, + .default_display_name = "dvi", }; static struct regulator_consumer_supply omap3stalker_vmmc1_supply[] = { @@ -356,6 +366,9 @@ static struct usbhs_phy_data phy_data[] __initdata = { static struct platform_device *omap3_stalker_devices[] __initdata = { &keys_gpio, + &omap3stalker_tfp410_device, + &omap3stalker_dvi_connector_device, + &omap3stalker_tv_connector_device, }; static struct usbhs_omap_platform_data usbhs_bdata __initdata = { diff --git a/arch/arm/mach-omap2/board-overo.c b/arch/arm/mach-omap2/board-overo.c index 5748b5d06c23..f6d384111911 100644 --- a/arch/arm/mach-omap2/board-overo.c +++ b/arch/arm/mach-omap2/board-overo.c @@ -72,6 +72,9 @@ #define OVERO_SMSC911X2_CS 4 #define OVERO_SMSC911X2_GPIO 65 +/* whether to register LCD35 instead of LCD43 */ +static bool overo_use_lcd35; + #if defined(CONFIG_TOUCHSCREEN_ADS7846) || \ defined(CONFIG_TOUCHSCREEN_ADS7846_MODULE) @@ -149,78 +152,94 @@ static inline void __init overo_init_smsc911x(void) { return; } #define OVERO_GPIO_LCD_EN 144 #define OVERO_GPIO_LCD_BL 145 -static struct tfp410_platform_data dvi_panel = { - .i2c_bus_num = 3, - .power_down_gpio = -1, +static struct connector_atv_platform_data overo_tv_pdata = { + .name = "tv", + .source = "venc.0", + .connector_type = OMAP_DSS_VENC_TYPE_SVIDEO, + .invert_polarity = false, }; -static struct omap_dss_device overo_dvi_device = { - .name = "dvi", - .type = OMAP_DISPLAY_TYPE_DPI, - .driver_name = "tfp410", - .data = &dvi_panel, - .phy.dpi.data_lines = 24, +static struct platform_device overo_tv_connector_device = { + .name = "connector-analog-tv", + .id = 0, + .dev.platform_data = &overo_tv_pdata, }; -static struct omap_dss_device overo_tv_device = { - .name = "tv", - .driver_name = "venc", - .type = OMAP_DISPLAY_TYPE_VENC, - .phy.venc.type = OMAP_DSS_VENC_TYPE_SVIDEO, +static const struct display_timing overo_lcd43_videomode = { + .pixelclock = { 0, 9200000, 0 }, + + .hactive = { 0, 480, 0 }, + .hfront_porch = { 0, 8, 0 }, + .hback_porch = { 0, 4, 0 }, + .hsync_len = { 0, 41, 0 }, + + .vactive = { 0, 272, 0 }, + .vfront_porch = { 0, 4, 0 }, + .vback_porch = { 0, 2, 0 }, + .vsync_len = { 0, 10, 0 }, + + .flags = DISPLAY_FLAGS_HSYNC_LOW | DISPLAY_FLAGS_VSYNC_LOW | + DISPLAY_FLAGS_DE_HIGH | DISPLAY_FLAGS_PIXDATA_POSEDGE, }; -static struct panel_generic_dpi_data lcd43_panel = { - .name = "samsung_lte430wq_f0c", - .num_gpios = 2, - .gpios = { - OVERO_GPIO_LCD_EN, - OVERO_GPIO_LCD_BL - }, +static struct panel_dpi_platform_data overo_lcd43_pdata = { + .name = "lcd43", + .source = "dpi.0", + + .data_lines = 24, + + .display_timing = &overo_lcd43_videomode, + + .enable_gpio = OVERO_GPIO_LCD_EN, + .backlight_gpio = OVERO_GPIO_LCD_BL, }; -static struct omap_dss_device overo_lcd43_device = { - .name = "lcd43", - .type = OMAP_DISPLAY_TYPE_DPI, - .driver_name = "generic_dpi_panel", - .data = &lcd43_panel, - .phy.dpi.data_lines = 24, +static struct platform_device overo_lcd43_device = { + .name = "panel-dpi", + .id = 0, + .dev.platform_data = &overo_lcd43_pdata, }; -#if defined(CONFIG_PANEL_LGPHILIPS_LB035Q02) || \ - defined(CONFIG_PANEL_LGPHILIPS_LB035Q02_MODULE) -static struct panel_generic_dpi_data 
lcd35_panel = { - .num_gpios = 2, - .gpios = { - OVERO_GPIO_LCD_EN, - OVERO_GPIO_LCD_BL - }, +static struct connector_dvi_platform_data overo_dvi_connector_pdata = { + .name = "dvi", + .source = "tfp410.0", + .i2c_bus_num = 3, }; -static struct omap_dss_device overo_lcd35_device = { - .type = OMAP_DISPLAY_TYPE_DPI, - .name = "lcd35", - .driver_name = "lgphilips_lb035q02_panel", - .phy.dpi.data_lines = 24, - .data = &lcd35_panel, +static struct platform_device overo_dvi_connector_device = { + .name = "connector-dvi", + .id = 0, + .dev.platform_data = &overo_dvi_connector_pdata, }; -#endif -static struct omap_dss_device *overo_dss_devices[] = { - &overo_dvi_device, - &overo_tv_device, -#if defined(CONFIG_PANEL_LGPHILIPS_LB035Q02) || \ - defined(CONFIG_PANEL_LGPHILIPS_LB035Q02_MODULE) - &overo_lcd35_device, -#endif - &overo_lcd43_device, +static struct encoder_tfp410_platform_data overo_tfp410_pdata = { + .name = "tfp410.0", + .source = "dpi.0", + .data_lines = 24, + .power_down_gpio = -1, +}; + +static struct platform_device overo_tfp410_device = { + .name = "tfp410", + .id = 0, + .dev.platform_data = &overo_tfp410_pdata, }; static struct omap_dss_board_info overo_dss_data = { - .num_devices = ARRAY_SIZE(overo_dss_devices), - .devices = overo_dss_devices, - .default_device = &overo_dvi_device, + .default_display_name = "lcd43", }; +static void __init overo_display_init(void) +{ + omap_display_init(&overo_dss_data); + + if (!overo_use_lcd35) + platform_device_register(&overo_lcd43_device); + platform_device_register(&overo_tfp410_device); + platform_device_register(&overo_dvi_connector_device); + platform_device_register(&overo_tv_connector_device); +} + static struct mtd_partition overo_nand_partitions[] = { { .name = "xloader", @@ -408,24 +427,41 @@ static int __init overo_i2c_init(void) return 0; } +static struct panel_lb035q02_platform_data overo_lcd35_pdata = { + .name = "lcd35", + .source = "dpi.0", + + .data_lines = 24, + + .enable_gpio = OVERO_GPIO_LCD_EN, + .backlight_gpio = OVERO_GPIO_LCD_BL, +}; + +/* + * NOTE: We need to add either the lgphilips panel, or the lcd43 panel. The + * selection is done based on the overo_use_lcd35 field. If new SPI + * devices are added here, extra work is needed to make only the lgphilips panel + * affected by the overo_use_lcd35 field. 
+ */ static struct spi_board_info overo_spi_board_info[] __initdata = { -#if defined(CONFIG_PANEL_LGPHILIPS_LB035Q02) || \ - defined(CONFIG_PANEL_LGPHILIPS_LB035Q02_MODULE) { - .modalias = "lgphilips_lb035q02_panel-spi", + .modalias = "panel_lgphilips_lb035q02", .bus_num = 1, .chip_select = 1, .max_speed_hz = 500000, .mode = SPI_MODE_3, + .platform_data = &overo_lcd35_pdata, }, -#endif }; static int __init overo_spi_init(void) { overo_ads7846_init(); - spi_register_board_info(overo_spi_board_info, - ARRAY_SIZE(overo_spi_board_info)); + + if (overo_use_lcd35) { + spi_register_board_info(overo_spi_board_info, + ARRAY_SIZE(overo_spi_board_info)); + } return 0; } @@ -463,11 +499,13 @@ static void __init overo_init(void) { int ret; + if (strstr(boot_command_line, "omapdss.def_disp=lcd35")) + overo_use_lcd35 = true; + regulator_register_fixed(0, dummy_supplies, ARRAY_SIZE(dummy_supplies)); omap3_mux_init(board_mux, OMAP_PACKAGE_CBB); overo_i2c_init(); omap_hsmmc_init(mmc); - omap_display_init(&overo_dss_data); omap_serial_init(); omap_sdrc_init(mt46h32m32lf6_sdrc_params, mt46h32m32lf6_sdrc_params); @@ -484,6 +522,8 @@ static void __init overo_init(void) overo_init_keys(); omap_twl4030_audio_init("overo", NULL); + overo_display_init(); + /* Ensure SDRC pins are mux'd for self-refresh */ omap_mux_init_signal("sdrc_cke0", OMAP_PIN_OUTPUT); omap_mux_init_signal("sdrc_cke1", OMAP_PIN_OUTPUT); diff --git a/arch/arm/mach-omap2/board-rx51-peripherals.c b/arch/arm/mach-omap2/board-rx51-peripherals.c index 9c2dd102fbbb..c3270c0f1fce 100644 --- a/arch/arm/mach-omap2/board-rx51-peripherals.c +++ b/arch/arm/mach-omap2/board-rx51-peripherals.c @@ -45,6 +45,8 @@ #include <linux/platform_data/tsl2563.h> #include <linux/lis3lv02d.h> +#include <video/omap-panel-data.h> + #if defined(CONFIG_IR_RX51) || defined(CONFIG_IR_RX51_MODULE) #include <media/ir-rx51.h> #endif @@ -226,6 +228,15 @@ static struct lp55xx_platform_data rx51_lp5523_platform_data = { }; #endif +#define RX51_LCD_RESET_GPIO 90 + +static struct panel_acx565akm_platform_data acx_pdata = { + .name = "lcd", + .source = "sdi.0", + .reset_gpio = RX51_LCD_RESET_GPIO, + .datapairs = 2, +}; + static struct omap2_mcspi_device_config wl1251_mcspi_config = { .turbo_mode = 0, }; @@ -254,6 +265,7 @@ static struct spi_board_info rx51_peripherals_spi_board_info[] __initdata = { .chip_select = 2, .max_speed_hz = 6000000, .controller_data = &mipid_mcspi_config, + .platform_data = &acx_pdata, }, [RX51_SPI_TSC2005] = { .modalias = "tsc2005", diff --git a/arch/arm/mach-omap2/board-rx51-video.c b/arch/arm/mach-omap2/board-rx51-video.c index bdd1e3a179e1..43a90c8d6837 100644 --- a/arch/arm/mach-omap2/board-rx51-video.c +++ b/arch/arm/mach-omap2/board-rx51-video.c @@ -29,34 +29,21 @@ #if defined(CONFIG_FB_OMAP2) || defined(CONFIG_FB_OMAP2_MODULE) -static struct panel_acx565akm_data lcd_data = { - .reset_gpio = RX51_LCD_RESET_GPIO, +static struct connector_atv_platform_data rx51_tv_pdata = { + .name = "tv", + .source = "venc.0", + .connector_type = OMAP_DSS_VENC_TYPE_COMPOSITE, + .invert_polarity = false, }; -static struct omap_dss_device rx51_lcd_device = { - .name = "lcd", - .driver_name = "panel-acx565akm", - .type = OMAP_DISPLAY_TYPE_SDI, - .phy.sdi.datapairs = 2, - .data = &lcd_data, -}; - -static struct omap_dss_device rx51_tv_device = { - .name = "tv", - .type = OMAP_DISPLAY_TYPE_VENC, - .driver_name = "venc", - .phy.venc.type = OMAP_DSS_VENC_TYPE_COMPOSITE, -}; - -static struct omap_dss_device *rx51_dss_devices[] = { - &rx51_lcd_device, - &rx51_tv_device, +static 
struct platform_device rx51_tv_connector_device = { + .name = "connector-analog-tv", + .id = 0, + .dev.platform_data = &rx51_tv_pdata, }; static struct omap_dss_board_info rx51_dss_board_info = { - .num_devices = ARRAY_SIZE(rx51_dss_devices), - .devices = rx51_dss_devices, - .default_device = &rx51_lcd_device, + .default_display_name = "lcd", }; static int __init rx51_video_init(void) @@ -71,6 +58,8 @@ static int __init rx51_video_init(void) omap_display_init(&rx51_dss_board_info); + platform_device_register(&rx51_tv_connector_device); + return 0; } diff --git a/arch/arm/mach-omap2/board-zoom-display.c b/arch/arm/mach-omap2/board-zoom-display.c index c2a079cb76fc..3d8ecc1e05bd 100644 --- a/arch/arm/mach-omap2/board-zoom-display.c +++ b/arch/arm/mach-omap2/board-zoom-display.c @@ -25,32 +25,23 @@ #define LCD_PANEL_RESET_GPIO_PILOT 55 #define LCD_PANEL_QVGA_GPIO 56 -static struct panel_nec_nl8048_data zoom_lcd_data = { - /* res_gpio filled in code */ - .qvga_gpio = LCD_PANEL_QVGA_GPIO, -}; +static struct panel_nec_nl8048hl11_platform_data zoom_lcd_pdata = { + .name = "lcd", + .source = "dpi.0", -static struct omap_dss_device zoom_lcd_device = { - .name = "lcd", - .driver_name = "NEC_8048_panel", - .type = OMAP_DISPLAY_TYPE_DPI, - .phy.dpi.data_lines = 24, - .data = &zoom_lcd_data, -}; + .data_lines = 24, -static struct omap_dss_device *zoom_dss_devices[] = { - &zoom_lcd_device, + .res_gpio = -1, /* filled in code */ + .qvga_gpio = LCD_PANEL_QVGA_GPIO, }; static struct omap_dss_board_info zoom_dss_data = { - .num_devices = ARRAY_SIZE(zoom_dss_devices), - .devices = zoom_dss_devices, - .default_device = &zoom_lcd_device, + .default_display_name = "lcd", }; static void __init zoom_lcd_panel_init(void) { - zoom_lcd_data.res_gpio = (omap_rev() > OMAP3430_REV_ES3_0) ? + zoom_lcd_pdata.res_gpio = (omap_rev() > OMAP3430_REV_ES3_0) ? 
LCD_PANEL_RESET_GPIO_PROD : LCD_PANEL_RESET_GPIO_PILOT; } @@ -61,19 +52,20 @@ static struct omap2_mcspi_device_config dss_lcd_mcspi_config = { static struct spi_board_info nec_8048_spi_board_info[] __initdata = { [0] = { - .modalias = "nec_8048_spi", + .modalias = "panel-nec-nl8048hl11", .bus_num = 1, .chip_select = 2, .max_speed_hz = 375000, .controller_data = &dss_lcd_mcspi_config, + .platform_data = &zoom_lcd_pdata, }, }; void __init zoom_display_init(void) { omap_display_init(&zoom_dss_data); + zoom_lcd_panel_init(); spi_register_board_info(nec_8048_spi_board_info, ARRAY_SIZE(nec_8048_spi_board_info)); - zoom_lcd_panel_init(); } diff --git a/arch/arm/mach-omap2/display.c b/arch/arm/mach-omap2/display.c index ff37be1f6f93..03a0516c7f67 100644 --- a/arch/arm/mach-omap2/display.c +++ b/arch/arm/mach-omap2/display.c @@ -400,7 +400,7 @@ int __init omap_display_init(struct omap_dss_board_info *board_data) /* Create devices for DPI and SDI */ - pdev = create_simple_dss_pdev("omapdss_dpi", -1, + pdev = create_simple_dss_pdev("omapdss_dpi", 0, board_data, sizeof(*board_data), dss_pdev); if (IS_ERR(pdev)) { pr_err("Could not build platform_device for omapdss_dpi\n"); @@ -408,7 +408,7 @@ int __init omap_display_init(struct omap_dss_board_info *board_data) } if (cpu_is_omap34xx()) { - pdev = create_simple_dss_pdev("omapdss_sdi", -1, + pdev = create_simple_dss_pdev("omapdss_sdi", 0, board_data, sizeof(*board_data), dss_pdev); if (IS_ERR(pdev)) { pr_err("Could not build platform_device for omapdss_sdi\n"); diff --git a/arch/arm/mach-omap2/dss-common.c b/arch/arm/mach-omap2/dss-common.c index 043e5705f2a6..bf89effa4c99 100644 --- a/arch/arm/mach-omap2/dss-common.c +++ b/arch/arm/mach-omap2/dss-common.c @@ -25,6 +25,7 @@ #include <linux/kernel.h> #include <linux/gpio.h> +#include <linux/platform_device.h> #include <video/omapdss.h> #include <video/omap-panel-data.h> @@ -37,70 +38,76 @@ #define HDMI_GPIO_LS_OE 41 /* Level shifter for HDMI */ #define HDMI_GPIO_HPD 63 /* Hotplug detect */ -/* Display DVI */ #define PANDA_DVI_TFP410_POWER_DOWN_GPIO 0 -/* Using generic display panel */ -static struct tfp410_platform_data omap4_dvi_panel = { - .i2c_bus_num = 2, - .power_down_gpio = PANDA_DVI_TFP410_POWER_DOWN_GPIO, +/* DVI Connector */ +static struct connector_dvi_platform_data omap4_panda_dvi_connector_pdata = { + .name = "dvi", + .source = "tfp410.0", + .i2c_bus_num = 2, }; -static struct omap_dss_device omap4_panda_dvi_device = { - .type = OMAP_DISPLAY_TYPE_DPI, - .name = "dvi", - .driver_name = "tfp410", - .data = &omap4_dvi_panel, - .phy.dpi.data_lines = 24, - .channel = OMAP_DSS_CHANNEL_LCD2, +static struct platform_device omap4_panda_dvi_connector_device = { + .name = "connector-dvi", + .id = 0, + .dev.platform_data = &omap4_panda_dvi_connector_pdata, }; -static struct omap_dss_hdmi_data omap4_panda_hdmi_data = { +/* TFP410 DPI-to-DVI chip */ +static struct encoder_tfp410_platform_data omap4_panda_tfp410_pdata = { + .name = "tfp410.0", + .source = "dpi.0", + .data_lines = 24, + .power_down_gpio = PANDA_DVI_TFP410_POWER_DOWN_GPIO, +}; + +static struct platform_device omap4_panda_tfp410_device = { + .name = "tfp410", + .id = 0, + .dev.platform_data = &omap4_panda_tfp410_pdata, +}; + +/* HDMI Connector */ +static struct connector_hdmi_platform_data omap4_panda_hdmi_connector_pdata = { + .name = "hdmi", + .source = "tpd12s015.0", +}; + +static struct platform_device omap4_panda_hdmi_connector_device = { + .name = "connector-hdmi", + .id = 0, + .dev.platform_data = &omap4_panda_hdmi_connector_pdata, +}; + 
+/* TPD12S015 HDMI ESD protection & level shifter chip */ +static struct encoder_tpd12s015_platform_data omap4_panda_tpd_pdata = { + .name = "tpd12s015.0", + .source = "hdmi.0", + .ct_cp_hpd_gpio = HDMI_GPIO_CT_CP_HPD, .ls_oe_gpio = HDMI_GPIO_LS_OE, .hpd_gpio = HDMI_GPIO_HPD, }; -static struct omap_dss_device omap4_panda_hdmi_device = { - .name = "hdmi", - .driver_name = "hdmi_panel", - .type = OMAP_DISPLAY_TYPE_HDMI, - .channel = OMAP_DSS_CHANNEL_DIGIT, - .data = &omap4_panda_hdmi_data, -}; - -static struct omap_dss_device *omap4_panda_dss_devices[] = { - &omap4_panda_dvi_device, - &omap4_panda_hdmi_device, +static struct platform_device omap4_panda_tpd_device = { + .name = "tpd12s015", + .id = 0, + .dev.platform_data = &omap4_panda_tpd_pdata, }; static struct omap_dss_board_info omap4_panda_dss_data = { - .num_devices = ARRAY_SIZE(omap4_panda_dss_devices), - .devices = omap4_panda_dss_devices, - .default_device = &omap4_panda_dvi_device, + .default_display_name = "dvi", }; -void __init omap4_panda_display_init(void) +void __init omap4_panda_display_init_of(void) { omap_display_init(&omap4_panda_dss_data); - /* - * OMAP4460SDP/Blaze and OMAP4430 ES2.3 SDP/Blaze boards and - * later have external pull up on the HDMI I2C lines - */ - if (cpu_is_omap446x() || omap_rev() > OMAP4430_REV_ES2_2) - omap_hdmi_init(OMAP_HDMI_SDA_SCL_EXTERNAL_PULLUP); - else - omap_hdmi_init(0); - - omap_mux_init_gpio(HDMI_GPIO_LS_OE, OMAP_PIN_OUTPUT); - omap_mux_init_gpio(HDMI_GPIO_CT_CP_HPD, OMAP_PIN_OUTPUT); - omap_mux_init_gpio(HDMI_GPIO_HPD, OMAP_PIN_INPUT_PULLDOWN); -} + platform_device_register(&omap4_panda_tfp410_device); + platform_device_register(&omap4_panda_dvi_connector_device); -void __init omap4_panda_display_init_of(void) -{ - omap_display_init(&omap4_panda_dss_data); + platform_device_register(&omap4_panda_tpd_device); + platform_device_register(&omap4_panda_hdmi_connector_device); } @@ -109,93 +116,73 @@ void __init omap4_panda_display_init_of(void) #define DISPLAY_SEL_GPIO 59 /* LCD2/PicoDLP switch */ #define DLP_POWER_ON_GPIO 40 -static struct nokia_dsi_panel_data dsi1_panel = { - .name = "taal", - .reset_gpio = 102, - .use_ext_te = false, - .ext_te_gpio = 101, - .esd_interval = 0, - .pin_config = { - .num_pins = 6, - .pins = { 0, 1, 2, 3, 4, 5 }, - }, -}; - -static struct omap_dss_device sdp4430_lcd_device = { - .name = "lcd", - .driver_name = "taal", - .type = OMAP_DISPLAY_TYPE_DSI, - .data = &dsi1_panel, - .phy.dsi = { - .module = 0, +static struct panel_dsicm_platform_data dsi1_panel = { + .name = "lcd", + .source = "dsi.0", + .reset_gpio = 102, + .use_ext_te = false, + .ext_te_gpio = 101, + .pin_config = { + .num_pins = 6, + .pins = { 0, 1, 2, 3, 4, 5 }, }, - .channel = OMAP_DSS_CHANNEL_LCD, }; -static struct nokia_dsi_panel_data dsi2_panel = { - .name = "taal", - .reset_gpio = 104, - .use_ext_te = false, - .ext_te_gpio = 103, - .esd_interval = 0, - .pin_config = { - .num_pins = 6, - .pins = { 0, 1, 2, 3, 4, 5 }, - }, +static struct platform_device sdp4430_lcd_device = { + .name = "panel-dsi-cm", + .id = 0, + .dev.platform_data = &dsi1_panel, }; -static struct omap_dss_device sdp4430_lcd2_device = { - .name = "lcd2", - .driver_name = "taal", - .type = OMAP_DISPLAY_TYPE_DSI, - .data = &dsi2_panel, - .phy.dsi = { - - .module = 1, +static struct panel_dsicm_platform_data dsi2_panel = { + .name = "lcd2", + .source = "dsi.1", + .reset_gpio = 104, + .use_ext_te = false, + .ext_te_gpio = 103, + .pin_config = { + .num_pins = 6, + .pins = { 0, 1, 2, 3, 4, 5 }, }, - .channel = OMAP_DSS_CHANNEL_LCD2, 
}; -static struct omap_dss_hdmi_data sdp4430_hdmi_data = { - .ct_cp_hpd_gpio = HDMI_GPIO_CT_CP_HPD, - .ls_oe_gpio = HDMI_GPIO_LS_OE, - .hpd_gpio = HDMI_GPIO_HPD, +static struct platform_device sdp4430_lcd2_device = { + .name = "panel-dsi-cm", + .id = 1, + .dev.platform_data = &dsi2_panel, }; -static struct omap_dss_device sdp4430_hdmi_device = { - .name = "hdmi", - .driver_name = "hdmi_panel", - .type = OMAP_DISPLAY_TYPE_HDMI, - .channel = OMAP_DSS_CHANNEL_DIGIT, - .data = &sdp4430_hdmi_data, +/* HDMI Connector */ +static struct connector_hdmi_platform_data sdp4430_hdmi_connector_pdata = { + .name = "hdmi", + .source = "tpd12s015.0", }; -static struct picodlp_panel_data sdp4430_picodlp_pdata = { - .picodlp_adapter_id = 2, - .emu_done_gpio = 44, - .pwrgood_gpio = 45, +static struct platform_device sdp4430_hdmi_connector_device = { + .name = "connector-hdmi", + .id = 0, + .dev.platform_data = &sdp4430_hdmi_connector_pdata, }; -static struct omap_dss_device sdp4430_picodlp_device = { - .name = "picodlp", - .driver_name = "picodlp_panel", - .type = OMAP_DISPLAY_TYPE_DPI, - .phy.dpi.data_lines = 24, - .channel = OMAP_DSS_CHANNEL_LCD2, - .data = &sdp4430_picodlp_pdata, +/* TPD12S015 HDMI ESD protection & level shifter chip */ +static struct encoder_tpd12s015_platform_data sdp4430_tpd_pdata = { + .name = "tpd12s015.0", + .source = "hdmi.0", + + .ct_cp_hpd_gpio = HDMI_GPIO_CT_CP_HPD, + .ls_oe_gpio = HDMI_GPIO_LS_OE, + .hpd_gpio = HDMI_GPIO_HPD, }; -static struct omap_dss_device *sdp4430_dss_devices[] = { - &sdp4430_lcd_device, - &sdp4430_lcd2_device, - &sdp4430_hdmi_device, - &sdp4430_picodlp_device, +static struct platform_device sdp4430_tpd_device = { + .name = "tpd12s015", + .id = 0, + .dev.platform_data = &sdp4430_tpd_pdata, }; + static struct omap_dss_board_info sdp4430_dss_data = { - .num_devices = ARRAY_SIZE(sdp4430_dss_devices), - .devices = sdp4430_dss_devices, - .default_device = &sdp4430_lcd_device, + .default_display_name = "lcd", }; /* @@ -204,7 +191,7 @@ static struct omap_dss_board_info sdp4430_dss_data = { * used by picodlp on the 4430sdp platform. 
Keep this gpio disabled as LCD2 is * selected by default */ -void __init omap_4430sdp_display_init(void) +void __init omap_4430sdp_display_init_of(void) { int r; @@ -219,33 +206,10 @@ void __init omap_4430sdp_display_init(void) pr_err("%s: Could not get DLP POWER ON GPIO\n", __func__); omap_display_init(&sdp4430_dss_data); - /* - * OMAP4460SDP/Blaze and OMAP4430 ES2.3 SDP/Blaze boards and - * later have external pull up on the HDMI I2C lines - */ - if (cpu_is_omap446x() || omap_rev() > OMAP4430_REV_ES2_2) - omap_hdmi_init(OMAP_HDMI_SDA_SCL_EXTERNAL_PULLUP); - else - omap_hdmi_init(0); - - omap_mux_init_gpio(HDMI_GPIO_LS_OE, OMAP_PIN_OUTPUT); - omap_mux_init_gpio(HDMI_GPIO_CT_CP_HPD, OMAP_PIN_OUTPUT); - omap_mux_init_gpio(HDMI_GPIO_HPD, OMAP_PIN_INPUT_PULLDOWN); -} - -void __init omap_4430sdp_display_init_of(void) -{ - int r; - r = gpio_request_one(DISPLAY_SEL_GPIO, GPIOF_OUT_INIT_HIGH, - "display_sel"); - if (r) - pr_err("%s: Could not get display_sel GPIO\n", __func__); - - r = gpio_request_one(DLP_POWER_ON_GPIO, GPIOF_OUT_INIT_LOW, - "DLP POWER ON"); - if (r) - pr_err("%s: Could not get DLP POWER ON GPIO\n", __func__); + platform_device_register(&sdp4430_lcd_device); + platform_device_register(&sdp4430_lcd2_device); - omap_display_init(&sdp4430_dss_data); + platform_device_register(&sdp4430_tpd_device); + platform_device_register(&sdp4430_hdmi_connector_device); } diff --git a/arch/arm/mach-omap2/dss-common.h b/arch/arm/mach-omap2/dss-common.h index 915f6fff5106..c28fe3c03588 100644 --- a/arch/arm/mach-omap2/dss-common.h +++ b/arch/arm/mach-omap2/dss-common.h @@ -6,9 +6,7 @@ * This file will be removed when DSS supports DT. */ -void __init omap4_panda_display_init(void); void __init omap4_panda_display_init_of(void); -void __init omap_4430sdp_display_init(void); void __init omap_4430sdp_display_init_of(void); #endif diff --git a/arch/arm/mach-omap2/i2c.c b/arch/arm/mach-omap2/i2c.c index d940e53dd9f2..b456b4471f35 100644 --- a/arch/arm/mach-omap2/i2c.c +++ b/arch/arm/mach-omap2/i2c.c @@ -181,7 +181,7 @@ int __init omap_i2c_add_bus(struct omap_i2c_bus_platform_data *i2c_pdata, sizeof(struct omap_i2c_bus_platform_data)); WARN(IS_ERR(pdev), "Could not build omap_device for %s\n", name); - return PTR_RET(pdev); + return PTR_ERR_OR_ZERO(pdev); } static int __init omap_i2c_cmdline(void) diff --git a/arch/arm/mach-omap2/usb-host.c b/arch/arm/mach-omap2/usb-host.c index 2eb19d4d0aa1..e83a6a4b184a 100644 --- a/arch/arm/mach-omap2/usb-host.c +++ b/arch/arm/mach-omap2/usb-host.c @@ -28,7 +28,7 @@ #include <linux/io.h> #include <linux/gpio.h> #include <linux/usb/phy.h> -#include <linux/usb/nop-usb-xceiv.h> +#include <linux/usb/usb_phy_gen_xceiv.h> #include "soc.h" #include "omap_device.h" @@ -349,7 +349,7 @@ static struct fixed_voltage_config hsusb_reg_config = { /* .init_data filled later */ }; -static const char *nop_name = "nop_usb_xceiv"; /* NOP PHY driver */ +static const char *nop_name = "usb_phy_gen_xceiv"; /* NOP PHY driver */ static const char *reg_name = "reg-fixed-voltage"; /* Regulator driver */ /** @@ -460,9 +460,9 @@ int usbhs_init_phys(struct usbhs_phy_data *phy, int num_phys) pdevinfo.name = nop_name; pdevinfo.id = phy->port; pdevinfo.data = phy->platform_data; - pdevinfo.size_data = sizeof(struct nop_usb_xceiv_platform_data); - - scnprintf(phy_id, MAX_STR, "nop_usb_xceiv.%d", + pdevinfo.size_data = + sizeof(struct usb_phy_gen_xceiv_platform_data); + scnprintf(phy_id, MAX_STR, "usb_phy_gen_xceiv.%d", phy->port); pdev = platform_device_register_full(&pdevinfo); if (IS_ERR(pdev)) { 
diff --git a/arch/arm/mach-shmobile/board-bockw.c b/arch/arm/mach-shmobile/board-bockw.c index fa8885b2d5a5..35dd7f201a16 100644 --- a/arch/arm/mach-shmobile/board-bockw.c +++ b/arch/arm/mach-shmobile/board-bockw.c @@ -3,6 +3,7 @@ * * Copyright (C) 2013 Renesas Solutions Corp. * Copyright (C) 2013 Kuninori Morimoto <kuninori.morimoto.gx@renesas.com> + * Copyright (C) 2013 Cogent Embedded, Inc. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -28,6 +29,7 @@ #include <linux/smsc911x.h> #include <linux/spi/spi.h> #include <linux/spi/flash.h> +#include <media/soc_camera.h> #include <mach/common.h> #include <mach/irqs.h> #include <mach/r8a7778.h> @@ -142,6 +144,25 @@ static struct sh_mmcif_plat_data sh_mmcif_plat = { MMC_CAP_NEEDS_POLL, }; +static struct rcar_vin_platform_data vin_platform_data __initdata = { + .flags = RCAR_VIN_BT656, +}; + +/* In the default configuration both decoders reside on I2C bus 0 */ +#define BOCKW_CAMERA(idx) \ +static struct i2c_board_info camera##idx##_info = { \ + I2C_BOARD_INFO("ml86v7667", 0x41 + 2 * (idx)), \ +}; \ + \ +static struct soc_camera_link iclink##idx##_ml86v7667 __initdata = { \ + .bus_id = idx, \ + .i2c_adapter_id = 0, \ + .board_info = &camera##idx##_info, \ +} + +BOCKW_CAMERA(0); +BOCKW_CAMERA(1); + static const struct pinctrl_map bockw_pinctrl_map[] = { /* Ether */ PIN_MAP_MUX_GROUP_DEFAULT("r8a777x-ether", "pfc-r8a7778", @@ -173,6 +194,16 @@ static const struct pinctrl_map bockw_pinctrl_map[] = { "sdhi0_cd", "sdhi0"), PIN_MAP_MUX_GROUP_DEFAULT("sh_mobile_sdhi.0", "pfc-r8a7778", "sdhi0_wp", "sdhi0"), + /* VIN0 */ + PIN_MAP_MUX_GROUP_DEFAULT("r8a7778-vin.0", "pfc-r8a7778", + "vin0_clk", "vin0"), + PIN_MAP_MUX_GROUP_DEFAULT("r8a7778-vin.0", "pfc-r8a7778", + "vin0_data8", "vin0"), + /* VIN1 */ + PIN_MAP_MUX_GROUP_DEFAULT("r8a7778-vin.1", "pfc-r8a7778", + "vin1_clk", "vin1"), + PIN_MAP_MUX_GROUP_DEFAULT("r8a7778-vin.1", "pfc-r8a7778", + "vin1_data8", "vin1"), }; #define FPGA 0x18200000 @@ -191,6 +222,16 @@ static void __init bockw_init(void) r8a7778_add_i2c_device(0); r8a7778_add_hspi_device(0); r8a7778_add_mmc_device(&sh_mmcif_plat); + r8a7778_add_vin_device(0, &vin_platform_data); + /* VIN1 has a pin conflict with Ether */ + if (!IS_ENABLED(CONFIG_SH_ETH)) + r8a7778_add_vin_device(1, &vin_platform_data); + platform_device_register_data(&platform_bus, "soc-camera-pdrv", 0, + &iclink0_ml86v7667, + sizeof(iclink0_ml86v7667)); + platform_device_register_data(&platform_bus, "soc-camera-pdrv", 1, + &iclink1_ml86v7667, + sizeof(iclink1_ml86v7667)); i2c_register_board_info(0, i2c0_devices, ARRAY_SIZE(i2c0_devices)); diff --git a/arch/arm/mach-shmobile/board-marzen.c b/arch/arm/mach-shmobile/board-marzen.c index a7d1010505bf..ca7fb2e63c60 100644 --- a/arch/arm/mach-shmobile/board-marzen.c +++ b/arch/arm/mach-shmobile/board-marzen.c @@ -1,8 +1,9 @@ /* * marzen board support * - * Copyright (C) 2011 Renesas Solutions Corp. + * Copyright (C) 2011, 2013 Renesas Solutions Corp. * Copyright (C) 2011 Magnus Damm + * Copyright (C) 2013 Cogent Embedded, Inc. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -37,6 +38,7 @@ #include <linux/mmc/host.h> #include <linux/mmc/sh_mobile_sdhi.h> #include <linux/mfd/tmio.h> +#include <media/soc_camera.h> #include <mach/hardware.h> #include <mach/r8a7779.h> #include <mach/common.h> @@ -178,12 +180,40 @@ static struct platform_device leds_device = { }, }; +static struct rcar_vin_platform_data vin_platform_data __initdata = { + .flags = RCAR_VIN_BT656, +}; + +#define MARZEN_CAMERA(idx) \ +static struct i2c_board_info camera##idx##_info = { \ + I2C_BOARD_INFO("adv7180", 0x20 + (idx)), \ +}; \ + \ +static struct soc_camera_link iclink##idx##_adv7180 = { \ + .bus_id = 1 + 2 * (idx), \ + .i2c_adapter_id = 0, \ + .board_info = &camera##idx##_info, \ +}; \ + \ +static struct platform_device camera##idx##_device = { \ + .name = "soc-camera-pdrv", \ + .id = idx, \ + .dev = { \ + .platform_data = &iclink##idx##_adv7180, \ + }, \ +}; + +MARZEN_CAMERA(0); +MARZEN_CAMERA(1); + static struct platform_device *marzen_devices[] __initdata = { &eth_device, &sdhi0_device, &thermal_device, &hspi_device, &leds_device, + &camera0_device, + &camera1_device, }; static const struct pinctrl_map marzen_pinctrl_map[] = { @@ -219,6 +249,16 @@ static const struct pinctrl_map marzen_pinctrl_map[] = { /* USB2 */ PIN_MAP_MUX_GROUP_DEFAULT("ehci-platform.1", "pfc-r8a7779", "usb2", "usb2"), + /* VIN1 */ + PIN_MAP_MUX_GROUP_DEFAULT("r8a7779-vin.1", "pfc-r8a7779", + "vin1_clk", "vin1"), + PIN_MAP_MUX_GROUP_DEFAULT("r8a7779-vin.1", "pfc-r8a7779", + "vin1_data8", "vin1"), + /* VIN3 */ + PIN_MAP_MUX_GROUP_DEFAULT("r8a7779-vin.3", "pfc-r8a7779", + "vin3_clk", "vin3"), + PIN_MAP_MUX_GROUP_DEFAULT("r8a7779-vin.3", "pfc-r8a7779", + "vin3_data8", "vin3"), }; static void __init marzen_init(void) @@ -235,6 +275,8 @@ static void __init marzen_init(void) r8a7779_add_standard_devices(); r8a7779_add_usb_phy_device(&usb_phy_platform_data); + r8a7779_add_vin_device(1, &vin_platform_data); + r8a7779_add_vin_device(3, &vin_platform_data); platform_add_devices(marzen_devices, ARRAY_SIZE(marzen_devices)); } diff --git a/arch/arm/mach-shmobile/clock-r8a7778.c b/arch/arm/mach-shmobile/clock-r8a7778.c index a0e9eb72e46d..c4bf2d8fb111 100644 --- a/arch/arm/mach-shmobile/clock-r8a7778.c +++ b/arch/arm/mach-shmobile/clock-r8a7778.c @@ -106,6 +106,7 @@ enum { MSTP331, MSTP323, MSTP322, MSTP321, MSTP114, + MSTP110, MSTP109, MSTP100, MSTP030, MSTP029, MSTP028, MSTP027, MSTP026, MSTP025, MSTP024, MSTP023, MSTP022, MSTP021, @@ -119,6 +120,8 @@ static struct clk mstp_clks[MSTP_NR] = { [MSTP322] = SH_CLK_MSTP32(&p_clk, MSTPCR3, 22, 0), /* SDHI1 */ [MSTP321] = SH_CLK_MSTP32(&p_clk, MSTPCR3, 21, 0), /* SDHI2 */ [MSTP114] = SH_CLK_MSTP32(&p_clk, MSTPCR1, 14, 0), /* Ether */ + [MSTP110] = SH_CLK_MSTP32(&s_clk, MSTPCR1, 10, 0), /* VIN0 */ + [MSTP109] = SH_CLK_MSTP32(&s_clk, MSTPCR1, 9, 0), /* VIN1 */ [MSTP100] = SH_CLK_MSTP32(&p_clk, MSTPCR1, 0, 0), /* USB0/1 */ [MSTP030] = SH_CLK_MSTP32(&p_clk, MSTPCR0, 30, 0), /* I2C0 */ [MSTP029] = SH_CLK_MSTP32(&p_clk, MSTPCR0, 29, 0), /* I2C1 */ @@ -146,6 +149,8 @@ static struct clk_lookup lookups[] = { CLKDEV_DEV_ID("sh_mobile_sdhi.1", &mstp_clks[MSTP322]), /* SDHI1 */ CLKDEV_DEV_ID("sh_mobile_sdhi.2", &mstp_clks[MSTP321]), /* SDHI2 */ CLKDEV_DEV_ID("r8a777x-ether", &mstp_clks[MSTP114]), /* Ether */ + CLKDEV_DEV_ID("r8a7778-vin.0", &mstp_clks[MSTP110]), /* VIN0 */ + CLKDEV_DEV_ID("r8a7778-vin.1", &mstp_clks[MSTP109]), /* VIN1 */ 
CLKDEV_DEV_ID("ehci-platform", &mstp_clks[MSTP100]), /* USB EHCI port0/1 */ CLKDEV_DEV_ID("ohci-platform", &mstp_clks[MSTP100]), /* USB OHCI port0/1 */ CLKDEV_DEV_ID("i2c-rcar.0", &mstp_clks[MSTP030]), /* I2C0 */ diff --git a/arch/arm/mach-shmobile/clock-r8a7779.c b/arch/arm/mach-shmobile/clock-r8a7779.c index 10340f5becbb..bd6ad922eb7e 100644 --- a/arch/arm/mach-shmobile/clock-r8a7779.c +++ b/arch/arm/mach-shmobile/clock-r8a7779.c @@ -112,7 +112,9 @@ static struct clk *main_clks[] = { }; enum { MSTP323, MSTP322, MSTP321, MSTP320, + MSTP120, MSTP116, MSTP115, MSTP114, + MSTP110, MSTP109, MSTP108, MSTP103, MSTP101, MSTP100, MSTP030, MSTP029, MSTP028, MSTP027, MSTP026, MSTP025, MSTP024, MSTP023, MSTP022, MSTP021, @@ -125,9 +127,13 @@ static struct clk mstp_clks[MSTP_NR] = { [MSTP322] = SH_CLK_MSTP32(&clkp_clk, MSTPCR3, 22, 0), /* SDHI1 */ [MSTP321] = SH_CLK_MSTP32(&clkp_clk, MSTPCR3, 21, 0), /* SDHI2 */ [MSTP320] = SH_CLK_MSTP32(&clkp_clk, MSTPCR3, 20, 0), /* SDHI3 */ + [MSTP120] = SH_CLK_MSTP32(&clks_clk, MSTPCR1, 20, 0), /* VIN3 */ [MSTP116] = SH_CLK_MSTP32(&clkp_clk, MSTPCR1, 16, 0), /* PCIe */ [MSTP115] = SH_CLK_MSTP32(&clkp_clk, MSTPCR1, 15, 0), /* SATA */ [MSTP114] = SH_CLK_MSTP32(&clkp_clk, MSTPCR1, 14, 0), /* Ether */ + [MSTP110] = SH_CLK_MSTP32(&clks_clk, MSTPCR1, 10, 0), /* VIN0 */ + [MSTP109] = SH_CLK_MSTP32(&clks_clk, MSTPCR1, 9, 0), /* VIN1 */ + [MSTP108] = SH_CLK_MSTP32(&clks_clk, MSTPCR1, 8, 0), /* VIN2 */ [MSTP103] = SH_CLK_MSTP32(&clks_clk, MSTPCR1, 3, 0), /* DU */ [MSTP101] = SH_CLK_MSTP32(&clkp_clk, MSTPCR1, 1, 0), /* USB2 */ [MSTP100] = SH_CLK_MSTP32(&clkp_clk, MSTPCR1, 0, 0), /* USB0/1 */ @@ -162,10 +168,14 @@ static struct clk_lookup lookups[] = { CLKDEV_CON_ID("peripheral_clk", &clkp_clk), /* MSTP32 clocks */ + CLKDEV_DEV_ID("r8a7779-vin.3", &mstp_clks[MSTP120]), /* VIN3 */ CLKDEV_DEV_ID("rcar-pcie", &mstp_clks[MSTP116]), /* PCIe */ CLKDEV_DEV_ID("sata_rcar", &mstp_clks[MSTP115]), /* SATA */ CLKDEV_DEV_ID("fc600000.sata", &mstp_clks[MSTP115]), /* SATA w/DT */ CLKDEV_DEV_ID("r8a777x-ether", &mstp_clks[MSTP114]), /* Ether */ + CLKDEV_DEV_ID("r8a7779-vin.0", &mstp_clks[MSTP110]), /* VIN0 */ + CLKDEV_DEV_ID("r8a7779-vin.1", &mstp_clks[MSTP109]), /* VIN1 */ + CLKDEV_DEV_ID("r8a7779-vin.2", &mstp_clks[MSTP108]), /* VIN2 */ CLKDEV_DEV_ID("ehci-platform.1", &mstp_clks[MSTP101]), /* USB EHCI port2 */ CLKDEV_DEV_ID("ohci-platform.1", &mstp_clks[MSTP101]), /* USB OHCI port2 */ CLKDEV_DEV_ID("ehci-platform.0", &mstp_clks[MSTP100]), /* USB EHCI port0/1 */ diff --git a/arch/arm/mach-shmobile/include/mach/r8a7778.h b/arch/arm/mach-shmobile/include/mach/r8a7778.h index 851d027a2f06..a7c6d151cdd5 100644 --- a/arch/arm/mach-shmobile/include/mach/r8a7778.h +++ b/arch/arm/mach-shmobile/include/mach/r8a7778.h @@ -22,6 +22,7 @@ #include <linux/mmc/sh_mobile_sdhi.h> #include <linux/sh_eth.h> #include <linux/platform_data/usb-rcar-phy.h> +#include <linux/platform_data/camera-rcar.h> extern void r8a7778_add_standard_devices(void); extern void r8a7778_add_standard_devices_dt(void); @@ -30,6 +31,8 @@ extern void r8a7778_add_usb_phy_device(struct rcar_phy_platform_data *pdata); extern void r8a7778_add_i2c_device(int id); extern void r8a7778_add_hspi_device(int id); extern void r8a7778_add_mmc_device(struct sh_mmcif_plat_data *info); +extern void r8a7778_add_vin_device(int id, + struct rcar_vin_platform_data *pdata); extern void r8a7778_init_late(void); extern void r8a7778_init_delay(void); diff --git a/arch/arm/mach-shmobile/include/mach/r8a7779.h b/arch/arm/mach-shmobile/include/mach/r8a7779.h 
index fc47073c7ba9..6d2b6417fe2a 100644 --- a/arch/arm/mach-shmobile/include/mach/r8a7779.h +++ b/arch/arm/mach-shmobile/include/mach/r8a7779.h @@ -5,6 +5,7 @@ #include <linux/pm_domain.h> #include <linux/sh_eth.h> #include <linux/platform_data/usb-rcar-phy.h> +#include <linux/platform_data/camera-rcar.h> struct platform_device; @@ -35,6 +36,8 @@ extern void r8a7779_add_standard_devices(void); extern void r8a7779_add_standard_devices_dt(void); extern void r8a7779_add_ether_device(struct sh_eth_plat_data *pdata); extern void r8a7779_add_usb_phy_device(struct rcar_phy_platform_data *pdata); +extern void r8a7779_add_vin_device(int idx, + struct rcar_vin_platform_data *pdata); extern void r8a7779_init_late(void); extern void r8a7779_clock_init(void); extern void r8a7779_pinmux_init(void); diff --git a/arch/arm/mach-shmobile/setup-r8a7778.c b/arch/arm/mach-shmobile/setup-r8a7778.c index 80c20392ad7c..0174f059eac3 100644 --- a/arch/arm/mach-shmobile/setup-r8a7778.c +++ b/arch/arm/mach-shmobile/setup-r8a7778.c @@ -333,6 +333,40 @@ void __init r8a7778_add_mmc_device(struct sh_mmcif_plat_data *info) info, sizeof(*info)); } +/* VIN */ +#define R8A7778_VIN(idx) \ +static struct resource vin##idx##_resources[] __initdata = { \ + DEFINE_RES_MEM(0xffc50000 + 0x1000 * (idx), 0x1000), \ + DEFINE_RES_IRQ(gic_iid(0x5a)), \ +}; \ + \ +static struct platform_device_info vin##idx##_info __initdata = { \ + .parent = &platform_bus, \ + .name = "r8a7778-vin", \ + .id = idx, \ + .res = vin##idx##_resources, \ + .num_res = ARRAY_SIZE(vin##idx##_resources), \ + .dma_mask = DMA_BIT_MASK(32), \ +} + +R8A7778_VIN(0); +R8A7778_VIN(1); + +static struct platform_device_info *vin_info_table[] __initdata = { + &vin0_info, + &vin1_info, +}; + +void __init r8a7778_add_vin_device(int id, struct rcar_vin_platform_data *pdata) +{ + BUG_ON(id < 0 || id > 1); + + vin_info_table[id]->data = pdata; + vin_info_table[id]->size_data = sizeof(*pdata); + + platform_device_register_full(vin_info_table[id]); +} + void __init r8a7778_add_standard_devices(void) { int i; diff --git a/arch/arm/mach-shmobile/setup-r8a7779.c b/arch/arm/mach-shmobile/setup-r8a7779.c index 398687761f50..3d8928895503 100644 --- a/arch/arm/mach-shmobile/setup-r8a7779.c +++ b/arch/arm/mach-shmobile/setup-r8a7779.c @@ -559,6 +559,33 @@ static struct resource ether_resources[] = { }, }; +#define R8A7779_VIN(idx) \ +static struct resource vin##idx##_resources[] __initdata = { \ + DEFINE_RES_MEM(0xffc50000 + 0x1000 * (idx), 0x1000), \ + DEFINE_RES_IRQ(gic_iid(0x5f + (idx))), \ +}; \ + \ +static struct platform_device_info vin##idx##_info __initdata = { \ + .parent = &platform_bus, \ + .name = "r8a7779-vin", \ + .id = idx, \ + .res = vin##idx##_resources, \ + .num_res = ARRAY_SIZE(vin##idx##_resources), \ + .dma_mask = DMA_BIT_MASK(32), \ +} + +R8A7779_VIN(0); +R8A7779_VIN(1); +R8A7779_VIN(2); +R8A7779_VIN(3); + +static struct platform_device_info *vin_info_table[] __initdata = { + &vin0_info, + &vin1_info, + &vin2_info, + &vin3_info, +}; + static struct platform_device *r8a7779_devices_dt[] __initdata = { &scif0_device, &scif1_device, @@ -610,6 +637,16 @@ void __init r8a7779_add_usb_phy_device(struct rcar_phy_platform_data *pdata) pdata, sizeof(*pdata)); } +void __init r8a7779_add_vin_device(int id, struct rcar_vin_platform_data *pdata) +{ + BUG_ON(id < 0 || id > 3); + + vin_info_table[id]->data = pdata; + vin_info_table[id]->size_data = sizeof(*pdata); + + platform_device_register_full(vin_info_table[id]); +} + /* do nothing for !CONFIG_SMP or !CONFIG_HAVE_TWD */ void 
__init __weak r8a7779_register_twd(void) { } diff --git a/arch/arm/mach-tegra/tegra.c b/arch/arm/mach-tegra/tegra.c index 0d1e4128d460..fc97cfd52769 100644 --- a/arch/arm/mach-tegra/tegra.c +++ b/arch/arm/mach-tegra/tegra.c @@ -29,7 +29,6 @@ #include <linux/of_fdt.h> #include <linux/of_platform.h> #include <linux/pda_power.h> -#include <linux/platform_data/tegra_usb.h> #include <linux/io.h> #include <linux/slab.h> #include <linux/sys_soc.h> @@ -46,40 +45,6 @@ #include "fuse.h" #include "iomap.h" -static struct tegra_ehci_platform_data tegra_ehci1_pdata = { - .operating_mode = TEGRA_USB_OTG, - .power_down_on_bus_suspend = 1, - .vbus_gpio = -1, -}; - -static struct tegra_ulpi_config tegra_ehci2_ulpi_phy_config = { - .reset_gpio = -1, - .clk = "cdev2", -}; - -static struct tegra_ehci_platform_data tegra_ehci2_pdata = { - .phy_config = &tegra_ehci2_ulpi_phy_config, - .operating_mode = TEGRA_USB_HOST, - .power_down_on_bus_suspend = 1, - .vbus_gpio = -1, -}; - -static struct tegra_ehci_platform_data tegra_ehci3_pdata = { - .operating_mode = TEGRA_USB_HOST, - .power_down_on_bus_suspend = 1, - .vbus_gpio = -1, -}; - -static struct of_dev_auxdata tegra20_auxdata_lookup[] __initdata = { - OF_DEV_AUXDATA("nvidia,tegra20-ehci", 0xC5000000, "tegra-ehci.0", - &tegra_ehci1_pdata), - OF_DEV_AUXDATA("nvidia,tegra20-ehci", 0xC5004000, "tegra-ehci.1", - &tegra_ehci2_pdata), - OF_DEV_AUXDATA("nvidia,tegra20-ehci", 0xC5008000, "tegra-ehci.2", - &tegra_ehci3_pdata), - {} -}; - static void __init tegra_dt_init(void) { struct soc_device_attribute *soc_dev_attr; @@ -112,8 +77,7 @@ static void __init tegra_dt_init(void) * devices */ out: - of_platform_populate(NULL, of_default_bus_match_table, - tegra20_auxdata_lookup, parent); + of_platform_populate(NULL, of_default_bus_match_table, NULL, parent); } static void __init trimslice_init(void) diff --git a/arch/arm/mach-ux500/Makefile b/arch/arm/mach-ux500/Makefile index bf9b6be5b180..fe1f3e26b88b 100644 --- a/arch/arm/mach-ux500/Makefile +++ b/arch/arm/mach-ux500/Makefile @@ -4,7 +4,6 @@ obj-y := cpu.o devices.o devices-common.o \ id.o usb.o timer.o pm.o -obj-$(CONFIG_CPU_IDLE) += cpuidle.o obj-$(CONFIG_CACHE_L2X0) += cache-l2x0.o obj-$(CONFIG_UX500_SOC_DB8500) += cpu-db8500.o devices-db8500.o obj-$(CONFIG_MACH_MOP500) += board-mop500.o board-mop500-sdi.o \ diff --git a/arch/arm/mach-ux500/cpuidle.c b/arch/arm/mach-ux500/cpuidle.c deleted file mode 100644 index a45dd09daed9..000000000000 --- a/arch/arm/mach-ux500/cpuidle.c +++ /dev/null @@ -1,128 +0,0 @@ -/* - * Copyright (c) 2012 Linaro : Daniel Lezcano <daniel.lezcano@linaro.org> (IBM) - * - * Based on the work of Rickard Andersson <rickard.andersson@stericsson.com> - * and Jonas Aaberg <jonas.aberg@stericsson.com>. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
- */ - -#include <linux/module.h> -#include <linux/cpuidle.h> -#include <linux/spinlock.h> -#include <linux/atomic.h> -#include <linux/smp.h> -#include <linux/mfd/dbx500-prcmu.h> -#include <linux/platform_data/arm-ux500-pm.h> - -#include <asm/cpuidle.h> -#include <asm/proc-fns.h> - -#include "db8500-regs.h" -#include "id.h" - -static atomic_t master = ATOMIC_INIT(0); -static DEFINE_SPINLOCK(master_lock); - -static inline int ux500_enter_idle(struct cpuidle_device *dev, - struct cpuidle_driver *drv, int index) -{ - int this_cpu = smp_processor_id(); - bool recouple = false; - - if (atomic_inc_return(&master) == num_online_cpus()) { - - /* With this lock, we prevent the other cpu to exit and enter - * this function again and become the master */ - if (!spin_trylock(&master_lock)) - goto wfi; - - /* decouple the gic from the A9 cores */ - if (prcmu_gic_decouple()) { - spin_unlock(&master_lock); - goto out; - } - - /* If an error occur, we will have to recouple the gic - * manually */ - recouple = true; - - /* At this state, as the gic is decoupled, if the other - * cpu is in WFI, we have the guarantee it won't be wake - * up, so we can safely go to retention */ - if (!prcmu_is_cpu_in_wfi(this_cpu ? 0 : 1)) - goto out; - - /* The prcmu will be in charge of watching the interrupts - * and wake up the cpus */ - if (prcmu_copy_gic_settings()) - goto out; - - /* Check in the meantime an interrupt did - * not occur on the gic ... */ - if (prcmu_gic_pending_irq()) - goto out; - - /* ... and the prcmu */ - if (prcmu_pending_irq()) - goto out; - - /* Go to the retention state, the prcmu will wait for the - * cpu to go WFI and this is what happens after exiting this - * 'master' critical section */ - if (prcmu_set_power_state(PRCMU_AP_IDLE, true, true)) - goto out; - - /* When we switch to retention, the prcmu is in charge - * of recoupling the gic automatically */ - recouple = false; - - spin_unlock(&master_lock); - } -wfi: - cpu_do_idle(); -out: - atomic_dec(&master); - - if (recouple) { - prcmu_gic_recouple(); - spin_unlock(&master_lock); - } - - return index; -} - -static struct cpuidle_driver ux500_idle_driver = { - .name = "ux500_idle", - .owner = THIS_MODULE, - .states = { - ARM_CPUIDLE_WFI_STATE, - { - .enter = ux500_enter_idle, - .exit_latency = 70, - .target_residency = 260, - .flags = CPUIDLE_FLAG_TIME_VALID | - CPUIDLE_FLAG_TIMER_STOP, - .name = "ApIdle", - .desc = "ARM Retention", - }, - }, - .safe_state_index = 0, - .state_count = 2, -}; - -int __init ux500_idle_init(void) -{ - if (!(cpu_is_u8500_family() || cpu_is_ux540_family())) - return -ENODEV; - - /* Configure wake up reasons */ - prcmu_enable_wakeups(PRCMU_WAKEUP(ARM) | PRCMU_WAKEUP(RTC) | - PRCMU_WAKEUP(ABB)); - - return cpuidle_register(&ux500_idle_driver, NULL); -} - -device_initcall(ux500_idle_init); diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 7f9b1798c6cf..dbddc07a3bbd 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -358,7 +358,7 @@ static int __init atomic_pool_init(void) if (!pages) goto no_pages; - if (IS_ENABLED(CONFIG_CMA)) + if (IS_ENABLED(CONFIG_DMA_CMA)) ptr = __alloc_from_contiguous(NULL, pool->size, prot, &page, atomic_pool_init); else @@ -670,7 +670,7 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, addr = __alloc_simple_buffer(dev, size, gfp, &page); else if (!(gfp & __GFP_WAIT)) addr = __alloc_from_pool(size, &page); - else if (!IS_ENABLED(CONFIG_CMA)) + else if (!IS_ENABLED(CONFIG_DMA_CMA)) addr = __alloc_remap_buffer(dev, 
size, gfp, prot, &page, caller); else addr = __alloc_from_contiguous(dev, size, prot, &page, caller); @@ -759,7 +759,7 @@ static void __arm_dma_free(struct device *dev, size_t size, void *cpu_addr, __dma_free_buffer(page, size); } else if (__free_from_pool(cpu_addr, size)) { return; - } else if (!IS_ENABLED(CONFIG_CMA)) { + } else if (!IS_ENABLED(CONFIG_DMA_CMA)) { __dma_free_remap(cpu_addr, size); __dma_free_buffer(page, size); } else { diff --git a/arch/arm/plat-pxa/ssp.c b/arch/arm/plat-pxa/ssp.c index 8e11e96eab5e..c83f27b6bdda 100644 --- a/arch/arm/plat-pxa/ssp.c +++ b/arch/arm/plat-pxa/ssp.c @@ -30,6 +30,8 @@ #include <linux/platform_device.h> #include <linux/spi/pxa2xx_spi.h> #include <linux/io.h> +#include <linux/of.h> +#include <linux/of_device.h> #include <asm/irq.h> #include <mach/hardware.h> @@ -60,6 +62,30 @@ struct ssp_device *pxa_ssp_request(int port, const char *label) } EXPORT_SYMBOL(pxa_ssp_request); +struct ssp_device *pxa_ssp_request_of(const struct device_node *of_node, + const char *label) +{ + struct ssp_device *ssp = NULL; + + mutex_lock(&ssp_lock); + + list_for_each_entry(ssp, &ssp_list, node) { + if (ssp->of_node == of_node && ssp->use_count == 0) { + ssp->use_count++; + ssp->label = label; + break; + } + } + + mutex_unlock(&ssp_lock); + + if (&ssp->node == &ssp_list) + return NULL; + + return ssp; +} +EXPORT_SYMBOL(pxa_ssp_request_of); + void pxa_ssp_free(struct ssp_device *ssp) { mutex_lock(&ssp_lock); @@ -72,96 +98,126 @@ void pxa_ssp_free(struct ssp_device *ssp) } EXPORT_SYMBOL(pxa_ssp_free); +#ifdef CONFIG_OF +static const struct of_device_id pxa_ssp_of_ids[] = { + { .compatible = "mrvl,pxa25x-ssp", .data = (void *) PXA25x_SSP }, + { .compatible = "mvrl,pxa25x-nssp", .data = (void *) PXA25x_NSSP }, + { .compatible = "mrvl,pxa27x-ssp", .data = (void *) PXA27x_SSP }, + { .compatible = "mrvl,pxa3xx-ssp", .data = (void *) PXA3xx_SSP }, + { .compatible = "mvrl,pxa168-ssp", .data = (void *) PXA168_SSP }, + { .compatible = "mrvl,pxa910-ssp", .data = (void *) PXA910_SSP }, + { .compatible = "mrvl,ce4100-ssp", .data = (void *) CE4100_SSP }, + { .compatible = "mrvl,lpss-ssp", .data = (void *) LPSS_SSP }, + { }, +}; +MODULE_DEVICE_TABLE(of, pxa_ssp_of_ids); +#endif + static int pxa_ssp_probe(struct platform_device *pdev) { - const struct platform_device_id *id = platform_get_device_id(pdev); struct resource *res; struct ssp_device *ssp; - int ret = 0; + struct device *dev = &pdev->dev; - ssp = kzalloc(sizeof(struct ssp_device), GFP_KERNEL); - if (ssp == NULL) { - dev_err(&pdev->dev, "failed to allocate memory"); + ssp = devm_kzalloc(dev, sizeof(struct ssp_device), GFP_KERNEL); + if (ssp == NULL) return -ENOMEM; - } - ssp->pdev = pdev; - ssp->clk = clk_get(&pdev->dev, NULL); - if (IS_ERR(ssp->clk)) { - ret = PTR_ERR(ssp->clk); - goto err_free; - } + ssp->pdev = pdev; - res = platform_get_resource(pdev, IORESOURCE_DMA, 0); - if (res == NULL) { - dev_err(&pdev->dev, "no SSP RX DRCMR defined\n"); - ret = -ENODEV; - goto err_free_clk; - } - ssp->drcmr_rx = res->start; + ssp->clk = devm_clk_get(dev, NULL); + if (IS_ERR(ssp->clk)) + return PTR_ERR(ssp->clk); + + if (dev->of_node) { + struct of_phandle_args dma_spec; + struct device_node *np = dev->of_node; + + /* + * FIXME: we should allocate the DMA channel from this + * context and pass the channel down to the ssp users. 
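A note on the new OF lookup path above: pxa_ssp_request_of() mirrors pxa_ssp_request(), but keys on the device_node rather than the port number, so DT clients can resolve their SSP through a phandle. A minimal sketch of such a client — the "port" binding property, the label, and the driver names are illustrative assumptions, not part of this patch:

/* Hypothetical DT client of pxa_ssp_request_of(); assumes the client
 * node carries a "port" phandle pointing at one of the SSP nodes. */
static int example_ssp_client_probe(struct platform_device *pdev)
{
	struct device_node *ssp_np;
	struct ssp_device *ssp;

	ssp_np = of_parse_phandle(pdev->dev.of_node, "port", 0);
	if (!ssp_np)
		return -ENODEV;

	ssp = pxa_ssp_request_of(ssp_np, "example-ssp-client");
	of_node_put(ssp_np);
	if (!ssp)
		return -EPROBE_DEFER;	/* SSP not probed yet, or in use */

	platform_set_drvdata(pdev, ssp);
	return 0;
}

static int example_ssp_client_remove(struct platform_device *pdev)
{
	pxa_ssp_free(platform_get_drvdata(pdev));
	return 0;
}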
+ * For now, we lookup the rx and tx indices manually + */ + + /* rx */ + of_parse_phandle_with_args(np, "dmas", "#dma-cells", + 0, &dma_spec); + ssp->drcmr_rx = dma_spec.args[0]; + of_node_put(dma_spec.np); + + /* tx */ + of_parse_phandle_with_args(np, "dmas", "#dma-cells", + 1, &dma_spec); + ssp->drcmr_tx = dma_spec.args[0]; + of_node_put(dma_spec.np); + } else { + res = platform_get_resource(pdev, IORESOURCE_DMA, 0); + if (res == NULL) { + dev_err(dev, "no SSP RX DRCMR defined\n"); + return -ENODEV; + } + ssp->drcmr_rx = res->start; - res = platform_get_resource(pdev, IORESOURCE_DMA, 1); - if (res == NULL) { - dev_err(&pdev->dev, "no SSP TX DRCMR defined\n"); - ret = -ENODEV; - goto err_free_clk; + res = platform_get_resource(pdev, IORESOURCE_DMA, 1); + if (res == NULL) { + dev_err(dev, "no SSP TX DRCMR defined\n"); + return -ENODEV; + } + ssp->drcmr_tx = res->start; } - ssp->drcmr_tx = res->start; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (res == NULL) { - dev_err(&pdev->dev, "no memory resource defined\n"); - ret = -ENODEV; - goto err_free_clk; + dev_err(dev, "no memory resource defined\n"); + return -ENODEV; } - res = request_mem_region(res->start, resource_size(res), - pdev->name); + res = devm_request_mem_region(dev, res->start, resource_size(res), + pdev->name); if (res == NULL) { - dev_err(&pdev->dev, "failed to request memory resource\n"); - ret = -EBUSY; - goto err_free_clk; + dev_err(dev, "failed to request memory resource\n"); + return -EBUSY; } ssp->phys_base = res->start; - ssp->mmio_base = ioremap(res->start, resource_size(res)); + ssp->mmio_base = devm_ioremap(dev, res->start, resource_size(res)); if (ssp->mmio_base == NULL) { - dev_err(&pdev->dev, "failed to ioremap() registers\n"); - ret = -ENODEV; - goto err_free_mem; + dev_err(dev, "failed to ioremap() registers\n"); + return -ENODEV; } ssp->irq = platform_get_irq(pdev, 0); if (ssp->irq < 0) { - dev_err(&pdev->dev, "no IRQ resource defined\n"); - ret = -ENODEV; - goto err_free_io; + dev_err(dev, "no IRQ resource defined\n"); + return -ENODEV; + } + + if (dev->of_node) { + const struct of_device_id *id = + of_match_device(of_match_ptr(pxa_ssp_of_ids), dev); + ssp->type = (int) id->data; + } else { + const struct platform_device_id *id = + platform_get_device_id(pdev); + ssp->type = (int) id->driver_data; + + /* PXA2xx/3xx SSP ports starts from 1 and the internal pdev->id + * starts from 0, do a translation here + */ + ssp->port_id = pdev->id + 1; } - /* PXA2xx/3xx SSP ports starts from 1 and the internal pdev->id - * starts from 0, do a translation here - */ - ssp->port_id = pdev->id + 1; ssp->use_count = 0; - ssp->type = (int)id->driver_data; + ssp->of_node = dev->of_node; mutex_lock(&ssp_lock); list_add(&ssp->node, &ssp_list); mutex_unlock(&ssp_lock); platform_set_drvdata(pdev, ssp); - return 0; -err_free_io: - iounmap(ssp->mmio_base); -err_free_mem: - release_mem_region(res->start, resource_size(res)); -err_free_clk: - clk_put(ssp->clk); -err_free: - kfree(ssp); - return ret; + return 0; } static int pxa_ssp_remove(struct platform_device *pdev) @@ -201,8 +257,9 @@ static struct platform_driver pxa_ssp_driver = { .probe = pxa_ssp_probe, .remove = pxa_ssp_remove, .driver = { - .owner = THIS_MODULE, - .name = "pxa2xx-ssp", + .owner = THIS_MODULE, + .name = "pxa2xx-ssp", + .of_match_table = of_match_ptr(pxa_ssp_of_ids), }, .id_table = ssp_id_table, }; diff --git a/arch/arm/plat-samsung/s3c-dma-ops.c b/arch/arm/plat-samsung/s3c-dma-ops.c index 0cc40aea3f5a..98b10ba67dc7 100644 --- 
a/arch/arm/plat-samsung/s3c-dma-ops.c +++ b/arch/arm/plat-samsung/s3c-dma-ops.c @@ -82,7 +82,8 @@ static int s3c_dma_config(unsigned ch, struct samsung_dma_config *param) static int s3c_dma_prepare(unsigned ch, struct samsung_dma_prep *param) { struct cb_data *data; - int len = (param->cap == DMA_CYCLIC) ? param->period : param->len; + dma_addr_t pos = param->buf; + dma_addr_t end = param->buf + param->len; list_for_each_entry(data, &dma_list, node) if (data->ch == ch) @@ -94,7 +95,15 @@ static int s3c_dma_prepare(unsigned ch, struct samsung_dma_prep *param) data->fp_param = param->fp_param; } - s3c2410_dma_enqueue(ch, (void *)data, param->buf, len); + if (param->cap != DMA_CYCLIC) { + s3c2410_dma_enqueue(ch, (void *)data, param->buf, param->len); + return 0; + } + + while (pos < end) { + s3c2410_dma_enqueue(ch, (void *)data, pos, param->period); + pos += param->period; + } return 0; } diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h index 98abd476992d..c9f1d2816c2b 100644 --- a/arch/arm64/include/asm/arch_timer.h +++ b/arch/arm64/include/asm/arch_timer.h @@ -26,7 +26,13 @@ #include <clocksource/arm_arch_timer.h> -static inline void arch_timer_reg_write(int access, int reg, u32 val) +/* + * These register accessors are marked inline so the compiler can + * nicely work out which register we want, and chuck away the rest of + * the code. + */ +static __always_inline +void arch_timer_reg_write_cp15(int access, enum arch_timer_reg reg, u32 val) { if (access == ARCH_TIMER_PHYS_ACCESS) { switch (reg) { @@ -36,8 +42,6 @@ static inline void arch_timer_reg_write(int access, int reg, u32 val) case ARCH_TIMER_REG_TVAL: asm volatile("msr cntp_tval_el0, %0" : : "r" (val)); break; - default: - BUILD_BUG(); } } else if (access == ARCH_TIMER_VIRT_ACCESS) { switch (reg) { @@ -47,17 +51,14 @@ static inline void arch_timer_reg_write(int access, int reg, u32 val) case ARCH_TIMER_REG_TVAL: asm volatile("msr cntv_tval_el0, %0" : : "r" (val)); break; - default: - BUILD_BUG(); } - } else { - BUILD_BUG(); } isb(); } -static inline u32 arch_timer_reg_read(int access, int reg) +static __always_inline +u32 arch_timer_reg_read_cp15(int access, enum arch_timer_reg reg) { u32 val; @@ -69,8 +70,6 @@ static inline u32 arch_timer_reg_read(int access, int reg) case ARCH_TIMER_REG_TVAL: asm volatile("mrs %0, cntp_tval_el0" : "=r" (val)); break; - default: - BUILD_BUG(); } } else if (access == ARCH_TIMER_VIRT_ACCESS) { switch (reg) { @@ -80,11 +79,7 @@ static inline u32 arch_timer_reg_read(int access, int reg) case ARCH_TIMER_REG_TVAL: asm volatile("mrs %0, cntv_tval_el0" : "=r" (val)); break; - default: - BUILD_BUG(); } - } else { - BUILD_BUG(); } return val; diff --git a/arch/avr32/oprofile/op_model_avr32.c b/arch/avr32/oprofile/op_model_avr32.c index f74b7809e089..08308be2c02c 100644 --- a/arch/avr32/oprofile/op_model_avr32.c +++ b/arch/avr32/oprofile/op_model_avr32.c @@ -97,8 +97,7 @@ static irqreturn_t avr32_perf_counter_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } -static int avr32_perf_counter_create_files(struct super_block *sb, - struct dentry *root) +static int avr32_perf_counter_create_files(struct dentry *root) { struct dentry *dir; unsigned int i; @@ -106,21 +105,21 @@ static int avr32_perf_counter_create_files(struct super_block *sb, for (i = 0; i < NR_counter; i++) { snprintf(filename, sizeof(filename), "%u", i); - dir = oprofilefs_mkdir(sb, root, filename); + dir = oprofilefs_mkdir(root, filename); - oprofilefs_create_ulong(sb, dir, "enabled", + 
oprofilefs_create_ulong(dir, "enabled", &counter[i].enabled); - oprofilefs_create_ulong(sb, dir, "event", + oprofilefs_create_ulong(dir, "event", &counter[i].event); - oprofilefs_create_ulong(sb, dir, "count", + oprofilefs_create_ulong(dir, "count", &counter[i].count); /* Dummy entries */ - oprofilefs_create_ulong(sb, dir, "kernel", + oprofilefs_create_ulong(dir, "kernel", &counter[i].kernel); - oprofilefs_create_ulong(sb, dir, "user", + oprofilefs_create_ulong(dir, "user", &counter[i].user); - oprofilefs_create_ulong(sb, dir, "unit_mask", + oprofilefs_create_ulong(dir, "unit_mask", &counter[i].unit_mask); } diff --git a/arch/frv/mb93090-mb00/pci-vdk.c b/arch/frv/mb93090-mb00/pci-vdk.c index 0aa35f0eb0db..deb67843693c 100644 --- a/arch/frv/mb93090-mb00/pci-vdk.c +++ b/arch/frv/mb93090-mb00/pci-vdk.c @@ -320,7 +320,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pci_fixup_ide_bases); * are examined. */ -void __init pcibios_fixup_bus(struct pci_bus *bus) +void pcibios_fixup_bus(struct pci_bus *bus) { #if 0 printk("### PCIBIOS_FIXUP_BUS(%d)\n",bus->number); diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 5a768ad8e893..566642266324 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -43,6 +43,7 @@ config IA64 select SYSCTL_ARCH_UNALIGN_NO_WARN select HAVE_MOD_ARCH_SPECIFIC select MODULES_USE_ELF_RELA + select ARCH_USE_CMPXCHG_LOCKREF default y help The Itanium Processor Family is Intel's 64-bit successor to @@ -565,9 +566,9 @@ config KEXEC It is an ongoing process to be certain the hardware in a machine is properly shutdown, so do not be surprised if this code does not - initially work for you. It may help to enable device hotplugging - support. As of this writing the exact hardware interface is - strongly in flux, so no good recommendation can be made. + initially work for you. As of this writing the exact hardware + interface is strongly in flux, so no good recommendation can be + made. config CRASH_DUMP bool "kernel crash dumps" diff --git a/arch/ia64/include/asm/Kbuild b/arch/ia64/include/asm/Kbuild index 05b03ecd7933..a3456f34f672 100644 --- a/arch/ia64/include/asm/Kbuild +++ b/arch/ia64/include/asm/Kbuild @@ -3,3 +3,4 @@ generic-y += clkdev.h generic-y += exec.h generic-y += kvm_para.h generic-y += trace_clock.h +generic-y += vtime.h
\ No newline at end of file diff --git a/arch/ia64/include/asm/spinlock.h b/arch/ia64/include/asm/spinlock.h index 54ff557d474e..45698cd15b7b 100644 --- a/arch/ia64/include/asm/spinlock.h +++ b/arch/ia64/include/asm/spinlock.h @@ -102,6 +102,11 @@ static inline int __ticket_spin_is_contended(arch_spinlock_t *lock) return ((tmp - (tmp >> TICKET_SHIFT)) & TICKET_MASK) > 1; } +static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock) +{ + return !(((lock.lock >> TICKET_SHIFT) ^ lock.lock) & TICKET_MASK); +} + static inline int arch_spin_is_locked(arch_spinlock_t *lock) { return __ticket_spin_is_locked(lock); diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 5b2dc0d10c8f..bdfd8789b376 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -1560,6 +1560,10 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) return 0; } +void kvm_arch_memslots_updated(struct kvm *kvm) +{ +} + int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, struct kvm_userspace_memory_region *mem, diff --git a/arch/m68k/amiga/platform.c b/arch/m68k/amiga/platform.c index 6083088c0cca..dacd9f911f71 100644 --- a/arch/m68k/amiga/platform.c +++ b/arch/m68k/amiga/platform.c @@ -56,7 +56,7 @@ static int __init amiga_init_bus(void) n = AMIGAHW_PRESENT(ZORRO3) ? 4 : 2; pdev = platform_device_register_simple("amiga-zorro", -1, zorro_resources, n); - return PTR_RET(pdev); + return PTR_ERR_OR_ZERO(pdev); } subsys_initcall(amiga_init_bus); diff --git a/arch/m68k/emu/natfeat.c b/arch/m68k/emu/natfeat.c index fa277aecfb78..121a6660ad4e 100644 --- a/arch/m68k/emu/natfeat.c +++ b/arch/m68k/emu/natfeat.c @@ -18,11 +18,11 @@ #include <asm/machdep.h> #include <asm/natfeat.h> -extern long nf_get_id2(const char *feature_name); +extern long nf_get_id_phys(unsigned long feature_name); asm("\n" -" .global nf_get_id2,nf_call\n" -"nf_get_id2:\n" +" .global nf_get_id_phys,nf_call\n" +"nf_get_id_phys:\n" " .short 0x7300\n" " rts\n" "nf_call:\n" @@ -31,7 +31,7 @@ asm("\n" "1: moveq.l #0,%d0\n" " rts\n" " .section __ex_table,\"a\"\n" -" .long nf_get_id2,1b\n" +" .long nf_get_id_phys,1b\n" " .long nf_call,1b\n" " .previous"); EXPORT_SYMBOL_GPL(nf_call); @@ -46,7 +46,7 @@ long nf_get_id(const char *feature_name) if (n >= sizeof(name_copy)) return 0; - return nf_get_id2(name_copy); + return nf_get_id_phys(virt_to_phys(name_copy)); } EXPORT_SYMBOL_GPL(nf_get_id); @@ -58,7 +58,7 @@ void nfprint(const char *fmt, ...) 
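The ia64 hunks above (ARCH_USE_CMPXCHG_LOCKREF plus the new arch_spin_value_unlocked()) exist to feed the generic lockref code: a spinlock/count pair read as a single 64-bit word can be tested and updated without ever taking the lock. A simplified sketch of that fast path — the real implementation lives in lib/lockref.c, and the names below are illustrative only:

/* Simplified sketch of the cmpxchg fast path enabled by
 * ARCH_USE_CMPXCHG_LOCKREF. */
union lockref_sketch {
	u64 lock_count;
	struct {
		arch_spinlock_t lock;
		int count;
	};
};

static bool lockref_get_not_locked(union lockref_sketch *lr)
{
	union lockref_sketch old, new;

	old.lock_count = ACCESS_ONCE(lr->lock_count);
	while (arch_spin_value_unlocked(old.lock)) {
		new = old;
		new.count++;
		/* One 64-bit compare-and-swap covers both the lock word
		 * and the reference count. */
		if (cmpxchg64(&lr->lock_count, old.lock_count,
			      new.lock_count) == old.lock_count)
			return true;	/* count bumped, lock never taken */
		old.lock_count = ACCESS_ONCE(lr->lock_count);
	}
	return false;	/* lock is held: caller falls back to spinlocks */
}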
va_start(ap, fmt); n = vsnprintf(buf, 256, fmt, ap); - nf_call(nf_get_id("NF_STDERR"), buf); + nf_call(nf_get_id("NF_STDERR"), virt_to_phys(buf)); va_end(ap); } @@ -83,7 +83,7 @@ void nf_init(void) id = nf_get_id("NF_NAME"); if (!id) return; - nf_call(id, buf, 256); + nf_call(id, virt_to_phys(buf), 256); buf[255] = 0; pr_info("NatFeats found (%s, %lu.%lu)\n", buf, version >> 16, diff --git a/arch/m68k/emu/nfblock.c b/arch/m68k/emu/nfblock.c index e3011338ab40..0721858fbd1e 100644 --- a/arch/m68k/emu/nfblock.c +++ b/arch/m68k/emu/nfblock.c @@ -41,8 +41,8 @@ static inline s32 nfhd_read_write(u32 major, u32 minor, u32 rwflag, u32 recno, static inline s32 nfhd_get_capacity(u32 major, u32 minor, u32 *blocks, u32 *blocksize) { - return nf_call(nfhd_id + NFHD_GET_CAPACITY, major, minor, blocks, - blocksize); + return nf_call(nfhd_id + NFHD_GET_CAPACITY, major, minor, + virt_to_phys(blocks), virt_to_phys(blocksize)); } static LIST_HEAD(nfhd_list); diff --git a/arch/m68k/emu/nfcon.c b/arch/m68k/emu/nfcon.c index 6685bf45c2c3..57e8c8fb5eba 100644 --- a/arch/m68k/emu/nfcon.c +++ b/arch/m68k/emu/nfcon.c @@ -15,6 +15,7 @@ #include <linux/slab.h> #include <linux/err.h> #include <linux/uaccess.h> +#include <linux/io.h> #include <asm/natfeat.h> @@ -25,17 +26,18 @@ static struct tty_driver *nfcon_tty_driver; static void nfputs(const char *str, unsigned int count) { char buf[68]; + unsigned long phys = virt_to_phys(buf); buf[64] = 0; while (count > 64) { memcpy(buf, str, 64); - nf_call(stderr_id, buf); + nf_call(stderr_id, phys); str += 64; count -= 64; } memcpy(buf, str, count); buf[count] = 0; - nf_call(stderr_id, buf); + nf_call(stderr_id, phys); } static void nfcon_write(struct console *con, const char *str, @@ -79,7 +81,7 @@ static int nfcon_tty_put_char(struct tty_struct *tty, unsigned char ch) { char temp[2] = { ch, 0 }; - nf_call(stderr_id, temp); + nf_call(stderr_id, virt_to_phys(temp)); return 1; } diff --git a/arch/m68k/emu/nfeth.c b/arch/m68k/emu/nfeth.c index 695cd737a42e..a0985fd088d1 100644 --- a/arch/m68k/emu/nfeth.c +++ b/arch/m68k/emu/nfeth.c @@ -195,7 +195,8 @@ static struct net_device * __init nfeth_probe(int unit) char mac[ETH_ALEN], host_ip[32], local_ip[32]; int err; - if (!nf_call(nfEtherID + XIF_GET_MAC, unit, mac, ETH_ALEN)) + if (!nf_call(nfEtherID + XIF_GET_MAC, unit, virt_to_phys(mac), + ETH_ALEN)) return NULL; dev = alloc_etherdev(sizeof(struct nfeth_private)); @@ -217,9 +218,9 @@ static struct net_device * __init nfeth_probe(int unit) } nf_call(nfEtherID + XIF_GET_IPHOST, unit, - host_ip, sizeof(host_ip)); + virt_to_phys(host_ip), sizeof(host_ip)); nf_call(nfEtherID + XIF_GET_IPATARI, unit, - local_ip, sizeof(local_ip)); + virt_to_phys(local_ip), sizeof(local_ip)); netdev_info(dev, KBUILD_MODNAME " addr:%s (%s) HWaddr:%pM\n", host_ip, local_ip, mac); diff --git a/arch/m68k/include/asm/irqflags.h b/arch/m68k/include/asm/irqflags.h index 7ef4115b8c4a..a823cd73dc09 100644 --- a/arch/m68k/include/asm/irqflags.h +++ b/arch/m68k/include/asm/irqflags.h @@ -3,7 +3,7 @@ #include <linux/types.h> #ifdef CONFIG_MMU -#include <linux/hardirq.h> +#include <linux/preempt_mask.h> #endif #include <linux/preempt.h> #include <asm/thread_info.h> @@ -67,6 +67,10 @@ static inline void arch_local_irq_restore(unsigned long flags) static inline bool arch_irqs_disabled_flags(unsigned long flags) { + if (MACH_IS_ATARI) { + /* Ignore HSYNC = ipl 2 on Atari */ + return (flags & ~(ALLOWINT | 0x200)) != 0; + } return (flags & ~ALLOWINT) != 0; } diff --git a/arch/m68k/kernel/time.c b/arch/m68k/kernel/time.c 
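All of the m68k NatFeat conversions above follow one rule: the emulator host interprets nf_call() buffer arguments as physical addresses, so every kernel-virtual buffer now goes through virt_to_phys() first. A condensed sketch of the convention, reusing the interfaces shown above (the helper name is hypothetical):

/* NatFeat calls cross into the emulator, which cannot resolve kernel
 * virtual addresses, so buffers are passed by physical address. */
static void example_nf_print(const char *msg)
{
	char buf[80];
	long id = nf_get_id("NF_STDERR");

	if (!id)
		return;
	strlcpy(buf, msg, sizeof(buf));
	nf_call(id, virt_to_phys(buf));
}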
index bea6bcf8f9b8..7eb9792009f8 100644 --- a/arch/m68k/kernel/time.c +++ b/arch/m68k/kernel/time.c @@ -90,7 +90,7 @@ static int __init rtc_init(void) return -ENODEV; pdev = platform_device_register_simple("rtc-generic", -1, NULL, 0); - return PTR_RET(pdev); + return PTR_ERR_OR_ZERO(pdev); } module_init(rtc_init); diff --git a/arch/m68k/platform/coldfire/pci.c b/arch/m68k/platform/coldfire/pci.c index b33f97a13e6d..df9679238b6d 100644 --- a/arch/m68k/platform/coldfire/pci.c +++ b/arch/m68k/platform/coldfire/pci.c @@ -319,7 +319,6 @@ static int __init mcf_pci_init(void) pci_fixup_irqs(pci_common_swizzle, mcf_pci_map_irq); pci_bus_size_bridges(rootbus); pci_bus_assign_resources(rootbus); - pci_enable_bridges(rootbus); return 0; } diff --git a/arch/m68k/q40/config.c b/arch/m68k/q40/config.c index 658542b914fc..078bb744b5fe 100644 --- a/arch/m68k/q40/config.c +++ b/arch/m68k/q40/config.c @@ -338,6 +338,6 @@ static __init int q40_add_kbd_device(void) return -ENODEV; pdev = platform_device_register_simple("q40kbd", -1, NULL, 0); - return PTR_RET(pdev); + return PTR_ERR_OR_ZERO(pdev); } arch_initcall(q40_add_kbd_device); diff --git a/arch/microblaze/include/asm/prom.h b/arch/microblaze/include/asm/prom.h index 20c5e8e5121b..9977816c5ad3 100644 --- a/arch/microblaze/include/asm/prom.h +++ b/arch/microblaze/include/asm/prom.h @@ -50,9 +50,6 @@ void of_parse_dma_window(struct device_node *dn, const void *dma_window_prop, extern void kdump_move_device_tree(void); -/* CPU OF node matching */ -struct device_node *of_get_cpu_node(int cpu, unsigned int *thread); - #endif /* __ASSEMBLY__ */ #endif /* __KERNEL__ */ diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index e12764c2a9d0..dccd7cec442d 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -2305,9 +2305,9 @@ config KEXEC It is an ongoing process to be certain the hardware in a machine is properly shutdown, so do not be surprised if this code does not - initially work for you. It may help to enable device hotplugging - support. As of this writing the exact hardware interface is - strongly in flux, so no good recommendation can be made. + initially work for you. As of this writing the exact hardware + interface is strongly in flux, so no good recommendation can be + made. 
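Several hunks above and below (amiga platform.c, m68k time.c, q40, and the powerpc iommu/time changes later on) swap PTR_RET() for the better-named PTR_ERR_OR_ZERO(); the behaviour is identical. For reference, its semantics amount to:

/* What PTR_ERR_OR_ZERO(ptr) boils down to: 0 for a valid pointer,
 * the encoded errno for an ERR_PTR value. */
static inline int ptr_err_or_zero_sketch(const void *ptr)
{
	if (IS_ERR(ptr))
		return PTR_ERR(ptr);
	return 0;
}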
config CRASH_DUMP bool "Kernel crash dumps" diff --git a/arch/mips/kernel/vpe.c b/arch/mips/kernel/vpe.c index 1765bab000a0..faf84c5f2629 100644 --- a/arch/mips/kernel/vpe.c +++ b/arch/mips/kernel/vpe.c @@ -1335,8 +1335,9 @@ static ssize_t store_kill(struct device *dev, struct device_attribute *attr, return len; } +static DEVICE_ATTR(kill, S_IWUSR, NULL, store_kill); -static ssize_t show_ntcs(struct device *cd, struct device_attribute *attr, +static ssize_t ntcs_show(struct device *cd, struct device_attribute *attr, char *buf) { struct vpe *vpe = get_vpe(tclimit); @@ -1344,7 +1345,7 @@ static ssize_t show_ntcs(struct device *cd, struct device_attribute *attr, return sprintf(buf, "%d\n", vpe->ntcs); } -static ssize_t store_ntcs(struct device *dev, struct device_attribute *attr, +static ssize_t ntcs_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { struct vpe *vpe = get_vpe(tclimit); @@ -1365,12 +1366,14 @@ static ssize_t store_ntcs(struct device *dev, struct device_attribute *attr, out_einval: return -EINVAL; } +static DEVICE_ATTR_RW(ntcs); -static struct device_attribute vpe_class_attributes[] = { - __ATTR(kill, S_IWUSR, NULL, store_kill), - __ATTR(ntcs, S_IRUGO | S_IWUSR, show_ntcs, store_ntcs), - {} +static struct attribute *vpe_attrs[] = { + &dev_attr_kill.attr, + &dev_attr_ntcs.attr, + NULL, }; +ATTRIBUTE_GROUPS(vpe); static void vpe_device_release(struct device *cd) { @@ -1381,7 +1384,7 @@ struct class vpe_class = { .name = "vpe", .owner = THIS_MODULE, .dev_release = vpe_device_release, - .dev_attrs = vpe_class_attributes, + .dev_groups = vpe_groups, }; struct device vpe_device; diff --git a/arch/mips/kvm/kvm_locore.S b/arch/mips/kvm/kvm_locore.S index dca2aa665993..bbace092ad0a 100644 --- a/arch/mips/kvm/kvm_locore.S +++ b/arch/mips/kvm/kvm_locore.S @@ -1,13 +1,13 @@ /* -* This file is subject to the terms and conditions of the GNU General Public -* License. See the file "COPYING" in the main directory of this archive -* for more details. -* -* Main entry point for the guest, exception handling. -* -* Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved. -* Authors: Sanjay Lal <sanjayl@kymasys.com> -*/ + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Main entry point for the guest, exception handling. + * + * Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved.
+ * Authors: Sanjay Lal <sanjayl@kymasys.com> + */ #include <asm/asm.h> #include <asm/asmmacro.h> @@ -55,195 +55,193 @@ * a0: run * a1: vcpu */ + .set noreorder + .set noat FEXPORT(__kvm_mips_vcpu_run) - .set push - .set noreorder - .set noat - - /* k0/k1 not being used in host kernel context */ - addiu k1,sp, -PT_SIZE - LONG_S $0, PT_R0(k1) - LONG_S $1, PT_R1(k1) - LONG_S $2, PT_R2(k1) - LONG_S $3, PT_R3(k1) - - LONG_S $4, PT_R4(k1) - LONG_S $5, PT_R5(k1) - LONG_S $6, PT_R6(k1) - LONG_S $7, PT_R7(k1) - - LONG_S $8, PT_R8(k1) - LONG_S $9, PT_R9(k1) - LONG_S $10, PT_R10(k1) - LONG_S $11, PT_R11(k1) - LONG_S $12, PT_R12(k1) - LONG_S $13, PT_R13(k1) - LONG_S $14, PT_R14(k1) - LONG_S $15, PT_R15(k1) - LONG_S $16, PT_R16(k1) - LONG_S $17, PT_R17(k1) - - LONG_S $18, PT_R18(k1) - LONG_S $19, PT_R19(k1) - LONG_S $20, PT_R20(k1) - LONG_S $21, PT_R21(k1) - LONG_S $22, PT_R22(k1) - LONG_S $23, PT_R23(k1) - LONG_S $24, PT_R24(k1) - LONG_S $25, PT_R25(k1) + /* k0/k1 not being used in host kernel context */ + INT_ADDIU k1, sp, -PT_SIZE + LONG_S $0, PT_R0(k1) + LONG_S $1, PT_R1(k1) + LONG_S $2, PT_R2(k1) + LONG_S $3, PT_R3(k1) + + LONG_S $4, PT_R4(k1) + LONG_S $5, PT_R5(k1) + LONG_S $6, PT_R6(k1) + LONG_S $7, PT_R7(k1) + + LONG_S $8, PT_R8(k1) + LONG_S $9, PT_R9(k1) + LONG_S $10, PT_R10(k1) + LONG_S $11, PT_R11(k1) + LONG_S $12, PT_R12(k1) + LONG_S $13, PT_R13(k1) + LONG_S $14, PT_R14(k1) + LONG_S $15, PT_R15(k1) + LONG_S $16, PT_R16(k1) + LONG_S $17, PT_R17(k1) + + LONG_S $18, PT_R18(k1) + LONG_S $19, PT_R19(k1) + LONG_S $20, PT_R20(k1) + LONG_S $21, PT_R21(k1) + LONG_S $22, PT_R22(k1) + LONG_S $23, PT_R23(k1) + LONG_S $24, PT_R24(k1) + LONG_S $25, PT_R25(k1) /* XXXKYMA k0/k1 not saved, not being used if we got here through an ioctl() */ - LONG_S $28, PT_R28(k1) - LONG_S $29, PT_R29(k1) - LONG_S $30, PT_R30(k1) - LONG_S $31, PT_R31(k1) + LONG_S $28, PT_R28(k1) + LONG_S $29, PT_R29(k1) + LONG_S $30, PT_R30(k1) + LONG_S $31, PT_R31(k1) - /* Save hi/lo */ - mflo v0 - LONG_S v0, PT_LO(k1) - mfhi v1 - LONG_S v1, PT_HI(k1) + /* Save hi/lo */ + mflo v0 + LONG_S v0, PT_LO(k1) + mfhi v1 + LONG_S v1, PT_HI(k1) /* Save host status */ - mfc0 v0, CP0_STATUS - LONG_S v0, PT_STATUS(k1) + mfc0 v0, CP0_STATUS + LONG_S v0, PT_STATUS(k1) /* Save host ASID, shove it into the BVADDR location */ - mfc0 v1,CP0_ENTRYHI - andi v1, 0xff - LONG_S v1, PT_HOST_ASID(k1) + mfc0 v1, CP0_ENTRYHI + andi v1, 0xff + LONG_S v1, PT_HOST_ASID(k1) - /* Save DDATA_LO, will be used to store pointer to vcpu */ - mfc0 v1, CP0_DDATA_LO - LONG_S v1, PT_HOST_USERLOCAL(k1) + /* Save DDATA_LO, will be used to store pointer to vcpu */ + mfc0 v1, CP0_DDATA_LO + LONG_S v1, PT_HOST_USERLOCAL(k1) - /* DDATA_LO has pointer to vcpu */ - mtc0 a1,CP0_DDATA_LO + /* DDATA_LO has pointer to vcpu */ + mtc0 a1, CP0_DDATA_LO - /* Offset into vcpu->arch */ - addiu k1, a1, VCPU_HOST_ARCH + /* Offset into vcpu->arch */ + INT_ADDIU k1, a1, VCPU_HOST_ARCH - /* Save the host stack to VCPU, used for exception processing when we exit from the Guest */ - LONG_S sp, VCPU_HOST_STACK(k1) + /* + * Save the host stack to VCPU, used for exception processing + * when we exit from the Guest + */ + LONG_S sp, VCPU_HOST_STACK(k1) - /* Save the kernel gp as well */ - LONG_S gp, VCPU_HOST_GP(k1) + /* Save the kernel gp as well */ + LONG_S gp, VCPU_HOST_GP(k1) /* Setup status register for running the guest in UM, interrupts are disabled */ - li k0,(ST0_EXL | KSU_USER| ST0_BEV) - mtc0 k0,CP0_STATUS - ehb - - /* load up the new EBASE */ - LONG_L k0, VCPU_GUEST_EBASE(k1) - mtc0 k0,CP0_EBASE 
- - /* Now that the new EBASE has been loaded, unset BEV, set interrupt mask as it was - * but make sure that timer interrupts are enabled - */ - li k0,(ST0_EXL | KSU_USER | ST0_IE) - andi v0, v0, ST0_IM - or k0, k0, v0 - mtc0 k0,CP0_STATUS - ehb + li k0, (ST0_EXL | KSU_USER | ST0_BEV) + mtc0 k0, CP0_STATUS + ehb + + /* load up the new EBASE */ + LONG_L k0, VCPU_GUEST_EBASE(k1) + mtc0 k0, CP0_EBASE + + /* + * Now that the new EBASE has been loaded, unset BEV, set + * interrupt mask as it was but make sure that timer interrupts + * are enabled + */ + li k0, (ST0_EXL | KSU_USER | ST0_IE) + andi v0, v0, ST0_IM + or k0, k0, v0 + mtc0 k0, CP0_STATUS + ehb /* Set Guest EPC */ - LONG_L t0, VCPU_PC(k1) - mtc0 t0, CP0_EPC + LONG_L t0, VCPU_PC(k1) + mtc0 t0, CP0_EPC FEXPORT(__kvm_mips_load_asid) - /* Set the ASID for the Guest Kernel */ - sll t0, t0, 1 /* with kseg0 @ 0x40000000, kernel */ - /* addresses shift to 0x80000000 */ - bltz t0, 1f /* If kernel */ - addiu t1, k1, VCPU_GUEST_KERNEL_ASID /* (BD) */ - addiu t1, k1, VCPU_GUEST_USER_ASID /* else user */ + /* Set the ASID for the Guest Kernel */ + INT_SLL t0, t0, 1 /* with kseg0 @ 0x40000000, kernel */ + /* addresses shift to 0x80000000 */ + bltz t0, 1f /* If kernel */ + INT_ADDIU t1, k1, VCPU_GUEST_KERNEL_ASID /* (BD) */ + INT_ADDIU t1, k1, VCPU_GUEST_USER_ASID /* else user */ 1: - /* t1: contains the base of the ASID array, need to get the cpu id */ - LONG_L t2, TI_CPU($28) /* smp_processor_id */ - sll t2, t2, 2 /* x4 */ - addu t3, t1, t2 - LONG_L k0, (t3) - andi k0, k0, 0xff - mtc0 k0,CP0_ENTRYHI - ehb - - /* Disable RDHWR access */ - mtc0 zero, CP0_HWRENA - - /* Now load up the Guest Context from VCPU */ - LONG_L $1, VCPU_R1(k1) - LONG_L $2, VCPU_R2(k1) - LONG_L $3, VCPU_R3(k1) - - LONG_L $4, VCPU_R4(k1) - LONG_L $5, VCPU_R5(k1) - LONG_L $6, VCPU_R6(k1) - LONG_L $7, VCPU_R7(k1) - - LONG_L $8, VCPU_R8(k1) - LONG_L $9, VCPU_R9(k1) - LONG_L $10, VCPU_R10(k1) - LONG_L $11, VCPU_R11(k1) - LONG_L $12, VCPU_R12(k1) - LONG_L $13, VCPU_R13(k1) - LONG_L $14, VCPU_R14(k1) - LONG_L $15, VCPU_R15(k1) - LONG_L $16, VCPU_R16(k1) - LONG_L $17, VCPU_R17(k1) - LONG_L $18, VCPU_R18(k1) - LONG_L $19, VCPU_R19(k1) - LONG_L $20, VCPU_R20(k1) - LONG_L $21, VCPU_R21(k1) - LONG_L $22, VCPU_R22(k1) - LONG_L $23, VCPU_R23(k1) - LONG_L $24, VCPU_R24(k1) - LONG_L $25, VCPU_R25(k1) - - /* k0/k1 loaded up later */ - - LONG_L $28, VCPU_R28(k1) - LONG_L $29, VCPU_R29(k1) - LONG_L $30, VCPU_R30(k1) - LONG_L $31, VCPU_R31(k1) - - /* Restore hi/lo */ - LONG_L k0, VCPU_LO(k1) - mtlo k0 - - LONG_L k0, VCPU_HI(k1) - mthi k0 + /* t1: contains the base of the ASID array, need to get the cpu id */ + LONG_L t2, TI_CPU($28) /* smp_processor_id */ + INT_SLL t2, t2, 2 /* x4 */ + REG_ADDU t3, t1, t2 + LONG_L k0, (t3) + andi k0, k0, 0xff + mtc0 k0, CP0_ENTRYHI + ehb + + /* Disable RDHWR access */ + mtc0 zero, CP0_HWRENA + + /* Now load up the Guest Context from VCPU */ + LONG_L $1, VCPU_R1(k1) + LONG_L $2, VCPU_R2(k1) + LONG_L $3, VCPU_R3(k1) + + LONG_L $4, VCPU_R4(k1) + LONG_L $5, VCPU_R5(k1) + LONG_L $6, VCPU_R6(k1) + LONG_L $7, VCPU_R7(k1) + + LONG_L $8, VCPU_R8(k1) + LONG_L $9, VCPU_R9(k1) + LONG_L $10, VCPU_R10(k1) + LONG_L $11, VCPU_R11(k1) + LONG_L $12, VCPU_R12(k1) + LONG_L $13, VCPU_R13(k1) + LONG_L $14, VCPU_R14(k1) + LONG_L $15, VCPU_R15(k1) + LONG_L $16, VCPU_R16(k1) + LONG_L $17, VCPU_R17(k1) + LONG_L $18, VCPU_R18(k1) + LONG_L $19, VCPU_R19(k1) + LONG_L $20, VCPU_R20(k1) + LONG_L $21, VCPU_R21(k1) + LONG_L $22, VCPU_R22(k1) + LONG_L $23, VCPU_R23(k1) + LONG_L $24, 
VCPU_R24(k1) + LONG_L $25, VCPU_R25(k1) + + /* k0/k1 loaded up later */ + + LONG_L $28, VCPU_R28(k1) + LONG_L $29, VCPU_R29(k1) + LONG_L $30, VCPU_R30(k1) + LONG_L $31, VCPU_R31(k1) + + /* Restore hi/lo */ + LONG_L k0, VCPU_LO(k1) + mtlo k0 + + LONG_L k0, VCPU_HI(k1) + mthi k0 FEXPORT(__kvm_mips_load_k0k1) /* Restore the guest's k0/k1 registers */ - LONG_L k0, VCPU_R26(k1) - LONG_L k1, VCPU_R27(k1) + LONG_L k0, VCPU_R26(k1) + LONG_L k1, VCPU_R27(k1) - /* Jump to guest */ + /* Jump to guest */ eret - .set pop VECTOR(MIPSX(exception), unknown) /* * Find out what mode we came from and jump to the proper handler. */ - .set push - .set noat - .set noreorder - mtc0 k0, CP0_ERROREPC #01: Save guest k0 - ehb #02: - - mfc0 k0, CP0_EBASE #02: Get EBASE - srl k0, k0, 10 #03: Get rid of CPUNum - sll k0, k0, 10 #04 - LONG_S k1, 0x3000(k0) #05: Save k1 @ offset 0x3000 - addiu k0, k0, 0x2000 #06: Exception handler is installed @ offset 0x2000 - j k0 #07: jump to the function - nop #08: branch delay slot - .set push + mtc0 k0, CP0_ERROREPC #01: Save guest k0 + ehb #02: + + mfc0 k0, CP0_EBASE #02: Get EBASE + INT_SRL k0, k0, 10 #03: Get rid of CPUNum + INT_SLL k0, k0, 10 #04 + LONG_S k1, 0x3000(k0) #05: Save k1 @ offset 0x3000 + INT_ADDIU k0, k0, 0x2000 #06: Exception handler is installed @ offset 0x2000 + j k0 #07: jump to the function + nop #08: branch delay slot VECTOR_END(MIPSX(exceptionEnd)) .end MIPSX(exception) @@ -253,329 +251,327 @@ VECTOR_END(MIPSX(exceptionEnd)) * */ NESTED (MIPSX(GuestException), CALLFRAME_SIZ, ra) - .set push - .set noat - .set noreorder - - /* Get the VCPU pointer from DDTATA_LO */ - mfc0 k1, CP0_DDATA_LO - addiu k1, k1, VCPU_HOST_ARCH - - /* Start saving Guest context to VCPU */ - LONG_S $0, VCPU_R0(k1) - LONG_S $1, VCPU_R1(k1) - LONG_S $2, VCPU_R2(k1) - LONG_S $3, VCPU_R3(k1) - LONG_S $4, VCPU_R4(k1) - LONG_S $5, VCPU_R5(k1) - LONG_S $6, VCPU_R6(k1) - LONG_S $7, VCPU_R7(k1) - LONG_S $8, VCPU_R8(k1) - LONG_S $9, VCPU_R9(k1) - LONG_S $10, VCPU_R10(k1) - LONG_S $11, VCPU_R11(k1) - LONG_S $12, VCPU_R12(k1) - LONG_S $13, VCPU_R13(k1) - LONG_S $14, VCPU_R14(k1) - LONG_S $15, VCPU_R15(k1) - LONG_S $16, VCPU_R16(k1) - LONG_S $17,VCPU_R17(k1) - LONG_S $18, VCPU_R18(k1) - LONG_S $19, VCPU_R19(k1) - LONG_S $20, VCPU_R20(k1) - LONG_S $21, VCPU_R21(k1) - LONG_S $22, VCPU_R22(k1) - LONG_S $23, VCPU_R23(k1) - LONG_S $24, VCPU_R24(k1) - LONG_S $25, VCPU_R25(k1) - - /* Guest k0/k1 saved later */ - - LONG_S $28, VCPU_R28(k1) - LONG_S $29, VCPU_R29(k1) - LONG_S $30, VCPU_R30(k1) - LONG_S $31, VCPU_R31(k1) - - /* We need to save hi/lo and restore them on - * the way out - */ - mfhi t0 - LONG_S t0, VCPU_HI(k1) - - mflo t0 - LONG_S t0, VCPU_LO(k1) - - /* Finally save guest k0/k1 to VCPU */ - mfc0 t0, CP0_ERROREPC - LONG_S t0, VCPU_R26(k1) - - /* Get GUEST k1 and save it in VCPU */ - la t1, ~0x2ff - mfc0 t0, CP0_EBASE - and t0, t0, t1 - LONG_L t0, 0x3000(t0) - LONG_S t0, VCPU_R27(k1) - - /* Now that context has been saved, we can use other registers */ - - /* Restore vcpu */ - mfc0 a1, CP0_DDATA_LO - move s1, a1 - - /* Restore run (vcpu->run) */ - LONG_L a0, VCPU_RUN(a1) - /* Save pointer to run in s0, will be saved by the compiler */ - move s0, a0 - - - /* Save Host level EPC, BadVaddr and Cause to VCPU, useful to process the exception */ - mfc0 k0,CP0_EPC - LONG_S k0, VCPU_PC(k1) - - mfc0 k0, CP0_BADVADDR - LONG_S k0, VCPU_HOST_CP0_BADVADDR(k1) - - mfc0 k0, CP0_CAUSE - LONG_S k0, VCPU_HOST_CP0_CAUSE(k1) - - mfc0 k0, CP0_ENTRYHI - LONG_S k0, VCPU_HOST_ENTRYHI(k1) - - /* Now restore the host 
state just enough to run the handlers */ - - /* Swtich EBASE to the one used by Linux */ - /* load up the host EBASE */ - mfc0 v0, CP0_STATUS - - .set at - or k0, v0, ST0_BEV - .set noat - - mtc0 k0, CP0_STATUS - ehb - - LONG_L k0, VCPU_HOST_EBASE(k1) - mtc0 k0,CP0_EBASE - - - /* Now that the new EBASE has been loaded, unset BEV and KSU_USER */ - .set at - and v0, v0, ~(ST0_EXL | KSU_USER | ST0_IE) - or v0, v0, ST0_CU0 - .set noat - mtc0 v0, CP0_STATUS - ehb - - /* Load up host GP */ - LONG_L gp, VCPU_HOST_GP(k1) - - /* Need a stack before we can jump to "C" */ - LONG_L sp, VCPU_HOST_STACK(k1) - - /* Saved host state */ - addiu sp,sp, -PT_SIZE + /* Get the VCPU pointer from DDTATA_LO */ + mfc0 k1, CP0_DDATA_LO + INT_ADDIU k1, k1, VCPU_HOST_ARCH + + /* Start saving Guest context to VCPU */ + LONG_S $0, VCPU_R0(k1) + LONG_S $1, VCPU_R1(k1) + LONG_S $2, VCPU_R2(k1) + LONG_S $3, VCPU_R3(k1) + LONG_S $4, VCPU_R4(k1) + LONG_S $5, VCPU_R5(k1) + LONG_S $6, VCPU_R6(k1) + LONG_S $7, VCPU_R7(k1) + LONG_S $8, VCPU_R8(k1) + LONG_S $9, VCPU_R9(k1) + LONG_S $10, VCPU_R10(k1) + LONG_S $11, VCPU_R11(k1) + LONG_S $12, VCPU_R12(k1) + LONG_S $13, VCPU_R13(k1) + LONG_S $14, VCPU_R14(k1) + LONG_S $15, VCPU_R15(k1) + LONG_S $16, VCPU_R16(k1) + LONG_S $17, VCPU_R17(k1) + LONG_S $18, VCPU_R18(k1) + LONG_S $19, VCPU_R19(k1) + LONG_S $20, VCPU_R20(k1) + LONG_S $21, VCPU_R21(k1) + LONG_S $22, VCPU_R22(k1) + LONG_S $23, VCPU_R23(k1) + LONG_S $24, VCPU_R24(k1) + LONG_S $25, VCPU_R25(k1) + + /* Guest k0/k1 saved later */ + + LONG_S $28, VCPU_R28(k1) + LONG_S $29, VCPU_R29(k1) + LONG_S $30, VCPU_R30(k1) + LONG_S $31, VCPU_R31(k1) + + /* We need to save hi/lo and restore them on + * the way out + */ + mfhi t0 + LONG_S t0, VCPU_HI(k1) + + mflo t0 + LONG_S t0, VCPU_LO(k1) + + /* Finally save guest k0/k1 to VCPU */ + mfc0 t0, CP0_ERROREPC + LONG_S t0, VCPU_R26(k1) + + /* Get GUEST k1 and save it in VCPU */ + PTR_LI t1, ~0x2ff + mfc0 t0, CP0_EBASE + and t0, t0, t1 + LONG_L t0, 0x3000(t0) + LONG_S t0, VCPU_R27(k1) + + /* Now that context has been saved, we can use other registers */ + + /* Restore vcpu */ + mfc0 a1, CP0_DDATA_LO + move s1, a1 + + /* Restore run (vcpu->run) */ + LONG_L a0, VCPU_RUN(a1) + /* Save pointer to run in s0, will be saved by the compiler */ + move s0, a0 + + /* Save Host level EPC, BadVaddr and Cause to VCPU, useful to + * process the exception */ + mfc0 k0,CP0_EPC + LONG_S k0, VCPU_PC(k1) + + mfc0 k0, CP0_BADVADDR + LONG_S k0, VCPU_HOST_CP0_BADVADDR(k1) + + mfc0 k0, CP0_CAUSE + LONG_S k0, VCPU_HOST_CP0_CAUSE(k1) + + mfc0 k0, CP0_ENTRYHI + LONG_S k0, VCPU_HOST_ENTRYHI(k1) + + /* Now restore the host state just enough to run the handlers */ + + /* Swtich EBASE to the one used by Linux */ + /* load up the host EBASE */ + mfc0 v0, CP0_STATUS + + .set at + or k0, v0, ST0_BEV + .set noat + + mtc0 k0, CP0_STATUS + ehb + + LONG_L k0, VCPU_HOST_EBASE(k1) + mtc0 k0,CP0_EBASE + - /* XXXKYMA do we need to load the host ASID, maybe not because the - * kernel entries are marked GLOBAL, need to verify - */ + /* Now that the new EBASE has been loaded, unset BEV and KSU_USER */ + .set at + and v0, v0, ~(ST0_EXL | KSU_USER | ST0_IE) + or v0, v0, ST0_CU0 + .set noat + mtc0 v0, CP0_STATUS + ehb + + /* Load up host GP */ + LONG_L gp, VCPU_HOST_GP(k1) + + /* Need a stack before we can jump to "C" */ + LONG_L sp, VCPU_HOST_STACK(k1) + + /* Saved host state */ + INT_ADDIU sp, sp, -PT_SIZE - /* Restore host DDATA_LO */ - LONG_L k0, PT_HOST_USERLOCAL(sp) - mtc0 k0, CP0_DDATA_LO + /* XXXKYMA do we need to load the host ASID, 
maybe not because the + * kernel entries are marked GLOBAL, need to verify + */ - /* Restore RDHWR access */ - la k0, 0x2000000F - mtc0 k0, CP0_HWRENA + /* Restore host DDATA_LO */ + LONG_L k0, PT_HOST_USERLOCAL(sp) + mtc0 k0, CP0_DDATA_LO - /* Jump to handler */ + /* Restore RDHWR access */ + PTR_LI k0, 0x2000000F + mtc0 k0, CP0_HWRENA + + /* Jump to handler */ FEXPORT(__kvm_mips_jump_to_handler) - /* XXXKYMA: not sure if this is safe, how large is the stack?? */ - /* Now jump to the kvm_mips_handle_exit() to see if we can deal with this in the kernel */ - la t9,kvm_mips_handle_exit - jalr.hb t9 - addiu sp,sp, -CALLFRAME_SIZ /* BD Slot */ - - /* Return from handler Make sure interrupts are disabled */ - di - ehb - - /* XXXKYMA: k0/k1 could have been blown away if we processed an exception - * while we were handling the exception from the guest, reload k1 - */ - move k1, s1 - addiu k1, k1, VCPU_HOST_ARCH - - /* Check return value, should tell us if we are returning to the host (handle I/O etc) - * or resuming the guest - */ - andi t0, v0, RESUME_HOST - bnez t0, __kvm_mips_return_to_host - nop + /* XXXKYMA: not sure if this is safe, how large is the stack?? + * Now jump to the kvm_mips_handle_exit() to see if we can deal + * with this in the kernel */ + PTR_LA t9, kvm_mips_handle_exit + jalr.hb t9 + INT_ADDIU sp, sp, -CALLFRAME_SIZ /* BD Slot */ + + /* Return from handler Make sure interrupts are disabled */ + di + ehb + + /* XXXKYMA: k0/k1 could have been blown away if we processed + * an exception while we were handling the exception from the + * guest, reload k1 + */ + + move k1, s1 + INT_ADDIU k1, k1, VCPU_HOST_ARCH + + /* Check return value, should tell us if we are returning to the + * host (handle I/O etc)or resuming the guest + */ + andi t0, v0, RESUME_HOST + bnez t0, __kvm_mips_return_to_host + nop __kvm_mips_return_to_guest: - /* Put the saved pointer to vcpu (s1) back into the DDATA_LO Register */ - mtc0 s1, CP0_DDATA_LO - - /* Load up the Guest EBASE to minimize the window where BEV is set */ - LONG_L t0, VCPU_GUEST_EBASE(k1) - - /* Switch EBASE back to the one used by KVM */ - mfc0 v1, CP0_STATUS - .set at - or k0, v1, ST0_BEV - .set noat - mtc0 k0, CP0_STATUS - ehb - mtc0 t0,CP0_EBASE - - /* Setup status register for running guest in UM */ - .set at - or v1, v1, (ST0_EXL | KSU_USER | ST0_IE) - and v1, v1, ~ST0_CU0 - .set noat - mtc0 v1, CP0_STATUS - ehb + /* Put the saved pointer to vcpu (s1) back into the DDATA_LO Register */ + mtc0 s1, CP0_DDATA_LO + /* Load up the Guest EBASE to minimize the window where BEV is set */ + LONG_L t0, VCPU_GUEST_EBASE(k1) + + /* Switch EBASE back to the one used by KVM */ + mfc0 v1, CP0_STATUS + .set at + or k0, v1, ST0_BEV + .set noat + mtc0 k0, CP0_STATUS + ehb + mtc0 t0, CP0_EBASE + + /* Setup status register for running guest in UM */ + .set at + or v1, v1, (ST0_EXL | KSU_USER | ST0_IE) + and v1, v1, ~ST0_CU0 + .set noat + mtc0 v1, CP0_STATUS + ehb /* Set Guest EPC */ - LONG_L t0, VCPU_PC(k1) - mtc0 t0, CP0_EPC - - /* Set the ASID for the Guest Kernel */ - sll t0, t0, 1 /* with kseg0 @ 0x40000000, kernel */ - /* addresses shift to 0x80000000 */ - bltz t0, 1f /* If kernel */ - addiu t1, k1, VCPU_GUEST_KERNEL_ASID /* (BD) */ - addiu t1, k1, VCPU_GUEST_USER_ASID /* else user */ + LONG_L t0, VCPU_PC(k1) + mtc0 t0, CP0_EPC + + /* Set the ASID for the Guest Kernel */ + INT_SLL t0, t0, 1 /* with kseg0 @ 0x40000000, kernel */ + /* addresses shift to 0x80000000 */ + bltz t0, 1f /* If kernel */ + INT_ADDIU t1, k1, VCPU_GUEST_KERNEL_ASID /* (BD) */ + 
INT_ADDIU t1, k1, VCPU_GUEST_USER_ASID /* else user */ 1: - /* t1: contains the base of the ASID array, need to get the cpu id */ - LONG_L t2, TI_CPU($28) /* smp_processor_id */ - sll t2, t2, 2 /* x4 */ - addu t3, t1, t2 - LONG_L k0, (t3) - andi k0, k0, 0xff - mtc0 k0,CP0_ENTRYHI - ehb - - /* Disable RDHWR access */ - mtc0 zero, CP0_HWRENA - - /* load the guest context from VCPU and return */ - LONG_L $0, VCPU_R0(k1) - LONG_L $1, VCPU_R1(k1) - LONG_L $2, VCPU_R2(k1) - LONG_L $3, VCPU_R3(k1) - LONG_L $4, VCPU_R4(k1) - LONG_L $5, VCPU_R5(k1) - LONG_L $6, VCPU_R6(k1) - LONG_L $7, VCPU_R7(k1) - LONG_L $8, VCPU_R8(k1) - LONG_L $9, VCPU_R9(k1) - LONG_L $10, VCPU_R10(k1) - LONG_L $11, VCPU_R11(k1) - LONG_L $12, VCPU_R12(k1) - LONG_L $13, VCPU_R13(k1) - LONG_L $14, VCPU_R14(k1) - LONG_L $15, VCPU_R15(k1) - LONG_L $16, VCPU_R16(k1) - LONG_L $17, VCPU_R17(k1) - LONG_L $18, VCPU_R18(k1) - LONG_L $19, VCPU_R19(k1) - LONG_L $20, VCPU_R20(k1) - LONG_L $21, VCPU_R21(k1) - LONG_L $22, VCPU_R22(k1) - LONG_L $23, VCPU_R23(k1) - LONG_L $24, VCPU_R24(k1) - LONG_L $25, VCPU_R25(k1) - - /* $/k1 loaded later */ - LONG_L $28, VCPU_R28(k1) - LONG_L $29, VCPU_R29(k1) - LONG_L $30, VCPU_R30(k1) - LONG_L $31, VCPU_R31(k1) + /* t1: contains the base of the ASID array, need to get the cpu id */ + LONG_L t2, TI_CPU($28) /* smp_processor_id */ + INT_SLL t2, t2, 2 /* x4 */ + REG_ADDU t3, t1, t2 + LONG_L k0, (t3) + andi k0, k0, 0xff + mtc0 k0,CP0_ENTRYHI + ehb + + /* Disable RDHWR access */ + mtc0 zero, CP0_HWRENA + + /* load the guest context from VCPU and return */ + LONG_L $0, VCPU_R0(k1) + LONG_L $1, VCPU_R1(k1) + LONG_L $2, VCPU_R2(k1) + LONG_L $3, VCPU_R3(k1) + LONG_L $4, VCPU_R4(k1) + LONG_L $5, VCPU_R5(k1) + LONG_L $6, VCPU_R6(k1) + LONG_L $7, VCPU_R7(k1) + LONG_L $8, VCPU_R8(k1) + LONG_L $9, VCPU_R9(k1) + LONG_L $10, VCPU_R10(k1) + LONG_L $11, VCPU_R11(k1) + LONG_L $12, VCPU_R12(k1) + LONG_L $13, VCPU_R13(k1) + LONG_L $14, VCPU_R14(k1) + LONG_L $15, VCPU_R15(k1) + LONG_L $16, VCPU_R16(k1) + LONG_L $17, VCPU_R17(k1) + LONG_L $18, VCPU_R18(k1) + LONG_L $19, VCPU_R19(k1) + LONG_L $20, VCPU_R20(k1) + LONG_L $21, VCPU_R21(k1) + LONG_L $22, VCPU_R22(k1) + LONG_L $23, VCPU_R23(k1) + LONG_L $24, VCPU_R24(k1) + LONG_L $25, VCPU_R25(k1) + + /* $/k1 loaded later */ + LONG_L $28, VCPU_R28(k1) + LONG_L $29, VCPU_R29(k1) + LONG_L $30, VCPU_R30(k1) + LONG_L $31, VCPU_R31(k1) FEXPORT(__kvm_mips_skip_guest_restore) - LONG_L k0, VCPU_HI(k1) - mthi k0 + LONG_L k0, VCPU_HI(k1) + mthi k0 - LONG_L k0, VCPU_LO(k1) - mtlo k0 + LONG_L k0, VCPU_LO(k1) + mtlo k0 - LONG_L k0, VCPU_R26(k1) - LONG_L k1, VCPU_R27(k1) + LONG_L k0, VCPU_R26(k1) + LONG_L k1, VCPU_R27(k1) - eret + eret __kvm_mips_return_to_host: - /* EBASE is already pointing to Linux */ - LONG_L k1, VCPU_HOST_STACK(k1) - addiu k1,k1, -PT_SIZE - - /* Restore host DDATA_LO */ - LONG_L k0, PT_HOST_USERLOCAL(k1) - mtc0 k0, CP0_DDATA_LO - - /* Restore host ASID */ - LONG_L k0, PT_HOST_ASID(sp) - andi k0, 0xff - mtc0 k0,CP0_ENTRYHI - ehb - - /* Load context saved on the host stack */ - LONG_L $0, PT_R0(k1) - LONG_L $1, PT_R1(k1) - - /* r2/v0 is the return code, shift it down by 2 (arithmetic) to recover the err code */ - sra k0, v0, 2 - move $2, k0 - - LONG_L $3, PT_R3(k1) - LONG_L $4, PT_R4(k1) - LONG_L $5, PT_R5(k1) - LONG_L $6, PT_R6(k1) - LONG_L $7, PT_R7(k1) - LONG_L $8, PT_R8(k1) - LONG_L $9, PT_R9(k1) - LONG_L $10, PT_R10(k1) - LONG_L $11, PT_R11(k1) - LONG_L $12, PT_R12(k1) - LONG_L $13, PT_R13(k1) - LONG_L $14, PT_R14(k1) - LONG_L $15, PT_R15(k1) - LONG_L $16, PT_R16(k1) - 
LONG_L $17, PT_R17(k1) - LONG_L $18, PT_R18(k1) - LONG_L $19, PT_R19(k1) - LONG_L $20, PT_R20(k1) - LONG_L $21, PT_R21(k1) - LONG_L $22, PT_R22(k1) - LONG_L $23, PT_R23(k1) - LONG_L $24, PT_R24(k1) - LONG_L $25, PT_R25(k1) - - /* Host k0/k1 were not saved */ - - LONG_L $28, PT_R28(k1) - LONG_L $29, PT_R29(k1) - LONG_L $30, PT_R30(k1) - - LONG_L k0, PT_HI(k1) - mthi k0 - - LONG_L k0, PT_LO(k1) - mtlo k0 - - /* Restore RDHWR access */ - la k0, 0x2000000F - mtc0 k0, CP0_HWRENA - - - /* Restore RA, which is the address we will return to */ - LONG_L ra, PT_R31(k1) - j ra - nop - - .set pop + /* EBASE is already pointing to Linux */ + LONG_L k1, VCPU_HOST_STACK(k1) + INT_ADDIU k1,k1, -PT_SIZE + + /* Restore host DDATA_LO */ + LONG_L k0, PT_HOST_USERLOCAL(k1) + mtc0 k0, CP0_DDATA_LO + + /* Restore host ASID */ + LONG_L k0, PT_HOST_ASID(sp) + andi k0, 0xff + mtc0 k0,CP0_ENTRYHI + ehb + + /* Load context saved on the host stack */ + LONG_L $0, PT_R0(k1) + LONG_L $1, PT_R1(k1) + + /* r2/v0 is the return code, shift it down by 2 (arithmetic) + * to recover the err code */ + INT_SRA k0, v0, 2 + move $2, k0 + + LONG_L $3, PT_R3(k1) + LONG_L $4, PT_R4(k1) + LONG_L $5, PT_R5(k1) + LONG_L $6, PT_R6(k1) + LONG_L $7, PT_R7(k1) + LONG_L $8, PT_R8(k1) + LONG_L $9, PT_R9(k1) + LONG_L $10, PT_R10(k1) + LONG_L $11, PT_R11(k1) + LONG_L $12, PT_R12(k1) + LONG_L $13, PT_R13(k1) + LONG_L $14, PT_R14(k1) + LONG_L $15, PT_R15(k1) + LONG_L $16, PT_R16(k1) + LONG_L $17, PT_R17(k1) + LONG_L $18, PT_R18(k1) + LONG_L $19, PT_R19(k1) + LONG_L $20, PT_R20(k1) + LONG_L $21, PT_R21(k1) + LONG_L $22, PT_R22(k1) + LONG_L $23, PT_R23(k1) + LONG_L $24, PT_R24(k1) + LONG_L $25, PT_R25(k1) + + /* Host k0/k1 were not saved */ + + LONG_L $28, PT_R28(k1) + LONG_L $29, PT_R29(k1) + LONG_L $30, PT_R30(k1) + + LONG_L k0, PT_HI(k1) + mthi k0 + + LONG_L k0, PT_LO(k1) + mtlo k0 + + /* Restore RDHWR access */ + PTR_LI k0, 0x2000000F + mtc0 k0, CP0_HWRENA + + + /* Restore RA, which is the address we will return to */ + LONG_L ra, PT_R31(k1) + j ra + nop + VECTOR_END(MIPSX(GuestExceptionEnd)) .end MIPSX(GuestException) @@ -627,24 +623,23 @@ MIPSX(exceptions): #define HW_SYNCI_Step $1 LEAF(MIPSX(SyncICache)) - .set push + .set push .set mips32r2 - beq a1, zero, 20f - nop - addu a1, a0, a1 - rdhwr v0, HW_SYNCI_Step - beq v0, zero, 20f - nop - + beq a1, zero, 20f + nop + REG_ADDU a1, a0, a1 + rdhwr v0, HW_SYNCI_Step + beq v0, zero, 20f + nop 10: - synci 0(a0) - addu a0, a0, v0 - sltu v1, a0, a1 - bne v1, zero, 10b - nop - sync + synci 0(a0) + REG_ADDU a0, a0, v0 + sltu v1, a0, a1 + bne v1, zero, 10b + nop + sync 20: - jr.hb ra - nop - .set pop + jr.hb ra + nop + .set pop END(MIPSX(SyncICache)) diff --git a/arch/mips/kvm/kvm_mips.c b/arch/mips/kvm/kvm_mips.c index dd203e59e6fd..a7b044536de4 100644 --- a/arch/mips/kvm/kvm_mips.c +++ b/arch/mips/kvm/kvm_mips.c @@ -208,6 +208,10 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) return 0; } +void kvm_arch_memslots_updated(struct kvm *kvm) +{ +} + int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, struct kvm_userspace_memory_region *mem, diff --git a/arch/mips/oprofile/common.c b/arch/mips/oprofile/common.c index af763e838fdd..5e5424753b56 100644 --- a/arch/mips/oprofile/common.c +++ b/arch/mips/oprofile/common.c @@ -33,7 +33,7 @@ static int op_mips_setup(void) return 0; } -static int op_mips_create_files(struct super_block *sb, struct dentry *root) +static int op_mips_create_files(struct dentry *root) { int i; @@ -42,16 +42,16 @@ static 
int op_mips_create_files(struct super_block *sb, struct dentry *root) char buf[4]; snprintf(buf, sizeof buf, "%d", i); - dir = oprofilefs_mkdir(sb, root, buf); - - oprofilefs_create_ulong(sb, dir, "enabled", &ctr[i].enabled); - oprofilefs_create_ulong(sb, dir, "event", &ctr[i].event); - oprofilefs_create_ulong(sb, dir, "count", &ctr[i].count); - oprofilefs_create_ulong(sb, dir, "kernel", &ctr[i].kernel); - oprofilefs_create_ulong(sb, dir, "user", &ctr[i].user); - oprofilefs_create_ulong(sb, dir, "exl", &ctr[i].exl); + dir = oprofilefs_mkdir(root, buf); + + oprofilefs_create_ulong(dir, "enabled", &ctr[i].enabled); + oprofilefs_create_ulong(dir, "event", &ctr[i].event); + oprofilefs_create_ulong(dir, "count", &ctr[i].count); + oprofilefs_create_ulong(dir, "kernel", &ctr[i].kernel); + oprofilefs_create_ulong(dir, "user", &ctr[i].user); + oprofilefs_create_ulong(dir, "exl", &ctr[i].exl); /* Dummy. */ - oprofilefs_create_ulong(sb, dir, "unit_mask", &ctr[i].unit_mask); + oprofilefs_create_ulong(dir, "unit_mask", &ctr[i].unit_mask); } return 0; diff --git a/arch/mips/pci/pci.c b/arch/mips/pci/pci.c index 594e60d6a43b..33e7aa52d9c4 100644 --- a/arch/mips/pci/pci.c +++ b/arch/mips/pci/pci.c @@ -113,7 +113,6 @@ static void pcibios_scanbus(struct pci_controller *hose) if (!pci_has_flag(PCI_PROBE_ONLY)) { pci_bus_size_bridges(bus); pci_bus_assign_resources(bus); - pci_enable_bridges(bus); } } } diff --git a/arch/mips/sni/a20r.c b/arch/mips/sni/a20r.c index dd0ab982d77e..f9407e170476 100644 --- a/arch/mips/sni/a20r.c +++ b/arch/mips/sni/a20r.c @@ -122,7 +122,6 @@ static struct resource sc26xx_rsrc[] = { static struct sccnxp_pdata sccnxp_data = { .reg_shift = 2, - .frequency = 3686400, .mctrl_cfg[0] = MCTRL_SIG(DTR_OP, LINE_OP7) | MCTRL_SIG(RTS_OP, LINE_OP3) | MCTRL_SIG(DSR_IP, LINE_IP5) | diff --git a/arch/openrisc/include/asm/prom.h b/arch/openrisc/include/asm/prom.h index bbb34e5343a2..eb59bfe23e85 100644 --- a/arch/openrisc/include/asm/prom.h +++ b/arch/openrisc/include/asm/prom.h @@ -44,9 +44,6 @@ void of_parse_dma_window(struct device_node *dn, const void *dma_window_prop, extern void kdump_move_device_tree(void); -/* CPU OF node matching */ -struct device_node *of_get_cpu_node(int cpu, unsigned int *thread); - /* Get the MAC address */ extern const void *of_get_mac_address(struct device_node *np); diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 9cf59816d3e9..5aecda05e0da 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -369,9 +369,9 @@ config KEXEC It is an ongoing process to be certain the hardware in a machine is properly shutdown, so do not be surprised if this code does not - initially work for you. It may help to enable device hotplugging - support. As of this writing the exact hardware interface is - strongly in flux, so no good recommendation can be made. + initially work for you. As of this writing the exact hardware + interface is strongly in flux, so no good recommendation can be + made. config CRASH_DUMP bool "Build a kdump crash kernel" diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild index 650757c300db..704e6f10ae80 100644 --- a/arch/powerpc/include/asm/Kbuild +++ b/arch/powerpc/include/asm/Kbuild @@ -2,3 +2,4 @@ generic-y += clkdev.h generic-y += rwsem.h generic-y += trace_clock.h +generic-y += vtime.h
\ No newline at end of file diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 08891d07aeb6..fa19e2f1a874 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -334,6 +334,27 @@ static inline u32 kvmppc_get_last_inst(struct kvm_vcpu *vcpu) return r; } +/* + * Like kvmppc_get_last_inst(), but for fetching a sc instruction. + * Because the sc instruction sets SRR0 to point to the following + * instruction, we have to fetch from pc - 4. + */ +static inline u32 kvmppc_get_last_sc(struct kvm_vcpu *vcpu) +{ + ulong pc = kvmppc_get_pc(vcpu) - 4; + struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); + u32 r; + + /* Load the instruction manually if it failed to do so in the + * exit path */ + if (svcpu->last_inst == KVM_INST_FETCH_FAILED) + kvmppc_ld(vcpu, &pc, sizeof(u32), &svcpu->last_inst, false); + + r = svcpu->last_inst; + svcpu_put(svcpu); + return r; +} + static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu) { struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); @@ -446,6 +467,23 @@ static inline u32 kvmppc_get_last_inst(struct kvm_vcpu *vcpu) return vcpu->arch.last_inst; } +/* + * Like kvmppc_get_last_inst(), but for fetching a sc instruction. + * Because the sc instruction sets SRR0 to point to the following + * instruction, we have to fetch from pc - 4. + */ +static inline u32 kvmppc_get_last_sc(struct kvm_vcpu *vcpu) +{ + ulong pc = kvmppc_get_pc(vcpu) - 4; + + /* Load the instruction manually if it failed to do so in the + * exit path */ + if (vcpu->arch.last_inst == KVM_INST_FETCH_FAILED) + kvmppc_ld(vcpu, &pc, sizeof(u32), &vcpu->arch.last_inst, false); + + return vcpu->arch.last_inst; +} + static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu) { return vcpu->arch.fault_dar; diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index a1ecb14e4442..86d638a3b359 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -37,7 +37,7 @@ static inline void svcpu_put(struct kvmppc_book3s_shadow_vcpu *svcpu) #ifdef CONFIG_KVM_BOOK3S_64_HV #define KVM_DEFAULT_HPT_ORDER 24 /* 16MB HPT by default */ -extern int kvm_hpt_order; /* order of preallocated HPTs */ +extern unsigned long kvm_rma_pages; #endif #define VRMA_VSID 0x1ffffffUL /* 1TB VSID reserved for VRMA */ @@ -100,7 +100,7 @@ static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r, /* (masks depend on page size) */ rb |= 0x1000; /* page encoding in LP field */ rb |= (va_low & 0x7f) << 16; /* 7b of VA in AVA/LP field */ - rb |= (va_low & 0xfe); /* AVAL field (P7 doesn't seem to care) */ + rb |= ((va_low << 4) & 0xf0); /* AVAL field (P7 doesn't seem to care) */ } } else { /* 4kB page */ diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index af326cde7cb6..33283532e9d8 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -183,13 +183,9 @@ struct kvmppc_spapr_tce_table { struct page *pages[0]; }; -struct kvmppc_linear_info { - void *base_virt; - unsigned long base_pfn; - unsigned long npages; - struct list_head list; - atomic_t use_count; - int type; +struct kvm_rma_info { + atomic_t use_count; + unsigned long base_pfn; }; /* XICS components, defined in book3s_xics.c */ @@ -246,7 +242,7 @@ struct kvm_arch { int tlbie_lock; unsigned long lpcr; unsigned long rmor; - struct kvmppc_linear_info *rma; + struct kvm_rma_info *rma; unsigned long 
vrma_slb_v; int rma_setup_done; int using_mmu_notifiers; @@ -259,7 +255,7 @@ struct kvm_arch { spinlock_t slot_phys_lock; cpumask_t need_tlb_flush; struct kvmppc_vcore *vcores[KVM_MAX_VCORES]; - struct kvmppc_linear_info *hpt_li; + int hpt_cma_alloc; #endif /* CONFIG_KVM_BOOK3S_64_HV */ #ifdef CONFIG_PPC_BOOK3S_64 struct list_head spapr_tce_tables; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index a5287fe03d77..b15554a26c20 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -137,10 +137,10 @@ extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, unsigned long ioba, unsigned long tce); extern long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *rma); -extern struct kvmppc_linear_info *kvm_alloc_rma(void); -extern void kvm_release_rma(struct kvmppc_linear_info *ri); -extern struct kvmppc_linear_info *kvm_alloc_hpt(void); -extern void kvm_release_hpt(struct kvmppc_linear_info *li); +extern struct kvm_rma_info *kvm_alloc_rma(void); +extern void kvm_release_rma(struct kvm_rma_info *ri); +extern struct page *kvm_alloc_hpt(unsigned long nr_pages); +extern void kvm_release_hpt(struct page *page, unsigned long nr_pages); extern int kvmppc_core_init_vm(struct kvm *kvm); extern void kvmppc_core_destroy_vm(struct kvm *kvm); extern void kvmppc_core_free_memslot(struct kvm_memory_slot *free, @@ -261,6 +261,7 @@ void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid); struct openpic; #ifdef CONFIG_KVM_BOOK3S_64_HV +extern void kvm_cma_reserve(void) __init; static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr) { paca[cpu].kvm_hstate.xics_phys = addr; @@ -281,13 +282,12 @@ static inline void kvmppc_set_host_ipi(int cpu, u8 host_ipi) } extern void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu); -extern void kvm_linear_init(void); #else -static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr) +static inline void __init kvm_cma_reserve(void) {} -static inline void kvm_linear_init(void) +static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr) {} static inline u32 kvmppc_get_xics_latch(void) @@ -394,10 +394,15 @@ static inline void kvmppc_mmu_flush_icache(pfn_t pfn) } } -/* Please call after prepare_to_enter. This function puts the lazy ee state - back to normal mode, without actually enabling interrupts. */ -static inline void kvmppc_lazy_ee_enable(void) +/* + * Please call after prepare_to_enter. This function puts the lazy ee and irq + * disabled tracking state back to normal mode, without actually enabling + * interrupts. 
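To situate the renamed helper, the entry path it documents looks roughly like this — a sketch modeled on the existing prepare_to_enter convention; the wrapper function and the exact return-value handling are assumptions, not part of this patch:

/* Hypothetical caller showing where kvmppc_fix_ee_before_entry() sits. */
static int example_enter_guest(struct kvm_run *run, struct kvm_vcpu *vcpu)
{
	int r;

	local_irq_disable();
	r = kvmppc_prepare_to_enter(vcpu);
	if (r <= 0) {
		local_irq_enable();	/* host work pending: back out */
		return r;
	}
	/*
	 * Lazy-EE bookkeeping now says "enabled" again, but hard
	 * interrupts stay off until the low-level entry code flips them.
	 */
	kvmppc_fix_ee_before_entry();
	return __kvmppc_vcpu_run(run, vcpu);
}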
+ */ +static inline void kvmppc_fix_ee_before_entry(void) { + trace_hardirqs_on(); + #ifdef CONFIG_PPC64 /* Only need to enable IRQs by hard enabling them after this */ local_paca->irq_happened = 0; diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h index 8b2492644754..3fd2f1b6f906 100644 --- a/arch/powerpc/include/asm/perf_event_server.h +++ b/arch/powerpc/include/asm/perf_event_server.h @@ -138,11 +138,11 @@ extern ssize_t power_events_sysfs_show(struct device *dev, #define EVENT_PTR(_id, _suffix) &EVENT_VAR(_id, _suffix).attr.attr #define EVENT_ATTR(_name, _id, _suffix) \ - PMU_EVENT_ATTR(_name, EVENT_VAR(_id, _suffix), PME_PM_##_id, \ + PMU_EVENT_ATTR(_name, EVENT_VAR(_id, _suffix), PME_##_id, \ power_events_sysfs_show) #define GENERIC_EVENT_ATTR(_name, _id) EVENT_ATTR(_name, _id, _g) #define GENERIC_EVENT_PTR(_id) EVENT_PTR(_id, _g) -#define POWER_EVENT_ATTR(_name, _id) EVENT_ATTR(PM_##_name, _id, _p) +#define POWER_EVENT_ATTR(_name, _id) EVENT_ATTR(_name, _id, _p) #define POWER_EVENT_PTR(_id) EVENT_PTR(_id, _p) diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h index bc2da154f68b..ac204e022922 100644 --- a/arch/powerpc/include/asm/prom.h +++ b/arch/powerpc/include/asm/prom.h @@ -43,9 +43,6 @@ void of_parse_dma_window(struct device_node *dn, const void *dma_window_prop, extern void kdump_move_device_tree(void); -/* CPU OF node matching */ -struct device_node *of_get_cpu_node(int cpu, unsigned int *thread); - /* cache lookup */ struct device_node *of_find_next_cache_node(struct device_node *np); diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 8207459efe56..d8958be5f31a 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -454,6 +454,7 @@ int main(void) DEFINE(VCPU_SPRG2, offsetof(struct kvm_vcpu, arch.shregs.sprg2)); DEFINE(VCPU_SPRG3, offsetof(struct kvm_vcpu, arch.shregs.sprg3)); #endif + DEFINE(VCPU_SHARED_SPRG3, offsetof(struct kvm_vcpu_arch_shared, sprg3)); DEFINE(VCPU_SHARED_SPRG4, offsetof(struct kvm_vcpu_arch_shared, sprg4)); DEFINE(VCPU_SHARED_SPRG5, offsetof(struct kvm_vcpu_arch_shared, sprg5)); DEFINE(VCPU_SHARED_SPRG6, offsetof(struct kvm_vcpu_arch_shared, sprg6)); diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index b20ff173a671..0adab06ce5c0 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -105,7 +105,7 @@ static int __init fail_iommu_debugfs(void) struct dentry *dir = fault_create_debugfs_attr("fail_iommu", NULL, &fail_iommu); - return PTR_RET(dir); + return PTR_ERR_OR_ZERO(dir); } late_initcall(fail_iommu_debugfs); diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 7d22a675fe1a..2b4a9a4db7d9 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -1674,12 +1674,8 @@ void pcibios_scan_phb(struct pci_controller *hose) /* Configure PCI Express settings */ if (bus && !pci_has_flag(PCI_PROBE_ONLY)) { struct pci_bus *child; - list_for_each_entry(child, &bus->children, node) { - struct pci_dev *self = child->self; - if (!self) - continue; - pcie_bus_configure_settings(child, self->pcie_mpss); - } + list_for_each_entry(child, &bus->children, node) + pcie_bus_configure_settings(child); } } diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index eb23ac92abb9..1c14cd4a5e05 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -865,49 +865,10 @@ static int __init 
prom_reconfig_setup(void) __initcall(prom_reconfig_setup); #endif -/* Find the device node for a given logical cpu number, also returns the cpu - * local thread number (index in ibm,interrupt-server#s) if relevant and - * asked for (non NULL) - */ -struct device_node *of_get_cpu_node(int cpu, unsigned int *thread) +bool arch_match_cpu_phys_id(int cpu, u64 phys_id) { - int hardid; - struct device_node *np; - - hardid = get_hard_smp_processor_id(cpu); - - for_each_node_by_type(np, "cpu") { - const u32 *intserv; - unsigned int plen, t; - - /* Check for ibm,ppc-interrupt-server#s. If it doesn't exist - * fallback to "reg" property and assume no threads - */ - intserv = of_get_property(np, "ibm,ppc-interrupt-server#s", - &plen); - if (intserv == NULL) { - const u32 *reg = of_get_property(np, "reg", NULL); - if (reg == NULL) - continue; - if (*reg == hardid) { - if (thread) - *thread = 0; - return np; - } - } else { - plen /= sizeof(u32); - for (t = 0; t < plen; t++) { - if (hardid == intserv[t]) { - if (thread) - *thread = t; - return np; - } - } - } - } - return NULL; + return (int)phys_id == get_hard_smp_processor_id(cpu); } -EXPORT_SYMBOL(of_get_cpu_node); #if defined(CONFIG_DEBUG_FS) && defined(DEBUG) static struct debugfs_blob_wrapper flat_dt_blob; diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 389fb8077cc9..fe6a58c9f0b7 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -229,6 +229,8 @@ void __init early_setup(unsigned long dt_ptr) /* Initialize the hash table or TLB handling */ early_init_mmu(); + kvm_cma_reserve(); + /* * Reserve any gigantic pages requested on the command line. * memblock needs to have been initialized by the time this is @@ -609,8 +611,6 @@ void __init setup_arch(char **cmdline_p) /* Initialize the MMU context management stuff */ mmu_context_init(); - kvm_linear_init(); - /* Interrupt code needs to be 64K-aligned */ if ((unsigned long)_stext & 0xffff) panic("Kernelbase not 64K-aligned (0x%lx)!\n", diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 65ab9e909377..cdcc156865ef 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -1049,7 +1049,7 @@ static int __init rtc_init(void) pdev = platform_device_register_simple("rtc-generic", -1, NULL, 0); - return PTR_RET(pdev); + return PTR_ERR_OR_ZERO(pdev); } module_init(rtc_init); diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index eb643f862579..ffaef2cb101a 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -72,6 +72,7 @@ config KVM_BOOK3S_64_HV bool "KVM support for POWER7 and PPC970 using hypervisor mode in host" depends on KVM_BOOK3S_64 select MMU_NOTIFIER + select CMA ---help--- Support running unmodified book3s_64 guest kernels in virtual machines on POWER7 and PPC970 processors that have diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index 008cd856c5b5..6646c952c5e3 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile @@ -81,6 +81,7 @@ kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \ book3s_64_vio_hv.o \ book3s_hv_ras.o \ book3s_hv_builtin.o \ + book3s_hv_cma.o \ $(kvm-book3s_64-builtin-xics-objs-y) kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \ diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c index 739bfbadb85e..7e345e00661a 100644 --- a/arch/powerpc/kvm/book3s_64_mmu.c +++ b/arch/powerpc/kvm/book3s_64_mmu.c @@ -182,10 +182,13 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, 
gva_t eaddr, hva_t ptegp; u64 pteg[16]; u64 avpn = 0; + u64 v, r; + u64 v_val, v_mask; + u64 eaddr_mask; int i; - u8 key = 0; + u8 pp, key = 0; bool found = false; - int second = 0; + bool second = false; ulong mp_ea = vcpu->arch.magic_page_ea; /* Magic page override */ @@ -208,8 +211,16 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, goto no_seg_found; avpn = kvmppc_mmu_book3s_64_get_avpn(slbe, eaddr); + v_val = avpn & HPTE_V_AVPN; + if (slbe->tb) - avpn |= SLB_VSID_B_1T; + v_val |= SLB_VSID_B_1T; + if (slbe->large) + v_val |= HPTE_V_LARGE; + v_val |= HPTE_V_VALID; + + v_mask = SLB_VSID_B | HPTE_V_AVPN | HPTE_V_LARGE | HPTE_V_VALID | + HPTE_V_SECONDARY; do_second: ptegp = kvmppc_mmu_book3s_64_get_pteg(vcpu_book3s, slbe, eaddr, second); @@ -227,91 +238,74 @@ do_second: key = 4; for (i=0; i<16; i+=2) { - u64 v = pteg[i]; - u64 r = pteg[i+1]; - - /* Valid check */ - if (!(v & HPTE_V_VALID)) - continue; - /* Hash check */ - if ((v & HPTE_V_SECONDARY) != second) - continue; - - /* AVPN compare */ - if (HPTE_V_COMPARE(avpn, v)) { - u8 pp = (r & HPTE_R_PP) | key; - int eaddr_mask = 0xFFF; - - gpte->eaddr = eaddr; - gpte->vpage = kvmppc_mmu_book3s_64_ea_to_vp(vcpu, - eaddr, - data); - if (slbe->large) - eaddr_mask = 0xFFFFFF; - gpte->raddr = (r & HPTE_R_RPN) | (eaddr & eaddr_mask); - gpte->may_execute = ((r & HPTE_R_N) ? false : true); - gpte->may_read = false; - gpte->may_write = false; - - switch (pp) { - case 0: - case 1: - case 2: - case 6: - gpte->may_write = true; - /* fall through */ - case 3: - case 5: - case 7: - gpte->may_read = true; - break; - } - - dprintk("KVM MMU: Translated 0x%lx [0x%llx] -> 0x%llx " - "-> 0x%lx\n", - eaddr, avpn, gpte->vpage, gpte->raddr); + /* Check all relevant fields of 1st dword */ + if ((pteg[i] & v_mask) == v_val) { found = true; break; } } - /* Update PTE R and C bits, so the guest's swapper knows we used the - * page */ - if (found) { - u32 oldr = pteg[i+1]; + if (!found) { + if (second) + goto no_page_found; + v_val |= HPTE_V_SECONDARY; + second = true; + goto do_second; + } - if (gpte->may_read) { - /* Set the accessed flag */ - pteg[i+1] |= HPTE_R_R; - } - if (gpte->may_write) { - /* Set the dirty flag */ - pteg[i+1] |= HPTE_R_C; - } else { - dprintk("KVM: Mapping read-only page!\n"); - } + v = pteg[i]; + r = pteg[i+1]; + pp = (r & HPTE_R_PP) | key; + eaddr_mask = 0xFFF; + + gpte->eaddr = eaddr; + gpte->vpage = kvmppc_mmu_book3s_64_ea_to_vp(vcpu, eaddr, data); + if (slbe->large) + eaddr_mask = 0xFFFFFF; + gpte->raddr = (r & HPTE_R_RPN & ~eaddr_mask) | (eaddr & eaddr_mask); + gpte->may_execute = ((r & HPTE_R_N) ? 
false : true); + gpte->may_read = false; + gpte->may_write = false; + + switch (pp) { + case 0: + case 1: + case 2: + case 6: + gpte->may_write = true; + /* fall through */ + case 3: + case 5: + case 7: + gpte->may_read = true; + break; + } - /* Write back into the PTEG */ - if (pteg[i+1] != oldr) - copy_to_user((void __user *)ptegp, pteg, sizeof(pteg)); + dprintk("KVM MMU: Translated 0x%lx [0x%llx] -> 0x%llx " + "-> 0x%lx\n", + eaddr, avpn, gpte->vpage, gpte->raddr); - if (!gpte->may_read) - return -EPERM; - return 0; - } else { - dprintk("KVM MMU: No PTE found (ea=0x%lx sdr1=0x%llx " - "ptegp=0x%lx)\n", - eaddr, to_book3s(vcpu)->sdr1, ptegp); - for (i = 0; i < 16; i += 2) - dprintk(" %02d: 0x%llx - 0x%llx (0x%llx)\n", - i, pteg[i], pteg[i+1], avpn); - - if (!second) { - second = HPTE_V_SECONDARY; - goto do_second; - } + /* Update PTE R and C bits, so the guest's swapper knows we used the + * page */ + if (gpte->may_read) { + /* Set the accessed flag */ + r |= HPTE_R_R; + } + if (data && gpte->may_write) { + /* Set the dirty flag -- XXX even if not writing */ + r |= HPTE_R_C; + } + + /* Write back into the PTEG */ + if (pteg[i+1] != r) { + pteg[i+1] = r; + copy_to_user((void __user *)ptegp, pteg, sizeof(pteg)); } + if (!gpte->may_read) + return -EPERM; + return 0; + no_page_found: return -ENOENT; diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 710d31317d81..043eec8461e7 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -37,6 +37,8 @@ #include <asm/ppc-opcode.h> #include <asm/cputable.h> +#include "book3s_hv_cma.h" + /* POWER7 has 10-bit LPIDs, PPC970 has 6-bit LPIDs */ #define MAX_LPID_970 63 @@ -52,8 +54,8 @@ long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp) { unsigned long hpt; struct revmap_entry *rev; - struct kvmppc_linear_info *li; - long order = kvm_hpt_order; + struct page *page = NULL; + long order = KVM_DEFAULT_HPT_ORDER; if (htab_orderp) { order = *htab_orderp; @@ -61,26 +63,23 @@ long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp) order = PPC_MIN_HPT_ORDER; } + kvm->arch.hpt_cma_alloc = 0; /* - * If the user wants a different size from default, * try first to allocate it from the kernel page allocator. + * We keep the CMA reserved for failed allocation. 
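As context for the kvmppc_alloc_hpt() hunk here: the rework tries the regular page allocator at the requested order first, keeps the CMA reserve as a fallback, and only then retries at successively smaller orders. A minimal user-space sketch of that fallback chain, with buddy_alloc()/cma_alloc() as hypothetical stand-ins for __get_free_pages() and kvm_alloc_hpt():

#include <stdlib.h>

#define MIN_ORDER 18                    /* PPC_MIN_HPT_ORDER: 256kB HPT */

/* Hypothetical stand-ins for __get_free_pages() and the CMA pool. */
static void *buddy_alloc(long order)
{
    return order > 22 ? NULL : malloc(1UL << order); /* big orders fail */
}
static void *cma_alloc(long order)
{
    return malloc(1UL << order);
}

/* The fallback chain: buddy allocator, then CMA, then smaller orders. */
static void *alloc_hpt(long *order, int *from_cma)
{
    void *hpt = buddy_alloc(*order);

    *from_cma = 0;
    if (!hpt) {
        hpt = cma_alloc(*order);        /* CMA kept in reserve for this case */
        if (hpt)
            *from_cma = 1;
        else
            --*order;                   /* CMA failed too: start shrinking */
    }
    while (!hpt && *order > MIN_ORDER) {
        hpt = buddy_alloc(*order);
        if (!hpt)
            --*order;
    }
    return hpt;
}

int main(void)
{
    long order = 24;                    /* default 16MB HPT */
    int from_cma;

    return alloc_hpt(&order, &from_cma) ? 0 : 1;
}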
*/ - hpt = 0; - if (order != kvm_hpt_order) { - hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_REPEAT| - __GFP_NOWARN, order - PAGE_SHIFT); - if (!hpt) - --order; - } + hpt = __get_free_pages(GFP_KERNEL | __GFP_ZERO | __GFP_REPEAT | + __GFP_NOWARN, order - PAGE_SHIFT); /* Next try to allocate from the preallocated pool */ if (!hpt) { - li = kvm_alloc_hpt(); - if (li) { - hpt = (ulong)li->base_virt; - kvm->arch.hpt_li = li; - order = kvm_hpt_order; - } + VM_BUG_ON(order < KVM_CMA_CHUNK_ORDER); + page = kvm_alloc_hpt(1 << (order - PAGE_SHIFT)); + if (page) { + hpt = (unsigned long)pfn_to_kaddr(page_to_pfn(page)); + kvm->arch.hpt_cma_alloc = 1; + } else + --order; } /* Lastly try successively smaller sizes from the page allocator */ @@ -118,8 +117,8 @@ long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp) return 0; out_freehpt: - if (kvm->arch.hpt_li) - kvm_release_hpt(kvm->arch.hpt_li); + if (kvm->arch.hpt_cma_alloc) + kvm_release_hpt(page, 1 << (order - PAGE_SHIFT)); else free_pages(hpt, order - PAGE_SHIFT); return -ENOMEM; @@ -165,8 +164,9 @@ void kvmppc_free_hpt(struct kvm *kvm) { kvmppc_free_lpid(kvm->arch.lpid); vfree(kvm->arch.revmap); - if (kvm->arch.hpt_li) - kvm_release_hpt(kvm->arch.hpt_li); + if (kvm->arch.hpt_cma_alloc) + kvm_release_hpt(virt_to_page(kvm->arch.hpt_virt), + 1 << (kvm->arch.hpt_order - PAGE_SHIFT)); else free_pages(kvm->arch.hpt_virt, kvm->arch.hpt_order - PAGE_SHIFT); @@ -1579,7 +1579,7 @@ int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *ghf) ctx->first_pass = 1; rwflag = (ghf->flags & KVM_GET_HTAB_WRITE) ? O_WRONLY : O_RDONLY; - ret = anon_inode_getfd("kvm-htab", &kvm_htab_fops, ctx, rwflag); + ret = anon_inode_getfd("kvm-htab", &kvm_htab_fops, ctx, rwflag | O_CLOEXEC); if (ret < 0) { kvm_put_kvm(kvm); return ret; diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index b2d3f3b2de72..54cf9bc94dad 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c @@ -136,7 +136,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, mutex_unlock(&kvm->lock); return anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops, - stt, O_RDWR); + stt, O_RDWR | O_CLOEXEC); fail: if (stt) { diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c index 1f6344c4408d..360ce68c9809 100644 --- a/arch/powerpc/kvm/book3s_emulate.c +++ b/arch/powerpc/kvm/book3s_emulate.c @@ -458,6 +458,7 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) case SPRN_PMC4_GEKKO: case SPRN_WPAR_GEKKO: case SPRN_MSSSR0: + case SPRN_DABR: break; unprivileged: default: @@ -555,6 +556,7 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val) case SPRN_PMC4_GEKKO: case SPRN_WPAR_GEKKO: case SPRN_MSSSR0: + case SPRN_DABR: *spr_val = 0; break; default: diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 7629cd3eb91a..b0ee3bc9ca76 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -680,13 +680,12 @@ static int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, } int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, - struct kvm_sregs *sregs) + struct kvm_sregs *sregs) { int i; - sregs->pvr = vcpu->arch.pvr; - memset(sregs, 0, sizeof(struct kvm_sregs)); + sregs->pvr = vcpu->arch.pvr; for (i = 0; i < vcpu->arch.slb_max; i++) { sregs->u.s.ppc64.slb[i].slbe = vcpu->arch.slb[i].orige; sregs->u.s.ppc64.slb[i].slbv = vcpu->arch.slb[i].origv; @@ -696,7 +695,7 @@ int 
kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, } int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, - struct kvm_sregs *sregs) + struct kvm_sregs *sregs) { int i, j; @@ -1511,10 +1510,10 @@ static inline int lpcr_rmls(unsigned long rma_size) static int kvm_rma_fault(struct vm_area_struct *vma, struct vm_fault *vmf) { - struct kvmppc_linear_info *ri = vma->vm_file->private_data; struct page *page; + struct kvm_rma_info *ri = vma->vm_file->private_data; - if (vmf->pgoff >= ri->npages) + if (vmf->pgoff >= kvm_rma_pages) return VM_FAULT_SIGBUS; page = pfn_to_page(ri->base_pfn + vmf->pgoff); @@ -1536,7 +1535,7 @@ static int kvm_rma_mmap(struct file *file, struct vm_area_struct *vma) static int kvm_rma_release(struct inode *inode, struct file *filp) { - struct kvmppc_linear_info *ri = filp->private_data; + struct kvm_rma_info *ri = filp->private_data; kvm_release_rma(ri); return 0; @@ -1549,18 +1548,27 @@ static const struct file_operations kvm_rma_fops = { long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret) { - struct kvmppc_linear_info *ri; long fd; + struct kvm_rma_info *ri; + /* + * Only do this on PPC970 in HV mode + */ + if (!cpu_has_feature(CPU_FTR_HVMODE) || + !cpu_has_feature(CPU_FTR_ARCH_201)) + return -EINVAL; + + if (!kvm_rma_pages) + return -EINVAL; ri = kvm_alloc_rma(); if (!ri) return -ENOMEM; - fd = anon_inode_getfd("kvm-rma", &kvm_rma_fops, ri, O_RDWR); + fd = anon_inode_getfd("kvm-rma", &kvm_rma_fops, ri, O_RDWR | O_CLOEXEC); if (fd < 0) kvm_release_rma(ri); - ret->rma_size = ri->npages << PAGE_SHIFT; + ret->rma_size = kvm_rma_pages << PAGE_SHIFT; return fd; } @@ -1725,7 +1733,7 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) { int err = 0; struct kvm *kvm = vcpu->kvm; - struct kvmppc_linear_info *ri = NULL; + struct kvm_rma_info *ri = NULL; unsigned long hva; struct kvm_memory_slot *memslot; struct vm_area_struct *vma; @@ -1803,7 +1811,7 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) } else { /* Set up to use an RMO region */ - rma_size = ri->npages; + rma_size = kvm_rma_pages; if (rma_size > memslot->npages) rma_size = memslot->npages; rma_size <<= PAGE_SHIFT; @@ -1831,14 +1839,14 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) /* POWER7 */ lpcr &= ~(LPCR_VPM0 | LPCR_VRMA_L); lpcr |= rmls << LPCR_RMLS_SH; - kvm->arch.rmor = kvm->arch.rma->base_pfn << PAGE_SHIFT; + kvm->arch.rmor = ri->base_pfn << PAGE_SHIFT; } kvm->arch.lpcr = lpcr; pr_info("KVM: Using RMO at %lx size %lx (LPCR = %lx)\n", ri->base_pfn << PAGE_SHIFT, rma_size, lpcr); /* Initialize phys addrs of pages in RMO */ - npages = ri->npages; + npages = kvm_rma_pages; porder = __ilog2(npages); physp = memslot->arch.slot_phys; if (physp) { diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index ec0a9e5de100..8cd0daebb82d 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -13,33 +13,34 @@ #include <linux/spinlock.h> #include <linux/bootmem.h> #include <linux/init.h> +#include <linux/memblock.h> +#include <linux/sizes.h> #include <asm/cputable.h> #include <asm/kvm_ppc.h> #include <asm/kvm_book3s.h> -#define KVM_LINEAR_RMA 0 -#define KVM_LINEAR_HPT 1 - -static void __init kvm_linear_init_one(ulong size, int count, int type); -static struct kvmppc_linear_info *kvm_alloc_linear(int type); -static void kvm_release_linear(struct kvmppc_linear_info *ri); - -int kvm_hpt_order = KVM_DEFAULT_HPT_ORDER; -EXPORT_SYMBOL_GPL(kvm_hpt_order); - -/*************** RMA 
*************/
-
+#include "book3s_hv_cma.h"
+/*
+ * Hash page table alignment on newer CPUs (CPU_FTR_ARCH_206)
+ * must be a power of 2.
+ */
+#define HPT_ALIGN_PAGES	((1 << 18) >> PAGE_SHIFT) /* 256k */
+/*
+ * By default we reserve 5% of memory for hash pagetable allocation.
+ */
+static unsigned long kvm_cma_resv_ratio = 5;
 /*
- * This maintains a list of RMAs (real mode areas) for KVM guests to use.
+ * We allocate RMAs (real mode areas) for KVM guests from the KVM CMA area.
  * Each RMA has to be physically contiguous and of a size that the
  * hardware supports. PPC970 and POWER7 support 64MB, 128MB and 256MB,
  * and other larger sizes. Since we are unlikely to be able to allocate
  * that much physically contiguous memory after the system is up and running,
- * we preallocate a set of RMAs in early boot for KVM to use.
+ * we preallocate a set of RMAs in early boot using CMA.
+ * Each RMA size must be a power of 2.
  */
-static unsigned long kvm_rma_size = 64 << 20;	/* 64MB */
-static unsigned long kvm_rma_count;
+unsigned long kvm_rma_pages = (1 << 27) >> PAGE_SHIFT;	/* 128MB */
+EXPORT_SYMBOL_GPL(kvm_rma_pages);
 
 /* Work out RMLS (real mode limit selector) field value for a given
  * RMA size. Assumes POWER7 or PPC970. */
@@ -69,165 +70,114 @@ static inline int lpcr_rmls(unsigned long rma_size)
 
 static int __init early_parse_rma_size(char *p)
 {
-	if (!p)
-		return 1;
+	unsigned long kvm_rma_size;
+	pr_debug("%s(%s)\n", __func__, p);
+	if (!p)
+		return -EINVAL;
 	kvm_rma_size = memparse(p, &p);
-
+	/*
+	 * Check that the requested size is one supported in hardware
+	 */
+	if (lpcr_rmls(kvm_rma_size) < 0) {
+		pr_err("RMA size of 0x%lx not supported\n", kvm_rma_size);
+		return -EINVAL;
+	}
+	kvm_rma_pages = kvm_rma_size >> PAGE_SHIFT;
 	return 0;
 }
 early_param("kvm_rma_size", early_parse_rma_size);
 
-static int __init early_parse_rma_count(char *p)
+struct kvm_rma_info *kvm_alloc_rma(void)
 {
-	if (!p)
-		return 1;
-
-	kvm_rma_count = simple_strtoul(p, NULL, 0);
-
-	return 0;
-}
-early_param("kvm_rma_count", early_parse_rma_count);
-
-struct kvmppc_linear_info *kvm_alloc_rma(void)
-{
-	return kvm_alloc_linear(KVM_LINEAR_RMA);
+	struct page *page;
+	struct kvm_rma_info *ri;
+
+	ri = kmalloc(sizeof(struct kvm_rma_info), GFP_KERNEL);
+	if (!ri)
+		return NULL;
+	page = kvm_alloc_cma(kvm_rma_pages, kvm_rma_pages);
+	if (!page)
+		goto err_out;
+	atomic_set(&ri->use_count, 1);
+	ri->base_pfn = page_to_pfn(page);
+	return ri;
+err_out:
+	kfree(ri);
+	return NULL;
 }
 EXPORT_SYMBOL_GPL(kvm_alloc_rma);
 
-void kvm_release_rma(struct kvmppc_linear_info *ri)
+void kvm_release_rma(struct kvm_rma_info *ri)
 {
-	kvm_release_linear(ri);
+	if (atomic_dec_and_test(&ri->use_count)) {
+		kvm_release_cma(pfn_to_page(ri->base_pfn), kvm_rma_pages);
+		kfree(ri);
+	}
 }
 EXPORT_SYMBOL_GPL(kvm_release_rma);
 
-/*************** HPT *************/
-
-/*
- * This maintains a list of big linear HPT tables that contain the GVA->HPA
- * memory mappings. If we don't reserve those early on, we might not be able
- * to get a big (usually 16MB) linear memory region from the kernel anymore.
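The reworked early_parse_rma_size() above now rejects, at parse time, any size that lpcr_rmls() cannot encode. A rough user-space illustration of that validate-then-convert step; rmls_for() is abbreviated from lpcr_rmls() (the real table also has 1GB, 16GB and 256GB entries, and 32MB is POWER7-only), so treat it as a sketch:

#include <stdio.h>

/* Abbreviated stand-in for lpcr_rmls(): RMA size -> RMLS field value. */
static int rmls_for(unsigned long rma_size)
{
    switch (rma_size) {
    case 32UL << 20:  return 8;
    case 64UL << 20:  return 3;
    case 128UL << 20: return 7;
    case 256UL << 20: return 4;
    default:          return -1;    /* not expressible in LPCR[RMLS] */
    }
}

int main(void)
{
    unsigned long sizes[] = { 64UL << 20, 100UL << 20 };
    int i;

    for (i = 0; i < 2; i++) {
        if (rmls_for(sizes[i]) < 0)
            printf("RMA size of 0x%lx not supported\n", sizes[i]);
        else
            printf("0x%lx -> %lu pages\n", sizes[i], sizes[i] >> 12);
    }
    return 0;
}

Rejecting a bad kvm_rma_size= value here, instead of at first use, means the boot-time reservation never has to cope with an unusable size.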
- */ - -static unsigned long kvm_hpt_count; - -static int __init early_parse_hpt_count(char *p) +static int __init early_parse_kvm_cma_resv(char *p) { + pr_debug("%s(%s)\n", __func__, p); if (!p) - return 1; - - kvm_hpt_count = simple_strtoul(p, NULL, 0); - - return 0; + return -EINVAL; + return kstrtoul(p, 0, &kvm_cma_resv_ratio); } -early_param("kvm_hpt_count", early_parse_hpt_count); +early_param("kvm_cma_resv_ratio", early_parse_kvm_cma_resv); -struct kvmppc_linear_info *kvm_alloc_hpt(void) +struct page *kvm_alloc_hpt(unsigned long nr_pages) { - return kvm_alloc_linear(KVM_LINEAR_HPT); + unsigned long align_pages = HPT_ALIGN_PAGES; + + /* Old CPUs require HPT aligned on a multiple of its size */ + if (!cpu_has_feature(CPU_FTR_ARCH_206)) + align_pages = nr_pages; + return kvm_alloc_cma(nr_pages, align_pages); } EXPORT_SYMBOL_GPL(kvm_alloc_hpt); -void kvm_release_hpt(struct kvmppc_linear_info *li) +void kvm_release_hpt(struct page *page, unsigned long nr_pages) { - kvm_release_linear(li); + kvm_release_cma(page, nr_pages); } EXPORT_SYMBOL_GPL(kvm_release_hpt); -/*************** generic *************/ - -static LIST_HEAD(free_linears); -static DEFINE_SPINLOCK(linear_lock); - -static void __init kvm_linear_init_one(ulong size, int count, int type) -{ - unsigned long i; - unsigned long j, npages; - void *linear; - struct page *pg; - const char *typestr; - struct kvmppc_linear_info *linear_info; - - if (!count) - return; - - typestr = (type == KVM_LINEAR_RMA) ? "RMA" : "HPT"; - - npages = size >> PAGE_SHIFT; - linear_info = alloc_bootmem(count * sizeof(struct kvmppc_linear_info)); - for (i = 0; i < count; ++i) { - linear = alloc_bootmem_align(size, size); - pr_debug("Allocated KVM %s at %p (%ld MB)\n", typestr, linear, - size >> 20); - linear_info[i].base_virt = linear; - linear_info[i].base_pfn = __pa(linear) >> PAGE_SHIFT; - linear_info[i].npages = npages; - linear_info[i].type = type; - list_add_tail(&linear_info[i].list, &free_linears); - atomic_set(&linear_info[i].use_count, 0); - - pg = pfn_to_page(linear_info[i].base_pfn); - for (j = 0; j < npages; ++j) { - atomic_inc(&pg->_count); - ++pg; - } - } -} - -static struct kvmppc_linear_info *kvm_alloc_linear(int type) -{ - struct kvmppc_linear_info *ri, *ret; - - ret = NULL; - spin_lock(&linear_lock); - list_for_each_entry(ri, &free_linears, list) { - if (ri->type != type) - continue; - - list_del(&ri->list); - atomic_inc(&ri->use_count); - memset(ri->base_virt, 0, ri->npages << PAGE_SHIFT); - ret = ri; - break; - } - spin_unlock(&linear_lock); - return ret; -} - -static void kvm_release_linear(struct kvmppc_linear_info *ri) -{ - if (atomic_dec_and_test(&ri->use_count)) { - spin_lock(&linear_lock); - list_add_tail(&ri->list, &free_linears); - spin_unlock(&linear_lock); - - } -} - -/* - * Called at boot time while the bootmem allocator is active, - * to allocate contiguous physical memory for the hash page - * tables for guests. +/** + * kvm_cma_reserve() - reserve area for kvm hash pagetable + * + * This function reserves memory from early allocator. It should be + * called by arch specific code once the early allocator (memblock or bootmem) + * has been activated and all other subsystems have already allocated/reserved + * memory. 
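kvm_alloc_rma()/kvm_release_rma() above pair a small heap-allocated handle with an atomic use count, so the backing CMA pages are returned only on the last put. A generic sketch of that pattern, using C11 stdatomic and malloc() as stand-ins for atomic_t and kvm_alloc_cma():

#include <stdatomic.h>
#include <stdlib.h>

struct rma_handle {
    atomic_int use_count;
    void *backing;                      /* stands in for the CMA page range */
};

static struct rma_handle *rma_alloc(size_t bytes)
{
    struct rma_handle *ri = malloc(sizeof(*ri));

    if (!ri)
        return NULL;
    ri->backing = malloc(bytes);        /* kvm_alloc_cma() in the patch */
    if (!ri->backing) {
        free(ri);
        return NULL;
    }
    atomic_init(&ri->use_count, 1);     /* creator holds the first reference */
    return ri;
}

static void rma_release(struct rma_handle *ri)
{
    /* Free the backing store only when the last reference is dropped. */
    if (atomic_fetch_sub(&ri->use_count, 1) == 1) {
        free(ri->backing);
        free(ri);
    }
}

int main(void)
{
    struct rma_handle *ri = rma_alloc(1 << 20);

    if (ri)
        rma_release(ri);                /* last put frees everything */
    return 0;
}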
 */
-void __init kvm_linear_init(void)
+void __init kvm_cma_reserve(void)
 {
-	/* HPT */
-	kvm_linear_init_one(1 << kvm_hpt_order, kvm_hpt_count, KVM_LINEAR_HPT);
-
-	/* RMA */
-	/* Only do this on PPC970 in HV mode */
-	if (!cpu_has_feature(CPU_FTR_HVMODE) ||
-	    !cpu_has_feature(CPU_FTR_ARCH_201))
-		return;
-
-	if (!kvm_rma_size || !kvm_rma_count)
-		return;
-
-	/* Check that the requested size is one supported in hardware */
-	if (lpcr_rmls(kvm_rma_size) < 0) {
-		pr_err("RMA size of 0x%lx not supported\n", kvm_rma_size);
-		return;
+	unsigned long align_size;
+	struct memblock_region *reg;
+	phys_addr_t selected_size = 0;
+	/*
+	 * We cannot use memblock_phys_mem_size() here, because
+	 * memblock_analyze() has not been called yet.
+	 */
+	for_each_memblock(memory, reg)
+		selected_size += memblock_region_memory_end_pfn(reg) -
+				 memblock_region_memory_base_pfn(reg);
+
+	selected_size = (selected_size * kvm_cma_resv_ratio / 100) << PAGE_SHIFT;
+	if (selected_size) {
+		pr_debug("%s: reserving %ld MiB for global area\n", __func__,
+			 (unsigned long)selected_size / SZ_1M);
+		/*
+		 * Old CPUs require the HPT to be aligned on a multiple of
+		 * its size, so for them use the largest size we could
+		 * request as the alignment.
+		 */
+		if (!cpu_has_feature(CPU_FTR_ARCH_206))
+			align_size = __rounddown_pow_of_two(selected_size);
+		else
+			align_size = HPT_ALIGN_PAGES << PAGE_SHIFT;
+
+		align_size = max(kvm_rma_pages << PAGE_SHIFT, align_size);
+		kvm_cma_declare_contiguous(selected_size, align_size);
 	}
-
-	kvm_linear_init_one(kvm_rma_size, kvm_rma_count, KVM_LINEAR_RMA);
 }
diff --git a/arch/powerpc/kvm/book3s_hv_cma.c b/arch/powerpc/kvm/book3s_hv_cma.c
new file mode 100644
index 000000000000..d9d3d8553d51
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_cma.c
@@ -0,0 +1,240 @@
+/*
+ * Contiguous Memory Allocator for ppc KVM hash pagetable based on CMA
+ * for DMA mapping framework
+ *
+ * Copyright IBM Corporation, 2013
+ * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ */
+#define pr_fmt(fmt) "kvm_cma: " fmt
+
+#ifdef CONFIG_CMA_DEBUG
+#ifndef DEBUG
+#  define DEBUG
+#endif
+#endif
+
+#include <linux/memblock.h>
+#include <linux/mutex.h>
+#include <linux/sizes.h>
+#include <linux/slab.h>
+
+#include "book3s_hv_cma.h"
+
+struct kvm_cma {
+	unsigned long	base_pfn;
+	unsigned long	count;
+	unsigned long	*bitmap;
+};
+
+static DEFINE_MUTEX(kvm_cma_mutex);
+static struct kvm_cma kvm_cma_area;
+
+/**
+ * kvm_cma_declare_contiguous() - reserve area for contiguous memory handling
+ *				  for kvm hash pagetable
+ * @size:  Size of the reserved memory.
+ * @alignment:  Alignment for the contiguous memory area
+ *
+ * This function reserves memory for the kvm cma area. It should be
+ * called by arch code while the early allocator (memblock or bootmem)
+ * is still active.
+ */
+long __init kvm_cma_declare_contiguous(phys_addr_t size, phys_addr_t alignment)
+{
+	long base_pfn;
+	phys_addr_t addr;
+	struct kvm_cma *cma = &kvm_cma_area;
+
+	pr_debug("%s(size %lx)\n", __func__, (unsigned long)size);
+
+	if (!size)
+		return -EINVAL;
+	/*
+	 * Sanitise input arguments.
+	 * We should be pageblock aligned for CMA.
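The sizing logic in kvm_cma_reserve() above is plain arithmetic: sum the memory pages over all memblock regions, take kvm_cma_resv_ratio percent, then pick the alignment rule by CPU generation and clamp it to at least one RMA. A worked example, assuming a hypothetical 8GB machine and the default 5% ratio:

#include <stdio.h>

#define PAGE_SHIFT      12
#define SZ_1M           (1UL << 20)
#define HPT_ALIGN_PAGES ((1UL << 18) >> PAGE_SHIFT)     /* 256kB */

/* Round down to a power of two, like __rounddown_pow_of_two(). */
static unsigned long rounddown_pow2(unsigned long v)
{
    while (v & (v - 1))
        v &= v - 1;         /* clear lowest set bit until one remains */
    return v;
}

int main(void)
{
    unsigned long total_pages = (8UL << 30) >> PAGE_SHIFT;   /* assume 8GB */
    unsigned long kvm_cma_resv_ratio = 5;                    /* percent */
    unsigned long kvm_rma_pages = (1UL << 27) >> PAGE_SHIFT; /* 128MB RMA */
    unsigned long selected, align_old, align_new;

    selected = (total_pages * kvm_cma_resv_ratio / 100) << PAGE_SHIFT;
    align_old = rounddown_pow2(selected);        /* PPC970 rule */
    align_new = HPT_ALIGN_PAGES << PAGE_SHIFT;   /* POWER7 rule */

    /* Both alignments are then clamped up to at least one RMA. */
    if (align_old < (kvm_rma_pages << PAGE_SHIFT))
        align_old = kvm_rma_pages << PAGE_SHIFT;
    if (align_new < (kvm_rma_pages << PAGE_SHIFT))
        align_new = kvm_rma_pages << PAGE_SHIFT;

    printf("reserve %lu MiB, align %lu MiB (PPC970) / %lu MiB (POWER7)\n",
           selected / SZ_1M, align_old / SZ_1M, align_new / SZ_1M);
    return 0;
}

On this assumed configuration that prints a ~409 MiB reservation, aligned to 256 MiB on PPC970 but only 128 MiB on POWER7, which is why the old-CPU path has to round the alignment up so aggressively.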
+ */ + alignment = max(alignment, (phys_addr_t)(PAGE_SIZE << pageblock_order)); + size = ALIGN(size, alignment); + /* + * Reserve memory + * Use __memblock_alloc_base() since + * memblock_alloc_base() panic()s. + */ + addr = __memblock_alloc_base(size, alignment, 0); + if (!addr) { + base_pfn = -ENOMEM; + goto err; + } else + base_pfn = PFN_DOWN(addr); + + /* + * Each reserved area must be initialised later, when more kernel + * subsystems (like slab allocator) are available. + */ + cma->base_pfn = base_pfn; + cma->count = size >> PAGE_SHIFT; + pr_info("CMA: reserved %ld MiB\n", (unsigned long)size / SZ_1M); + return 0; +err: + pr_err("CMA: failed to reserve %ld MiB\n", (unsigned long)size / SZ_1M); + return base_pfn; +} + +/** + * kvm_alloc_cma() - allocate pages from contiguous area + * @nr_pages: Requested number of pages. + * @align_pages: Requested alignment in number of pages + * + * This function allocates memory buffer for hash pagetable. + */ +struct page *kvm_alloc_cma(unsigned long nr_pages, unsigned long align_pages) +{ + int ret; + struct page *page = NULL; + struct kvm_cma *cma = &kvm_cma_area; + unsigned long chunk_count, nr_chunk; + unsigned long mask, pfn, pageno, start = 0; + + + if (!cma || !cma->count) + return NULL; + + pr_debug("%s(cma %p, count %lu, align pages %lu)\n", __func__, + (void *)cma, nr_pages, align_pages); + + if (!nr_pages) + return NULL; + /* + * align mask with chunk size. The bit tracks pages in chunk size + */ + VM_BUG_ON(!is_power_of_2(align_pages)); + mask = (align_pages >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT)) - 1; + BUILD_BUG_ON(PAGE_SHIFT > KVM_CMA_CHUNK_ORDER); + + chunk_count = cma->count >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT); + nr_chunk = nr_pages >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT); + + mutex_lock(&kvm_cma_mutex); + for (;;) { + pageno = bitmap_find_next_zero_area(cma->bitmap, chunk_count, + start, nr_chunk, mask); + if (pageno >= chunk_count) + break; + + pfn = cma->base_pfn + (pageno << (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT)); + ret = alloc_contig_range(pfn, pfn + nr_pages, MIGRATE_CMA); + if (ret == 0) { + bitmap_set(cma->bitmap, pageno, nr_chunk); + page = pfn_to_page(pfn); + memset(pfn_to_kaddr(pfn), 0, nr_pages << PAGE_SHIFT); + break; + } else if (ret != -EBUSY) { + break; + } + pr_debug("%s(): memory range at %p is busy, retrying\n", + __func__, pfn_to_page(pfn)); + /* try again with a bit different memory target */ + start = pageno + mask + 1; + } + mutex_unlock(&kvm_cma_mutex); + pr_debug("%s(): returned %p\n", __func__, page); + return page; +} + +/** + * kvm_release_cma() - release allocated pages for hash pagetable + * @pages: Allocated pages. + * @nr_pages: Number of allocated pages. + * + * This function releases memory allocated by kvm_alloc_cma(). + * It returns false when provided pages do not belong to contiguous area and + * true otherwise. 
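kvm_alloc_cma() above tracks the reserved region with one bitmap bit per 256kB chunk: scan for a free run, try to grab the underlying pages, and on transient contention retry further along (the kernel advances the scan start by pageno + mask + 1; the sketch below simply advances by one chunk). A compact user-space model of that scan-and-retry loop, with grab() as a hypothetical stand-in for alloc_contig_range():

#include <stdbool.h>
#include <stdio.h>

#define CHUNKS 64                       /* one bit per 256kB chunk */
static unsigned char bitmap[CHUNKS];    /* 0 = free, 1 = allocated */

/* Hypothetical stand-in for alloc_contig_range(): pretend the pages
 * under chunks 0..3 are transiently busy the first time around. */
static bool grab(unsigned int start, unsigned int n)
{
    static bool busy = true;

    (void)n;
    if (busy && start < 4) {
        busy = false;
        return false;                   /* -EBUSY in the kernel */
    }
    return true;
}

static long alloc_chunks(unsigned int n)
{
    unsigned int start = 0;

    for (;;) {
        unsigned int i, run = 0, pos = 0;

        /* like bitmap_find_next_zero_area(): n free chunks at >= start */
        for (i = start; i < CHUNKS && run < n; i++) {
            if (bitmap[i]) {
                run = 0;
            } else {
                if (!run)
                    pos = i;
                run++;
            }
        }
        if (run < n)
            return -1;                  /* nothing left */
        if (grab(pos, n)) {
            for (i = 0; i < n; i++)
                bitmap[pos + i] = 1;    /* commit the allocation */
            return pos;
        }
        start = pos + 1;                /* busy: retry further along */
    }
}

int main(void)
{
    printf("first run placed at chunk %ld\n", alloc_chunks(4));
    return 0;
}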
+ */
+bool kvm_release_cma(struct page *pages, unsigned long nr_pages)
+{
+	unsigned long pfn;
+	unsigned long nr_chunk;
+	struct kvm_cma *cma = &kvm_cma_area;
+
+	if (!cma || !pages)
+		return false;
+
+	pr_debug("%s(page %p count %lu)\n", __func__, (void *)pages, nr_pages);
+
+	pfn = page_to_pfn(pages);
+
+	if (pfn < cma->base_pfn || pfn >= cma->base_pfn + cma->count)
+		return false;
+
+	VM_BUG_ON(pfn + nr_pages > cma->base_pfn + cma->count);
+	nr_chunk = nr_pages >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
+
+	mutex_lock(&kvm_cma_mutex);
+	bitmap_clear(cma->bitmap,
+		     (pfn - cma->base_pfn) >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT),
+		     nr_chunk);
+	free_contig_range(pfn, nr_pages);
+	mutex_unlock(&kvm_cma_mutex);
+
+	return true;
+}
+
+static int __init kvm_cma_activate_area(unsigned long base_pfn,
+					unsigned long count)
+{
+	unsigned long pfn = base_pfn;
+	unsigned i = count >> pageblock_order;
+	struct zone *zone;
+
+	WARN_ON_ONCE(!pfn_valid(pfn));
+	zone = page_zone(pfn_to_page(pfn));
+	do {
+		unsigned j;
+		base_pfn = pfn;
+		for (j = pageblock_nr_pages; j; --j, pfn++) {
+			WARN_ON_ONCE(!pfn_valid(pfn));
+			/*
+			 * alloc_contig_range requires the pfn range
+			 * specified to be in the same zone. Make this
+			 * simple by forcing the entire CMA resv range
+			 * to be in the same zone.
+			 */
+			if (page_zone(pfn_to_page(pfn)) != zone)
+				return -EINVAL;
+		}
+		init_cma_reserved_pageblock(pfn_to_page(base_pfn));
+	} while (--i);
+	return 0;
+}
+
+static int __init kvm_cma_init_reserved_areas(void)
+{
+	int bitmap_size, ret;
+	unsigned long chunk_count;
+	struct kvm_cma *cma = &kvm_cma_area;
+
+	pr_debug("%s()\n", __func__);
+	if (!cma->count)
+		return 0;
+	chunk_count = cma->count >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
+	bitmap_size = BITS_TO_LONGS(chunk_count) * sizeof(long);
+	cma->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
+	if (!cma->bitmap)
+		return -ENOMEM;
+
+	ret = kvm_cma_activate_area(cma->base_pfn, cma->count);
+	if (ret)
+		goto error;
+	return 0;
+
+error:
+	kfree(cma->bitmap);
+	return ret;
+}
+core_initcall(kvm_cma_init_reserved_areas);
diff --git a/arch/powerpc/kvm/book3s_hv_cma.h b/arch/powerpc/kvm/book3s_hv_cma.h
new file mode 100644
index 000000000000..655144f75fa5
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_cma.h
@@ -0,0 +1,27 @@
+/*
+ * Contiguous Memory Allocator for ppc KVM hash pagetable based on CMA
+ * for DMA mapping framework
+ *
+ * Copyright IBM Corporation, 2013
+ * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ */
+
+#ifndef __POWERPC_KVM_CMA_ALLOC_H__
+#define __POWERPC_KVM_CMA_ALLOC_H__
+/*
+ * Both RMA and hash page table allocations are a multiple of 256K.
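The 256kB chunking declared in book3s_hv_cma.h reduces to shift arithmetic: with 4kB pages (PAGE_SHIFT = 12) and KVM_CMA_CHUNK_ORDER = 18, each bitmap bit covers 2^(18-12) = 64 pages, so the default 16MB HPT occupies 64 chunks. A quick sanity check:

#include <assert.h>
#include <stdio.h>

#define PAGE_SHIFT          12
#define KVM_CMA_CHUNK_ORDER 18      /* 256kB chunks */

int main(void)
{
    unsigned long pages_per_chunk = 1UL << (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
    unsigned long hpt_pages = 1UL << (24 - PAGE_SHIFT); /* 16MB default HPT */

    assert(pages_per_chunk == 64);
    printf("%lu pages/chunk, default HPT = %lu chunks\n",
           pages_per_chunk, hpt_pages / pages_per_chunk);
    return 0;
}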
+ */ +#define KVM_CMA_CHUNK_ORDER 18 + +extern struct page *kvm_alloc_cma(unsigned long nr_pages, + unsigned long align_pages); +extern bool kvm_release_cma(struct page *pages, unsigned long nr_pages); +extern long kvm_cma_declare_contiguous(phys_addr_t size, + phys_addr_t alignment) __init; +#endif diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index fc25689a9f35..45e30d6e462b 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -383,6 +383,80 @@ static inline int try_lock_tlbie(unsigned int *lock) return old == 0; } +/* + * tlbie/tlbiel is a bit different on the PPC970 compared to later + * processors such as POWER7; the large page bit is in the instruction + * not RB, and the top 16 bits and the bottom 12 bits of the VA + * in RB must be 0. + */ +static void do_tlbies_970(struct kvm *kvm, unsigned long *rbvalues, + long npages, int global, bool need_sync) +{ + long i; + + if (global) { + while (!try_lock_tlbie(&kvm->arch.tlbie_lock)) + cpu_relax(); + if (need_sync) + asm volatile("ptesync" : : : "memory"); + for (i = 0; i < npages; ++i) { + unsigned long rb = rbvalues[i]; + + if (rb & 1) /* large page */ + asm volatile("tlbie %0,1" : : + "r" (rb & 0x0000fffffffff000ul)); + else + asm volatile("tlbie %0,0" : : + "r" (rb & 0x0000fffffffff000ul)); + } + asm volatile("eieio; tlbsync; ptesync" : : : "memory"); + kvm->arch.tlbie_lock = 0; + } else { + if (need_sync) + asm volatile("ptesync" : : : "memory"); + for (i = 0; i < npages; ++i) { + unsigned long rb = rbvalues[i]; + + if (rb & 1) /* large page */ + asm volatile("tlbiel %0,1" : : + "r" (rb & 0x0000fffffffff000ul)); + else + asm volatile("tlbiel %0,0" : : + "r" (rb & 0x0000fffffffff000ul)); + } + asm volatile("ptesync" : : : "memory"); + } +} + +static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues, + long npages, int global, bool need_sync) +{ + long i; + + if (cpu_has_feature(CPU_FTR_ARCH_201)) { + /* PPC970 tlbie instruction is a bit different */ + do_tlbies_970(kvm, rbvalues, npages, global, need_sync); + return; + } + if (global) { + while (!try_lock_tlbie(&kvm->arch.tlbie_lock)) + cpu_relax(); + if (need_sync) + asm volatile("ptesync" : : : "memory"); + for (i = 0; i < npages; ++i) + asm volatile(PPC_TLBIE(%1,%0) : : + "r" (rbvalues[i]), "r" (kvm->arch.lpid)); + asm volatile("eieio; tlbsync; ptesync" : : : "memory"); + kvm->arch.tlbie_lock = 0; + } else { + if (need_sync) + asm volatile("ptesync" : : : "memory"); + for (i = 0; i < npages; ++i) + asm volatile("tlbiel %0" : : "r" (rbvalues[i])); + asm volatile("ptesync" : : : "memory"); + } +} + long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags, unsigned long pte_index, unsigned long avpn, unsigned long *hpret) @@ -408,19 +482,7 @@ long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags, if (v & HPTE_V_VALID) { hpte[0] &= ~HPTE_V_VALID; rb = compute_tlbie_rb(v, hpte[1], pte_index); - if (global_invalidates(kvm, flags)) { - while (!try_lock_tlbie(&kvm->arch.tlbie_lock)) - cpu_relax(); - asm volatile("ptesync" : : : "memory"); - asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync" - : : "r" (rb), "r" (kvm->arch.lpid)); - asm volatile("ptesync" : : : "memory"); - kvm->arch.tlbie_lock = 0; - } else { - asm volatile("ptesync" : : : "memory"); - asm volatile("tlbiel %0" : : "r" (rb)); - asm volatile("ptesync" : : : "memory"); - } + do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true); /* Read PTE low word after tlbie to get final R/C values */ remove_revmap_chain(kvm, pte_index, 
rev, v, hpte[1]); } @@ -448,12 +510,11 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu) unsigned long *hp, *hptes[4], tlbrb[4]; long int i, j, k, n, found, indexes[4]; unsigned long flags, req, pte_index, rcbits; - long int local = 0; + int global; long int ret = H_SUCCESS; struct revmap_entry *rev, *revs[4]; - if (atomic_read(&kvm->online_vcpus) == 1) - local = 1; + global = global_invalidates(kvm, 0); for (i = 0; i < 4 && ret == H_SUCCESS; ) { n = 0; for (; i < 4; ++i) { @@ -529,22 +590,7 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu) break; /* Now that we've collected a batch, do the tlbies */ - if (!local) { - while(!try_lock_tlbie(&kvm->arch.tlbie_lock)) - cpu_relax(); - asm volatile("ptesync" : : : "memory"); - for (k = 0; k < n; ++k) - asm volatile(PPC_TLBIE(%1,%0) : : - "r" (tlbrb[k]), - "r" (kvm->arch.lpid)); - asm volatile("eieio; tlbsync; ptesync" : : : "memory"); - kvm->arch.tlbie_lock = 0; - } else { - asm volatile("ptesync" : : : "memory"); - for (k = 0; k < n; ++k) - asm volatile("tlbiel %0" : : "r" (tlbrb[k])); - asm volatile("ptesync" : : : "memory"); - } + do_tlbies(kvm, tlbrb, n, global, true); /* Read PTE low words after tlbie to get final R/C values */ for (k = 0; k < n; ++k) { @@ -603,19 +649,7 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags, if (v & HPTE_V_VALID) { rb = compute_tlbie_rb(v, r, pte_index); hpte[0] = v & ~HPTE_V_VALID; - if (global_invalidates(kvm, flags)) { - while(!try_lock_tlbie(&kvm->arch.tlbie_lock)) - cpu_relax(); - asm volatile("ptesync" : : : "memory"); - asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync" - : : "r" (rb), "r" (kvm->arch.lpid)); - asm volatile("ptesync" : : : "memory"); - kvm->arch.tlbie_lock = 0; - } else { - asm volatile("ptesync" : : : "memory"); - asm volatile("tlbiel %0" : : "r" (rb)); - asm volatile("ptesync" : : : "memory"); - } + do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true); /* * If the host has this page as readonly but the guest * wants to make it read/write, reduce the permissions. 
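The rewritten H_BULK_REMOVE path above funnels all invalidations through do_tlbies(), amortizing the tlbie lock and the ptesync/eieio/tlbsync fencing over a batch of up to four entries instead of paying for them per PTE. The shape of that collect-then-flush pattern, sketched without the PowerPC specifics (all four helpers are hypothetical no-op stand-ins):

#define MAX_BATCH 4

/* Hypothetical stand-ins for the per-flush machinery. */
static void lock_flusher(void)   { }    /* try_lock_tlbie() spin */
static void unlock_flusher(void) { }
static void fence(void)          { }    /* ptesync / eieio; tlbsync */
static void flush_one(unsigned long rb) { (void)rb; }

/* Pay for the lock and the fences once per batch, not once per entry. */
static void flush_batch(const unsigned long *rb, int n, int global)
{
    int i;

    if (global)
        lock_flusher();
    fence();                    /* order PTE updates before the tlbies */
    for (i = 0; i < n; i++)
        flush_one(rb[i]);
    fence();                    /* wait for all invalidations to complete */
    if (global)
        unlock_flusher();
}

int main(void)
{
    unsigned long rb[MAX_BATCH] = { 1, 2, 3, 4 };

    flush_batch(rb, MAX_BATCH, 1);
    return 0;
}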
@@ -686,13 +720,7 @@ void kvmppc_invalidate_hpte(struct kvm *kvm, unsigned long *hptep, hptep[0] &= ~HPTE_V_VALID; rb = compute_tlbie_rb(hptep[0], hptep[1], pte_index); - while (!try_lock_tlbie(&kvm->arch.tlbie_lock)) - cpu_relax(); - asm volatile("ptesync" : : : "memory"); - asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync" - : : "r" (rb), "r" (kvm->arch.lpid)); - asm volatile("ptesync" : : : "memory"); - kvm->arch.tlbie_lock = 0; + do_tlbies(kvm, &rb, 1, 1, true); } EXPORT_SYMBOL_GPL(kvmppc_invalidate_hpte); @@ -706,12 +734,7 @@ void kvmppc_clear_ref_hpte(struct kvm *kvm, unsigned long *hptep, rbyte = (hptep[1] & ~HPTE_R_R) >> 8; /* modify only the second-last byte, which contains the ref bit */ *((char *)hptep + 14) = rbyte; - while (!try_lock_tlbie(&kvm->arch.tlbie_lock)) - cpu_relax(); - asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync" - : : "r" (rb), "r" (kvm->arch.lpid)); - asm volatile("ptesync" : : : "memory"); - kvm->arch.tlbie_lock = 0; + do_tlbies(kvm, &rb, 1, 1, false); } EXPORT_SYMBOL_GPL(kvmppc_clear_ref_hpte); diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index b02f91e4c70d..60dce5bfab3f 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -1381,7 +1381,7 @@ hcall_try_real_mode: cmpldi r3,hcall_real_table_end - hcall_real_table bge guest_exit_cont LOAD_REG_ADDR(r4, hcall_real_table) - lwzx r3,r3,r4 + lwax r3,r3,r4 cmpwi r3,0 beq guest_exit_cont add r3,r3,r4 diff --git a/arch/powerpc/kvm/book3s_interrupts.S b/arch/powerpc/kvm/book3s_interrupts.S index 48cbbf862958..17cfae5497a3 100644 --- a/arch/powerpc/kvm/book3s_interrupts.S +++ b/arch/powerpc/kvm/book3s_interrupts.S @@ -92,6 +92,11 @@ kvm_start_lightweight: PPC_LL r3, VCPU_HFLAGS(r4) rldicl r3, r3, 0, 63 /* r3 &= 1 */ stb r3, HSTATE_RESTORE_HID5(r13) + + /* Load up guest SPRG3 value, since it's user readable */ + ld r3, VCPU_SHARED(r4) + ld r3, VCPU_SHARED_SPRG3(r3) + mtspr SPRN_SPRG3, r3 #endif /* CONFIG_PPC_BOOK3S_64 */ PPC_LL r4, VCPU_SHADOW_MSR(r4) /* get shadow_msr */ @@ -123,6 +128,15 @@ kvmppc_handler_highmem: /* R7 = vcpu */ PPC_LL r7, GPR4(r1) +#ifdef CONFIG_PPC_BOOK3S_64 + /* + * Reload kernel SPRG3 value. + * No need to save guest value as usermode can't modify SPRG3. + */ + ld r3, PACA_SPRG3(r13) + mtspr SPRN_SPRG3, r3 +#endif /* CONFIG_PPC_BOOK3S_64 */ + PPC_STL r14, VCPU_GPR(R14)(r7) PPC_STL r15, VCPU_GPR(R15)(r7) PPC_STL r16, VCPU_GPR(R16)(r7) diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index c6e13d9a9e15..27db1e665959 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -468,7 +468,8 @@ void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr) * both the traditional FP registers and the added VSX * registers into thread.fpr[]. 
*/ - giveup_fpu(current); + if (current->thread.regs->msr & MSR_FP) + giveup_fpu(current); for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) vcpu_fpr[i] = thread_fpr[get_fpr_index(i)]; @@ -483,7 +484,8 @@ void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr) #ifdef CONFIG_ALTIVEC if (msr & MSR_VEC) { - giveup_altivec(current); + if (current->thread.regs->msr & MSR_VEC) + giveup_altivec(current); memcpy(vcpu->arch.vr, t->vr, sizeof(vcpu->arch.vr)); vcpu->arch.vscr = t->vscr; } @@ -575,8 +577,6 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr, printk(KERN_INFO "Loading up ext 0x%lx\n", msr); #endif - current->thread.regs->msr |= msr; - if (msr & MSR_FP) { for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) thread_fpr[get_fpr_index(i)] = vcpu_fpr[i]; @@ -598,12 +598,32 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr, #endif } + current->thread.regs->msr |= msr; vcpu->arch.guest_owned_ext |= msr; kvmppc_recalc_shadow_msr(vcpu); return RESUME_GUEST; } +/* + * Kernel code using FP or VMX could have flushed guest state to + * the thread_struct; if so, get it back now. + */ +static void kvmppc_handle_lost_ext(struct kvm_vcpu *vcpu) +{ + unsigned long lost_ext; + + lost_ext = vcpu->arch.guest_owned_ext & ~current->thread.regs->msr; + if (!lost_ext) + return; + + if (lost_ext & MSR_FP) + kvmppc_load_up_fpu(); + if (lost_ext & MSR_VEC) + kvmppc_load_up_altivec(); + current->thread.regs->msr |= lost_ext; +} + int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned int exit_nr) { @@ -772,7 +792,7 @@ program_interrupt: } case BOOK3S_INTERRUPT_SYSCALL: if (vcpu->arch.papr_enabled && - (kvmppc_get_last_inst(vcpu) == 0x44000022) && + (kvmppc_get_last_sc(vcpu) == 0x44000022) && !(vcpu->arch.shared->msr & MSR_PR)) { /* SC 1 papr hypercalls */ ulong cmd = kvmppc_get_gpr(vcpu, 3); @@ -890,8 +910,9 @@ program_interrupt: local_irq_enable(); r = s; } else { - kvmppc_lazy_ee_enable(); + kvmppc_fix_ee_before_entry(); } + kvmppc_handle_lost_ext(vcpu); } trace_kvm_book3s_reenter(r, vcpu); @@ -1162,7 +1183,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) if (vcpu->arch.shared->msr & MSR_FP) kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP); - kvmppc_lazy_ee_enable(); + kvmppc_fix_ee_before_entry(); ret = __kvmppc_vcpu_run(kvm_run, vcpu); diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c index 94c1dd46b83d..a3a5cb8ee7ea 100644 --- a/arch/powerpc/kvm/book3s_xics.c +++ b/arch/powerpc/kvm/book3s_xics.c @@ -19,6 +19,7 @@ #include <asm/hvcall.h> #include <asm/xics.h> #include <asm/debug.h> +#include <asm/time.h> #include <linux/debugfs.h> #include <linux/seq_file.h> diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index dcc94f016007..17722d82f1d1 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -674,8 +674,6 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) goto out; } - kvm_guest_enter(); - #ifdef CONFIG_PPC_FPU /* Save userspace FPU state in stack */ enable_kernel_fp(); @@ -698,7 +696,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) kvmppc_load_guest_fp(vcpu); #endif - kvmppc_lazy_ee_enable(); + kvmppc_fix_ee_before_entry(); ret = __kvmppc_vcpu_run(kvm_run, vcpu); @@ -1168,7 +1166,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, local_irq_enable(); r = (s << 2) | RESUME_HOST | (r & RESUME_FLAG_NV); } else { - kvmppc_lazy_ee_enable(); + kvmppc_fix_ee_before_entry(); } } diff --git 
a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 6316ee336e88..07c0106fab76 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -117,8 +117,6 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu)
 			kvm_guest_exit();
 			continue;
 		}
-
-		trace_hardirqs_on();
 #endif
 		kvm_guest_enter();
@@ -420,6 +418,10 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
 	return kvmppc_core_create_memslot(slot, npages);
 }
 
+void kvm_arch_memslots_updated(struct kvm *kvm)
+{
+}
+
 int kvm_arch_prepare_memory_region(struct kvm *kvm,
 				   struct kvm_memory_slot *memslot,
 				   struct kvm_userspace_memory_region *mem,
@@ -823,39 +825,39 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
 #endif
 #ifdef CONFIG_KVM_MPIC
 	case KVM_CAP_IRQ_MPIC: {
-		struct file *filp;
+		struct fd f;
 		struct kvm_device *dev;
 
 		r = -EBADF;
-		filp = fget(cap->args[0]);
-		if (!filp)
+		f = fdget(cap->args[0]);
+		if (!f.file)
 			break;
 
 		r = -EPERM;
-		dev = kvm_device_from_filp(filp);
+		dev = kvm_device_from_filp(f.file);
 		if (dev)
 			r = kvmppc_mpic_connect_vcpu(dev, vcpu, cap->args[1]);
 
-		fput(filp);
+		fdput(f);
 		break;
 	}
 #endif
 #ifdef CONFIG_KVM_XICS
 	case KVM_CAP_IRQ_XICS: {
-		struct file *filp;
+		struct fd f;
 		struct kvm_device *dev;
 
 		r = -EBADF;
-		filp = fget(cap->args[0]);
-		if (!filp)
+		f = fdget(cap->args[0]);
+		if (!f.file)
 			break;
 
 		r = -EPERM;
-		dev = kvm_device_from_filp(filp);
+		dev = kvm_device_from_filp(f.file);
 		if (dev)
 			r = kvmppc_xics_connect_vcpu(dev, vcpu, cap->args[1]);
 
-		fput(filp);
+		fdput(f);
 		break;
 	}
 #endif /* CONFIG_KVM_XICS */
diff --git a/arch/powerpc/oprofile/common.c b/arch/powerpc/oprofile/common.c
index 4f51025f5b00..c77348c5d463 100644
--- a/arch/powerpc/oprofile/common.c
+++ b/arch/powerpc/oprofile/common.c
@@ -119,7 +119,7 @@ static void op_powerpc_stop(void)
 		model->global_stop();
 }
 
-static int op_powerpc_create_files(struct super_block *sb, struct dentry *root)
+static int op_powerpc_create_files(struct dentry *root)
 {
 	int i;
 
@@ -128,9 +128,9 @@ static int op_powerpc_create_files(struct super_block *sb, struct dentry *root)
 	 * There is one mmcr0, mmcr1 and mmcra for setting the events for
 	 * all of the counters.
 	 */
-	oprofilefs_create_ulong(sb, root, "mmcr0", &sys.mmcr0);
-	oprofilefs_create_ulong(sb, root, "mmcr1", &sys.mmcr1);
-	oprofilefs_create_ulong(sb, root, "mmcra", &sys.mmcra);
+	oprofilefs_create_ulong(root, "mmcr0", &sys.mmcr0);
+	oprofilefs_create_ulong(root, "mmcr1", &sys.mmcr1);
+	oprofilefs_create_ulong(root, "mmcra", &sys.mmcra);
 #ifdef CONFIG_OPROFILE_CELL
 	/* create a file the user tool can check to see what level of profiling
 	 * support exists with this kernel. Initialize bit mask to indicate
@@ -142,7 +142,7 @@ static int op_powerpc_create_files(struct super_block *sb, struct dentry *root)
 	 * If the file does not exist, then the kernel only supports SPU
 	 * cycle profiling, PPU event and cycle profiling.
 	 */
-	oprofilefs_create_ulong(sb, root, "cell_support", &sys.cell_support);
+	oprofilefs_create_ulong(root, "cell_support", &sys.cell_support);
 	sys.cell_support = 0x1; /* Note, the user OProfile tool must check
 				 * that this bit is set before attempting to
				 * use SPU event profiling.
Older kernels @@ -160,11 +160,11 @@ static int op_powerpc_create_files(struct super_block *sb, struct dentry *root) char buf[4]; snprintf(buf, sizeof buf, "%d", i); - dir = oprofilefs_mkdir(sb, root, buf); + dir = oprofilefs_mkdir(root, buf); - oprofilefs_create_ulong(sb, dir, "enabled", &ctr[i].enabled); - oprofilefs_create_ulong(sb, dir, "event", &ctr[i].event); - oprofilefs_create_ulong(sb, dir, "count", &ctr[i].count); + oprofilefs_create_ulong(dir, "enabled", &ctr[i].enabled); + oprofilefs_create_ulong(dir, "event", &ctr[i].event); + oprofilefs_create_ulong(dir, "count", &ctr[i].count); /* * Classic PowerPC doesn't support per-counter @@ -173,14 +173,14 @@ static int op_powerpc_create_files(struct super_block *sb, struct dentry *root) * Book-E style performance monitors, we do * support them. */ - oprofilefs_create_ulong(sb, dir, "kernel", &ctr[i].kernel); - oprofilefs_create_ulong(sb, dir, "user", &ctr[i].user); + oprofilefs_create_ulong(dir, "kernel", &ctr[i].kernel); + oprofilefs_create_ulong(dir, "user", &ctr[i].user); - oprofilefs_create_ulong(sb, dir, "unit_mask", &ctr[i].unit_mask); + oprofilefs_create_ulong(dir, "unit_mask", &ctr[i].unit_mask); } - oprofilefs_create_ulong(sb, root, "enable_kernel", &sys.enable_kernel); - oprofilefs_create_ulong(sb, root, "enable_user", &sys.enable_user); + oprofilefs_create_ulong(root, "enable_kernel", &sys.enable_kernel); + oprofilefs_create_ulong(root, "enable_user", &sys.enable_user); /* Default to tracing both kernel and user */ sys.enable_kernel = 1; diff --git a/arch/powerpc/perf/power7-events-list.h b/arch/powerpc/perf/power7-events-list.h new file mode 100644 index 000000000000..687790a2c0b8 --- /dev/null +++ b/arch/powerpc/perf/power7-events-list.h @@ -0,0 +1,548 @@ +/* + * Performance counter support for POWER7 processors. + * + * Copyright 2013 Runzhen Wang, IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +EVENT(PM_IC_DEMAND_L2_BR_ALL, 0x04898) +EVENT(PM_GCT_UTIL_7_TO_10_SLOTS, 0x020a0) +EVENT(PM_PMC2_SAVED, 0x10022) +EVENT(PM_CMPLU_STALL_DFU, 0x2003c) +EVENT(PM_VSU0_16FLOP, 0x0a0a4) +EVENT(PM_MRK_LSU_DERAT_MISS, 0x3d05a) +EVENT(PM_MRK_ST_CMPL, 0x10034) +EVENT(PM_NEST_PAIR3_ADD, 0x40881) +EVENT(PM_L2_ST_DISP, 0x46180) +EVENT(PM_L2_CASTOUT_MOD, 0x16180) +EVENT(PM_ISEG, 0x020a4) +EVENT(PM_MRK_INST_TIMEO, 0x40034) +EVENT(PM_L2_RCST_DISP_FAIL_ADDR, 0x36282) +EVENT(PM_LSU1_DC_PREF_STREAM_CONFIRM, 0x0d0b6) +EVENT(PM_IERAT_WR_64K, 0x040be) +EVENT(PM_MRK_DTLB_MISS_16M, 0x4d05e) +EVENT(PM_IERAT_MISS, 0x100f6) +EVENT(PM_MRK_PTEG_FROM_LMEM, 0x4d052) +EVENT(PM_FLOP, 0x100f4) +EVENT(PM_THRD_PRIO_4_5_CYC, 0x040b4) +EVENT(PM_BR_PRED_TA, 0x040aa) +EVENT(PM_CMPLU_STALL_FXU, 0x20014) +EVENT(PM_EXT_INT, 0x200f8) +EVENT(PM_VSU_FSQRT_FDIV, 0x0a888) +EVENT(PM_MRK_LD_MISS_EXPOSED_CYC, 0x1003e) +EVENT(PM_LSU1_LDF, 0x0c086) +EVENT(PM_IC_WRITE_ALL, 0x0488c) +EVENT(PM_LSU0_SRQ_STFWD, 0x0c0a0) +EVENT(PM_PTEG_FROM_RL2L3_MOD, 0x1c052) +EVENT(PM_MRK_DATA_FROM_L31_SHR, 0x1d04e) +EVENT(PM_DATA_FROM_L21_MOD, 0x3c046) +EVENT(PM_VSU1_SCAL_DOUBLE_ISSUED, 0x0b08a) +EVENT(PM_VSU0_8FLOP, 0x0a0a0) +EVENT(PM_POWER_EVENT1, 0x1006e) +EVENT(PM_DISP_CLB_HELD_BAL, 0x02092) +EVENT(PM_VSU1_2FLOP, 0x0a09a) +EVENT(PM_LWSYNC_HELD, 0x0209a) +EVENT(PM_PTEG_FROM_DL2L3_SHR, 0x3c054) +EVENT(PM_INST_FROM_L21_MOD, 0x34046) +EVENT(PM_IERAT_XLATE_WR_16MPLUS, 0x040bc) +EVENT(PM_IC_REQ_ALL, 0x04888) +EVENT(PM_DSLB_MISS, 0x0d090) +EVENT(PM_L3_MISS, 0x1f082) +EVENT(PM_LSU0_L1_PREF, 0x0d0b8) +EVENT(PM_VSU_SCALAR_SINGLE_ISSUED, 0x0b884) +EVENT(PM_LSU1_DC_PREF_STREAM_CONFIRM_STRIDE, 0x0d0be) +EVENT(PM_L2_INST, 0x36080) +EVENT(PM_VSU0_FRSP, 0x0a0b4) +EVENT(PM_FLUSH_DISP, 0x02082) +EVENT(PM_PTEG_FROM_L2MISS, 0x4c058) +EVENT(PM_VSU1_DQ_ISSUED, 0x0b09a) +EVENT(PM_CMPLU_STALL_LSU, 0x20012) +EVENT(PM_MRK_DATA_FROM_DMEM, 0x1d04a) +EVENT(PM_LSU_FLUSH_ULD, 0x0c8b0) +EVENT(PM_PTEG_FROM_LMEM, 0x4c052) +EVENT(PM_MRK_DERAT_MISS_16M, 0x3d05c) +EVENT(PM_THRD_ALL_RUN_CYC, 0x2000c) +EVENT(PM_MEM0_PREFETCH_DISP, 0x20083) +EVENT(PM_MRK_STALL_CMPLU_CYC_COUNT, 0x3003f) +EVENT(PM_DATA_FROM_DL2L3_MOD, 0x3c04c) +EVENT(PM_VSU_FRSP, 0x0a8b4) +EVENT(PM_MRK_DATA_FROM_L21_MOD, 0x3d046) +EVENT(PM_PMC1_OVERFLOW, 0x20010) +EVENT(PM_VSU0_SINGLE, 0x0a0a8) +EVENT(PM_MRK_PTEG_FROM_L3MISS, 0x2d058) +EVENT(PM_MRK_PTEG_FROM_L31_SHR, 0x2d056) +EVENT(PM_VSU0_VECTOR_SP_ISSUED, 0x0b090) +EVENT(PM_VSU1_FEST, 0x0a0ba) +EVENT(PM_MRK_INST_DISP, 0x20030) +EVENT(PM_VSU0_COMPLEX_ISSUED, 0x0b096) +EVENT(PM_LSU1_FLUSH_UST, 0x0c0b6) +EVENT(PM_INST_CMPL, 0x00002) +EVENT(PM_FXU_IDLE, 0x1000e) +EVENT(PM_LSU0_FLUSH_ULD, 0x0c0b0) +EVENT(PM_MRK_DATA_FROM_DL2L3_MOD, 0x3d04c) +EVENT(PM_LSU_LMQ_SRQ_EMPTY_ALL_CYC, 0x3001c) +EVENT(PM_LSU1_REJECT_LMQ_FULL, 0x0c0a6) +EVENT(PM_INST_PTEG_FROM_L21_MOD, 0x3e056) +EVENT(PM_INST_FROM_RL2L3_MOD, 0x14042) +EVENT(PM_SHL_CREATED, 0x05082) +EVENT(PM_L2_ST_HIT, 0x46182) +EVENT(PM_DATA_FROM_DMEM, 0x1c04a) +EVENT(PM_L3_LD_MISS, 0x2f082) +EVENT(PM_FXU1_BUSY_FXU0_IDLE, 0x4000e) +EVENT(PM_DISP_CLB_HELD_RES, 0x02094) +EVENT(PM_L2_SN_SX_I_DONE, 0x36382) +EVENT(PM_GRP_CMPL, 0x30004) +EVENT(PM_STCX_CMPL, 0x0c098) +EVENT(PM_VSU0_2FLOP, 0x0a098) +EVENT(PM_L3_PREF_MISS, 0x3f082) +EVENT(PM_LSU_SRQ_SYNC_CYC, 0x0d096) +EVENT(PM_LSU_REJECT_ERAT_MISS, 0x20064) +EVENT(PM_L1_ICACHE_MISS, 0x200fc) +EVENT(PM_LSU1_FLUSH_SRQ, 0x0c0be) +EVENT(PM_LD_REF_L1_LSU0, 0x0c080) +EVENT(PM_VSU0_FEST, 0x0a0b8) +EVENT(PM_VSU_VECTOR_SINGLE_ISSUED, 0x0b890) +EVENT(PM_FREQ_UP, 0x4000c) +EVENT(PM_DATA_FROM_LMEM, 
0x3c04a) +EVENT(PM_LSU1_LDX, 0x0c08a) +EVENT(PM_PMC3_OVERFLOW, 0x40010) +EVENT(PM_MRK_BR_MPRED, 0x30036) +EVENT(PM_SHL_MATCH, 0x05086) +EVENT(PM_MRK_BR_TAKEN, 0x10036) +EVENT(PM_CMPLU_STALL_BRU, 0x4004e) +EVENT(PM_ISLB_MISS, 0x0d092) +EVENT(PM_CYC, 0x0001e) +EVENT(PM_DISP_HELD_THERMAL, 0x30006) +EVENT(PM_INST_PTEG_FROM_RL2L3_SHR, 0x2e054) +EVENT(PM_LSU1_SRQ_STFWD, 0x0c0a2) +EVENT(PM_GCT_NOSLOT_BR_MPRED, 0x4001a) +EVENT(PM_1PLUS_PPC_CMPL, 0x100f2) +EVENT(PM_PTEG_FROM_DMEM, 0x2c052) +EVENT(PM_VSU_2FLOP, 0x0a898) +EVENT(PM_GCT_FULL_CYC, 0x04086) +EVENT(PM_MRK_DATA_FROM_L3_CYC, 0x40020) +EVENT(PM_LSU_SRQ_S0_ALLOC, 0x0d09d) +EVENT(PM_MRK_DERAT_MISS_4K, 0x1d05c) +EVENT(PM_BR_MPRED_TA, 0x040ae) +EVENT(PM_INST_PTEG_FROM_L2MISS, 0x4e058) +EVENT(PM_DPU_HELD_POWER, 0x20006) +EVENT(PM_RUN_INST_CMPL, 0x400fa) +EVENT(PM_MRK_VSU_FIN, 0x30032) +EVENT(PM_LSU_SRQ_S0_VALID, 0x0d09c) +EVENT(PM_GCT_EMPTY_CYC, 0x20008) +EVENT(PM_IOPS_DISP, 0x30014) +EVENT(PM_RUN_SPURR, 0x10008) +EVENT(PM_PTEG_FROM_L21_MOD, 0x3c056) +EVENT(PM_VSU0_1FLOP, 0x0a080) +EVENT(PM_SNOOP_TLBIE, 0x0d0b2) +EVENT(PM_DATA_FROM_L3MISS, 0x2c048) +EVENT(PM_VSU_SINGLE, 0x0a8a8) +EVENT(PM_DTLB_MISS_16G, 0x1c05e) +EVENT(PM_CMPLU_STALL_VECTOR, 0x2001c) +EVENT(PM_FLUSH, 0x400f8) +EVENT(PM_L2_LD_HIT, 0x36182) +EVENT(PM_NEST_PAIR2_AND, 0x30883) +EVENT(PM_VSU1_1FLOP, 0x0a082) +EVENT(PM_IC_PREF_REQ, 0x0408a) +EVENT(PM_L3_LD_HIT, 0x2f080) +EVENT(PM_GCT_NOSLOT_IC_MISS, 0x2001a) +EVENT(PM_DISP_HELD, 0x10006) +EVENT(PM_L2_LD, 0x16080) +EVENT(PM_LSU_FLUSH_SRQ, 0x0c8bc) +EVENT(PM_BC_PLUS_8_CONV, 0x040b8) +EVENT(PM_MRK_DATA_FROM_L31_MOD_CYC, 0x40026) +EVENT(PM_CMPLU_STALL_VECTOR_LONG, 0x4004a) +EVENT(PM_L2_RCST_BUSY_RC_FULL, 0x26282) +EVENT(PM_TB_BIT_TRANS, 0x300f8) +EVENT(PM_THERMAL_MAX, 0x40006) +EVENT(PM_LSU1_FLUSH_ULD, 0x0c0b2) +EVENT(PM_LSU1_REJECT_LHS, 0x0c0ae) +EVENT(PM_LSU_LRQ_S0_ALLOC, 0x0d09f) +EVENT(PM_L3_CO_L31, 0x4f080) +EVENT(PM_POWER_EVENT4, 0x4006e) +EVENT(PM_DATA_FROM_L31_SHR, 0x1c04e) +EVENT(PM_BR_UNCOND, 0x0409e) +EVENT(PM_LSU1_DC_PREF_STREAM_ALLOC, 0x0d0aa) +EVENT(PM_PMC4_REWIND, 0x10020) +EVENT(PM_L2_RCLD_DISP, 0x16280) +EVENT(PM_THRD_PRIO_2_3_CYC, 0x040b2) +EVENT(PM_MRK_PTEG_FROM_L2MISS, 0x4d058) +EVENT(PM_IC_DEMAND_L2_BHT_REDIRECT, 0x04098) +EVENT(PM_LSU_DERAT_MISS, 0x200f6) +EVENT(PM_IC_PREF_CANCEL_L2, 0x04094) +EVENT(PM_MRK_FIN_STALL_CYC_COUNT, 0x1003d) +EVENT(PM_BR_PRED_CCACHE, 0x040a0) +EVENT(PM_GCT_UTIL_1_TO_2_SLOTS, 0x0209c) +EVENT(PM_MRK_ST_CMPL_INT, 0x30034) +EVENT(PM_LSU_TWO_TABLEWALK_CYC, 0x0d0a6) +EVENT(PM_MRK_DATA_FROM_L3MISS, 0x2d048) +EVENT(PM_GCT_NOSLOT_CYC, 0x100f8) +EVENT(PM_LSU_SET_MPRED, 0x0c0a8) +EVENT(PM_FLUSH_DISP_TLBIE, 0x0208a) +EVENT(PM_VSU1_FCONV, 0x0a0b2) +EVENT(PM_DERAT_MISS_16G, 0x4c05c) +EVENT(PM_INST_FROM_LMEM, 0x3404a) +EVENT(PM_IC_DEMAND_L2_BR_REDIRECT, 0x0409a) +EVENT(PM_CMPLU_STALL_SCALAR_LONG, 0x20018) +EVENT(PM_INST_PTEG_FROM_L2, 0x1e050) +EVENT(PM_PTEG_FROM_L2, 0x1c050) +EVENT(PM_MRK_DATA_FROM_L21_SHR_CYC, 0x20024) +EVENT(PM_MRK_DTLB_MISS_4K, 0x2d05a) +EVENT(PM_VSU0_FPSCR, 0x0b09c) +EVENT(PM_VSU1_VECT_DOUBLE_ISSUED, 0x0b082) +EVENT(PM_MRK_PTEG_FROM_RL2L3_MOD, 0x1d052) +EVENT(PM_MEM0_RQ_DISP, 0x10083) +EVENT(PM_L2_LD_MISS, 0x26080) +EVENT(PM_VMX_RESULT_SAT_1, 0x0b0a0) +EVENT(PM_L1_PREF, 0x0d8b8) +EVENT(PM_MRK_DATA_FROM_LMEM_CYC, 0x2002c) +EVENT(PM_GRP_IC_MISS_NONSPEC, 0x1000c) +EVENT(PM_PB_NODE_PUMP, 0x10081) +EVENT(PM_SHL_MERGED, 0x05084) +EVENT(PM_NEST_PAIR1_ADD, 0x20881) +EVENT(PM_DATA_FROM_L3, 0x1c048) +EVENT(PM_LSU_FLUSH, 0x0208e) +EVENT(PM_LSU_SRQ_SYNC_COUNT, 0x0d097) +EVENT(PM_PMC2_OVERFLOW, 
0x30010) +EVENT(PM_LSU_LDF, 0x0c884) +EVENT(PM_POWER_EVENT3, 0x3006e) +EVENT(PM_DISP_WT, 0x30008) +EVENT(PM_CMPLU_STALL_REJECT, 0x40016) +EVENT(PM_IC_BANK_CONFLICT, 0x04082) +EVENT(PM_BR_MPRED_CR_TA, 0x048ae) +EVENT(PM_L2_INST_MISS, 0x36082) +EVENT(PM_CMPLU_STALL_ERAT_MISS, 0x40018) +EVENT(PM_NEST_PAIR2_ADD, 0x30881) +EVENT(PM_MRK_LSU_FLUSH, 0x0d08c) +EVENT(PM_L2_LDST, 0x16880) +EVENT(PM_INST_FROM_L31_SHR, 0x1404e) +EVENT(PM_VSU0_FIN, 0x0a0bc) +EVENT(PM_LARX_LSU, 0x0c894) +EVENT(PM_INST_FROM_RMEM, 0x34042) +EVENT(PM_DISP_CLB_HELD_TLBIE, 0x02096) +EVENT(PM_MRK_DATA_FROM_DMEM_CYC, 0x2002e) +EVENT(PM_BR_PRED_CR, 0x040a8) +EVENT(PM_LSU_REJECT, 0x10064) +EVENT(PM_GCT_UTIL_3_TO_6_SLOTS, 0x0209e) +EVENT(PM_CMPLU_STALL_END_GCT_NOSLOT, 0x10028) +EVENT(PM_LSU0_REJECT_LMQ_FULL, 0x0c0a4) +EVENT(PM_VSU_FEST, 0x0a8b8) +EVENT(PM_NEST_PAIR0_AND, 0x10883) +EVENT(PM_PTEG_FROM_L3, 0x2c050) +EVENT(PM_POWER_EVENT2, 0x2006e) +EVENT(PM_IC_PREF_CANCEL_PAGE, 0x04090) +EVENT(PM_VSU0_FSQRT_FDIV, 0x0a088) +EVENT(PM_MRK_GRP_CMPL, 0x40030) +EVENT(PM_VSU0_SCAL_DOUBLE_ISSUED, 0x0b088) +EVENT(PM_GRP_DISP, 0x3000a) +EVENT(PM_LSU0_LDX, 0x0c088) +EVENT(PM_DATA_FROM_L2, 0x1c040) +EVENT(PM_MRK_DATA_FROM_RL2L3_MOD, 0x1d042) +EVENT(PM_LD_REF_L1, 0x0c880) +EVENT(PM_VSU0_VECT_DOUBLE_ISSUED, 0x0b080) +EVENT(PM_VSU1_2FLOP_DOUBLE, 0x0a08e) +EVENT(PM_THRD_PRIO_6_7_CYC, 0x040b6) +EVENT(PM_BC_PLUS_8_RSLV_TAKEN, 0x040ba) +EVENT(PM_BR_MPRED_CR, 0x040ac) +EVENT(PM_L3_CO_MEM, 0x4f082) +EVENT(PM_LD_MISS_L1, 0x400f0) +EVENT(PM_DATA_FROM_RL2L3_MOD, 0x1c042) +EVENT(PM_LSU_SRQ_FULL_CYC, 0x1001a) +EVENT(PM_TABLEWALK_CYC, 0x10026) +EVENT(PM_MRK_PTEG_FROM_RMEM, 0x3d052) +EVENT(PM_LSU_SRQ_STFWD, 0x0c8a0) +EVENT(PM_INST_PTEG_FROM_RMEM, 0x3e052) +EVENT(PM_FXU0_FIN, 0x10004) +EVENT(PM_LSU1_L1_SW_PREF, 0x0c09e) +EVENT(PM_PTEG_FROM_L31_MOD, 0x1c054) +EVENT(PM_PMC5_OVERFLOW, 0x10024) +EVENT(PM_LD_REF_L1_LSU1, 0x0c082) +EVENT(PM_INST_PTEG_FROM_L21_SHR, 0x4e056) +EVENT(PM_CMPLU_STALL_THRD, 0x1001c) +EVENT(PM_DATA_FROM_RMEM, 0x3c042) +EVENT(PM_VSU0_SCAL_SINGLE_ISSUED, 0x0b084) +EVENT(PM_BR_MPRED_LSTACK, 0x040a6) +EVENT(PM_MRK_DATA_FROM_RL2L3_MOD_CYC, 0x40028) +EVENT(PM_LSU0_FLUSH_UST, 0x0c0b4) +EVENT(PM_LSU_NCST, 0x0c090) +EVENT(PM_BR_TAKEN, 0x20004) +EVENT(PM_INST_PTEG_FROM_LMEM, 0x4e052) +EVENT(PM_GCT_NOSLOT_BR_MPRED_IC_MISS, 0x4001c) +EVENT(PM_DTLB_MISS_4K, 0x2c05a) +EVENT(PM_PMC4_SAVED, 0x30022) +EVENT(PM_VSU1_PERMUTE_ISSUED, 0x0b092) +EVENT(PM_SLB_MISS, 0x0d890) +EVENT(PM_LSU1_FLUSH_LRQ, 0x0c0ba) +EVENT(PM_DTLB_MISS, 0x300fc) +EVENT(PM_VSU1_FRSP, 0x0a0b6) +EVENT(PM_VSU_VECTOR_DOUBLE_ISSUED, 0x0b880) +EVENT(PM_L2_CASTOUT_SHR, 0x16182) +EVENT(PM_DATA_FROM_DL2L3_SHR, 0x3c044) +EVENT(PM_VSU1_STF, 0x0b08e) +EVENT(PM_ST_FIN, 0x200f0) +EVENT(PM_PTEG_FROM_L21_SHR, 0x4c056) +EVENT(PM_L2_LOC_GUESS_WRONG, 0x26480) +EVENT(PM_MRK_STCX_FAIL, 0x0d08e) +EVENT(PM_LSU0_REJECT_LHS, 0x0c0ac) +EVENT(PM_IC_PREF_CANCEL_HIT, 0x04092) +EVENT(PM_L3_PREF_BUSY, 0x4f080) +EVENT(PM_MRK_BRU_FIN, 0x2003a) +EVENT(PM_LSU1_NCLD, 0x0c08e) +EVENT(PM_INST_PTEG_FROM_L31_MOD, 0x1e054) +EVENT(PM_LSU_NCLD, 0x0c88c) +EVENT(PM_LSU_LDX, 0x0c888) +EVENT(PM_L2_LOC_GUESS_CORRECT, 0x16480) +EVENT(PM_THRESH_TIMEO, 0x10038) +EVENT(PM_L3_PREF_ST, 0x0d0ae) +EVENT(PM_DISP_CLB_HELD_SYNC, 0x02098) +EVENT(PM_VSU_SIMPLE_ISSUED, 0x0b894) +EVENT(PM_VSU1_SINGLE, 0x0a0aa) +EVENT(PM_DATA_TABLEWALK_CYC, 0x3001a) +EVENT(PM_L2_RC_ST_DONE, 0x36380) +EVENT(PM_MRK_PTEG_FROM_L21_MOD, 0x3d056) +EVENT(PM_LARX_LSU1, 0x0c096) +EVENT(PM_MRK_DATA_FROM_RMEM, 0x3d042) +EVENT(PM_DISP_CLB_HELD, 0x02090) +EVENT(PM_DERAT_MISS_4K, 
0x1c05c) +EVENT(PM_L2_RCLD_DISP_FAIL_ADDR, 0x16282) +EVENT(PM_SEG_EXCEPTION, 0x028a4) +EVENT(PM_FLUSH_DISP_SB, 0x0208c) +EVENT(PM_L2_DC_INV, 0x26182) +EVENT(PM_PTEG_FROM_DL2L3_MOD, 0x4c054) +EVENT(PM_DSEG, 0x020a6) +EVENT(PM_BR_PRED_LSTACK, 0x040a2) +EVENT(PM_VSU0_STF, 0x0b08c) +EVENT(PM_LSU_FX_FIN, 0x10066) +EVENT(PM_DERAT_MISS_16M, 0x3c05c) +EVENT(PM_MRK_PTEG_FROM_DL2L3_MOD, 0x4d054) +EVENT(PM_GCT_UTIL_11_PLUS_SLOTS, 0x020a2) +EVENT(PM_INST_FROM_L3, 0x14048) +EVENT(PM_MRK_IFU_FIN, 0x3003a) +EVENT(PM_ITLB_MISS, 0x400fc) +EVENT(PM_VSU_STF, 0x0b88c) +EVENT(PM_LSU_FLUSH_UST, 0x0c8b4) +EVENT(PM_L2_LDST_MISS, 0x26880) +EVENT(PM_FXU1_FIN, 0x40004) +EVENT(PM_SHL_DEALLOCATED, 0x05080) +EVENT(PM_L2_SN_M_WR_DONE, 0x46382) +EVENT(PM_LSU_REJECT_SET_MPRED, 0x0c8a8) +EVENT(PM_L3_PREF_LD, 0x0d0ac) +EVENT(PM_L2_SN_M_RD_DONE, 0x46380) +EVENT(PM_MRK_DERAT_MISS_16G, 0x4d05c) +EVENT(PM_VSU_FCONV, 0x0a8b0) +EVENT(PM_ANY_THRD_RUN_CYC, 0x100fa) +EVENT(PM_LSU_LMQ_FULL_CYC, 0x0d0a4) +EVENT(PM_MRK_LSU_REJECT_LHS, 0x0d082) +EVENT(PM_MRK_LD_MISS_L1_CYC, 0x4003e) +EVENT(PM_MRK_DATA_FROM_L2_CYC, 0x20020) +EVENT(PM_INST_IMC_MATCH_DISP, 0x30016) +EVENT(PM_MRK_DATA_FROM_RMEM_CYC, 0x4002c) +EVENT(PM_VSU0_SIMPLE_ISSUED, 0x0b094) +EVENT(PM_CMPLU_STALL_DIV, 0x40014) +EVENT(PM_MRK_PTEG_FROM_RL2L3_SHR, 0x2d054) +EVENT(PM_VSU_FMA_DOUBLE, 0x0a890) +EVENT(PM_VSU_4FLOP, 0x0a89c) +EVENT(PM_VSU1_FIN, 0x0a0be) +EVENT(PM_NEST_PAIR1_AND, 0x20883) +EVENT(PM_INST_PTEG_FROM_RL2L3_MOD, 0x1e052) +EVENT(PM_RUN_CYC, 0x200f4) +EVENT(PM_PTEG_FROM_RMEM, 0x3c052) +EVENT(PM_LSU_LRQ_S0_VALID, 0x0d09e) +EVENT(PM_LSU0_LDF, 0x0c084) +EVENT(PM_FLUSH_COMPLETION, 0x30012) +EVENT(PM_ST_MISS_L1, 0x300f0) +EVENT(PM_L2_NODE_PUMP, 0x36480) +EVENT(PM_INST_FROM_DL2L3_SHR, 0x34044) +EVENT(PM_MRK_STALL_CMPLU_CYC, 0x3003e) +EVENT(PM_VSU1_DENORM, 0x0a0ae) +EVENT(PM_MRK_DATA_FROM_L31_SHR_CYC, 0x20026) +EVENT(PM_NEST_PAIR0_ADD, 0x10881) +EVENT(PM_INST_FROM_L3MISS, 0x24048) +EVENT(PM_EE_OFF_EXT_INT, 0x02080) +EVENT(PM_INST_PTEG_FROM_DMEM, 0x2e052) +EVENT(PM_INST_FROM_DL2L3_MOD, 0x3404c) +EVENT(PM_PMC6_OVERFLOW, 0x30024) +EVENT(PM_VSU_2FLOP_DOUBLE, 0x0a88c) +EVENT(PM_TLB_MISS, 0x20066) +EVENT(PM_FXU_BUSY, 0x2000e) +EVENT(PM_L2_RCLD_DISP_FAIL_OTHER, 0x26280) +EVENT(PM_LSU_REJECT_LMQ_FULL, 0x0c8a4) +EVENT(PM_IC_RELOAD_SHR, 0x04096) +EVENT(PM_GRP_MRK, 0x10031) +EVENT(PM_MRK_ST_NEST, 0x20034) +EVENT(PM_VSU1_FSQRT_FDIV, 0x0a08a) +EVENT(PM_LSU0_FLUSH_LRQ, 0x0c0b8) +EVENT(PM_LARX_LSU0, 0x0c094) +EVENT(PM_IBUF_FULL_CYC, 0x04084) +EVENT(PM_MRK_DATA_FROM_DL2L3_SHR_CYC, 0x2002a) +EVENT(PM_LSU_DC_PREF_STREAM_ALLOC, 0x0d8a8) +EVENT(PM_GRP_MRK_CYC, 0x10030) +EVENT(PM_MRK_DATA_FROM_RL2L3_SHR_CYC, 0x20028) +EVENT(PM_L2_GLOB_GUESS_CORRECT, 0x16482) +EVENT(PM_LSU_REJECT_LHS, 0x0c8ac) +EVENT(PM_MRK_DATA_FROM_LMEM, 0x3d04a) +EVENT(PM_INST_PTEG_FROM_L3, 0x2e050) +EVENT(PM_FREQ_DOWN, 0x3000c) +EVENT(PM_PB_RETRY_NODE_PUMP, 0x30081) +EVENT(PM_INST_FROM_RL2L3_SHR, 0x1404c) +EVENT(PM_MRK_INST_ISSUED, 0x10032) +EVENT(PM_PTEG_FROM_L3MISS, 0x2c058) +EVENT(PM_RUN_PURR, 0x400f4) +EVENT(PM_MRK_GRP_IC_MISS, 0x40038) +EVENT(PM_MRK_DATA_FROM_L3, 0x1d048) +EVENT(PM_CMPLU_STALL_DCACHE_MISS, 0x20016) +EVENT(PM_PTEG_FROM_RL2L3_SHR, 0x2c054) +EVENT(PM_LSU_FLUSH_LRQ, 0x0c8b8) +EVENT(PM_MRK_DERAT_MISS_64K, 0x2d05c) +EVENT(PM_INST_PTEG_FROM_DL2L3_MOD, 0x4e054) +EVENT(PM_L2_ST_MISS, 0x26082) +EVENT(PM_MRK_PTEG_FROM_L21_SHR, 0x4d056) +EVENT(PM_LWSYNC, 0x0d094) +EVENT(PM_LSU0_DC_PREF_STREAM_CONFIRM_STRIDE, 0x0d0bc) +EVENT(PM_MRK_LSU_FLUSH_LRQ, 0x0d088) +EVENT(PM_INST_IMC_MATCH_CMPL, 0x100f0) 
+EVENT(PM_NEST_PAIR3_AND, 0x40883) +EVENT(PM_PB_RETRY_SYS_PUMP, 0x40081) +EVENT(PM_MRK_INST_FIN, 0x30030) +EVENT(PM_MRK_PTEG_FROM_DL2L3_SHR, 0x3d054) +EVENT(PM_INST_FROM_L31_MOD, 0x14044) +EVENT(PM_MRK_DTLB_MISS_64K, 0x3d05e) +EVENT(PM_LSU_FIN, 0x30066) +EVENT(PM_MRK_LSU_REJECT, 0x40064) +EVENT(PM_L2_CO_FAIL_BUSY, 0x16382) +EVENT(PM_MEM0_WQ_DISP, 0x40083) +EVENT(PM_DATA_FROM_L31_MOD, 0x1c044) +EVENT(PM_THERMAL_WARN, 0x10016) +EVENT(PM_VSU0_4FLOP, 0x0a09c) +EVENT(PM_BR_MPRED_CCACHE, 0x040a4) +EVENT(PM_CMPLU_STALL_IFU, 0x4004c) +EVENT(PM_L1_DEMAND_WRITE, 0x0408c) +EVENT(PM_FLUSH_BR_MPRED, 0x02084) +EVENT(PM_MRK_DTLB_MISS_16G, 0x1d05e) +EVENT(PM_MRK_PTEG_FROM_DMEM, 0x2d052) +EVENT(PM_L2_RCST_DISP, 0x36280) +EVENT(PM_CMPLU_STALL, 0x4000a) +EVENT(PM_LSU_PARTIAL_CDF, 0x0c0aa) +EVENT(PM_DISP_CLB_HELD_SB, 0x020a8) +EVENT(PM_VSU0_FMA_DOUBLE, 0x0a090) +EVENT(PM_FXU0_BUSY_FXU1_IDLE, 0x3000e) +EVENT(PM_IC_DEMAND_CYC, 0x10018) +EVENT(PM_MRK_DATA_FROM_L21_SHR, 0x3d04e) +EVENT(PM_MRK_LSU_FLUSH_UST, 0x0d086) +EVENT(PM_INST_PTEG_FROM_L3MISS, 0x2e058) +EVENT(PM_VSU_DENORM, 0x0a8ac) +EVENT(PM_MRK_LSU_PARTIAL_CDF, 0x0d080) +EVENT(PM_INST_FROM_L21_SHR, 0x3404e) +EVENT(PM_IC_PREF_WRITE, 0x0408e) +EVENT(PM_BR_PRED, 0x0409c) +EVENT(PM_INST_FROM_DMEM, 0x1404a) +EVENT(PM_IC_PREF_CANCEL_ALL, 0x04890) +EVENT(PM_LSU_DC_PREF_STREAM_CONFIRM, 0x0d8b4) +EVENT(PM_MRK_LSU_FLUSH_SRQ, 0x0d08a) +EVENT(PM_MRK_FIN_STALL_CYC, 0x1003c) +EVENT(PM_L2_RCST_DISP_FAIL_OTHER, 0x46280) +EVENT(PM_VSU1_DD_ISSUED, 0x0b098) +EVENT(PM_PTEG_FROM_L31_SHR, 0x2c056) +EVENT(PM_DATA_FROM_L21_SHR, 0x3c04e) +EVENT(PM_LSU0_NCLD, 0x0c08c) +EVENT(PM_VSU1_4FLOP, 0x0a09e) +EVENT(PM_VSU1_8FLOP, 0x0a0a2) +EVENT(PM_VSU_8FLOP, 0x0a8a0) +EVENT(PM_LSU_LMQ_SRQ_EMPTY_CYC, 0x2003e) +EVENT(PM_DTLB_MISS_64K, 0x3c05e) +EVENT(PM_THRD_CONC_RUN_INST, 0x300f4) +EVENT(PM_MRK_PTEG_FROM_L2, 0x1d050) +EVENT(PM_PB_SYS_PUMP, 0x20081) +EVENT(PM_VSU_FIN, 0x0a8bc) +EVENT(PM_MRK_DATA_FROM_L31_MOD, 0x1d044) +EVENT(PM_THRD_PRIO_0_1_CYC, 0x040b0) +EVENT(PM_DERAT_MISS_64K, 0x2c05c) +EVENT(PM_PMC2_REWIND, 0x30020) +EVENT(PM_INST_FROM_L2, 0x14040) +EVENT(PM_GRP_BR_MPRED_NONSPEC, 0x1000a) +EVENT(PM_INST_DISP, 0x200f2) +EVENT(PM_MEM0_RD_CANCEL_TOTAL, 0x30083) +EVENT(PM_LSU0_DC_PREF_STREAM_CONFIRM, 0x0d0b4) +EVENT(PM_L1_DCACHE_RELOAD_VALID, 0x300f6) +EVENT(PM_VSU_SCALAR_DOUBLE_ISSUED, 0x0b888) +EVENT(PM_L3_PREF_HIT, 0x3f080) +EVENT(PM_MRK_PTEG_FROM_L31_MOD, 0x1d054) +EVENT(PM_CMPLU_STALL_STORE, 0x2004a) +EVENT(PM_MRK_FXU_FIN, 0x20038) +EVENT(PM_PMC4_OVERFLOW, 0x10010) +EVENT(PM_MRK_PTEG_FROM_L3, 0x2d050) +EVENT(PM_LSU0_LMQ_LHR_MERGE, 0x0d098) +EVENT(PM_BTAC_HIT, 0x0508a) +EVENT(PM_L3_RD_BUSY, 0x4f082) +EVENT(PM_LSU0_L1_SW_PREF, 0x0c09c) +EVENT(PM_INST_FROM_L2MISS, 0x44048) +EVENT(PM_LSU0_DC_PREF_STREAM_ALLOC, 0x0d0a8) +EVENT(PM_L2_ST, 0x16082) +EVENT(PM_VSU0_DENORM, 0x0a0ac) +EVENT(PM_MRK_DATA_FROM_DL2L3_SHR, 0x3d044) +EVENT(PM_BR_PRED_CR_TA, 0x048aa) +EVENT(PM_VSU0_FCONV, 0x0a0b0) +EVENT(PM_MRK_LSU_FLUSH_ULD, 0x0d084) +EVENT(PM_BTAC_MISS, 0x05088) +EVENT(PM_MRK_LD_MISS_EXPOSED_CYC_COUNT, 0x1003f) +EVENT(PM_MRK_DATA_FROM_L2, 0x1d040) +EVENT(PM_LSU_DCACHE_RELOAD_VALID, 0x0d0a2) +EVENT(PM_VSU_FMA, 0x0a884) +EVENT(PM_LSU0_FLUSH_SRQ, 0x0c0bc) +EVENT(PM_LSU1_L1_PREF, 0x0d0ba) +EVENT(PM_IOPS_CMPL, 0x10014) +EVENT(PM_L2_SYS_PUMP, 0x36482) +EVENT(PM_L2_RCLD_BUSY_RC_FULL, 0x46282) +EVENT(PM_LSU_LMQ_S0_ALLOC, 0x0d0a1) +EVENT(PM_FLUSH_DISP_SYNC, 0x02088) +EVENT(PM_MRK_DATA_FROM_DL2L3_MOD_CYC, 0x4002a) +EVENT(PM_L2_IC_INV, 0x26180) +EVENT(PM_MRK_DATA_FROM_L21_MOD_CYC, 0x40024) +EVENT(PM_L3_PREF_LDST, 
0x0d8ac) +EVENT(PM_LSU_SRQ_EMPTY_CYC, 0x40008) +EVENT(PM_LSU_LMQ_S0_VALID, 0x0d0a0) +EVENT(PM_FLUSH_PARTIAL, 0x02086) +EVENT(PM_VSU1_FMA_DOUBLE, 0x0a092) +EVENT(PM_1PLUS_PPC_DISP, 0x400f2) +EVENT(PM_DATA_FROM_L2MISS, 0x200fe) +EVENT(PM_SUSPENDED, 0x00000) +EVENT(PM_VSU0_FMA, 0x0a084) +EVENT(PM_CMPLU_STALL_SCALAR, 0x40012) +EVENT(PM_STCX_FAIL, 0x0c09a) +EVENT(PM_VSU0_FSQRT_FDIV_DOUBLE, 0x0a094) +EVENT(PM_DC_PREF_DST, 0x0d0b0) +EVENT(PM_VSU1_SCAL_SINGLE_ISSUED, 0x0b086) +EVENT(PM_L3_HIT, 0x1f080) +EVENT(PM_L2_GLOB_GUESS_WRONG, 0x26482) +EVENT(PM_MRK_DFU_FIN, 0x20032) +EVENT(PM_INST_FROM_L1, 0x04080) +EVENT(PM_BRU_FIN, 0x10068) +EVENT(PM_IC_DEMAND_REQ, 0x04088) +EVENT(PM_VSU1_FSQRT_FDIV_DOUBLE, 0x0a096) +EVENT(PM_VSU1_FMA, 0x0a086) +EVENT(PM_MRK_LD_MISS_L1, 0x20036) +EVENT(PM_VSU0_2FLOP_DOUBLE, 0x0a08c) +EVENT(PM_LSU_DC_PREF_STRIDED_STREAM_CONFIRM, 0x0d8bc) +EVENT(PM_INST_PTEG_FROM_L31_SHR, 0x2e056) +EVENT(PM_MRK_LSU_REJECT_ERAT_MISS, 0x30064) +EVENT(PM_MRK_DATA_FROM_L2MISS, 0x4d048) +EVENT(PM_DATA_FROM_RL2L3_SHR, 0x1c04c) +EVENT(PM_INST_FROM_PREF, 0x14046) +EVENT(PM_VSU1_SQ, 0x0b09e) +EVENT(PM_L2_LD_DISP, 0x36180) +EVENT(PM_L2_DISP_ALL, 0x46080) +EVENT(PM_THRD_GRP_CMPL_BOTH_CYC, 0x10012) +EVENT(PM_VSU_FSQRT_FDIV_DOUBLE, 0x0a894) +EVENT(PM_BR_MPRED, 0x400f6) +EVENT(PM_INST_PTEG_FROM_DL2L3_SHR, 0x3e054) +EVENT(PM_VSU_1FLOP, 0x0a880) +EVENT(PM_HV_CYC, 0x2000a) +EVENT(PM_MRK_LSU_FIN, 0x40032) +EVENT(PM_MRK_DATA_FROM_RL2L3_SHR, 0x1d04c) +EVENT(PM_DTLB_MISS_16M, 0x4c05e) +EVENT(PM_LSU1_LMQ_LHR_MERGE, 0x0d09a) +EVENT(PM_IFU_FIN, 0x40066) diff --git a/arch/powerpc/perf/power7-pmu.c b/arch/powerpc/perf/power7-pmu.c index d1821b8bbc4c..56c67bca2f75 100644 --- a/arch/powerpc/perf/power7-pmu.c +++ b/arch/powerpc/perf/power7-pmu.c @@ -53,37 +53,13 @@ /* * Power7 event codes. 
*/ -#define PME_PM_CYC 0x1e -#define PME_PM_GCT_NOSLOT_CYC 0x100f8 -#define PME_PM_CMPLU_STALL 0x4000a -#define PME_PM_INST_CMPL 0x2 -#define PME_PM_LD_REF_L1 0xc880 -#define PME_PM_LD_MISS_L1 0x400f0 -#define PME_PM_BRU_FIN 0x10068 -#define PME_PM_BR_MPRED 0x400f6 - -#define PME_PM_CMPLU_STALL_FXU 0x20014 -#define PME_PM_CMPLU_STALL_DIV 0x40014 -#define PME_PM_CMPLU_STALL_SCALAR 0x40012 -#define PME_PM_CMPLU_STALL_SCALAR_LONG 0x20018 -#define PME_PM_CMPLU_STALL_VECTOR 0x2001c -#define PME_PM_CMPLU_STALL_VECTOR_LONG 0x4004a -#define PME_PM_CMPLU_STALL_LSU 0x20012 -#define PME_PM_CMPLU_STALL_REJECT 0x40016 -#define PME_PM_CMPLU_STALL_ERAT_MISS 0x40018 -#define PME_PM_CMPLU_STALL_DCACHE_MISS 0x20016 -#define PME_PM_CMPLU_STALL_STORE 0x2004a -#define PME_PM_CMPLU_STALL_THRD 0x1001c -#define PME_PM_CMPLU_STALL_IFU 0x4004c -#define PME_PM_CMPLU_STALL_BRU 0x4004e -#define PME_PM_GCT_NOSLOT_IC_MISS 0x2001a -#define PME_PM_GCT_NOSLOT_BR_MPRED 0x4001a -#define PME_PM_GCT_NOSLOT_BR_MPRED_IC_MISS 0x4001c -#define PME_PM_GRP_CMPL 0x30004 -#define PME_PM_1PLUS_PPC_CMPL 0x100f2 -#define PME_PM_CMPLU_STALL_DFU 0x2003c -#define PME_PM_RUN_CYC 0x200f4 -#define PME_PM_RUN_INST_CMPL 0x400fa +#define EVENT(_name, _code) \ + PME_##_name = _code, + +enum { +#include "power7-events-list.h" +}; +#undef EVENT /* * Layout of constraint bits: @@ -398,96 +374,36 @@ static int power7_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { }; -GENERIC_EVENT_ATTR(cpu-cycles, CYC); -GENERIC_EVENT_ATTR(stalled-cycles-frontend, GCT_NOSLOT_CYC); -GENERIC_EVENT_ATTR(stalled-cycles-backend, CMPLU_STALL); -GENERIC_EVENT_ATTR(instructions, INST_CMPL); -GENERIC_EVENT_ATTR(cache-references, LD_REF_L1); -GENERIC_EVENT_ATTR(cache-misses, LD_MISS_L1); -GENERIC_EVENT_ATTR(branch-instructions, BRU_FIN); -GENERIC_EVENT_ATTR(branch-misses, BR_MPRED); - -POWER_EVENT_ATTR(CYC, CYC); -POWER_EVENT_ATTR(GCT_NOSLOT_CYC, GCT_NOSLOT_CYC); -POWER_EVENT_ATTR(CMPLU_STALL, CMPLU_STALL); -POWER_EVENT_ATTR(INST_CMPL, INST_CMPL); -POWER_EVENT_ATTR(LD_REF_L1, LD_REF_L1); -POWER_EVENT_ATTR(LD_MISS_L1, LD_MISS_L1); -POWER_EVENT_ATTR(BRU_FIN, BRU_FIN) -POWER_EVENT_ATTR(BR_MPRED, BR_MPRED); - -POWER_EVENT_ATTR(CMPLU_STALL_FXU, CMPLU_STALL_FXU); -POWER_EVENT_ATTR(CMPLU_STALL_DIV, CMPLU_STALL_DIV); -POWER_EVENT_ATTR(CMPLU_STALL_SCALAR, CMPLU_STALL_SCALAR); -POWER_EVENT_ATTR(CMPLU_STALL_SCALAR_LONG, CMPLU_STALL_SCALAR_LONG); -POWER_EVENT_ATTR(CMPLU_STALL_VECTOR, CMPLU_STALL_VECTOR); -POWER_EVENT_ATTR(CMPLU_STALL_VECTOR_LONG, CMPLU_STALL_VECTOR_LONG); -POWER_EVENT_ATTR(CMPLU_STALL_LSU, CMPLU_STALL_LSU); -POWER_EVENT_ATTR(CMPLU_STALL_REJECT, CMPLU_STALL_REJECT); - -POWER_EVENT_ATTR(CMPLU_STALL_ERAT_MISS, CMPLU_STALL_ERAT_MISS); -POWER_EVENT_ATTR(CMPLU_STALL_DCACHE_MISS, CMPLU_STALL_DCACHE_MISS); -POWER_EVENT_ATTR(CMPLU_STALL_STORE, CMPLU_STALL_STORE); -POWER_EVENT_ATTR(CMPLU_STALL_THRD, CMPLU_STALL_THRD); -POWER_EVENT_ATTR(CMPLU_STALL_IFU, CMPLU_STALL_IFU); -POWER_EVENT_ATTR(CMPLU_STALL_BRU, CMPLU_STALL_BRU); -POWER_EVENT_ATTR(GCT_NOSLOT_IC_MISS, GCT_NOSLOT_IC_MISS); - -POWER_EVENT_ATTR(GCT_NOSLOT_BR_MPRED, GCT_NOSLOT_BR_MPRED); -POWER_EVENT_ATTR(GCT_NOSLOT_BR_MPRED_IC_MISS, GCT_NOSLOT_BR_MPRED_IC_MISS); -POWER_EVENT_ATTR(GRP_CMPL, GRP_CMPL); -POWER_EVENT_ATTR(1PLUS_PPC_CMPL, 1PLUS_PPC_CMPL); -POWER_EVENT_ATTR(CMPLU_STALL_DFU, CMPLU_STALL_DFU); -POWER_EVENT_ATTR(RUN_CYC, RUN_CYC); -POWER_EVENT_ATTR(RUN_INST_CMPL, RUN_INST_CMPL); +GENERIC_EVENT_ATTR(cpu-cycles, PM_CYC); +GENERIC_EVENT_ATTR(stalled-cycles-frontend, PM_GCT_NOSLOT_CYC); 
+GENERIC_EVENT_ATTR(stalled-cycles-backend, PM_CMPLU_STALL); +GENERIC_EVENT_ATTR(instructions, PM_INST_CMPL); +GENERIC_EVENT_ATTR(cache-references, PM_LD_REF_L1); +GENERIC_EVENT_ATTR(cache-misses, PM_LD_MISS_L1); +GENERIC_EVENT_ATTR(branch-instructions, PM_BRU_FIN); +GENERIC_EVENT_ATTR(branch-misses, PM_BR_MPRED); + +#define EVENT(_name, _code) POWER_EVENT_ATTR(_name, _name); +#include "power7-events-list.h" +#undef EVENT + +#define EVENT(_name, _code) POWER_EVENT_PTR(_name), static struct attribute *power7_events_attr[] = { - GENERIC_EVENT_PTR(CYC), - GENERIC_EVENT_PTR(GCT_NOSLOT_CYC), - GENERIC_EVENT_PTR(CMPLU_STALL), - GENERIC_EVENT_PTR(INST_CMPL), - GENERIC_EVENT_PTR(LD_REF_L1), - GENERIC_EVENT_PTR(LD_MISS_L1), - GENERIC_EVENT_PTR(BRU_FIN), - GENERIC_EVENT_PTR(BR_MPRED), - - POWER_EVENT_PTR(CYC), - POWER_EVENT_PTR(GCT_NOSLOT_CYC), - POWER_EVENT_PTR(CMPLU_STALL), - POWER_EVENT_PTR(INST_CMPL), - POWER_EVENT_PTR(LD_REF_L1), - POWER_EVENT_PTR(LD_MISS_L1), - POWER_EVENT_PTR(BRU_FIN), - POWER_EVENT_PTR(BR_MPRED), - - POWER_EVENT_PTR(CMPLU_STALL_FXU), - POWER_EVENT_PTR(CMPLU_STALL_DIV), - POWER_EVENT_PTR(CMPLU_STALL_SCALAR), - POWER_EVENT_PTR(CMPLU_STALL_SCALAR_LONG), - POWER_EVENT_PTR(CMPLU_STALL_VECTOR), - POWER_EVENT_PTR(CMPLU_STALL_VECTOR_LONG), - POWER_EVENT_PTR(CMPLU_STALL_LSU), - POWER_EVENT_PTR(CMPLU_STALL_REJECT), - - POWER_EVENT_PTR(CMPLU_STALL_ERAT_MISS), - POWER_EVENT_PTR(CMPLU_STALL_DCACHE_MISS), - POWER_EVENT_PTR(CMPLU_STALL_STORE), - POWER_EVENT_PTR(CMPLU_STALL_THRD), - POWER_EVENT_PTR(CMPLU_STALL_IFU), - POWER_EVENT_PTR(CMPLU_STALL_BRU), - POWER_EVENT_PTR(GCT_NOSLOT_IC_MISS), - POWER_EVENT_PTR(GCT_NOSLOT_BR_MPRED), - - POWER_EVENT_PTR(GCT_NOSLOT_BR_MPRED_IC_MISS), - POWER_EVENT_PTR(GRP_CMPL), - POWER_EVENT_PTR(1PLUS_PPC_CMPL), - POWER_EVENT_PTR(CMPLU_STALL_DFU), - POWER_EVENT_PTR(RUN_CYC), - POWER_EVENT_PTR(RUN_INST_CMPL), + GENERIC_EVENT_PTR(PM_CYC), + GENERIC_EVENT_PTR(PM_GCT_NOSLOT_CYC), + GENERIC_EVENT_PTR(PM_CMPLU_STALL), + GENERIC_EVENT_PTR(PM_INST_CMPL), + GENERIC_EVENT_PTR(PM_LD_REF_L1), + GENERIC_EVENT_PTR(PM_LD_MISS_L1), + GENERIC_EVENT_PTR(PM_BRU_FIN), + GENERIC_EVENT_PTR(PM_BR_MPRED), + + #include "power7-events-list.h" + #undef EVENT NULL }; - static struct attribute_group power7_pmu_events_group = { .name = "events", .attrs = power7_events_attr, diff --git a/arch/powerpc/platforms/44x/warp.c b/arch/powerpc/platforms/44x/warp.c index 4cfa49901c02..534574a97ec9 100644 --- a/arch/powerpc/platforms/44x/warp.c +++ b/arch/powerpc/platforms/44x/warp.c @@ -16,7 +16,6 @@ #include <linux/interrupt.h> #include <linux/delay.h> #include <linux/of_gpio.h> -#include <linux/of_i2c.h> #include <linux/slab.h> #include <linux/export.h> diff --git a/arch/powerpc/platforms/ps3/time.c b/arch/powerpc/platforms/ps3/time.c index cba1e6be68e5..ce73ce865613 100644 --- a/arch/powerpc/platforms/ps3/time.c +++ b/arch/powerpc/platforms/ps3/time.c @@ -90,7 +90,7 @@ static int __init ps3_rtc_init(void) pdev = platform_device_register_simple("rtc-ps3", -1, NULL, 0); - return PTR_RET(pdev); + return PTR_ERR_OR_ZERO(pdev); } module_init(ps3_rtc_init); diff --git a/arch/powerpc/platforms/pseries/nvram.c b/arch/powerpc/platforms/pseries/nvram.c index 6a5f2b1f32ca..d276cd3edd8f 100644 --- a/arch/powerpc/platforms/pseries/nvram.c +++ b/arch/powerpc/platforms/pseries/nvram.c @@ -539,36 +539,6 @@ static int zip_oops(size_t text_len) } #ifdef CONFIG_PSTORE -/* Derived from logfs_uncompress */ -int nvram_decompress(void *in, void *out, size_t inlen, size_t outlen) -{ - int err, ret; - - ret = -EIO; - err = 
zlib_inflateInit(&stream); - if (err != Z_OK) - goto error; - - stream.next_in = in; - stream.avail_in = inlen; - stream.total_in = 0; - stream.next_out = out; - stream.avail_out = outlen; - stream.total_out = 0; - - err = zlib_inflate(&stream, Z_FINISH); - if (err != Z_STREAM_END) - goto error; - - err = zlib_inflateEnd(&stream); - if (err != Z_OK) - goto error; - - ret = stream.total_out; -error: - return ret; -} - static int nvram_pstore_open(struct pstore_info *psi) { /* Reset the iterator to start reading partitions again */ @@ -584,7 +554,7 @@ static int nvram_pstore_open(struct pstore_info *psi) * @part: pstore writes data to registered buffer in parts, * part number will indicate the same. * @count: Indicates oops count - * @hsize: Size of header added by pstore + * @compressed: Flag to indicate the log is compressed * @size: number of bytes written to the registered buffer * @psi: registered pstore_info structure * @@ -595,7 +565,7 @@ static int nvram_pstore_open(struct pstore_info *psi) static int nvram_pstore_write(enum pstore_type_id type, enum kmsg_dump_reason reason, u64 *id, unsigned int part, int count, - size_t hsize, size_t size, + bool compressed, size_t size, struct pstore_info *psi) { int rc; @@ -611,30 +581,11 @@ static int nvram_pstore_write(enum pstore_type_id type, oops_hdr->report_length = (u16) size; oops_hdr->timestamp = get_seconds(); - if (big_oops_buf) { - rc = zip_oops(size); - /* - * If compression fails copy recent log messages from - * big_oops_buf to oops_data. - */ - if (rc != 0) { - size_t diff = size - oops_data_sz + hsize; - - if (size > oops_data_sz) { - memcpy(oops_data, big_oops_buf, hsize); - memcpy(oops_data + hsize, big_oops_buf + diff, - oops_data_sz - hsize); - - oops_hdr->report_length = (u16) oops_data_sz; - } else - memcpy(oops_data, big_oops_buf, size); - } else - err_type = ERR_TYPE_KERNEL_PANIC_GZ; - } + if (compressed) + err_type = ERR_TYPE_KERNEL_PANIC_GZ; rc = nvram_write_os_partition(&oops_log_partition, oops_buf, - (int) (sizeof(*oops_hdr) + oops_hdr->report_length), err_type, - count); + (int) (sizeof(*oops_hdr) + size), err_type, count); if (rc != 0) return rc; @@ -650,12 +601,12 @@ static int nvram_pstore_write(enum pstore_type_id type, */ static ssize_t nvram_pstore_read(u64 *id, enum pstore_type_id *type, int *count, struct timespec *time, char **buf, - struct pstore_info *psi) + bool *compressed, struct pstore_info *psi) { struct oops_log_info *oops_hdr; unsigned int err_type, id_no, size = 0; struct nvram_os_partition *part = NULL; - char *buff = NULL, *big_buff = NULL; + char *buff = NULL; int sig = 0; loff_t p; @@ -719,8 +670,7 @@ static ssize_t nvram_pstore_read(u64 *id, enum pstore_type_id *type, *id = id_no; if (nvram_type_ids[read_type] == PSTORE_TYPE_DMESG) { - int length, unzipped_len; - size_t hdr_size; + size_t length, hdr_size; oops_hdr = (struct oops_log_info *)buff; if (oops_hdr->version < OOPS_HDR_VERSION) { @@ -741,23 +691,10 @@ static ssize_t nvram_pstore_read(u64 *id, enum pstore_type_id *type, memcpy(*buf, buff + hdr_size, length); kfree(buff); - if (err_type == ERR_TYPE_KERNEL_PANIC_GZ) { - big_buff = kmalloc(big_oops_buf_sz, GFP_KERNEL); - if (!big_buff) - return -ENOMEM; - - unzipped_len = nvram_decompress(*buf, big_buff, - length, big_oops_buf_sz); - - if (unzipped_len < 0) { - pr_err("nvram: decompression failed, returned " - "rc %d\n", unzipped_len); - kfree(big_buff); - } else { - *buf = big_buff; - length = unzipped_len; - } - } + if (err_type == ERR_TYPE_KERNEL_PANIC_GZ) + *compressed = true; + 
else + *compressed = false; return length; } @@ -777,13 +714,8 @@ static int nvram_pstore_init(void) { int rc = 0; - if (big_oops_buf) { - nvram_pstore_info.buf = big_oops_buf; - nvram_pstore_info.bufsize = big_oops_buf_sz; - } else { - nvram_pstore_info.buf = oops_data; - nvram_pstore_info.bufsize = oops_data_sz; - } + nvram_pstore_info.buf = oops_data; + nvram_pstore_info.bufsize = oops_data_sz; rc = pstore_register(&nvram_pstore_info); if (rc != 0) @@ -802,7 +734,6 @@ static int nvram_pstore_init(void) static void __init nvram_init_oops_partition(int rtas_partition_exists) { int rc; - size_t size; rc = pseries_nvram_init_os_partition(&oops_log_partition); if (rc != 0) { @@ -823,6 +754,11 @@ static void __init nvram_init_oops_partition(int rtas_partition_exists) oops_data = oops_buf + sizeof(struct oops_log_info); oops_data_sz = oops_log_partition.size - sizeof(struct oops_log_info); + rc = nvram_pstore_init(); + + if (!rc) + return; + /* * Figure compression (preceded by elimination of each line's <n> * severity prefix) will reduce the oops/panic report to at most * 45% of its original size. */ big_oops_buf_sz = (oops_data_sz * 100) / 45; big_oops_buf = kmalloc(big_oops_buf_sz, GFP_KERNEL); if (big_oops_buf) { - size = max(zlib_deflate_workspacesize(WINDOW_BITS, MEM_LEVEL), - zlib_inflate_workspacesize()); - stream.workspace = kmalloc(size, GFP_KERNEL); + stream.workspace = kmalloc(zlib_deflate_workspacesize( + WINDOW_BITS, MEM_LEVEL), GFP_KERNEL); if (!stream.workspace) { pr_err("nvram: No memory for compression workspace; " "skipping compression of %s partition data\n", @@ -847,11 +782,6 @@ static void __init nvram_init_oops_partition(int rtas_partition_exists) stream.workspace = NULL; } - rc = nvram_pstore_init(); - - if (!rc) - return; - rc = kmsg_dump_register(&nvram_kmsg_dumper); if (rc != 0) { pr_err("nvram: kmsg_dump_register() failed; returned %d\n", rc); diff --git a/arch/powerpc/sysdev/rtc_cmos_setup.c b/arch/powerpc/sysdev/rtc_cmos_setup.c index af79e1ea74b6..af0f9beddca9 100644 --- a/arch/powerpc/sysdev/rtc_cmos_setup.c +++ b/arch/powerpc/sysdev/rtc_cmos_setup.c @@ -62,7 +62,7 @@ static int __init add_rtc(void) pd = platform_device_register_simple("rtc_cmos", -1, &res[0], num_res); - return PTR_RET(pd); + return PTR_ERR_OR_ZERO(pd); } fs_initcall(add_rtc); diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 8a4cae78f03c..8b7892bf6d8b 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -116,6 +116,7 @@ config S390 select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_TRACER select HAVE_FUNCTION_TRACE_MCOUNT_TEST + select HAVE_GENERIC_HARDIRQS select HAVE_KERNEL_BZIP2 select HAVE_KERNEL_GZIP select HAVE_KERNEL_LZ4 @@ -445,6 +446,16 @@ config PCI_NR_FUNCTIONS This allows you to specify the maximum number of PCI functions which this kernel will support. +config PCI_NR_MSI + int "Maximum number of MSI interrupts (64-32768)" + range 64 32768 + default "256" + help + This defines the number of virtual interrupts the kernel will + provide for MSI interrupts. If you configure your system to have + too few, drivers will fail to allocate MSI interrupts for all + PCI devices.
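To make the help text above concrete: every MSI vector a driver allocates consumes one of these virtual interrupts, so an undersized pool surfaces as an allocation failure at driver probe time. A minimal sketch, assuming a hypothetical driver (example_probe and the vector count of four are invented for illustration; pci_enable_msi_block() is the MSI allocation interface of this kernel generation):

	#include <linux/pci.h>

	/* Hypothetical probe requesting four consecutive MSI vectors. */
	static int example_probe(struct pci_dev *pdev, const struct pci_device_id *id)
	{
		int rc;

		/*
		 * pci_enable_msi_block() returns 0 on success, a negative
		 * errno on hard failure, or a positive count when only that
		 * many vectors could have been allocated instead of the
		 * requested four; a too-small virtual irq pool shows up here.
		 */
		rc = pci_enable_msi_block(pdev, 4);
		if (rc < 0)
			return rc;
		if (rc > 0)
			return -ENOSPC;	/* pool too small for all four vectors */

		/* On success, pdev->irq is the first of the four vectors. */
		return 0;
	}

Raising CONFIG_PCI_NR_MSI simply enlarges the pool such requests draw from; on s390 these virtual interrupts are backed by the adapter-interrupt rework that appears later in this diff.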
+ source "drivers/pci/Kconfig" source "drivers/pci/pcie/Kconfig" source "drivers/pci/hotplug/Kconfig" diff --git a/arch/s390/hypfs/hypfs.h b/arch/s390/hypfs/hypfs.h index f41e0ef7fdf9..79f2ac55253f 100644 --- a/arch/s390/hypfs/hypfs.h +++ b/arch/s390/hypfs/hypfs.h @@ -18,26 +18,23 @@ #define UPDATE_FILE_MODE 0220 #define DIR_MODE 0550 -extern struct dentry *hypfs_mkdir(struct super_block *sb, struct dentry *parent, - const char *name); +extern struct dentry *hypfs_mkdir(struct dentry *parent, const char *name); -extern struct dentry *hypfs_create_u64(struct super_block *sb, - struct dentry *dir, const char *name, +extern struct dentry *hypfs_create_u64(struct dentry *dir, const char *name, __u64 value); -extern struct dentry *hypfs_create_str(struct super_block *sb, - struct dentry *dir, const char *name, +extern struct dentry *hypfs_create_str(struct dentry *dir, const char *name, char *string); /* LPAR Hypervisor */ extern int hypfs_diag_init(void); extern void hypfs_diag_exit(void); -extern int hypfs_diag_create_files(struct super_block *sb, struct dentry *root); +extern int hypfs_diag_create_files(struct dentry *root); /* VM Hypervisor */ extern int hypfs_vm_init(void); extern void hypfs_vm_exit(void); -extern int hypfs_vm_create_files(struct super_block *sb, struct dentry *root); +extern int hypfs_vm_create_files(struct dentry *root); /* debugfs interface */ struct hypfs_dbfs_file; diff --git a/arch/s390/hypfs/hypfs_dbfs.c b/arch/s390/hypfs/hypfs_dbfs.c index bb5dd496614f..17ab8b7b53cc 100644 --- a/arch/s390/hypfs/hypfs_dbfs.c +++ b/arch/s390/hypfs/hypfs_dbfs.c @@ -105,7 +105,7 @@ void hypfs_dbfs_remove_file(struct hypfs_dbfs_file *df) int hypfs_dbfs_init(void) { dbfs_dir = debugfs_create_dir("s390_hypfs", NULL); - return PTR_RET(dbfs_dir); + return PTR_ERR_OR_ZERO(dbfs_dir); } void hypfs_dbfs_exit(void) diff --git a/arch/s390/hypfs/hypfs_diag.c b/arch/s390/hypfs/hypfs_diag.c index 138893e5f736..5eeffeefae06 100644 --- a/arch/s390/hypfs/hypfs_diag.c +++ b/arch/s390/hypfs/hypfs_diag.c @@ -623,8 +623,7 @@ void hypfs_diag_exit(void) * ******************************************* */ -static int hypfs_create_cpu_files(struct super_block *sb, - struct dentry *cpus_dir, void *cpu_info) +static int hypfs_create_cpu_files(struct dentry *cpus_dir, void *cpu_info) { struct dentry *cpu_dir; char buffer[TMP_SIZE]; @@ -632,30 +631,29 @@ static int hypfs_create_cpu_files(struct super_block *sb, snprintf(buffer, TMP_SIZE, "%d", cpu_info__cpu_addr(diag204_info_type, cpu_info)); - cpu_dir = hypfs_mkdir(sb, cpus_dir, buffer); - rc = hypfs_create_u64(sb, cpu_dir, "mgmtime", + cpu_dir = hypfs_mkdir(cpus_dir, buffer); + rc = hypfs_create_u64(cpu_dir, "mgmtime", cpu_info__acc_time(diag204_info_type, cpu_info) - cpu_info__lp_time(diag204_info_type, cpu_info)); if (IS_ERR(rc)) return PTR_ERR(rc); - rc = hypfs_create_u64(sb, cpu_dir, "cputime", + rc = hypfs_create_u64(cpu_dir, "cputime", cpu_info__lp_time(diag204_info_type, cpu_info)); if (IS_ERR(rc)) return PTR_ERR(rc); if (diag204_info_type == INFO_EXT) { - rc = hypfs_create_u64(sb, cpu_dir, "onlinetime", + rc = hypfs_create_u64(cpu_dir, "onlinetime", cpu_info__online_time(diag204_info_type, cpu_info)); if (IS_ERR(rc)) return PTR_ERR(rc); } diag224_idx2name(cpu_info__ctidx(diag204_info_type, cpu_info), buffer); - rc = hypfs_create_str(sb, cpu_dir, "type", buffer); + rc = hypfs_create_str(cpu_dir, "type", buffer); return PTR_RET(rc); } -static void *hypfs_create_lpar_files(struct super_block *sb, - struct dentry *systems_dir, void *part_hdr) +static void 
*hypfs_create_lpar_files(struct dentry *systems_dir, void *part_hdr) { struct dentry *cpus_dir; struct dentry *lpar_dir; @@ -665,16 +663,16 @@ static void *hypfs_create_lpar_files(struct super_block *sb, part_hdr__part_name(diag204_info_type, part_hdr, lpar_name); lpar_name[LPAR_NAME_LEN] = 0; - lpar_dir = hypfs_mkdir(sb, systems_dir, lpar_name); + lpar_dir = hypfs_mkdir(systems_dir, lpar_name); if (IS_ERR(lpar_dir)) return lpar_dir; - cpus_dir = hypfs_mkdir(sb, lpar_dir, "cpus"); + cpus_dir = hypfs_mkdir(lpar_dir, "cpus"); if (IS_ERR(cpus_dir)) return cpus_dir; cpu_info = part_hdr + part_hdr__size(diag204_info_type); for (i = 0; i < part_hdr__rcpus(diag204_info_type, part_hdr); i++) { int rc; - rc = hypfs_create_cpu_files(sb, cpus_dir, cpu_info); + rc = hypfs_create_cpu_files(cpus_dir, cpu_info); if (rc) return ERR_PTR(rc); cpu_info += cpu_info__size(diag204_info_type); @@ -682,8 +680,7 @@ static void *hypfs_create_lpar_files(struct super_block *sb, return cpu_info; } -static int hypfs_create_phys_cpu_files(struct super_block *sb, - struct dentry *cpus_dir, void *cpu_info) +static int hypfs_create_phys_cpu_files(struct dentry *cpus_dir, void *cpu_info) { struct dentry *cpu_dir; char buffer[TMP_SIZE]; @@ -691,32 +688,31 @@ static int hypfs_create_phys_cpu_files(struct super_block *sb, snprintf(buffer, TMP_SIZE, "%i", phys_cpu__cpu_addr(diag204_info_type, cpu_info)); - cpu_dir = hypfs_mkdir(sb, cpus_dir, buffer); + cpu_dir = hypfs_mkdir(cpus_dir, buffer); if (IS_ERR(cpu_dir)) return PTR_ERR(cpu_dir); - rc = hypfs_create_u64(sb, cpu_dir, "mgmtime", + rc = hypfs_create_u64(cpu_dir, "mgmtime", phys_cpu__mgm_time(diag204_info_type, cpu_info)); if (IS_ERR(rc)) return PTR_ERR(rc); diag224_idx2name(phys_cpu__ctidx(diag204_info_type, cpu_info), buffer); - rc = hypfs_create_str(sb, cpu_dir, "type", buffer); + rc = hypfs_create_str(cpu_dir, "type", buffer); return PTR_RET(rc); } -static void *hypfs_create_phys_files(struct super_block *sb, - struct dentry *parent_dir, void *phys_hdr) +static void *hypfs_create_phys_files(struct dentry *parent_dir, void *phys_hdr) { int i; void *cpu_info; struct dentry *cpus_dir; - cpus_dir = hypfs_mkdir(sb, parent_dir, "cpus"); + cpus_dir = hypfs_mkdir(parent_dir, "cpus"); if (IS_ERR(cpus_dir)) return cpus_dir; cpu_info = phys_hdr + phys_hdr__size(diag204_info_type); for (i = 0; i < phys_hdr__cpus(diag204_info_type, phys_hdr); i++) { int rc; - rc = hypfs_create_phys_cpu_files(sb, cpus_dir, cpu_info); + rc = hypfs_create_phys_cpu_files(cpus_dir, cpu_info); if (rc) return ERR_PTR(rc); cpu_info += phys_cpu__size(diag204_info_type); @@ -724,7 +720,7 @@ static void *hypfs_create_phys_files(struct super_block *sb, return cpu_info; } -int hypfs_diag_create_files(struct super_block *sb, struct dentry *root) +int hypfs_diag_create_files(struct dentry *root) { struct dentry *systems_dir, *hyp_dir; void *time_hdr, *part_hdr; @@ -735,7 +731,7 @@ int hypfs_diag_create_files(struct super_block *sb, struct dentry *root) if (IS_ERR(buffer)) return PTR_ERR(buffer); - systems_dir = hypfs_mkdir(sb, root, "systems"); + systems_dir = hypfs_mkdir(root, "systems"); if (IS_ERR(systems_dir)) { rc = PTR_ERR(systems_dir); goto err_out; @@ -743,25 +739,25 @@ int hypfs_diag_create_files(struct super_block *sb, struct dentry *root) time_hdr = (struct x_info_blk_hdr *)buffer; part_hdr = time_hdr + info_blk_hdr__size(diag204_info_type); for (i = 0; i < info_blk_hdr__npar(diag204_info_type, time_hdr); i++) { - part_hdr = hypfs_create_lpar_files(sb, systems_dir, part_hdr); + part_hdr = 
hypfs_create_lpar_files(systems_dir, part_hdr); if (IS_ERR(part_hdr)) { rc = PTR_ERR(part_hdr); goto err_out; } } if (info_blk_hdr__flags(diag204_info_type, time_hdr) & LPAR_PHYS_FLG) { - ptr = hypfs_create_phys_files(sb, root, part_hdr); + ptr = hypfs_create_phys_files(root, part_hdr); if (IS_ERR(ptr)) { rc = PTR_ERR(ptr); goto err_out; } } - hyp_dir = hypfs_mkdir(sb, root, "hyp"); + hyp_dir = hypfs_mkdir(root, "hyp"); if (IS_ERR(hyp_dir)) { rc = PTR_ERR(hyp_dir); goto err_out; } - ptr = hypfs_create_str(sb, hyp_dir, "type", "LPAR Hypervisor"); + ptr = hypfs_create_str(hyp_dir, "type", "LPAR Hypervisor"); if (IS_ERR(ptr)) { rc = PTR_ERR(ptr); goto err_out; diff --git a/arch/s390/hypfs/hypfs_vm.c b/arch/s390/hypfs/hypfs_vm.c index f364dcf77e8e..24908ce149f1 100644 --- a/arch/s390/hypfs/hypfs_vm.c +++ b/arch/s390/hypfs/hypfs_vm.c @@ -107,16 +107,15 @@ static void diag2fc_free(const void *data) vfree(data); } -#define ATTRIBUTE(sb, dir, name, member) \ +#define ATTRIBUTE(dir, name, member) \ do { \ void *rc; \ - rc = hypfs_create_u64(sb, dir, name, member); \ + rc = hypfs_create_u64(dir, name, member); \ if (IS_ERR(rc)) \ return PTR_ERR(rc); \ } while(0) -static int hpyfs_vm_create_guest(struct super_block *sb, - struct dentry *systems_dir, +static int hpyfs_vm_create_guest(struct dentry *systems_dir, struct diag2fc_data *data) { char guest_name[NAME_LEN + 1] = {}; @@ -130,46 +129,46 @@ static int hpyfs_vm_create_guest(struct super_block *sb, memcpy(guest_name, data->guest_name, NAME_LEN); EBCASC(guest_name, NAME_LEN); strim(guest_name); - guest_dir = hypfs_mkdir(sb, systems_dir, guest_name); + guest_dir = hypfs_mkdir(systems_dir, guest_name); if (IS_ERR(guest_dir)) return PTR_ERR(guest_dir); - ATTRIBUTE(sb, guest_dir, "onlinetime_us", data->el_time); + ATTRIBUTE(guest_dir, "onlinetime_us", data->el_time); /* logical cpu information */ - cpus_dir = hypfs_mkdir(sb, guest_dir, "cpus"); + cpus_dir = hypfs_mkdir(guest_dir, "cpus"); if (IS_ERR(cpus_dir)) return PTR_ERR(cpus_dir); - ATTRIBUTE(sb, cpus_dir, "cputime_us", data->used_cpu); - ATTRIBUTE(sb, cpus_dir, "capped", capped_value); - ATTRIBUTE(sb, cpus_dir, "dedicated", dedicated_flag); - ATTRIBUTE(sb, cpus_dir, "count", data->vcpus); - ATTRIBUTE(sb, cpus_dir, "weight_min", data->cpu_min); - ATTRIBUTE(sb, cpus_dir, "weight_max", data->cpu_max); - ATTRIBUTE(sb, cpus_dir, "weight_cur", data->cpu_shares); + ATTRIBUTE(cpus_dir, "cputime_us", data->used_cpu); + ATTRIBUTE(cpus_dir, "capped", capped_value); + ATTRIBUTE(cpus_dir, "dedicated", dedicated_flag); + ATTRIBUTE(cpus_dir, "count", data->vcpus); + ATTRIBUTE(cpus_dir, "weight_min", data->cpu_min); + ATTRIBUTE(cpus_dir, "weight_max", data->cpu_max); + ATTRIBUTE(cpus_dir, "weight_cur", data->cpu_shares); /* memory information */ - mem_dir = hypfs_mkdir(sb, guest_dir, "mem"); + mem_dir = hypfs_mkdir(guest_dir, "mem"); if (IS_ERR(mem_dir)) return PTR_ERR(mem_dir); - ATTRIBUTE(sb, mem_dir, "min_KiB", data->mem_min_kb); - ATTRIBUTE(sb, mem_dir, "max_KiB", data->mem_max_kb); - ATTRIBUTE(sb, mem_dir, "used_KiB", data->mem_used_kb); - ATTRIBUTE(sb, mem_dir, "share_KiB", data->mem_share_kb); + ATTRIBUTE(mem_dir, "min_KiB", data->mem_min_kb); + ATTRIBUTE(mem_dir, "max_KiB", data->mem_max_kb); + ATTRIBUTE(mem_dir, "used_KiB", data->mem_used_kb); + ATTRIBUTE(mem_dir, "share_KiB", data->mem_share_kb); /* samples */ - samples_dir = hypfs_mkdir(sb, guest_dir, "samples"); + samples_dir = hypfs_mkdir(guest_dir, "samples"); if (IS_ERR(samples_dir)) return PTR_ERR(samples_dir); - ATTRIBUTE(sb, samples_dir, 
"cpu_using", data->cpu_use_samp); - ATTRIBUTE(sb, samples_dir, "cpu_delay", data->cpu_delay_samp); - ATTRIBUTE(sb, samples_dir, "mem_delay", data->page_wait_samp); - ATTRIBUTE(sb, samples_dir, "idle", data->idle_samp); - ATTRIBUTE(sb, samples_dir, "other", data->other_samp); - ATTRIBUTE(sb, samples_dir, "total", data->total_samp); + ATTRIBUTE(samples_dir, "cpu_using", data->cpu_use_samp); + ATTRIBUTE(samples_dir, "cpu_delay", data->cpu_delay_samp); + ATTRIBUTE(samples_dir, "mem_delay", data->page_wait_samp); + ATTRIBUTE(samples_dir, "idle", data->idle_samp); + ATTRIBUTE(samples_dir, "other", data->other_samp); + ATTRIBUTE(samples_dir, "total", data->total_samp); return 0; } -int hypfs_vm_create_files(struct super_block *sb, struct dentry *root) +int hypfs_vm_create_files(struct dentry *root) { struct dentry *dir, *file; struct diag2fc_data *data; @@ -181,38 +180,38 @@ int hypfs_vm_create_files(struct super_block *sb, struct dentry *root) return PTR_ERR(data); /* Hpervisor Info */ - dir = hypfs_mkdir(sb, root, "hyp"); + dir = hypfs_mkdir(root, "hyp"); if (IS_ERR(dir)) { rc = PTR_ERR(dir); goto failed; } - file = hypfs_create_str(sb, dir, "type", "z/VM Hypervisor"); + file = hypfs_create_str(dir, "type", "z/VM Hypervisor"); if (IS_ERR(file)) { rc = PTR_ERR(file); goto failed; } /* physical cpus */ - dir = hypfs_mkdir(sb, root, "cpus"); + dir = hypfs_mkdir(root, "cpus"); if (IS_ERR(dir)) { rc = PTR_ERR(dir); goto failed; } - file = hypfs_create_u64(sb, dir, "count", data->lcpus); + file = hypfs_create_u64(dir, "count", data->lcpus); if (IS_ERR(file)) { rc = PTR_ERR(file); goto failed; } /* guests */ - dir = hypfs_mkdir(sb, root, "systems"); + dir = hypfs_mkdir(root, "systems"); if (IS_ERR(dir)) { rc = PTR_ERR(dir); goto failed; } for (i = 0; i < count; i++) { - rc = hpyfs_vm_create_guest(sb, dir, &(data[i])); + rc = hpyfs_vm_create_guest(dir, &(data[i])); if (rc) goto failed; } diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index 7a539f4f5e30..ddfe09b45134 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -28,8 +28,7 @@ #define HYPFS_MAGIC 0x687970 /* ASCII 'hyp' */ #define TMP_SIZE 64 /* size of temporary buffers */ -static struct dentry *hypfs_create_update_file(struct super_block *sb, - struct dentry *dir); +static struct dentry *hypfs_create_update_file(struct dentry *dir); struct hypfs_sb_info { kuid_t uid; /* uid used for files and dirs */ @@ -193,9 +192,9 @@ static ssize_t hypfs_aio_write(struct kiocb *iocb, const struct iovec *iov, } hypfs_delete_tree(sb->s_root); if (MACHINE_IS_VM) - rc = hypfs_vm_create_files(sb, sb->s_root); + rc = hypfs_vm_create_files(sb->s_root); else - rc = hypfs_diag_create_files(sb, sb->s_root); + rc = hypfs_diag_create_files(sb->s_root); if (rc) { pr_err("Updating the hypfs tree failed\n"); hypfs_delete_tree(sb->s_root); @@ -302,12 +301,12 @@ static int hypfs_fill_super(struct super_block *sb, void *data, int silent) if (!root_dentry) return -ENOMEM; if (MACHINE_IS_VM) - rc = hypfs_vm_create_files(sb, root_dentry); + rc = hypfs_vm_create_files(root_dentry); else - rc = hypfs_diag_create_files(sb, root_dentry); + rc = hypfs_diag_create_files(root_dentry); if (rc) return rc; - sbi->update_file = hypfs_create_update_file(sb, root_dentry); + sbi->update_file = hypfs_create_update_file(root_dentry); if (IS_ERR(sbi->update_file)) return PTR_ERR(sbi->update_file); hypfs_update_update(sb); @@ -334,8 +333,7 @@ static void hypfs_kill_super(struct super_block *sb) kill_litter_super(sb); } -static struct dentry 
*hypfs_create_file(struct super_block *sb, - struct dentry *parent, const char *name, +static struct dentry *hypfs_create_file(struct dentry *parent, const char *name, char *data, umode_t mode) { struct dentry *dentry; @@ -347,7 +345,7 @@ static struct dentry *hypfs_create_file(struct super_block *sb, dentry = ERR_PTR(-ENOMEM); goto fail; } - inode = hypfs_make_inode(sb, mode); + inode = hypfs_make_inode(parent->d_sb, mode); if (!inode) { dput(dentry); dentry = ERR_PTR(-ENOMEM); @@ -373,24 +371,22 @@ fail: return dentry; } -struct dentry *hypfs_mkdir(struct super_block *sb, struct dentry *parent, - const char *name) +struct dentry *hypfs_mkdir(struct dentry *parent, const char *name) { struct dentry *dentry; - dentry = hypfs_create_file(sb, parent, name, NULL, S_IFDIR | DIR_MODE); + dentry = hypfs_create_file(parent, name, NULL, S_IFDIR | DIR_MODE); if (IS_ERR(dentry)) return dentry; hypfs_add_dentry(dentry); return dentry; } -static struct dentry *hypfs_create_update_file(struct super_block *sb, - struct dentry *dir) +static struct dentry *hypfs_create_update_file(struct dentry *dir) { struct dentry *dentry; - dentry = hypfs_create_file(sb, dir, "update", NULL, + dentry = hypfs_create_file(dir, "update", NULL, S_IFREG | UPDATE_FILE_MODE); /* * We do not put the update file on the 'delete' list with @@ -400,7 +396,7 @@ static struct dentry *hypfs_create_update_file(struct super_block *sb, return dentry; } -struct dentry *hypfs_create_u64(struct super_block *sb, struct dentry *dir, +struct dentry *hypfs_create_u64(struct dentry *dir, const char *name, __u64 value) { char *buffer; @@ -412,7 +408,7 @@ struct dentry *hypfs_create_u64(struct super_block *sb, struct dentry *dir, if (!buffer) return ERR_PTR(-ENOMEM); dentry = - hypfs_create_file(sb, dir, name, buffer, S_IFREG | REG_FILE_MODE); + hypfs_create_file(dir, name, buffer, S_IFREG | REG_FILE_MODE); if (IS_ERR(dentry)) { kfree(buffer); return ERR_PTR(-ENOMEM); @@ -421,7 +417,7 @@ struct dentry *hypfs_create_u64(struct super_block *sb, struct dentry *dir, return dentry; } -struct dentry *hypfs_create_str(struct super_block *sb, struct dentry *dir, +struct dentry *hypfs_create_str(struct dentry *dir, const char *name, char *string) { char *buffer; @@ -432,7 +428,7 @@ struct dentry *hypfs_create_str(struct super_block *sb, struct dentry *dir, return ERR_PTR(-ENOMEM); sprintf(buffer, "%s\n", string); dentry = - hypfs_create_file(sb, dir, name, buffer, S_IFREG | REG_FILE_MODE); + hypfs_create_file(dir, name, buffer, S_IFREG | REG_FILE_MODE); if (IS_ERR(dentry)) { kfree(buffer); return ERR_PTR(-ENOMEM); diff --git a/arch/s390/include/asm/airq.h b/arch/s390/include/asm/airq.h index 4066cee0c2d2..4bbb5957ed1b 100644 --- a/arch/s390/include/asm/airq.h +++ b/arch/s390/include/asm/airq.h @@ -9,6 +9,8 @@ #ifndef _ASM_S390_AIRQ_H #define _ASM_S390_AIRQ_H +#include <linux/bit_spinlock.h> + struct airq_struct { struct hlist_node list; /* Handler queueing. 
*/ void (*handler)(struct airq_struct *); /* Thin-interrupt handler */ @@ -23,4 +25,69 @@ struct airq_struct { int register_adapter_interrupt(struct airq_struct *airq); void unregister_adapter_interrupt(struct airq_struct *airq); +/* Adapter interrupt bit vector */ +struct airq_iv { + unsigned long *vector; /* Adapter interrupt bit vector */ + unsigned long *avail; /* Allocation bit mask for the bit vector */ + unsigned long *bitlock; /* Lock bit mask for the bit vector */ + unsigned long *ptr; /* Pointer associated with each bit */ + unsigned int *data; /* 32 bit value associated with each bit */ + unsigned long bits; /* Number of bits in the vector */ + unsigned long end; /* Number of highest allocated bit + 1 */ + spinlock_t lock; /* Lock to protect alloc & free */ +}; + +#define AIRQ_IV_ALLOC 1 /* Use an allocation bit mask */ +#define AIRQ_IV_BITLOCK 2 /* Allocate the lock bit mask */ +#define AIRQ_IV_PTR 4 /* Allocate the ptr array */ +#define AIRQ_IV_DATA 8 /* Allocate the data array */ + +struct airq_iv *airq_iv_create(unsigned long bits, unsigned long flags); +void airq_iv_release(struct airq_iv *iv); +unsigned long airq_iv_alloc_bit(struct airq_iv *iv); +void airq_iv_free_bit(struct airq_iv *iv, unsigned long bit); +unsigned long airq_iv_scan(struct airq_iv *iv, unsigned long start, + unsigned long end); + +static inline unsigned long airq_iv_end(struct airq_iv *iv) +{ + return iv->end; +} + +static inline void airq_iv_lock(struct airq_iv *iv, unsigned long bit) +{ + const unsigned long be_to_le = BITS_PER_LONG - 1; + bit_spin_lock(bit ^ be_to_le, iv->bitlock); +} + +static inline void airq_iv_unlock(struct airq_iv *iv, unsigned long bit) +{ + const unsigned long be_to_le = BITS_PER_LONG - 1; + bit_spin_unlock(bit ^ be_to_le, iv->bitlock); +} + +static inline void airq_iv_set_data(struct airq_iv *iv, unsigned long bit, + unsigned int data) +{ + iv->data[bit] = data; +} + +static inline unsigned int airq_iv_get_data(struct airq_iv *iv, + unsigned long bit) +{ + return iv->data[bit]; +} + +static inline void airq_iv_set_ptr(struct airq_iv *iv, unsigned long bit, + unsigned long ptr) +{ + iv->ptr[bit] = ptr; +} + +static inline unsigned long airq_iv_get_ptr(struct airq_iv *iv, + unsigned long bit) +{ + return iv->ptr[bit]; +} + #endif /* _ASM_S390_AIRQ_H */ diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h index 7d4676758733..10135a38673c 100644 --- a/arch/s390/include/asm/bitops.h +++ b/arch/s390/include/asm/bitops.h @@ -216,7 +216,7 @@ static inline void __set_bit(unsigned long nr, volatile unsigned long *ptr) addr = (unsigned long) ptr + ((nr ^ (BITS_PER_LONG - 8)) >> 3); asm volatile( " oc %O0(1,%R0),%1" - : "=Q" (*(char *) addr) : "Q" (_oi_bitmap[nr & 7]) : "cc" ); + : "+Q" (*(char *) addr) : "Q" (_oi_bitmap[nr & 7]) : "cc"); } static inline void @@ -244,7 +244,7 @@ __clear_bit(unsigned long nr, volatile unsigned long *ptr) addr = (unsigned long) ptr + ((nr ^ (BITS_PER_LONG - 8)) >> 3); asm volatile( " nc %O0(1,%R0),%1" - : "=Q" (*(char *) addr) : "Q" (_ni_bitmap[nr & 7]) : "cc" ); + : "+Q" (*(char *) addr) : "Q" (_ni_bitmap[nr & 7]) : "cc"); } static inline void @@ -271,7 +271,7 @@ static inline void __change_bit(unsigned long nr, volatile unsigned long *ptr) addr = (unsigned long) ptr + ((nr ^ (BITS_PER_LONG - 8)) >> 3); asm volatile( " xc %O0(1,%R0),%1" - : "=Q" (*(char *) addr) : "Q" (_oi_bitmap[nr & 7]) : "cc" ); + : "+Q" (*(char *) addr) : "Q" (_oi_bitmap[nr & 7]) : "cc"); } static inline void @@ -301,7 +301,7 @@ test_and_set_bit_simple(unsigned 
long nr, volatile unsigned long *ptr) ch = *(unsigned char *) addr; asm volatile( " oc %O0(1,%R0),%1" - : "=Q" (*(char *) addr) : "Q" (_oi_bitmap[nr & 7]) + : "+Q" (*(char *) addr) : "Q" (_oi_bitmap[nr & 7]) : "cc", "memory"); return (ch >> (nr & 7)) & 1; } @@ -320,7 +320,7 @@ test_and_clear_bit_simple(unsigned long nr, volatile unsigned long *ptr) ch = *(unsigned char *) addr; asm volatile( " nc %O0(1,%R0),%1" - : "=Q" (*(char *) addr) : "Q" (_ni_bitmap[nr & 7]) + : "+Q" (*(char *) addr) : "Q" (_ni_bitmap[nr & 7]) : "cc", "memory"); return (ch >> (nr & 7)) & 1; } @@ -339,7 +339,7 @@ test_and_change_bit_simple(unsigned long nr, volatile unsigned long *ptr) ch = *(unsigned char *) addr; asm volatile( " xc %O0(1,%R0),%1" - : "=Q" (*(char *) addr) : "Q" (_oi_bitmap[nr & 7]) + : "+Q" (*(char *) addr) : "Q" (_oi_bitmap[nr & 7]) : "cc", "memory"); return (ch >> (nr & 7)) & 1; } diff --git a/arch/s390/include/asm/cio.h b/arch/s390/include/asm/cio.h index ffb898961c8d..d42625053c37 100644 --- a/arch/s390/include/asm/cio.h +++ b/arch/s390/include/asm/cio.h @@ -296,6 +296,7 @@ static inline int ccw_dev_id_is_equal(struct ccw_dev_id *dev_id1, return 0; } +void channel_subsystem_reinit(void); extern void css_schedule_reprobe(void); extern void reipl_ccw_dev(struct ccw_dev_id *id); diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h index d2ff41370c0c..f65bd3634519 100644 --- a/arch/s390/include/asm/cputime.h +++ b/arch/s390/include/asm/cputime.h @@ -13,9 +13,6 @@ #include <asm/div64.h> -#define __ARCH_HAS_VTIME_ACCOUNT -#define __ARCH_HAS_VTIME_TASK_SWITCH - /* We want to use full resolution of the CPU timer: 2**-12 micro-seconds. */ typedef unsigned long long __nocast cputime_t; diff --git a/arch/s390/include/asm/hardirq.h b/arch/s390/include/asm/hardirq.h index 0c82ba86e997..a908d2941c5d 100644 --- a/arch/s390/include/asm/hardirq.h +++ b/arch/s390/include/asm/hardirq.h @@ -20,4 +20,9 @@ #define HARDIRQ_BITS 8 +static inline void ack_bad_irq(unsigned int irq) +{ + printk(KERN_CRIT "unexpected IRQ trap at vector %02x\n", irq); +} + #endif /* __ASM_HARDIRQ_H */ diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h index bd90359d6d22..11eae5f55b70 100644 --- a/arch/s390/include/asm/hugetlb.h +++ b/arch/s390/include/asm/hugetlb.h @@ -17,6 +17,9 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte); +pte_t huge_ptep_get(pte_t *ptep); +pte_t huge_ptep_get_and_clear(struct mm_struct *mm, + unsigned long addr, pte_t *ptep); /* * If the arch doesn't supply something else, assume that hugepage @@ -38,147 +41,75 @@ static inline int prepare_hugepage_range(struct file *file, int arch_prepare_hugepage(struct page *page); void arch_release_hugepage(struct page *page); -static inline pte_t huge_pte_wrprotect(pte_t pte) +static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr, + pte_t *ptep) { - pte_val(pte) |= _PAGE_RO; - return pte; + pte_val(*ptep) = _SEGMENT_ENTRY_EMPTY; } -static inline int huge_pte_none(pte_t pte) +static inline void huge_ptep_clear_flush(struct vm_area_struct *vma, + unsigned long address, pte_t *ptep) { - return (pte_val(pte) & _SEGMENT_ENTRY_INV) && - !(pte_val(pte) & _SEGMENT_ENTRY_RO); + huge_ptep_get_and_clear(vma->vm_mm, address, ptep); } -static inline pte_t huge_ptep_get(pte_t *ptep) +static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, + pte_t pte, int dirty) { - pte_t pte = *ptep; - unsigned long mask; - - if 
(!MACHINE_HAS_HPAGE) { - ptep = (pte_t *) (pte_val(pte) & _SEGMENT_ENTRY_ORIGIN); - if (ptep) { - mask = pte_val(pte) & - (_SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO); - pte = pte_mkhuge(*ptep); - pte_val(pte) |= mask; - } + int changed = !pte_same(huge_ptep_get(ptep), pte); + if (changed) { + huge_ptep_get_and_clear(vma->vm_mm, addr, ptep); + set_huge_pte_at(vma->vm_mm, addr, ptep, pte); } - return pte; + return changed; } -static inline void __pmd_csp(pmd_t *pmdp) +static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) { - register unsigned long reg2 asm("2") = pmd_val(*pmdp); - register unsigned long reg3 asm("3") = pmd_val(*pmdp) | - _SEGMENT_ENTRY_INV; - register unsigned long reg4 asm("4") = ((unsigned long) pmdp) + 5; - - asm volatile( - " csp %1,%3" - : "=m" (*pmdp) - : "d" (reg2), "d" (reg3), "d" (reg4), "m" (*pmdp) : "cc"); + pte_t pte = huge_ptep_get_and_clear(mm, addr, ptep); + set_huge_pte_at(mm, addr, ptep, pte_wrprotect(pte)); } -static inline void huge_ptep_invalidate(struct mm_struct *mm, - unsigned long address, pte_t *ptep) -{ - pmd_t *pmdp = (pmd_t *) ptep; - - if (MACHINE_HAS_IDTE) - __pmd_idte(address, pmdp); - else - __pmd_csp(pmdp); - pmd_val(*pmdp) = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY; -} - -static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, - unsigned long addr, pte_t *ptep) -{ - pte_t pte = huge_ptep_get(ptep); - - huge_ptep_invalidate(mm, addr, ptep); - return pte; -} - -#define huge_ptep_set_access_flags(__vma, __addr, __ptep, __entry, __dirty) \ -({ \ - int __changed = !pte_same(huge_ptep_get(__ptep), __entry); \ - if (__changed) { \ - huge_ptep_invalidate((__vma)->vm_mm, __addr, __ptep); \ - set_huge_pte_at((__vma)->vm_mm, __addr, __ptep, __entry); \ - } \ - __changed; \ -}) - -#define huge_ptep_set_wrprotect(__mm, __addr, __ptep) \ -({ \ - pte_t __pte = huge_ptep_get(__ptep); \ - if (huge_pte_write(__pte)) { \ - huge_ptep_invalidate(__mm, __addr, __ptep); \ - set_huge_pte_at(__mm, __addr, __ptep, \ - huge_pte_wrprotect(__pte)); \ - } \ -}) - -static inline void huge_ptep_clear_flush(struct vm_area_struct *vma, - unsigned long address, pte_t *ptep) +static inline pte_t mk_huge_pte(struct page *page, pgprot_t pgprot) { - huge_ptep_invalidate(vma->vm_mm, address, ptep); + return mk_pte(page, pgprot); } -static inline pte_t mk_huge_pte(struct page *page, pgprot_t pgprot) +static inline int huge_pte_none(pte_t pte) { - pte_t pte; - pmd_t pmd; - - pmd = mk_pmd_phys(page_to_phys(page), pgprot); - pte_val(pte) = pmd_val(pmd); - return pte; + return pte_none(pte); } static inline int huge_pte_write(pte_t pte) { - pmd_t pmd; - - pmd_val(pmd) = pte_val(pte); - return pmd_write(pmd); + return pte_write(pte); } static inline int huge_pte_dirty(pte_t pte) { - /* No dirty bit in the segment table entry. */ - return 0; + return pte_dirty(pte); } static inline pte_t huge_pte_mkwrite(pte_t pte) { - pmd_t pmd; - - pmd_val(pmd) = pte_val(pte); - pte_val(pte) = pmd_val(pmd_mkwrite(pmd)); - return pte; + return pte_mkwrite(pte); } static inline pte_t huge_pte_mkdirty(pte_t pte) { - /* No dirty bit in the segment table entry. 
*/ - return pte; + return pte_mkdirty(pte); } -static inline pte_t huge_pte_modify(pte_t pte, pgprot_t newprot) +static inline pte_t huge_pte_wrprotect(pte_t pte) { - pmd_t pmd; - - pmd_val(pmd) = pte_val(pte); - pte_val(pte) = pmd_val(pmd_modify(pmd, newprot)); - return pte; + return pte_wrprotect(pte); } -static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr, - pte_t *ptep) +static inline pte_t huge_pte_modify(pte_t pte, pgprot_t newprot) { - pmd_clear((pmd_t *) ptep); + return pte_modify(pte, newprot); } #endif /* _ASM_S390_HUGETLB_H */ diff --git a/arch/s390/include/asm/hw_irq.h b/arch/s390/include/asm/hw_irq.h index 7e3d2586c1ff..ee96a8b697f9 100644 --- a/arch/s390/include/asm/hw_irq.h +++ b/arch/s390/include/asm/hw_irq.h @@ -4,19 +4,8 @@ #include <linux/msi.h> #include <linux/pci.h> -static inline struct msi_desc *irq_get_msi_desc(unsigned int irq) -{ - return __irq_get_msi_desc(irq); -} - -/* Must be called with msi map lock held */ -static inline int irq_set_msi_desc(unsigned int irq, struct msi_desc *msi) -{ - if (!msi) - return -EINVAL; - - msi->irq = irq; - return 0; -} +void __init init_airq_interrupts(void); +void __init init_cio_interrupts(void); +void __init init_ext_interrupts(void); #endif diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h index 87c17bfb2968..1eaa3625803c 100644 --- a/arch/s390/include/asm/irq.h +++ b/arch/s390/include/asm/irq.h @@ -1,17 +1,28 @@ #ifndef _ASM_IRQ_H #define _ASM_IRQ_H +#define EXT_INTERRUPT 1 +#define IO_INTERRUPT 2 +#define THIN_INTERRUPT 3 + +#define NR_IRQS_BASE 4 + +#ifdef CONFIG_PCI_NR_MSI +# define NR_IRQS (NR_IRQS_BASE + CONFIG_PCI_NR_MSI) +#else +# define NR_IRQS NR_IRQS_BASE +#endif + +/* This number is used when no interrupt has been assigned */ +#define NO_IRQ 0 + +#ifndef __ASSEMBLY__ + #include <linux/hardirq.h> #include <linux/percpu.h> #include <linux/cache.h> #include <linux/types.h> -enum interruption_main_class { - EXTERNAL_INTERRUPT, - IO_INTERRUPT, - NR_IRQS -}; - enum interruption_class { IRQEXT_CLK, IRQEXT_EXC, @@ -72,14 +83,8 @@ void service_subclass_irq_unregister(void); void measurement_alert_subclass_register(void); void measurement_alert_subclass_unregister(void); -#ifdef CONFIG_LOCKDEP -# define disable_irq_nosync_lockdep(irq) disable_irq_nosync(irq) -# define disable_irq_nosync_lockdep_irqsave(irq, flags) \ - disable_irq_nosync(irq) -# define disable_irq_lockdep(irq) disable_irq(irq) -# define enable_irq_lockdep(irq) enable_irq(irq) -# define enable_irq_lockdep_irqrestore(irq, flags) \ - enable_irq(irq) -#endif +#define irq_canonicalize(irq) (irq) + +#endif /* __ASSEMBLY__ */ #endif /* _ASM_IRQ_H */ diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 3238d4004e84..e87ecaa2c569 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -274,6 +274,14 @@ struct kvm_arch{ int css_support; }; +#define KVM_HVA_ERR_BAD (-1UL) +#define KVM_HVA_ERR_RO_BAD (-2UL) + +static inline bool kvm_is_error_hva(unsigned long addr) +{ + return IS_ERR_VALUE(addr); +} + extern int sie64a(struct kvm_s390_sie_block *, u64 *); extern char sie_exit; #endif diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h index 6340178748bf..ff132ac64ddd 100644 --- a/arch/s390/include/asm/mmu.h +++ b/arch/s390/include/asm/mmu.h @@ -12,8 +12,6 @@ typedef struct { unsigned long asce_bits; unsigned long asce_limit; unsigned long vdso_base; - /* Cloned contexts will be created with extended page tables. 
*/ - unsigned int alloc_pgste:1; /* The mmu context has extended page tables. */ unsigned int has_pgste:1; } mm_context_t; diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index 084e7755ed9b..9f973d8de90e 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -21,24 +21,7 @@ static inline int init_new_context(struct task_struct *tsk, #ifdef CONFIG_64BIT mm->context.asce_bits |= _ASCE_TYPE_REGION3; #endif - if (current->mm && current->mm->context.alloc_pgste) { - /* - * alloc_pgste indicates, that any NEW context will be created - * with extended page tables. The old context is unchanged. The - * page table allocation and the page table operations will - * look at has_pgste to distinguish normal and extended page - * tables. The only way to create extended page tables is to - * set alloc_pgste and then create a new context (e.g. dup_mm). - * The page table allocation is called after init_new_context - * and if has_pgste is set, it will create extended page - * tables. - */ - mm->context.has_pgste = 1; - mm->context.alloc_pgste = 1; - } else { - mm->context.has_pgste = 0; - mm->context.alloc_pgste = 0; - } + mm->context.has_pgste = 0; mm->context.asce_limit = STACK_TOP_MAX; crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm)); return 0; @@ -77,8 +60,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, WARN_ON(atomic_read(&prev->context.attach_count) < 0); atomic_inc(&next->context.attach_count); /* Check for TLBs not flushed yet */ - if (next->context.flush_mm) - __tlb_flush_mm(next); + __tlb_flush_mm_lazy(next); } #define enter_lazy_tlb(mm,tsk) do { } while (0) diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index 5d64fb7619cc..1e51f2915b2e 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -32,16 +32,6 @@ void storage_key_init_range(unsigned long start, unsigned long end); -static inline unsigned long pfmf(unsigned long function, unsigned long address) -{ - asm volatile( - " .insn rre,0xb9af0000,%[function],%[address]" - : [address] "+a" (address) - : [function] "d" (function) - : "memory"); - return address; -} - static inline void clear_page(void *page) { register unsigned long reg1 asm ("1") = 0; @@ -150,15 +140,6 @@ static inline int page_reset_referenced(unsigned long addr) #define _PAGE_FP_BIT 0x08 /* HW fetch protection bit */ #define _PAGE_ACC_BITS 0xf0 /* HW access control bits */ -/* - * Test and clear referenced bit in storage key. 
- */ -#define __HAVE_ARCH_PAGE_TEST_AND_CLEAR_YOUNG -static inline int page_test_and_clear_young(unsigned long pfn) -{ - return page_reset_referenced(pfn << PAGE_SHIFT); -} - struct page; void arch_free_page(struct page *page, int order); void arch_alloc_page(struct page *page, int order); diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index 6e577ba0e5da..c290f13d1c47 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -6,6 +6,7 @@ /* must be set before including pci_clp.h */ #define PCI_BAR_COUNT 6 +#include <linux/pci.h> #include <asm-generic/pci.h> #include <asm-generic/pci-dma-compat.h> #include <asm/pci_clp.h> @@ -53,14 +54,9 @@ struct zpci_fmb { atomic64_t unmapped_pages; } __packed __aligned(16); -struct msi_map { - unsigned long irq; - struct msi_desc *msi; - struct hlist_node msi_chain; -}; - -#define ZPCI_NR_MSI_VECS 64 -#define ZPCI_MSI_MASK (ZPCI_NR_MSI_VECS - 1) +#define ZPCI_MSI_VEC_BITS 11 +#define ZPCI_MSI_VEC_MAX (1 << ZPCI_MSI_VEC_BITS) +#define ZPCI_MSI_VEC_MASK (ZPCI_MSI_VEC_MAX - 1) enum zpci_state { ZPCI_FN_STATE_RESERVED, @@ -91,8 +87,7 @@ struct zpci_dev { /* IRQ stuff */ u64 msi_addr; /* MSI address */ - struct zdev_irq_map *irq_map; - struct msi_map *msi_map[ZPCI_NR_MSI_VECS]; + struct airq_iv *aibv; /* adapter interrupt bit vector */ unsigned int aisb; /* number of the summary bit */ /* DMA stuff */ @@ -122,11 +117,6 @@ struct zpci_dev { struct dentry *debugfs_perf; }; -struct pci_hp_callback_ops { - int (*create_slot) (struct zpci_dev *zdev); - void (*remove_slot) (struct zpci_dev *zdev); -}; - static inline bool zdev_enabled(struct zpci_dev *zdev) { return (zdev->fh & (1UL << 31)) ? true : false; @@ -146,32 +136,38 @@ int zpci_register_ioat(struct zpci_dev *, u8, u64, u64, u64); int zpci_unregister_ioat(struct zpci_dev *, u8); /* CLP */ -int clp_find_pci_devices(void); +int clp_scan_pci_devices(void); +int clp_rescan_pci_devices(void); +int clp_rescan_pci_devices_simple(void); int clp_add_pci_device(u32, u32, int); int clp_enable_fh(struct zpci_dev *, u8); int clp_disable_fh(struct zpci_dev *); -/* MSI */ -struct msi_desc *__irq_get_msi_desc(unsigned int); -int zpci_msi_set_mask_bits(struct msi_desc *, u32, u32); -int zpci_setup_msi_irq(struct zpci_dev *, struct msi_desc *, unsigned int, int); -void zpci_teardown_msi_irq(struct zpci_dev *, struct msi_desc *); -int zpci_msihash_init(void); -void zpci_msihash_exit(void); - #ifdef CONFIG_PCI /* Error handling and recovery */ void zpci_event_error(void *); void zpci_event_availability(void *); +void zpci_rescan(void); #else /* CONFIG_PCI */ static inline void zpci_event_error(void *e) {} static inline void zpci_event_availability(void *e) {} +static inline void zpci_rescan(void) {} #endif /* CONFIG_PCI */ +#ifdef CONFIG_HOTPLUG_PCI_S390 +int zpci_init_slot(struct zpci_dev *); +void zpci_exit_slot(struct zpci_dev *); +#else /* CONFIG_HOTPLUG_PCI_S390 */ +static inline int zpci_init_slot(struct zpci_dev *zdev) +{ + return 0; +} +static inline void zpci_exit_slot(struct zpci_dev *zdev) {} +#endif /* CONFIG_HOTPLUG_PCI_S390 */ + /* Helpers */ struct zpci_dev *get_zdev(struct pci_dev *); struct zpci_dev *get_zdev_by_fid(u32); -bool zpci_fid_present(u32); /* sysfs */ int zpci_sysfs_add_device(struct device *); @@ -181,14 +177,6 @@ void zpci_sysfs_remove_device(struct device *); int zpci_dma_init(void); void zpci_dma_exit(void); -/* Hotplug */ -extern struct mutex zpci_list_lock; -extern struct list_head zpci_list; -extern unsigned int s390_pci_probe; - -void 
zpci_register_hp_ops(struct pci_hp_callback_ops *); -void zpci_deregister_hp_ops(void); - /* FMB */ int zpci_fmb_enable_device(struct zpci_dev *); int zpci_fmb_disable_device(struct zpci_dev *); diff --git a/arch/s390/include/asm/pci_insn.h b/arch/s390/include/asm/pci_insn.h index e6a2bdd4d705..df6eac9f0cb4 100644 --- a/arch/s390/include/asm/pci_insn.h +++ b/arch/s390/include/asm/pci_insn.h @@ -79,11 +79,11 @@ struct zpci_fib { } __packed; -int s390pci_mod_fc(u64 req, struct zpci_fib *fib); -int s390pci_refresh_trans(u64 fn, u64 addr, u64 range); -int s390pci_load(u64 *data, u64 req, u64 offset); -int s390pci_store(u64 data, u64 req, u64 offset); -int s390pci_store_block(const u64 *data, u64 req, u64 offset); -void set_irq_ctrl(u16 ctl, char *unused, u8 isc); +int zpci_mod_fc(u64 req, struct zpci_fib *fib); +int zpci_refresh_trans(u64 fn, u64 addr, u64 range); +int zpci_load(u64 *data, u64 req, u64 offset); +int zpci_store(u64 data, u64 req, u64 offset); +int zpci_store_block(const u64 *data, u64 req, u64 offset); +void zpci_set_irq_ctrl(u16 ctl, char *unused, u8 isc); #endif diff --git a/arch/s390/include/asm/pci_io.h b/arch/s390/include/asm/pci_io.h index 83a9caa6ae53..d194d544d694 100644 --- a/arch/s390/include/asm/pci_io.h +++ b/arch/s390/include/asm/pci_io.h @@ -36,7 +36,7 @@ static inline RETTYPE zpci_read_##RETTYPE(const volatile void __iomem *addr) \ u64 data; \ int rc; \ \ - rc = s390pci_load(&data, req, ZPCI_OFFSET(addr)); \ + rc = zpci_load(&data, req, ZPCI_OFFSET(addr)); \ if (rc) \ data = -1ULL; \ return (RETTYPE) data; \ @@ -50,7 +50,7 @@ static inline void zpci_write_##VALTYPE(VALTYPE val, \ u64 req = ZPCI_CREATE_REQ(entry->fh, entry->bar, LENGTH); \ u64 data = (VALTYPE) val; \ \ - s390pci_store(data, req, ZPCI_OFFSET(addr)); \ + zpci_store(data, req, ZPCI_OFFSET(addr)); \ } zpci_read(8, u64) @@ -83,7 +83,7 @@ static inline int zpci_write_single(u64 req, const u64 *data, u64 offset, u8 len val = 0; /* let FW report error */ break; } - return s390pci_store(val, req, offset); + return zpci_store(val, req, offset); } static inline int zpci_read_single(u64 req, u64 *dst, u64 offset, u8 len) @@ -91,7 +91,7 @@ static inline int zpci_read_single(u64 req, u64 *dst, u64 offset, u8 len) u64 data; int cc; - cc = s390pci_load(&data, req, offset); + cc = zpci_load(&data, req, offset); if (cc) goto out; @@ -115,7 +115,7 @@ out: static inline int zpci_write_block(u64 req, const u64 *data, u64 offset) { - return s390pci_store_block(data, req, offset); + return zpci_store_block(data, req, offset); } static inline u8 zpci_get_max_write_size(u64 src, u64 dst, int len, int max) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 75fb726de91f..9b60a36c348d 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -217,63 +217,57 @@ extern unsigned long MODULES_END; /* Hardware bits in the page table entry */ #define _PAGE_CO 0x100 /* HW Change-bit override */ -#define _PAGE_RO 0x200 /* HW read-only bit */ +#define _PAGE_PROTECT 0x200 /* HW read-only bit */ #define _PAGE_INVALID 0x400 /* HW invalid bit */ +#define _PAGE_LARGE 0x800 /* Bit to mark a large pte */ /* Software bits in the page table entry */ -#define _PAGE_SWT 0x001 /* SW pte type bit t */ -#define _PAGE_SWX 0x002 /* SW pte type bit x */ -#define _PAGE_SWC 0x004 /* SW pte changed bit */ -#define _PAGE_SWR 0x008 /* SW pte referenced bit */ -#define _PAGE_SWW 0x010 /* SW pte write bit */ -#define _PAGE_SPECIAL 0x020 /* SW associated with special page */ +#define _PAGE_PRESENT 
0x001 /* SW pte present bit */ +#define _PAGE_TYPE 0x002 /* SW pte type bit */ +#define _PAGE_YOUNG 0x004 /* SW pte young bit */ +#define _PAGE_DIRTY 0x008 /* SW pte dirty bit */ +#define _PAGE_READ 0x010 /* SW pte read bit */ +#define _PAGE_WRITE 0x020 /* SW pte write bit */ +#define _PAGE_SPECIAL 0x040 /* SW associated with special page */ #define __HAVE_ARCH_PTE_SPECIAL /* Set of bits not changed in pte_modify */ #define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_SPECIAL | _PAGE_CO | \ - _PAGE_SWC | _PAGE_SWR) - -/* Six different types of pages. */ -#define _PAGE_TYPE_EMPTY 0x400 -#define _PAGE_TYPE_NONE 0x401 -#define _PAGE_TYPE_SWAP 0x403 -#define _PAGE_TYPE_FILE 0x601 /* bit 0x002 is used for offset !! */ -#define _PAGE_TYPE_RO 0x200 -#define _PAGE_TYPE_RW 0x000 - -/* - * Only four types for huge pages, using the invalid bit and protection bit - * of a segment table entry. - */ -#define _HPAGE_TYPE_EMPTY 0x020 /* _SEGMENT_ENTRY_INV */ -#define _HPAGE_TYPE_NONE 0x220 -#define _HPAGE_TYPE_RO 0x200 /* _SEGMENT_ENTRY_RO */ -#define _HPAGE_TYPE_RW 0x000 + _PAGE_DIRTY | _PAGE_YOUNG) /* - * PTE type bits are rather complicated. handle_pte_fault uses pte_present, - * pte_none and pte_file to find out the pte type WITHOUT holding the page - * table lock. ptep_clear_flush on the other hand uses ptep_clear_flush to - * invalidate a given pte. ipte sets the hw invalid bit and clears all tlbs - * for the page. The page table entry is set to _PAGE_TYPE_EMPTY afterwards. - * This change is done while holding the lock, but the intermediate step - * of a previously valid pte with the hw invalid bit set can be observed by - * handle_pte_fault. That makes it necessary that all valid pte types with - * the hw invalid bit set must be distinguishable from the four pte types - * empty, none, swap and file. + * handle_pte_fault uses pte_present, pte_none and pte_file to find out the + * pte type WITHOUT holding the page table lock. The _PAGE_PRESENT bit + * is used to distinguish present from not-present ptes. It is changed only + * with the page table lock held. + * + * The following table gives the different possible bit combinations for + * the pte hardware and software bits in the last 12 bits of a pte: * - * irxt ipte irxt - * _PAGE_TYPE_EMPTY 1000 -> 1000 - * _PAGE_TYPE_NONE 1001 -> 1001 - * _PAGE_TYPE_SWAP 1011 -> 1011 - * _PAGE_TYPE_FILE 11?1 -> 11?1 - * _PAGE_TYPE_RO 0100 -> 1100 - * _PAGE_TYPE_RW 0000 -> 1000 + * 842100000000 + * 000084210000 + * 000000008421 + * .IR...wrdytp + * empty .10...000000 + * swap .10...xxxx10 + * file .11...xxxxx0 + * prot-none, clean, old .11...000001 + * prot-none, clean, young .11...000101 + * prot-none, dirty, old .10...001001 + * prot-none, dirty, young .10...001101 + * read-only, clean, old .11...010001 + * read-only, clean, young .01...010101 + * read-only, dirty, old .11...011001 + * read-only, dirty, young .01...011101 + * read-write, clean, old .11...110001 + * read-write, clean, young .01...110101 + * read-write, dirty, old .10...111001 + * read-write, dirty, young .00...111101 * - * pte_none is true for bits combinations 1000, 1010, 1100, 1110 - * pte_present is true for bits combinations 0000, 0010, 0100, 0110, 1001 - * pte_file is true for bits combinations 1101, 1111 - * swap pte is 1011 and 0001, 0011, 0101, 0111 are invalid. 
+ * pte_present is true for the bit pattern .xx...xxxxx1, (pte & 0x001) == 0x001 + * pte_none is true for the bit pattern .10...xxxx00, (pte & 0x603) == 0x400 + * pte_file is true for the bit pattern .11...xxxxx0, (pte & 0x601) == 0x600 + * pte_swap is true for the bit pattern .10...xxxx10, (pte & 0x603) == 0x402 */ #ifndef CONFIG_64BIT @@ -286,14 +280,25 @@ extern unsigned long MODULES_END; #define _ASCE_TABLE_LENGTH 0x7f /* 128 x 64 entries = 8k */ /* Bits in the segment table entry */ +#define _SEGMENT_ENTRY_BITS 0x7fffffffUL /* Valid segment table bits */ #define _SEGMENT_ENTRY_ORIGIN 0x7fffffc0UL /* page table origin */ -#define _SEGMENT_ENTRY_RO 0x200 /* page protection bit */ -#define _SEGMENT_ENTRY_INV 0x20 /* invalid segment table entry */ +#define _SEGMENT_ENTRY_PROTECT 0x200 /* page protection bit */ +#define _SEGMENT_ENTRY_INVALID 0x20 /* invalid segment table entry */ #define _SEGMENT_ENTRY_COMMON 0x10 /* common segment bit */ #define _SEGMENT_ENTRY_PTL 0x0f /* page table length */ +#define _SEGMENT_ENTRY_NONE _SEGMENT_ENTRY_PROTECT #define _SEGMENT_ENTRY (_SEGMENT_ENTRY_PTL) -#define _SEGMENT_ENTRY_EMPTY (_SEGMENT_ENTRY_INV) +#define _SEGMENT_ENTRY_EMPTY (_SEGMENT_ENTRY_INVALID) + +/* + * Segment table entry encoding (I = invalid, R = read-only bit): + * ..R...I..... + * prot-none ..1...1..... + * read-only ..1...0..... + * read-write ..0...0..... + * empty ..0...1..... + */ /* Page status table bits for virtualization */ #define PGSTE_ACC_BITS 0xf0000000UL @@ -303,9 +308,7 @@ extern unsigned long MODULES_END; #define PGSTE_HC_BIT 0x00200000UL #define PGSTE_GR_BIT 0x00040000UL #define PGSTE_GC_BIT 0x00020000UL -#define PGSTE_UR_BIT 0x00008000UL -#define PGSTE_UC_BIT 0x00004000UL /* user dirty (migration) */ -#define PGSTE_IN_BIT 0x00002000UL /* IPTE notify bit */ +#define PGSTE_IN_BIT 0x00008000UL /* IPTE notify bit */ #else /* CONFIG_64BIT */ @@ -324,8 +327,8 @@ extern unsigned long MODULES_END; /* Bits in the region table entry */ #define _REGION_ENTRY_ORIGIN ~0xfffUL/* region/segment table origin */ -#define _REGION_ENTRY_RO 0x200 /* region protection bit */ -#define _REGION_ENTRY_INV 0x20 /* invalid region table entry */ +#define _REGION_ENTRY_PROTECT 0x200 /* region protection bit */ +#define _REGION_ENTRY_INVALID 0x20 /* invalid region table entry */ #define _REGION_ENTRY_TYPE_MASK 0x0c /* region/segment table type mask */ #define _REGION_ENTRY_TYPE_R1 0x0c /* region first table type */ #define _REGION_ENTRY_TYPE_R2 0x08 /* region second table type */ @@ -333,29 +336,47 @@ extern unsigned long MODULES_END; #define _REGION_ENTRY_LENGTH 0x03 /* region third length */ #define _REGION1_ENTRY (_REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_LENGTH) -#define _REGION1_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_INV) +#define _REGION1_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_INVALID) #define _REGION2_ENTRY (_REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_LENGTH) -#define _REGION2_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_INV) +#define _REGION2_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_INVALID) #define _REGION3_ENTRY (_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_LENGTH) -#define _REGION3_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_INV) +#define _REGION3_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_INVALID) #define _REGION3_ENTRY_LARGE 0x400 /* RTTE-format control, large page */ #define _REGION3_ENTRY_RO 0x200 /* page protection bit */ #define _REGION3_ENTRY_CO 0x100 /* change-recording override */ /* Bits in the segment table entry */ +#define 
_SEGMENT_ENTRY_BITS 0xfffffffffffffe33UL +#define _SEGMENT_ENTRY_BITS_LARGE 0xfffffffffff1ff33UL #define _SEGMENT_ENTRY_ORIGIN_LARGE ~0xfffffUL /* large page address */ #define _SEGMENT_ENTRY_ORIGIN ~0x7ffUL/* segment table origin */ -#define _SEGMENT_ENTRY_RO 0x200 /* page protection bit */ -#define _SEGMENT_ENTRY_INV 0x20 /* invalid segment table entry */ +#define _SEGMENT_ENTRY_PROTECT 0x200 /* page protection bit */ +#define _SEGMENT_ENTRY_INVALID 0x20 /* invalid segment table entry */ #define _SEGMENT_ENTRY (0) -#define _SEGMENT_ENTRY_EMPTY (_SEGMENT_ENTRY_INV) +#define _SEGMENT_ENTRY_EMPTY (_SEGMENT_ENTRY_INVALID) #define _SEGMENT_ENTRY_LARGE 0x400 /* STE-format control, large page */ #define _SEGMENT_ENTRY_CO 0x100 /* change-recording override */ +#define _SEGMENT_ENTRY_SPLIT 0x001 /* THP splitting bit */ +#define _SEGMENT_ENTRY_YOUNG 0x002 /* SW segment young bit */ +#define _SEGMENT_ENTRY_NONE _SEGMENT_ENTRY_YOUNG + +/* + * Segment table entry encoding (R = read-only, I = invalid, y = young bit): + * ..R...I...y. + * prot-none, old ..0...1...1. + * prot-none, young ..1...1...1. + * read-only, old ..1...1...0. + * read-only, young ..1...0...1. + * read-write, old ..0...1...0. + * read-write, young ..0...0...1. + * The segment table origin is used to distinguish empty (origin==0) from + * read-write, old segment table entries (origin!=0) + */ + #define _SEGMENT_ENTRY_SPLIT_BIT 0 /* THP splitting bit number */ -#define _SEGMENT_ENTRY_SPLIT (1UL << _SEGMENT_ENTRY_SPLIT_BIT) /* Set of bits not changed in pmd_modify */ #define _SEGMENT_CHG_MASK (_SEGMENT_ENTRY_ORIGIN | _SEGMENT_ENTRY_LARGE \ @@ -369,9 +390,7 @@ extern unsigned long MODULES_END; #define PGSTE_HC_BIT 0x0020000000000000UL #define PGSTE_GR_BIT 0x0004000000000000UL #define PGSTE_GC_BIT 0x0002000000000000UL -#define PGSTE_UR_BIT 0x0000800000000000UL -#define PGSTE_UC_BIT 0x0000400000000000UL /* user dirty (migration) */ -#define PGSTE_IN_BIT 0x0000200000000000UL /* IPTE notify bit */ +#define PGSTE_IN_BIT 0x0000800000000000UL /* IPTE notify bit */ #endif /* CONFIG_64BIT */ @@ -386,14 +405,18 @@ extern unsigned long MODULES_END; /* * Page protection definitions. */ -#define PAGE_NONE __pgprot(_PAGE_TYPE_NONE) -#define PAGE_RO __pgprot(_PAGE_TYPE_RO) -#define PAGE_RW __pgprot(_PAGE_TYPE_RO | _PAGE_SWW) -#define PAGE_RWC __pgprot(_PAGE_TYPE_RW | _PAGE_SWW | _PAGE_SWC) - -#define PAGE_KERNEL PAGE_RWC -#define PAGE_SHARED PAGE_KERNEL -#define PAGE_COPY PAGE_RO +#define PAGE_NONE __pgprot(_PAGE_PRESENT | _PAGE_INVALID) +#define PAGE_READ __pgprot(_PAGE_PRESENT | _PAGE_READ | \ + _PAGE_INVALID | _PAGE_PROTECT) +#define PAGE_WRITE __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \ + _PAGE_INVALID | _PAGE_PROTECT) + +#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \ + _PAGE_YOUNG | _PAGE_DIRTY) +#define PAGE_KERNEL __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \ + _PAGE_YOUNG | _PAGE_DIRTY) +#define PAGE_KERNEL_RO __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_YOUNG | \ + _PAGE_PROTECT) /* * On s390 the page table entry has an invalid bit and a read-only bit. 
@@ -402,35 +425,31 @@ extern unsigned long MODULES_END; */ /*xwr*/ #define __P000 PAGE_NONE -#define __P001 PAGE_RO -#define __P010 PAGE_RO -#define __P011 PAGE_RO -#define __P100 PAGE_RO -#define __P101 PAGE_RO -#define __P110 PAGE_RO -#define __P111 PAGE_RO +#define __P001 PAGE_READ +#define __P010 PAGE_READ +#define __P011 PAGE_READ +#define __P100 PAGE_READ +#define __P101 PAGE_READ +#define __P110 PAGE_READ +#define __P111 PAGE_READ #define __S000 PAGE_NONE -#define __S001 PAGE_RO -#define __S010 PAGE_RW -#define __S011 PAGE_RW -#define __S100 PAGE_RO -#define __S101 PAGE_RO -#define __S110 PAGE_RW -#define __S111 PAGE_RW +#define __S001 PAGE_READ +#define __S010 PAGE_WRITE +#define __S011 PAGE_WRITE +#define __S100 PAGE_READ +#define __S101 PAGE_READ +#define __S110 PAGE_WRITE +#define __S111 PAGE_WRITE /* * Segment entry (large page) protection definitions. */ -#define SEGMENT_NONE __pgprot(_HPAGE_TYPE_NONE) -#define SEGMENT_RO __pgprot(_HPAGE_TYPE_RO) -#define SEGMENT_RW __pgprot(_HPAGE_TYPE_RW) - -static inline int mm_exclusive(struct mm_struct *mm) -{ - return likely(mm == current->active_mm && - atomic_read(&mm->context.attach_count) <= 1); -} +#define SEGMENT_NONE __pgprot(_SEGMENT_ENTRY_INVALID | \ + _SEGMENT_ENTRY_NONE) +#define SEGMENT_READ __pgprot(_SEGMENT_ENTRY_INVALID | \ + _SEGMENT_ENTRY_PROTECT) +#define SEGMENT_WRITE __pgprot(_SEGMENT_ENTRY_INVALID) static inline int mm_has_pgste(struct mm_struct *mm) { @@ -467,7 +486,7 @@ static inline int pgd_none(pgd_t pgd) { if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R2) return 0; - return (pgd_val(pgd) & _REGION_ENTRY_INV) != 0UL; + return (pgd_val(pgd) & _REGION_ENTRY_INVALID) != 0UL; } static inline int pgd_bad(pgd_t pgd) @@ -478,7 +497,7 @@ static inline int pgd_bad(pgd_t pgd) * invalid for either table entry. */ unsigned long mask = - ~_SEGMENT_ENTRY_ORIGIN & ~_REGION_ENTRY_INV & + ~_SEGMENT_ENTRY_ORIGIN & ~_REGION_ENTRY_INVALID & ~_REGION_ENTRY_TYPE_MASK & ~_REGION_ENTRY_LENGTH; return (pgd_val(pgd) & mask) != 0; } @@ -494,7 +513,7 @@ static inline int pud_none(pud_t pud) { if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R3) return 0; - return (pud_val(pud) & _REGION_ENTRY_INV) != 0UL; + return (pud_val(pud) & _REGION_ENTRY_INVALID) != 0UL; } static inline int pud_large(pud_t pud) @@ -512,7 +531,7 @@ static inline int pud_bad(pud_t pud) * invalid for either table entry. 
*/ unsigned long mask = - ~_SEGMENT_ENTRY_ORIGIN & ~_REGION_ENTRY_INV & + ~_SEGMENT_ENTRY_ORIGIN & ~_REGION_ENTRY_INVALID & ~_REGION_ENTRY_TYPE_MASK & ~_REGION_ENTRY_LENGTH; return (pud_val(pud) & mask) != 0; } @@ -521,30 +540,36 @@ static inline int pud_bad(pud_t pud) static inline int pmd_present(pmd_t pmd) { - unsigned long mask = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO; - return (pmd_val(pmd) & mask) == _HPAGE_TYPE_NONE || - !(pmd_val(pmd) & _SEGMENT_ENTRY_INV); + return pmd_val(pmd) != _SEGMENT_ENTRY_INVALID; } static inline int pmd_none(pmd_t pmd) { - return (pmd_val(pmd) & _SEGMENT_ENTRY_INV) && - !(pmd_val(pmd) & _SEGMENT_ENTRY_RO); + return pmd_val(pmd) == _SEGMENT_ENTRY_INVALID; } static inline int pmd_large(pmd_t pmd) { #ifdef CONFIG_64BIT - return !!(pmd_val(pmd) & _SEGMENT_ENTRY_LARGE); + return (pmd_val(pmd) & _SEGMENT_ENTRY_LARGE) != 0; #else return 0; #endif } +static inline int pmd_prot_none(pmd_t pmd) +{ + return (pmd_val(pmd) & _SEGMENT_ENTRY_INVALID) && + (pmd_val(pmd) & _SEGMENT_ENTRY_NONE); +} + static inline int pmd_bad(pmd_t pmd) { - unsigned long mask = ~_SEGMENT_ENTRY_ORIGIN & ~_SEGMENT_ENTRY_INV; - return (pmd_val(pmd) & mask) != _SEGMENT_ENTRY; +#ifdef CONFIG_64BIT + if (pmd_large(pmd)) + return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS_LARGE) != 0; +#endif + return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS) != 0; } #define __HAVE_ARCH_PMDP_SPLITTING_FLUSH @@ -563,31 +588,40 @@ extern int pmdp_clear_flush_young(struct vm_area_struct *vma, #define __HAVE_ARCH_PMD_WRITE static inline int pmd_write(pmd_t pmd) { - return (pmd_val(pmd) & _SEGMENT_ENTRY_RO) == 0; + if (pmd_prot_none(pmd)) + return 0; + return (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT) == 0; } static inline int pmd_young(pmd_t pmd) { - return 0; + int young = 0; +#ifdef CONFIG_64BIT + if (pmd_prot_none(pmd)) + young = (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT) != 0; + else + young = (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG) != 0; +#endif + return young; } -static inline int pte_none(pte_t pte) +static inline int pte_present(pte_t pte) { - return (pte_val(pte) & _PAGE_INVALID) && !(pte_val(pte) & _PAGE_SWT); + /* Bit pattern: (pte & 0x001) == 0x001 */ + return (pte_val(pte) & _PAGE_PRESENT) != 0; } -static inline int pte_present(pte_t pte) +static inline int pte_none(pte_t pte) { - unsigned long mask = _PAGE_RO | _PAGE_INVALID | _PAGE_SWT | _PAGE_SWX; - return (pte_val(pte) & mask) == _PAGE_TYPE_NONE || - (!(pte_val(pte) & _PAGE_INVALID) && - !(pte_val(pte) & _PAGE_SWT)); + /* Bit pattern: pte == 0x400 */ + return pte_val(pte) == _PAGE_INVALID; } static inline int pte_file(pte_t pte) { - unsigned long mask = _PAGE_RO | _PAGE_INVALID | _PAGE_SWT; - return (pte_val(pte) & mask) == _PAGE_TYPE_FILE; + /* Bit pattern: (pte & 0x601) == 0x600 */ + return (pte_val(pte) & (_PAGE_INVALID | _PAGE_PROTECT | _PAGE_PRESENT)) + == (_PAGE_INVALID | _PAGE_PROTECT); } static inline int pte_special(pte_t pte) @@ -634,6 +668,15 @@ static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste) #endif } +static inline pgste_t pgste_get(pte_t *ptep) +{ + unsigned long pgste = 0; +#ifdef CONFIG_PGSTE + pgste = *(unsigned long *)(ptep + PTRS_PER_PTE); +#endif + return __pgste(pgste); +} + static inline void pgste_set(pte_t *ptep, pgste_t pgste) { #ifdef CONFIG_PGSTE @@ -644,33 +687,28 @@ static inline void pgste_set(pte_t *ptep, pgste_t pgste) static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste) { #ifdef CONFIG_PGSTE - unsigned long address, bits; - unsigned char skey; + unsigned long address, bits, skey; if (pte_val(*ptep) & 
_PAGE_INVALID) return pgste; address = pte_val(*ptep) & PAGE_MASK; - skey = page_get_storage_key(address); + skey = (unsigned long) page_get_storage_key(address); bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED); - /* Clear page changed & referenced bit in the storage key */ - if (bits & _PAGE_CHANGED) + if (!(pgste_val(pgste) & PGSTE_HC_BIT) && (bits & _PAGE_CHANGED)) { + /* Transfer dirty + referenced bit to host bits in pgste */ + pgste_val(pgste) |= bits << 52; page_set_storage_key(address, skey ^ bits, 0); - else if (bits) + } else if (!(pgste_val(pgste) & PGSTE_HR_BIT) && + (bits & _PAGE_REFERENCED)) { + /* Transfer referenced bit to host bit in pgste */ + pgste_val(pgste) |= PGSTE_HR_BIT; page_reset_referenced(address); + } /* Transfer page changed & referenced bit to guest bits in pgste */ pgste_val(pgste) |= bits << 48; /* GR bit & GC bit */ - /* Get host changed & referenced bits from pgste */ - bits |= (pgste_val(pgste) & (PGSTE_HR_BIT | PGSTE_HC_BIT)) >> 52; - /* Transfer page changed & referenced bit to kvm user bits */ - pgste_val(pgste) |= bits << 45; /* PGSTE_UR_BIT & PGSTE_UC_BIT */ - /* Clear relevant host bits in pgste. */ - pgste_val(pgste) &= ~(PGSTE_HR_BIT | PGSTE_HC_BIT); - pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT); /* Copy page access key and fetch protection bit to pgste */ - pgste_val(pgste) |= - (unsigned long) (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56; - /* Transfer referenced bit to pte */ - pte_val(*ptep) |= (bits & _PAGE_REFERENCED) << 1; + pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT); + pgste_val(pgste) |= (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56; #endif return pgste; @@ -679,24 +717,11 @@ static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste) static inline pgste_t pgste_update_young(pte_t *ptep, pgste_t pgste) { #ifdef CONFIG_PGSTE - int young; - if (pte_val(*ptep) & _PAGE_INVALID) return pgste; /* Get referenced bit from storage key */ - young = page_reset_referenced(pte_val(*ptep) & PAGE_MASK); - if (young) - pgste_val(pgste) |= PGSTE_GR_BIT; - /* Get host referenced bit from pgste */ - if (pgste_val(pgste) & PGSTE_HR_BIT) { - pgste_val(pgste) &= ~PGSTE_HR_BIT; - young = 1; - } - /* Transfer referenced bit to kvm user bits and pte */ - if (young) { - pgste_val(pgste) |= PGSTE_UR_BIT; - pte_val(*ptep) |= _PAGE_SWR; - } + if (page_reset_referenced(pte_val(*ptep) & PAGE_MASK)) + pgste_val(pgste) |= PGSTE_HR_BIT | PGSTE_GR_BIT; #endif return pgste; } @@ -723,13 +748,13 @@ static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry) static inline void pgste_set_pte(pte_t *ptep, pte_t entry) { - if (!MACHINE_HAS_ESOP && (pte_val(entry) & _PAGE_SWW)) { + if (!MACHINE_HAS_ESOP && (pte_val(entry) & _PAGE_WRITE)) { /* * Without enhanced suppression-on-protection force * the dirty bit on for all writable ptes. 
*/ - pte_val(entry) |= _PAGE_SWC; - pte_val(entry) &= ~_PAGE_RO; + pte_val(entry) |= _PAGE_DIRTY; + pte_val(entry) &= ~_PAGE_PROTECT; } *ptep = entry; } @@ -841,21 +866,17 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, */ static inline int pte_write(pte_t pte) { - return (pte_val(pte) & _PAGE_SWW) != 0; + return (pte_val(pte) & _PAGE_WRITE) != 0; } static inline int pte_dirty(pte_t pte) { - return (pte_val(pte) & _PAGE_SWC) != 0; + return (pte_val(pte) & _PAGE_DIRTY) != 0; } static inline int pte_young(pte_t pte) { -#ifdef CONFIG_PGSTE - if (pte_val(pte) & _PAGE_SWR) - return 1; -#endif - return 0; + return (pte_val(pte) & _PAGE_YOUNG) != 0; } /* @@ -880,12 +901,12 @@ static inline void pud_clear(pud_t *pud) static inline void pmd_clear(pmd_t *pmdp) { - pmd_val(*pmdp) = _SEGMENT_ENTRY_EMPTY; + pmd_val(*pmdp) = _SEGMENT_ENTRY_INVALID; } static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - pte_val(*ptep) = _PAGE_TYPE_EMPTY; + pte_val(*ptep) = _PAGE_INVALID; } /* @@ -896,55 +917,63 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { pte_val(pte) &= _PAGE_CHG_MASK; pte_val(pte) |= pgprot_val(newprot); - if ((pte_val(pte) & _PAGE_SWC) && (pte_val(pte) & _PAGE_SWW)) - pte_val(pte) &= ~_PAGE_RO; + /* + * newprot for PAGE_NONE, PAGE_READ and PAGE_WRITE has the + * invalid bit set, clear it again for readable, young pages + */ + if ((pte_val(pte) & _PAGE_YOUNG) && (pte_val(pte) & _PAGE_READ)) + pte_val(pte) &= ~_PAGE_INVALID; + /* + * newprot for PAGE_READ and PAGE_WRITE has the page protection + * bit set, clear it again for writable, dirty pages + */ + if ((pte_val(pte) & _PAGE_DIRTY) && (pte_val(pte) & _PAGE_WRITE)) + pte_val(pte) &= ~_PAGE_PROTECT; return pte; } static inline pte_t pte_wrprotect(pte_t pte) { - pte_val(pte) &= ~_PAGE_SWW; - /* Do not clobber _PAGE_TYPE_NONE pages! */ - if (!(pte_val(pte) & _PAGE_INVALID)) - pte_val(pte) |= _PAGE_RO; + pte_val(pte) &= ~_PAGE_WRITE; + pte_val(pte) |= _PAGE_PROTECT; return pte; } static inline pte_t pte_mkwrite(pte_t pte) { - pte_val(pte) |= _PAGE_SWW; - if (pte_val(pte) & _PAGE_SWC) - pte_val(pte) &= ~_PAGE_RO; + pte_val(pte) |= _PAGE_WRITE; + if (pte_val(pte) & _PAGE_DIRTY) + pte_val(pte) &= ~_PAGE_PROTECT; return pte; } static inline pte_t pte_mkclean(pte_t pte) { - pte_val(pte) &= ~_PAGE_SWC; - /* Do not clobber _PAGE_TYPE_NONE pages! 
*/ - if (!(pte_val(pte) & _PAGE_INVALID)) - pte_val(pte) |= _PAGE_RO; + pte_val(pte) &= ~_PAGE_DIRTY; + pte_val(pte) |= _PAGE_PROTECT; return pte; } static inline pte_t pte_mkdirty(pte_t pte) { - pte_val(pte) |= _PAGE_SWC; - if (pte_val(pte) & _PAGE_SWW) - pte_val(pte) &= ~_PAGE_RO; + pte_val(pte) |= _PAGE_DIRTY; + if (pte_val(pte) & _PAGE_WRITE) + pte_val(pte) &= ~_PAGE_PROTECT; return pte; } static inline pte_t pte_mkold(pte_t pte) { -#ifdef CONFIG_PGSTE - pte_val(pte) &= ~_PAGE_SWR; -#endif + pte_val(pte) &= ~_PAGE_YOUNG; + pte_val(pte) |= _PAGE_INVALID; return pte; } static inline pte_t pte_mkyoung(pte_t pte) { + pte_val(pte) |= _PAGE_YOUNG; + if (pte_val(pte) & _PAGE_READ) + pte_val(pte) &= ~_PAGE_INVALID; return pte; } @@ -957,7 +986,7 @@ static inline pte_t pte_mkspecial(pte_t pte) #ifdef CONFIG_HUGETLB_PAGE static inline pte_t pte_mkhuge(pte_t pte) { - pte_val(pte) |= (_SEGMENT_ENTRY_LARGE | _SEGMENT_ENTRY_CO); + pte_val(pte) |= _PAGE_LARGE; return pte; } #endif @@ -974,8 +1003,8 @@ static inline int ptep_test_and_clear_user_dirty(struct mm_struct *mm, if (mm_has_pgste(mm)) { pgste = pgste_get_lock(ptep); pgste = pgste_update_all(ptep, pgste); - dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT); - pgste_val(pgste) &= ~PGSTE_UC_BIT; + dirty = !!(pgste_val(pgste) & PGSTE_HC_BIT); + pgste_val(pgste) &= ~PGSTE_HC_BIT; pgste_set_unlock(ptep, pgste); return dirty; } @@ -994,59 +1023,75 @@ static inline int ptep_test_and_clear_user_young(struct mm_struct *mm, if (mm_has_pgste(mm)) { pgste = pgste_get_lock(ptep); pgste = pgste_update_young(ptep, pgste); - young = !!(pgste_val(pgste) & PGSTE_UR_BIT); - pgste_val(pgste) &= ~PGSTE_UR_BIT; + young = !!(pgste_val(pgste) & PGSTE_HR_BIT); + pgste_val(pgste) &= ~PGSTE_HR_BIT; pgste_set_unlock(ptep, pgste); } return young; } +static inline void __ptep_ipte(unsigned long address, pte_t *ptep) +{ + if (!(pte_val(*ptep) & _PAGE_INVALID)) { +#ifndef CONFIG_64BIT + /* pto must point to the start of the segment table */ + pte_t *pto = (pte_t *) (((unsigned long) ptep) & 0x7ffffc00); +#else + /* ipte in zarch mode can do the math */ + pte_t *pto = ptep; +#endif + asm volatile( + " ipte %2,%3" + : "=m" (*ptep) : "m" (*ptep), + "a" (pto), "a" (address)); + } +} + +static inline void ptep_flush_lazy(struct mm_struct *mm, + unsigned long address, pte_t *ptep) +{ + int active = (mm == current->active_mm) ? 
1 : 0; + + if (atomic_read(&mm->context.attach_count) > active) + __ptep_ipte(address, ptep); + else + mm->context.flush_mm = 1; +} + #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { pgste_t pgste; pte_t pte; + int young; if (mm_has_pgste(vma->vm_mm)) { pgste = pgste_get_lock(ptep); - pgste = pgste_update_young(ptep, pgste); - pte = *ptep; - *ptep = pte_mkold(pte); - pgste_set_unlock(ptep, pgste); - return pte_young(pte); + pgste = pgste_ipte_notify(vma->vm_mm, addr, ptep, pgste); } - return 0; + + pte = *ptep; + __ptep_ipte(addr, ptep); + young = pte_young(pte); + pte = pte_mkold(pte); + + if (mm_has_pgste(vma->vm_mm)) { + pgste_set_pte(ptep, pte); + pgste_set_unlock(ptep, pgste); + } else + *ptep = pte; + + return young; } #define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH static inline int ptep_clear_flush_young(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) { - /* No need to flush TLB - * On s390 reference bits are in storage key and never in TLB - * With virtualization we handle the reference bit, without we - * we can simply return */ return ptep_test_and_clear_young(vma, address, ptep); } -static inline void __ptep_ipte(unsigned long address, pte_t *ptep) -{ - if (!(pte_val(*ptep) & _PAGE_INVALID)) { -#ifndef CONFIG_64BIT - /* pto must point to the start of the segment table */ - pte_t *pto = (pte_t *) (((unsigned long) ptep) & 0x7ffffc00); -#else - /* ipte in zarch mode can do the math */ - pte_t *pto = ptep; -#endif - asm volatile( - " ipte %2,%3" - : "=m" (*ptep) : "m" (*ptep), - "a" (pto), "a" (address)); - } -} - /* * This is hard to understand. ptep_get_and_clear and ptep_clear_flush * both clear the TLB for the unmapped pte. 
The reason is that @@ -1067,16 +1112,14 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, pgste_t pgste; pte_t pte; - mm->context.flush_mm = 1; if (mm_has_pgste(mm)) { pgste = pgste_get_lock(ptep); pgste = pgste_ipte_notify(mm, address, ptep, pgste); } pte = *ptep; - if (!mm_exclusive(mm)) - __ptep_ipte(address, ptep); - pte_val(*ptep) = _PAGE_TYPE_EMPTY; + ptep_flush_lazy(mm, address, ptep); + pte_val(*ptep) = _PAGE_INVALID; if (mm_has_pgste(mm)) { pgste = pgste_update_all(&pte, pgste); @@ -1093,15 +1136,14 @@ static inline pte_t ptep_modify_prot_start(struct mm_struct *mm, pgste_t pgste; pte_t pte; - mm->context.flush_mm = 1; if (mm_has_pgste(mm)) { pgste = pgste_get_lock(ptep); pgste_ipte_notify(mm, address, ptep, pgste); } pte = *ptep; - if (!mm_exclusive(mm)) - __ptep_ipte(address, ptep); + ptep_flush_lazy(mm, address, ptep); + pte_val(*ptep) |= _PAGE_INVALID; if (mm_has_pgste(mm)) { pgste = pgste_update_all(&pte, pgste); @@ -1117,7 +1159,7 @@ static inline void ptep_modify_prot_commit(struct mm_struct *mm, pgste_t pgste; if (mm_has_pgste(mm)) { - pgste = *(pgste_t *)(ptep + PTRS_PER_PTE); + pgste = pgste_get(ptep); pgste_set_key(ptep, pgste, pte); pgste_set_pte(ptep, pte); pgste_set_unlock(ptep, pgste); @@ -1139,7 +1181,7 @@ static inline pte_t ptep_clear_flush(struct vm_area_struct *vma, pte = *ptep; __ptep_ipte(address, ptep); - pte_val(*ptep) = _PAGE_TYPE_EMPTY; + pte_val(*ptep) = _PAGE_INVALID; if (mm_has_pgste(vma->vm_mm)) { pgste = pgste_update_all(&pte, pgste); @@ -1163,18 +1205,17 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, pgste_t pgste; pte_t pte; - if (mm_has_pgste(mm)) { + if (!full && mm_has_pgste(mm)) { pgste = pgste_get_lock(ptep); - if (!full) - pgste = pgste_ipte_notify(mm, address, ptep, pgste); + pgste = pgste_ipte_notify(mm, address, ptep, pgste); } pte = *ptep; if (!full) - __ptep_ipte(address, ptep); - pte_val(*ptep) = _PAGE_TYPE_EMPTY; + ptep_flush_lazy(mm, address, ptep); + pte_val(*ptep) = _PAGE_INVALID; - if (mm_has_pgste(mm)) { + if (!full && mm_has_pgste(mm)) { pgste = pgste_update_all(&pte, pgste); pgste_set_unlock(ptep, pgste); } @@ -1189,14 +1230,12 @@ static inline pte_t ptep_set_wrprotect(struct mm_struct *mm, pte_t pte = *ptep; if (pte_write(pte)) { - mm->context.flush_mm = 1; if (mm_has_pgste(mm)) { pgste = pgste_get_lock(ptep); pgste = pgste_ipte_notify(mm, address, ptep, pgste); } - if (!mm_exclusive(mm)) - __ptep_ipte(address, ptep); + ptep_flush_lazy(mm, address, ptep); pte = pte_wrprotect(pte); if (mm_has_pgste(mm)) { @@ -1240,7 +1279,7 @@ static inline pte_t mk_pte_phys(unsigned long physpage, pgprot_t pgprot) { pte_t __pte; pte_val(__pte) = physpage + pgprot_val(pgprot); - return __pte; + return pte_mkyoung(__pte); } static inline pte_t mk_pte(struct page *page, pgprot_t pgprot) @@ -1248,10 +1287,8 @@ static inline pte_t mk_pte(struct page *page, pgprot_t pgprot) unsigned long physpage = page_to_phys(page); pte_t __pte = mk_pte_phys(physpage, pgprot); - if ((pte_val(__pte) & _PAGE_SWW) && PageDirty(page)) { - pte_val(__pte) |= _PAGE_SWC; - pte_val(__pte) &= ~_PAGE_RO; - } + if (pte_write(__pte) && PageDirty(page)) + __pte = pte_mkdirty(__pte); return __pte; } @@ -1313,7 +1350,7 @@ static inline void __pmd_idte(unsigned long address, pmd_t *pmdp) unsigned long sto = (unsigned long) pmdp - pmd_index(address) * sizeof(pmd_t); - if (!(pmd_val(*pmdp) & _SEGMENT_ENTRY_INV)) { + if (!(pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID)) { asm volatile( " .insn rrf,0xb98e0000,%2,%3,0,0" : "=m" (*pmdp) @@ -1324,24 +1361,68 
@@ static inline void __pmd_idte(unsigned long address, pmd_t *pmdp) } } +static inline void __pmd_csp(pmd_t *pmdp) +{ + register unsigned long reg2 asm("2") = pmd_val(*pmdp); + register unsigned long reg3 asm("3") = pmd_val(*pmdp) | + _SEGMENT_ENTRY_INVALID; + register unsigned long reg4 asm("4") = ((unsigned long) pmdp) + 5; + + asm volatile( + " csp %1,%3" + : "=m" (*pmdp) + : "d" (reg2), "d" (reg3), "d" (reg4), "m" (*pmdp) : "cc"); +} + #if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE) static inline unsigned long massage_pgprot_pmd(pgprot_t pgprot) { /* - * pgprot is PAGE_NONE, PAGE_RO, or PAGE_RW (see __Pxxx / __Sxxx) + * pgprot is PAGE_NONE, PAGE_READ, or PAGE_WRITE (see __Pxxx / __Sxxx) * Convert to segment table entry format. */ if (pgprot_val(pgprot) == pgprot_val(PAGE_NONE)) return pgprot_val(SEGMENT_NONE); - if (pgprot_val(pgprot) == pgprot_val(PAGE_RO)) - return pgprot_val(SEGMENT_RO); - return pgprot_val(SEGMENT_RW); + if (pgprot_val(pgprot) == pgprot_val(PAGE_READ)) + return pgprot_val(SEGMENT_READ); + return pgprot_val(SEGMENT_WRITE); +} + +static inline pmd_t pmd_mkyoung(pmd_t pmd) +{ +#ifdef CONFIG_64BIT + if (pmd_prot_none(pmd)) { + pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT; + } else { + pmd_val(pmd) |= _SEGMENT_ENTRY_YOUNG; + pmd_val(pmd) &= ~_SEGMENT_ENTRY_INVALID; + } +#endif + return pmd; +} + +static inline pmd_t pmd_mkold(pmd_t pmd) +{ +#ifdef CONFIG_64BIT + if (pmd_prot_none(pmd)) { + pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT; + } else { + pmd_val(pmd) &= ~_SEGMENT_ENTRY_YOUNG; + pmd_val(pmd) |= _SEGMENT_ENTRY_INVALID; + } +#endif + return pmd; } static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) { + int young; + + young = pmd_young(pmd); pmd_val(pmd) &= _SEGMENT_CHG_MASK; pmd_val(pmd) |= massage_pgprot_pmd(newprot); + if (young) + pmd = pmd_mkyoung(pmd); return pmd; } @@ -1349,18 +1430,29 @@ static inline pmd_t mk_pmd_phys(unsigned long physpage, pgprot_t pgprot) { pmd_t __pmd; pmd_val(__pmd) = physpage + massage_pgprot_pmd(pgprot); - return __pmd; + return pmd_mkyoung(__pmd); } static inline pmd_t pmd_mkwrite(pmd_t pmd) { - /* Do not clobber _HPAGE_TYPE_NONE pages! */ - if (!(pmd_val(pmd) & _SEGMENT_ENTRY_INV)) - pmd_val(pmd) &= ~_SEGMENT_ENTRY_RO; + /* Do not clobber PROT_NONE segments! */ + if (!pmd_prot_none(pmd)) + pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT; return pmd; } #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLB_PAGE */ +static inline void pmdp_flush_lazy(struct mm_struct *mm, + unsigned long address, pmd_t *pmdp) +{ + int active = (mm == current->active_mm) ? 1 : 0; + + if ((atomic_read(&mm->context.attach_count) & 0xffff) > active) + __pmd_idte(address, pmdp); + else + mm->context.flush_mm = 1; +} + #ifdef CONFIG_TRANSPARENT_HUGEPAGE #define __HAVE_ARCH_PGTABLE_DEPOSIT @@ -1378,7 +1470,7 @@ static inline int pmd_trans_splitting(pmd_t pmd) static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pmd_t entry) { - if (!(pmd_val(entry) & _SEGMENT_ENTRY_INV) && MACHINE_HAS_EDAT1) + if (!(pmd_val(entry) & _SEGMENT_ENTRY_INVALID) && MACHINE_HAS_EDAT1) pmd_val(entry) |= _SEGMENT_ENTRY_CO; *pmdp = entry; } @@ -1391,7 +1483,9 @@ static inline pmd_t pmd_mkhuge(pmd_t pmd) static inline pmd_t pmd_wrprotect(pmd_t pmd) { - pmd_val(pmd) |= _SEGMENT_ENTRY_RO; + /* Do not clobber PROT_NONE segments! 
*/ + if (!pmd_prot_none(pmd)) + pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT; return pmd; } @@ -1401,50 +1495,16 @@ static inline pmd_t pmd_mkdirty(pmd_t pmd) { return pmd; } -static inline pmd_t pmd_mkold(pmd_t pmd) -{ - /* No referenced bit in the segment table entry. */ - return pmd; -} - -static inline pmd_t pmd_mkyoung(pmd_t pmd) -{ - /* No referenced bit in the segment table entry. */ - return pmd; -} - #define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp) { - unsigned long pmd_addr = pmd_val(*pmdp) & HPAGE_MASK; - long tmp, rc; - int counter; + pmd_t pmd; - rc = 0; - if (MACHINE_HAS_RRBM) { - counter = PTRS_PER_PTE >> 6; - asm volatile( - "0: .insn rre,0xb9ae0000,%0,%3\n" /* rrbm */ - " ogr %1,%0\n" - " la %3,0(%4,%3)\n" - " brct %2,0b\n" - : "=&d" (tmp), "+&d" (rc), "+d" (counter), - "+a" (pmd_addr) - : "a" (64 * 4096UL) : "cc"); - rc = !!rc; - } else { - counter = PTRS_PER_PTE; - asm volatile( - "0: rrbe 0,%2\n" - " la %2,0(%3,%2)\n" - " brc 12,1f\n" - " lhi %0,1\n" - "1: brct %1,0b\n" - : "+d" (rc), "+d" (counter), "+a" (pmd_addr) - : "a" (4096UL) : "cc"); - } - return rc; + pmd = *pmdp; + __pmd_idte(address, pmdp); + *pmdp = pmd_mkold(pmd); + return pmd_young(pmd); } #define __HAVE_ARCH_PMDP_GET_AND_CLEAR @@ -1510,10 +1570,8 @@ static inline unsigned long pmd_pfn(pmd_t pmd) * exception will occur instead of a page translation exception. The * specification exception has the bad habit not to store necessary * information in the lowcore. - * Bit 21 and bit 22 are the page invalid bit and the page protection - * bit. We set both to indicate a swapped page. - * Bit 30 and 31 are used to distinguish the different page types. For - * a swapped page these bits need to be zero. + * Bits 21, 22, 30 and 31 are used to indicate the page type. + * A swap pte is indicated by bit pattern (pte & 0x603) == 0x402 * This leaves the bits 1-19 and bits 24-29 to store type and offset. * We use the 5 bits from 25-29 for the type and the 20 bits from 1-19 * plus 24 for the offset. @@ -1527,10 +1585,8 @@ static inline unsigned long pmd_pfn(pmd_t pmd) * exception will occur instead of a page translation exception. The * specification exception has the bad habit not to store necessary * information in the lowcore. - * Bit 53 and bit 54 are the page invalid bit and the page protection - * bit. We set both to indicate a swapped page. - * Bit 62 and 63 are used to distinguish the different page types. For - * a swapped page these bits need to be zero. + * Bits 53, 54, 62 and 63 are used to indicate the page type. + * A swap pte is indicated by bit pattern (pte & 0x603) == 0x402 * This leaves the bits 0-51 and bits 56-61 to store type and offset. * We use the 5 bits from 57-61 for the type and the 53 bits from 0-51 * plus 56 for the offset.
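The swap encoding described in the two comments above is easy to sanity-check in isolation. The short userspace C sketch below is illustrative and not part of the patch: it mirrors the pte bit values defined earlier in this patch (_PAGE_PRESENT 0x001, _PAGE_TYPE 0x002, _PAGE_PROTECT 0x200, _PAGE_INVALID 0x400, renamed without the leading underscore so the identifiers are not reserved in userspace), copies the layout of mk_swap_pte() from the hunk that follows, and uses hand-derived inverses of that layout as decode helpers rather than the kernel's own swap-entry macros.

#include <assert.h>
#include <stdio.h>

#define PAGE_PRESENT	0x001UL		/* SW pte present bit */
#define PAGE_TYPE	0x002UL		/* SW pte type bit */
#define PAGE_PROTECT	0x200UL		/* HW read-only bit */
#define PAGE_INVALID	0x400UL		/* HW invalid bit */

/* Same layout as mk_swap_pte(): 5 type bits at 2-6, low offset bit at
   bit 7, remaining offset bits from bit 11 upwards. */
static unsigned long mk_swap(unsigned long type, unsigned long offset)
{
	return PAGE_INVALID | PAGE_TYPE | ((type & 0x1f) << 2) |
	       ((offset & 1UL) << 7) | ((offset & ~1UL) << 11);
}

static unsigned long swp_type(unsigned long val)
{
	return (val >> 2) & 0x1f;
}

static unsigned long swp_offset(unsigned long val)
{
	return (val >> 11) | ((val >> 7) & 1UL);
}

int main(void)
{
	unsigned long pte = mk_swap(3, 0x12345);

	assert((pte & 0x603UL) == 0x402UL);	/* swap bit pattern */
	assert((pte & PAGE_PRESENT) == 0);	/* not pte_present */
	assert(pte != PAGE_INVALID);		/* not pte_none (pte == 0x400) */
	assert((pte & 0x601UL) != 0x600UL);	/* not pte_file */
	assert(swp_type(pte) == 3);
	assert(swp_offset(pte) == 0x12345);
	printf("0x%lx -> type %lu, offset 0x%lx\n",
	       pte, swp_type(pte), swp_offset(pte));
	return 0;
}

The masks checked here are the same ones quoted in the pte encoding table earlier in this file: present is (pte & 0x001) == 0x001, none is pte == 0x400, file is (pte & 0x601) == 0x600 and swap is (pte & 0x603) == 0x402.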
@@ -1547,7 +1603,7 @@ static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset) { pte_t pte; offset &= __SWP_OFFSET_MASK; - pte_val(pte) = _PAGE_TYPE_SWAP | ((type & 0x1f) << 2) | + pte_val(pte) = _PAGE_INVALID | _PAGE_TYPE | ((type & 0x1f) << 2) | ((offset & 1UL) << 7) | ((offset & ~1UL) << 11); return pte; } @@ -1570,7 +1626,7 @@ static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset) #define pgoff_to_pte(__off) \ ((pte_t) { ((((__off) & 0x7f) << 1) + (((__off) >> 7) << 12)) \ - | _PAGE_TYPE_FILE }) + | _PAGE_INVALID | _PAGE_PROTECT }) #endif /* !__ASSEMBLY__ */ diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index b0e6435b2f02..0eb37505cab1 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -43,6 +43,7 @@ extern void execve_tail(void); #ifndef CONFIG_64BIT #define TASK_SIZE (1UL << 31) +#define TASK_MAX_SIZE (1UL << 31) #define TASK_UNMAPPED_BASE (1UL << 30) #else /* CONFIG_64BIT */ @@ -51,6 +52,7 @@ extern void execve_tail(void); #define TASK_UNMAPPED_BASE (test_thread_flag(TIF_31BIT) ? \ (1UL << 30) : (1UL << 41)) #define TASK_SIZE TASK_SIZE_OF(current) +#define TASK_MAX_SIZE (1UL << 53) #endif /* CONFIG_64BIT */ diff --git a/arch/s390/include/asm/serial.h b/arch/s390/include/asm/serial.h new file mode 100644 index 000000000000..5b3e48ef534b --- /dev/null +++ b/arch/s390/include/asm/serial.h @@ -0,0 +1,6 @@ +#ifndef _ASM_S390_SERIAL_H +#define _ASM_S390_SERIAL_H + +#define BASE_BAUD 0 + +#endif /* _ASM_S390_SERIAL_H */ diff --git a/arch/s390/include/asm/switch_to.h b/arch/s390/include/asm/switch_to.h index 80b6f11263c4..6dbd559763c9 100644 --- a/arch/s390/include/asm/switch_to.h +++ b/arch/s390/include/asm/switch_to.h @@ -8,6 +8,7 @@ #define __ASM_SWITCH_TO_H #include <linux/thread_info.h> +#include <asm/ptrace.h> extern struct task_struct *__switch_to(void *, void *); extern void update_cr_regs(struct task_struct *task); @@ -68,12 +69,16 @@ static inline void restore_fp_regs(s390_fp_regs *fpregs) static inline void save_access_regs(unsigned int *acrs) { - asm volatile("stam 0,15,%0" : "=Q" (*acrs)); + typedef struct { int _[NUM_ACRS]; } acrstype; + + asm volatile("stam 0,15,%0" : "=Q" (*(acrstype *)acrs)); } static inline void restore_access_regs(unsigned int *acrs) { - asm volatile("lam 0,15,%0" : : "Q" (*acrs)); + typedef struct { int _[NUM_ACRS]; } acrstype; + + asm volatile("lam 0,15,%0" : : "Q" (*(acrstype *)acrs)); } #define switch_to(prev,next,last) do { \ diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h index 6d6d92b4ea11..2cb846c4b37f 100644 --- a/arch/s390/include/asm/tlb.h +++ b/arch/s390/include/asm/tlb.h @@ -63,13 +63,14 @@ static inline void tlb_gather_mmu(struct mmu_gather *tlb, static inline void tlb_flush_mmu(struct mmu_gather *tlb) { + __tlb_flush_mm_lazy(tlb->mm); tlb_table_flush(tlb); } static inline void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end) { - tlb_table_flush(tlb); + tlb_flush_mmu(tlb); } /* diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h index 6b32af30878c..f9fef0425fee 100644 --- a/arch/s390/include/asm/tlbflush.h +++ b/arch/s390/include/asm/tlbflush.h @@ -86,7 +86,7 @@ static inline void __tlb_flush_mm(struct mm_struct * mm) __tlb_flush_full(mm); } -static inline void __tlb_flush_mm_cond(struct mm_struct * mm) +static inline void __tlb_flush_mm_lazy(struct mm_struct * mm) { if (mm->context.flush_mm) { __tlb_flush_mm(mm); @@ -118,13 +118,13 @@ static inline 
void __tlb_flush_mm_cond(struct mm_struct * mm) static inline void flush_tlb_mm(struct mm_struct *mm) { - __tlb_flush_mm_cond(mm); + __tlb_flush_mm_lazy(mm); } static inline void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { - __tlb_flush_mm_cond(vma->vm_mm); + __tlb_flush_mm_lazy(vma->vm_mm); } static inline void flush_tlb_kernel_range(unsigned long start, diff --git a/arch/s390/include/asm/vtime.h b/arch/s390/include/asm/vtime.h new file mode 100644 index 000000000000..af9896c53eb3 --- /dev/null +++ b/arch/s390/include/asm/vtime.h @@ -0,0 +1,7 @@ +#ifndef _S390_VTIME_H +#define _S390_VTIME_H + +#define __ARCH_HAS_VTIME_ACCOUNT +#define __ARCH_HAS_VTIME_TASK_SWITCH + +#endif /* _S390_VTIME_H */ diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index be7a408be7a1..cc30d1fb000c 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -18,6 +18,7 @@ #include <asm/unistd.h> #include <asm/page.h> #include <asm/sigp.h> +#include <asm/irq.h> __PT_R0 = __PT_GPRS __PT_R1 = __PT_GPRS + 4 @@ -435,6 +436,11 @@ io_skip: io_loop: l %r1,BASED(.Ldo_IRQ) lr %r2,%r11 # pass pointer to pt_regs + lhi %r3,IO_INTERRUPT + tm __PT_INT_CODE+8(%r11),0x80 # adapter interrupt ? + jz io_call + lhi %r3,THIN_INTERRUPT +io_call: basr %r14,%r1 # call do_IRQ tm __LC_MACHINE_FLAGS+2,0x10 # MACHINE_FLAG_LPAR jz io_return @@ -584,9 +590,10 @@ ext_skip: mvc __PT_INT_CODE(4,%r11),__LC_EXT_CPU_ADDR mvc __PT_INT_PARM(4,%r11),__LC_EXT_PARAMS TRACE_IRQS_OFF + l %r1,BASED(.Ldo_IRQ) lr %r2,%r11 # pass pointer to pt_regs - l %r1,BASED(.Ldo_extint) - basr %r14,%r1 # call do_extint + lhi %r3,EXT_INTERRUPT + basr %r14,%r1 # call do_IRQ j io_return /* @@ -879,13 +886,13 @@ cleanup_idle: stm %r9,%r10,__LC_SYSTEM_TIMER mvc __LC_LAST_UPDATE_TIMER(8),__TIMER_IDLE_EXIT(%r2) # prepare return psw - n %r8,BASED(cleanup_idle_wait) # clear wait state bit + n %r8,BASED(cleanup_idle_wait) # clear irq & wait state bits l %r9,24(%r11) # return from psw_idle br %r14 cleanup_idle_insn: .long psw_idle_lpsw + 0x80000000 cleanup_idle_wait: - .long 0xfffdffff + .long 0xfcfdffff /* * Integer constants @@ -902,7 +909,6 @@ cleanup_idle_wait: .Ldo_machine_check: .long s390_do_machine_check .Lhandle_mcck: .long s390_handle_mcck .Ldo_IRQ: .long do_IRQ -.Ldo_extint: .long do_extint .Ldo_signal: .long do_signal .Ldo_notify_resume: .long do_notify_resume .Ldo_per_trap: .long do_per_trap diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index 1c039d0c24c7..2b2188b97c6a 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -19,6 +19,7 @@ #include <asm/unistd.h> #include <asm/page.h> #include <asm/sigp.h> +#include <asm/irq.h> __PT_R0 = __PT_GPRS __PT_R1 = __PT_GPRS + 8 @@ -468,6 +469,11 @@ io_skip: xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) io_loop: lgr %r2,%r11 # pass pointer to pt_regs + lghi %r3,IO_INTERRUPT + tm __PT_INT_CODE+8(%r11),0x80 # adapter interrupt ? 
+ jz io_call + lghi %r3,THIN_INTERRUPT +io_call: brasl %r14,do_IRQ tm __LC_MACHINE_FLAGS+6,0x10 # MACHINE_FLAG_LPAR jz io_return @@ -623,7 +629,8 @@ ext_skip: TRACE_IRQS_OFF xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) lgr %r2,%r11 # pass pointer to pt_regs - brasl %r14,do_extint + lghi %r3,EXT_INTERRUPT + brasl %r14,do_IRQ j io_return /* @@ -922,7 +929,7 @@ cleanup_idle: stg %r9,__LC_SYSTEM_TIMER mvc __LC_LAST_UPDATE_TIMER(8),__TIMER_IDLE_EXIT(%r2) # prepare return psw - nihh %r8,0xfffd # clear wait state bit + nihh %r8,0xfcfd # clear irq & wait state bits lg %r9,48(%r11) # return from psw_idle br %r14 cleanup_idle_insn: diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index 54b0995514e8..b34ba0ea96a9 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -22,6 +22,7 @@ #include <asm/cputime.h> #include <asm/lowcore.h> #include <asm/irq.h> +#include <asm/hw_irq.h> #include "entry.h" DEFINE_PER_CPU_SHARED_ALIGNED(struct irq_stat, irq_stat); @@ -42,9 +43,10 @@ struct irq_class { * Since the external and I/O interrupt fields are already sums we would end * up with a sum which counts each interrupt twice. */ -static const struct irq_class irqclass_main_desc[NR_IRQS] = { - [EXTERNAL_INTERRUPT] = {.name = "EXT"}, - [IO_INTERRUPT] = {.name = "I/O"} +static const struct irq_class irqclass_main_desc[NR_IRQS_BASE] = { + [EXT_INTERRUPT] = {.name = "EXT"}, + [IO_INTERRUPT] = {.name = "I/O"}, + [THIN_INTERRUPT] = {.name = "AIO"}, }; /* @@ -86,6 +88,28 @@ static const struct irq_class irqclass_sub_desc[NR_ARCH_IRQS] = { [CPU_RST] = {.name = "RST", .desc = "[CPU] CPU Restart"}, }; +void __init init_IRQ(void) +{ + irq_reserve_irqs(0, THIN_INTERRUPT); + init_cio_interrupts(); + init_airq_interrupts(); + init_ext_interrupts(); +} + +void do_IRQ(struct pt_regs *regs, int irq) +{ + struct pt_regs *old_regs; + + old_regs = set_irq_regs(regs); + irq_enter(); + if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator) + /* Serve timer interrupts first. */ + clock_comparator_work(); + generic_handle_irq(irq); + irq_exit(); + set_irq_regs(old_regs); +} + /* * show_interrupts is needed by /proc/interrupts. */ @@ -100,27 +124,36 @@ int show_interrupts(struct seq_file *p, void *v) for_each_online_cpu(cpu) seq_printf(p, "CPU%d ", cpu); seq_putc(p, '\n'); + goto out; } if (irq < NR_IRQS) { + if (irq >= NR_IRQS_BASE) + goto out; seq_printf(p, "%s: ", irqclass_main_desc[irq].name); for_each_online_cpu(cpu) - seq_printf(p, "%10u ", kstat_cpu(cpu).irqs[irq]); + seq_printf(p, "%10u ", kstat_irqs_cpu(irq, cpu)); seq_putc(p, '\n'); - goto skip_arch_irqs; + goto out; } for (irq = 0; irq < NR_ARCH_IRQS; irq++) { seq_printf(p, "%s: ", irqclass_sub_desc[irq].name); for_each_online_cpu(cpu) - seq_printf(p, "%10u ", per_cpu(irq_stat, cpu).irqs[irq]); + seq_printf(p, "%10u ", + per_cpu(irq_stat, cpu).irqs[irq]); if (irqclass_sub_desc[irq].desc) seq_printf(p, " %s", irqclass_sub_desc[irq].desc); seq_putc(p, '\n'); } -skip_arch_irqs: +out: put_online_cpus(); return 0; } +int arch_show_interrupts(struct seq_file *p, int prec) +{ + return 0; +} + /* * Switch to the asynchronous interrupt stack for softirq execution. */ @@ -159,14 +192,6 @@ asmlinkage void do_softirq(void) local_irq_restore(flags); } -#ifdef CONFIG_PROC_FS -void init_irq_proc(void) -{ - if (proc_mkdir("irq", NULL)) - create_prof_cpu_mask(); -} -#endif - /* * ext_int_hash[index] is the list head for all external interrupts that hash * to this index.
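The hashed dispatch that this comment describes is easier to follow outside of kernel context. The sketch below is a minimal standalone C model, not the kernel implementation: the demo_* names are invented for illustration, the bucket chain is a plain singly linked list where the real code walks an RCU-protected list, and only the hash step is taken verbatim from ext_hash() just below.

#define EXT_HASH_BUCKETS 256

struct demo_ext_int {
	unsigned short code;			/* external interruption code */
	void (*handler)(unsigned short code);
	struct demo_ext_int *next;		/* bucket chain */
};

static struct demo_ext_int *demo_hash[EXT_HASH_BUCKETS];

/* Same mixing step as ext_hash(): fold bits 9-15 down into a 0-255 index. */
static int demo_ext_hash(unsigned short code)
{
	return (code + (code >> 9)) & 0xff;
}

static void demo_register(struct demo_ext_int *p)
{
	int idx = demo_ext_hash(p->code);

	p->next = demo_hash[idx];
	demo_hash[idx] = p;
}

/* Every handler registered for the code runs; other codes that landed in
   the same bucket are skipped by the compare, as in do_ext_interrupt(). */
static void demo_dispatch(unsigned short code)
{
	struct demo_ext_int *p;

	for (p = demo_hash[demo_ext_hash(code)]; p; p = p->next)
		if (p->code == code)
			p->handler(code);
}

static void demo_clock_handler(unsigned short code)
{
	(void) code;
}

int main(void)
{
	/* 0x1004 is the clock comparator code checked in do_ext_interrupt(). */
	struct demo_ext_int clk = { 0x1004, demo_clock_handler, 0 };

	demo_register(&clk);
	demo_dispatch(0x1004);	/* runs demo_clock_handler */
	demo_dispatch(0x2603);	/* no handler registered, nothing runs */
	return 0;
}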
@@ -183,14 +208,6 @@ struct ext_int_info { /* ext_int_hash_lock protects the handler lists for external interrupts */ DEFINE_SPINLOCK(ext_int_hash_lock); -static void __init init_external_interrupts(void) -{ - int idx; - - for (idx = 0; idx < ARRAY_SIZE(ext_int_hash); idx++) - INIT_LIST_HEAD(&ext_int_hash[idx]); -} - static inline int ext_hash(u16 code) { return (code + (code >> 9)) & 0xff; @@ -234,20 +251,13 @@ int unregister_external_interrupt(u16 code, ext_int_handler_t handler) } EXPORT_SYMBOL(unregister_external_interrupt); -void __irq_entry do_extint(struct pt_regs *regs) +static irqreturn_t do_ext_interrupt(int irq, void *dummy) { + struct pt_regs *regs = get_irq_regs(); struct ext_code ext_code; - struct pt_regs *old_regs; struct ext_int_info *p; int index; - old_regs = set_irq_regs(regs); - irq_enter(); - if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator) { - /* Serve timer interrupts first. */ - clock_comparator_work(); - } - kstat_incr_irqs_this_cpu(EXTERNAL_INTERRUPT, NULL); ext_code = *(struct ext_code *) ®s->int_code; if (ext_code.code != 0x1004) __get_cpu_var(s390_idle).nohz_delay = 1; @@ -259,13 +269,25 @@ void __irq_entry do_extint(struct pt_regs *regs) p->handler(ext_code, regs->int_parm, regs->int_parm_long); rcu_read_unlock(); - irq_exit(); - set_irq_regs(old_regs); + + return IRQ_HANDLED; } -void __init init_IRQ(void) +static struct irqaction external_interrupt = { + .name = "EXT", + .handler = do_ext_interrupt, +}; + +void __init init_ext_interrupts(void) { - init_external_interrupts(); + int idx; + + for (idx = 0; idx < ARRAY_SIZE(ext_int_hash); idx++) + INIT_LIST_HEAD(&ext_int_hash[idx]); + + irq_set_chip_and_handler(EXT_INTERRUPT, + &dummy_irq_chip, handle_percpu_irq); + setup_irq(EXT_INTERRUPT, &external_interrupt); } static DEFINE_SPINLOCK(sc_irq_lock); @@ -313,69 +335,3 @@ void measurement_alert_subclass_unregister(void) spin_unlock(&ma_subclass_lock); } EXPORT_SYMBOL(measurement_alert_subclass_unregister); - -#ifdef CONFIG_SMP -void synchronize_irq(unsigned int irq) -{ - /* - * Not needed, the handler is protected by a lock and IRQs that occur - * after the handler is deleted are just NOPs. 
- */ -} -EXPORT_SYMBOL_GPL(synchronize_irq); -#endif - -#ifndef CONFIG_PCI - -/* Only PCI devices have dynamically-defined IRQ handlers */ - -int request_irq(unsigned int irq, irq_handler_t handler, - unsigned long irqflags, const char *devname, void *dev_id) -{ - return -EINVAL; -} -EXPORT_SYMBOL_GPL(request_irq); - -void free_irq(unsigned int irq, void *dev_id) -{ - WARN_ON(1); -} -EXPORT_SYMBOL_GPL(free_irq); - -void enable_irq(unsigned int irq) -{ - WARN_ON(1); -} -EXPORT_SYMBOL_GPL(enable_irq); - -void disable_irq(unsigned int irq) -{ - WARN_ON(1); -} -EXPORT_SYMBOL_GPL(disable_irq); - -#endif /* !CONFIG_PCI */ - -void disable_irq_nosync(unsigned int irq) -{ - disable_irq(irq); -} -EXPORT_SYMBOL_GPL(disable_irq_nosync); - -unsigned long probe_irq_on(void) -{ - return 0; -} -EXPORT_SYMBOL_GPL(probe_irq_on); - -int probe_irq_off(unsigned long val) -{ - return 0; -} -EXPORT_SYMBOL_GPL(probe_irq_off); - -unsigned int probe_irq_mask(unsigned long val) -{ - return val; -} -EXPORT_SYMBOL_GPL(probe_irq_mask); diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index 3388b2b2a07d..adbbe7f1cb0d 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -105,14 +105,31 @@ static int __kprobes get_fixup_type(kprobe_opcode_t *insn) fixup |= FIXUP_RETURN_REGISTER; break; case 0xeb: - if ((insn[2] & 0xff) == 0x44 || /* bxhg */ - (insn[2] & 0xff) == 0x45) /* bxleg */ + switch (insn[2] & 0xff) { + case 0x44: /* bxhg */ + case 0x45: /* bxleg */ fixup = FIXUP_BRANCH_NOT_TAKEN; + break; + } break; case 0xe3: /* bctg */ if ((insn[2] & 0xff) == 0x46) fixup = FIXUP_BRANCH_NOT_TAKEN; break; + case 0xec: + switch (insn[2] & 0xff) { + case 0xe5: /* clgrb */ + case 0xe6: /* cgrb */ + case 0xf6: /* crb */ + case 0xf7: /* clrb */ + case 0xfc: /* cgib */ + case 0xfd: /* cglib */ + case 0xfe: /* cib */ + case 0xff: /* clib */ + fixup = FIXUP_BRANCH_NOT_TAKEN; + break; + } + break; } return fixup; } diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c index 504175ebf8b0..c4c033819879 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c @@ -214,10 +214,7 @@ static int notrace s390_revalidate_registers(struct mci *mci) : "0", "cc"); #endif /* Revalidate clock comparator register */ - if (S390_lowcore.clock_comparator == -1) - set_clock_comparator(S390_lowcore.mcck_clock); - else - set_clock_comparator(S390_lowcore.clock_comparator); + set_clock_comparator(S390_lowcore.clock_comparator); /* Check if old PSW is valid */ if (!mci->wp) /* diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 2bc3eddae34a..c5dbb335716d 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -71,6 +71,7 @@ void arch_cpu_idle(void) } /* Halt the cpu and keep track of cpu time accounting. */ vtime_stop_cpu(); + local_irq_enable(); } void arch_cpu_idle_exit(void) diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index e9fadb04e3c6..9556905bd3ce 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -60,11 +60,11 @@ void update_cr_regs(struct task_struct *task) __ctl_store(cr, 0, 2); cr_new[1] = cr[1]; - /* Set or clear transaction execution TXC/PIFO bits 8 and 9. */ + /* Set or clear transaction execution TXC bit 8. */ if (task->thread.per_flags & PER_FLAG_NO_TE) - cr_new[0] = cr[0] & ~(3UL << 54); + cr_new[0] = cr[0] & ~(1UL << 55); else - cr_new[0] = cr[0] | (3UL << 54); + cr_new[0] = cr[0] | (1UL << 55); /* Set or clear transaction execution TDC bits 62 and 63. 
*/ cr_new[2] = cr[2] & ~3UL; if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND) { @@ -1299,7 +1299,7 @@ int regs_query_register_offset(const char *name) if (!name || *name != 'r') return -EINVAL; - if (strict_strtoul(name + 1, 10, &offset)) + if (kstrtoul(name + 1, 10, &offset)) return -EINVAL; if (offset >= NUM_GPRS) return -EINVAL; diff --git a/arch/s390/kernel/suspend.c b/arch/s390/kernel/suspend.c index c479d2f9605b..737bff38e3ee 100644 --- a/arch/s390/kernel/suspend.c +++ b/arch/s390/kernel/suspend.c @@ -10,6 +10,9 @@ #include <linux/suspend.h> #include <linux/mm.h> #include <asm/ctl_reg.h> +#include <asm/ipl.h> +#include <asm/cio.h> +#include <asm/pci.h> /* * References to section boundaries @@ -211,3 +214,11 @@ void restore_processor_state(void) __ctl_set_bit(0,28); local_mcck_enable(); } + +/* Called at the end of swsusp_arch_resume */ +void s390_early_resume(void) +{ + lgr_info_log(); + channel_subsystem_reinit(); + zpci_rescan(); +} diff --git a/arch/s390/kernel/swsusp_asm64.S b/arch/s390/kernel/swsusp_asm64.S index c487be4cfc81..6b09fdffbd2f 100644 --- a/arch/s390/kernel/swsusp_asm64.S +++ b/arch/s390/kernel/swsusp_asm64.S @@ -281,11 +281,8 @@ restore_registers: lghi %r2,0 brasl %r14,arch_set_page_states - /* Log potential guest relocation */ - brasl %r14,lgr_info_log - - /* Reinitialize the channel subsystem */ - brasl %r14,channel_subsystem_reinit + /* Call arch specific early resume code */ + brasl %r14,s390_early_resume /* Return 0 */ lmg %r6,%r15,STACK_FRAME_OVERHEAD + __SF_GPRS(%r15) diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 876546b9cfa1..064c3082ab33 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -92,7 +92,6 @@ void clock_comparator_work(void) struct clock_event_device *cd; S390_lowcore.clock_comparator = -1ULL; - set_clock_comparator(S390_lowcore.clock_comparator); cd = &__get_cpu_var(comparators); cd->event_handler(cd); } diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index d7776281cb60..05d75c413137 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -63,7 +63,7 @@ static int __init vdso_setup(char *s) else if (strncmp(s, "off", 4) == 0) vdso_enabled = 0; else { - rc = strict_strtoul(s, 0, &val); + rc = kstrtoul(s, 0, &val); vdso_enabled = rc ? 
0 : !!val; } return !rc; @@ -113,11 +113,11 @@ int vdso_alloc_per_cpu(struct _lowcore *lowcore) clear_table((unsigned long *) segment_table, _SEGMENT_ENTRY_EMPTY, PAGE_SIZE << SEGMENT_ORDER); - clear_table((unsigned long *) page_table, _PAGE_TYPE_EMPTY, + clear_table((unsigned long *) page_table, _PAGE_INVALID, 256*sizeof(unsigned long)); *(unsigned long *) segment_table = _SEGMENT_ENTRY + page_table; - *(unsigned long *) page_table = _PAGE_RO + page_frame; + *(unsigned long *) page_table = _PAGE_PROTECT + page_frame; psal = (u32 *) (page_table + 256*sizeof(unsigned long)); aste = psal + 32; diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index 9b9c1b78ec67..abcfab55f99b 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -19,6 +19,7 @@ #include <asm/irq_regs.h> #include <asm/cputime.h> #include <asm/vtimer.h> +#include <asm/vtime.h> #include <asm/irq.h> #include "entry.h" diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index 3074475c8ae0..3a74d8af0d69 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -119,12 +119,21 @@ static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu) * The layout is as follows: * - gpr 2 contains the subchannel id (passed as addr) * - gpr 3 contains the virtqueue index (passed as datamatch) + * - gpr 4 contains the index on the bus (optionally) */ - ret = kvm_io_bus_write(vcpu->kvm, KVM_VIRTIO_CCW_NOTIFY_BUS, - vcpu->run->s.regs.gprs[2], - 8, &vcpu->run->s.regs.gprs[3]); + ret = kvm_io_bus_write_cookie(vcpu->kvm, KVM_VIRTIO_CCW_NOTIFY_BUS, + vcpu->run->s.regs.gprs[2], + 8, &vcpu->run->s.regs.gprs[3], + vcpu->run->s.regs.gprs[4]); srcu_read_unlock(&vcpu->kvm->srcu, idx); - /* kvm_io_bus_write returns -EOPNOTSUPP if it found no match. */ + + /* + * Return cookie in gpr 2, but don't overwrite the register if the + * diagnose will be handled by userspace. + */ + if (ret != -EOPNOTSUPP) + vcpu->run->s.regs.gprs[2] = ret; + /* kvm_io_bus_write_cookie returns -EOPNOTSUPP if it found no match. */ return ret < 0 ? 
ret : 0; } diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h index 302e0e52b009..99d789e8a018 100644 --- a/arch/s390/kvm/gaccess.h +++ b/arch/s390/kvm/gaccess.h @@ -42,9 +42,11 @@ static inline void __user *__gptr_to_uptr(struct kvm_vcpu *vcpu, ({ \ __typeof__(gptr) __uptr = __gptr_to_uptr(vcpu, gptr, 1);\ int __mask = sizeof(__typeof__(*(gptr))) - 1; \ - int __ret = PTR_RET((void __force *)__uptr); \ + int __ret; \ \ - if (!__ret) { \ + if (IS_ERR((void __force *)__uptr)) { \ + __ret = PTR_ERR((void __force *)__uptr); \ + } else { \ BUG_ON((unsigned long)__uptr & __mask); \ __ret = get_user(x, __uptr); \ } \ @@ -55,9 +57,11 @@ static inline void __user *__gptr_to_uptr(struct kvm_vcpu *vcpu, ({ \ __typeof__(gptr) __uptr = __gptr_to_uptr(vcpu, gptr, 1);\ int __mask = sizeof(__typeof__(*(gptr))) - 1; \ - int __ret = PTR_RET((void __force *)__uptr); \ + int __ret; \ \ - if (!__ret) { \ + if (IS_ERR((void __force *)__uptr)) { \ + __ret = PTR_ERR((void __force *)__uptr); \ + } else { \ BUG_ON((unsigned long)__uptr & __mask); \ __ret = put_user(x, __uptr); \ } \ diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 34c1c9a90be2..776dafe918db 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -28,6 +28,7 @@ #include <asm/pgtable.h> #include <asm/nmi.h> #include <asm/switch_to.h> +#include <asm/facility.h> #include <asm/sclp.h> #include "kvm-s390.h" #include "gaccess.h" @@ -84,9 +85,15 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { NULL } }; -static unsigned long long *facilities; +unsigned long *vfacilities; static struct gmap_notifier gmap_notifier; +/* test availability of vfacility */ +static inline int test_vfacility(unsigned long nr) +{ + return __test_facility(nr, (void *) vfacilities); +} + /* Section: not file related */ int kvm_arch_hardware_enable(void *garbage) { @@ -387,7 +394,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) vcpu->arch.sie_block->ecb = 6; vcpu->arch.sie_block->ecb2 = 8; vcpu->arch.sie_block->eca = 0xC1002001U; - vcpu->arch.sie_block->fac = (int) (long) facilities; + vcpu->arch.sie_block->fac = (int) (long) vfacilities; hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); tasklet_init(&vcpu->arch.tasklet, kvm_s390_tasklet, (unsigned long) vcpu); @@ -1063,6 +1070,10 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) return 0; } +void kvm_arch_memslots_updated(struct kvm *kvm) +{ +} + /* Section: memory related */ int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, @@ -1129,20 +1140,20 @@ static int __init kvm_s390_init(void) * to hold the maximum amount of facilities. On the other hand, we * only set facilities that are known to work in KVM. 
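
The test_vfacility() helper above defers to __test_facility(), which reads the STFLE result as a big-endian byte stream: facility bit n lives in byte n / 8, at bit 7 - (n % 8). A standalone model of that lookup over the masked vfacilities words from the hunk (the byte-unpacking loop is only needed here because this model may run on a little-endian host):

#include <stdint.h>
#include <stdio.h>

/* Facility bit n: byte n/8 of the list, MSB-first within the byte. */
static int test_facility_bit(unsigned long nr, const uint8_t *fac)
{
	return (fac[nr >> 3] >> (7 - (nr & 7))) & 1;
}

int main(void)
{
	uint64_t vfac[2] = { 0xff82fff3f47c0000ULL, 0x001c000000000000ULL };
	uint8_t fac[16];
	unsigned long nr;
	int i;

	/* Rebuild the big-endian byte stream STFLE would have stored. */
	for (i = 0; i < 8; i++) {
		fac[i]     = vfac[0] >> (56 - 8 * i);
		fac[i + 8] = vfac[1] >> (56 - 8 * i);
	}
	for (nr = 0; nr < 128; nr++)
		if (test_facility_bit(nr, fac))
			printf("facility %lu offered to guests\n", nr);
	return 0;
}
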
*/ - facilities = (unsigned long long *) get_zeroed_page(GFP_KERNEL|GFP_DMA); - if (!facilities) { + vfacilities = (unsigned long *) get_zeroed_page(GFP_KERNEL|GFP_DMA); + if (!vfacilities) { kvm_exit(); return -ENOMEM; } - memcpy(facilities, S390_lowcore.stfle_fac_list, 16); - facilities[0] &= 0xff82fff3f47c0000ULL; - facilities[1] &= 0x001c000000000000ULL; + memcpy(vfacilities, S390_lowcore.stfle_fac_list, 16); + vfacilities[0] &= 0xff82fff3f47c0000UL; + vfacilities[1] &= 0x001c000000000000UL; return 0; } static void __exit kvm_s390_exit(void) { - free_page((unsigned long) facilities); + free_page((unsigned long) vfacilities); kvm_exit(); } diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 028ca9fd2158..dc99f1ca4267 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -24,6 +24,9 @@ typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu); +/* declare vfacilities extern */ +extern unsigned long *vfacilities; + /* negativ values are error codes, positive values for internal conditions */ #define SIE_INTERCEPT_RERUNVCPU (1<<0) #define SIE_INTERCEPT_UCONTROL (1<<1) @@ -112,6 +115,13 @@ static inline u64 kvm_s390_get_base_disp_rs(struct kvm_vcpu *vcpu) return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2; } +/* Set the condition code in the guest program status word */ +static inline void kvm_s390_set_psw_cc(struct kvm_vcpu *vcpu, unsigned long cc) +{ + vcpu->arch.sie_block->gpsw.mask &= ~(3UL << 44); + vcpu->arch.sie_block->gpsw.mask |= cc << 44; +} + int kvm_s390_handle_wait(struct kvm_vcpu *vcpu); enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer); void kvm_s390_tasklet(unsigned long parm); diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 4cdc54e63ebc..59200ee275e5 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -164,8 +164,7 @@ static int handle_tpi(struct kvm_vcpu *vcpu) kfree(inti); no_interrupt: /* Set condition code and we're done. */ - vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); - vcpu->arch.sie_block->gpsw.mask |= (cc & 3ul) << 44; + kvm_s390_set_psw_cc(vcpu, cc); return 0; } @@ -220,15 +219,13 @@ static int handle_io_inst(struct kvm_vcpu *vcpu) * Set condition code 3 to stop the guest from issueing channel * I/O instructions. 
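
The new kvm_s390_set_psw_cc() helper, used throughout priv.c below, encodes a small bit-position fact worth spelling out: the condition code occupies PSW bits 18-19 in the architecture's MSB-first numbering, which in a 64-bit mask counted from the least significant bit is bits 45-44, hence the shift by 44. A standalone version of the same arithmetic (the explicit & 3 is our addition; kernel callers only pass 0 to 3):

#include <assert.h>
#include <stdio.h>

/* PSW bits 18-19 (MSB-first) == bits 45-44 from the LSB of the mask. */
static void set_psw_cc(unsigned long *psw_mask, unsigned long cc)
{
	*psw_mask &= ~(3UL << 44);
	*psw_mask |= (cc & 3) << 44;
}

int main(void)
{
	unsigned long mask = ~0UL;

	set_psw_cc(&mask, 3);
	assert(((mask >> 44) & 3) == 3);
	set_psw_cc(&mask, 0);
	assert(((mask >> 44) & 3) == 0);
	printf("mask = %#lx\n", mask);
	return 0;
}
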
*/ - vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); - vcpu->arch.sie_block->gpsw.mask |= (3 & 3ul) << 44; + kvm_s390_set_psw_cc(vcpu, 3); return 0; } } static int handle_stfl(struct kvm_vcpu *vcpu) { - unsigned int facility_list; int rc; vcpu->stat.instruction_stfl++; @@ -236,15 +233,13 @@ static int handle_stfl(struct kvm_vcpu *vcpu) if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); - /* only pass the facility bits, which we can handle */ - facility_list = S390_lowcore.stfl_fac_list & 0xff82fff3; - rc = copy_to_guest(vcpu, offsetof(struct _lowcore, stfl_fac_list), - &facility_list, sizeof(facility_list)); + vfacilities, 4); if (rc) return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - VCPU_EVENT(vcpu, 5, "store facility list value %x", facility_list); - trace_kvm_s390_handle_stfl(vcpu, facility_list); + VCPU_EVENT(vcpu, 5, "store facility list value %x", + *(unsigned int *) vfacilities); + trace_kvm_s390_handle_stfl(vcpu, *(unsigned int *) vfacilities); return 0; } @@ -387,7 +382,7 @@ static int handle_stsi(struct kvm_vcpu *vcpu) return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); if (fc > 3) { - vcpu->arch.sie_block->gpsw.mask |= 3ul << 44; /* cc 3 */ + kvm_s390_set_psw_cc(vcpu, 3); return 0; } @@ -397,7 +392,7 @@ static int handle_stsi(struct kvm_vcpu *vcpu) if (fc == 0) { vcpu->run->s.regs.gprs[0] = 3 << 28; - vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); /* cc 0 */ + kvm_s390_set_psw_cc(vcpu, 0); return 0; } @@ -431,12 +426,11 @@ static int handle_stsi(struct kvm_vcpu *vcpu) } trace_kvm_s390_handle_stsi(vcpu, fc, sel1, sel2, operand2); free_page(mem); - vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); + kvm_s390_set_psw_cc(vcpu, 0); vcpu->run->s.regs.gprs[0] = 0; return 0; out_no_data: - /* condition code 3 */ - vcpu->arch.sie_block->gpsw.mask |= 3ul << 44; + kvm_s390_set_psw_cc(vcpu, 3); out_exception: free_page(mem); return rc; @@ -494,12 +488,12 @@ static int handle_epsw(struct kvm_vcpu *vcpu) kvm_s390_get_regs_rre(vcpu, ®1, ®2); /* This basically extracts the mask half of the psw. 
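
The handle_epsw() fix below is about constant types: adding the UL suffix keeps the masks unsigned long so the and/or happens cleanly at full register width. Functionally the instruction splits the 64-bit PSW mask across the low halves of two registers, preserving their high halves. A standalone model of that split (register values here are arbitrary test data):

#include <inttypes.h>
#include <stdio.h>

/* EPSW: high mask word into r1's low half, low mask word into r2's. */
static void epsw(uint64_t psw_mask, uint64_t *r1, uint64_t *r2)
{
	*r1 = (*r1 & 0xffffffff00000000ULL) | (psw_mask >> 32);
	*r2 = (*r2 & 0xffffffff00000000ULL) | (psw_mask & 0xffffffffULL);
}

int main(void)
{
	uint64_t r1 = 0xdeadbeefdeadbeefULL, r2 = r1;

	epsw(0x0705000180000000ULL, &r1, &r2);
	printf("r1 = %#018" PRIx64 "\nr2 = %#018" PRIx64 "\n", r1, r2);
	return 0;
}
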
*/ - vcpu->run->s.regs.gprs[reg1] &= 0xffffffff00000000; + vcpu->run->s.regs.gprs[reg1] &= 0xffffffff00000000UL; vcpu->run->s.regs.gprs[reg1] |= vcpu->arch.sie_block->gpsw.mask >> 32; if (reg2) { - vcpu->run->s.regs.gprs[reg2] &= 0xffffffff00000000; + vcpu->run->s.regs.gprs[reg2] &= 0xffffffff00000000UL; vcpu->run->s.regs.gprs[reg2] |= - vcpu->arch.sie_block->gpsw.mask & 0x00000000ffffffff; + vcpu->arch.sie_block->gpsw.mask & 0x00000000ffffffffUL; } return 0; } diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c index c61b9fad43cc..57c87d7d7ede 100644 --- a/arch/s390/lib/delay.c +++ b/arch/s390/lib/delay.c @@ -44,7 +44,6 @@ static void __udelay_disabled(unsigned long long usecs) do { set_clock_comparator(end); vtime_stop_cpu(); - local_irq_disable(); } while (get_tod_clock() < end); lockdep_on(); __ctl_load(cr0, 0, 0); @@ -64,7 +63,6 @@ static void __udelay_enabled(unsigned long long usecs) set_clock_comparator(end); } vtime_stop_cpu(); - local_irq_disable(); if (clock_saved) local_tick_enable(clock_saved); } while (get_tod_clock() < end); diff --git a/arch/s390/lib/uaccess_pt.c b/arch/s390/lib/uaccess_pt.c index 50ea137a2d3c..1694d738b175 100644 --- a/arch/s390/lib/uaccess_pt.c +++ b/arch/s390/lib/uaccess_pt.c @@ -86,28 +86,28 @@ static unsigned long follow_table(struct mm_struct *mm, switch (mm->context.asce_bits & _ASCE_TYPE_MASK) { case _ASCE_TYPE_REGION1: table = table + ((address >> 53) & 0x7ff); - if (unlikely(*table & _REGION_ENTRY_INV)) + if (unlikely(*table & _REGION_ENTRY_INVALID)) return -0x39UL; table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); /* fallthrough */ case _ASCE_TYPE_REGION2: table = table + ((address >> 42) & 0x7ff); - if (unlikely(*table & _REGION_ENTRY_INV)) + if (unlikely(*table & _REGION_ENTRY_INVALID)) return -0x3aUL; table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); /* fallthrough */ case _ASCE_TYPE_REGION3: table = table + ((address >> 31) & 0x7ff); - if (unlikely(*table & _REGION_ENTRY_INV)) + if (unlikely(*table & _REGION_ENTRY_INVALID)) return -0x3bUL; table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); /* fallthrough */ case _ASCE_TYPE_SEGMENT: table = table + ((address >> 20) & 0x7ff); - if (unlikely(*table & _SEGMENT_ENTRY_INV)) + if (unlikely(*table & _SEGMENT_ENTRY_INVALID)) return -0x10UL; if (unlikely(*table & _SEGMENT_ENTRY_LARGE)) { - if (write && (*table & _SEGMENT_ENTRY_RO)) + if (write && (*table & _SEGMENT_ENTRY_PROTECT)) return -0x04UL; return (*table & _SEGMENT_ENTRY_ORIGIN_LARGE) + (address & ~_SEGMENT_ENTRY_ORIGIN_LARGE); @@ -117,7 +117,7 @@ static unsigned long follow_table(struct mm_struct *mm, table = table + ((address >> 12) & 0xff); if (unlikely(*table & _PAGE_INVALID)) return -0x11UL; - if (write && (*table & _PAGE_RO)) + if (write && (*table & _PAGE_PROTECT)) return -0x04UL; return (*table & PAGE_MASK) + (address & ~PAGE_MASK); } @@ -130,13 +130,13 @@ static unsigned long follow_table(struct mm_struct *mm, unsigned long *table = (unsigned long *)__pa(mm->pgd); table = table + ((address >> 20) & 0x7ff); - if (unlikely(*table & _SEGMENT_ENTRY_INV)) + if (unlikely(*table & _SEGMENT_ENTRY_INVALID)) return -0x10UL; table = (unsigned long *)(*table & _SEGMENT_ENTRY_ORIGIN); table = table + ((address >> 12) & 0xff); if (unlikely(*table & _PAGE_INVALID)) return -0x11UL; - if (write && (*table & _PAGE_RO)) + if (write && (*table & _PAGE_PROTECT)) return -0x04UL; return (*table & PAGE_MASK) + (address & ~PAGE_MASK); } diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c index 
3ad65b04ac15..46d517c3c763 100644 --- a/arch/s390/mm/dump_pagetables.c +++ b/arch/s390/mm/dump_pagetables.c @@ -53,7 +53,7 @@ static void print_prot(struct seq_file *m, unsigned int pr, int level) seq_printf(m, "I\n"); return; } - seq_printf(m, "%s", pr & _PAGE_RO ? "RO " : "RW "); + seq_printf(m, "%s", pr & _PAGE_PROTECT ? "RO " : "RW "); seq_printf(m, "%s", pr & _PAGE_CO ? "CO " : " "); seq_putc(m, '\n'); } @@ -105,12 +105,12 @@ static void note_page(struct seq_file *m, struct pg_state *st, } /* - * The actual page table walker functions. In order to keep the implementation - * of print_prot() short, we only check and pass _PAGE_INVALID and _PAGE_RO - * flags to note_page() if a region, segment or page table entry is invalid or - * read-only. - * After all it's just a hint that the current level being walked contains an - * invalid or read-only entry. + * The actual page table walker functions. In order to keep the + * implementation of print_prot() short, we only check and pass + * _PAGE_INVALID and _PAGE_PROTECT flags to note_page() if a region, + * segment or page table entry is invalid or read-only. + * After all it's just a hint that the current level being walked + * contains an invalid or read-only entry. */ static void walk_pte_level(struct seq_file *m, struct pg_state *st, pmd_t *pmd, unsigned long addr) @@ -122,14 +122,14 @@ static void walk_pte_level(struct seq_file *m, struct pg_state *st, for (i = 0; i < PTRS_PER_PTE && addr < max_addr; i++) { st->current_address = addr; pte = pte_offset_kernel(pmd, addr); - prot = pte_val(*pte) & (_PAGE_RO | _PAGE_INVALID); + prot = pte_val(*pte) & (_PAGE_PROTECT | _PAGE_INVALID); note_page(m, st, prot, 4); addr += PAGE_SIZE; } } #ifdef CONFIG_64BIT -#define _PMD_PROT_MASK (_SEGMENT_ENTRY_RO | _SEGMENT_ENTRY_CO) +#define _PMD_PROT_MASK (_SEGMENT_ENTRY_PROTECT | _SEGMENT_ENTRY_CO) #else #define _PMD_PROT_MASK 0 #endif diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c index 1f5315d1215c..5d758db27bdc 100644 --- a/arch/s390/mm/gup.c +++ b/arch/s390/mm/gup.c @@ -24,7 +24,7 @@ static inline int gup_pte_range(pmd_t *pmdp, pmd_t pmd, unsigned long addr, pte_t *ptep, pte; struct page *page; - mask = (write ? _PAGE_RO : 0) | _PAGE_INVALID | _PAGE_SPECIAL; + mask = (write ? _PAGE_PROTECT : 0) | _PAGE_INVALID | _PAGE_SPECIAL; ptep = ((pte_t *) pmd_deref(pmd)) + pte_index(addr); do { @@ -55,8 +55,8 @@ static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr, struct page *head, *page, *tail; int refs; - result = write ? 0 : _SEGMENT_ENTRY_RO; - mask = result | _SEGMENT_ENTRY_INV; + result = write ? 0 : _SEGMENT_ENTRY_PROTECT; + mask = result | _SEGMENT_ENTRY_INVALID; if ((pmd_val(pmd) & mask) != result) return 0; VM_BUG_ON(!pfn_valid(pmd_val(pmd) >> PAGE_SHIFT)); diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index 121089d57802..248445f92604 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -8,21 +8,127 @@ #include <linux/mm.h> #include <linux/hugetlb.h> +static inline pmd_t __pte_to_pmd(pte_t pte) +{ + int none, young, prot; + pmd_t pmd; + + /* + * Convert encoding pte bits pmd bits + * .IR...wrdytp ..R...I...y. + * empty .10...000000 -> ..0...1...0. + * prot-none, clean, old .11...000001 -> ..0...1...1. + * prot-none, clean, young .11...000101 -> ..1...1...1. + * prot-none, dirty, old .10...001001 -> ..0...1...1. + * prot-none, dirty, young .10...001101 -> ..1...1...1. + * read-only, clean, old .11...010001 -> ..1...1...0. + * read-only, clean, young .01...010101 -> ..1...0...1. 
+ * read-only, dirty, old .11...011001 -> ..1...1...0. + * read-only, dirty, young .01...011101 -> ..1...0...1. + * read-write, clean, old .11...110001 -> ..0...1...0. + * read-write, clean, young .01...110101 -> ..0...0...1. + * read-write, dirty, old .10...111001 -> ..0...1...0. + * read-write, dirty, young .00...111101 -> ..0...0...1. + * Huge ptes are dirty by definition, a clean pte is made dirty + * by the conversion. + */ + if (pte_present(pte)) { + pmd_val(pmd) = pte_val(pte) & PAGE_MASK; + if (pte_val(pte) & _PAGE_INVALID) + pmd_val(pmd) |= _SEGMENT_ENTRY_INVALID; + none = (pte_val(pte) & _PAGE_PRESENT) && + !(pte_val(pte) & _PAGE_READ) && + !(pte_val(pte) & _PAGE_WRITE); + prot = (pte_val(pte) & _PAGE_PROTECT) && + !(pte_val(pte) & _PAGE_WRITE); + young = pte_val(pte) & _PAGE_YOUNG; + if (none || young) + pmd_val(pmd) |= _SEGMENT_ENTRY_YOUNG; + if (prot || (none && young)) + pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT; + } else + pmd_val(pmd) = _SEGMENT_ENTRY_INVALID; + return pmd; +} + +static inline pte_t __pmd_to_pte(pmd_t pmd) +{ + pte_t pte; + + /* + * Convert encoding pmd bits pte bits + * ..R...I...y. .IR...wrdytp + * empty ..0...1...0. -> .10...000000 + * prot-none, old ..0...1...1. -> .10...001001 + * prot-none, young ..1...1...1. -> .10...001101 + * read-only, old ..1...1...0. -> .11...011001 + * read-only, young ..1...0...1. -> .01...011101 + * read-write, old ..0...1...0. -> .10...111001 + * read-write, young ..0...0...1. -> .00...111101 + * Huge ptes are dirty by definition + */ + if (pmd_present(pmd)) { + pte_val(pte) = _PAGE_PRESENT | _PAGE_LARGE | _PAGE_DIRTY | + (pmd_val(pmd) & PAGE_MASK); + if (pmd_val(pmd) & _SEGMENT_ENTRY_INVALID) + pte_val(pte) |= _PAGE_INVALID; + if (pmd_prot_none(pmd)) { + if (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT) + pte_val(pte) |= _PAGE_YOUNG; + } else { + pte_val(pte) |= _PAGE_READ; + if (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT) + pte_val(pte) |= _PAGE_PROTECT; + else + pte_val(pte) |= _PAGE_WRITE; + if (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG) + pte_val(pte) |= _PAGE_YOUNG; + } + } else + pte_val(pte) = _PAGE_INVALID; + return pte; +} void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *pteptr, pte_t pteval) + pte_t *ptep, pte_t pte) { - pmd_t *pmdp = (pmd_t *) pteptr; - unsigned long mask; + pmd_t pmd; + pmd = __pte_to_pmd(pte); if (!MACHINE_HAS_HPAGE) { - pteptr = (pte_t *) pte_page(pteval)[1].index; - mask = pte_val(pteval) & - (_SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO); - pte_val(pteval) = (_SEGMENT_ENTRY + __pa(pteptr)) | mask; + pmd_val(pmd) &= ~_SEGMENT_ENTRY_ORIGIN; + pmd_val(pmd) |= pte_page(pte)[1].index; + } else + pmd_val(pmd) |= _SEGMENT_ENTRY_LARGE | _SEGMENT_ENTRY_CO; + *(pmd_t *) ptep = pmd; +} + +pte_t huge_ptep_get(pte_t *ptep) +{ + unsigned long origin; + pmd_t pmd; + + pmd = *(pmd_t *) ptep; + if (!MACHINE_HAS_HPAGE && pmd_present(pmd)) { + origin = pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN; + pmd_val(pmd) &= ~_SEGMENT_ENTRY_ORIGIN; + pmd_val(pmd) |= *(unsigned long *) origin; } + return __pmd_to_pte(pmd); +} - pmd_val(*pmdp) = pte_val(pteval); +pte_t huge_ptep_get_and_clear(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + pmd_t *pmdp = (pmd_t *) ptep; + pte_t pte = huge_ptep_get(ptep); + + if (MACHINE_HAS_IDTE) + __pmd_idte(addr, pmdp); + else + __pmd_csp(pmdp); + pmd_val(*pmdp) = _SEGMENT_ENTRY_EMPTY; + return pte; } int arch_prepare_hugepage(struct page *page) @@ -58,7 +164,7 @@ void arch_release_hugepage(struct page *page) ptep = (pte_t *) page[1].index; if (!ptep) return; - clear_table((unsigned 
long *) ptep, _PAGE_TYPE_EMPTY, + clear_table((unsigned long *) ptep, _PAGE_INVALID, PTRS_PER_PTE * sizeof(pte_t)); page_table_free(&init_mm, (unsigned long *) ptep); page[1].index = 0; diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c index 80adfbf75065..990397420e6b 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c @@ -118,7 +118,7 @@ void kernel_map_pages(struct page *page, int numpages, int enable) pte = pte_offset_kernel(pmd, address); if (!enable) { __ptep_ipte(address, pte); - pte_val(*pte) = _PAGE_TYPE_EMPTY; + pte_val(*pte) = _PAGE_INVALID; continue; } pte_val(*pte) = __pa(address); diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index a8154a1a2c94..bf7c0dc64a76 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -161,7 +161,7 @@ static int gmap_unlink_segment(struct gmap *gmap, unsigned long *table) struct gmap_rmap *rmap; struct page *page; - if (*table & _SEGMENT_ENTRY_INV) + if (*table & _SEGMENT_ENTRY_INVALID) return 0; page = pfn_to_page(*table >> PAGE_SHIFT); mp = (struct gmap_pgtable *) page->index; @@ -172,7 +172,7 @@ static int gmap_unlink_segment(struct gmap *gmap, unsigned long *table) kfree(rmap); break; } - *table = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | mp->vmaddr; + *table = mp->vmaddr | _SEGMENT_ENTRY_INVALID | _SEGMENT_ENTRY_PROTECT; return 1; } @@ -258,7 +258,7 @@ static int gmap_alloc_table(struct gmap *gmap, return -ENOMEM; new = (unsigned long *) page_to_phys(page); crst_table_init(new, init); - if (*table & _REGION_ENTRY_INV) { + if (*table & _REGION_ENTRY_INVALID) { list_add(&page->lru, &gmap->crst_list); *table = (unsigned long) new | _REGION_ENTRY_LENGTH | (*table & _REGION_ENTRY_TYPE_MASK); @@ -292,22 +292,22 @@ int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len) for (off = 0; off < len; off += PMD_SIZE) { /* Walk the guest addr space page table */ table = gmap->table + (((to + off) >> 53) & 0x7ff); - if (*table & _REGION_ENTRY_INV) + if (*table & _REGION_ENTRY_INVALID) goto out; table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); table = table + (((to + off) >> 42) & 0x7ff); - if (*table & _REGION_ENTRY_INV) + if (*table & _REGION_ENTRY_INVALID) goto out; table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); table = table + (((to + off) >> 31) & 0x7ff); - if (*table & _REGION_ENTRY_INV) + if (*table & _REGION_ENTRY_INVALID) goto out; table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); table = table + (((to + off) >> 20) & 0x7ff); /* Clear segment table entry in guest address space. 
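
gmap_unmap_segment() here walks the same four table levels as follow_table() in the uaccess_pt.c hunk: each region level and the segment level index 11 bits of the address (2048 entries per table), the page level indexes 8, leaving a 12-bit page offset, which is where the shifts 53, 42, 31, 20 and 12 come from. A standalone check of that index math (macro names are ours, mirroring the architecture's RFX/RSX/RTX/SX/PX terms):

#include <assert.h>
#include <stdio.h>

/* Index of an address at each level of the s390 table layout used here. */
#define RFX(a)	(((a) >> 53) & 0x7ff)	/* region first  */
#define RSX(a)	(((a) >> 42) & 0x7ff)	/* region second */
#define RTX(a)	(((a) >> 31) & 0x7ff)	/* region third  */
#define SX(a)	(((a) >> 20) & 0x7ff)	/* segment       */
#define PX(a)	(((a) >> 12) & 0xff)	/* page          */

int main(void)
{
	unsigned long addr = 0x0000123456789abcUL;

	/* 11 + 11 + 11 + 11 + 8 + 12 bits cover the full 64-bit address. */
	assert(53 + 11 == 64 && 12 + 8 == 20);
	printf("rfx=%lx rsx=%lx rtx=%lx sx=%lx px=%lx off=%lx\n",
	       RFX(addr), RSX(addr), RTX(addr), SX(addr), PX(addr),
	       addr & 0xfff);
	return 0;
}
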
*/ flush |= gmap_unlink_segment(gmap, table); - *table = _SEGMENT_ENTRY_INV; + *table = _SEGMENT_ENTRY_INVALID; } out: spin_unlock(&gmap->mm->page_table_lock); @@ -335,7 +335,7 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from, if ((from | to | len) & (PMD_SIZE - 1)) return -EINVAL; - if (len == 0 || from + len > PGDIR_SIZE || + if (len == 0 || from + len > TASK_MAX_SIZE || from + len < from || to + len < to) return -EINVAL; @@ -345,17 +345,17 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from, for (off = 0; off < len; off += PMD_SIZE) { /* Walk the gmap address space page table */ table = gmap->table + (((to + off) >> 53) & 0x7ff); - if ((*table & _REGION_ENTRY_INV) && + if ((*table & _REGION_ENTRY_INVALID) && gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY)) goto out_unmap; table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); table = table + (((to + off) >> 42) & 0x7ff); - if ((*table & _REGION_ENTRY_INV) && + if ((*table & _REGION_ENTRY_INVALID) && gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY)) goto out_unmap; table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); table = table + (((to + off) >> 31) & 0x7ff); - if ((*table & _REGION_ENTRY_INV) && + if ((*table & _REGION_ENTRY_INVALID) && gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY)) goto out_unmap; table = (unsigned long *) (*table & _REGION_ENTRY_ORIGIN); @@ -363,7 +363,8 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from, /* Store 'from' address in an invalid segment table entry. */ flush |= gmap_unlink_segment(gmap, table); - *table = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | (from + off); + *table = (from + off) | (_SEGMENT_ENTRY_INVALID | + _SEGMENT_ENTRY_PROTECT); } spin_unlock(&gmap->mm->page_table_lock); up_read(&gmap->mm->mmap_sem); @@ -384,15 +385,15 @@ static unsigned long *gmap_table_walk(unsigned long address, struct gmap *gmap) unsigned long *table; table = gmap->table + ((address >> 53) & 0x7ff); - if (unlikely(*table & _REGION_ENTRY_INV)) + if (unlikely(*table & _REGION_ENTRY_INVALID)) return ERR_PTR(-EFAULT); table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); table = table + ((address >> 42) & 0x7ff); - if (unlikely(*table & _REGION_ENTRY_INV)) + if (unlikely(*table & _REGION_ENTRY_INVALID)) return ERR_PTR(-EFAULT); table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); table = table + ((address >> 31) & 0x7ff); - if (unlikely(*table & _REGION_ENTRY_INV)) + if (unlikely(*table & _REGION_ENTRY_INVALID)) return ERR_PTR(-EFAULT); table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); table = table + ((address >> 20) & 0x7ff); @@ -422,11 +423,11 @@ unsigned long __gmap_translate(unsigned long address, struct gmap *gmap) return PTR_ERR(segment_ptr); /* Convert the gmap address to an mm address. 
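
__gmap_translate() below depends on the encoding gmap_map_segment() establishes: a connected entry holds a page-table origin, while a mapped-but-unconnected entry stores the host vmaddr with both INVALID and PROTECT set, so PROTECT is what distinguishes "mapped, not yet connected" from "nothing mapped". A standalone model of the three cases; the bit values are illustrative stand-ins (the real ones live in asm/pgtable.h), only the encoding scheme is from the hunk:

#include <stdio.h>

/* Illustrative values, not the real asm/pgtable.h defines. */
#define SEG_INVALID	0x20UL
#define SEG_PROTECT	0x200UL
#define PMD_MASK	(~0xfffffUL)	/* 1 MB segments */

/* Disconnected entry: host vmaddr plus INVALID|PROTECT, as in the hunk. */
static unsigned long make_disconnected(unsigned long vmaddr)
{
	return (vmaddr & PMD_MASK) | SEG_INVALID | SEG_PROTECT;
}

static const char *classify(unsigned long seg)
{
	if (!(seg & SEG_INVALID))
		return "connected: points at a guest page table";
	if (seg & SEG_PROTECT)
		return "mapped, not yet connected: holds the host vmaddr";
	return "nothing mapped at this gmap address";
}

int main(void)
{
	unsigned long seg = make_disconnected(0x80100000UL);

	printf("%#lx -> %s\n", seg, classify(seg));
	return 0;
}
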
*/ segment = *segment_ptr; - if (!(segment & _SEGMENT_ENTRY_INV)) { + if (!(segment & _SEGMENT_ENTRY_INVALID)) { page = pfn_to_page(segment >> PAGE_SHIFT); mp = (struct gmap_pgtable *) page->index; return mp->vmaddr | (address & ~PMD_MASK); - } else if (segment & _SEGMENT_ENTRY_RO) { + } else if (segment & _SEGMENT_ENTRY_PROTECT) { vmaddr = segment & _SEGMENT_ENTRY_ORIGIN; return vmaddr | (address & ~PMD_MASK); } @@ -517,8 +518,8 @@ static void gmap_disconnect_pgtable(struct mm_struct *mm, unsigned long *table) page = pfn_to_page(__pa(table) >> PAGE_SHIFT); mp = (struct gmap_pgtable *) page->index; list_for_each_entry_safe(rmap, next, &mp->mapper, list) { - *rmap->entry = - _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | mp->vmaddr; + *rmap->entry = mp->vmaddr | (_SEGMENT_ENTRY_INVALID | + _SEGMENT_ENTRY_PROTECT); list_del(&rmap->list); kfree(rmap); flush = 1; @@ -545,13 +546,13 @@ unsigned long __gmap_fault(unsigned long address, struct gmap *gmap) /* Convert the gmap address to an mm address. */ while (1) { segment = *segment_ptr; - if (!(segment & _SEGMENT_ENTRY_INV)) { + if (!(segment & _SEGMENT_ENTRY_INVALID)) { /* Page table is present */ page = pfn_to_page(segment >> PAGE_SHIFT); mp = (struct gmap_pgtable *) page->index; return mp->vmaddr | (address & ~PMD_MASK); } - if (!(segment & _SEGMENT_ENTRY_RO)) + if (!(segment & _SEGMENT_ENTRY_PROTECT)) /* Nothing mapped in the gmap address space. */ break; rc = gmap_connect_pgtable(address, segment, segment_ptr, gmap); @@ -586,25 +587,25 @@ void gmap_discard(unsigned long from, unsigned long to, struct gmap *gmap) while (address < to) { /* Walk the gmap address space page table */ table = gmap->table + ((address >> 53) & 0x7ff); - if (unlikely(*table & _REGION_ENTRY_INV)) { + if (unlikely(*table & _REGION_ENTRY_INVALID)) { address = (address + PMD_SIZE) & PMD_MASK; continue; } table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); table = table + ((address >> 42) & 0x7ff); - if (unlikely(*table & _REGION_ENTRY_INV)) { + if (unlikely(*table & _REGION_ENTRY_INVALID)) { address = (address + PMD_SIZE) & PMD_MASK; continue; } table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); table = table + ((address >> 31) & 0x7ff); - if (unlikely(*table & _REGION_ENTRY_INV)) { + if (unlikely(*table & _REGION_ENTRY_INVALID)) { address = (address + PMD_SIZE) & PMD_MASK; continue; } table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); table = table + ((address >> 20) & 0x7ff); - if (unlikely(*table & _SEGMENT_ENTRY_INV)) { + if (unlikely(*table & _SEGMENT_ENTRY_INVALID)) { address = (address + PMD_SIZE) & PMD_MASK; continue; } @@ -687,7 +688,7 @@ int gmap_ipte_notify(struct gmap *gmap, unsigned long start, unsigned long len) continue; /* Set notification bit in the pgste of the pte */ entry = *ptep; - if ((pte_val(entry) & (_PAGE_INVALID | _PAGE_RO)) == 0) { + if ((pte_val(entry) & (_PAGE_INVALID | _PAGE_PROTECT)) == 0) { pgste = pgste_get_lock(ptep); pgste_val(pgste) |= PGSTE_IN_BIT; pgste_set_unlock(ptep, pgste); @@ -731,6 +732,11 @@ void gmap_do_ipte_notify(struct mm_struct *mm, unsigned long addr, pte_t *pte) spin_unlock(&gmap_notifier_lock); } +static inline int page_table_with_pgste(struct page *page) +{ + return atomic_read(&page->_mapcount) == 0; +} + static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm, unsigned long vmaddr) { @@ -750,10 +756,11 @@ static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm, mp->vmaddr = vmaddr & PMD_MASK; INIT_LIST_HEAD(&mp->mapper); page->index = (unsigned long) mp; - 
atomic_set(&page->_mapcount, 3); + atomic_set(&page->_mapcount, 0); table = (unsigned long *) page_to_phys(page); - clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2); - clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2); + clear_table(table, _PAGE_INVALID, PAGE_SIZE/2); + clear_table(table + PTRS_PER_PTE, PGSTE_HR_BIT | PGSTE_HC_BIT, + PAGE_SIZE/2); return table; } @@ -791,26 +798,21 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, pgste_val(new) |= (key & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48; pgste_val(new) |= (key & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56; if (!(pte_val(*ptep) & _PAGE_INVALID)) { - unsigned long address, bits; - unsigned char skey; + unsigned long address, bits, skey; address = pte_val(*ptep) & PAGE_MASK; - skey = page_get_storage_key(address); + skey = (unsigned long) page_get_storage_key(address); bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED); + skey = key & (_PAGE_ACC_BITS | _PAGE_FP_BIT); /* Set storage key ACC and FP */ - page_set_storage_key(address, - (key & (_PAGE_ACC_BITS | _PAGE_FP_BIT)), - !nq); - + page_set_storage_key(address, skey, !nq); /* Merge host changed & referenced into pgste */ pgste_val(new) |= bits << 52; - /* Transfer skey changed & referenced bit to kvm user bits */ - pgste_val(new) |= bits << 45; /* PGSTE_UR_BIT & PGSTE_UC_BIT */ } /* changing the guest storage key is considered a change of the page */ if ((pgste_val(new) ^ pgste_val(old)) & (PGSTE_ACC_BITS | PGSTE_FP_BIT | PGSTE_GR_BIT | PGSTE_GC_BIT)) - pgste_val(new) |= PGSTE_UC_BIT; + pgste_val(new) |= PGSTE_HC_BIT; pgste_set_unlock(ptep, new); pte_unmap_unlock(*ptep, ptl); @@ -821,6 +823,11 @@ EXPORT_SYMBOL(set_guest_storage_key); #else /* CONFIG_PGSTE */ +static inline int page_table_with_pgste(struct page *page) +{ + return 0; +} + static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm, unsigned long vmaddr) { @@ -878,7 +885,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm, unsigned long vmaddr) pgtable_page_ctor(page); atomic_set(&page->_mapcount, 1); table = (unsigned long *) page_to_phys(page); - clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE); + clear_table(table, _PAGE_INVALID, PAGE_SIZE); spin_lock_bh(&mm->context.list_lock); list_add(&page->lru, &mm->context.pgtable_list); } else { @@ -897,12 +904,12 @@ void page_table_free(struct mm_struct *mm, unsigned long *table) struct page *page; unsigned int bit, mask; - if (mm_has_pgste(mm)) { + page = pfn_to_page(__pa(table) >> PAGE_SHIFT); + if (page_table_with_pgste(page)) { gmap_disconnect_pgtable(mm, table); return page_table_free_pgste(table); } /* Free 1K/2K page table fragment of a 4K page */ - page = pfn_to_page(__pa(table) >> PAGE_SHIFT); bit = 1 << ((__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t))); spin_lock_bh(&mm->context.list_lock); if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK) @@ -940,14 +947,14 @@ void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table) unsigned int bit, mask; mm = tlb->mm; - if (mm_has_pgste(mm)) { + page = pfn_to_page(__pa(table) >> PAGE_SHIFT); + if (page_table_with_pgste(page)) { gmap_disconnect_pgtable(mm, table); table = (unsigned long *) (__pa(table) | FRAG_MASK); tlb_remove_table(tlb, table); return; } bit = 1 << ((__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t))); - page = pfn_to_page(__pa(table) >> PAGE_SHIFT); spin_lock_bh(&mm->context.list_lock); if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK) list_del(&page->lru); @@ -1007,7 +1014,6 @@ void tlb_table_flush(struct mmu_gather *tlb) 
struct mmu_table_batch **batch = &tlb->batch; if (*batch) { - __tlb_flush_mm(tlb->mm); call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu); *batch = NULL; } @@ -1017,11 +1023,12 @@ void tlb_remove_table(struct mmu_gather *tlb, void *table) { struct mmu_table_batch **batch = &tlb->batch; + tlb->mm->context.flush_mm = 1; if (*batch == NULL) { *batch = (struct mmu_table_batch *) __get_free_page(GFP_NOWAIT | __GFP_NOWARN); if (*batch == NULL) { - __tlb_flush_mm(tlb->mm); + __tlb_flush_mm_lazy(tlb->mm); tlb_remove_table_one(table); return; } @@ -1029,40 +1036,124 @@ void tlb_remove_table(struct mmu_gather *tlb, void *table) } (*batch)->tables[(*batch)->nr++] = table; if ((*batch)->nr == MAX_TABLE_BATCH) - tlb_table_flush(tlb); + tlb_flush_mmu(tlb); } #ifdef CONFIG_TRANSPARENT_HUGEPAGE -void thp_split_vma(struct vm_area_struct *vma) +static inline void thp_split_vma(struct vm_area_struct *vma) { unsigned long addr; - struct page *page; - for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) { - page = follow_page(vma, addr, FOLL_SPLIT); - } + for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) + follow_page(vma, addr, FOLL_SPLIT); } -void thp_split_mm(struct mm_struct *mm) +static inline void thp_split_mm(struct mm_struct *mm) { - struct vm_area_struct *vma = mm->mmap; + struct vm_area_struct *vma; - while (vma != NULL) { + for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) { thp_split_vma(vma); vma->vm_flags &= ~VM_HUGEPAGE; vma->vm_flags |= VM_NOHUGEPAGE; - vma = vma->vm_next; } + mm->def_flags |= VM_NOHUGEPAGE; +} +#else +static inline void thp_split_mm(struct mm_struct *mm) +{ } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ +static unsigned long page_table_realloc_pmd(struct mmu_gather *tlb, + struct mm_struct *mm, pud_t *pud, + unsigned long addr, unsigned long end) +{ + unsigned long next, *table, *new; + struct page *page; + pmd_t *pmd; + + pmd = pmd_offset(pud, addr); + do { + next = pmd_addr_end(addr, end); +again: + if (pmd_none_or_clear_bad(pmd)) + continue; + table = (unsigned long *) pmd_deref(*pmd); + page = pfn_to_page(__pa(table) >> PAGE_SHIFT); + if (page_table_with_pgste(page)) + continue; + /* Allocate new page table with pgstes */ + new = page_table_alloc_pgste(mm, addr); + if (!new) { + mm->context.has_pgste = 0; + continue; + } + spin_lock(&mm->page_table_lock); + if (likely((unsigned long *) pmd_deref(*pmd) == table)) { + /* Nuke pmd entry pointing to the "short" page table */ + pmdp_flush_lazy(mm, addr, pmd); + pmd_clear(pmd); + /* Copy ptes from old table to new table */ + memcpy(new, table, PAGE_SIZE/2); + clear_table(table, _PAGE_INVALID, PAGE_SIZE/2); + /* Establish new table */ + pmd_populate(mm, pmd, (pte_t *) new); + /* Free old table with rcu, there might be a walker! 
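
page_table_realloc_pmd() below swaps a 2K page table for a 4K one with pgstes while lockless walkers may be active: it re-checks the pmd under page_table_lock (retrying via the again: label if it changed), copies the ptes, publishes the new table, and frees the old one through RCU. A userspace model of that lock/copy/publish/defer pattern; a mutex stands in for page_table_lock, and an immediate free stands in for call_rcu_sched() since this single-threaded model has no concurrent walker:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned long *slot;		/* stands in for the pmd entry */

static void defer_free(unsigned long *old)
{
	free(old);	/* the kernel defers this via RCU instead */
}

static int upgrade(size_t old_n, size_t new_n)
{
	unsigned long *old, *new = calloc(new_n, sizeof(*new));

	if (!new)
		return -1;
	pthread_mutex_lock(&table_lock);
	old = slot;
	memcpy(new, old, old_n * sizeof(*old));	/* carry the ptes over  */
	slot = new;				/* publish the new table */
	pthread_mutex_unlock(&table_lock);
	defer_free(old);	/* walkers may still be reading the old one */
	return 0;
}

int main(void)
{
	slot = calloc(256, sizeof(*slot));
	slot[0] = 0x1234;
	if (upgrade(256, 512) == 0)
		printf("slot[0] after upgrade: %#lx\n", slot[0]);
	free(slot);
	return 0;
}
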
*/ + page_table_free_rcu(tlb, table); + new = NULL; + } + spin_unlock(&mm->page_table_lock); + if (new) { + page_table_free_pgste(new); + goto again; + } + } while (pmd++, addr = next, addr != end); + + return addr; +} + +static unsigned long page_table_realloc_pud(struct mmu_gather *tlb, + struct mm_struct *mm, pgd_t *pgd, + unsigned long addr, unsigned long end) +{ + unsigned long next; + pud_t *pud; + + pud = pud_offset(pgd, addr); + do { + next = pud_addr_end(addr, end); + if (pud_none_or_clear_bad(pud)) + continue; + next = page_table_realloc_pmd(tlb, mm, pud, addr, next); + } while (pud++, addr = next, addr != end); + + return addr; +} + +static void page_table_realloc(struct mmu_gather *tlb, struct mm_struct *mm, + unsigned long addr, unsigned long end) +{ + unsigned long next; + pgd_t *pgd; + + pgd = pgd_offset(mm, addr); + do { + next = pgd_addr_end(addr, end); + if (pgd_none_or_clear_bad(pgd)) + continue; + next = page_table_realloc_pud(tlb, mm, pgd, addr, next); + } while (pgd++, addr = next, addr != end); +} + /* * switch on pgstes for its userspace process (for kvm) */ int s390_enable_sie(void) { struct task_struct *tsk = current; - struct mm_struct *mm, *old_mm; + struct mm_struct *mm = tsk->mm; + struct mmu_gather tlb; /* Do we have switched amode? If no, we cannot do sie */ if (s390_user_mode == HOME_SPACE_MODE) @@ -1072,57 +1163,16 @@ int s390_enable_sie(void) if (mm_has_pgste(tsk->mm)) return 0; - /* lets check if we are allowed to replace the mm */ - task_lock(tsk); - if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 || -#ifdef CONFIG_AIO - !hlist_empty(&tsk->mm->ioctx_list) || -#endif - tsk->mm != tsk->active_mm) { - task_unlock(tsk); - return -EINVAL; - } - task_unlock(tsk); - - /* we copy the mm and let dup_mm create the page tables with_pgstes */ - tsk->mm->context.alloc_pgste = 1; - /* make sure that both mms have a correct rss state */ - sync_mm_rss(tsk->mm); - mm = dup_mm(tsk); - tsk->mm->context.alloc_pgste = 0; - if (!mm) - return -ENOMEM; - -#ifdef CONFIG_TRANSPARENT_HUGEPAGE + down_write(&mm->mmap_sem); /* split thp mappings and disable thp for future mappings */ thp_split_mm(mm); - mm->def_flags |= VM_NOHUGEPAGE; -#endif - - /* Now lets check again if something happened */ - task_lock(tsk); - if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 || -#ifdef CONFIG_AIO - !hlist_empty(&tsk->mm->ioctx_list) || -#endif - tsk->mm != tsk->active_mm) { - mmput(mm); - task_unlock(tsk); - return -EINVAL; - } - - /* ok, we are alone. No ptrace, no threads, etc. */ - old_mm = tsk->mm; - tsk->mm = tsk->active_mm = mm; - preempt_disable(); - update_mm(mm, tsk); - atomic_inc(&mm->context.attach_count); - atomic_dec(&old_mm->context.attach_count); - cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm)); - preempt_enable(); - task_unlock(tsk); - mmput(old_mm); - return 0; + /* Reallocate the page tables with pgstes */ + mm->context.has_pgste = 1; + tlb_gather_mmu(&tlb, mm, 0, TASK_SIZE); + page_table_realloc(&tlb, mm, 0, TASK_SIZE); + tlb_finish_mmu(&tlb, 0, TASK_SIZE); + up_write(&mm->mmap_sem); + return mm->context.has_pgste ? 
0 : -ENOMEM; } EXPORT_SYMBOL_GPL(s390_enable_sie); @@ -1198,9 +1248,9 @@ pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp) list_del(lh); } ptep = (pte_t *) pgtable; - pte_val(*ptep) = _PAGE_TYPE_EMPTY; + pte_val(*ptep) = _PAGE_INVALID; ptep++; - pte_val(*ptep) = _PAGE_TYPE_EMPTY; + pte_val(*ptep) = _PAGE_INVALID; return pgtable; } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 8b268fcc4612..bcfb70b60be6 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -69,7 +69,7 @@ static pte_t __ref *vmem_pte_alloc(unsigned long address) pte = alloc_bootmem(PTRS_PER_PTE * sizeof(pte_t)); if (!pte) return NULL; - clear_table((unsigned long *) pte, _PAGE_TYPE_EMPTY, + clear_table((unsigned long *) pte, _PAGE_INVALID, PTRS_PER_PTE * sizeof(pte_t)); return pte; } @@ -101,7 +101,7 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro) !(address & ~PUD_MASK) && (address + PUD_SIZE <= end)) { pud_val(*pu_dir) = __pa(address) | _REGION_ENTRY_TYPE_R3 | _REGION3_ENTRY_LARGE | - (ro ? _REGION_ENTRY_RO : 0); + (ro ? _REGION_ENTRY_PROTECT : 0); address += PUD_SIZE; continue; } @@ -118,7 +118,8 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro) !(address & ~PMD_MASK) && (address + PMD_SIZE <= end)) { pmd_val(*pm_dir) = __pa(address) | _SEGMENT_ENTRY | _SEGMENT_ENTRY_LARGE | - (ro ? _SEGMENT_ENTRY_RO : 0); + _SEGMENT_ENTRY_YOUNG | + (ro ? _SEGMENT_ENTRY_PROTECT : 0); address += PMD_SIZE; continue; } @@ -131,7 +132,8 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro) } pt_dir = pte_offset_kernel(pm_dir, address); - pte_val(*pt_dir) = __pa(address) | (ro ? _PAGE_RO : 0); + pte_val(*pt_dir) = __pa(address) | + pgprot_val(ro ? PAGE_KERNEL_RO : PAGE_KERNEL); address += PAGE_SIZE; } ret = 0; @@ -154,7 +156,7 @@ static void vmem_remove_range(unsigned long start, unsigned long size) pte_t *pt_dir; pte_t pte; - pte_val(pte) = _PAGE_TYPE_EMPTY; + pte_val(pte) = _PAGE_INVALID; while (address < end) { pg_dir = pgd_offset_k(address); if (pgd_none(*pg_dir)) { @@ -255,7 +257,8 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) new_page =__pa(vmem_alloc_pages(0)); if (!new_page) goto out; - pte_val(*pt_dir) = __pa(new_page); + pte_val(*pt_dir) = + __pa(new_page) | pgprot_val(PAGE_KERNEL); } address += PAGE_SIZE; } diff --git a/arch/s390/oprofile/init.c b/arch/s390/oprofile/init.c index 930783d2c99b..04e1b6a85362 100644 --- a/arch/s390/oprofile/init.c +++ b/arch/s390/oprofile/init.c @@ -346,16 +346,15 @@ static const struct file_operations timer_enabled_fops = { }; -static int oprofile_create_hwsampling_files(struct super_block *sb, - struct dentry *root) +static int oprofile_create_hwsampling_files(struct dentry *root) { struct dentry *dir; - dir = oprofilefs_mkdir(sb, root, "timer"); + dir = oprofilefs_mkdir(root, "timer"); if (!dir) return -EINVAL; - oprofilefs_create_file(sb, dir, "enabled", &timer_enabled_fops); + oprofilefs_create_file(dir, "enabled", &timer_enabled_fops); if (!hwsampler_available) return 0; @@ -376,17 +375,17 @@ static int oprofile_create_hwsampling_files(struct super_block *sb, * and can only be set to 0. 
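
The vmem_add_mem() hunk above adds YOUNG/PROTECT handling but keeps its mapping-size policy: use a 2 GB region entry or a 1 MB segment entry only when the address is aligned to that size and the range covers it entirely, otherwise fall back to 4K ptes. A standalone restatement of just that decision (hardware-capability checks such as EDAT are omitted here):

#include <stdio.h>

#define PUD_SIZE (1UL << 31)	/* 2 GB region-third entries */
#define PMD_SIZE (1UL << 20)	/* 1 MB segment entries */

/* Mirrors the alignment/size checks in vmem_add_mem(). */
static const char *mapping_for(unsigned long addr, unsigned long end)
{
	if (!(addr & (PUD_SIZE - 1)) && addr + PUD_SIZE <= end)
		return "2G region entry";
	if (!(addr & (PMD_SIZE - 1)) && addr + PMD_SIZE <= end)
		return "1M segment entry";
	return "4K pte";
}

int main(void)
{
	printf("%s\n", mapping_for(0x80000000UL, 0x180000000UL)); /* 2G */
	printf("%s\n", mapping_for(0x00100000UL, 0x00300000UL));  /* 1M */
	printf("%s\n", mapping_for(0x00001000UL, 0x00003000UL));  /* 4K */
	return 0;
}
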
*/ - dir = oprofilefs_mkdir(sb, root, "0"); + dir = oprofilefs_mkdir(root, "0"); if (!dir) return -EINVAL; - oprofilefs_create_file(sb, dir, "enabled", &hwsampler_fops); - oprofilefs_create_file(sb, dir, "event", &zero_fops); - oprofilefs_create_file(sb, dir, "count", &hw_interval_fops); - oprofilefs_create_file(sb, dir, "unit_mask", &zero_fops); - oprofilefs_create_file(sb, dir, "kernel", &kernel_fops); - oprofilefs_create_file(sb, dir, "user", &user_fops); - oprofilefs_create_ulong(sb, dir, "hw_sdbt_blocks", + oprofilefs_create_file(dir, "enabled", &hwsampler_fops); + oprofilefs_create_file(dir, "event", &zero_fops); + oprofilefs_create_file(dir, "count", &hw_interval_fops); + oprofilefs_create_file(dir, "unit_mask", &zero_fops); + oprofilefs_create_file(dir, "kernel", &kernel_fops); + oprofilefs_create_file(dir, "user", &user_fops); + oprofilefs_create_ulong(dir, "hw_sdbt_blocks", &oprofile_sdbt_blocks); } else { @@ -396,19 +395,19 @@ static int oprofile_create_hwsampling_files(struct super_block *sb, * space tools. The /dev/oprofile/hwsampling fs is * provided in that case. */ - dir = oprofilefs_mkdir(sb, root, "hwsampling"); + dir = oprofilefs_mkdir(root, "hwsampling"); if (!dir) return -EINVAL; - oprofilefs_create_file(sb, dir, "hwsampler", + oprofilefs_create_file(dir, "hwsampler", &hwsampler_fops); - oprofilefs_create_file(sb, dir, "hw_interval", + oprofilefs_create_file(dir, "hw_interval", &hw_interval_fops); - oprofilefs_create_ro_ulong(sb, dir, "hw_min_interval", + oprofilefs_create_ro_ulong(dir, "hw_min_interval", &oprofile_min_interval); - oprofilefs_create_ro_ulong(sb, dir, "hw_max_interval", + oprofilefs_create_ro_ulong(dir, "hw_max_interval", &oprofile_max_interval); - oprofilefs_create_ulong(sb, dir, "hw_sdbt_blocks", + oprofilefs_create_ulong(dir, "hw_sdbt_blocks", &oprofile_sdbt_blocks); } return 0; diff --git a/arch/s390/pci/Makefile b/arch/s390/pci/Makefile index 086a2e37935d..a9e1dc4ae442 100644 --- a/arch/s390/pci/Makefile +++ b/arch/s390/pci/Makefile @@ -2,5 +2,5 @@ # Makefile for the s390 PCI subsystem. 
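
The oprofile hunk above is part of a tree-wide interface change: the oprofilefs helpers drop their super_block parameter and derive it from the dentry. A sketch of the new calling convention for a driver creating its own control directory; "mydev" and my_ctl are placeholders, not part of the patch, and this fragment assumes the declarations live in linux/oprofile.h as on the kernels of this era:

#include <linux/errno.h>
#include <linux/oprofile.h>

static unsigned long my_ctl;

/* New-style: only the parent dentry is passed around. */
static int create_my_files(struct dentry *root)
{
	struct dentry *dir = oprofilefs_mkdir(root, "mydev");

	if (!dir)
		return -EINVAL;
	oprofilefs_create_ulong(dir, "enabled", &my_ctl);
	return 0;
}
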
# -obj-$(CONFIG_PCI) += pci.o pci_dma.o pci_clp.o pci_msi.o pci_sysfs.o \ +obj-$(CONFIG_PCI) += pci.o pci_dma.o pci_clp.o pci_sysfs.o \ pci_event.o pci_debug.o pci_insn.o diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index e2956ad39a4f..f17a8343e360 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -42,45 +42,26 @@ #define SIC_IRQ_MODE_SINGLE 1 #define ZPCI_NR_DMA_SPACES 1 -#define ZPCI_MSI_VEC_BITS 6 #define ZPCI_NR_DEVICES CONFIG_PCI_NR_FUNCTIONS /* list of all detected zpci devices */ -LIST_HEAD(zpci_list); -EXPORT_SYMBOL_GPL(zpci_list); -DEFINE_MUTEX(zpci_list_lock); -EXPORT_SYMBOL_GPL(zpci_list_lock); +static LIST_HEAD(zpci_list); +static DEFINE_SPINLOCK(zpci_list_lock); -static struct pci_hp_callback_ops *hotplug_ops; +static void zpci_enable_irq(struct irq_data *data); +static void zpci_disable_irq(struct irq_data *data); -static DECLARE_BITMAP(zpci_domain, ZPCI_NR_DEVICES); -static DEFINE_SPINLOCK(zpci_domain_lock); - -struct callback { - irq_handler_t handler; - void *data; +static struct irq_chip zpci_irq_chip = { + .name = "zPCI", + .irq_unmask = zpci_enable_irq, + .irq_mask = zpci_disable_irq, }; -struct zdev_irq_map { - unsigned long aibv; /* AI bit vector */ - int msi_vecs; /* consecutive MSI-vectors used */ - int __unused; - struct callback cb[ZPCI_NR_MSI_VECS]; /* callback handler array */ - spinlock_t lock; /* protect callbacks against de-reg */ -}; - -struct intr_bucket { - /* amap of adapters, one bit per dev, corresponds to one irq nr */ - unsigned long *alloc; - /* AI summary bit, global page for all devices */ - unsigned long *aisb; - /* pointer to aibv and callback data in zdev */ - struct zdev_irq_map *imap[ZPCI_NR_DEVICES]; - /* protects the whole bucket struct */ - spinlock_t lock; -}; +static DECLARE_BITMAP(zpci_domain, ZPCI_NR_DEVICES); +static DEFINE_SPINLOCK(zpci_domain_lock); -static struct intr_bucket *bucket; +static struct airq_iv *zpci_aisb_iv; +static struct airq_iv *zpci_aibv[ZPCI_NR_DEVICES]; /* Adapter interrupt definitions */ static void zpci_irq_handler(struct airq_struct *airq); @@ -96,27 +77,8 @@ static DECLARE_BITMAP(zpci_iomap, ZPCI_IOMAP_MAX_ENTRIES); struct zpci_iomap_entry *zpci_iomap_start; EXPORT_SYMBOL_GPL(zpci_iomap_start); -/* highest irq summary bit */ -static int __read_mostly aisb_max; - -static struct kmem_cache *zdev_irq_cache; static struct kmem_cache *zdev_fmb_cache; -static inline int irq_to_msi_nr(unsigned int irq) -{ - return irq & ZPCI_MSI_MASK; -} - -static inline int irq_to_dev_nr(unsigned int irq) -{ - return irq >> ZPCI_MSI_VEC_BITS; -} - -static inline struct zdev_irq_map *get_imap(unsigned int irq) -{ - return bucket->imap[irq_to_dev_nr(irq)]; -} - struct zpci_dev *get_zdev(struct pci_dev *pdev) { return (struct zpci_dev *) pdev->sysdata; @@ -126,22 +88,17 @@ struct zpci_dev *get_zdev_by_fid(u32 fid) { struct zpci_dev *tmp, *zdev = NULL; - mutex_lock(&zpci_list_lock); + spin_lock(&zpci_list_lock); list_for_each_entry(tmp, &zpci_list, entry) { if (tmp->fid == fid) { zdev = tmp; break; } } - mutex_unlock(&zpci_list_lock); + spin_unlock(&zpci_list_lock); return zdev; } -bool zpci_fid_present(u32 fid) -{ - return (get_zdev_by_fid(fid) != NULL) ? true : false; -} - static struct zpci_dev *get_zdev_by_bus(struct pci_bus *bus) { return (bus && bus->sysdata) ? 
(struct zpci_dev *) bus->sysdata : NULL; @@ -160,8 +117,7 @@ int pci_proc_domain(struct pci_bus *bus) EXPORT_SYMBOL_GPL(pci_proc_domain); /* Modify PCI: Register adapter interruptions */ -static int zpci_register_airq(struct zpci_dev *zdev, unsigned int aisb, - u64 aibv) +static int zpci_set_airq(struct zpci_dev *zdev) { u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_REG_INT); struct zpci_fib *fib; @@ -172,14 +128,14 @@ static int zpci_register_airq(struct zpci_dev *zdev, unsigned int aisb, return -ENOMEM; fib->isc = PCI_ISC; - fib->noi = zdev->irq_map->msi_vecs; fib->sum = 1; /* enable summary notifications */ - fib->aibv = aibv; - fib->aibvo = 0; /* every function has its own page */ - fib->aisb = (u64) bucket->aisb + aisb / 8; - fib->aisbo = aisb & ZPCI_MSI_MASK; + fib->noi = airq_iv_end(zdev->aibv); + fib->aibv = (unsigned long) zdev->aibv->vector; + fib->aibvo = 0; /* each zdev has its own interrupt vector */ + fib->aisb = (unsigned long) zpci_aisb_iv->vector + (zdev->aisb/64)*8; + fib->aisbo = zdev->aisb & 63; - rc = s390pci_mod_fc(req, fib); + rc = zpci_mod_fc(req, fib); pr_debug("%s mpcifc returned noi: %d\n", __func__, fib->noi); free_page((unsigned long) fib); @@ -209,7 +165,7 @@ static int mod_pci(struct zpci_dev *zdev, int fn, u8 dmaas, struct mod_pci_args fib->iota = args->iota; fib->fmb_addr = args->fmb_addr; - rc = s390pci_mod_fc(req, fib); + rc = zpci_mod_fc(req, fib); free_page((unsigned long) fib); return rc; } @@ -234,7 +190,7 @@ int zpci_unregister_ioat(struct zpci_dev *zdev, u8 dmaas) } /* Modify PCI: Unregister adapter interruptions */ -static int zpci_unregister_airq(struct zpci_dev *zdev) +static int zpci_clear_airq(struct zpci_dev *zdev) { struct mod_pci_args args = { 0, 0, 0, 0 }; @@ -283,7 +239,7 @@ static int zpci_cfg_load(struct zpci_dev *zdev, int offset, u32 *val, u8 len) u64 data; int rc; - rc = s390pci_load(&data, req, offset); + rc = zpci_load(&data, req, offset); if (!rc) { data = data << ((8 - len) * 8); data = le64_to_cpu(data); @@ -301,25 +257,46 @@ static int zpci_cfg_store(struct zpci_dev *zdev, int offset, u32 val, u8 len) data = cpu_to_le64(data); data = data >> ((8 - len) * 8); - rc = s390pci_store(data, req, offset); + rc = zpci_store(data, req, offset); return rc; } -void enable_irq(unsigned int irq) +static int zpci_msi_set_mask_bits(struct msi_desc *msi, u32 mask, u32 flag) +{ + int offset, pos; + u32 mask_bits; + + if (msi->msi_attrib.is_msix) { + offset = msi->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE + + PCI_MSIX_ENTRY_VECTOR_CTRL; + msi->masked = readl(msi->mask_base + offset); + writel(flag, msi->mask_base + offset); + } else if (msi->msi_attrib.maskbit) { + pos = (long) msi->mask_base; + pci_read_config_dword(msi->dev, pos, &mask_bits); + mask_bits &= ~(mask); + mask_bits |= flag & mask; + pci_write_config_dword(msi->dev, pos, mask_bits); + } else + return 0; + + msi->msi_attrib.maskbit = !!flag; + return 1; +} + +static void zpci_enable_irq(struct irq_data *data) { - struct msi_desc *msi = irq_get_msi_desc(irq); + struct msi_desc *msi = irq_get_msi_desc(data->irq); zpci_msi_set_mask_bits(msi, 1, 0); } -EXPORT_SYMBOL_GPL(enable_irq); -void disable_irq(unsigned int irq) +static void zpci_disable_irq(struct irq_data *data) { - struct msi_desc *msi = irq_get_msi_desc(irq); + struct msi_desc *msi = irq_get_msi_desc(data->irq); zpci_msi_set_mask_bits(msi, 1, 1); } -EXPORT_SYMBOL_GPL(disable_irq); void pcibios_fixup_bus(struct pci_bus *bus) { @@ -404,152 +381,147 @@ static struct pci_ops pci_root_ops = { .write = pci_write, }; -/* store 
the last handled bit to implement fair scheduling of devices */ -static DEFINE_PER_CPU(unsigned long, next_sbit); - static void zpci_irq_handler(struct airq_struct *airq) { - unsigned long sbit, mbit, last = 0, start = __get_cpu_var(next_sbit); - int rescan = 0, max = aisb_max; - struct zdev_irq_map *imap; + unsigned long si, ai; + struct airq_iv *aibv; + int irqs_on = 0; inc_irq_stat(IRQIO_PCI); - sbit = start; - -scan: - /* find summary_bit */ - for_each_set_bit_left_cont(sbit, bucket->aisb, max) { - clear_bit(63 - (sbit & 63), bucket->aisb + (sbit >> 6)); - last = sbit; + for (si = 0;;) { + /* Scan adapter summary indicator bit vector */ + si = airq_iv_scan(zpci_aisb_iv, si, airq_iv_end(zpci_aisb_iv)); + if (si == -1UL) { + if (irqs_on++) + /* End of second scan with interrupts on. */ + break; + /* First scan complete, reenable interrupts. */ + zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC); + si = 0; + continue; + } - /* find vector bit */ - imap = bucket->imap[sbit]; - for_each_set_bit_left(mbit, &imap->aibv, imap->msi_vecs) { + /* Scan the adapter interrupt vector for this device. */ + aibv = zpci_aibv[si]; + for (ai = 0;;) { + ai = airq_iv_scan(aibv, ai, airq_iv_end(aibv)); + if (ai == -1UL) + break; inc_irq_stat(IRQIO_MSI); - clear_bit(63 - mbit, &imap->aibv); - - spin_lock(&imap->lock); - if (imap->cb[mbit].handler) - imap->cb[mbit].handler(mbit, - imap->cb[mbit].data); - spin_unlock(&imap->lock); + airq_iv_lock(aibv, ai); + generic_handle_irq(airq_iv_get_data(aibv, ai)); + airq_iv_unlock(aibv, ai); } } - - if (rescan) - goto out; - - /* scan the skipped bits */ - if (start > 0) { - sbit = 0; - max = start; - start = 0; - goto scan; - } - - /* enable interrupts again */ - set_irq_ctrl(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC); - - /* check again to not lose initiative */ - rmb(); - max = aisb_max; - sbit = find_first_bit_left(bucket->aisb, max); - if (sbit != max) { - rescan++; - goto scan; - } -out: - /* store next device bit to scan */ - __get_cpu_var(next_sbit) = (++last >= aisb_max) ? 0 : last; } -/* msi_vecs - number of requested interrupts, 0 place function to error state */ -static int zpci_setup_msi(struct pci_dev *pdev, int msi_vecs) +int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) { struct zpci_dev *zdev = get_zdev(pdev); - unsigned int aisb, msi_nr; + unsigned int hwirq, irq, msi_vecs; + unsigned long aisb; struct msi_desc *msi; + struct msi_msg msg; int rc; - /* store the number of used MSI vectors */ - zdev->irq_map->msi_vecs = min(msi_vecs, ZPCI_NR_MSI_VECS); - - spin_lock(&bucket->lock); - aisb = find_first_zero_bit(bucket->alloc, PAGE_SIZE); - /* alloc map exhausted? 
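
The rewritten zpci_irq_handler() above replaces the old fair-scheduling bookkeeping with a simpler double scan: one pass over the summary vector with the ISC still disabled, re-enable, then one more pass so a summary bit raised between the last hit and the re-enable is not lost. A standalone toy with a single 64-bit word standing in for airq_iv_scan() over the summary vector:

#include <stdio.h>

static unsigned long summary = 0x5;	/* devices 0 and 2 pending */

/* Find-and-consume the next set bit in [start, end), or -1. */
static long scan(unsigned long *iv, long start, long end)
{
	long i;

	for (i = start; i < end; i++)
		if (*iv & (1UL << i)) {
			*iv &= ~(1UL << i);
			return i;
		}
	return -1;
}

int main(void)
{
	int irqs_on = 0;
	long si;

	for (si = 0;;) {
		si = scan(&summary, si, 64);
		if (si == -1) {
			if (irqs_on++)
				break;	/* second empty pass: done */
			/* the real code re-enables the ISC here (SIC) */
			si = 0;
			continue;
		}
		printf("service device %ld\n", si);
	}
	return 0;
}
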
*/ - if (aisb == PAGE_SIZE) { - spin_unlock(&bucket->lock); - return -EIO; - } - set_bit(aisb, bucket->alloc); - spin_unlock(&bucket->lock); + pr_debug("%s: requesting %d MSI-X interrupts...", __func__, nvec); + if (type != PCI_CAP_ID_MSIX && type != PCI_CAP_ID_MSI) + return -EINVAL; + msi_vecs = min(nvec, ZPCI_MSI_VEC_MAX); + msi_vecs = min_t(unsigned int, msi_vecs, CONFIG_PCI_NR_MSI); + /* Allocate adapter summary indicator bit */ + rc = -EIO; + aisb = airq_iv_alloc_bit(zpci_aisb_iv); + if (aisb == -1UL) + goto out; zdev->aisb = aisb; - if (aisb + 1 > aisb_max) - aisb_max = aisb + 1; - /* wire up IRQ shortcut pointer */ - bucket->imap[zdev->aisb] = zdev->irq_map; - pr_debug("%s: imap[%u] linked to %p\n", __func__, zdev->aisb, zdev->irq_map); + /* Create adapter interrupt vector */ + rc = -ENOMEM; + zdev->aibv = airq_iv_create(msi_vecs, AIRQ_IV_DATA | AIRQ_IV_BITLOCK); + if (!zdev->aibv) + goto out_si; - /* TODO: irq number 0 wont be found if we return less than requested MSIs. - * ignore it for now and fix in common code. - */ - msi_nr = aisb << ZPCI_MSI_VEC_BITS; + /* Wire up shortcut pointer */ + zpci_aibv[aisb] = zdev->aibv; + /* Request MSI interrupts */ + hwirq = 0; list_for_each_entry(msi, &pdev->msi_list, list) { - rc = zpci_setup_msi_irq(zdev, msi, msi_nr, - aisb << ZPCI_MSI_VEC_BITS); + rc = -EIO; + irq = irq_alloc_desc(0); /* Alloc irq on node 0 */ + if (irq == NO_IRQ) + goto out_msi; + rc = irq_set_msi_desc(irq, msi); if (rc) - return rc; - msi_nr++; + goto out_msi; + irq_set_chip_and_handler(irq, &zpci_irq_chip, + handle_simple_irq); + msg.data = hwirq; + msg.address_lo = zdev->msi_addr & 0xffffffff; + msg.address_hi = zdev->msi_addr >> 32; + write_msi_msg(irq, &msg); + airq_iv_set_data(zdev->aibv, hwirq, irq); + hwirq++; } - rc = zpci_register_airq(zdev, aisb, (u64) &zdev->irq_map->aibv); - if (rc) { - clear_bit(aisb, bucket->alloc); - dev_err(&pdev->dev, "register MSI failed with: %d\n", rc); - return rc; + /* Enable adapter interrupts */ + rc = zpci_set_airq(zdev); + if (rc) + goto out_msi; + + return (msi_vecs == nvec) ? 0 : msi_vecs; + +out_msi: + list_for_each_entry(msi, &pdev->msi_list, list) { + if (hwirq-- == 0) + break; + irq_set_msi_desc(msi->irq, NULL); + irq_free_desc(msi->irq); + msi->msg.address_lo = 0; + msi->msg.address_hi = 0; + msi->msg.data = 0; + msi->irq = 0; } - return (zdev->irq_map->msi_vecs == msi_vecs) ? 
- 0 : zdev->irq_map->msi_vecs; + zpci_aibv[aisb] = NULL; + airq_iv_release(zdev->aibv); +out_si: + airq_iv_free_bit(zpci_aisb_iv, aisb); +out: + dev_err(&pdev->dev, "register MSI failed with: %d\n", rc); + return rc; } -static void zpci_teardown_msi(struct pci_dev *pdev) +void arch_teardown_msi_irqs(struct pci_dev *pdev) { struct zpci_dev *zdev = get_zdev(pdev); struct msi_desc *msi; - int aisb, rc; + int rc; - rc = zpci_unregister_airq(zdev); + pr_info("%s: on pdev: %p\n", __func__, pdev); + + /* Disable adapter interrupts */ + rc = zpci_clear_airq(zdev); if (rc) { dev_err(&pdev->dev, "deregister MSI failed with: %d\n", rc); return; } - msi = list_first_entry(&pdev->msi_list, struct msi_desc, list); - aisb = irq_to_dev_nr(msi->irq); - - list_for_each_entry(msi, &pdev->msi_list, list) - zpci_teardown_msi_irq(zdev, msi); - - clear_bit(aisb, bucket->alloc); - if (aisb + 1 == aisb_max) - aisb_max--; -} - -int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) -{ - pr_debug("%s: requesting %d MSI-X interrupts...", __func__, nvec); - if (type != PCI_CAP_ID_MSIX && type != PCI_CAP_ID_MSI) - return -EINVAL; - return zpci_setup_msi(pdev, nvec); -} + /* Release MSI interrupts */ + list_for_each_entry(msi, &pdev->msi_list, list) { + zpci_msi_set_mask_bits(msi, 1, 1); + irq_set_msi_desc(msi->irq, NULL); + irq_free_desc(msi->irq); + msi->msg.address_lo = 0; + msi->msg.address_hi = 0; + msi->msg.data = 0; + msi->irq = 0; + } -void arch_teardown_msi_irqs(struct pci_dev *pdev) -{ - pr_info("%s: on pdev: %p\n", __func__, pdev); - zpci_teardown_msi(pdev); + zpci_aibv[zdev->aisb] = NULL; + airq_iv_release(zdev->aibv); + airq_iv_free_bit(zpci_aisb_iv, zdev->aisb); } static void zpci_map_resources(struct zpci_dev *zdev) @@ -564,8 +536,6 @@ static void zpci_map_resources(struct zpci_dev *zdev) continue; pdev->resource[i].start = (resource_size_t) pci_iomap(pdev, i, 0); pdev->resource[i].end = pdev->resource[i].start + len - 1; - pr_debug("BAR%i: -> start: %Lx end: %Lx\n", - i, pdev->resource[i].start, pdev->resource[i].end); } } @@ -589,162 +559,47 @@ struct zpci_dev *zpci_alloc_device(void) /* Alloc memory for our private pci device data */ zdev = kzalloc(sizeof(*zdev), GFP_KERNEL); - if (!zdev) - return ERR_PTR(-ENOMEM); - - /* Alloc aibv & callback space */ - zdev->irq_map = kmem_cache_zalloc(zdev_irq_cache, GFP_KERNEL); - if (!zdev->irq_map) - goto error; - WARN_ON((u64) zdev->irq_map & 0xff); - return zdev; - -error: - kfree(zdev); - return ERR_PTR(-ENOMEM); + return zdev ? : ERR_PTR(-ENOMEM); } void zpci_free_device(struct zpci_dev *zdev) { - kmem_cache_free(zdev_irq_cache, zdev->irq_map); kfree(zdev); } -/* - * Too late for any s390 specific setup, since interrupts must be set up - * already which requires DMA setup too and the pci scan will access the - * config space, which only works if the function handle is enabled. 
- */ -int pcibios_enable_device(struct pci_dev *pdev, int mask) -{ - struct resource *res; - u16 cmd; - int i; - - pci_read_config_word(pdev, PCI_COMMAND, &cmd); - - for (i = 0; i < PCI_BAR_COUNT; i++) { - res = &pdev->resource[i]; - - if (res->flags & IORESOURCE_IO) - return -EINVAL; - - if (res->flags & IORESOURCE_MEM) - cmd |= PCI_COMMAND_MEMORY; - } - pci_write_config_word(pdev, PCI_COMMAND, cmd); - return 0; -} - int pcibios_add_platform_entries(struct pci_dev *pdev) { return zpci_sysfs_add_device(&pdev->dev); } -int zpci_request_irq(unsigned int irq, irq_handler_t handler, void *data) -{ - int msi_nr = irq_to_msi_nr(irq); - struct zdev_irq_map *imap; - struct msi_desc *msi; - - msi = irq_get_msi_desc(irq); - if (!msi) - return -EIO; - - imap = get_imap(irq); - spin_lock_init(&imap->lock); - - pr_debug("%s: register handler for IRQ:MSI %d:%d\n", __func__, irq >> 6, msi_nr); - imap->cb[msi_nr].handler = handler; - imap->cb[msi_nr].data = data; - - /* - * The generic MSI code returns with the interrupt disabled on the - * card, using the MSI mask bits. Firmware doesn't appear to unmask - * at that level, so we do it here by hand. - */ - zpci_msi_set_mask_bits(msi, 1, 0); - return 0; -} - -void zpci_free_irq(unsigned int irq) -{ - struct zdev_irq_map *imap = get_imap(irq); - int msi_nr = irq_to_msi_nr(irq); - unsigned long flags; - - pr_debug("%s: for irq: %d\n", __func__, irq); - - spin_lock_irqsave(&imap->lock, flags); - imap->cb[msi_nr].handler = NULL; - imap->cb[msi_nr].data = NULL; - spin_unlock_irqrestore(&imap->lock, flags); -} - -int request_irq(unsigned int irq, irq_handler_t handler, - unsigned long irqflags, const char *devname, void *dev_id) -{ - pr_debug("%s: irq: %d handler: %p flags: %lx dev: %s\n", - __func__, irq, handler, irqflags, devname); - - return zpci_request_irq(irq, handler, dev_id); -} -EXPORT_SYMBOL_GPL(request_irq); - -void free_irq(unsigned int irq, void *dev_id) -{ - zpci_free_irq(irq); -} -EXPORT_SYMBOL_GPL(free_irq); - static int __init zpci_irq_init(void) { - int cpu, rc; - - bucket = kzalloc(sizeof(*bucket), GFP_KERNEL); - if (!bucket) - return -ENOMEM; - - bucket->aisb = (unsigned long *) get_zeroed_page(GFP_KERNEL); - if (!bucket->aisb) { - rc = -ENOMEM; - goto out_aisb; - } - - bucket->alloc = (unsigned long *) get_zeroed_page(GFP_KERNEL); - if (!bucket->alloc) { - rc = -ENOMEM; - goto out_alloc; - } + int rc; rc = register_adapter_interrupt(&zpci_airq); if (rc) - goto out_ai; + goto out; /* Set summary to 1 to be called every time for the ISC. 
*/ *zpci_airq.lsi_ptr = 1; - for_each_online_cpu(cpu) - per_cpu(next_sbit, cpu) = 0; + rc = -ENOMEM; + zpci_aisb_iv = airq_iv_create(ZPCI_NR_DEVICES, AIRQ_IV_ALLOC); + if (!zpci_aisb_iv) + goto out_airq; - spin_lock_init(&bucket->lock); - set_irq_ctrl(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC); + zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC); return 0; -out_ai: - free_page((unsigned long) bucket->alloc); -out_alloc: - free_page((unsigned long) bucket->aisb); -out_aisb: - kfree(bucket); +out_airq: + unregister_adapter_interrupt(&zpci_airq); +out: return rc; } static void zpci_irq_exit(void) { - free_page((unsigned long) bucket->alloc); - free_page((unsigned long) bucket->aisb); + airq_iv_release(zpci_aisb_iv); unregister_adapter_interrupt(&zpci_airq); - kfree(bucket); } static struct resource *zpci_alloc_bus_resource(unsigned long start, unsigned long size, @@ -801,16 +656,49 @@ static void zpci_free_iomap(struct zpci_dev *zdev, int entry) int pcibios_add_device(struct pci_dev *pdev) { struct zpci_dev *zdev = get_zdev(pdev); + struct resource *res; + int i; + + zdev->pdev = pdev; + zpci_map_resources(zdev); + + for (i = 0; i < PCI_BAR_COUNT; i++) { + res = &pdev->resource[i]; + if (res->parent || !res->flags) + continue; + pci_claim_resource(pdev, i); + } + + return 0; +} + +int pcibios_enable_device(struct pci_dev *pdev, int mask) +{ + struct zpci_dev *zdev = get_zdev(pdev); + struct resource *res; + u16 cmd; + int i; zdev->pdev = pdev; zpci_debug_init_device(zdev); zpci_fmb_enable_device(zdev); zpci_map_resources(zdev); + pci_read_config_word(pdev, PCI_COMMAND, &cmd); + for (i = 0; i < PCI_BAR_COUNT; i++) { + res = &pdev->resource[i]; + + if (res->flags & IORESOURCE_IO) + return -EINVAL; + + if (res->flags & IORESOURCE_MEM) + cmd |= PCI_COMMAND_MEMORY; + } + pci_write_config_word(pdev, PCI_COMMAND, cmd); return 0; } -void pcibios_release_device(struct pci_dev *pdev) +void pcibios_disable_device(struct pci_dev *pdev) { struct zpci_dev *zdev = get_zdev(pdev); @@ -898,6 +786,8 @@ int zpci_enable_device(struct zpci_dev *zdev) rc = zpci_dma_init_device(zdev); if (rc) goto out_dma; + + zdev->state = ZPCI_FN_STATE_ONLINE; return 0; out_dma: @@ -926,18 +816,16 @@ int zpci_create_device(struct zpci_dev *zdev) rc = zpci_enable_device(zdev); if (rc) goto out_free; - - zdev->state = ZPCI_FN_STATE_ONLINE; } rc = zpci_scan_bus(zdev); if (rc) goto out_disable; - mutex_lock(&zpci_list_lock); + spin_lock(&zpci_list_lock); list_add_tail(&zdev->entry, &zpci_list); - if (hotplug_ops) - hotplug_ops->create_slot(zdev); - mutex_unlock(&zpci_list_lock); + spin_unlock(&zpci_list_lock); + + zpci_init_slot(zdev); return 0; @@ -967,15 +855,10 @@ static inline int barsize(u8 size) static int zpci_mem_init(void) { - zdev_irq_cache = kmem_cache_create("PCI_IRQ_cache", sizeof(struct zdev_irq_map), - L1_CACHE_BYTES, SLAB_HWCACHE_ALIGN, NULL); - if (!zdev_irq_cache) - goto error_zdev; - zdev_fmb_cache = kmem_cache_create("PCI_FMB_cache", sizeof(struct zpci_fmb), 16, 0, NULL); if (!zdev_fmb_cache) - goto error_fmb; + goto error_zdev; /* TODO: use realloc */ zpci_iomap_start = kzalloc(ZPCI_IOMAP_MAX_ENTRIES * sizeof(*zpci_iomap_start), @@ -986,8 +869,6 @@ static int zpci_mem_init(void) error_iomap: kmem_cache_destroy(zdev_fmb_cache); -error_fmb: - kmem_cache_destroy(zdev_irq_cache); error_zdev: return -ENOMEM; } @@ -995,28 +876,10 @@ error_zdev: static void zpci_mem_exit(void) { kfree(zpci_iomap_start); - kmem_cache_destroy(zdev_irq_cache); kmem_cache_destroy(zdev_fmb_cache); } -void zpci_register_hp_ops(struct 
pci_hp_callback_ops *ops) -{ - mutex_lock(&zpci_list_lock); - hotplug_ops = ops; - mutex_unlock(&zpci_list_lock); -} -EXPORT_SYMBOL_GPL(zpci_register_hp_ops); - -void zpci_deregister_hp_ops(void) -{ - mutex_lock(&zpci_list_lock); - hotplug_ops = NULL; - mutex_unlock(&zpci_list_lock); -} -EXPORT_SYMBOL_GPL(zpci_deregister_hp_ops); - -unsigned int s390_pci_probe; -EXPORT_SYMBOL_GPL(s390_pci_probe); +static unsigned int s390_pci_probe; char * __init pcibios_setup(char *str) { @@ -1044,16 +907,12 @@ static int __init pci_base_init(void) rc = zpci_debug_init(); if (rc) - return rc; + goto out; rc = zpci_mem_init(); if (rc) goto out_mem; - rc = zpci_msihash_init(); - if (rc) - goto out_hash; - rc = zpci_irq_init(); if (rc) goto out_irq; @@ -1062,7 +921,7 @@ static int __init pci_base_init(void) if (rc) goto out_dma; - rc = clp_find_pci_devices(); + rc = clp_scan_pci_devices(); if (rc) goto out_find; @@ -1073,11 +932,15 @@ out_find: out_dma: zpci_irq_exit(); out_irq: - zpci_msihash_exit(); -out_hash: zpci_mem_exit(); out_mem: zpci_debug_exit(); +out: return rc; } -subsys_initcall(pci_base_init); +subsys_initcall_sync(pci_base_init); + +void zpci_rescan(void) +{ + clp_rescan_pci_devices_simple(); +} diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c index 2e9539625d93..475563c3d1e4 100644 --- a/arch/s390/pci/pci_clp.c +++ b/arch/s390/pci/pci_clp.c @@ -36,9 +36,9 @@ static inline u8 clp_instr(void *data) return cc; } -static void *clp_alloc_block(void) +static void *clp_alloc_block(gfp_t gfp_mask) { - return (void *) __get_free_pages(GFP_KERNEL, get_order(CLP_BLK_SIZE)); + return (void *) __get_free_pages(gfp_mask, get_order(CLP_BLK_SIZE)); } static void clp_free_block(void *ptr) @@ -70,7 +70,7 @@ static int clp_query_pci_fngrp(struct zpci_dev *zdev, u8 pfgid) struct clp_req_rsp_query_pci_grp *rrb; int rc; - rrb = clp_alloc_block(); + rrb = clp_alloc_block(GFP_KERNEL); if (!rrb) return -ENOMEM; @@ -113,7 +113,7 @@ static int clp_query_pci_fn(struct zpci_dev *zdev, u32 fh) struct clp_req_rsp_query_pci *rrb; int rc; - rrb = clp_alloc_block(); + rrb = clp_alloc_block(GFP_KERNEL); if (!rrb) return -ENOMEM; @@ -179,9 +179,9 @@ error: static int clp_set_pci_fn(u32 *fh, u8 nr_dma_as, u8 command) { struct clp_req_rsp_set_pci *rrb; - int rc, retries = 1000; + int rc, retries = 100; - rrb = clp_alloc_block(); + rrb = clp_alloc_block(GFP_KERNEL); if (!rrb) return -ENOMEM; @@ -199,7 +199,7 @@ static int clp_set_pci_fn(u32 *fh, u8 nr_dma_as, u8 command) retries--; if (retries < 0) break; - msleep(1); + msleep(20); } } while (rrb->response.hdr.rsp == CLP_RC_SETPCIFN_BUSY); @@ -245,49 +245,12 @@ int clp_disable_fh(struct zpci_dev *zdev) return rc; } -static void clp_check_pcifn_entry(struct clp_fh_list_entry *entry) +static int clp_list_pci(struct clp_req_rsp_list_pci *rrb, + void (*cb)(struct clp_fh_list_entry *entry)) { - int present, rc; - - if (!entry->vendor_id) - return; - - /* TODO: be a little bit more scalable */ - present = zpci_fid_present(entry->fid); - - if (present) - pr_debug("%s: device %x already present\n", __func__, entry->fid); - - /* skip already used functions */ - if (present && entry->config_state) - return; - - /* aev 306: function moved to stand-by state */ - if (present && !entry->config_state) { - /* - * The handle is already disabled, that means no iota/irq freeing via - * the firmware interfaces anymore. Need to free resources manually - * (DMA memory, debug, sysfs)... 
- */ - zpci_stop_device(get_zdev_by_fid(entry->fid)); - return; - } - - rc = clp_add_pci_device(entry->fid, entry->fh, entry->config_state); - if (rc) - pr_err("Failed to add fid: 0x%x\n", entry->fid); -} - -int clp_find_pci_devices(void) -{ - struct clp_req_rsp_list_pci *rrb; u64 resume_token = 0; int entries, i, rc; - rrb = clp_alloc_block(); - if (!rrb) - return -ENOMEM; - do { memset(rrb, 0, sizeof(*rrb)); rrb->request.hdr.len = sizeof(rrb->request); @@ -316,12 +279,101 @@ int clp_find_pci_devices(void) resume_token = rrb->response.resume_token; for (i = 0; i < entries; i++) - clp_check_pcifn_entry(&rrb->response.fh_list[i]); + cb(&rrb->response.fh_list[i]); } while (resume_token); pr_debug("Maximum number of supported PCI functions: %u\n", rrb->response.max_fn); out: + return rc; +} + +static void __clp_add(struct clp_fh_list_entry *entry) +{ + if (!entry->vendor_id) + return; + + clp_add_pci_device(entry->fid, entry->fh, entry->config_state); +} + +static void __clp_rescan(struct clp_fh_list_entry *entry) +{ + struct zpci_dev *zdev; + + if (!entry->vendor_id) + return; + + zdev = get_zdev_by_fid(entry->fid); + if (!zdev) { + clp_add_pci_device(entry->fid, entry->fh, entry->config_state); + return; + } + + if (!entry->config_state) { + /* + * The handle is already disabled, that means no iota/irq freeing via + * the firmware interfaces anymore. Need to free resources manually + * (DMA memory, debug, sysfs)... + */ + zpci_stop_device(zdev); + } +} + +static void __clp_update(struct clp_fh_list_entry *entry) +{ + struct zpci_dev *zdev; + + if (!entry->vendor_id) + return; + + zdev = get_zdev_by_fid(entry->fid); + if (!zdev) + return; + + zdev->fh = entry->fh; +} + +int clp_scan_pci_devices(void) +{ + struct clp_req_rsp_list_pci *rrb; + int rc; + + rrb = clp_alloc_block(GFP_KERNEL); + if (!rrb) + return -ENOMEM; + + rc = clp_list_pci(rrb, __clp_add); + + clp_free_block(rrb); + return rc; +} + +int clp_rescan_pci_devices(void) +{ + struct clp_req_rsp_list_pci *rrb; + int rc; + + rrb = clp_alloc_block(GFP_KERNEL); + if (!rrb) + return -ENOMEM; + + rc = clp_list_pci(rrb, __clp_rescan); + + clp_free_block(rrb); + return rc; +} + +int clp_rescan_pci_devices_simple(void) +{ + struct clp_req_rsp_list_pci *rrb; + int rc; + + rrb = clp_alloc_block(GFP_NOWAIT); + if (!rrb) + return -ENOMEM; + + rc = clp_list_pci(rrb, __clp_update); + clp_free_block(rrb); return rc; } diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c index a2343c1f6e04..7e5573acb063 100644 --- a/arch/s390/pci/pci_dma.c +++ b/arch/s390/pci/pci_dma.c @@ -10,6 +10,7 @@ #include <linux/export.h> #include <linux/iommu-helper.h> #include <linux/dma-mapping.h> +#include <linux/vmalloc.h> #include <linux/pci.h> #include <asm/pci_dma.h> @@ -170,8 +171,8 @@ static int dma_update_trans(struct zpci_dev *zdev, unsigned long pa, */ goto no_refresh; - rc = s390pci_refresh_trans((u64) zdev->fh << 32, start_dma_addr, - nr_pages * PAGE_SIZE); + rc = zpci_refresh_trans((u64) zdev->fh << 32, start_dma_addr, + nr_pages * PAGE_SIZE); no_refresh: spin_unlock_irqrestore(&zdev->dma_table_lock, irq_flags); @@ -407,7 +408,6 @@ static void s390_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int zpci_dma_init_device(struct zpci_dev *zdev) { - unsigned int bitmap_order; int rc; spin_lock_init(&zdev->iommu_bitmap_lock); @@ -421,12 +421,7 @@ int zpci_dma_init_device(struct zpci_dev *zdev) zdev->iommu_size = (unsigned long) high_memory - PAGE_OFFSET; zdev->iommu_pages = zdev->iommu_size >> PAGE_SHIFT; - bitmap_order = 
get_order(zdev->iommu_pages / 8); - pr_info("iommu_size: 0x%lx iommu_pages: 0x%lx bitmap_order: %i\n", - zdev->iommu_size, zdev->iommu_pages, bitmap_order); - - zdev->iommu_bitmap = (void *) __get_free_pages(GFP_KERNEL | __GFP_ZERO, - bitmap_order); + zdev->iommu_bitmap = vzalloc(zdev->iommu_pages / 8); if (!zdev->iommu_bitmap) { rc = -ENOMEM; goto out_reg; @@ -451,8 +446,7 @@ void zpci_dma_exit_device(struct zpci_dev *zdev) { zpci_unregister_ioat(zdev, 0); dma_cleanup_tables(zdev); - free_pages((unsigned long) zdev->iommu_bitmap, - get_order(zdev->iommu_pages / 8)); + vfree(zdev->iommu_bitmap); zdev->iommu_bitmap = NULL; zdev->next_bit = 0; } diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c index ec62e3a0dc09..0aecaf954845 100644 --- a/arch/s390/pci/pci_event.c +++ b/arch/s390/pci/pci_event.c @@ -69,7 +69,7 @@ static void zpci_event_log_avail(struct zpci_ccdf_avail *ccdf) clp_add_pci_device(ccdf->fid, ccdf->fh, 0); break; case 0x0306: - clp_find_pci_devices(); + clp_rescan_pci_devices(); break; default: break; diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c index 22eeb9d7ffeb..85267c058af8 100644 --- a/arch/s390/pci/pci_insn.c +++ b/arch/s390/pci/pci_insn.c @@ -27,7 +27,7 @@ static inline u8 __mpcifc(u64 req, struct zpci_fib *fib, u8 *status) return cc; } -int s390pci_mod_fc(u64 req, struct zpci_fib *fib) +int zpci_mod_fc(u64 req, struct zpci_fib *fib) { u8 cc, status; @@ -61,7 +61,7 @@ static inline u8 __rpcit(u64 fn, u64 addr, u64 range, u8 *status) return cc; } -int s390pci_refresh_trans(u64 fn, u64 addr, u64 range) +int zpci_refresh_trans(u64 fn, u64 addr, u64 range) { u8 cc, status; @@ -78,7 +78,7 @@ int s390pci_refresh_trans(u64 fn, u64 addr, u64 range) } /* Set Interruption Controls */ -void set_irq_ctrl(u16 ctl, char *unused, u8 isc) +void zpci_set_irq_ctrl(u16 ctl, char *unused, u8 isc) { asm volatile ( " .insn rsy,0xeb00000000d1,%[ctl],%[isc],%[u]\n" @@ -109,7 +109,7 @@ static inline int __pcilg(u64 *data, u64 req, u64 offset, u8 *status) return cc; } -int s390pci_load(u64 *data, u64 req, u64 offset) +int zpci_load(u64 *data, u64 req, u64 offset) { u8 status; int cc; @@ -125,7 +125,7 @@ int s390pci_load(u64 *data, u64 req, u64 offset) __func__, cc, status, req, offset); return (cc > 0) ? -EIO : cc; } -EXPORT_SYMBOL_GPL(s390pci_load); +EXPORT_SYMBOL_GPL(zpci_load); /* PCI Store */ static inline int __pcistg(u64 data, u64 req, u64 offset, u8 *status) @@ -147,7 +147,7 @@ static inline int __pcistg(u64 data, u64 req, u64 offset, u8 *status) return cc; } -int s390pci_store(u64 data, u64 req, u64 offset) +int zpci_store(u64 data, u64 req, u64 offset) { u8 status; int cc; @@ -163,7 +163,7 @@ int s390pci_store(u64 data, u64 req, u64 offset) __func__, cc, status, req, offset); return (cc > 0) ? -EIO : cc; } -EXPORT_SYMBOL_GPL(s390pci_store); +EXPORT_SYMBOL_GPL(zpci_store); /* PCI Store Block */ static inline int __pcistb(const u64 *data, u64 req, u64 offset, u8 *status) @@ -183,7 +183,7 @@ static inline int __pcistb(const u64 *data, u64 req, u64 offset, u8 *status) return cc; } -int s390pci_store_block(const u64 *data, u64 req, u64 offset) +int zpci_store_block(const u64 *data, u64 req, u64 offset) { u8 status; int cc; @@ -199,4 +199,4 @@ int s390pci_store_block(const u64 *data, u64 req, u64 offset) __func__, cc, status, req, offset); return (cc > 0) ? 
-EIO : cc; } -EXPORT_SYMBOL_GPL(s390pci_store_block); +EXPORT_SYMBOL_GPL(zpci_store_block); diff --git a/arch/s390/pci/pci_msi.c b/arch/s390/pci/pci_msi.c deleted file mode 100644 index b097aed05a9b..000000000000 --- a/arch/s390/pci/pci_msi.c +++ /dev/null @@ -1,142 +0,0 @@ -/* - * Copyright IBM Corp. 2012 - * - * Author(s): - * Jan Glauber <jang@linux.vnet.ibm.com> - */ - -#define COMPONENT "zPCI" -#define pr_fmt(fmt) COMPONENT ": " fmt - -#include <linux/kernel.h> -#include <linux/err.h> -#include <linux/rculist.h> -#include <linux/hash.h> -#include <linux/pci.h> -#include <linux/msi.h> -#include <asm/hw_irq.h> - -/* mapping of irq numbers to msi_desc */ -static struct hlist_head *msi_hash; -static const unsigned int msi_hash_bits = 8; -#define MSI_HASH_BUCKETS (1U << msi_hash_bits) -#define msi_hashfn(nr) hash_long(nr, msi_hash_bits) - -static DEFINE_SPINLOCK(msi_map_lock); - -struct msi_desc *__irq_get_msi_desc(unsigned int irq) -{ - struct msi_map *map; - - hlist_for_each_entry_rcu(map, - &msi_hash[msi_hashfn(irq)], msi_chain) - if (map->irq == irq) - return map->msi; - return NULL; -} - -int zpci_msi_set_mask_bits(struct msi_desc *msi, u32 mask, u32 flag) -{ - if (msi->msi_attrib.is_msix) { - int offset = msi->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE + - PCI_MSIX_ENTRY_VECTOR_CTRL; - msi->masked = readl(msi->mask_base + offset); - writel(flag, msi->mask_base + offset); - } else { - if (msi->msi_attrib.maskbit) { - int pos; - u32 mask_bits; - - pos = (long) msi->mask_base; - pci_read_config_dword(msi->dev, pos, &mask_bits); - mask_bits &= ~(mask); - mask_bits |= flag & mask; - pci_write_config_dword(msi->dev, pos, mask_bits); - } else { - return 0; - } - } - - msi->msi_attrib.maskbit = !!flag; - return 1; -} - -int zpci_setup_msi_irq(struct zpci_dev *zdev, struct msi_desc *msi, - unsigned int nr, int offset) -{ - struct msi_map *map; - struct msi_msg msg; - int rc; - - map = kmalloc(sizeof(*map), GFP_KERNEL); - if (map == NULL) - return -ENOMEM; - - map->irq = nr; - map->msi = msi; - zdev->msi_map[nr & ZPCI_MSI_MASK] = map; - INIT_HLIST_NODE(&map->msi_chain); - - pr_debug("%s hashing irq: %u to bucket nr: %llu\n", - __func__, nr, msi_hashfn(nr)); - hlist_add_head_rcu(&map->msi_chain, &msi_hash[msi_hashfn(nr)]); - - spin_lock(&msi_map_lock); - rc = irq_set_msi_desc(nr, msi); - if (rc) { - spin_unlock(&msi_map_lock); - hlist_del_rcu(&map->msi_chain); - kfree(map); - zdev->msi_map[nr & ZPCI_MSI_MASK] = NULL; - return rc; - } - spin_unlock(&msi_map_lock); - - msg.data = nr - offset; - msg.address_lo = zdev->msi_addr & 0xffffffff; - msg.address_hi = zdev->msi_addr >> 32; - write_msi_msg(nr, &msg); - return 0; -} - -void zpci_teardown_msi_irq(struct zpci_dev *zdev, struct msi_desc *msi) -{ - int irq = msi->irq & ZPCI_MSI_MASK; - struct msi_map *map; - - msi->msg.address_lo = 0; - msi->msg.address_hi = 0; - msi->msg.data = 0; - msi->irq = 0; - zpci_msi_set_mask_bits(msi, 1, 1); - - spin_lock(&msi_map_lock); - map = zdev->msi_map[irq]; - hlist_del_rcu(&map->msi_chain); - kfree(map); - zdev->msi_map[irq] = NULL; - spin_unlock(&msi_map_lock); -} - -/* - * The msi hash table has 256 entries which is good for 4..20 - * devices (a typical device allocates 10 + CPUs MSI's). Maybe make - * the hash table size adjustable later. 
- */ -int __init zpci_msihash_init(void) -{ - unsigned int i; - - msi_hash = kmalloc(MSI_HASH_BUCKETS * sizeof(*msi_hash), GFP_KERNEL); - if (!msi_hash) - return -ENOMEM; - - for (i = 0; i < MSI_HASH_BUCKETS; i++) - INIT_HLIST_HEAD(&msi_hash[i]); - return 0; -} - -void __init zpci_msihash_exit(void) -{ - kfree(msi_hash); -} diff --git a/arch/s390/pci/pci_sysfs.c b/arch/s390/pci/pci_sysfs.c index e99a2557f186..cf8a12ff733b 100644 --- a/arch/s390/pci/pci_sysfs.c +++ b/arch/s390/pci/pci_sysfs.c @@ -48,11 +48,38 @@ static ssize_t show_pfgid(struct device *dev, struct device_attribute *attr, } static DEVICE_ATTR(pfgid, S_IRUGO, show_pfgid, NULL); +static void recover_callback(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct zpci_dev *zdev = get_zdev(pdev); + int ret; + + pci_stop_and_remove_bus_device(pdev); + ret = zpci_disable_device(zdev); + if (ret) + return; + + ret = zpci_enable_device(zdev); + if (ret) + return; + + pci_rescan_bus(zdev->bus); +} + +static ssize_t store_recover(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + int rc = device_schedule_callback(dev, recover_callback); + return rc ? rc : count; +} +static DEVICE_ATTR(recover, S_IWUSR, NULL, store_recover); + static struct device_attribute *zpci_dev_attrs[] = { &dev_attr_function_id, &dev_attr_function_handle, &dev_attr_pchid, &dev_attr_pfgid, + &dev_attr_recover, NULL, }; diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 1020dd85431a..1018ed3a3ca5 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -643,9 +643,9 @@ config KEXEC It is an ongoing process to be certain the hardware in a machine is properly shutdown, so do not be surprised if this code does not - initially work for you. It may help to enable device hotplugging - support. As of this writing the exact hardware interface is - strongly in flux, so no good recommendation can be made. + initially work for you. As of this writing the exact hardware + interface is strongly in flux, so no good recommendation can be + made. 
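A note on the pci_sysfs.c hunk above: the new "recover" attribute is write-only (S_IWUSR), and store_recover() never parses the written bytes, so any write schedules recover_callback() to tear the function down and rescan it. A minimal userspace trigger, as an illustrative sketch only — the function address below is made up:

#include <stdio.h>

int main(void)
{
	/* Path is hypothetical; substitute a real zPCI function address. */
	FILE *f = fopen("/sys/bus/pci/devices/0000:00:00.0/recover", "w");

	if (!f)
		return 1;
	fputs("1\n", f);	/* value is ignored; the write itself triggers recovery */
	return fclose(f) ? 1 : 0;
}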
config CRASH_DUMP bool "kernel crash dumps (EXPERIMENTAL)" diff --git a/arch/sh/drivers/pci/pci.c b/arch/sh/drivers/pci/pci.c index 102f5d58b037..60ed3e1c4b75 100644 --- a/arch/sh/drivers/pci/pci.c +++ b/arch/sh/drivers/pci/pci.c @@ -69,7 +69,6 @@ static void pcibios_scanbus(struct pci_channel *hose) pci_bus_size_bridges(bus); pci_bus_assign_resources(bus); - pci_enable_bridges(bus); } else { pci_free_resource_list(&resources); } diff --git a/arch/sh/kernel/cpu/shmobile/cpuidle.c b/arch/sh/kernel/cpu/shmobile/cpuidle.c index d30622592116..e3abfd4277e2 100644 --- a/arch/sh/kernel/cpu/shmobile/cpuidle.c +++ b/arch/sh/kernel/cpu/shmobile/cpuidle.c @@ -91,13 +91,11 @@ static struct cpuidle_driver cpuidle_driver = { int __init sh_mobile_setup_cpuidle(void) { - int ret; - if (sh_mobile_sleep_supported & SUSP_SH_SF) cpuidle_driver.states[1].disabled = false; if (sh_mobile_sleep_supported & SUSP_SH_STANDBY) cpuidle_driver.states[2].disabled = false; - return cpuidle_register(&cpuidle_driver); + return cpuidle_register(&cpuidle_driver, NULL); } diff --git a/arch/tile/include/asm/topology.h b/arch/tile/include/asm/topology.h index d5e86c9f74fd..d15c0d8d550f 100644 --- a/arch/tile/include/asm/topology.h +++ b/arch/tile/include/asm/topology.h @@ -89,9 +89,6 @@ static inline const struct cpumask *cpumask_of_node(int node) #define topology_core_id(cpu) (cpu) #define topology_core_cpumask(cpu) ((void)(cpu), cpu_online_mask) #define topology_thread_cpumask(cpu) cpumask_of(cpu) - -/* indicates that pointers to the topology struct cpumask maps are valid */ -#define arch_provides_topology_pointers yes #endif #endif /* _ASM_TILE_TOPOLOGY_H */ diff --git a/arch/tile/kernel/pci_gx.c b/arch/tile/kernel/pci_gx.c index 11425633b2d7..6640e7bbeaa2 100644 --- a/arch/tile/kernel/pci_gx.c +++ b/arch/tile/kernel/pci_gx.c @@ -508,13 +508,8 @@ static void fixup_read_and_payload_sizes(struct pci_controller *controller) rc_dev_cap.word); /* Configure PCI Express MPS setting. */ - list_for_each_entry(child, &root_bus->children, node) { - struct pci_dev *self = child->self; - if (!self) - continue; - - pcie_bus_configure_settings(child, self->pcie_mpss); - } + list_for_each_entry(child, &root_bus->children, node) + pcie_bus_configure_settings(child); /* * Set the mac_config register in trio based on the MPS/MRS of the link. diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index b32ebf92b0ce..5c0ed72c02a2 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -16,6 +16,7 @@ config X86_64 def_bool y depends on 64BIT select X86_DEV_DMA_OPS + select ARCH_USE_CMPXCHG_LOCKREF ### Arch settings config X86 @@ -81,7 +82,6 @@ config X86 select HAVE_USER_RETURN_NOTIFIER select ARCH_BINFMT_ELF_RANDOMIZE_PIE select HAVE_ARCH_JUMP_LABEL - select HAVE_TEXT_POKE_SMP select HAVE_GENERIC_HARDIRQS select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE select SPARSE_IRQ @@ -632,6 +632,7 @@ config PARAVIRT_DEBUG config PARAVIRT_SPINLOCKS bool "Paravirtualization layer for spinlocks" depends on PARAVIRT && SMP + select UNINLINE_SPIN_UNLOCK ---help--- Paravirtualized spinlocks allow a pvops backend to replace the spinlock implementation with something virtualization-friendly @@ -656,6 +657,15 @@ config KVM_GUEST underlying device model, the host provides the guest with timing infrastructure such as time of day, and system time +config KVM_DEBUG_FS + bool "Enable debug information for KVM Guests in debugfs" + depends on KVM_GUEST && DEBUG_FS + default n + ---help--- + This option enables collection of various statistics for KVM guest. 
+ Statistics are displayed in the debugfs filesystem. Enabling this option + may incur significant overhead. + source "arch/x86/lguest/Kconfig" config PARAVIRT_TIME_ACCOUNTING @@ -1344,8 +1354,12 @@ config ARCH_SELECT_MEMORY_MODEL depends on ARCH_SPARSEMEM_ENABLE config ARCH_MEMORY_PROBE - def_bool y + bool "Enable sysfs memory/probe interface" depends on X86_64 && MEMORY_HOTPLUG + help + This option enables a sysfs memory/probe interface for testing. + See Documentation/memory-hotplug.txt for more information. + If you are unsure how to answer this question, answer N. config ARCH_PROC_KCORE_TEXT def_bool y @@ -1627,9 +1641,9 @@ config KEXEC It is an ongoing process to be certain the hardware in a machine is properly shutdown, so do not be surprised if this code does not - initially work for you. It may help to enable device hotplugging - support. As of this writing the exact hardware interface is - strongly in flux, so no good recommendation can be made. + initially work for you. As of this writing the exact hardware + interface is strongly in flux, so no good recommendation can be + made. config CRASH_DUMP bool "kernel crash dumps" @@ -1716,9 +1730,10 @@ config X86_NEED_RELOCS depends on X86_32 && RELOCATABLE config PHYSICAL_ALIGN - hex "Alignment value to which kernel should be aligned" if X86_32 + hex "Alignment value to which kernel should be aligned" default "0x1000000" - range 0x2000 0x1000000 + range 0x2000 0x1000000 if X86_32 + range 0x200000 0x1000000 if X86_64 ---help--- This value puts the alignment restrictions on physical address where kernel is loaded and run from. Kernel is compiled for an @@ -1736,6 +1751,9 @@ config PHYSICAL_ALIGN end result is that kernel runs from a physical address meeting above alignment restrictions. + On 32-bit this value must be a multiple of 0x2000. On 64-bit + this value must be a multiple of 0x200000. + Don't change this unless you know what you are doing. config HOTPLUG_CPU @@ -2270,6 +2288,32 @@ config RAPIDIO source "drivers/rapidio/Kconfig" +config X86_SYSFB + bool "Mark VGA/VBE/EFI FB as generic system framebuffer" + help + Firmwares often provide initial graphics framebuffers so the BIOS, + bootloader or kernel can show basic video-output during boot for + user-guidance and debugging. Historically, x86 used the VESA BIOS + Extensions and EFI-framebuffers for this, which are mostly limited + to x86. + This option, if enabled, marks VGA/VBE/EFI framebuffers as generic + framebuffers so the new generic system-framebuffer drivers can be + used on x86. If the framebuffer is not compatible with the generic + modes, it is advertised as a fallback platform framebuffer so legacy + drivers like efifb, vesafb and uvesafb can pick it up. + If this option is not selected, all system framebuffers are always + marked as fallback platform framebuffers as usual. + + Note: Legacy fbdev drivers, including vesafb, efifb, uvesafb, will + not be able to pick up generic system framebuffers if this option + is selected. You are highly encouraged to enable simplefb as + a replacement if you select this option. simplefb can correctly deal + with generic system framebuffers. But you should still keep vesafb + and others enabled as a fallback if a system framebuffer is + incompatible with simplefb. + + If unsure, say Y. 
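The PHYSICAL_ALIGN hunk above states its constraint in prose; as a predicate it is just the Kconfig range plus a per-arch multiple check. The following sketch is ours, not the patch's — the kernel enforces this through the Kconfig ranges shown above, not through runtime code:

#include <stdbool.h>

/* Steps per the help text: 0x2000 on 32-bit, 0x200000 (2 MiB) on 64-bit. */
static bool phys_align_valid(unsigned long align, bool is_64bit)
{
	unsigned long step = is_64bit ? 0x200000UL : 0x2000UL;

	/* Inside the Kconfig range and a multiple of the per-arch step. */
	return align >= step && align <= 0x1000000UL && (align % step) == 0;
}

int main(void)
{
	/* The default of 0x1000000 satisfies both variants. */
	return phys_align_valid(0x1000000UL, true) &&
	       phys_align_valid(0x1000000UL, false) ? 0 : 1;
}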
+ endmenu @@ -2332,10 +2376,6 @@ config HAVE_ATOMIC_IOMAP def_bool y depends on X86_32 -config HAVE_TEXT_POKE_SMP - bool - select STOP_MACHINE if SMP - config X86_DEV_DMA_OPS bool depends on X86_64 || STA2X11 diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 07639c656fcd..41250fb33985 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -16,6 +16,10 @@ endif # e.g.: obj-y += foo_$(BITS).o export BITS +ifdef CONFIG_X86_NEED_RELOCS + LDFLAGS_vmlinux := --emit-relocs +endif + ifeq ($(CONFIG_X86_32),y) BITS := 32 UTS_MACHINE := i386 @@ -25,10 +29,6 @@ ifeq ($(CONFIG_X86_32),y) KBUILD_AFLAGS += $(biarch) KBUILD_CFLAGS += $(biarch) - ifdef CONFIG_RELOCATABLE - LDFLAGS_vmlinux := --emit-relocs - endif - KBUILD_CFLAGS += -msoft-float -mregparm=3 -freg-struct-return # Never want PIC in a 32-bit kernel, prevent breakage with GCC built diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h index 5b7531966b84..ef72baeff484 100644 --- a/arch/x86/boot/boot.h +++ b/arch/x86/boot/boot.h @@ -355,6 +355,7 @@ int strncmp(const char *cs, const char *ct, size_t count); size_t strnlen(const char *s, size_t maxlen); unsigned int atou(const char *s); unsigned long long simple_strtoull(const char *cp, char **endp, unsigned int base); +size_t strlen(const char *s); /* tty.c */ void puts(const char *); diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index 1e3184f6072f..5d6f6891b188 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -181,8 +181,9 @@ relocated: /* * Do the decompression, and jump to the new kernel.. */ - leal z_extract_offset_negative(%ebx), %ebp /* push arguments for decompress_kernel: */ + pushl $z_output_len /* decompressed length */ + leal z_extract_offset_negative(%ebx), %ebp pushl %ebp /* output address */ pushl $z_input_len /* input_len */ leal input_data(%ebx), %eax @@ -191,33 +192,7 @@ relocated: pushl %eax /* heap area */ pushl %esi /* real mode pointer */ call decompress_kernel - addl $20, %esp - -#if CONFIG_RELOCATABLE -/* - * Find the address of the relocations. - */ - leal z_output_len(%ebp), %edi - -/* - * Calculate the delta between where vmlinux was compiled to run - * and where it was actually loaded. - */ - movl %ebp, %ebx - subl $LOAD_PHYSICAL_ADDR, %ebx - jz 2f /* Nothing to be done if loaded at compiled addr. */ -/* - * Process relocations. - */ - -1: subl $4, %edi - movl (%edi), %ecx - testl %ecx, %ecx - jz 2f - addl %ebx, -__PAGE_OFFSET(%ebx, %ecx) - jmp 1b -2: -#endif + addl $24, %esp /* * Jump to the decompressed kernel. 
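The relocation walk deleted from head_32.S above does not disappear; it reappears in C as handle_relocations() in the misc.c hunk below. The table layout it consumes — two zero-terminated lists at the end of the image, with the 32-bit entries encountered first when walking backwards — can be seen in a self-contained toy (the entry values are made up; real entries are kernel virtual addresses):

#include <stdio.h>

int main(void)
{
	/* End of a toy image: 0, <64-bit list>, 0, <32-bit list>. */
	int table[] = { 0, -1234, -5678,	/* 64-bit entries, zero-terminated */
			0, 0x2000, 0x2004 };	/* 32-bit entries, zero-terminated */
	int *reloc = &table[sizeof(table) / sizeof(table[0]) - 1];

	for (; *reloc; reloc--)		/* 32-bit entries come first */
		printf("32-bit reloc, stored value 0x%x\n", *reloc);
	for (reloc--; *reloc; reloc--)	/* then the 64-bit entries */
		printf("64-bit reloc, sign-extended: %ld\n", (long)*reloc);
	return 0;
}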
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 06e71c2c16bf..c337422b575d 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -338,6 +338,7 @@ relocated: leaq input_data(%rip), %rdx /* input_data */ movl $z_input_len, %ecx /* input_len */ movq %rbp, %r8 /* output target address */ + movq $z_output_len, %r9 /* decompressed length */ call decompress_kernel popq %rsi diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index 0319c88290a5..434f077d2c4d 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -271,6 +271,79 @@ static void error(char *x) asm("hlt"); } +#if CONFIG_X86_NEED_RELOCS +static void handle_relocations(void *output, unsigned long output_len) +{ + int *reloc; + unsigned long delta, map, ptr; + unsigned long min_addr = (unsigned long)output; + unsigned long max_addr = min_addr + output_len; + + /* + * Calculate the delta between where vmlinux was linked to load + * and where it was actually loaded. + */ + delta = min_addr - LOAD_PHYSICAL_ADDR; + if (!delta) { + debug_putstr("No relocation needed... "); + return; + } + debug_putstr("Performing relocations... "); + + /* + * The kernel contains a table of relocation addresses. Those + * addresses have the final load address of the kernel in virtual + * memory. We are currently working in the self map. So we need to + * create an adjustment for kernel memory addresses to the self map. + * This will involve subtracting out the base address of the kernel. + */ + map = delta - __START_KERNEL_map; + + /* + * Process relocations: 32 bit relocations first then 64 bit after. + * Two sets of binary relocations are added to the end of the kernel + * before compression. Each relocation table entry is the kernel + * address of the location which needs to be updated stored as a + * 32-bit value which is sign extended to 64 bits. + * + * Format is: + * + * kernel bits... + * 0 - zero terminator for 64 bit relocations + * 64 bit relocation repeated + * 0 - zero terminator for 32 bit relocations + * 32 bit relocation repeated + * + * So we work backwards from the end of the decompressed image. + */ + for (reloc = output + output_len - sizeof(*reloc); *reloc; reloc--) { + int extended = *reloc; + extended += map; + + ptr = (unsigned long)extended; + if (ptr < min_addr || ptr > max_addr) + error("32-bit relocation outside of kernel!\n"); + + *(uint32_t *)ptr += delta; + } +#ifdef CONFIG_X86_64 + for (reloc--; *reloc; reloc--) { + long extended = *reloc; + extended += map; + + ptr = (unsigned long)extended; + if (ptr < min_addr || ptr > max_addr) + error("64-bit relocation outside of kernel!\n"); + + *(uint64_t *)ptr += delta; + } +#endif +} +#else +static inline void handle_relocations(void *output, unsigned long output_len) +{ } +#endif + static void parse_elf(void *output) { #ifdef CONFIG_X86_64 @@ -325,7 +398,8 @@ static void parse_elf(void *output) asmlinkage void decompress_kernel(void *rmode, memptr heap, unsigned char *input_data, unsigned long input_len, - unsigned char *output) + unsigned char *output, + unsigned long output_len) { real_mode = rmode; @@ -365,6 +439,7 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap, debug_putstr("\nDecompressing Linux... 
"); decompress(input_data, input_len, NULL, NULL, output, NULL, error); parse_elf(output); + handle_relocations(output, output_len); debug_putstr("done.\nBooting the kernel.\n"); return; } diff --git a/arch/x86/boot/printf.c b/arch/x86/boot/printf.c index cdac91ca55d3..565083c16e5c 100644 --- a/arch/x86/boot/printf.c +++ b/arch/x86/boot/printf.c @@ -55,7 +55,7 @@ static char *number(char *str, long num, int base, int size, int precision, locase = (type & SMALL); if (type & LEFT) type &= ~ZEROPAD; - if (base < 2 || base > 36) + if (base < 2 || base > 16) return NULL; c = (type & ZEROPAD) ? '0' : ' '; sign = 0; diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index bccfca68430e..665a730307f2 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -457,7 +457,7 @@ int ia32_setup_rt_frame(int sig, struct ksignal *ksig, else put_user_ex(0, &frame->uc.uc_flags); put_user_ex(0, &frame->uc.uc_link); - err |= __compat_save_altstack(&frame->uc.uc_stack, regs->sp); + compat_save_altstack_ex(&frame->uc.uc_stack, regs->sp); if (ksig->ka.sa.sa_flags & SA_RESTORER) restorer = ksig->ka.sa.sa_restorer; diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 474dc1b59f72..4299eb05023c 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -452,7 +452,7 @@ ia32_badsys: CFI_ENDPROC - .macro PTREGSCALL label, func, arg + .macro PTREGSCALL label, func ALIGN GLOBAL(\label) leaq \func(%rip),%rax diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index 2dfac58f3b11..b1977bad5435 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -86,6 +86,7 @@ extern int acpi_pci_disabled; extern int acpi_skip_timer_override; extern int acpi_use_timer_override; extern int acpi_fix_pin2_polarity; +extern int acpi_disable_cmcff; extern u8 acpi_sci_flags; extern int acpi_sci_override_gsi; @@ -168,6 +169,7 @@ static inline void arch_acpi_set_pdc_bits(u32 *buf) #define acpi_lapic 0 #define acpi_ioapic 0 +#define acpi_disable_cmcff 0 static inline void acpi_noirq_set(void) { } static inline void acpi_disable_pci(void) { } static inline void disable_acpi(void) { } diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 58ed6d96a6ac..0a3f9c9f98d5 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -5,6 +5,7 @@ #include <linux/stddef.h> #include <linux/stringify.h> #include <asm/asm.h> +#include <asm/ptrace.h> /* * Alternative inline assembly for SMP. @@ -220,20 +221,11 @@ extern void *text_poke_early(void *addr, const void *opcode, size_t len); * no thread can be preempted in the instructions being modified (no iret to an * invalid instruction possible) or if the instructions are changed from a * consistent state to another consistent state atomically. - * More care must be taken when modifying code in the SMP case because of - * Intel's errata. text_poke_smp() takes care that errata, but still - * doesn't support NMI/MCE handler code modifying. * On the local CPU you need to be protected again NMI or MCE handlers seeing an * inconsistent instruction while you patch. 
*/ -struct text_poke_param { - void *addr; - const void *opcode; - size_t len; -}; - extern void *text_poke(void *addr, const void *opcode, size_t len); -extern void *text_poke_smp(void *addr, const void *opcode, size_t len); -extern void text_poke_smp_batch(struct text_poke_param *params, int n); +extern int poke_int3_handler(struct pt_regs *regs); +extern void *text_poke_bp(void *addr, const void *opcode, size_t len, void *handler); #endif /* _ASM_X86_ALTERNATIVE_H */ diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index f8119b582c3c..1d2091a226bc 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -715,4 +715,6 @@ static inline void exiting_ack_irq(void) ack_APIC_irq(); } +extern void ioapic_zap_locks(void); + #endif /* _ASM_X86_APIC_H */ diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h index 1c2d247f65ce..4582e8e1cd1a 100644 --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h @@ -3,21 +3,25 @@ #ifdef __ASSEMBLY__ # define __ASM_FORM(x) x +# define __ASM_FORM_RAW(x) x # define __ASM_FORM_COMMA(x) x, #else # define __ASM_FORM(x) " " #x " " +# define __ASM_FORM_RAW(x) #x # define __ASM_FORM_COMMA(x) " " #x "," #endif #ifdef CONFIG_X86_32 # define __ASM_SEL(a,b) __ASM_FORM(a) +# define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(a) #else # define __ASM_SEL(a,b) __ASM_FORM(b) +# define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(b) #endif #define __ASM_SIZE(inst, ...) __ASM_SEL(inst##l##__VA_ARGS__, \ inst##q##__VA_ARGS__) -#define __ASM_REG(reg) __ASM_SEL(e##reg, r##reg) +#define __ASM_REG(reg) __ASM_SEL_RAW(e##reg, r##reg) #define _ASM_PTR __ASM_SEL(.long, .quad) #define _ASM_ALIGN __ASM_SEL(.balign 4, .balign 8) diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index 6dfd0195bb55..41639ce8fd63 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h @@ -15,6 +15,14 @@ #include <linux/compiler.h> #include <asm/alternative.h> +#if BITS_PER_LONG == 32 +# define _BITOPS_LONG_SHIFT 5 +#elif BITS_PER_LONG == 64 +# define _BITOPS_LONG_SHIFT 6 +#else +# error "Unexpected BITS_PER_LONG" +#endif + #define BIT_64(n) (U64_C(1) << (n)) /* @@ -59,7 +67,7 @@ * restricted to acting on a single-word quantity. */ static __always_inline void -set_bit(unsigned int nr, volatile unsigned long *addr) +set_bit(long nr, volatile unsigned long *addr) { if (IS_IMMEDIATE(nr)) { asm volatile(LOCK_PREFIX "orb %1,%0" @@ -81,7 +89,7 @@ set_bit(unsigned int nr, volatile unsigned long *addr) * If it's called on the same region of memory simultaneously, the effect * may be that only one operation succeeds. */ -static inline void __set_bit(int nr, volatile unsigned long *addr) +static inline void __set_bit(long nr, volatile unsigned long *addr) { asm volatile("bts %1,%0" : ADDR : "Ir" (nr) : "memory"); } @@ -97,7 +105,7 @@ static inline void __set_bit(int nr, volatile unsigned long *addr) * in order to ensure changes are visible on other processors. */ static __always_inline void -clear_bit(int nr, volatile unsigned long *addr) +clear_bit(long nr, volatile unsigned long *addr) { if (IS_IMMEDIATE(nr)) { asm volatile(LOCK_PREFIX "andb %1,%0" @@ -118,13 +126,13 @@ clear_bit(int nr, volatile unsigned long *addr) * clear_bit() is atomic and implies release semantics before the memory * operation. It can be used for an unlock. 
*/ -static inline void clear_bit_unlock(unsigned nr, volatile unsigned long *addr) +static inline void clear_bit_unlock(long nr, volatile unsigned long *addr) { barrier(); clear_bit(nr, addr); } -static inline void __clear_bit(int nr, volatile unsigned long *addr) +static inline void __clear_bit(long nr, volatile unsigned long *addr) { asm volatile("btr %1,%0" : ADDR : "Ir" (nr)); } @@ -141,7 +149,7 @@ static inline void __clear_bit(int nr, volatile unsigned long *addr) * No memory barrier is required here, because x86 cannot reorder stores past * older loads. Same principle as spin_unlock. */ -static inline void __clear_bit_unlock(unsigned nr, volatile unsigned long *addr) +static inline void __clear_bit_unlock(long nr, volatile unsigned long *addr) { barrier(); __clear_bit(nr, addr); @@ -159,7 +167,7 @@ static inline void __clear_bit_unlock(unsigned nr, volatile unsigned long *addr) * If it's called on the same region of memory simultaneously, the effect * may be that only one operation succeeds. */ -static inline void __change_bit(int nr, volatile unsigned long *addr) +static inline void __change_bit(long nr, volatile unsigned long *addr) { asm volatile("btc %1,%0" : ADDR : "Ir" (nr)); } @@ -173,7 +181,7 @@ static inline void __change_bit(int nr, volatile unsigned long *addr) * Note that @nr may be almost arbitrarily large; this function is not * restricted to acting on a single-word quantity. */ -static inline void change_bit(int nr, volatile unsigned long *addr) +static inline void change_bit(long nr, volatile unsigned long *addr) { if (IS_IMMEDIATE(nr)) { asm volatile(LOCK_PREFIX "xorb %1,%0" @@ -194,7 +202,7 @@ static inline void change_bit(int nr, volatile unsigned long *addr) * This operation is atomic and cannot be reordered. * It also implies a memory barrier. */ -static inline int test_and_set_bit(int nr, volatile unsigned long *addr) +static inline int test_and_set_bit(long nr, volatile unsigned long *addr) { int oldbit; @@ -212,7 +220,7 @@ static inline int test_and_set_bit(int nr, volatile unsigned long *addr) * This is the same as test_and_set_bit on x86. */ static __always_inline int -test_and_set_bit_lock(int nr, volatile unsigned long *addr) +test_and_set_bit_lock(long nr, volatile unsigned long *addr) { return test_and_set_bit(nr, addr); } @@ -226,7 +234,7 @@ test_and_set_bit_lock(int nr, volatile unsigned long *addr) * If two examples of this operation race, one can appear to succeed * but actually fail. You must protect multiple accesses with a lock. */ -static inline int __test_and_set_bit(int nr, volatile unsigned long *addr) +static inline int __test_and_set_bit(long nr, volatile unsigned long *addr) { int oldbit; @@ -245,7 +253,7 @@ static inline int __test_and_set_bit(int nr, volatile unsigned long *addr) * This operation is atomic and cannot be reordered. * It also implies a memory barrier. 
*/ -static inline int test_and_clear_bit(int nr, volatile unsigned long *addr) +static inline int test_and_clear_bit(long nr, volatile unsigned long *addr) { int oldbit; @@ -272,7 +280,7 @@ static inline int test_and_clear_bit(int nr, volatile unsigned long *addr) * accessed from a hypervisor on the same CPU if running in a VM: don't change * this without also updating arch/x86/kernel/kvm.c */ -static inline int __test_and_clear_bit(int nr, volatile unsigned long *addr) +static inline int __test_and_clear_bit(long nr, volatile unsigned long *addr) { int oldbit; @@ -284,7 +292,7 @@ static inline int __test_and_clear_bit(int nr, volatile unsigned long *addr) } /* WARNING: non atomic and it can be reordered! */ -static inline int __test_and_change_bit(int nr, volatile unsigned long *addr) +static inline int __test_and_change_bit(long nr, volatile unsigned long *addr) { int oldbit; @@ -304,7 +312,7 @@ static inline int __test_and_change_bit(int nr, volatile unsigned long *addr) * This operation is atomic and cannot be reordered. * It also implies a memory barrier. */ -static inline int test_and_change_bit(int nr, volatile unsigned long *addr) +static inline int test_and_change_bit(long nr, volatile unsigned long *addr) { int oldbit; @@ -315,13 +323,13 @@ static inline int test_and_change_bit(int nr, volatile unsigned long *addr) return oldbit; } -static __always_inline int constant_test_bit(unsigned int nr, const volatile unsigned long *addr) +static __always_inline int constant_test_bit(long nr, const volatile unsigned long *addr) { - return ((1UL << (nr % BITS_PER_LONG)) & - (addr[nr / BITS_PER_LONG])) != 0; + return ((1UL << (nr & (BITS_PER_LONG-1))) & + (addr[nr >> _BITOPS_LONG_SHIFT])) != 0; } -static inline int variable_test_bit(int nr, volatile const unsigned long *addr) +static inline int variable_test_bit(long nr, volatile const unsigned long *addr) { int oldbit; diff --git a/arch/x86/include/asm/checksum_32.h b/arch/x86/include/asm/checksum_32.h index 46fc474fd819..f50de6951738 100644 --- a/arch/x86/include/asm/checksum_32.h +++ b/arch/x86/include/asm/checksum_32.h @@ -49,9 +49,15 @@ static inline __wsum csum_partial_copy_from_user(const void __user *src, int len, __wsum sum, int *err_ptr) { + __wsum ret; + might_sleep(); - return csum_partial_copy_generic((__force void *)src, dst, - len, sum, err_ptr, NULL); + stac(); + ret = csum_partial_copy_generic((__force void *)src, dst, + len, sum, err_ptr, NULL); + clac(); + + return ret; } /* @@ -176,10 +182,16 @@ static inline __wsum csum_and_copy_to_user(const void *src, int len, __wsum sum, int *err_ptr) { + __wsum ret; + might_sleep(); - if (access_ok(VERIFY_WRITE, dst, len)) - return csum_partial_copy_generic(src, (__force void *)dst, - len, sum, NULL, err_ptr); + if (access_ok(VERIFY_WRITE, dst, len)) { + stac(); + ret = csum_partial_copy_generic(src, (__force void *)dst, + len, sum, NULL, err_ptr); + clac(); + return ret; + } if (len) *err_ptr = -EFAULT; diff --git a/arch/x86/include/asm/checksum_64.h b/arch/x86/include/asm/checksum_64.h index 9bfdc41629ec..e6fd8a026c7b 100644 --- a/arch/x86/include/asm/checksum_64.h +++ b/arch/x86/include/asm/checksum_64.h @@ -133,7 +133,7 @@ extern __wsum csum_partial(const void *buff, int len, __wsum sum); /* Do not call this directly. 
Use the wrappers below */ -extern __wsum csum_partial_copy_generic(const void *src, const void *dst, +extern __visible __wsum csum_partial_copy_generic(const void *src, const void *dst, int len, __wsum sum, int *src_err_ptr, int *dst_err_ptr); diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 47538a61c91b..d3f5c63078d8 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -366,9 +366,10 @@ extern bool __static_cpu_has_safe(u16 bit); */ static __always_inline __pure bool __static_cpu_has(u16 bit) { -#if __GNUC__ > 4 || __GNUC_MINOR__ >= 5 +#ifdef CC_HAVE_ASM_GOTO #ifdef CONFIG_X86_DEBUG_STATIC_CPU_HAS + /* * Catch too early usage of this before alternatives * have run. @@ -384,6 +385,7 @@ static __always_inline __pure bool __static_cpu_has(u16 bit) ".previous\n" /* skipping size check since replacement size = 0 */ : : "i" (X86_FEATURE_ALWAYS) : : t_warn); + #endif asm goto("1: jmp %l[t_no]\n" @@ -406,7 +408,9 @@ static __always_inline __pure bool __static_cpu_has(u16 bit) warn_pre_alternatives(); return false; #endif -#else /* GCC_VERSION >= 40500 */ + +#else /* CC_HAVE_ASM_GOTO */ + u8 flag; /* Open-coded due to __stringify() in ALTERNATIVE() */ asm volatile("1: movb $0,%0\n" @@ -427,7 +431,8 @@ static __always_inline __pure bool __static_cpu_has(u16 bit) ".previous\n" : "=qm" (flag) : "i" (bit)); return flag; -#endif + +#endif /* CC_HAVE_ASM_GOTO */ } #define static_cpu_has(bit) \ @@ -441,7 +446,7 @@ static __always_inline __pure bool __static_cpu_has(u16 bit) static __always_inline __pure bool _static_cpu_has_safe(u16 bit) { -#if __GNUC__ > 4 || __GNUC_MINOR__ >= 5 +#ifdef CC_HAVE_ASM_GOTO /* * We need to spell the jumps to the compiler because, depending on the offset, * the replacement jump can be bigger than the original jump, and this we cannot @@ -475,7 +480,7 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit) return false; t_dynamic: return __static_cpu_has_safe(bit); -#else /* GCC_VERSION >= 40500 */ +#else u8 flag; /* Open-coded due to __stringify() in ALTERNATIVE() */ asm volatile("1: movb $2,%0\n" @@ -511,7 +516,7 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit) : "=qm" (flag) : "i" (bit), "i" (X86_FEATURE_ALWAYS)); return (flag == 2 ? 
__static_cpu_has_safe(bit) : flag); -#endif +#endif /* CC_HAVE_ASM_GOTO */ } #define static_cpu_has_safe(bit) \ diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h index cccd07fa5e3a..779c2efe2e97 100644 --- a/arch/x86/include/asm/e820.h +++ b/arch/x86/include/asm/e820.h @@ -29,7 +29,7 @@ extern void e820_setup_gap(void); extern int e820_search_gap(unsigned long *gapstart, unsigned long *gapsize, unsigned long start_addr, unsigned long long end_addr); struct setup_data; -extern void parse_e820_ext(struct setup_data *data); +extern void parse_e820_ext(u64 phys_addr, u32 data_len); #if defined(CONFIG_X86_64) || \ (defined(CONFIG_X86_32) && defined(CONFIG_HIBERNATION)) diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index e4ac559c4a24..92b3bae08b74 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -26,56 +26,56 @@ #include <asm/sections.h> /* Interrupt handlers registered during init_IRQ */ -extern void apic_timer_interrupt(void); -extern void x86_platform_ipi(void); -extern void kvm_posted_intr_ipi(void); -extern void error_interrupt(void); -extern void irq_work_interrupt(void); - -extern void spurious_interrupt(void); -extern void thermal_interrupt(void); -extern void reschedule_interrupt(void); - -extern void invalidate_interrupt(void); -extern void invalidate_interrupt0(void); -extern void invalidate_interrupt1(void); -extern void invalidate_interrupt2(void); -extern void invalidate_interrupt3(void); -extern void invalidate_interrupt4(void); -extern void invalidate_interrupt5(void); -extern void invalidate_interrupt6(void); -extern void invalidate_interrupt7(void); -extern void invalidate_interrupt8(void); -extern void invalidate_interrupt9(void); -extern void invalidate_interrupt10(void); -extern void invalidate_interrupt11(void); -extern void invalidate_interrupt12(void); -extern void invalidate_interrupt13(void); -extern void invalidate_interrupt14(void); -extern void invalidate_interrupt15(void); -extern void invalidate_interrupt16(void); -extern void invalidate_interrupt17(void); -extern void invalidate_interrupt18(void); -extern void invalidate_interrupt19(void); -extern void invalidate_interrupt20(void); -extern void invalidate_interrupt21(void); -extern void invalidate_interrupt22(void); -extern void invalidate_interrupt23(void); -extern void invalidate_interrupt24(void); -extern void invalidate_interrupt25(void); -extern void invalidate_interrupt26(void); -extern void invalidate_interrupt27(void); -extern void invalidate_interrupt28(void); -extern void invalidate_interrupt29(void); -extern void invalidate_interrupt30(void); -extern void invalidate_interrupt31(void); - -extern void irq_move_cleanup_interrupt(void); -extern void reboot_interrupt(void); -extern void threshold_interrupt(void); - -extern void call_function_interrupt(void); -extern void call_function_single_interrupt(void); +extern asmlinkage void apic_timer_interrupt(void); +extern asmlinkage void x86_platform_ipi(void); +extern asmlinkage void kvm_posted_intr_ipi(void); +extern asmlinkage void error_interrupt(void); +extern asmlinkage void irq_work_interrupt(void); + +extern asmlinkage void spurious_interrupt(void); +extern asmlinkage void thermal_interrupt(void); +extern asmlinkage void reschedule_interrupt(void); + +extern asmlinkage void invalidate_interrupt(void); +extern asmlinkage void invalidate_interrupt0(void); +extern asmlinkage void invalidate_interrupt1(void); +extern asmlinkage void invalidate_interrupt2(void); +extern 
asmlinkage void invalidate_interrupt3(void); +extern asmlinkage void invalidate_interrupt4(void); +extern asmlinkage void invalidate_interrupt5(void); +extern asmlinkage void invalidate_interrupt6(void); +extern asmlinkage void invalidate_interrupt7(void); +extern asmlinkage void invalidate_interrupt8(void); +extern asmlinkage void invalidate_interrupt9(void); +extern asmlinkage void invalidate_interrupt10(void); +extern asmlinkage void invalidate_interrupt11(void); +extern asmlinkage void invalidate_interrupt12(void); +extern asmlinkage void invalidate_interrupt13(void); +extern asmlinkage void invalidate_interrupt14(void); +extern asmlinkage void invalidate_interrupt15(void); +extern asmlinkage void invalidate_interrupt16(void); +extern asmlinkage void invalidate_interrupt17(void); +extern asmlinkage void invalidate_interrupt18(void); +extern asmlinkage void invalidate_interrupt19(void); +extern asmlinkage void invalidate_interrupt20(void); +extern asmlinkage void invalidate_interrupt21(void); +extern asmlinkage void invalidate_interrupt22(void); +extern asmlinkage void invalidate_interrupt23(void); +extern asmlinkage void invalidate_interrupt24(void); +extern asmlinkage void invalidate_interrupt25(void); +extern asmlinkage void invalidate_interrupt26(void); +extern asmlinkage void invalidate_interrupt27(void); +extern asmlinkage void invalidate_interrupt28(void); +extern asmlinkage void invalidate_interrupt29(void); +extern asmlinkage void invalidate_interrupt30(void); +extern asmlinkage void invalidate_interrupt31(void); + +extern asmlinkage void irq_move_cleanup_interrupt(void); +extern asmlinkage void reboot_interrupt(void); +extern asmlinkage void threshold_interrupt(void); + +extern asmlinkage void call_function_interrupt(void); +extern asmlinkage void call_function_single_interrupt(void); #ifdef CONFIG_TRACING /* Interrupt handlers registered during init_IRQ */ @@ -172,22 +172,18 @@ extern atomic_t irq_mis_count; extern void eisa_set_level_irq(unsigned int irq); /* SMP */ -extern void smp_apic_timer_interrupt(struct pt_regs *); -extern void smp_spurious_interrupt(struct pt_regs *); -extern void smp_x86_platform_ipi(struct pt_regs *); -extern void smp_error_interrupt(struct pt_regs *); +extern __visible void smp_apic_timer_interrupt(struct pt_regs *); +extern __visible void smp_spurious_interrupt(struct pt_regs *); +extern __visible void smp_x86_platform_ipi(struct pt_regs *); +extern __visible void smp_error_interrupt(struct pt_regs *); #ifdef CONFIG_X86_IO_APIC extern asmlinkage void smp_irq_move_cleanup_interrupt(void); #endif #ifdef CONFIG_SMP -extern void smp_reschedule_interrupt(struct pt_regs *); -extern void smp_call_function_interrupt(struct pt_regs *); -extern void smp_call_function_single_interrupt(struct pt_regs *); -#ifdef CONFIG_X86_32 -extern void smp_invalidate_interrupt(struct pt_regs *); -#else -extern asmlinkage void smp_invalidate_interrupt(struct pt_regs *); -#endif +extern __visible void smp_reschedule_interrupt(struct pt_regs *); +extern __visible void smp_call_function_interrupt(struct pt_regs *); +extern __visible void smp_call_function_single_interrupt(struct pt_regs *); +extern __visible void smp_invalidate_interrupt(struct pt_regs *); #endif extern void (*__initconst interrupt[NR_VECTORS-FIRST_EXTERNAL_VECTOR])(void); diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h index 2d4b5e6107cd..e42f758a0fbd 100644 --- a/arch/x86/include/asm/hypervisor.h +++ b/arch/x86/include/asm/hypervisor.h @@ -33,7 +33,7 @@ struct 
hypervisor_x86 { const char *name; /* Detection routine */ - bool (*detect)(void); + uint32_t (*detect)(void); /* Adjust CPU feature bits (run once per CPU) */ void (*set_cpu_features)(struct cpuinfo_x86 *); diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h index 57873beb3292..0ea10f27d613 100644 --- a/arch/x86/include/asm/irq.h +++ b/arch/x86/include/asm/irq.h @@ -33,7 +33,7 @@ extern void (*x86_platform_ipi_callback)(void); extern void native_init_IRQ(void); extern bool handle_irq(unsigned irq, struct pt_regs *regs); -extern unsigned int do_IRQ(struct pt_regs *regs); +extern __visible unsigned int do_IRQ(struct pt_regs *regs); /* Interrupt vector management */ extern DECLARE_BITMAP(used_vectors, NR_VECTORS); diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h index 5a6d2873f80e..9454c167629f 100644 --- a/arch/x86/include/asm/kprobes.h +++ b/arch/x86/include/asm/kprobes.h @@ -49,10 +49,10 @@ typedef u8 kprobe_opcode_t; #define flush_insn_slot(p) do { } while (0) /* optinsn template addresses */ -extern kprobe_opcode_t optprobe_template_entry; -extern kprobe_opcode_t optprobe_template_val; -extern kprobe_opcode_t optprobe_template_call; -extern kprobe_opcode_t optprobe_template_end; +extern __visible kprobe_opcode_t optprobe_template_entry; +extern __visible kprobe_opcode_t optprobe_template_val; +extern __visible kprobe_opcode_t optprobe_template_call; +extern __visible kprobe_opcode_t optprobe_template_end; #define MAX_OPTIMIZED_LENGTH (MAX_INSN_SIZE + RELATIVE_ADDR_SIZE) #define MAX_OPTINSN_SIZE \ (((unsigned long)&optprobe_template_end - \ @@ -62,7 +62,7 @@ extern kprobe_opcode_t optprobe_template_end; extern const int kretprobe_blacklist_size; void arch_remove_kprobe(struct kprobe *p); -void kretprobe_trampoline(void); +asmlinkage void kretprobe_trampoline(void); /* Architecture specific copy of original instruction*/ struct arch_specific_insn { diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index f87f7fcefa0a..c76ff74a98f2 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -286,6 +286,7 @@ struct kvm_mmu { u64 *pae_root; u64 *lm_root; u64 rsvd_bits_mask[2][4]; + u64 bad_mt_xwr; /* * Bitmap: bit set = last pte in walk @@ -323,6 +324,7 @@ struct kvm_pmu { u64 global_ovf_ctrl; u64 counter_bitmask[2]; u64 global_ctrl_mask; + u64 reserved_bits; u8 version; struct kvm_pmc gp_counters[INTEL_PMC_MAX_GENERIC]; struct kvm_pmc fixed_counters[INTEL_PMC_MAX_FIXED]; @@ -511,6 +513,14 @@ struct kvm_vcpu_arch { * instruction. 
*/ bool write_fault_to_shadow_pgtable; + + /* set at EPT violation at this point */ + unsigned long exit_qualification; + + /* pv related host specific info */ + struct { + bool pv_unhalted; + } pv; }; struct kvm_lpage_info { @@ -802,8 +812,8 @@ extern u32 kvm_min_guest_tsc_khz; extern u32 kvm_max_guest_tsc_khz; enum emulation_result { - EMULATE_DONE, /* no further processing */ - EMULATE_DO_MMIO, /* kvm_run filled with mmio request */ + EMULATE_DONE, /* no further processing */ + EMULATE_USER_EXIT, /* kvm_run ready for userspace exit */ EMULATE_FAIL, /* can't emulate this instruction */ }; diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index 695399f2d5eb..1df115909758 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h @@ -85,26 +85,20 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1, return ret; } -static inline bool kvm_para_available(void) +static inline uint32_t kvm_cpuid_base(void) { - unsigned int eax, ebx, ecx, edx; - char signature[13]; - if (boot_cpu_data.cpuid_level < 0) - return false; /* So we don't blow up on old processors */ + return 0; /* So we don't blow up on old processors */ - if (cpu_has_hypervisor) { - cpuid(KVM_CPUID_SIGNATURE, &eax, &ebx, &ecx, &edx); - memcpy(signature + 0, &ebx, 4); - memcpy(signature + 4, &ecx, 4); - memcpy(signature + 8, &edx, 4); - signature[12] = 0; + if (cpu_has_hypervisor) + return hypervisor_cpuid_base("KVMKVMKVM\0\0\0", 0); - if (strcmp(signature, "KVMKVMKVM") == 0) - return true; - } + return 0; +} - return false; +static inline bool kvm_para_available(void) +{ + return kvm_cpuid_base() != 0; } static inline unsigned int kvm_arch_para_features(void) @@ -118,10 +112,20 @@ void kvm_async_pf_task_wait(u32 token); void kvm_async_pf_task_wake(u32 token); u32 kvm_read_and_reset_pf_reason(void); extern void kvm_disable_steal_time(void); -#else -#define kvm_guest_init() do { } while (0) + +#ifdef CONFIG_PARAVIRT_SPINLOCKS +void __init kvm_spinlock_init(void); +#else /* !CONFIG_PARAVIRT_SPINLOCKS */ +static inline void kvm_spinlock_init(void) +{ +} +#endif /* CONFIG_PARAVIRT_SPINLOCKS */ + +#else /* CONFIG_KVM_GUEST */ +#define kvm_guest_init() do {} while (0) #define kvm_async_pf_task_wait(T) do {} while(0) #define kvm_async_pf_task_wake(T) do {} while(0) + static inline u32 kvm_read_and_reset_pf_reason(void) { return 0; diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 29e3093bbd21..cbe6b9e404ce 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -32,11 +32,20 @@ #define MCI_STATUS_PCC (1ULL<<57) /* processor context corrupt */ #define MCI_STATUS_S (1ULL<<56) /* Signaled machine check */ #define MCI_STATUS_AR (1ULL<<55) /* Action required */ -#define MCACOD 0xffff /* MCA Error Code */ + +/* + * Note that the full MCACOD field of IA32_MCi_STATUS MSR is + * bits 15:0. But bit 12 is the 'F' bit, defined for corrected + * errors to indicate that errors are being filtered by hardware. + * We should mask out bit 12 when looking for specific signatures + * of uncorrected errors - so the F bit is deliberately skipped + * in this #define. + */ +#define MCACOD 0xefff /* MCA Error Code */ /* Architecturally defined codes from SDM Vol. 
3B Chapter 15 */ #define MCACOD_SCRUB 0x00C0 /* 0xC0-0xCF Memory Scrubbing */ -#define MCACOD_SCRUBMSK 0xfff0 +#define MCACOD_SCRUBMSK 0xeff0 /* Skip bit 12 ('F' bit) */ #define MCACOD_L3WB 0x017A /* L3 Explicit Writeback */ #define MCACOD_DATA 0x0134 /* Data Load */ #define MCACOD_INSTR 0x0150 /* Instruction Fetch */ @@ -188,6 +197,9 @@ extern void register_mce_write_callback(ssize_t (*)(struct file *filp, const char __user *ubuf, size_t usize, loff_t *off)); +/* Disable CMCI/polling for MCA bank claimed by firmware */ +extern void mce_disable_bank(int bank); + /* * Exception handler */ diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index cdbf36776106..be12c534fd59 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -45,22 +45,28 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, /* Re-load page tables */ load_cr3(next->pgd); - /* stop flush ipis for the previous mm */ + /* Stop flush ipis for the previous mm */ cpumask_clear_cpu(cpu, mm_cpumask(prev)); - /* - * load the LDT, if the LDT is different: - */ + /* Load the LDT, if the LDT is different: */ if (unlikely(prev->context.ldt != next->context.ldt)) load_LDT_nolock(&next->context); } #ifdef CONFIG_SMP - else { + else { this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK); BUG_ON(this_cpu_read(cpu_tlbstate.active_mm) != next); - if (!cpumask_test_and_set_cpu(cpu, mm_cpumask(next))) { - /* We were in lazy tlb mode and leave_mm disabled + if (!cpumask_test_cpu(cpu, mm_cpumask(next))) { + /* + * On established mms, the mm_cpumask is only changed + * from irq context, from ptep_clear_flush() while in + * lazy tlb mode, and here. Irqs are blocked during + * schedule, protecting us from simultaneous changes. + */ + cpumask_set_cpu(cpu, mm_cpumask(next)); + /* + * We were in lazy tlb mode and leave_mm disabled * tlb flush IPI delivery. We must reload CR3 * to make sure to use no freed page tables. */ diff --git a/arch/x86/include/asm/mutex_64.h b/arch/x86/include/asm/mutex_64.h index 2c543fff241b..e7e6751648ed 100644 --- a/arch/x86/include/asm/mutex_64.h +++ b/arch/x86/include/asm/mutex_64.h @@ -16,6 +16,20 @@ * * Atomically decrements @v and calls <fail_fn> if the result is negative. */ +#ifdef CC_HAVE_ASM_GOTO +static inline void __mutex_fastpath_lock(atomic_t *v, + void (*fail_fn)(atomic_t *)) +{ + asm volatile goto(LOCK_PREFIX " decl %0\n" + " jns %l[exit]\n" + : : "m" (v->counter) + : "memory", "cc" + : exit); + fail_fn(v); +exit: + return; +} +#else #define __mutex_fastpath_lock(v, fail_fn) \ do { \ unsigned long dummy; \ @@ -32,6 +46,7 @@ do { \ : "rax", "rsi", "rdx", "rcx", \ "r8", "r9", "r10", "r11", "memory"); \ } while (0) +#endif /** * __mutex_fastpath_lock_retval - try to take the lock by moving the count @@ -56,6 +71,20 @@ static inline int __mutex_fastpath_lock_retval(atomic_t *count) * * Atomically increments @v and calls <fail_fn> if the result is nonpositive. 
*/ +#ifdef CC_HAVE_ASM_GOTO +static inline void __mutex_fastpath_unlock(atomic_t *v, + void (*fail_fn)(atomic_t *)) +{ + asm volatile goto(LOCK_PREFIX " incl %0\n" + " jg %l[exit]\n" + : : "m" (v->counter) + : "memory", "cc" + : exit); + fail_fn(v); +exit: + return; +} +#else #define __mutex_fastpath_unlock(v, fail_fn) \ do { \ unsigned long dummy; \ @@ -72,6 +101,7 @@ do { \ : "rax", "rsi", "rdx", "rcx", \ "r8", "r9", "r10", "r11", "memory"); \ } while (0) +#endif #define __mutex_slowpath_needs_to_unlock() 1 diff --git a/arch/x86/include/asm/page_32_types.h b/arch/x86/include/asm/page_32_types.h index ef17af013475..f48b17df4224 100644 --- a/arch/x86/include/asm/page_32_types.h +++ b/arch/x86/include/asm/page_32_types.h @@ -15,6 +15,8 @@ */ #define __PAGE_OFFSET _AC(CONFIG_PAGE_OFFSET, UL) +#define __START_KERNEL_map __PAGE_OFFSET + #define THREAD_SIZE_ORDER 1 #define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index 6c896fbe21db..43dcd804ebd5 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h @@ -32,11 +32,6 @@ */ #define __PAGE_OFFSET _AC(0xffff880000000000, UL) -#define __PHYSICAL_START ((CONFIG_PHYSICAL_START + \ - (CONFIG_PHYSICAL_ALIGN - 1)) & \ - ~(CONFIG_PHYSICAL_ALIGN - 1)) - -#define __START_KERNEL (__START_KERNEL_map + __PHYSICAL_START) #define __START_KERNEL_map _AC(0xffffffff80000000, UL) /* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */ diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h index 54c97879195e..f97fbe3abb67 100644 --- a/arch/x86/include/asm/page_types.h +++ b/arch/x86/include/asm/page_types.h @@ -33,6 +33,11 @@ (((current->personality & READ_IMPLIES_EXEC) ? 
VM_EXEC : 0 ) | \ VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#define __PHYSICAL_START ALIGN(CONFIG_PHYSICAL_START, \ + CONFIG_PHYSICAL_ALIGN) + +#define __START_KERNEL (__START_KERNEL_map + __PHYSICAL_START) + #ifdef CONFIG_X86_64 #include <asm/page_64_types.h> #else diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index cfdc9ee4c900..401f350ef71b 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -712,36 +712,16 @@ static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx, #if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT_SPINLOCKS) -static inline int arch_spin_is_locked(struct arch_spinlock *lock) +static __always_inline void __ticket_lock_spinning(struct arch_spinlock *lock, + __ticket_t ticket) { - return PVOP_CALL1(int, pv_lock_ops.spin_is_locked, lock); + PVOP_VCALLEE2(pv_lock_ops.lock_spinning, lock, ticket); } -static inline int arch_spin_is_contended(struct arch_spinlock *lock) +static __always_inline void __ticket_unlock_kick(struct arch_spinlock *lock, + __ticket_t ticket) { - return PVOP_CALL1(int, pv_lock_ops.spin_is_contended, lock); -} -#define arch_spin_is_contended arch_spin_is_contended - -static __always_inline void arch_spin_lock(struct arch_spinlock *lock) -{ - PVOP_VCALL1(pv_lock_ops.spin_lock, lock); -} - -static __always_inline void arch_spin_lock_flags(struct arch_spinlock *lock, - unsigned long flags) -{ - PVOP_VCALL2(pv_lock_ops.spin_lock_flags, lock, flags); -} - -static __always_inline int arch_spin_trylock(struct arch_spinlock *lock) -{ - return PVOP_CALL1(int, pv_lock_ops.spin_trylock, lock); -} - -static __always_inline void arch_spin_unlock(struct arch_spinlock *lock) -{ - PVOP_VCALL1(pv_lock_ops.spin_unlock, lock); + PVOP_VCALL2(pv_lock_ops.unlock_kick, lock, ticket); } #endif diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 0db1fcac668c..aab8f671b523 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -327,13 +327,15 @@ struct pv_mmu_ops { }; struct arch_spinlock; +#ifdef CONFIG_SMP +#include <asm/spinlock_types.h> +#else +typedef u16 __ticket_t; +#endif + struct pv_lock_ops { - int (*spin_is_locked)(struct arch_spinlock *lock); - int (*spin_is_contended)(struct arch_spinlock *lock); - void (*spin_lock)(struct arch_spinlock *lock); - void (*spin_lock_flags)(struct arch_spinlock *lock, unsigned long flags); - int (*spin_trylock)(struct arch_spinlock *lock); - void (*spin_unlock)(struct arch_spinlock *lock); + struct paravirt_callee_save lock_spinning; + void (*unlock_kick)(struct arch_spinlock *lock, __ticket_t ticket); }; /* This contains all the paravirt structures: we get a convenient @@ -387,7 +389,8 @@ extern struct pv_lock_ops pv_lock_ops; /* Simple instruction patching code. */ #define DEF_NATIVE(ops, name, code) \ - extern const char start_##ops##_##name[], end_##ops##_##name[]; \ + extern const char start_##ops##_##name[] __visible, \ + end_##ops##_##name[] __visible; \ asm("start_" #ops "_" #name ": " code "; end_" #ops "_" #name ":") unsigned paravirt_patch_nop(void); diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 1c00631164c2..8d16befdec88 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -22,7 +22,8 @@ * ZERO_PAGE is a global shared page that is always zero: used * for zero-mapped memory areas etc.. 
*/ -extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; +extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] + __visible; #define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) extern spinlock_t pgd_lock; diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 24cf5aefb704..987c75ecc334 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -412,7 +412,7 @@ union irq_stack_union { }; }; -DECLARE_PER_CPU_FIRST(union irq_stack_union, irq_stack_union); +DECLARE_PER_CPU_FIRST(union irq_stack_union, irq_stack_union) __visible; DECLARE_INIT_PER_CPU(irq_stack_union); DECLARE_PER_CPU(char *, irq_stack_ptr); @@ -942,33 +942,19 @@ extern int set_tsc_mode(unsigned int val); extern u16 amd_get_nb_id(int cpu); -struct aperfmperf { - u64 aperf, mperf; -}; - -static inline void get_aperfmperf(struct aperfmperf *am) +static inline uint32_t hypervisor_cpuid_base(const char *sig, uint32_t leaves) { - WARN_ON_ONCE(!boot_cpu_has(X86_FEATURE_APERFMPERF)); - - rdmsrl(MSR_IA32_APERF, am->aperf); - rdmsrl(MSR_IA32_MPERF, am->mperf); -} + uint32_t base, eax, signature[3]; -#define APERFMPERF_SHIFT 10 + for (base = 0x40000000; base < 0x40010000; base += 0x100) { + cpuid(base, &eax, &signature[0], &signature[1], &signature[2]); -static inline -unsigned long calc_aperfmperf_ratio(struct aperfmperf *old, - struct aperfmperf *new) -{ - u64 aperf = new->aperf - old->aperf; - u64 mperf = new->mperf - old->mperf; - unsigned long ratio = aperf; - - mperf >>= APERFMPERF_SHIFT; - if (mperf) - ratio = div64_u64(aperf, mperf); + if (!memcmp(sig, signature, 12) && + (leaves == 0 || ((eax - base) >= leaves))) + return base; + } - return ratio; + return 0; } extern unsigned long arch_align_stack(unsigned long sp); diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h index 109a9dd5d454..be8269b00e2a 100644 --- a/arch/x86/include/asm/pvclock.h +++ b/arch/x86/include/asm/pvclock.h @@ -93,7 +93,6 @@ unsigned __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src, struct pvclock_vsyscall_time_info { struct pvclock_vcpu_time_info pvti; - u32 migrate_count; } __attribute__((__aligned__(SMP_CACHE_BYTES))); #define PVTI_SIZE sizeof(struct pvclock_vsyscall_time_info) diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index b7bf3505e1ec..347555492dad 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -6,6 +6,8 @@ #define COMMAND_LINE_SIZE 2048 +#include <linux/linkage.h> + #ifdef __i386__ #include <linux/pfn.h> @@ -108,11 +110,11 @@ void *extend_brk(size_t size, size_t align); extern void probe_roms(void); #ifdef __i386__ -void __init i386_start_kernel(void); +asmlinkage void __init i386_start_kernel(void); #else -void __init x86_64_start_kernel(char *real_mode); -void __init x86_64_start_reservations(char *real_mode_data); +asmlinkage void __init x86_64_start_kernel(char *real_mode); +asmlinkage void __init x86_64_start_reservations(char *real_mode_data); #endif /* __i386__ */ #endif /* _SETUP */ diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h index 2f4d924fe6c9..645cad2c95ff 100644 --- a/arch/x86/include/asm/special_insns.h +++ b/arch/x86/include/asm/special_insns.h @@ -101,7 +101,7 @@ static inline void native_wbinvd(void) asm volatile("wbinvd": : :"memory"); } -extern void native_load_gs_index(unsigned); +extern asmlinkage void native_load_gs_index(unsigned); #ifdef CONFIG_PARAVIRT #include <asm/paravirt.h> 
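For illustration: the processor.h hunk above introduces hypervisor_cpuid_base(), which walks the 0x40000000..0x4000ff00 CPUID leaf range in 0x100 steps and matches a 12-byte vendor signature in EBX:ECX:EDX. A minimal userspace sketch of the same scan, assuming an x86-64 host and GCC-style inline asm; the cpuid() wrapper below is a local stand-in, not the kernel's:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

static void cpuid(uint32_t leaf, uint32_t *eax, uint32_t *ebx,
		  uint32_t *ecx, uint32_t *edx)
{
	asm volatile("cpuid"
		     : "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx)
		     : "0" (leaf), "2" (0));
}

static uint32_t hypervisor_cpuid_base(const char *sig, uint32_t leaves)
{
	uint32_t base, eax, signature[3];

	for (base = 0x40000000; base < 0x40010000; base += 0x100) {
		cpuid(base, &eax, &signature[0], &signature[1], &signature[2]);

		/* Vendor string lives in EBX:ECX:EDX; EAX is the max leaf. */
		if (!memcmp(sig, signature, 12) &&
		    (leaves == 0 || ((eax - base) >= leaves)))
			return base;
	}

	return 0;
}

int main(void)
{
	/* The same signatures the patched callers pass in. */
	printf("KVM base: 0x%x\n", hypervisor_cpuid_base("KVMKVMKVM\0\0\0", 0));
	printf("Xen base: 0x%x\n", hypervisor_cpuid_base("XenVMMXenVMM", 2));
	return 0;
}

With this helper in place, the kvm_para.h and xen/hypervisor.h hunks elsewhere in this series shrink their open-coded signature scans to single calls.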
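In the same spirit, the mutex_64.h hunks above add CC_HAVE_ASM_GOTO fastpaths that branch straight to a C label on the uncontended case instead of materializing a return value. A compilable userspace sketch of the pattern, assuming a compiler with asm goto support; the plain int counter and slowpath() below are invented stand-ins for atomic_t and the real mutex slowpath:

#include <stdio.h>

static void slowpath(int *v)
{
	printf("contended, count now %d\n", *v);
}

static void fastpath_lock(int *v)
{
	asm volatile goto("lock; decl %0\n\t"
			  "jns %l[exit]"
			  : /* asm goto allows no outputs */
			  : "m" (*v)
			  : "memory", "cc"
			  : exit);
	slowpath(v);
exit:
	return;
}

int main(void)
{
	int count = 1;		/* 1 == unlocked, as in the mutex fastpath */

	fastpath_lock(&count);	/* decrements to 0, takes the jns branch */
	fastpath_lock(&count);	/* goes negative, falls into slowpath() */
	return 0;
}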
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h index e3ddd7db723f..bf156ded74b5 100644 --- a/arch/x86/include/asm/spinlock.h +++ b/arch/x86/include/asm/spinlock.h @@ -1,11 +1,14 @@ #ifndef _ASM_X86_SPINLOCK_H #define _ASM_X86_SPINLOCK_H +#include <linux/jump_label.h> #include <linux/atomic.h> #include <asm/page.h> #include <asm/processor.h> #include <linux/compiler.h> #include <asm/paravirt.h> +#include <asm/bitops.h> + /* * Your basic SMP spinlocks, allowing only a single CPU anywhere * @@ -34,6 +37,36 @@ # define UNLOCK_LOCK_PREFIX #endif +/* How long a lock should spin before we consider blocking */ +#define SPIN_THRESHOLD (1 << 15) + +extern struct static_key paravirt_ticketlocks_enabled; +static __always_inline bool static_key_false(struct static_key *key); + +#ifdef CONFIG_PARAVIRT_SPINLOCKS + +static inline void __ticket_enter_slowpath(arch_spinlock_t *lock) +{ + set_bit(0, (volatile unsigned long *)&lock->tickets.tail); +} + +#else /* !CONFIG_PARAVIRT_SPINLOCKS */ +static __always_inline void __ticket_lock_spinning(arch_spinlock_t *lock, + __ticket_t ticket) +{ +} +static inline void __ticket_unlock_kick(arch_spinlock_t *lock, + __ticket_t ticket) +{ +} + +#endif /* CONFIG_PARAVIRT_SPINLOCKS */ + +static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock) +{ + return lock.tickets.head == lock.tickets.tail; +} + /* * Ticket locks are conceptually two parts, one indicating the current head of * the queue, and the other indicating the current tail. The lock is acquired @@ -47,81 +80,101 @@ * in the high part, because a wide xadd increment of the low part would carry * up and contaminate the high part. */ -static __always_inline void __ticket_spin_lock(arch_spinlock_t *lock) +static __always_inline void arch_spin_lock(arch_spinlock_t *lock) { - register struct __raw_tickets inc = { .tail = 1 }; + register struct __raw_tickets inc = { .tail = TICKET_LOCK_INC }; inc = xadd(&lock->tickets, inc); + if (likely(inc.head == inc.tail)) + goto out; + inc.tail &= ~TICKET_SLOWPATH_FLAG; for (;;) { - if (inc.head == inc.tail) - break; - cpu_relax(); - inc.head = ACCESS_ONCE(lock->tickets.head); + unsigned count = SPIN_THRESHOLD; + + do { + if (ACCESS_ONCE(lock->tickets.head) == inc.tail) + goto out; + cpu_relax(); + } while (--count); + __ticket_lock_spinning(lock, inc.tail); } - barrier(); /* make sure nothing creeps before the lock is taken */ +out: barrier(); /* make sure nothing creeps before the lock is taken */ } -static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock) +static __always_inline int arch_spin_trylock(arch_spinlock_t *lock) { arch_spinlock_t old, new; old.tickets = ACCESS_ONCE(lock->tickets); - if (old.tickets.head != old.tickets.tail) + if (old.tickets.head != (old.tickets.tail & ~TICKET_SLOWPATH_FLAG)) return 0; - new.head_tail = old.head_tail + (1 << TICKET_SHIFT); + new.head_tail = old.head_tail + (TICKET_LOCK_INC << TICKET_SHIFT); /* cmpxchg is a full barrier, so nothing can move before it */ return cmpxchg(&lock->head_tail, old.head_tail, new.head_tail) == old.head_tail; } -static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock) +static inline void __ticket_unlock_slowpath(arch_spinlock_t *lock, + arch_spinlock_t old) { - __add(&lock->tickets.head, 1, UNLOCK_LOCK_PREFIX); + arch_spinlock_t new; + + BUILD_BUG_ON(((__ticket_t)NR_CPUS) != NR_CPUS); + + /* Perform the unlock on the "before" copy */ + old.tickets.head += TICKET_LOCK_INC; + + /* Clear the slowpath flag */ + new.head_tail = 
old.head_tail & ~(TICKET_SLOWPATH_FLAG << TICKET_SHIFT); + + /* + * If the lock is uncontended, clear the flag - use cmpxchg in + * case it changes behind our back though. + */ + if (new.tickets.head != new.tickets.tail || + cmpxchg(&lock->head_tail, old.head_tail, + new.head_tail) != old.head_tail) { + /* + * Lock still has someone queued for it, so wake up an + * appropriate waiter. + */ + __ticket_unlock_kick(lock, old.tickets.head); + } } -static inline int __ticket_spin_is_locked(arch_spinlock_t *lock) +static __always_inline void arch_spin_unlock(arch_spinlock_t *lock) { - struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets); + if (TICKET_SLOWPATH_FLAG && + static_key_false(¶virt_ticketlocks_enabled)) { + arch_spinlock_t prev; - return tmp.tail != tmp.head; -} + prev = *lock; + add_smp(&lock->tickets.head, TICKET_LOCK_INC); -static inline int __ticket_spin_is_contended(arch_spinlock_t *lock) -{ - struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets); + /* add_smp() is a full mb() */ - return (__ticket_t)(tmp.tail - tmp.head) > 1; + if (unlikely(lock->tickets.tail & TICKET_SLOWPATH_FLAG)) + __ticket_unlock_slowpath(lock, prev); + } else + __add(&lock->tickets.head, TICKET_LOCK_INC, UNLOCK_LOCK_PREFIX); } -#ifndef CONFIG_PARAVIRT_SPINLOCKS - static inline int arch_spin_is_locked(arch_spinlock_t *lock) { - return __ticket_spin_is_locked(lock); -} - -static inline int arch_spin_is_contended(arch_spinlock_t *lock) -{ - return __ticket_spin_is_contended(lock); -} -#define arch_spin_is_contended arch_spin_is_contended + struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets); -static __always_inline void arch_spin_lock(arch_spinlock_t *lock) -{ - __ticket_spin_lock(lock); + return tmp.tail != tmp.head; } -static __always_inline int arch_spin_trylock(arch_spinlock_t *lock) +static inline int arch_spin_is_contended(arch_spinlock_t *lock) { - return __ticket_spin_trylock(lock); -} + struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets); -static __always_inline void arch_spin_unlock(arch_spinlock_t *lock) -{ - __ticket_spin_unlock(lock); + return (__ticket_t)(tmp.tail - tmp.head) > TICKET_LOCK_INC; } +#define arch_spin_is_contended arch_spin_is_contended static __always_inline void arch_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags) @@ -129,8 +182,6 @@ static __always_inline void arch_spin_lock_flags(arch_spinlock_t *lock, arch_spin_lock(lock); } -#endif /* CONFIG_PARAVIRT_SPINLOCKS */ - static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) { while (arch_spin_is_locked(lock)) diff --git a/arch/x86/include/asm/spinlock_types.h b/arch/x86/include/asm/spinlock_types.h index ad0ad07fc006..4f1bea19945b 100644 --- a/arch/x86/include/asm/spinlock_types.h +++ b/arch/x86/include/asm/spinlock_types.h @@ -1,13 +1,17 @@ #ifndef _ASM_X86_SPINLOCK_TYPES_H #define _ASM_X86_SPINLOCK_TYPES_H -#ifndef __LINUX_SPINLOCK_TYPES_H -# error "please don't include this file directly" -#endif - #include <linux/types.h> -#if (CONFIG_NR_CPUS < 256) +#ifdef CONFIG_PARAVIRT_SPINLOCKS +#define __TICKET_LOCK_INC 2 +#define TICKET_SLOWPATH_FLAG ((__ticket_t)1) +#else +#define __TICKET_LOCK_INC 1 +#define TICKET_SLOWPATH_FLAG ((__ticket_t)0) +#endif + +#if (CONFIG_NR_CPUS < (256 / __TICKET_LOCK_INC)) typedef u8 __ticket_t; typedef u16 __ticketpair_t; #else @@ -15,6 +19,8 @@ typedef u16 __ticket_t; typedef u32 __ticketpair_t; #endif +#define TICKET_LOCK_INC ((__ticket_t)__TICKET_LOCK_INC) + #define TICKET_SHIFT (sizeof(__ticket_t) * 8) typedef struct arch_spinlock { diff --git 
a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h index 4ec45b3abba1..d7f3b3b78ac3 100644 --- a/arch/x86/include/asm/switch_to.h +++ b/arch/x86/include/asm/switch_to.h @@ -2,8 +2,8 @@ #define _ASM_X86_SWITCH_TO_H struct task_struct; /* one of the stranger aspects of C forward declarations */ -struct task_struct *__switch_to(struct task_struct *prev, - struct task_struct *next); +__visible struct task_struct *__switch_to(struct task_struct *prev, + struct task_struct *next); struct tss_struct; void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, struct tss_struct *tss); diff --git a/arch/x86/include/asm/sync_bitops.h b/arch/x86/include/asm/sync_bitops.h index 9d09b4073b60..05af3b31d522 100644 --- a/arch/x86/include/asm/sync_bitops.h +++ b/arch/x86/include/asm/sync_bitops.h @@ -26,9 +26,9 @@ * Note that @nr may be almost arbitrarily large; this function is not * restricted to acting on a single-word quantity. */ -static inline void sync_set_bit(int nr, volatile unsigned long *addr) +static inline void sync_set_bit(long nr, volatile unsigned long *addr) { - asm volatile("lock; btsl %1,%0" + asm volatile("lock; bts %1,%0" : "+m" (ADDR) : "Ir" (nr) : "memory"); @@ -44,9 +44,9 @@ static inline void sync_set_bit(int nr, volatile unsigned long *addr) * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit() * in order to ensure changes are visible on other processors. */ -static inline void sync_clear_bit(int nr, volatile unsigned long *addr) +static inline void sync_clear_bit(long nr, volatile unsigned long *addr) { - asm volatile("lock; btrl %1,%0" + asm volatile("lock; btr %1,%0" : "+m" (ADDR) : "Ir" (nr) : "memory"); @@ -61,9 +61,9 @@ static inline void sync_clear_bit(int nr, volatile unsigned long *addr) * Note that @nr may be almost arbitrarily large; this function is not * restricted to acting on a single-word quantity. */ -static inline void sync_change_bit(int nr, volatile unsigned long *addr) +static inline void sync_change_bit(long nr, volatile unsigned long *addr) { - asm volatile("lock; btcl %1,%0" + asm volatile("lock; btc %1,%0" : "+m" (ADDR) : "Ir" (nr) : "memory"); @@ -77,11 +77,11 @@ static inline void sync_change_bit(int nr, volatile unsigned long *addr) * This operation is atomic and cannot be reordered. * It also implies a memory barrier. */ -static inline int sync_test_and_set_bit(int nr, volatile unsigned long *addr) +static inline int sync_test_and_set_bit(long nr, volatile unsigned long *addr) { int oldbit; - asm volatile("lock; btsl %2,%1\n\tsbbl %0,%0" + asm volatile("lock; bts %2,%1\n\tsbbl %0,%0" : "=r" (oldbit), "+m" (ADDR) : "Ir" (nr) : "memory"); return oldbit; @@ -95,11 +95,11 @@ static inline int sync_test_and_set_bit(int nr, volatile unsigned long *addr) * This operation is atomic and cannot be reordered. * It also implies a memory barrier. */ -static inline int sync_test_and_clear_bit(int nr, volatile unsigned long *addr) +static inline int sync_test_and_clear_bit(long nr, volatile unsigned long *addr) { int oldbit; - asm volatile("lock; btrl %2,%1\n\tsbbl %0,%0" + asm volatile("lock; btr %2,%1\n\tsbbl %0,%0" : "=r" (oldbit), "+m" (ADDR) : "Ir" (nr) : "memory"); return oldbit; @@ -113,11 +113,11 @@ static inline int sync_test_and_clear_bit(int nr, volatile unsigned long *addr) * This operation is atomic and cannot be reordered. * It also implies a memory barrier. 
*/ -static inline int sync_test_and_change_bit(int nr, volatile unsigned long *addr) +static inline int sync_test_and_change_bit(long nr, volatile unsigned long *addr) { int oldbit; - asm volatile("lock; btcl %2,%1\n\tsbbl %0,%0" + asm volatile("lock; btc %2,%1\n\tsbbl %0,%0" : "=r" (oldbit), "+m" (ADDR) : "Ir" (nr) : "memory"); return oldbit; diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h index 2e188d68397c..aea284b41312 100644 --- a/arch/x86/include/asm/syscall.h +++ b/arch/x86/include/asm/syscall.h @@ -20,7 +20,8 @@ #include <asm/thread_info.h> /* for TS_COMPAT */ #include <asm/unistd.h> -extern const unsigned long sys_call_table[]; +typedef void (*sys_call_ptr_t)(void); +extern const sys_call_ptr_t sys_call_table[]; /* * Only the low 32 bits of orig_ax are meaningful, so we return int. diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h index 2917a6452c49..592a6a672e07 100644 --- a/arch/x86/include/asm/syscalls.h +++ b/arch/x86/include/asm/syscalls.h @@ -24,7 +24,7 @@ asmlinkage long sys_iopl(unsigned int); asmlinkage int sys_modify_ldt(int, void __user *, unsigned long); /* kernel/signal.c */ -long sys_rt_sigreturn(void); +asmlinkage long sys_rt_sigreturn(void); /* kernel/tls.c */ asmlinkage long sys_set_thread_area(struct user_desc __user *); @@ -34,7 +34,7 @@ asmlinkage long sys_get_thread_area(struct user_desc __user *); #ifdef CONFIG_X86_32 /* kernel/signal.c */ -unsigned long sys_sigreturn(void); +asmlinkage unsigned long sys_sigreturn(void); /* kernel/vm86_32.c */ asmlinkage long sys_vm86old(struct vm86_struct __user *); @@ -44,7 +44,7 @@ asmlinkage long sys_vm86(unsigned long, unsigned long); /* X86_64 only */ /* kernel/process_64.c */ -long sys_arch_prctl(int, unsigned long); +asmlinkage long sys_arch_prctl(int, unsigned long); /* kernel/sys_x86_64.c */ asmlinkage long sys_mmap(unsigned long, unsigned long, unsigned long, diff --git a/arch/x86/include/asm/sysfb.h b/arch/x86/include/asm/sysfb.h new file mode 100644 index 000000000000..2aeb3e25579c --- /dev/null +++ b/arch/x86/include/asm/sysfb.h @@ -0,0 +1,98 @@ +#ifndef _ARCH_X86_KERNEL_SYSFB_H +#define _ARCH_X86_KERNEL_SYSFB_H + +/* + * Generic System Framebuffers on x86 + * Copyright (c) 2012-2013 David Herrmann <dh.herrmann@gmail.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + */ + +#include <linux/kernel.h> +#include <linux/platform_data/simplefb.h> +#include <linux/screen_info.h> + +enum { + M_I17, /* 17-Inch iMac */ + M_I20, /* 20-Inch iMac */ + M_I20_SR, /* 20-Inch iMac (Santa Rosa) */ + M_I24, /* 24-Inch iMac */ + M_I24_8_1, /* 24-Inch iMac, 8,1th gen */ + M_I24_10_1, /* 24-Inch iMac, 10,1th gen */ + M_I27_11_1, /* 27-Inch iMac, 11,1th gen */ + M_MINI, /* Mac Mini */ + M_MINI_3_1, /* Mac Mini, 3,1th gen */ + M_MINI_4_1, /* Mac Mini, 4,1th gen */ + M_MB, /* MacBook */ + M_MB_2, /* MacBook, 2nd rev. */ + M_MB_3, /* MacBook, 3rd rev. */ + M_MB_5_1, /* MacBook, 5th rev. */ + M_MB_6_1, /* MacBook, 6th rev. */ + M_MB_7_1, /* MacBook, 7th rev. 
*/ + M_MB_SR, /* MacBook, 2nd gen, (Santa Rosa) */ + M_MBA, /* MacBook Air */ + M_MBA_3, /* Macbook Air, 3rd rev */ + M_MBP, /* MacBook Pro */ + M_MBP_2, /* MacBook Pro 2nd gen */ + M_MBP_2_2, /* MacBook Pro 2,2nd gen */ + M_MBP_SR, /* MacBook Pro (Santa Rosa) */ + M_MBP_4, /* MacBook Pro, 4th gen */ + M_MBP_5_1, /* MacBook Pro, 5,1th gen */ + M_MBP_5_2, /* MacBook Pro, 5,2th gen */ + M_MBP_5_3, /* MacBook Pro, 5,3rd gen */ + M_MBP_6_1, /* MacBook Pro, 6,1th gen */ + M_MBP_6_2, /* MacBook Pro, 6,2th gen */ + M_MBP_7_1, /* MacBook Pro, 7,1th gen */ + M_MBP_8_2, /* MacBook Pro, 8,2nd gen */ + M_UNKNOWN /* placeholder */ +}; + +struct efifb_dmi_info { + char *optname; + unsigned long base; + int stride; + int width; + int height; + int flags; +}; + +#ifdef CONFIG_EFI + +extern struct efifb_dmi_info efifb_dmi_list[]; +void sysfb_apply_efi_quirks(void); + +#else /* CONFIG_EFI */ + +static inline void sysfb_apply_efi_quirks(void) +{ +} + +#endif /* CONFIG_EFI */ + +#ifdef CONFIG_X86_SYSFB + +bool parse_mode(const struct screen_info *si, + struct simplefb_platform_data *mode); +int create_simplefb(const struct screen_info *si, + const struct simplefb_platform_data *mode); + +#else /* CONFIG_X86_SYSFB */ + +static inline bool parse_mode(const struct screen_info *si, + struct simplefb_platform_data *mode) +{ + return false; +} + +static inline int create_simplefb(const struct screen_info *si, + const struct simplefb_platform_data *mode) +{ + return -EINVAL; +} + +#endif /* CONFIG_X86_SYSFB */ + +#endif /* _ARCH_X86_KERNEL_SYSFB_H */ diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 095b21507b6a..d35f24e231cd 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -124,9 +124,6 @@ extern const struct cpumask *cpu_coregroup_mask(int cpu); #define topology_core_id(cpu) (cpu_data(cpu).cpu_core_id) #define topology_core_cpumask(cpu) (per_cpu(cpu_core_map, cpu)) #define topology_thread_cpumask(cpu) (per_cpu(cpu_sibling_map, cpu)) - -/* indicates that pointers to the topology cpumask_t maps are valid */ -#define arch_provides_topology_pointers yes #endif static inline void arch_fix_phys_package_id(int num, u32 slot) diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 88eae2aec619..7036cb60cd87 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -6,11 +6,7 @@ #include <asm/debugreg.h> #include <asm/siginfo.h> /* TRAP_TRACE, ... */ -#ifdef CONFIG_X86_32 -#define dotraplinkage -#else -#define dotraplinkage asmlinkage -#endif +#define dotraplinkage __visible asmlinkage void divide_error(void); asmlinkage void debug(void); diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h index c91e8b9d588b..235be70d5bb4 100644 --- a/arch/x86/include/asm/tsc.h +++ b/arch/x86/include/asm/tsc.h @@ -49,6 +49,7 @@ extern void tsc_init(void); extern void mark_tsc_unstable(char *reason); extern int unsynchronized_tsc(void); extern int check_tsc_unstable(void); +extern int check_tsc_disabled(void); extern unsigned long native_calibrate_tsc(void); extern int tsc_clocksource_reliable; diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 5ee26875baea..5838fa911aa0 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -153,16 +153,19 @@ __typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL)) * Careful: we have to cast the result to the type of the pointer * for sign reasons. 
* - * The use of %edx as the register specifier is a bit of a + * The use of _ASM_DX as the register specifier is a bit of a * simplification, as gcc only cares about it as the starting point * and not size: for a 64-bit value it will use %ecx:%edx on 32 bits * (%ecx being the next register in gcc's x86 register sequence), and * %rdx on 64 bits. + * + * Clang/LLVM cares about the size of the register, but still wants + * the base register for something that ends up being a pair. */ #define get_user(x, ptr) \ ({ \ int __ret_gu; \ - register __inttype(*(ptr)) __val_gu asm("%edx"); \ + register __inttype(*(ptr)) __val_gu asm("%"_ASM_DX); \ __chk_user_ptr(ptr); \ might_fault(); \ asm volatile("call __get_user_%P3" \ diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index f3e01a2cbaa1..966502d4682e 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -387,6 +387,7 @@ enum vmcs_field { #define VMX_EPT_EXTENT_INDIVIDUAL_ADDR 0 #define VMX_EPT_EXTENT_CONTEXT 1 #define VMX_EPT_EXTENT_GLOBAL 2 +#define VMX_EPT_EXTENT_SHIFT 24 #define VMX_EPT_EXECUTE_ONLY_BIT (1ull) #define VMX_EPT_PAGE_WALK_4_BIT (1ull << 6) @@ -394,6 +395,7 @@ enum vmcs_field { #define VMX_EPTP_WB_BIT (1ull << 14) #define VMX_EPT_2MB_PAGE_BIT (1ull << 16) #define VMX_EPT_1GB_PAGE_BIT (1ull << 17) +#define VMX_EPT_INVEPT_BIT (1ull << 20) #define VMX_EPT_AD_BIT (1ull << 21) #define VMX_EPT_EXTENT_CONTEXT_BIT (1ull << 25) #define VMX_EPT_EXTENT_GLOBAL_BIT (1ull << 26) diff --git a/arch/x86/include/asm/vvar.h b/arch/x86/include/asm/vvar.h index de656ac2af41..d76ac40da206 100644 --- a/arch/x86/include/asm/vvar.h +++ b/arch/x86/include/asm/vvar.h @@ -35,7 +35,7 @@ #define DEFINE_VVAR(type, name) \ type name \ - __attribute__((section(".vvar_" #name), aligned(16))) + __attribute__((section(".vvar_" #name), aligned(16))) __visible #define VVAR(name) (*vvaraddr_ ## name) diff --git a/arch/x86/include/asm/xen/events.h b/arch/x86/include/asm/xen/events.h index ca842f2769ef..608a79d5a466 100644 --- a/arch/x86/include/asm/xen/events.h +++ b/arch/x86/include/asm/xen/events.h @@ -7,6 +7,7 @@ enum ipi_vector { XEN_CALL_FUNCTION_SINGLE_VECTOR, XEN_SPIN_UNLOCK_VECTOR, XEN_IRQ_WORK_VECTOR, + XEN_NMI_VECTOR, XEN_NR_IPIS, }; diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h index 125f344f06a9..d866959e5685 100644 --- a/arch/x86/include/asm/xen/hypervisor.h +++ b/arch/x86/include/asm/xen/hypervisor.h @@ -40,21 +40,7 @@ extern struct start_info *xen_start_info; static inline uint32_t xen_cpuid_base(void) { - uint32_t base, eax, ebx, ecx, edx; - char signature[13]; - - for (base = 0x40000000; base < 0x40010000; base += 0x100) { - cpuid(base, &eax, &ebx, &ecx, &edx); - *(uint32_t *)(signature + 0) = ebx; - *(uint32_t *)(signature + 4) = ecx; - *(uint32_t *)(signature + 8) = edx; - signature[12] = 0; - - if (!strcmp("XenVMMXenVMM", signature) && ((eax - base) >= 2)) - return base; - } - - return 0; + return hypervisor_cpuid_base("XenVMMXenVMM", 2); } #ifdef CONFIG_XEN diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h index 06fdbd987e97..94dc8ca434e0 100644 --- a/arch/x86/include/uapi/asm/kvm_para.h +++ b/arch/x86/include/uapi/asm/kvm_para.h @@ -23,6 +23,7 @@ #define KVM_FEATURE_ASYNC_PF 4 #define KVM_FEATURE_STEAL_TIME 5 #define KVM_FEATURE_PV_EOI 6 +#define KVM_FEATURE_PV_UNHALT 7 /* The last 8 bits are used to indicate how to interpret the flags field * in pvclock structure. If no bits are set, all flags are ignored. 
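Looking back at the spinlock.h and spinlock_types.h rework above: with CONFIG_PARAVIRT_SPINLOCKS the ticket counters advance by TICKET_LOCK_INC == 2, leaving bit 0 of the tail free as TICKET_SLOWPATH_FLAG. A toy model of just that encoding (plain C, no atomics; it demonstrates the layout, not the locking protocol):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

typedef uint8_t ticket_t;		/* the kernel's __ticket_t */

#define TICKET_LOCK_INC		((ticket_t)2)
#define TICKET_SLOWPATH_FLAG	((ticket_t)1)

struct raw_tickets { ticket_t head, tail; };

static int value_unlocked(struct raw_tickets t)
{
	return t.head == t.tail;	/* arch_spin_value_unlocked() */
}

int main(void)
{
	struct raw_tickets lock = { 0, 0 };

	assert(value_unlocked(lock));

	lock.tail += TICKET_LOCK_INC;		/* a locker takes a ticket */
	assert(!value_unlocked(lock));

	lock.tail |= TICKET_SLOWPATH_FLAG;	/* a waiter decided to block */
	/* Tickets stay even, so the flag never collides with a ticket: */
	assert(((ticket_t)(lock.tail & ~TICKET_SLOWPATH_FLAG))
	       % TICKET_LOCK_INC == 0);

	lock.head += TICKET_LOCK_INC;		/* unlock passes ownership */
	printf("head=%u tail=%u slowpath=%u\n",
	       lock.head, lock.tail & ~TICKET_SLOWPATH_FLAG,
	       lock.tail & TICKET_SLOWPATH_FLAG);
	return 0;
}

This also explains the tightened bound in spinlock_types.h: with an increment of 2, a u8 ticket covers only 128 CPUs, hence CONFIG_NR_CPUS < (256 / __TICKET_LOCK_INC).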
diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h index d651082c7cf7..0e79420376eb 100644 --- a/arch/x86/include/uapi/asm/vmx.h +++ b/arch/x86/include/uapi/asm/vmx.h @@ -65,6 +65,7 @@ #define EXIT_REASON_EOI_INDUCED 45 #define EXIT_REASON_EPT_VIOLATION 48 #define EXIT_REASON_EPT_MISCONFIG 49 +#define EXIT_REASON_INVEPT 50 #define EXIT_REASON_PREEMPTION_TIMER 52 #define EXIT_REASON_WBINVD 54 #define EXIT_REASON_XSETBV 55 @@ -106,12 +107,13 @@ { EXIT_REASON_APIC_ACCESS, "APIC_ACCESS" }, \ { EXIT_REASON_EPT_VIOLATION, "EPT_VIOLATION" }, \ { EXIT_REASON_EPT_MISCONFIG, "EPT_MISCONFIG" }, \ + { EXIT_REASON_INVEPT, "INVEPT" }, \ + { EXIT_REASON_PREEMPTION_TIMER, "PREEMPTION_TIMER" }, \ { EXIT_REASON_WBINVD, "WBINVD" }, \ { EXIT_REASON_APIC_WRITE, "APIC_WRITE" }, \ { EXIT_REASON_EOI_INDUCED, "EOI_INDUCED" }, \ { EXIT_REASON_INVALID_STATE, "INVALID_STATE" }, \ { EXIT_REASON_INVD, "INVD" }, \ - { EXIT_REASON_INVPCID, "INVPCID" }, \ - { EXIT_REASON_PREEMPTION_TIMER, "PREEMPTION_TIMER" } + { EXIT_REASON_INVPCID, "INVPCID" } #endif /* _UAPIVMX_H */ diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 88d99ea77723..a5408b965c9d 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -103,6 +103,9 @@ obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) += check.o obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o obj-$(CONFIG_OF) += devicetree.o obj-$(CONFIG_UPROBES) += uprobes.o +obj-y += sysfb.o +obj-$(CONFIG_X86_SYSFB) += sysfb_simplefb.o +obj-$(CONFIG_EFI) += sysfb_efi.o obj-$(CONFIG_PERF_EVENTS) += perf_regs.o obj-$(CONFIG_TRACING) += tracepoint.o diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 2627a81253ee..40c76604199f 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -67,6 +67,7 @@ EXPORT_SYMBOL(acpi_pci_disabled); int acpi_lapic; int acpi_ioapic; int acpi_strict; +int acpi_disable_cmcff; u8 acpi_sci_flags __initdata; int acpi_sci_override_gsi __initdata; @@ -141,16 +142,8 @@ static u32 irq_to_gsi(int irq) } /* - * Temporarily use the virtual area starting from FIX_IO_APIC_BASE_END, - * to map the target physical address. The problem is that set_fixmap() - * provides a single page, and it is possible that the page is not - * sufficient. - * By using this area, we can map up to MAX_IO_APICS pages temporarily, - * i.e. until the next __va_range() call. - * - * Important Safety Note: The fixed I/O APIC page numbers are *subtracted* - * from the fixed base. That's why we start at FIX_IO_APIC_BASE_END and - * count idx down while incrementing the phys address. + * This is just a simple wrapper around early_ioremap(), + * with sanity checks for phys == 0 and size == 0. 
*/ char *__init __acpi_map_table(unsigned long phys, unsigned long size) { @@ -160,6 +153,7 @@ char *__init __acpi_map_table(unsigned long phys, unsigned long size) return early_ioremap(phys, size); } + void __init __acpi_unmap_table(char *map, unsigned long size) { if (!map || !size) @@ -199,7 +193,7 @@ static void acpi_register_lapic(int id, u8 enabled) { unsigned int ver = 0; - if (id >= (MAX_LOCAL_APIC-1)) { + if (id >= MAX_LOCAL_APIC) { printk(KERN_INFO PREFIX "skipped apicid that is too big\n"); return; } @@ -1120,6 +1114,7 @@ int mp_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity) int ioapic; int ioapic_pin; struct io_apic_irq_attr irq_attr; + int ret; if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC) return gsi; @@ -1149,7 +1144,9 @@ int mp_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity) set_io_apic_irq_attr(&irq_attr, ioapic, ioapic_pin, trigger == ACPI_EDGE_SENSITIVE ? 0 : 1, polarity == ACPI_ACTIVE_HIGH ? 0 : 1); - io_apic_set_pci_routing(dev, gsi_to_irq(gsi), &irq_attr); + ret = io_apic_set_pci_routing(dev, gsi_to_irq(gsi), &irq_attr); + if (ret < 0) + gsi = INT_MIN; return gsi; } @@ -1626,6 +1623,10 @@ static int __init parse_acpi(char *arg) /* "acpi=copy_dsdt" copys DSDT */ else if (strcmp(arg, "copy_dsdt") == 0) { acpi_gbl_copy_dsdt_locally = 1; + } + /* "acpi=nocmcff" disables FF mode for corrected errors */ + else if (strcmp(arg, "nocmcff") == 0) { + acpi_disable_cmcff = 1; } else { /* Core will printk when we return error. */ return -EINVAL; diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index c15cf9a25e27..15e8563e5c24 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -11,6 +11,7 @@ #include <linux/memory.h> #include <linux/stop_machine.h> #include <linux/slab.h> +#include <linux/kdebug.h> #include <asm/alternative.h> #include <asm/sections.h> #include <asm/pgtable.h> @@ -596,97 +597,93 @@ void *__kprobes text_poke(void *addr, const void *opcode, size_t len) return addr; } -/* - * Cross-modifying kernel text with stop_machine(). - * This code originally comes from immediate value. - */ -static atomic_t stop_machine_first; -static int wrote_text; +static void do_sync_core(void *info) +{ + sync_core(); +} -struct text_poke_params { - struct text_poke_param *params; - int nparams; -}; +static bool bp_patching_in_progress; +static void *bp_int3_handler, *bp_int3_addr; -static int __kprobes stop_machine_text_poke(void *data) +int poke_int3_handler(struct pt_regs *regs) { - struct text_poke_params *tpp = data; - struct text_poke_param *p; - int i; + /* bp_patching_in_progress */ + smp_rmb(); - if (atomic_xchg(&stop_machine_first, 0)) { - for (i = 0; i < tpp->nparams; i++) { - p = &tpp->params[i]; - text_poke(p->addr, p->opcode, p->len); - } - smp_wmb(); /* Make sure other cpus see that this has run */ - wrote_text = 1; - } else { - while (!wrote_text) - cpu_relax(); - smp_mb(); /* Load wrote_text before following execution */ - } + if (likely(!bp_patching_in_progress)) + return 0; - for (i = 0; i < tpp->nparams; i++) { - p = &tpp->params[i]; - flush_icache_range((unsigned long)p->addr, - (unsigned long)p->addr + p->len); - } - /* - * Intel Archiecture Software Developer's Manual section 7.1.3 specifies - * that a core serializing instruction such as "cpuid" should be - * executed on _each_ core before the new instruction is made visible. 
- */ - sync_core(); - return 0; -} + if (user_mode_vm(regs) || regs->ip != (unsigned long)bp_int3_addr) + return 0; + + /* set up the specified breakpoint handler */ + regs->ip = (unsigned long) bp_int3_handler; + + return 1; -/** - * text_poke_smp - Update instructions on a live kernel on SMP - * @addr: address to modify - * @opcode: source of the copy - * @len: length to copy - * - * Modify multi-byte instruction by using stop_machine() on SMP. This allows - * user to poke/set multi-byte text on SMP. Only non-NMI/MCE code modifying - * should be allowed, since stop_machine() does _not_ protect code against - * NMI and MCE. - * - * Note: Must be called under get_online_cpus() and text_mutex. - */ -void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len) -{ - struct text_poke_params tpp; - struct text_poke_param p; - - p.addr = addr; - p.opcode = opcode; - p.len = len; - tpp.params = &p; - tpp.nparams = 1; - atomic_set(&stop_machine_first, 1); - wrote_text = 0; - /* Use __stop_machine() because the caller already got online_cpus. */ - __stop_machine(stop_machine_text_poke, (void *)&tpp, cpu_online_mask); - return addr; } /** - * text_poke_smp_batch - Update instructions on a live kernel on SMP - * @params: an array of text_poke parameters - * @n: the number of elements in params. + * text_poke_bp() -- update instructions on live kernel on SMP + * @addr: address to patch + * @opcode: opcode of new instruction + * @len: length to copy + * @handler: address to jump to when the temporary breakpoint is hit * - * Modify multi-byte instruction by using stop_machine() on SMP. Since the - * stop_machine() is heavy task, it is better to aggregate text_poke requests - * and do it once if possible. + * Modify multi-byte instruction by using int3 breakpoint on SMP. + * We completely avoid stop_machine() here, and achieve the + * synchronization using int3 breakpoint. * - * Note: Must be called under get_online_cpus() and text_mutex. + * The way it is done: + * - add a int3 trap to the address that will be patched + * - sync cores + * - update all but the first byte of the patched range + * - sync cores + * - replace the first byte (int3) by the first byte of + * replacing opcode + * - sync cores + * + * Note: must be called under text_mutex. */ -void __kprobes text_poke_smp_batch(struct text_poke_param *params, int n) +void *text_poke_bp(void *addr, const void *opcode, size_t len, void *handler) { - struct text_poke_params tpp = {.params = params, .nparams = n}; + unsigned char int3 = 0xcc; + + bp_int3_handler = handler; + bp_int3_addr = (u8 *)addr + sizeof(int3); + bp_patching_in_progress = true; + /* + * Corresponding read barrier in int3 notifier for + * making sure the in_progress flags is correctly ordered wrt. + * patching + */ + smp_wmb(); + + text_poke(addr, &int3, sizeof(int3)); - atomic_set(&stop_machine_first, 1); - wrote_text = 0; - __stop_machine(stop_machine_text_poke, (void *)&tpp, cpu_online_mask); + on_each_cpu(do_sync_core, NULL, 1); + + if (len - sizeof(int3) > 0) { + /* patch all but the first byte */ + text_poke((char *)addr + sizeof(int3), + (const char *) opcode + sizeof(int3), + len - sizeof(int3)); + /* + * According to Intel, this core syncing is very likely + * not necessary and we'd be safe even without it. But + * better safe than sorry (plus there's not only Intel). 
+ */ + on_each_cpu(do_sync_core, NULL, 1); + } + + /* patch the first byte */ + text_poke(addr, opcode, sizeof(int3)); + + on_each_cpu(do_sync_core, NULL, 1); + + bp_patching_in_progress = false; + smp_wmb(); + + return addr; } + diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index 3048ded1b598..59554dca96ec 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -20,6 +20,7 @@ const struct pci_device_id amd_nb_misc_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F3) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_M10H_F3) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_M30H_NB_F3) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F3) }, {} }; @@ -27,6 +28,7 @@ EXPORT_SYMBOL(amd_nb_misc_ids); static const struct pci_device_id amd_nb_link_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F4) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_M30H_NB_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F4) }, {} }; @@ -81,13 +83,20 @@ int amd_cache_northbridges(void) next_northbridge(misc, amd_nb_misc_ids); node_to_amd_nb(i)->link = link = next_northbridge(link, amd_nb_link_ids); - } + } + /* GART present only on Fam15h upto model 0fh */ if (boot_cpu_data.x86 == 0xf || boot_cpu_data.x86 == 0x10 || - boot_cpu_data.x86 == 0x15) + (boot_cpu_data.x86 == 0x15 && boot_cpu_data.x86_model < 0x10)) amd_northbridges.flags |= AMD_NB_GART; /* + * Check for L3 cache presence. + */ + if (!cpuid_edx(0x80000006)) + return 0; + + /* * Some CPU families support L3 Cache Index Disable. There are some * limitations because of E382 and E388 on family 0x10. */ diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index eca89c53a7f5..a7eb82d9b012 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -913,7 +913,7 @@ static void local_apic_timer_interrupt(void) * [ if a single-CPU system runs an SMP kernel then we call the local * interrupt as well. Thus we cannot inline the local irq ... 
] */ -void __irq_entry smp_apic_timer_interrupt(struct pt_regs *regs) +__visible void __irq_entry smp_apic_timer_interrupt(struct pt_regs *regs) { struct pt_regs *old_regs = set_irq_regs(regs); @@ -932,7 +932,7 @@ void __irq_entry smp_apic_timer_interrupt(struct pt_regs *regs) set_irq_regs(old_regs); } -void __irq_entry smp_trace_apic_timer_interrupt(struct pt_regs *regs) +__visible void __irq_entry smp_trace_apic_timer_interrupt(struct pt_regs *regs) { struct pt_regs *old_regs = set_irq_regs(regs); @@ -1946,14 +1946,14 @@ static inline void __smp_spurious_interrupt(void) "should never happen.\n", smp_processor_id()); } -void smp_spurious_interrupt(struct pt_regs *regs) +__visible void smp_spurious_interrupt(struct pt_regs *regs) { entering_irq(); __smp_spurious_interrupt(); exiting_irq(); } -void smp_trace_spurious_interrupt(struct pt_regs *regs) +__visible void smp_trace_spurious_interrupt(struct pt_regs *regs) { entering_irq(); trace_spurious_apic_entry(SPURIOUS_APIC_VECTOR); @@ -2002,14 +2002,14 @@ static inline void __smp_error_interrupt(struct pt_regs *regs) } -void smp_error_interrupt(struct pt_regs *regs) +__visible void smp_error_interrupt(struct pt_regs *regs) { entering_irq(); __smp_error_interrupt(regs); exiting_irq(); } -void smp_trace_error_interrupt(struct pt_regs *regs) +__visible void smp_trace_error_interrupt(struct pt_regs *regs) { entering_irq(); trace_error_apic_entry(ERROR_APIC_VECTOR); diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 9ed796ccc32c..e63a5bd2a78f 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1534,6 +1534,11 @@ void intel_ir_io_apic_print_entries(unsigned int apic, } } +void ioapic_zap_locks(void) +{ + raw_spin_lock_init(&ioapic_lock); +} + __apicdebuginit(void) print_IO_APIC(int ioapic_idx) { union IO_APIC_reg_00 reg_00; @@ -3375,12 +3380,15 @@ int io_apic_setup_irq_pin_once(unsigned int irq, int node, { unsigned int ioapic_idx = attr->ioapic, pin = attr->ioapic_pin; int ret; + struct IO_APIC_route_entry orig_entry; /* Avoid redundant programming */ if (test_bit(pin, ioapics[ioapic_idx].pin_programmed)) { - pr_debug("Pin %d-%d already programmed\n", - mpc_ioapic_id(ioapic_idx), pin); - return 0; + pr_debug("Pin %d-%d already programmed\n", mpc_ioapic_id(ioapic_idx), pin); + orig_entry = ioapic_read_entry(attr->ioapic, pin); + if (attr->trigger == orig_entry.trigger && attr->polarity == orig_entry.polarity) + return 0; + return -EBUSY; } ret = io_apic_setup_irq_pin(irq, node, attr); if (!ret) diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 53a4e2744846..3ab03430211d 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -392,7 +392,7 @@ static struct cpuidle_device apm_cpuidle_device; /* * Local variables */ -static struct { +__visible struct { unsigned long offset; unsigned short segment; } apm_bios_entry; diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 08a089043ccf..903a264af981 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -66,8 +66,8 @@ static inline int wrmsrl_amd_safe(unsigned msr, unsigned long long val) * performance at the same time.. 
*/ -extern void vide(void); -__asm__(".align 4\nvide: ret"); +extern __visible void vide(void); +__asm__(".globl vide\n\t.align 4\nvide: ret"); static void init_amd_k5(struct cpuinfo_x86 *c) { diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 25eb2747b063..2793d1f095a2 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1076,7 +1076,7 @@ struct desc_ptr debug_idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) debug_idt_table }; DEFINE_PER_CPU_FIRST(union irq_stack_union, - irq_stack_union) __aligned(PAGE_SIZE); + irq_stack_union) __aligned(PAGE_SIZE) __visible; /* * The following four percpu variables are hot. Align current_task to @@ -1093,7 +1093,7 @@ EXPORT_PER_CPU_SYMBOL(kernel_stack); DEFINE_PER_CPU(char *, irq_stack_ptr) = init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64; -DEFINE_PER_CPU(unsigned int, irq_count) = -1; +DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1; DEFINE_PER_CPU(struct task_struct *, fpu_owner_task); diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c index 87279212d318..36ce402a3fa5 100644 --- a/arch/x86/kernel/cpu/hypervisor.c +++ b/arch/x86/kernel/cpu/hypervisor.c @@ -25,11 +25,6 @@ #include <asm/processor.h> #include <asm/hypervisor.h> -/* - * Hypervisor detect order. This is specified explicitly here because - * some hypervisors might implement compatibility modes for other - * hypervisors and therefore need to be detected in specific sequence. - */ static const __initconst struct hypervisor_x86 * const hypervisors[] = { #ifdef CONFIG_XEN_PVHVM @@ -49,15 +44,19 @@ static inline void __init detect_hypervisor_vendor(void) { const struct hypervisor_x86 *h, * const *p; + uint32_t pri, max_pri = 0; for (p = hypervisors; p < hypervisors + ARRAY_SIZE(hypervisors); p++) { h = *p; - if (h->detect()) { + pri = h->detect(); + if (pri != 0 && pri > max_pri) { + max_pri = pri; x86_hyper = h; - printk(KERN_INFO "Hypervisor detected: %s\n", h->name); - break; } } + + if (max_pri) + printk(KERN_INFO "Hypervisor detected: %s\n", x86_hyper->name); } void init_hypervisor(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h index 5b7d4fa5d3b7..09edd0b65fef 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-internal.h +++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h @@ -25,15 +25,18 @@ int mce_severity(struct mce *a, int tolerant, char **msg); struct dentry *mce_get_debugfs_dir(void); extern struct mce_bank *mce_banks; +extern mce_banks_t mce_banks_ce_disabled; #ifdef CONFIG_X86_MCE_INTEL unsigned long mce_intel_adjust_timer(unsigned long interval); void mce_intel_cmci_poll(void); void mce_intel_hcpu_update(unsigned long cpu); +void cmci_disable_bank(int bank); #else # define mce_intel_adjust_timer mce_adjust_timer_default static inline void mce_intel_cmci_poll(void) { } static inline void mce_intel_hcpu_update(unsigned long cpu) { } +static inline void cmci_disable_bank(int bank) { } #endif void mce_timer_kick(unsigned long interval); diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 87a65c939bcd..b3218cdee95f 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -97,6 +97,15 @@ DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { [0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL }; +/* + * MCA banks controlled through firmware first for corrected errors. + * This is a global list of banks for which we won't enable CMCI and we + * won't poll. 
Firmware controls these banks and is responsible for + * reporting corrected errors through GHES. Uncorrected/recoverable + * errors are still notified through a machine check. + */ +mce_banks_t mce_banks_ce_disabled; + static DEFINE_PER_CPU(struct work_struct, mce_work); static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs); @@ -1935,6 +1944,25 @@ static struct miscdevice mce_chrdev_device = { &mce_chrdev_ops, }; +static void __mce_disable_bank(void *arg) +{ + int bank = *((int *)arg); + __clear_bit(bank, __get_cpu_var(mce_poll_banks)); + cmci_disable_bank(bank); +} + +void mce_disable_bank(int bank) +{ + if (bank >= mca_cfg.banks) { + pr_warn(FW_BUG + "Ignoring request to disable invalid MCA bank %d.\n", + bank); + return; + } + set_bit(bank, mce_banks_ce_disabled); + on_each_cpu(__mce_disable_bank, &bank, 1); +} + /* * mce=off Disables machine check * mce=no_cmci Disables CMCI diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index d56405309dc1..4cfe0458ca66 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c @@ -203,6 +203,10 @@ static void cmci_discover(int banks) if (test_bit(i, owned)) continue; + /* Skip banks in firmware first mode */ + if (test_bit(i, mce_banks_ce_disabled)) + continue; + rdmsrl(MSR_IA32_MCx_CTL2(i), val); /* Already owned by someone else? */ @@ -271,6 +275,19 @@ void cmci_recheck(void) local_irq_restore(flags); } +/* Caller must hold the lock on cmci_discover_lock */ +static void __cmci_disable_bank(int bank) +{ + u64 val; + + if (!test_bit(bank, __get_cpu_var(mce_banks_owned))) + return; + rdmsrl(MSR_IA32_MCx_CTL2(bank), val); + val &= ~MCI_CTL2_CMCI_EN; + wrmsrl(MSR_IA32_MCx_CTL2(bank), val); + __clear_bit(bank, __get_cpu_var(mce_banks_owned)); +} + /* * Disable CMCI on this CPU for all banks it owns when it goes down. * This allows other CPUs to claim the banks on rediscovery. 
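Two files back, the alternative.c rework replaces stop_machine()-based cross-modification with int3 breakpoint patching (text_poke_bp()). A userspace model of its three-phase ordering; memcpy on a plain buffer stands in for text_poke(), sync_cores() for on_each_cpu(do_sync_core, ...), and no real code is patched:

#include <stdio.h>
#include <string.h>

static void sync_cores(void)
{
	/* on_each_cpu(do_sync_core, NULL, 1) in the real implementation */
}

static void poke_bp(unsigned char *addr, const unsigned char *opcode,
		    size_t len)
{
	const unsigned char int3 = 0xcc;

	memcpy(addr, &int3, 1);		/* 1: arm the breakpoint byte */
	sync_cores();

	if (len > 1) {
		memcpy(addr + 1, opcode + 1, len - 1);	/* 2: patch the tail */
		sync_cores();
	}

	memcpy(addr, opcode, 1);	/* 3: replace int3 with the real byte */
	sync_cores();
}

int main(void)
{
	unsigned char text[5] = { 0x90, 0x90, 0x90, 0x90, 0x90 }; /* nops */
	const unsigned char jmp[5] = { 0xe9, 0x01, 0x02, 0x03, 0x04 };

	poke_bp(text, jmp, sizeof(jmp));

	for (int i = 0; i < 5; i++)
		printf("%02x ", text[i]);
	printf("\n");
	return 0;
}

In the running kernel, poke_int3_handler() redirects any CPU that hits the temporary int3 to the supplied handler until patching completes.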
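As for the mce.c and mce_intel.c hunks around this point: mce_disable_bank() records a bank in mce_banks_ce_disabled, then clears it from the per-CPU poll set and drops CMCI ownership on every CPU, leaving corrected errors in that bank to firmware (reported through GHES). A compressed single-CPU model of the bookkeeping; plain bitmasks replace the kernel's per-CPU bitmaps and on_each_cpu():

#include <stdio.h>

static unsigned long banks_ce_disabled;	/* models mce_banks_ce_disabled */
static unsigned long banks_owned;	/* models a CPU's mce_banks_owned */

static void mce_disable_bank(int bank)
{
	banks_ce_disabled |= 1UL << bank;
	banks_owned &= ~(1UL << bank);	/* __mce_disable_bank() on each CPU */
}

static void cmci_discover(int nbanks)
{
	for (int i = 0; i < nbanks; i++) {
		if (banks_ce_disabled & (1UL << i))
			continue;	/* firmware first: leave it alone */
		banks_owned |= 1UL << i;
	}
}

int main(void)
{
	mce_disable_bank(3);	/* e.g. a bank claimed by firmware in HEST */
	cmci_discover(6);
	printf("owned mask: 0x%lx\n", banks_owned);	/* bit 3 stays clear */
	return 0;
}

The related acpi/boot.c hunk adds an "acpi=nocmcff" parameter to opt out of this firmware-first handling entirely.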
@@ -280,20 +297,12 @@ void cmci_clear(void) unsigned long flags; int i; int banks; - u64 val; if (!cmci_supported(&banks)) return; raw_spin_lock_irqsave(&cmci_discover_lock, flags); - for (i = 0; i < banks; i++) { - if (!test_bit(i, __get_cpu_var(mce_banks_owned))) - continue; - /* Disable CMCI */ - rdmsrl(MSR_IA32_MCx_CTL2(i), val); - val &= ~MCI_CTL2_CMCI_EN; - wrmsrl(MSR_IA32_MCx_CTL2(i), val); - __clear_bit(i, __get_cpu_var(mce_banks_owned)); - } + for (i = 0; i < banks; i++) + __cmci_disable_bank(i); raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); } @@ -327,6 +336,19 @@ void cmci_reenable(void) cmci_discover(banks); } +void cmci_disable_bank(int bank) +{ + int banks; + unsigned long flags; + + if (!cmci_supported(&banks)) + return; + + raw_spin_lock_irqsave(&cmci_discover_lock, flags); + __cmci_disable_bank(bank); + raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); +} + static void intel_init_cmci(void) { int banks; diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 8f4be53ea04b..71a39f3621ba 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -27,20 +27,23 @@ struct ms_hyperv_info ms_hyperv; EXPORT_SYMBOL_GPL(ms_hyperv); -static bool __init ms_hyperv_platform(void) +static uint32_t __init ms_hyperv_platform(void) { u32 eax; u32 hyp_signature[3]; if (!boot_cpu_has(X86_FEATURE_HYPERVISOR)) - return false; + return 0; cpuid(HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS, &eax, &hyp_signature[0], &hyp_signature[1], &hyp_signature[2]); - return eax >= HYPERV_CPUID_MIN && - eax <= HYPERV_CPUID_MAX && - !memcmp("Microsoft Hv", hyp_signature, 12); + if (eax >= HYPERV_CPUID_MIN && + eax <= HYPERV_CPUID_MAX && + !memcmp("Microsoft Hv", hyp_signature, 12)) + return HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS; + + return 0; } static cycle_t read_hv_clock(struct clocksource *arg) diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index a7c7305030cc..8355c84b9729 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1884,6 +1884,7 @@ static struct pmu pmu = { void arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now) { userpg->cap_usr_time = 0; + userpg->cap_usr_time_zero = 0; userpg->cap_usr_rdpmc = x86_pmu.attr_rdpmc; userpg->pmc_width = x86_pmu.cntval_bits; @@ -1897,6 +1898,11 @@ void arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now) userpg->time_mult = this_cpu_read(cyc2ns); userpg->time_shift = CYC2NS_SCALE_FACTOR; userpg->time_offset = this_cpu_read(cyc2ns_offset) - now; + + if (sched_clock_stable && !check_tsc_disabled()) { + userpg->cap_usr_time_zero = 1; + userpg->time_zero = this_cpu_read(cyc2ns_offset); + } } /* diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index 97e557bc4c91..cc16faae0538 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -641,6 +641,8 @@ extern struct event_constraint intel_core2_pebs_event_constraints[]; extern struct event_constraint intel_atom_pebs_event_constraints[]; +extern struct event_constraint intel_slm_pebs_event_constraints[]; + extern struct event_constraint intel_nehalem_pebs_event_constraints[]; extern struct event_constraint intel_westmere_pebs_event_constraints[]; diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index 4cbe03287b08..beeb7cc07044 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c @@ -347,8 +347,7 @@ static struct amd_nb 
*amd_alloc_nb(int cpu) struct amd_nb *nb; int i; - nb = kmalloc_node(sizeof(struct amd_nb), GFP_KERNEL | __GFP_ZERO, - cpu_to_node(cpu)); + nb = kzalloc_node(sizeof(struct amd_nb), GFP_KERNEL, cpu_to_node(cpu)); if (!nb) return NULL; diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index a45d8d4ace10..0abf6742a8b0 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -81,7 +81,8 @@ static struct event_constraint intel_nehalem_event_constraints[] __read_mostly = static struct extra_reg intel_nehalem_extra_regs[] __read_mostly = { - INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0), + /* must define OFFCORE_RSP_X first, see intel_fixup_er() */ + INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0), INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x100b), EVENT_EXTRA_END }; @@ -143,8 +144,9 @@ static struct event_constraint intel_ivb_event_constraints[] __read_mostly = static struct extra_reg intel_westmere_extra_regs[] __read_mostly = { - INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0), - INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0xffff, RSP_1), + /* must define OFFCORE_RSP_X first, see intel_fixup_er() */ + INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0), + INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0xffff, RSP_1), INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x100b), EVENT_EXTRA_END }; @@ -162,16 +164,27 @@ static struct event_constraint intel_gen_event_constraints[] __read_mostly = EVENT_CONSTRAINT_END }; +static struct event_constraint intel_slm_event_constraints[] __read_mostly = +{ + FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ + FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ + FIXED_EVENT_CONSTRAINT(0x013c, 2), /* CPU_CLK_UNHALTED.REF */ + FIXED_EVENT_CONSTRAINT(0x0300, 2), /* pseudo CPU_CLK_UNHALTED.REF */ + EVENT_CONSTRAINT_END +}; + static struct extra_reg intel_snb_extra_regs[] __read_mostly = { - INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0x3f807f8fffull, RSP_0), - INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0x3f807f8fffull, RSP_1), + /* must define OFFCORE_RSP_X first, see intel_fixup_er() */ + INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3f807f8fffull, RSP_0), + INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3f807f8fffull, RSP_1), INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd), EVENT_EXTRA_END }; static struct extra_reg intel_snbep_extra_regs[] __read_mostly = { - INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0x3fffff8fffull, RSP_0), - INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0x3fffff8fffull, RSP_1), + /* must define OFFCORE_RSP_X first, see intel_fixup_er() */ + INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffff8fffull, RSP_0), + INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3fffff8fffull, RSP_1), INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd), EVENT_EXTRA_END }; @@ -882,6 +895,140 @@ static __initconst const u64 atom_hw_cache_event_ids }, }; +static struct extra_reg intel_slm_extra_regs[] __read_mostly = +{ + /* must define OFFCORE_RSP_X first, see intel_fixup_er() */ + INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x768005ffff, RSP_0), + INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x768005ffff, RSP_1), + EVENT_EXTRA_END +}; + +#define SLM_DMND_READ SNB_DMND_DATA_RD +#define SLM_DMND_WRITE SNB_DMND_RFO +#define SLM_DMND_PREFETCH (SNB_PF_DATA_RD|SNB_PF_RFO) + +#define SLM_SNP_ANY (SNB_SNP_NONE|SNB_SNP_MISS|SNB_NO_FWD|SNB_HITM) +#define SLM_LLC_ACCESS SNB_RESP_ANY +#define 
SLM_LLC_MISS (SLM_SNP_ANY|SNB_NON_DRAM) + +static __initconst const u64 slm_hw_cache_extra_regs + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = +{ + [ C(LL ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = SLM_DMND_READ|SLM_LLC_ACCESS, + [ C(RESULT_MISS) ] = SLM_DMND_READ|SLM_LLC_MISS, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = SLM_DMND_WRITE|SLM_LLC_ACCESS, + [ C(RESULT_MISS) ] = SLM_DMND_WRITE|SLM_LLC_MISS, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = SLM_DMND_PREFETCH|SLM_LLC_ACCESS, + [ C(RESULT_MISS) ] = SLM_DMND_PREFETCH|SLM_LLC_MISS, + }, + }, +}; + +static __initconst const u64 slm_hw_cache_event_ids + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = +{ + [ C(L1D) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0x0104, /* LD_DCU_MISS */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + }, + [ C(L1I ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0380, /* ICACHE.ACCESSES */ + [ C(RESULT_MISS) ] = 0x0280, /* ICACHE.MISSES */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + }, + [ C(LL ) ] = { + [ C(OP_READ) ] = { + /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */ + [ C(RESULT_ACCESS) ] = 0x01b7, + /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */ + [ C(RESULT_MISS) ] = 0x01b7, + }, + [ C(OP_WRITE) ] = { + /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */ + [ C(RESULT_ACCESS) ] = 0x01b7, + /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */ + [ C(RESULT_MISS) ] = 0x01b7, + }, + [ C(OP_PREFETCH) ] = { + /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */ + [ C(RESULT_ACCESS) ] = 0x01b7, + /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */ + [ C(RESULT_MISS) ] = 0x01b7, + }, + }, + [ C(DTLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0x0804, /* LD_DTLB_MISS */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + }, + [ C(ITLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */ + [ C(RESULT_MISS) ] = 0x0282, /* ITLB.MISSES */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, + [ C(BPU ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */ + [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, +}; + static inline bool intel_pmu_needs_lbr_smpl(struct perf_event *event) { /* user explicitly requested branch sampling */ @@ -1301,11 +1448,11 @@ static void intel_fixup_er(struct perf_event *event, int idx) if (idx == EXTRA_REG_RSP_0) { event->hw.config &= ~INTEL_ARCH_EVENT_MASK; - event->hw.config |= 0x01b7; + event->hw.config |= x86_pmu.extra_regs[EXTRA_REG_RSP_0].event; event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0; } else if (idx == EXTRA_REG_RSP_1) { event->hw.config &= ~INTEL_ARCH_EVENT_MASK; - event->hw.config |= 0x01bb; + event->hw.config |= x86_pmu.extra_regs[EXTRA_REG_RSP_1].event; event->hw.extra_reg.reg = 
MSR_OFFCORE_RSP_1; } } @@ -2176,6 +2323,21 @@ __init int intel_pmu_init(void) pr_cont("Atom events, "); break; + case 55: /* Atom 22nm "Silvermont" */ + memcpy(hw_cache_event_ids, slm_hw_cache_event_ids, + sizeof(hw_cache_event_ids)); + memcpy(hw_cache_extra_regs, slm_hw_cache_extra_regs, + sizeof(hw_cache_extra_regs)); + + intel_pmu_lbr_init_atom(); + + x86_pmu.event_constraints = intel_slm_event_constraints; + x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints; + x86_pmu.extra_regs = intel_slm_extra_regs; + x86_pmu.er_flags |= ERF_HAS_RSP_1; + pr_cont("Silvermont events, "); + break; + case 37: /* 32 nm nehalem, "Clarkdale" */ case 44: /* 32 nm nehalem, "Gulftown" */ case 47: /* 32 nm Xeon E7 */ diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 3065c57a63c1..63438aad177f 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -224,7 +224,7 @@ static int alloc_pebs_buffer(int cpu) if (!x86_pmu.pebs) return 0; - buffer = kmalloc_node(PEBS_BUFFER_SIZE, GFP_KERNEL | __GFP_ZERO, node); + buffer = kzalloc_node(PEBS_BUFFER_SIZE, GFP_KERNEL, node); if (unlikely(!buffer)) return -ENOMEM; @@ -262,7 +262,7 @@ static int alloc_bts_buffer(int cpu) if (!x86_pmu.bts) return 0; - buffer = kmalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_ZERO, node); + buffer = kzalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL, node); if (unlikely(!buffer)) return -ENOMEM; @@ -295,7 +295,7 @@ static int alloc_ds_buffer(int cpu) int node = cpu_to_node(cpu); struct debug_store *ds; - ds = kmalloc_node(sizeof(*ds), GFP_KERNEL | __GFP_ZERO, node); + ds = kzalloc_node(sizeof(*ds), GFP_KERNEL, node); if (unlikely(!ds)) return -ENOMEM; @@ -517,6 +517,32 @@ struct event_constraint intel_atom_pebs_event_constraints[] = { EVENT_CONSTRAINT_END }; +struct event_constraint intel_slm_pebs_event_constraints[] = { + INTEL_UEVENT_CONSTRAINT(0x0103, 0x1), /* REHABQ.LD_BLOCK_ST_FORWARD_PS */ + INTEL_UEVENT_CONSTRAINT(0x0803, 0x1), /* REHABQ.LD_SPLITS_PS */ + INTEL_UEVENT_CONSTRAINT(0x0204, 0x1), /* MEM_UOPS_RETIRED.L2_HIT_LOADS_PS */ + INTEL_UEVENT_CONSTRAINT(0x0404, 0x1), /* MEM_UOPS_RETIRED.L2_MISS_LOADS_PS */ + INTEL_UEVENT_CONSTRAINT(0x0804, 0x1), /* MEM_UOPS_RETIRED.DTLB_MISS_LOADS_PS */ + INTEL_UEVENT_CONSTRAINT(0x2004, 0x1), /* MEM_UOPS_RETIRED.HITM_PS */ + INTEL_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY_PS */ + INTEL_UEVENT_CONSTRAINT(0x00c4, 0x1), /* BR_INST_RETIRED.ALL_BRANCHES_PS */ + INTEL_UEVENT_CONSTRAINT(0x7ec4, 0x1), /* BR_INST_RETIRED.JCC_PS */ + INTEL_UEVENT_CONSTRAINT(0xbfc4, 0x1), /* BR_INST_RETIRED.FAR_BRANCH_PS */ + INTEL_UEVENT_CONSTRAINT(0xebc4, 0x1), /* BR_INST_RETIRED.NON_RETURN_IND_PS */ + INTEL_UEVENT_CONSTRAINT(0xf7c4, 0x1), /* BR_INST_RETIRED.RETURN_PS */ + INTEL_UEVENT_CONSTRAINT(0xf9c4, 0x1), /* BR_INST_RETIRED.CALL_PS */ + INTEL_UEVENT_CONSTRAINT(0xfbc4, 0x1), /* BR_INST_RETIRED.IND_CALL_PS */ + INTEL_UEVENT_CONSTRAINT(0xfdc4, 0x1), /* BR_INST_RETIRED.REL_CALL_PS */ + INTEL_UEVENT_CONSTRAINT(0xfec4, 0x1), /* BR_INST_RETIRED.TAKEN_JCC_PS */ + INTEL_UEVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_MISP_RETIRED.ALL_BRANCHES_PS */ + INTEL_UEVENT_CONSTRAINT(0x7ec5, 0x1), /* BR_INST_MISP_RETIRED.JCC_PS */ + INTEL_UEVENT_CONSTRAINT(0xebc5, 0x1), /* BR_INST_MISP_RETIRED.NON_RETURN_IND_PS */ + INTEL_UEVENT_CONSTRAINT(0xf7c5, 0x1), /* BR_INST_MISP_RETIRED.RETURN_PS */ + INTEL_UEVENT_CONSTRAINT(0xfbc5, 0x1), /* BR_INST_MISP_RETIRED.IND_CALL_PS */ + INTEL_UEVENT_CONSTRAINT(0xfec5, 0x1), /* 
BR_INST_MISP_RETIRED.TAKEN_JCC_PS */ + EVENT_CONSTRAINT_END +}; + struct event_constraint intel_nehalem_pebs_event_constraints[] = { INTEL_PLD_CONSTRAINT(0x100b, 0xf), /* MEM_INST_RETIRED.* */ INTEL_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */ diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index 1fb6c72717bd..fd8011ed4dcd 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c @@ -6,6 +6,8 @@ static struct intel_uncore_type **pci_uncores = empty_uncore; /* pci bus to socket mapping */ static int pcibus_to_physid[256] = { [0 ... 255] = -1, }; +static struct pci_dev *extra_pci_dev[UNCORE_SOCKET_MAX][UNCORE_EXTRA_PCI_DEV_MAX]; + static DEFINE_RAW_SPINLOCK(uncore_box_lock); /* mask of cpus that collect uncore events */ @@ -45,6 +47,24 @@ DEFINE_UNCORE_FORMAT_ATTR(filter_band0, filter_band0, "config1:0-7"); DEFINE_UNCORE_FORMAT_ATTR(filter_band1, filter_band1, "config1:8-15"); DEFINE_UNCORE_FORMAT_ATTR(filter_band2, filter_band2, "config1:16-23"); DEFINE_UNCORE_FORMAT_ATTR(filter_band3, filter_band3, "config1:24-31"); +DEFINE_UNCORE_FORMAT_ATTR(match_rds, match_rds, "config1:48-51"); +DEFINE_UNCORE_FORMAT_ATTR(match_rnid30, match_rnid30, "config1:32-35"); +DEFINE_UNCORE_FORMAT_ATTR(match_rnid4, match_rnid4, "config1:31"); +DEFINE_UNCORE_FORMAT_ATTR(match_dnid, match_dnid, "config1:13-17"); +DEFINE_UNCORE_FORMAT_ATTR(match_mc, match_mc, "config1:9-12"); +DEFINE_UNCORE_FORMAT_ATTR(match_opc, match_opc, "config1:5-8"); +DEFINE_UNCORE_FORMAT_ATTR(match_vnw, match_vnw, "config1:3-4"); +DEFINE_UNCORE_FORMAT_ATTR(match0, match0, "config1:0-31"); +DEFINE_UNCORE_FORMAT_ATTR(match1, match1, "config1:32-63"); +DEFINE_UNCORE_FORMAT_ATTR(mask_rds, mask_rds, "config2:48-51"); +DEFINE_UNCORE_FORMAT_ATTR(mask_rnid30, mask_rnid30, "config2:32-35"); +DEFINE_UNCORE_FORMAT_ATTR(mask_rnid4, mask_rnid4, "config2:31"); +DEFINE_UNCORE_FORMAT_ATTR(mask_dnid, mask_dnid, "config2:13-17"); +DEFINE_UNCORE_FORMAT_ATTR(mask_mc, mask_mc, "config2:9-12"); +DEFINE_UNCORE_FORMAT_ATTR(mask_opc, mask_opc, "config2:5-8"); +DEFINE_UNCORE_FORMAT_ATTR(mask_vnw, mask_vnw, "config2:3-4"); +DEFINE_UNCORE_FORMAT_ATTR(mask0, mask0, "config2:0-31"); +DEFINE_UNCORE_FORMAT_ATTR(mask1, mask1, "config2:32-63"); static u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event) { @@ -281,7 +301,7 @@ static struct attribute *snbep_uncore_cbox_formats_attr[] = { }; static struct attribute *snbep_uncore_pcu_formats_attr[] = { - &format_attr_event.attr, + &format_attr_event_ext.attr, &format_attr_occ_sel.attr, &format_attr_edge.attr, &format_attr_inv.attr, @@ -301,6 +321,24 @@ static struct attribute *snbep_uncore_qpi_formats_attr[] = { &format_attr_edge.attr, &format_attr_inv.attr, &format_attr_thresh8.attr, + &format_attr_match_rds.attr, + &format_attr_match_rnid30.attr, + &format_attr_match_rnid4.attr, + &format_attr_match_dnid.attr, + &format_attr_match_mc.attr, + &format_attr_match_opc.attr, + &format_attr_match_vnw.attr, + &format_attr_match0.attr, + &format_attr_match1.attr, + &format_attr_mask_rds.attr, + &format_attr_mask_rnid30.attr, + &format_attr_mask_rnid4.attr, + &format_attr_mask_dnid.attr, + &format_attr_mask_mc.attr, + &format_attr_mask_opc.attr, + &format_attr_mask_vnw.attr, + &format_attr_mask0.attr, + &format_attr_mask1.attr, NULL, }; @@ -356,13 +394,16 @@ static struct intel_uncore_ops snbep_uncore_msr_ops = { SNBEP_UNCORE_MSR_OPS_COMMON_INIT(), }; +#define 
SNBEP_UNCORE_PCI_OPS_COMMON_INIT() \ + .init_box = snbep_uncore_pci_init_box, \ + .disable_box = snbep_uncore_pci_disable_box, \ + .enable_box = snbep_uncore_pci_enable_box, \ + .disable_event = snbep_uncore_pci_disable_event, \ + .read_counter = snbep_uncore_pci_read_counter + static struct intel_uncore_ops snbep_uncore_pci_ops = { - .init_box = snbep_uncore_pci_init_box, - .disable_box = snbep_uncore_pci_disable_box, - .enable_box = snbep_uncore_pci_enable_box, - .disable_event = snbep_uncore_pci_disable_event, - .enable_event = snbep_uncore_pci_enable_event, - .read_counter = snbep_uncore_pci_read_counter, + SNBEP_UNCORE_PCI_OPS_COMMON_INIT(), + .enable_event = snbep_uncore_pci_enable_event, }; static struct event_constraint snbep_uncore_cbox_constraints[] = { @@ -726,6 +767,61 @@ static struct intel_uncore_type *snbep_msr_uncores[] = { NULL, }; +enum { + SNBEP_PCI_QPI_PORT0_FILTER, + SNBEP_PCI_QPI_PORT1_FILTER, +}; + +static int snbep_qpi_hw_config(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + struct hw_perf_event_extra *reg2 = &hwc->branch_reg; + + if ((hwc->config & SNBEP_PMON_CTL_EV_SEL_MASK) == 0x38) { + reg1->idx = 0; + reg1->reg = SNBEP_Q_Py_PCI_PMON_PKT_MATCH0; + reg1->config = event->attr.config1; + reg2->reg = SNBEP_Q_Py_PCI_PMON_PKT_MASK0; + reg2->config = event->attr.config2; + } + return 0; +} + +static void snbep_qpi_enable_event(struct intel_uncore_box *box, struct perf_event *event) +{ + struct pci_dev *pdev = box->pci_dev; + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + struct hw_perf_event_extra *reg2 = &hwc->branch_reg; + + if (reg1->idx != EXTRA_REG_NONE) { + int idx = box->pmu->pmu_idx + SNBEP_PCI_QPI_PORT0_FILTER; + struct pci_dev *filter_pdev = extra_pci_dev[box->phys_id][idx]; + WARN_ON_ONCE(!filter_pdev); + if (filter_pdev) { + pci_write_config_dword(filter_pdev, reg1->reg, + (u32)reg1->config); + pci_write_config_dword(filter_pdev, reg1->reg + 4, + (u32)(reg1->config >> 32)); + pci_write_config_dword(filter_pdev, reg2->reg, + (u32)reg2->config); + pci_write_config_dword(filter_pdev, reg2->reg + 4, + (u32)(reg2->config >> 32)); + } + } + + pci_write_config_dword(pdev, hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN); +} + +static struct intel_uncore_ops snbep_uncore_qpi_ops = { + SNBEP_UNCORE_PCI_OPS_COMMON_INIT(), + .enable_event = snbep_qpi_enable_event, + .hw_config = snbep_qpi_hw_config, + .get_constraint = uncore_get_constraint, + .put_constraint = uncore_put_constraint, +}; + #define SNBEP_UNCORE_PCI_COMMON_INIT() \ .perf_ctr = SNBEP_PCI_PMON_CTR0, \ .event_ctl = SNBEP_PCI_PMON_CTL0, \ @@ -755,17 +851,18 @@ static struct intel_uncore_type snbep_uncore_imc = { }; static struct intel_uncore_type snbep_uncore_qpi = { - .name = "qpi", - .num_counters = 4, - .num_boxes = 2, - .perf_ctr_bits = 48, - .perf_ctr = SNBEP_PCI_PMON_CTR0, - .event_ctl = SNBEP_PCI_PMON_CTL0, - .event_mask = SNBEP_QPI_PCI_PMON_RAW_EVENT_MASK, - .box_ctl = SNBEP_PCI_PMON_BOX_CTL, - .ops = &snbep_uncore_pci_ops, - .event_descs = snbep_uncore_qpi_events, - .format_group = &snbep_uncore_qpi_format_group, + .name = "qpi", + .num_counters = 4, + .num_boxes = 2, + .perf_ctr_bits = 48, + .perf_ctr = SNBEP_PCI_PMON_CTR0, + .event_ctl = SNBEP_PCI_PMON_CTL0, + .event_mask = SNBEP_QPI_PCI_PMON_RAW_EVENT_MASK, + .box_ctl = SNBEP_PCI_PMON_BOX_CTL, + .num_shared_regs = 1, + .ops = &snbep_uncore_qpi_ops, + .event_descs = 
snbep_uncore_qpi_events, + .format_group = &snbep_uncore_qpi_format_group, }; @@ -807,43 +904,53 @@ static struct intel_uncore_type *snbep_pci_uncores[] = { static DEFINE_PCI_DEVICE_TABLE(snbep_uncore_pci_ids) = { { /* Home Agent */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_HA), - .driver_data = SNBEP_PCI_UNCORE_HA, + .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_HA, 0), }, { /* MC Channel 0 */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC0), - .driver_data = SNBEP_PCI_UNCORE_IMC, + .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_IMC, 0), }, { /* MC Channel 1 */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC1), - .driver_data = SNBEP_PCI_UNCORE_IMC, + .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_IMC, 1), }, { /* MC Channel 2 */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC2), - .driver_data = SNBEP_PCI_UNCORE_IMC, + .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_IMC, 2), }, { /* MC Channel 3 */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC3), - .driver_data = SNBEP_PCI_UNCORE_IMC, + .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_IMC, 3), }, { /* QPI Port 0 */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_QPI0), - .driver_data = SNBEP_PCI_UNCORE_QPI, + .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_QPI, 0), }, { /* QPI Port 1 */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_QPI1), - .driver_data = SNBEP_PCI_UNCORE_QPI, + .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_QPI, 1), }, { /* R2PCIe */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_R2PCIE), - .driver_data = SNBEP_PCI_UNCORE_R2PCIE, + .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_R2PCIE, 0), }, { /* R3QPI Link 0 */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_R3QPI0), - .driver_data = SNBEP_PCI_UNCORE_R3QPI, + .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_R3QPI, 0), }, { /* R3QPI Link 1 */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_R3QPI1), - .driver_data = SNBEP_PCI_UNCORE_R3QPI, + .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_R3QPI, 1), + }, + { /* QPI Port 0 filter */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x3c86), + .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, + SNBEP_PCI_QPI_PORT0_FILTER), + }, + { /* QPI Port 1 filter */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x3c96), + .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, + SNBEP_PCI_QPI_PORT1_FILTER), }, { /* end: all zeroes */ } }; @@ -1256,71 +1363,71 @@ static struct intel_uncore_type *ivt_pci_uncores[] = { static DEFINE_PCI_DEVICE_TABLE(ivt_uncore_pci_ids) = { { /* Home Agent 0 */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe30), - .driver_data = IVT_PCI_UNCORE_HA, + .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_HA, 0), }, { /* Home Agent 1 */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe38), - .driver_data = IVT_PCI_UNCORE_HA, + .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_HA, 1), }, { /* MC0 Channel 0 */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xeb4), - .driver_data = IVT_PCI_UNCORE_IMC, + .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_IMC, 0), }, { /* MC0 Channel 1 */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xeb5), - .driver_data = IVT_PCI_UNCORE_IMC, + .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_IMC, 1), }, { /* MC0 Channel 3 */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xeb0), - .driver_data = IVT_PCI_UNCORE_IMC, + .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_IMC, 2), }, { /* MC0 Channel 4 */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xeb1), - .driver_data = IVT_PCI_UNCORE_IMC, + .driver_data = 
UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_IMC, 3), }, { /* MC1 Channel 0 */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xef4), - .driver_data = IVT_PCI_UNCORE_IMC, + .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_IMC, 4), }, { /* MC1 Channel 1 */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xef5), - .driver_data = IVT_PCI_UNCORE_IMC, + .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_IMC, 5), }, { /* MC1 Channel 3 */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xef0), - .driver_data = IVT_PCI_UNCORE_IMC, + .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_IMC, 6), }, { /* MC1 Channel 4 */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xef1), - .driver_data = IVT_PCI_UNCORE_IMC, + .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_IMC, 7), }, { /* QPI0 Port 0 */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe32), - .driver_data = IVT_PCI_UNCORE_QPI, + .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_QPI, 0), }, { /* QPI0 Port 1 */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe33), - .driver_data = IVT_PCI_UNCORE_QPI, + .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_QPI, 1), }, { /* QPI1 Port 2 */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe3a), - .driver_data = IVT_PCI_UNCORE_QPI, + .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_QPI, 2), }, { /* R2PCIe */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe34), - .driver_data = IVT_PCI_UNCORE_R2PCIE, + .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_R2PCIE, 0), }, { /* R3QPI0 Link 0 */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe36), - .driver_data = IVT_PCI_UNCORE_R3QPI, + .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_R3QPI, 0), }, { /* R3QPI0 Link 1 */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe37), - .driver_data = IVT_PCI_UNCORE_R3QPI, + .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_R3QPI, 1), }, { /* R3QPI1 Link 2 */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe3e), - .driver_data = IVT_PCI_UNCORE_R3QPI, + .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_R3QPI, 2), }, { /* end: all zeroes */ } }; @@ -2606,7 +2713,7 @@ struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, int cp size = sizeof(*box) + type->num_shared_regs * sizeof(struct intel_uncore_extra_reg); - box = kmalloc_node(size, GFP_KERNEL | __GFP_ZERO, cpu_to_node(cpu)); + box = kzalloc_node(size, GFP_KERNEL, cpu_to_node(cpu)); if (!box) return NULL; @@ -3167,16 +3274,24 @@ static bool pcidrv_registered; /* * add a pci uncore device */ -static int uncore_pci_add(struct intel_uncore_type *type, struct pci_dev *pdev) +static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) { struct intel_uncore_pmu *pmu; struct intel_uncore_box *box; - int i, phys_id; + struct intel_uncore_type *type; + int phys_id; phys_id = pcibus_to_physid[pdev->bus->number]; if (phys_id < 0) return -ENODEV; + if (UNCORE_PCI_DEV_TYPE(id->driver_data) == UNCORE_EXTRA_PCI_DEV) { + extra_pci_dev[phys_id][UNCORE_PCI_DEV_IDX(id->driver_data)] = pdev; + pci_set_drvdata(pdev, NULL); + return 0; + } + + type = pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)]; box = uncore_alloc_box(type, 0); if (!box) return -ENOMEM; @@ -3185,21 +3300,11 @@ static int uncore_pci_add(struct intel_uncore_type *type, struct pci_dev *pdev) * for performance monitoring unit with multiple boxes, * each box has a different function id. 
*/ - for (i = 0; i < type->num_boxes; i++) { - pmu = &type->pmus[i]; - if (pmu->func_id == pdev->devfn) - break; - if (pmu->func_id < 0) { - pmu->func_id = pdev->devfn; - break; - } - pmu = NULL; - } - - if (!pmu) { - kfree(box); - return -EINVAL; - } + pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)]; + if (pmu->func_id < 0) + pmu->func_id = pdev->devfn; + else + WARN_ON_ONCE(pmu->func_id != pdev->devfn); box->phys_id = phys_id; box->pci_dev = pdev; @@ -3217,9 +3322,22 @@ static int uncore_pci_add(struct intel_uncore_type *type, struct pci_dev *pdev) static void uncore_pci_remove(struct pci_dev *pdev) { struct intel_uncore_box *box = pci_get_drvdata(pdev); - struct intel_uncore_pmu *pmu = box->pmu; - int cpu, phys_id = pcibus_to_physid[pdev->bus->number]; + struct intel_uncore_pmu *pmu; + int i, cpu, phys_id = pcibus_to_physid[pdev->bus->number]; + box = pci_get_drvdata(pdev); + if (!box) { + for (i = 0; i < UNCORE_EXTRA_PCI_DEV_MAX; i++) { + if (extra_pci_dev[phys_id][i] == pdev) { + extra_pci_dev[phys_id][i] = NULL; + break; + } + } + WARN_ON_ONCE(i >= UNCORE_EXTRA_PCI_DEV_MAX); + return; + } + + pmu = box->pmu; if (WARN_ON_ONCE(phys_id != box->phys_id)) return; @@ -3240,12 +3358,6 @@ static void uncore_pci_remove(struct pci_dev *pdev) kfree(box); } -static int uncore_pci_probe(struct pci_dev *pdev, - const struct pci_device_id *id) -{ - return uncore_pci_add(pci_uncores[id->driver_data], pdev); -} - static int __init uncore_pci_init(void) { int ret; diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h index 47b3d00c9d89..a80ab71a883d 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h @@ -12,6 +12,15 @@ #define UNCORE_PMC_IDX_FIXED UNCORE_PMC_IDX_MAX_GENERIC #define UNCORE_PMC_IDX_MAX (UNCORE_PMC_IDX_FIXED + 1) +#define UNCORE_PCI_DEV_DATA(type, idx) ((type << 8) | idx) +#define UNCORE_PCI_DEV_TYPE(data) ((data >> 8) & 0xff) +#define UNCORE_PCI_DEV_IDX(data) (data & 0xff) +#define UNCORE_EXTRA_PCI_DEV 0xff +#define UNCORE_EXTRA_PCI_DEV_MAX 2 + +/* support up to 8 sockets */ +#define UNCORE_SOCKET_MAX 8 + #define UNCORE_EVENT_CONSTRAINT(c, n) EVENT_CONSTRAINT(c, n, 0xff) /* SNB event control */ @@ -108,6 +117,7 @@ (SNBEP_PMON_CTL_EV_SEL_MASK | \ SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK | \ SNBEP_PMON_CTL_EDGE_DET | \ + SNBEP_PMON_CTL_EV_SEL_EXT | \ SNBEP_PMON_CTL_INVERT | \ SNBEP_PCU_MSR_PMON_CTL_TRESH_MASK | \ SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT | \ diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c index 7076878404ec..628a059a9a06 100644 --- a/arch/x86/kernel/cpu/vmware.c +++ b/arch/x86/kernel/cpu/vmware.c @@ -93,7 +93,7 @@ static void __init vmware_platform_setup(void) * serial key should be enough, as this will always have a VMware * specific string when running under VMware hypervisor. 
*/ -static bool __init vmware_platform(void) +static uint32_t __init vmware_platform(void) { if (cpu_has_hypervisor) { unsigned int eax; @@ -102,12 +102,12 @@ static bool __init vmware_platform(void) cpuid(CPUID_VMWARE_INFO_LEAF, &eax, &hyper_vendor_id[0], &hyper_vendor_id[1], &hyper_vendor_id[2]); if (!memcmp(hyper_vendor_id, "VMwareVMware", 12)) - return true; + return CPUID_VMWARE_INFO_LEAF; } else if (dmi_available && dmi_name_in_serial("VMware") && __vmware_platform()) - return true; + return 1; - return false; + return 0; } /* diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index 74467feb4dc5..e0e0841eef45 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -128,7 +128,9 @@ void native_machine_crash_shutdown(struct pt_regs *regs) cpu_emergency_svm_disable(); lapic_shutdown(); -#if defined(CONFIG_X86_IO_APIC) +#ifdef CONFIG_X86_IO_APIC + /* Prevent crash_kexec() from deadlocking on ioapic_lock. */ + ioapic_zap_locks(); disable_IO_APIC(); #endif #ifdef CONFIG_HPET_TIMER diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index d32abeabbda5..174da5fc5a7b 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -658,15 +658,18 @@ __init void e820_setup_gap(void) * boot_params.e820_map, others are passed via SETUP_E820_EXT node of * linked list of struct setup_data, which is parsed here. */ -void __init parse_e820_ext(struct setup_data *sdata) +void __init parse_e820_ext(u64 phys_addr, u32 data_len) { int entries; struct e820entry *extmap; + struct setup_data *sdata; + sdata = early_memremap(phys_addr, data_len); entries = sdata->len / sizeof(struct e820entry); extmap = (struct e820entry *)(sdata->data); __append_e820_map(extmap, entries); sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); + early_iounmap(sdata, data_len); printk(KERN_INFO "e820: extended physical RAM map:\n"); e820_print_map("extended"); } diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index 138463a24877..06f87bece92a 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c @@ -29,7 +29,7 @@ static void __init i386_default_early_setup(void) reserve_ebda_region(); } -void __init i386_start_kernel(void) +asmlinkage void __init i386_start_kernel(void) { sanitize_boot_params(&boot_params); diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 55b67614ed94..1be8e43b669e 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -137,7 +137,7 @@ static void __init copy_bootdata(char *real_mode_data) } } -void __init x86_64_start_kernel(char * real_mode_data) +asmlinkage void __init x86_64_start_kernel(char * real_mode_data) { int i; diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 5dd87a89f011..81ba27679f18 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -409,6 +409,7 @@ enable_paging: /* * Check if it is 486 */ + movb $4,X86 # at least 486 cmpl $-1,X86_CPUID je is486 @@ -436,7 +437,6 @@ enable_paging: movl %edx,X86_CAPABILITY is486: - movb $4,X86 movl $0x50022,%ecx # set AM, WP, NE and MP movl %cr0,%eax andl $0x80000011,%eax # Save PG,PE,ET diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 3a8185c042a2..22d0687e7fda 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -177,7 +177,7 @@ u64 arch_irq_stat(void) * SMP cross-CPU interrupts have their own specific * handlers). 
*/ -unsigned int __irq_entry do_IRQ(struct pt_regs *regs) +__visible unsigned int __irq_entry do_IRQ(struct pt_regs *regs) { struct pt_regs *old_regs = set_irq_regs(regs); @@ -215,7 +215,7 @@ void __smp_x86_platform_ipi(void) x86_platform_ipi_callback(); } -void smp_x86_platform_ipi(struct pt_regs *regs) +__visible void smp_x86_platform_ipi(struct pt_regs *regs) { struct pt_regs *old_regs = set_irq_regs(regs); @@ -229,7 +229,7 @@ void smp_x86_platform_ipi(struct pt_regs *regs) /* * Handler for POSTED_INTERRUPT_VECTOR. */ -void smp_kvm_posted_intr_ipi(struct pt_regs *regs) +__visible void smp_kvm_posted_intr_ipi(struct pt_regs *regs) { struct pt_regs *old_regs = set_irq_regs(regs); @@ -247,7 +247,7 @@ void smp_kvm_posted_intr_ipi(struct pt_regs *regs) } #endif -void smp_trace_x86_platform_ipi(struct pt_regs *regs) +__visible void smp_trace_x86_platform_ipi(struct pt_regs *regs) { struct pt_regs *old_regs = set_irq_regs(regs); diff --git a/arch/x86/kernel/irq_work.c b/arch/x86/kernel/irq_work.c index 636a55e4a13c..1de84e3ab4e0 100644 --- a/arch/x86/kernel/irq_work.c +++ b/arch/x86/kernel/irq_work.c @@ -22,14 +22,14 @@ static inline void __smp_irq_work_interrupt(void) irq_work_run(); } -void smp_irq_work_interrupt(struct pt_regs *regs) +__visible void smp_irq_work_interrupt(struct pt_regs *regs) { irq_work_entering_irq(); __smp_irq_work_interrupt(); exiting_irq(); } -void smp_trace_irq_work_interrupt(struct pt_regs *regs) +__visible void smp_trace_irq_work_interrupt(struct pt_regs *regs) { irq_work_entering_irq(); trace_irq_work_entry(IRQ_WORK_VECTOR); diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c index 2889b3d43882..460f5d9ceebb 100644 --- a/arch/x86/kernel/jump_label.c +++ b/arch/x86/kernel/jump_label.c @@ -37,7 +37,19 @@ static void __jump_label_transform(struct jump_entry *entry, } else memcpy(&code, ideal_nops[NOP_ATOMIC5], JUMP_LABEL_NOP_SIZE); - (*poker)((void *)entry->code, &code, JUMP_LABEL_NOP_SIZE); + /* + * Make text_poke_bp() a default fallback poker. 
+ * + * At the time the change is being done, just ignore whether we + * are doing a nop -> jump or a jump -> nop transition, and assume + * that the nop is always the 'currently valid' instruction. + * + */ + if (poker) + (*poker)((void *)entry->code, &code, JUMP_LABEL_NOP_SIZE); + else + text_poke_bp((void *)entry->code, &code, JUMP_LABEL_NOP_SIZE, + (void *)entry->code + JUMP_LABEL_NOP_SIZE); } void arch_jump_label_transform(struct jump_entry *entry, @@ -45,7 +57,7 @@ void arch_jump_label_transform(struct jump_entry *entry, { get_online_cpus(); mutex_lock(&text_mutex); - __jump_label_transform(entry, type, text_poke_smp); + __jump_label_transform(entry, type, NULL); mutex_unlock(&text_mutex); put_online_cpus(); } diff --git a/arch/x86/kernel/kprobes/common.h b/arch/x86/kernel/kprobes/common.h index 2e9d4b5af036..c6ee63f927ab 100644 --- a/arch/x86/kernel/kprobes/common.h +++ b/arch/x86/kernel/kprobes/common.h @@ -82,14 +82,9 @@ extern void synthesize_reljump(void *from, void *to); extern void synthesize_relcall(void *from, void *to); #ifdef CONFIG_OPTPROBES -extern int arch_init_optprobes(void); extern int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter); extern unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr); #else /* !CONFIG_OPTPROBES */ -static inline int arch_init_optprobes(void) -{ - return 0; -} static inline int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter) { return 0; diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 211bce445522..79a3f9682871 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -661,7 +661,7 @@ static void __used __kprobes kretprobe_trampoline_holder(void) /* * Called from kretprobe_trampoline */ -static __used __kprobes void *trampoline_handler(struct pt_regs *regs) +__visible __used __kprobes void *trampoline_handler(struct pt_regs *regs) { struct kretprobe_instance *ri = NULL; struct hlist_head *head, empty_rp; @@ -1068,7 +1068,7 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) int __init arch_init_kprobes(void) { - return arch_init_optprobes(); + return 0; } int __kprobes arch_trampoline_kprobe(struct kprobe *p) diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c index 76dc6f095724..898160b42e43 100644 --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c @@ -88,9 +88,7 @@ static void __kprobes synthesize_set_arg1(kprobe_opcode_t *addr, unsigned long v *(unsigned long *)addr = val; } -static void __used __kprobes kprobes_optinsn_template_holder(void) -{ - asm volatile ( +asm ( ".global optprobe_template_entry\n" "optprobe_template_entry:\n" #ifdef CONFIG_X86_64 @@ -129,7 +127,6 @@ static void __used __kprobes kprobes_optinsn_template_holder(void) #endif ".global optprobe_template_end\n" "optprobe_template_end:\n"); -} #define TMPL_MOVE_IDX \ ((long)&optprobe_template_val - (long)&optprobe_template_entry) @@ -371,31 +368,6 @@ int __kprobes arch_prepare_optimized_kprobe(struct optimized_kprobe *op) return 0; } -#define MAX_OPTIMIZE_PROBES 256 -static struct text_poke_param *jump_poke_params; -static struct jump_poke_buffer { - u8 buf[RELATIVEJUMP_SIZE]; -} *jump_poke_bufs; - -static void __kprobes setup_optimize_kprobe(struct text_poke_param *tprm, - u8 *insn_buf, - struct optimized_kprobe *op) -{ - s32 rel = (s32)((long)op->optinsn.insn - - ((long)op->kp.addr + RELATIVEJUMP_SIZE)); - - /* Backup instructions which will be replaced by jump address */ 
- memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE, - RELATIVE_ADDR_SIZE); - - insn_buf[0] = RELATIVEJUMP_OPCODE; - *(s32 *)(&insn_buf[1]) = rel; - - tprm->addr = op->kp.addr; - tprm->opcode = insn_buf; - tprm->len = RELATIVEJUMP_SIZE; -} - /* * Replace breakpoints (int3) with relative jumps. * Caller must call with locking kprobe_mutex and text_mutex. @@ -403,37 +375,38 @@ static void __kprobes setup_optimize_kprobe(struct text_poke_param *tprm, void __kprobes arch_optimize_kprobes(struct list_head *oplist) { struct optimized_kprobe *op, *tmp; - int c = 0; + u8 insn_buf[RELATIVEJUMP_SIZE]; list_for_each_entry_safe(op, tmp, oplist, list) { + s32 rel = (s32)((long)op->optinsn.insn - + ((long)op->kp.addr + RELATIVEJUMP_SIZE)); + WARN_ON(kprobe_disabled(&op->kp)); - /* Setup param */ - setup_optimize_kprobe(&jump_poke_params[c], - jump_poke_bufs[c].buf, op); + + /* Backup instructions which will be replaced by jump address */ + memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE, + RELATIVE_ADDR_SIZE); + + insn_buf[0] = RELATIVEJUMP_OPCODE; + *(s32 *)(&insn_buf[1]) = rel; + + text_poke_bp(op->kp.addr, insn_buf, RELATIVEJUMP_SIZE, + op->optinsn.insn); + list_del_init(&op->list); - if (++c >= MAX_OPTIMIZE_PROBES) - break; } - - /* - * text_poke_smp doesn't support NMI/MCE code modifying. - * However, since kprobes itself also doesn't support NMI/MCE - * code probing, it's not a problem. - */ - text_poke_smp_batch(jump_poke_params, c); } -static void __kprobes setup_unoptimize_kprobe(struct text_poke_param *tprm, - u8 *insn_buf, - struct optimized_kprobe *op) +/* Replace a relative jump with a breakpoint (int3). */ +void __kprobes arch_unoptimize_kprobe(struct optimized_kprobe *op) { + u8 insn_buf[RELATIVEJUMP_SIZE]; + /* Set int3 to first byte for kprobes */ insn_buf[0] = BREAKPOINT_INSTRUCTION; memcpy(insn_buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE); - - tprm->addr = op->kp.addr; - tprm->opcode = insn_buf; - tprm->len = RELATIVEJUMP_SIZE; + text_poke_bp(op->kp.addr, insn_buf, RELATIVEJUMP_SIZE, + op->optinsn.insn); } /* @@ -444,34 +417,11 @@ extern void arch_unoptimize_kprobes(struct list_head *oplist, struct list_head *done_list) { struct optimized_kprobe *op, *tmp; - int c = 0; list_for_each_entry_safe(op, tmp, oplist, list) { - /* Setup param */ - setup_unoptimize_kprobe(&jump_poke_params[c], - jump_poke_bufs[c].buf, op); + arch_unoptimize_kprobe(op); list_move(&op->list, done_list); - if (++c >= MAX_OPTIMIZE_PROBES) - break; } - - /* - * text_poke_smp doesn't support NMI/MCE code modifying. - * However, since kprobes itself also doesn't support NMI/MCE - * code probing, it's not a problem. - */ - text_poke_smp_batch(jump_poke_params, c); -} - -/* Replace a relative jump with a breakpoint (int3). 
*/ -void __kprobes arch_unoptimize_kprobe(struct optimized_kprobe *op) -{ - u8 buf[RELATIVEJUMP_SIZE]; - - /* Set int3 to first byte for kprobes */ - buf[0] = BREAKPOINT_INSTRUCTION; - memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE); - text_poke_smp(op->kp.addr, buf, RELATIVEJUMP_SIZE); } int __kprobes @@ -491,22 +441,3 @@ setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter) } return 0; } - -int __kprobes arch_init_optprobes(void) -{ - /* Allocate code buffer and parameter array */ - jump_poke_bufs = kmalloc(sizeof(struct jump_poke_buffer) * - MAX_OPTIMIZE_PROBES, GFP_KERNEL); - if (!jump_poke_bufs) - return -ENOMEM; - - jump_poke_params = kmalloc(sizeof(struct text_poke_param) * - MAX_OPTIMIZE_PROBES, GFP_KERNEL); - if (!jump_poke_params) { - kfree(jump_poke_bufs); - jump_poke_bufs = NULL; - return -ENOMEM; - } - - return 0; -} diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index a96d32cc55b8..697b93af02dd 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -34,6 +34,7 @@ #include <linux/sched.h> #include <linux/slab.h> #include <linux/kprobes.h> +#include <linux/debugfs.h> #include <asm/timer.h> #include <asm/cpu.h> #include <asm/traps.h> @@ -419,6 +420,7 @@ static void __init kvm_smp_prepare_boot_cpu(void) WARN_ON(kvm_register_clock("primary cpu clock")); kvm_guest_cpu_init(); native_smp_prepare_boot_cpu(); + kvm_spinlock_init(); } static void kvm_guest_cpu_online(void *dummy) @@ -498,11 +500,9 @@ void __init kvm_guest_init(void) #endif } -static bool __init kvm_detect(void) +static uint32_t __init kvm_detect(void) { - if (!kvm_para_available()) - return false; - return true; + return kvm_cpuid_base(); } const struct hypervisor_x86 x86_hyper_kvm __refconst = { @@ -523,3 +523,263 @@ static __init int activate_jump_labels(void) return 0; } arch_initcall(activate_jump_labels); + +#ifdef CONFIG_PARAVIRT_SPINLOCKS + +/* Kick a cpu by its apicid. 
Used to wake up a halted vcpu */ +static void kvm_kick_cpu(int cpu) +{ + int apicid; + unsigned long flags = 0; + + apicid = per_cpu(x86_cpu_to_apicid, cpu); + kvm_hypercall2(KVM_HC_KICK_CPU, flags, apicid); +} + +enum kvm_contention_stat { + TAKEN_SLOW, + TAKEN_SLOW_PICKUP, + RELEASED_SLOW, + RELEASED_SLOW_KICKED, + NR_CONTENTION_STATS +}; + +#ifdef CONFIG_KVM_DEBUG_FS +#define HISTO_BUCKETS 30 + +static struct kvm_spinlock_stats +{ + u32 contention_stats[NR_CONTENTION_STATS]; + u32 histo_spin_blocked[HISTO_BUCKETS+1]; + u64 time_blocked; +} spinlock_stats; + +static u8 zero_stats; + +static inline void check_zero(void) +{ + u8 ret; + u8 old; + + old = ACCESS_ONCE(zero_stats); + if (unlikely(old)) { + ret = cmpxchg(&zero_stats, old, 0); + /* This ensures only one fellow resets the stat */ + if (ret == old) + memset(&spinlock_stats, 0, sizeof(spinlock_stats)); + } +} + +static inline void add_stats(enum kvm_contention_stat var, u32 val) +{ + check_zero(); + spinlock_stats.contention_stats[var] += val; +} + + +static inline u64 spin_time_start(void) +{ + return sched_clock(); +} + +static void __spin_time_accum(u64 delta, u32 *array) +{ + unsigned index; + + index = ilog2(delta); + check_zero(); + + if (index < HISTO_BUCKETS) + array[index]++; + else + array[HISTO_BUCKETS]++; +} + +static inline void spin_time_accum_blocked(u64 start) +{ + u32 delta; + + delta = sched_clock() - start; + __spin_time_accum(delta, spinlock_stats.histo_spin_blocked); + spinlock_stats.time_blocked += delta; +} + +static struct dentry *d_spin_debug; +static struct dentry *d_kvm_debug; + +struct dentry *kvm_init_debugfs(void) +{ + d_kvm_debug = debugfs_create_dir("kvm", NULL); + if (!d_kvm_debug) + printk(KERN_WARNING "Could not create 'kvm' debugfs directory\n"); + + return d_kvm_debug; +} + +static int __init kvm_spinlock_debugfs(void) +{ + struct dentry *d_kvm; + + d_kvm = kvm_init_debugfs(); + if (d_kvm == NULL) + return -ENOMEM; + + d_spin_debug = debugfs_create_dir("spinlocks", d_kvm); + + debugfs_create_u8("zero_stats", 0644, d_spin_debug, &zero_stats); + + debugfs_create_u32("taken_slow", 0444, d_spin_debug, + &spinlock_stats.contention_stats[TAKEN_SLOW]); + debugfs_create_u32("taken_slow_pickup", 0444, d_spin_debug, + &spinlock_stats.contention_stats[TAKEN_SLOW_PICKUP]); + + debugfs_create_u32("released_slow", 0444, d_spin_debug, + &spinlock_stats.contention_stats[RELEASED_SLOW]); + debugfs_create_u32("released_slow_kicked", 0444, d_spin_debug, + &spinlock_stats.contention_stats[RELEASED_SLOW_KICKED]); + + debugfs_create_u64("time_blocked", 0444, d_spin_debug, + &spinlock_stats.time_blocked); + + debugfs_create_u32_array("histo_blocked", 0444, d_spin_debug, + spinlock_stats.histo_spin_blocked, HISTO_BUCKETS + 1); + + return 0; +} +fs_initcall(kvm_spinlock_debugfs); +#else /* !CONFIG_KVM_DEBUG_FS */ +static inline void add_stats(enum kvm_contention_stat var, u32 val) +{ +} + +static inline u64 spin_time_start(void) +{ + return 0; +} + +static inline void spin_time_accum_blocked(u64 start) +{ +} +#endif /* CONFIG_KVM_DEBUG_FS */ + +struct kvm_lock_waiting { + struct arch_spinlock *lock; + __ticket_t want; +}; + +/* cpus 'waiting' on a spinlock to become available */ +static cpumask_t waiting_cpus; + +/* Track spinlock on which a cpu is waiting */ +static DEFINE_PER_CPU(struct kvm_lock_waiting, klock_waiting); + +static void kvm_lock_spinning(struct arch_spinlock *lock, __ticket_t want) +{ + struct kvm_lock_waiting *w; + int cpu; + u64 start; + unsigned long flags; + + if (in_nmi()) + return; + + w = 
&__get_cpu_var(klock_waiting); + cpu = smp_processor_id(); + start = spin_time_start(); + + /* + * Make sure an interrupt handler can't upset things in a + * partially setup state. + */ + local_irq_save(flags); + + /* + * The ordering protocol on this is that the "lock" pointer + * may only be set non-NULL if the "want" ticket is correct. + * If we're updating "want", we must first clear "lock". + */ + w->lock = NULL; + smp_wmb(); + w->want = want; + smp_wmb(); + w->lock = lock; + + add_stats(TAKEN_SLOW, 1); + + /* + * This uses set_bit, which is atomic but we should not rely on its + * reordering guarantees. So a barrier is needed after this call. + */ + cpumask_set_cpu(cpu, &waiting_cpus); + + barrier(); + + /* + * Mark entry to slowpath before doing the pickup test to make + * sure we don't deadlock with an unlocker. + */ + __ticket_enter_slowpath(lock); + + /* + * check again to make sure it didn't become free while + * we weren't looking. + */ + if (ACCESS_ONCE(lock->tickets.head) == want) { + add_stats(TAKEN_SLOW_PICKUP, 1); + goto out; + } + + /* + * halt until it's our turn and we are kicked. Note that we do a safe + * halt for the irq-enabled case to avoid a hang when lock info is + * overwritten in the irq spinlock slowpath and no spurious interrupt + * occurs to save us. + */ + if (arch_irqs_disabled_flags(flags)) + halt(); + else + safe_halt(); + +out: + cpumask_clear_cpu(cpu, &waiting_cpus); + w->lock = NULL; + local_irq_restore(flags); + spin_time_accum_blocked(start); +} +PV_CALLEE_SAVE_REGS_THUNK(kvm_lock_spinning); + +/* Kick vcpu waiting on @lock->head to reach value @ticket */ +static void kvm_unlock_kick(struct arch_spinlock *lock, __ticket_t ticket) +{ + int cpu; + + add_stats(RELEASED_SLOW, 1); + for_each_cpu(cpu, &waiting_cpus) { + const struct kvm_lock_waiting *w = &per_cpu(klock_waiting, cpu); + if (ACCESS_ONCE(w->lock) == lock && + ACCESS_ONCE(w->want) == ticket) { + add_stats(RELEASED_SLOW_KICKED, 1); + kvm_kick_cpu(cpu); + break; + } + } +} + +/* + * Setup pv_lock_ops to exploit KVM_FEATURE_PV_UNHALT if present. + */ +void __init kvm_spinlock_init(void) +{ + if (!kvm_para_available()) + return; + /* Does host kernel support KVM_FEATURE_PV_UNHALT? 
*/ + if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT)) + return; + + printk(KERN_INFO "KVM setup paravirtual spinlock\n"); + + static_key_slow_inc(¶virt_ticketlocks_enabled); + + pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(kvm_lock_spinning); + pv_lock_ops.unlock_kick = kvm_unlock_kick; +} +#endif /* CONFIG_PARAVIRT_SPINLOCKS */ diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c index 676b8c77a976..bbb6c7316341 100644 --- a/arch/x86/kernel/paravirt-spinlocks.c +++ b/arch/x86/kernel/paravirt-spinlocks.c @@ -4,25 +4,17 @@ */ #include <linux/spinlock.h> #include <linux/module.h> +#include <linux/jump_label.h> #include <asm/paravirt.h> -static inline void -default_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags) -{ - arch_spin_lock(lock); -} - struct pv_lock_ops pv_lock_ops = { #ifdef CONFIG_SMP - .spin_is_locked = __ticket_spin_is_locked, - .spin_is_contended = __ticket_spin_is_contended, - - .spin_lock = __ticket_spin_lock, - .spin_lock_flags = default_spin_lock_flags, - .spin_trylock = __ticket_spin_trylock, - .spin_unlock = __ticket_spin_unlock, + .lock_spinning = __PV_IS_CALLEE_SAVE(paravirt_nop), + .unlock_kick = paravirt_nop, #endif }; EXPORT_SYMBOL(pv_lock_ops); +struct static_key paravirt_ticketlocks_enabled = STATIC_KEY_INIT_FALSE; +EXPORT_SYMBOL(paravirt_ticketlocks_enabled); diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index cd6de64cc480..1b10af835c31 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -62,11 +62,6 @@ void __init default_banner(void) pv_info.name); } -/* Simple instruction patching code. */ -#define DEF_NATIVE(ops, name, code) \ - extern const char start_##ops##_##name[], end_##ops##_##name[]; \ - asm("start_" #ops "_" #name ": " code "; end_" #ops "_" #name ":") - /* Undefined instruction for dealing with missing ops pointers. */ static const unsigned char ud2a[] = { 0x0f, 0x0b }; @@ -324,7 +319,7 @@ struct pv_time_ops pv_time_ops = { .steal_clock = native_steal_clock, }; -struct pv_irq_ops pv_irq_ops = { +__visible struct pv_irq_ops pv_irq_ops = { .save_fl = __PV_IS_CALLEE_SAVE(native_save_fl), .restore_fl = __PV_IS_CALLEE_SAVE(native_restore_fl), .irq_disable = __PV_IS_CALLEE_SAVE(native_irq_disable), @@ -336,7 +331,7 @@ struct pv_irq_ops pv_irq_ops = { #endif }; -struct pv_cpu_ops pv_cpu_ops = { +__visible struct pv_cpu_ops pv_cpu_ops = { .cpuid = native_cpuid, .get_debugreg = native_get_debugreg, .set_debugreg = native_set_debugreg, diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 83369e5a1d27..c83516be1052 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -36,7 +36,7 @@ * section. Since TSS's are completely CPU-local, we want them * on exact cacheline boundaries, to eliminate cacheline ping-pong. */ -DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss) = INIT_TSS; +__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss) = INIT_TSS; #ifdef CONFIG_X86_64 static DEFINE_PER_CPU(unsigned char, is_idle); diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index f8adefca71dc..884f98f69354 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -247,7 +247,7 @@ EXPORT_SYMBOL_GPL(start_thread); * the task-switch, and shows up in ret_from_fork in entry.S, * for example. 
*/ -__notrace_funcgraph struct task_struct * +__visible __notrace_funcgraph struct task_struct * __switch_to(struct task_struct *prev_p, struct task_struct *next_p) { struct thread_struct *prev = &prev_p->thread, diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 05646bab4ca6..bb1dc51bab05 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -52,7 +52,7 @@ asmlinkage extern void ret_from_fork(void); -DEFINE_PER_CPU(unsigned long, old_rsp); +asmlinkage DEFINE_PER_CPU(unsigned long, old_rsp); /* Prints also some state that isn't saved in the pt_regs */ void __show_regs(struct pt_regs *regs, int all) @@ -274,7 +274,7 @@ void start_thread_ia32(struct pt_regs *regs, u32 new_ip, u32 new_sp) * Kprobes not supported here. Set the probe on schedule instead. * Function graph tracer not supported too. */ -__notrace_funcgraph struct task_struct * +__visible __notrace_funcgraph struct task_struct * __switch_to(struct task_struct *prev_p, struct task_struct *next_p) { struct thread_struct *prev = &prev_p->thread; diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c index 2cb9470ea85b..a16bae3f83b3 100644 --- a/arch/x86/kernel/pvclock.c +++ b/arch/x86/kernel/pvclock.c @@ -128,46 +128,7 @@ void pvclock_read_wallclock(struct pvclock_wall_clock *wall_clock, set_normalized_timespec(ts, now.tv_sec, now.tv_nsec); } -static struct pvclock_vsyscall_time_info *pvclock_vdso_info; - -static struct pvclock_vsyscall_time_info * -pvclock_get_vsyscall_user_time_info(int cpu) -{ - if (!pvclock_vdso_info) { - BUG(); - return NULL; - } - - return &pvclock_vdso_info[cpu]; -} - -struct pvclock_vcpu_time_info *pvclock_get_vsyscall_time_info(int cpu) -{ - return &pvclock_get_vsyscall_user_time_info(cpu)->pvti; -} - #ifdef CONFIG_X86_64 -static int pvclock_task_migrate(struct notifier_block *nb, unsigned long l, - void *v) -{ - struct task_migration_notifier *mn = v; - struct pvclock_vsyscall_time_info *pvti; - - pvti = pvclock_get_vsyscall_user_time_info(mn->from_cpu); - - /* this is NULL when pvclock vsyscall is not initialized */ - if (unlikely(pvti == NULL)) - return NOTIFY_DONE; - - pvti->migrate_count++; - - return NOTIFY_DONE; -} - -static struct notifier_block pvclock_migrate = { - .notifier_call = pvclock_task_migrate, -}; - /* * Initialize the generic pvclock vsyscall state. 
This will allocate * a/some page(s) for the per-vcpu pvclock information, set up a @@ -181,17 +142,12 @@ int __init pvclock_init_vsyscall(struct pvclock_vsyscall_time_info *i, WARN_ON (size != PVCLOCK_VSYSCALL_NR_PAGES*PAGE_SIZE); - pvclock_vdso_info = i; - for (idx = 0; idx <= (PVCLOCK_FIXMAP_END-PVCLOCK_FIXMAP_BEGIN); idx++) { __set_fixmap(PVCLOCK_FIXMAP_BEGIN + idx, __pa(i) + (idx*PAGE_SIZE), PAGE_KERNEL_VVAR); } - - register_task_migration_notifier(&pvclock_migrate); - return 0; } #endif diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index f8ec57815c05..f0de6294b955 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -206,9 +206,9 @@ EXPORT_SYMBOL(boot_cpu_data); #if !defined(CONFIG_X86_PAE) || defined(CONFIG_X86_64) -unsigned long mmu_cr4_features; +__visible unsigned long mmu_cr4_features; #else -unsigned long mmu_cr4_features = X86_CR4_PAE; +__visible unsigned long mmu_cr4_features = X86_CR4_PAE; #endif /* Boot loader ID and version as integers, for the benefit of proc_dointvec */ @@ -426,25 +426,23 @@ static void __init reserve_initrd(void) static void __init parse_setup_data(void) { struct setup_data *data; - u64 pa_data; + u64 pa_data, pa_next; pa_data = boot_params.hdr.setup_data; while (pa_data) { - u32 data_len, map_len; + u32 data_len, map_len, data_type; map_len = max(PAGE_SIZE - (pa_data & ~PAGE_MASK), (u64)sizeof(struct setup_data)); data = early_memremap(pa_data, map_len); data_len = data->len + sizeof(struct setup_data); - if (data_len > map_len) { - early_iounmap(data, map_len); - data = early_memremap(pa_data, data_len); - map_len = data_len; - } + data_type = data->type; + pa_next = data->next; + early_iounmap(data, map_len); - switch (data->type) { + switch (data_type) { case SETUP_E820_EXT: - parse_e820_ext(data); + parse_e820_ext(pa_data, data_len); break; case SETUP_DTB: add_dtb(pa_data); @@ -452,8 +450,7 @@ static void __init parse_setup_data(void) default: break; } - pa_data = data->next; - early_iounmap(data, map_len); + pa_data = pa_next; } } @@ -1070,7 +1067,7 @@ void __init setup_arch(char **cmdline_p) cleanup_highmap(); - memblock.current_limit = ISA_END_ADDRESS; + memblock_set_current_limit(ISA_END_ADDRESS); memblock_x86_fill(); /* @@ -1103,7 +1100,7 @@ void __init setup_arch(char **cmdline_p) setup_real_mode(); - memblock.current_limit = get_max_mapped(); + memblock_set_current_limit(get_max_mapped()); dma_contiguous_reserve(0); /* diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index cf913587d4dd..9e5de6813e1f 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -358,7 +358,7 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig, else put_user_ex(0, &frame->uc.uc_flags); put_user_ex(0, &frame->uc.uc_link); - err |= __save_altstack(&frame->uc.uc_stack, regs->sp); + save_altstack_ex(&frame->uc.uc_stack, regs->sp); /* Set up to return from userspace. */ restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn); @@ -423,7 +423,7 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig, else put_user_ex(0, &frame->uc.uc_flags); put_user_ex(0, &frame->uc.uc_link); - err |= __save_altstack(&frame->uc.uc_stack, regs->sp); + save_altstack_ex(&frame->uc.uc_stack, regs->sp); /* Set up to return from userspace. If provided, use a stub already in userspace. 
*/ @@ -490,7 +490,7 @@ static int x32_setup_rt_frame(struct ksignal *ksig, else put_user_ex(0, &frame->uc.uc_flags); put_user_ex(0, &frame->uc.uc_link); - err |= __compat_save_altstack(&frame->uc.uc_stack, regs->sp); + compat_save_altstack_ex(&frame->uc.uc_stack, regs->sp); put_user_ex(0, &frame->uc.uc__pad0); if (ksig->ka.sa.sa_flags & SA_RESTORER) { @@ -533,7 +533,7 @@ static int x32_setup_rt_frame(struct ksignal *ksig, * Do a signal return; undo the signal stack. */ #ifdef CONFIG_X86_32 -unsigned long sys_sigreturn(void) +asmlinkage unsigned long sys_sigreturn(void) { struct pt_regs *regs = current_pt_regs(); struct sigframe __user *frame; @@ -562,7 +562,7 @@ badframe: } #endif /* CONFIG_X86_32 */ -long sys_rt_sigreturn(void) +asmlinkage long sys_rt_sigreturn(void) { struct pt_regs *regs = current_pt_regs(); struct rt_sigframe __user *frame; @@ -728,7 +728,7 @@ static void do_signal(struct pt_regs *regs) * notification of userspace execution resumption * - triggered by the TIF_WORK_MASK flags */ -void +__visible void do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) { user_exit(); diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index cdaa347dfcad..7c3a5a61f2e4 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -256,7 +256,7 @@ static inline void __smp_reschedule_interrupt(void) scheduler_ipi(); } -void smp_reschedule_interrupt(struct pt_regs *regs) +__visible void smp_reschedule_interrupt(struct pt_regs *regs) { ack_APIC_irq(); __smp_reschedule_interrupt(); @@ -271,7 +271,7 @@ static inline void smp_entering_irq(void) irq_enter(); } -void smp_trace_reschedule_interrupt(struct pt_regs *regs) +__visible void smp_trace_reschedule_interrupt(struct pt_regs *regs) { /* * Need to call irq_enter() before calling the trace point. @@ -295,14 +295,14 @@ static inline void __smp_call_function_interrupt(void) inc_irq_stat(irq_call_count); } -void smp_call_function_interrupt(struct pt_regs *regs) +__visible void smp_call_function_interrupt(struct pt_regs *regs) { smp_entering_irq(); __smp_call_function_interrupt(); exiting_irq(); } -void smp_trace_call_function_interrupt(struct pt_regs *regs) +__visible void smp_trace_call_function_interrupt(struct pt_regs *regs) { smp_entering_irq(); trace_call_function_entry(CALL_FUNCTION_VECTOR); @@ -317,14 +317,14 @@ static inline void __smp_call_function_single_interrupt(void) inc_irq_stat(irq_call_count); } -void smp_call_function_single_interrupt(struct pt_regs *regs) +__visible void smp_call_function_single_interrupt(struct pt_regs *regs) { smp_entering_irq(); __smp_call_function_single_interrupt(); exiting_irq(); } -void smp_trace_call_function_single_interrupt(struct pt_regs *regs) +__visible void smp_trace_call_function_single_interrupt(struct pt_regs *regs) { smp_entering_irq(); trace_call_function_single_entry(CALL_FUNCTION_SINGLE_VECTOR); diff --git a/arch/x86/kernel/syscall_32.c b/arch/x86/kernel/syscall_32.c index 147fcd4941c4..e9bcd57d8a9e 100644 --- a/arch/x86/kernel/syscall_32.c +++ b/arch/x86/kernel/syscall_32.c @@ -15,7 +15,7 @@ typedef asmlinkage void (*sys_call_ptr_t)(void); extern asmlinkage void sys_ni_syscall(void); -const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = { +__visible const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = { /* * Smells like a compiler bug -- it doesn't work * when the & below is removed. 
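The __visible and asmlinkage annotations being added throughout these files mark symbols that are referenced only from assembly, so link-time optimization cannot drop, rename, or localize them. A userspace illustration of the underlying problem, with __attribute__((used)) as a rough stand-in for what __visible achieves (an analogy, not the kernel's actual definition):

#include <stdio.h>

typedef void (*sys_call_ptr_model_t)(void);

static void demo_syscall(void)
{
	puts("demo syscall");
}

/*
 * Without an annotation, an optimizer that sees no C-level reference may
 * discard this table even though assembly dispatches through it; "used"
 * pins the definition, much as __visible keeps sys_call_table reachable
 * from the entry code under LTO.
 */
__attribute__((used)) static const sys_call_ptr_model_t call_table_model[] = {
	demo_syscall,
};

int main(void)
{
	call_table_model[0]();	/* dispatch through the table, as entry_*.S does */
	return 0;
}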
diff --git a/arch/x86/kernel/syscall_64.c b/arch/x86/kernel/syscall_64.c index 5c7f8c20da74..4ac730b37f0b 100644 --- a/arch/x86/kernel/syscall_64.c +++ b/arch/x86/kernel/syscall_64.c @@ -4,6 +4,7 @@ #include <linux/sys.h> #include <linux/cache.h> #include <asm/asm-offsets.h> +#include <asm/syscall.h> #define __SYSCALL_COMMON(nr, sym, compat) __SYSCALL_64(nr, sym, compat) @@ -19,11 +20,9 @@ #define __SYSCALL_64(nr, sym, compat) [nr] = sym, -typedef void (*sys_call_ptr_t)(void); - extern void sys_ni_syscall(void); -const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = { +asmlinkage const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = { /* * Smells like a compiler bug -- it doesn't work * when the & below is removed. diff --git a/arch/x86/kernel/sysfb.c b/arch/x86/kernel/sysfb.c new file mode 100644 index 000000000000..193ec2ce46c7 --- /dev/null +++ b/arch/x86/kernel/sysfb.c @@ -0,0 +1,74 @@ +/* + * Generic System Framebuffers on x86 + * Copyright (c) 2012-2013 David Herrmann <dh.herrmann@gmail.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + */ + +/* + * Simple-Framebuffer support for x86 systems + * Create a platform-device for any available boot framebuffer. The + * simple-framebuffer platform device is already available on DT systems, so + * this module parses the global "screen_info" object and creates a suitable + * platform device compatible with the "simple-framebuffer" DT object. If + * the framebuffer is incompatible, we instead create a legacy + * "vesa-framebuffer", "efi-framebuffer" or "platform-framebuffer" device and + * pass the screen_info as platform_data. This allows legacy drivers + * to pick these devices up without messing with simple-framebuffer drivers. + * The global "screen_info" is still valid at all times. + * + * If CONFIG_X86_SYSFB is not selected, we never register "simple-framebuffer" + * platform devices, but only use legacy framebuffer devices for + * backwards compatibility. + * + * TODO: We set the dev_id field of all platform-devices to 0. This allows + * other x86 OF/DT parsers to create such devices, too. However, they must + * start at offset 1 for this to work. + */ + +#include <linux/err.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/platform_data/simplefb.h> +#include <linux/platform_device.h> +#include <linux/screen_info.h> +#include <asm/sysfb.h> + +static __init int sysfb_init(void) +{ + struct screen_info *si = &screen_info; + struct simplefb_platform_data mode; + struct platform_device *pd; + const char *name; + bool compatible; + int ret; + + sysfb_apply_efi_quirks(); + + /* try to create a simple-framebuffer device */ + compatible = parse_mode(si, &mode); + if (compatible) { + ret = create_simplefb(si, &mode); + if (!ret) + return 0; + } + + /* if the FB is incompatible, create a legacy framebuffer device */ + if (si->orig_video_isVGA == VIDEO_TYPE_EFI) + name = "efi-framebuffer"; + else if (si->orig_video_isVGA == VIDEO_TYPE_VLFB) + name = "vesa-framebuffer"; + else + name = "platform-framebuffer"; + + pd = platform_device_register_resndata(NULL, name, 0, + NULL, 0, si, sizeof(*si)); + return IS_ERR(pd) ? 
PTR_ERR(pd) : 0; +} + +/* must execute after PCI subsystem for EFI quirks */ +device_initcall(sysfb_init); diff --git a/arch/x86/kernel/sysfb_efi.c b/arch/x86/kernel/sysfb_efi.c new file mode 100644 index 000000000000..b285d4e8c68e --- /dev/null +++ b/arch/x86/kernel/sysfb_efi.c @@ -0,0 +1,214 @@ +/* + * Generic System Framebuffers on x86 + * Copyright (c) 2012-2013 David Herrmann <dh.herrmann@gmail.com> + * + * EFI Quirks Copyright (c) 2006 Edgar Hucek <gimli@dark-green.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + */ + +/* + * EFI Quirks + * Several EFI systems do not correctly advertise their boot framebuffers. + * Hence, we use this static table of known broken machines and fix up the + * information so framebuffer drivers can load correctly. + */ + +#include <linux/dmi.h> +#include <linux/err.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/pci.h> +#include <linux/screen_info.h> +#include <video/vga.h> +#include <asm/sysfb.h> + +enum { + OVERRIDE_NONE = 0x0, + OVERRIDE_BASE = 0x1, + OVERRIDE_STRIDE = 0x2, + OVERRIDE_HEIGHT = 0x4, + OVERRIDE_WIDTH = 0x8, +}; + +struct efifb_dmi_info efifb_dmi_list[] = { + [M_I17] = { "i17", 0x80010000, 1472 * 4, 1440, 900, OVERRIDE_NONE }, + [M_I20] = { "i20", 0x80010000, 1728 * 4, 1680, 1050, OVERRIDE_NONE }, /* guess */ + [M_I20_SR] = { "imac7", 0x40010000, 1728 * 4, 1680, 1050, OVERRIDE_NONE }, + [M_I24] = { "i24", 0x80010000, 2048 * 4, 1920, 1200, OVERRIDE_NONE }, /* guess */ + [M_I24_8_1] = { "imac8", 0xc0060000, 2048 * 4, 1920, 1200, OVERRIDE_NONE }, + [M_I24_10_1] = { "imac10", 0xc0010000, 2048 * 4, 1920, 1080, OVERRIDE_NONE }, + [M_I27_11_1] = { "imac11", 0xc0010000, 2560 * 4, 2560, 1440, OVERRIDE_NONE }, + [M_MINI]= { "mini", 0x80000000, 2048 * 4, 1024, 768, OVERRIDE_NONE }, + [M_MINI_3_1] = { "mini31", 0x40010000, 1024 * 4, 1024, 768, OVERRIDE_NONE }, + [M_MINI_4_1] = { "mini41", 0xc0010000, 2048 * 4, 1920, 1200, OVERRIDE_NONE }, + [M_MB] = { "macbook", 0x80000000, 2048 * 4, 1280, 800, OVERRIDE_NONE }, + [M_MB_5_1] = { "macbook51", 0x80010000, 2048 * 4, 1280, 800, OVERRIDE_NONE }, + [M_MB_6_1] = { "macbook61", 0x80010000, 2048 * 4, 1280, 800, OVERRIDE_NONE }, + [M_MB_7_1] = { "macbook71", 0x80010000, 2048 * 4, 1280, 800, OVERRIDE_NONE }, + [M_MBA] = { "mba", 0x80000000, 2048 * 4, 1280, 800, OVERRIDE_NONE }, + /* 11" Macbook Air 3,1 passes the wrong stride */ + [M_MBA_3] = { "mba3", 0, 2048 * 4, 0, 0, OVERRIDE_STRIDE }, + [M_MBP] = { "mbp", 0x80010000, 1472 * 4, 1440, 900, OVERRIDE_NONE }, + [M_MBP_2] = { "mbp2", 0, 0, 0, 0, OVERRIDE_NONE }, /* placeholder */ + [M_MBP_2_2] = { "mbp22", 0x80010000, 1472 * 4, 1440, 900, OVERRIDE_NONE }, + [M_MBP_SR] = { "mbp3", 0x80030000, 2048 * 4, 1440, 900, OVERRIDE_NONE }, + [M_MBP_4] = { "mbp4", 0xc0060000, 2048 * 4, 1920, 1200, OVERRIDE_NONE }, + [M_MBP_5_1] = { "mbp51", 0xc0010000, 2048 * 4, 1440, 900, OVERRIDE_NONE }, + [M_MBP_5_2] = { "mbp52", 0xc0010000, 2048 * 4, 1920, 1200, OVERRIDE_NONE }, + [M_MBP_5_3] = { "mbp53", 0xd0010000, 2048 * 4, 1440, 900, OVERRIDE_NONE }, + [M_MBP_6_1] = { "mbp61", 0x90030000, 2048 * 4, 1920, 1200, OVERRIDE_NONE }, + [M_MBP_6_2] = { "mbp62", 0x90030000, 2048 * 4, 1680, 1050, OVERRIDE_NONE }, + [M_MBP_7_1] = { "mbp71", 0xc0010000, 2048 * 4, 1280, 800, OVERRIDE_NONE }, + [M_MBP_8_2] = { "mbp82", 0x90010000, 1472 *
4, 1440, 900, OVERRIDE_NONE }, + [M_UNKNOWN] = { NULL, 0, 0, 0, 0, OVERRIDE_NONE } +}; + +#define choose_value(dmivalue, fwvalue, field, flags) ({ \ + typeof(fwvalue) _ret_ = fwvalue; \ + if ((flags) & (field)) \ + _ret_ = dmivalue; \ + else if ((fwvalue) == 0) \ + _ret_ = dmivalue; \ + _ret_; \ + }) + +static int __init efifb_set_system(const struct dmi_system_id *id) +{ + struct efifb_dmi_info *info = id->driver_data; + + if (info->base == 0 && info->height == 0 && info->width == 0 && + info->stride == 0) + return 0; + + /* Trust the bootloader over the DMI tables */ + if (screen_info.lfb_base == 0) { +#if defined(CONFIG_PCI) + struct pci_dev *dev = NULL; + int found_bar = 0; +#endif + if (info->base) { + screen_info.lfb_base = choose_value(info->base, + screen_info.lfb_base, OVERRIDE_BASE, + info->flags); + +#if defined(CONFIG_PCI) + /* make sure that the address in the table is actually + * on a VGA device's PCI BAR */ + + for_each_pci_dev(dev) { + int i; + if ((dev->class >> 8) != PCI_CLASS_DISPLAY_VGA) + continue; + for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { + resource_size_t start, end; + + start = pci_resource_start(dev, i); + if (start == 0) + break; + end = pci_resource_end(dev, i); + if (screen_info.lfb_base >= start && + screen_info.lfb_base < end) { + found_bar = 1; + } + } + } + if (!found_bar) + screen_info.lfb_base = 0; +#endif + } + } + if (screen_info.lfb_base) { + screen_info.lfb_linelength = choose_value(info->stride, + screen_info.lfb_linelength, OVERRIDE_STRIDE, + info->flags); + screen_info.lfb_width = choose_value(info->width, + screen_info.lfb_width, OVERRIDE_WIDTH, + info->flags); + screen_info.lfb_height = choose_value(info->height, + screen_info.lfb_height, OVERRIDE_HEIGHT, + info->flags); + if (screen_info.orig_video_isVGA == 0) + screen_info.orig_video_isVGA = VIDEO_TYPE_EFI; + } else { + screen_info.lfb_linelength = 0; + screen_info.lfb_width = 0; + screen_info.lfb_height = 0; + screen_info.orig_video_isVGA = 0; + return 0; + } + + printk(KERN_INFO "efifb: dmi detected %s - framebuffer at 0x%08x " + "(%dx%d, stride %d)\n", id->ident, + screen_info.lfb_base, screen_info.lfb_width, + screen_info.lfb_height, screen_info.lfb_linelength); + + return 1; +} + +#define EFIFB_DMI_SYSTEM_ID(vendor, name, enumid) \ + { \ + efifb_set_system, \ + name, \ + { \ + DMI_MATCH(DMI_BIOS_VENDOR, vendor), \ + DMI_MATCH(DMI_PRODUCT_NAME, name) \ + }, \ + &efifb_dmi_list[enumid] \ + } + +static const struct dmi_system_id efifb_dmi_system_table[] __initconst = { + EFIFB_DMI_SYSTEM_ID("Apple Computer, Inc.", "iMac4,1", M_I17), + /* At least one of these two will be right; maybe both? */ + EFIFB_DMI_SYSTEM_ID("Apple Computer, Inc.", "iMac5,1", M_I20), + EFIFB_DMI_SYSTEM_ID("Apple Inc.", "iMac5,1", M_I20), + /* At least one of these two will be right; maybe both? */ + EFIFB_DMI_SYSTEM_ID("Apple Computer, Inc.", "iMac6,1", M_I24), + EFIFB_DMI_SYSTEM_ID("Apple Inc.", "iMac6,1", M_I24), + EFIFB_DMI_SYSTEM_ID("Apple Inc.", "iMac7,1", M_I20_SR), + EFIFB_DMI_SYSTEM_ID("Apple Inc.", "iMac8,1", M_I24_8_1), + EFIFB_DMI_SYSTEM_ID("Apple Inc.", "iMac10,1", M_I24_10_1), + EFIFB_DMI_SYSTEM_ID("Apple Inc.", "iMac11,1", M_I27_11_1), + EFIFB_DMI_SYSTEM_ID("Apple Computer, Inc.", "Macmini1,1", M_MINI), + EFIFB_DMI_SYSTEM_ID("Apple Inc.", "Macmini3,1", M_MINI_3_1), + EFIFB_DMI_SYSTEM_ID("Apple Inc.", "Macmini4,1", M_MINI_4_1), + EFIFB_DMI_SYSTEM_ID("Apple Computer, Inc.", "MacBook1,1", M_MB), + /* At least one of these two will be right; maybe both? 
*/ + EFIFB_DMI_SYSTEM_ID("Apple Computer, Inc.", "MacBook2,1", M_MB), + EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBook2,1", M_MB), + /* At least one of these two will be right; maybe both? */ + EFIFB_DMI_SYSTEM_ID("Apple Computer, Inc.", "MacBook3,1", M_MB), + EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBook3,1", M_MB), + EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBook4,1", M_MB), + EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBook5,1", M_MB_5_1), + EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBook6,1", M_MB_6_1), + EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBook7,1", M_MB_7_1), + EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBookAir1,1", M_MBA), + EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBookAir3,1", M_MBA_3), + EFIFB_DMI_SYSTEM_ID("Apple Computer, Inc.", "MacBookPro1,1", M_MBP), + EFIFB_DMI_SYSTEM_ID("Apple Computer, Inc.", "MacBookPro2,1", M_MBP_2), + EFIFB_DMI_SYSTEM_ID("Apple Computer, Inc.", "MacBookPro2,2", M_MBP_2_2), + EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBookPro2,1", M_MBP_2), + EFIFB_DMI_SYSTEM_ID("Apple Computer, Inc.", "MacBookPro3,1", M_MBP_SR), + EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBookPro3,1", M_MBP_SR), + EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBookPro4,1", M_MBP_4), + EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBookPro5,1", M_MBP_5_1), + EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBookPro5,2", M_MBP_5_2), + EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBookPro5,3", M_MBP_5_3), + EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBookPro6,1", M_MBP_6_1), + EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBookPro6,2", M_MBP_6_2), + EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBookPro7,1", M_MBP_7_1), + EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBookPro8,2", M_MBP_8_2), + {}, +}; + +__init void sysfb_apply_efi_quirks(void) +{ + if (screen_info.orig_video_isVGA != VIDEO_TYPE_EFI || + !(screen_info.capabilities & VIDEO_CAPABILITY_SKIP_QUIRKS)) + dmi_check_system(efifb_dmi_system_table); +} diff --git a/arch/x86/kernel/sysfb_simplefb.c b/arch/x86/kernel/sysfb_simplefb.c new file mode 100644 index 000000000000..22513e96b012 --- /dev/null +++ b/arch/x86/kernel/sysfb_simplefb.c @@ -0,0 +1,95 @@ +/* + * Generic System Framebuffers on x86 + * Copyright (c) 2012-2013 David Herrmann <dh.herrmann@gmail.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + */ + +/* + * simple-framebuffer probing + * Try to convert "screen_info" into a "simple-framebuffer" compatible mode. + * If the mode is incompatible, we return "false" and let the caller create + * legacy nodes instead. 
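 */

parse_mode() below is a pure table match: a boot screen_info is simple-framebuffer compatible exactly when its depth and its R/G/B/alpha offsets and lengths equal one entry of SIMPLEFB_FORMATS. A compressed, self-contained model of that comparison (the struct layout and the two formats here are this sketch's assumptions, not the simplefb header):

#include <stddef.h>
#include <stdio.h>

struct fmt_model {			/* one SIMPLEFB_FORMATS entry, reduced */
	const char *name;
	int bpp;
	int r_off, r_len, g_off, g_len, b_off, b_len;
};

static const struct fmt_model formats_model[] = {
	{ "r5g6b5",   16, 11, 5, 5, 6, 0, 5 },
	{ "x8r8g8b8", 32, 16, 8, 8, 8, 0, 8 },
};

/* returns the matching format name, or NULL so the caller can fall back
 * to a legacy vesa/efi/platform framebuffer device */
static const char *match_mode(int bpp, int r_off, int r_len,
			      int g_off, int g_len, int b_off, int b_len)
{
	size_t i;

	for (i = 0; i < sizeof(formats_model) / sizeof(formats_model[0]); i++) {
		const struct fmt_model *f = &formats_model[i];

		if (bpp == f->bpp &&
		    r_off == f->r_off && r_len == f->r_len &&
		    g_off == f->g_off && g_len == f->g_len &&
		    b_off == f->b_off && b_len == f->b_len)
			return f->name;
	}
	return NULL;
}

int main(void)
{
	printf("%s\n", match_mode(32, 16, 8, 8, 8, 0, 8)); /* x8r8g8b8 */
	return 0;
}

/*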
+ */ + +#include <linux/err.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/platform_data/simplefb.h> +#include <linux/platform_device.h> +#include <linux/screen_info.h> +#include <asm/sysfb.h> + +static const char simplefb_resname[] = "BOOTFB"; +static const struct simplefb_format formats[] = SIMPLEFB_FORMATS; + +/* try parsing x86 screen_info into a simple-framebuffer mode struct */ +__init bool parse_mode(const struct screen_info *si, + struct simplefb_platform_data *mode) +{ + const struct simplefb_format *f; + __u8 type; + unsigned int i; + + type = si->orig_video_isVGA; + if (type != VIDEO_TYPE_VLFB && type != VIDEO_TYPE_EFI) + return false; + + for (i = 0; i < ARRAY_SIZE(formats); ++i) { + f = &formats[i]; + if (si->lfb_depth == f->bits_per_pixel && + si->red_size == f->red.length && + si->red_pos == f->red.offset && + si->green_size == f->green.length && + si->green_pos == f->green.offset && + si->blue_size == f->blue.length && + si->blue_pos == f->blue.offset && + si->rsvd_size == f->transp.length && + si->rsvd_pos == f->transp.offset) { + mode->format = f->name; + mode->width = si->lfb_width; + mode->height = si->lfb_height; + mode->stride = si->lfb_linelength; + return true; + } + } + + return false; +} + +__init int create_simplefb(const struct screen_info *si, + const struct simplefb_platform_data *mode) +{ + struct platform_device *pd; + struct resource res; + unsigned long len; + + /* don't use lfb_size as it may contain the whole VMEM instead of only + * the part that is occupied by the framebuffer */ + len = mode->height * mode->stride; + len = PAGE_ALIGN(len); + if (len > si->lfb_size << 16) { + printk(KERN_WARNING "sysfb: VRAM smaller than advertised\n"); + return -EINVAL; + } + + /* setup IORESOURCE_MEM as framebuffer memory */ + memset(&res, 0, sizeof(res)); + res.flags = IORESOURCE_MEM; + res.name = simplefb_resname; + res.start = si->lfb_base; + res.end = si->lfb_base + len - 1; + if (res.end <= res.start) + return -EINVAL; + + pd = platform_device_register_resndata(NULL, "simple-framebuffer", 0, + &res, 1, mode, sizeof(*mode)); + if (IS_ERR(pd)) + return PTR_ERR(pd); + + return 0; +} diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c index addf7b58f4e8..91a4496db434 100644 --- a/arch/x86/kernel/tboot.c +++ b/arch/x86/kernel/tboot.c @@ -301,6 +301,15 @@ static int tboot_sleep(u8 sleep_state, u32 pm1a_control, u32 pm1b_control) return 0; } +static int tboot_extended_sleep(u8 sleep_state, u32 val_a, u32 val_b) +{ + if (!tboot_enabled()) + return 0; + + pr_warning("tboot is not able to suspend on platforms with reduced hardware sleep (ACPIv5)"); + return -ENODEV; +} + static atomic_t ap_wfs_count; static int tboot_wait_for_aps(int num_aps) @@ -422,6 +431,7 @@ static __init int tboot_late_init(void) #endif acpi_os_set_prepare_sleep(&tboot_sleep); + acpi_os_set_prepare_extended_sleep(&tboot_extended_sleep); return 0; } diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 1b23a1c92746..8c8093b146ca 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -58,6 +58,7 @@ #include <asm/mce.h> #include <asm/fixmap.h> #include <asm/mach_traps.h> +#include <asm/alternative.h> #ifdef CONFIG_X86_64 #include <asm/x86_init.h> @@ -327,6 +328,9 @@ dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_co ftrace_int3_handler(regs)) return; #endif + if (poke_int3_handler(regs)) + return; + prev_state = exception_enter(); #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP if 
(kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP, diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 6ff49247edf8..930e5d48f560 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -89,6 +89,12 @@ int check_tsc_unstable(void) } EXPORT_SYMBOL_GPL(check_tsc_unstable); +int check_tsc_disabled(void) +{ + return tsc_disabled; +} +EXPORT_SYMBOL_GPL(check_tsc_disabled); + #ifdef CONFIG_X86_TSC int __init notsc_setup(char *str) { diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index a20ecb5b6cbf..b110fe6c03d4 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -413,7 +413,8 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, (1 << KVM_FEATURE_CLOCKSOURCE2) | (1 << KVM_FEATURE_ASYNC_PF) | (1 << KVM_FEATURE_PV_EOI) | - (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT); + (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT) | + (1 << KVM_FEATURE_PV_UNHALT); if (sched_info_on()) entry->eax |= (1 << KVM_FEATURE_STEAL_TIME); diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index afc11245827c..5439117d5c4c 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -79,16 +79,6 @@ static inline void apic_set_reg(struct kvm_lapic *apic, int reg_off, u32 val) *((u32 *) (apic->regs + reg_off)) = val; } -static inline int apic_test_and_set_vector(int vec, void *bitmap) -{ - return test_and_set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); -} - -static inline int apic_test_and_clear_vector(int vec, void *bitmap) -{ - return test_and_clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); -} - static inline int apic_test_vector(int vec, void *bitmap) { return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); @@ -331,10 +321,10 @@ void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir) } EXPORT_SYMBOL_GPL(kvm_apic_update_irr); -static inline int apic_test_and_set_irr(int vec, struct kvm_lapic *apic) +static inline void apic_set_irr(int vec, struct kvm_lapic *apic) { apic->irr_pending = true; - return apic_test_and_set_vector(vec, apic->regs + APIC_IRR); + apic_set_vector(vec, apic->regs + APIC_IRR); } static inline int apic_search_irr(struct kvm_lapic *apic) @@ -681,32 +671,28 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, if (unlikely(!apic_enabled(apic))) break; + result = 1; + if (dest_map) __set_bit(vcpu->vcpu_id, dest_map); - if (kvm_x86_ops->deliver_posted_interrupt) { - result = 1; + if (kvm_x86_ops->deliver_posted_interrupt) kvm_x86_ops->deliver_posted_interrupt(vcpu, vector); - } else { - result = !apic_test_and_set_irr(vector, apic); - - if (!result) { - if (trig_mode) - apic_debug("level trig mode repeatedly " - "for vector %d", vector); - goto out; - } + else { + apic_set_irr(vector, apic); kvm_make_request(KVM_REQ_EVENT, vcpu); kvm_vcpu_kick(vcpu); } -out: trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode, - trig_mode, vector, !result); + trig_mode, vector, false); break; case APIC_DM_REMRD: - apic_debug("Ignoring delivery mode 3\n"); + result = 1; + vcpu->arch.pv.pv_unhalted = 1; + kvm_make_request(KVM_REQ_EVENT, vcpu); + kvm_vcpu_kick(vcpu); break; case APIC_DM_SMI: diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 9e9285ae9b94..6e2d2c8f230b 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -132,8 +132,8 @@ module_param(dbg, bool, 0644); (PAGE_MASK & ~((1ULL << (PAGE_SHIFT + (((level) - 1) \ * PT32_LEVEL_BITS))) - 1)) -#define PT64_PERM_MASK (PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK \ - | PT64_NX_MASK) +#define PT64_PERM_MASK (PT_PRESENT_MASK | PT_WRITABLE_MASK | 
shadow_user_mask \ + | shadow_x_mask | shadow_nx_mask) #define ACC_EXEC_MASK 1 #define ACC_WRITE_MASK PT_WRITABLE_MASK @@ -331,11 +331,6 @@ static int is_large_pte(u64 pte) return pte & PT_PAGE_SIZE_MASK; } -static int is_dirty_gpte(unsigned long pte) -{ - return pte & PT_DIRTY_MASK; -} - static int is_rmap_spte(u64 pte) { return is_shadow_present_pte(pte); @@ -2052,12 +2047,18 @@ static void shadow_walk_next(struct kvm_shadow_walk_iterator *iterator) return __shadow_walk_next(iterator, *iterator->sptep); } -static void link_shadow_page(u64 *sptep, struct kvm_mmu_page *sp) +static void link_shadow_page(u64 *sptep, struct kvm_mmu_page *sp, bool accessed) { u64 spte; + BUILD_BUG_ON(VMX_EPT_READABLE_MASK != PT_PRESENT_MASK || + VMX_EPT_WRITABLE_MASK != PT_WRITABLE_MASK); + spte = __pa(sp->spt) | PT_PRESENT_MASK | PT_WRITABLE_MASK | - shadow_user_mask | shadow_x_mask | shadow_accessed_mask; + shadow_user_mask | shadow_x_mask; + + if (accessed) + spte |= shadow_accessed_mask; mmu_spte_set(sptep, spte); } @@ -2574,14 +2575,6 @@ static void nonpaging_new_cr3(struct kvm_vcpu *vcpu) mmu_free_roots(vcpu); } -static bool is_rsvd_bits_set(struct kvm_mmu *mmu, u64 gpte, int level) -{ - int bit7; - - bit7 = (gpte >> 7) & 1; - return (gpte & mmu->rsvd_bits_mask[bit7][level-1]) != 0; -} - static pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, bool no_dirty_log) { @@ -2594,26 +2587,6 @@ static pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, return gfn_to_pfn_memslot_atomic(slot, gfn); } -static bool prefetch_invalid_gpte(struct kvm_vcpu *vcpu, - struct kvm_mmu_page *sp, u64 *spte, - u64 gpte) -{ - if (is_rsvd_bits_set(&vcpu->arch.mmu, gpte, PT_PAGE_TABLE_LEVEL)) - goto no_present; - - if (!is_present_gpte(gpte)) - goto no_present; - - if (!(gpte & PT_ACCESSED_MASK)) - goto no_present; - - return false; - -no_present: - drop_spte(vcpu->kvm, spte); - return true; -} - static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, u64 *start, u64 *end) @@ -2710,7 +2683,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, iterator.level - 1, 1, ACC_ALL, iterator.sptep); - link_shadow_page(iterator.sptep, sp); + link_shadow_page(iterator.sptep, sp, true); } } return emulate; @@ -2808,7 +2781,7 @@ exit: return ret; } -static bool page_fault_can_be_fast(struct kvm_vcpu *vcpu, u32 error_code) +static bool page_fault_can_be_fast(u32 error_code) { /* * Do not fix the mmio spte with invalid generation number which @@ -2861,7 +2834,7 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level, bool ret = false; u64 spte = 0ull; - if (!page_fault_can_be_fast(vcpu, error_code)) + if (!page_fault_can_be_fast(error_code)) return false; walk_shadow_page_lockless_begin(vcpu); @@ -3209,6 +3182,7 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu) mmu_sync_roots(vcpu); spin_unlock(&vcpu->kvm->mmu_lock); } +EXPORT_SYMBOL_GPL(kvm_mmu_sync_roots); static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access, struct x86_exception *exception) @@ -3478,6 +3452,7 @@ void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu) ++vcpu->stat.tlb_flush; kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); } +EXPORT_SYMBOL_GPL(kvm_mmu_flush_tlb); static void paging_new_cr3(struct kvm_vcpu *vcpu) { @@ -3501,18 +3476,6 @@ static void paging_free(struct kvm_vcpu *vcpu) nonpaging_free(vcpu); } -static inline void protect_clean_gpte(unsigned *access, unsigned gpte) -{ - unsigned mask; - - BUILD_BUG_ON(PT_WRITABLE_MASK != ACC_WRITE_MASK); - - mask = 
(unsigned)~ACC_WRITE_MASK; - /* Allow write access to dirty gptes */ - mask |= (gpte >> (PT_DIRTY_SHIFT - PT_WRITABLE_SHIFT)) & PT_WRITABLE_MASK; - *access &= mask; -} - static bool sync_mmio_spte(struct kvm *kvm, u64 *sptep, gfn_t gfn, unsigned access, int *nr_present) { @@ -3530,16 +3493,6 @@ static bool sync_mmio_spte(struct kvm *kvm, u64 *sptep, gfn_t gfn, return false; } -static inline unsigned gpte_access(struct kvm_vcpu *vcpu, u64 gpte) -{ - unsigned access; - - access = (gpte & (PT_WRITABLE_MASK | PT_USER_MASK)) | ACC_EXEC_MASK; - access &= ~(gpte >> PT64_NX_SHIFT); - - return access; -} - static inline bool is_last_gpte(struct kvm_mmu *mmu, unsigned level, unsigned gpte) { unsigned index; @@ -3549,6 +3502,11 @@ static inline bool is_last_gpte(struct kvm_mmu *mmu, unsigned level, unsigned gp return mmu->last_pte_bitmap & (1 << index); } +#define PTTYPE_EPT 18 /* arbitrary */ +#define PTTYPE PTTYPE_EPT +#include "paging_tmpl.h" +#undef PTTYPE + #define PTTYPE 64 #include "paging_tmpl.h" #undef PTTYPE @@ -3563,6 +3521,8 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, int maxphyaddr = cpuid_maxphyaddr(vcpu); u64 exb_bit_rsvd = 0; + context->bad_mt_xwr = 0; + if (!context->nx) exb_bit_rsvd = rsvd_bits(63, 63); switch (context->root_level) { @@ -3618,7 +3578,40 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, } } -static void update_permission_bitmask(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu) +static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu, + struct kvm_mmu *context, bool execonly) +{ + int maxphyaddr = cpuid_maxphyaddr(vcpu); + int pte; + + context->rsvd_bits_mask[0][3] = + rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 7); + context->rsvd_bits_mask[0][2] = + rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 6); + context->rsvd_bits_mask[0][1] = + rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 6); + context->rsvd_bits_mask[0][0] = rsvd_bits(maxphyaddr, 51); + + /* large page */ + context->rsvd_bits_mask[1][3] = context->rsvd_bits_mask[0][3]; + context->rsvd_bits_mask[1][2] = + rsvd_bits(maxphyaddr, 51) | rsvd_bits(12, 29); + context->rsvd_bits_mask[1][1] = + rsvd_bits(maxphyaddr, 51) | rsvd_bits(12, 20); + context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0]; + + for (pte = 0; pte < 64; pte++) { + int rwx_bits = pte & 7; + int mt = pte >> 3; + if (mt == 0x2 || mt == 0x3 || mt == 0x7 || + rwx_bits == 0x2 || rwx_bits == 0x6 || + (rwx_bits == 0x4 && !execonly)) + context->bad_mt_xwr |= (1ull << pte); + } +} + +static void update_permission_bitmask(struct kvm_vcpu *vcpu, + struct kvm_mmu *mmu, bool ept) { unsigned bit, byte, pfec; u8 map; @@ -3636,12 +3629,16 @@ static void update_permission_bitmask(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu w = bit & ACC_WRITE_MASK; u = bit & ACC_USER_MASK; - /* Not really needed: !nx will cause pte.nx to fault */ - x |= !mmu->nx; - /* Allow supervisor writes if !cr0.wp */ - w |= !is_write_protection(vcpu) && !uf; - /* Disallow supervisor fetches of user code if cr4.smep */ - x &= !(smep && u && !uf); + if (!ept) { + /* Not really needed: !nx will cause pte.nx to fault */ + x |= !mmu->nx; + /* Allow supervisor writes if !cr0.wp */ + w |= !is_write_protection(vcpu) && !uf; + /* Disallow supervisor fetches of user code if cr4.smep */ + x &= !(smep && u && !uf); + } else + /* Not really needed: no U/S accesses on ept */ + u = 1; fault = (ff && !x) || (uf && !u) || (wf && !w); map |= fault << bit; @@ -3676,7 +3673,7 @@ static int paging64_init_context_common(struct kvm_vcpu *vcpu, context->root_level = level; 
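update_permission_bitmask() above precomputes, for each page-fault error-code combination, whether every pte access-bit combination faults, so the permission check at fault time becomes a single table lookup. A self-contained model of that precomputation (simplified: no SMEP term, and the pfec and access encodings below are assumptions of this sketch, not the kernel's exact layout):

#include <stdint.h>
#include <stdio.h>

enum { ACC_EXEC = 1, ACC_WRITE = 2, ACC_USER = 4 };	/* pte access bits */
enum { PFEC_WRITE = 1, PFEC_USER = 2, PFEC_FETCH = 4 };	/* fault reason */

static uint8_t permission_map(int pfec, int nx, int wp)
{
	int wf = pfec & PFEC_WRITE;
	int uf = pfec & PFEC_USER;
	int ff = pfec & PFEC_FETCH;
	uint8_t map = 0;
	int bit;

	for (bit = 0; bit < 8; bit++) {
		int x = bit & ACC_EXEC;
		int w = bit & ACC_WRITE;
		int u = bit & ACC_USER;
		int fault;

		x |= !nx;		/* with NX off, fetches cannot fault */
		w |= !wp && !uf;	/* supervisor writes allowed if !CR0.WP */

		fault = (ff && !x) || (uf && !u) || (wf && !w);
		map |= (uint8_t)(fault << bit);
	}
	return map;
}

int main(void)
{
	/* a user write to a read-only user page must fault */
	uint8_t m = permission_map(PFEC_WRITE | PFEC_USER, 1, 1);

	printf("faults: %d\n", !!(m & (1 << (ACC_USER | ACC_EXEC))));
	return 0;
}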
reset_rsvds_bits_mask(vcpu, context); - update_permission_bitmask(vcpu, context); + update_permission_bitmask(vcpu, context, false); update_last_pte_bitmap(vcpu, context); ASSERT(is_pae(vcpu)); @@ -3706,7 +3703,7 @@ static int paging32_init_context(struct kvm_vcpu *vcpu, context->root_level = PT32_ROOT_LEVEL; reset_rsvds_bits_mask(vcpu, context); - update_permission_bitmask(vcpu, context); + update_permission_bitmask(vcpu, context, false); update_last_pte_bitmap(vcpu, context); context->new_cr3 = paging_new_cr3; @@ -3768,7 +3765,7 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) context->gva_to_gpa = paging32_gva_to_gpa; } - update_permission_bitmask(vcpu, context); + update_permission_bitmask(vcpu, context, false); update_last_pte_bitmap(vcpu, context); return 0; @@ -3800,6 +3797,33 @@ int kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context) } EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu); +int kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context, + bool execonly) +{ + ASSERT(vcpu); + ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa)); + + context->shadow_root_level = kvm_x86_ops->get_tdp_level(); + + context->nx = true; + context->new_cr3 = paging_new_cr3; + context->page_fault = ept_page_fault; + context->gva_to_gpa = ept_gva_to_gpa; + context->sync_page = ept_sync_page; + context->invlpg = ept_invlpg; + context->update_pte = ept_update_pte; + context->free = paging_free; + context->root_level = context->shadow_root_level; + context->root_hpa = INVALID_PAGE; + context->direct_map = false; + + update_permission_bitmask(vcpu, context, true); + reset_rsvds_bits_mask_ept(vcpu, context, execonly); + + return 0; +} +EXPORT_SYMBOL_GPL(kvm_init_shadow_ept_mmu); + static int init_kvm_softmmu(struct kvm_vcpu *vcpu) { int r = kvm_init_shadow_mmu(vcpu, vcpu->arch.walk_mmu); @@ -3847,7 +3871,7 @@ static int init_kvm_nested_mmu(struct kvm_vcpu *vcpu) g_context->gva_to_gpa = paging32_gva_to_gpa_nested; } - update_permission_bitmask(vcpu, g_context); + update_permission_bitmask(vcpu, g_context, false); update_last_pte_bitmap(vcpu, g_context); return 0; @@ -3923,8 +3947,8 @@ static bool need_remote_flush(u64 old, u64 new) return true; if ((old ^ new) & PT64_BASE_ADDR_MASK) return true; - old ^= PT64_NX_MASK; - new ^= PT64_NX_MASK; + old ^= shadow_nx_mask; + new ^= shadow_nx_mask; return (old & ~new & PT64_PERM_MASK) != 0; } @@ -4182,7 +4206,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code, switch (er) { case EMULATE_DONE: return 1; - case EMULATE_DO_MMIO: + case EMULATE_USER_EXIT: ++vcpu->stat.mmio_exits; /* fall through */ case EMULATE_FAIL: @@ -4390,11 +4414,8 @@ void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm) /* * The very rare case: if the generation-number is round, * zap all shadow pages. - * - * The max value is MMIO_MAX_GEN - 1 since it is not called - * when mark memslot invalid. 
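 */

The wraparound check being relaxed in this hunk is the eager half of a common generation-number scheme: each MMIO spte carries the memslot generation it was created under, stale entries fail the comparison and are invalidated lazily, and only when the counter wraps its bit-field must everything be zapped at once. A generic model of the pattern (the 4-bit width is this sketch's choice, far smaller than the spte field, so the demo wraps quickly):

#include <stdbool.h>
#include <stdio.h>

#define GEN_BITS 4			/* tiny width so the demo wraps fast */
#define GEN_MASK ((1u << GEN_BITS) - 1)

struct entry_model {
	unsigned int gen;		/* generation stamped at creation */
	int data;
};

static unsigned int current_gen;

/* lazy invalidation: a stale generation simply fails the compare */
static bool entry_valid(const struct entry_model *e)
{
	return e->gen == (current_gen & GEN_MASK);
}

static void bump_generation(void)
{
	current_gen++;
	/*
	 * On wraparound an old entry could alias the new generation,
	 * so flush eagerly -- the analogue of zapping all shadow pages.
	 */
	if ((current_gen & GEN_MASK) == 0)
		puts("wrap: zap everything");
}

int main(void)
{
	struct entry_model e = { current_gen & GEN_MASK, 42 };
	int i;

	bump_generation();
	printf("valid after bump: %d\n", entry_valid(&e));	/* 0 */
	for (i = 1; i < (1 << GEN_BITS); i++)
		bump_generation();				/* hits the wrap */
	return 0;
}

/*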
*/ - if (unlikely(kvm_current_mmio_generation(kvm) >= (MMIO_MAX_GEN - 1))) { + if (unlikely(kvm_current_mmio_generation(kvm) >= MMIO_MAX_GEN)) { printk_ratelimited(KERN_INFO "kvm: zapping shadow pages for mmio generation wraparound\n"); kvm_mmu_invalidate_zap_all_pages(kvm); } diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index 5b59c573aba7..77e044a0f5f7 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -71,6 +71,8 @@ enum { int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct); int kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context); +int kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context, + bool execonly); static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm) { diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 7769699d48a8..043330159179 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -23,6 +23,13 @@ * so the code in this file is compiled twice, once per pte size. */ +/* + * This is used to catch non optimized PT_GUEST_(DIRTY|ACCESS)_SHIFT macro + * uses for EPT without A/D paging type. + */ +extern u64 __pure __using_nonexistent_pte_bit(void) + __compiletime_error("wrong use of PT_GUEST_(DIRTY|ACCESS)_SHIFT"); + #if PTTYPE == 64 #define pt_element_t u64 #define guest_walker guest_walker64 @@ -32,6 +39,10 @@ #define PT_LVL_OFFSET_MASK(lvl) PT64_LVL_OFFSET_MASK(lvl) #define PT_INDEX(addr, level) PT64_INDEX(addr, level) #define PT_LEVEL_BITS PT64_LEVEL_BITS + #define PT_GUEST_ACCESSED_MASK PT_ACCESSED_MASK + #define PT_GUEST_DIRTY_MASK PT_DIRTY_MASK + #define PT_GUEST_DIRTY_SHIFT PT_DIRTY_SHIFT + #define PT_GUEST_ACCESSED_SHIFT PT_ACCESSED_SHIFT #ifdef CONFIG_X86_64 #define PT_MAX_FULL_LEVELS 4 #define CMPXCHG cmpxchg @@ -49,7 +60,26 @@ #define PT_INDEX(addr, level) PT32_INDEX(addr, level) #define PT_LEVEL_BITS PT32_LEVEL_BITS #define PT_MAX_FULL_LEVELS 2 + #define PT_GUEST_ACCESSED_MASK PT_ACCESSED_MASK + #define PT_GUEST_DIRTY_MASK PT_DIRTY_MASK + #define PT_GUEST_DIRTY_SHIFT PT_DIRTY_SHIFT + #define PT_GUEST_ACCESSED_SHIFT PT_ACCESSED_SHIFT #define CMPXCHG cmpxchg +#elif PTTYPE == PTTYPE_EPT + #define pt_element_t u64 + #define guest_walker guest_walkerEPT + #define FNAME(name) ept_##name + #define PT_BASE_ADDR_MASK PT64_BASE_ADDR_MASK + #define PT_LVL_ADDR_MASK(lvl) PT64_LVL_ADDR_MASK(lvl) + #define PT_LVL_OFFSET_MASK(lvl) PT64_LVL_OFFSET_MASK(lvl) + #define PT_INDEX(addr, level) PT64_INDEX(addr, level) + #define PT_LEVEL_BITS PT64_LEVEL_BITS + #define PT_GUEST_ACCESSED_MASK 0 + #define PT_GUEST_DIRTY_MASK 0 + #define PT_GUEST_DIRTY_SHIFT __using_nonexistent_pte_bit() + #define PT_GUEST_ACCESSED_SHIFT __using_nonexistent_pte_bit() + #define CMPXCHG cmpxchg64 + #define PT_MAX_FULL_LEVELS 4 #else #error Invalid PTTYPE value #endif @@ -80,6 +110,40 @@ static gfn_t gpte_to_gfn_lvl(pt_element_t gpte, int lvl) return (gpte & PT_LVL_ADDR_MASK(lvl)) >> PAGE_SHIFT; } +static inline void FNAME(protect_clean_gpte)(unsigned *access, unsigned gpte) +{ + unsigned mask; + + /* dirty bit is not supported, so no need to track it */ + if (!PT_GUEST_DIRTY_MASK) + return; + + BUILD_BUG_ON(PT_WRITABLE_MASK != ACC_WRITE_MASK); + + mask = (unsigned)~ACC_WRITE_MASK; + /* Allow write access to dirty gptes */ + mask |= (gpte >> (PT_GUEST_DIRTY_SHIFT - PT_WRITABLE_SHIFT)) & + PT_WRITABLE_MASK; + *access &= mask; +} + +static bool FNAME(is_rsvd_bits_set)(struct kvm_mmu *mmu, u64 gpte, int level) +{ + int bit7 = (gpte >> 7) & 1, low6 = gpte & 0x3f; + + return (gpte & 
mmu->rsvd_bits_mask[bit7][level-1]) | + ((mmu->bad_mt_xwr & (1ull << low6)) != 0); +} + +static inline int FNAME(is_present_gpte)(unsigned long pte) +{ +#if PTTYPE != PTTYPE_EPT + return is_present_gpte(pte); +#else + return pte & 7; +#endif +} + static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, pt_element_t __user *ptep_user, unsigned index, pt_element_t orig_pte, pt_element_t new_pte) @@ -103,6 +167,42 @@ static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, return (ret != orig_pte); } +static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu, + struct kvm_mmu_page *sp, u64 *spte, + u64 gpte) +{ + if (FNAME(is_rsvd_bits_set)(&vcpu->arch.mmu, gpte, PT_PAGE_TABLE_LEVEL)) + goto no_present; + + if (!FNAME(is_present_gpte)(gpte)) + goto no_present; + + /* if accessed bit is not supported prefetch non accessed gpte */ + if (PT_GUEST_ACCESSED_MASK && !(gpte & PT_GUEST_ACCESSED_MASK)) + goto no_present; + + return false; + +no_present: + drop_spte(vcpu->kvm, spte); + return true; +} + +static inline unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, u64 gpte) +{ + unsigned access; +#if PTTYPE == PTTYPE_EPT + access = ((gpte & VMX_EPT_WRITABLE_MASK) ? ACC_WRITE_MASK : 0) | + ((gpte & VMX_EPT_EXECUTABLE_MASK) ? ACC_EXEC_MASK : 0) | + ACC_USER_MASK; +#else + access = (gpte & (PT_WRITABLE_MASK | PT_USER_MASK)) | ACC_EXEC_MASK; + access &= ~(gpte >> PT64_NX_SHIFT); +#endif + + return access; +} + static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, struct guest_walker *walker, @@ -114,18 +214,23 @@ static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu, gfn_t table_gfn; int ret; + /* dirty/accessed bits are not supported, so no need to update them */ + if (!PT_GUEST_DIRTY_MASK) + return 0; + for (level = walker->max_level; level >= walker->level; --level) { pte = orig_pte = walker->ptes[level - 1]; table_gfn = walker->table_gfn[level - 1]; ptep_user = walker->ptep_user[level - 1]; index = offset_in_page(ptep_user) / sizeof(pt_element_t); - if (!(pte & PT_ACCESSED_MASK)) { + if (!(pte & PT_GUEST_ACCESSED_MASK)) { trace_kvm_mmu_set_accessed_bit(table_gfn, index, sizeof(pte)); - pte |= PT_ACCESSED_MASK; + pte |= PT_GUEST_ACCESSED_MASK; } - if (level == walker->level && write_fault && !is_dirty_gpte(pte)) { + if (level == walker->level && write_fault && + !(pte & PT_GUEST_DIRTY_MASK)) { trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte)); - pte |= PT_DIRTY_MASK; + pte |= PT_GUEST_DIRTY_MASK; } if (pte == orig_pte) continue; @@ -170,7 +275,7 @@ retry_walk: if (walker->level == PT32E_ROOT_LEVEL) { pte = mmu->get_pdptr(vcpu, (addr >> 30) & 3); trace_kvm_mmu_paging_element(pte, walker->level); - if (!is_present_gpte(pte)) + if (!FNAME(is_present_gpte)(pte)) goto error; --walker->level; } @@ -179,7 +284,7 @@ retry_walk: ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) || (mmu->get_cr3(vcpu) & CR3_NONPAE_RESERVED_BITS) == 0); - accessed_dirty = PT_ACCESSED_MASK; + accessed_dirty = PT_GUEST_ACCESSED_MASK; pt_access = pte_access = ACC_ALL; ++walker->level; @@ -215,17 +320,17 @@ retry_walk: trace_kvm_mmu_paging_element(pte, walker->level); - if (unlikely(!is_present_gpte(pte))) + if (unlikely(!FNAME(is_present_gpte)(pte))) goto error; - if (unlikely(is_rsvd_bits_set(&vcpu->arch.mmu, pte, - walker->level))) { + if (unlikely(FNAME(is_rsvd_bits_set)(mmu, pte, + walker->level))) { errcode |= PFERR_RSVD_MASK | PFERR_PRESENT_MASK; goto error; } accessed_dirty &= pte; - pte_access = pt_access & gpte_access(vcpu, pte); 
+ pte_access = pt_access & FNAME(gpte_access)(vcpu, pte); walker->ptes[walker->level - 1] = pte; } while (!is_last_gpte(mmu, walker->level, pte)); @@ -248,13 +353,15 @@ retry_walk: walker->gfn = real_gpa >> PAGE_SHIFT; if (!write_fault) - protect_clean_gpte(&pte_access, pte); + FNAME(protect_clean_gpte)(&pte_access, pte); else /* - * On a write fault, fold the dirty bit into accessed_dirty by - * shifting it one place right. + * On a write fault, fold the dirty bit into accessed_dirty. + * For modes without A/D bits support accessed_dirty will be + * always clear. */ - accessed_dirty &= pte >> (PT_DIRTY_SHIFT - PT_ACCESSED_SHIFT); + accessed_dirty &= pte >> + (PT_GUEST_DIRTY_SHIFT - PT_GUEST_ACCESSED_SHIFT); if (unlikely(!accessed_dirty)) { ret = FNAME(update_accessed_dirty_bits)(vcpu, mmu, walker, write_fault); @@ -279,6 +386,25 @@ error: walker->fault.vector = PF_VECTOR; walker->fault.error_code_valid = true; walker->fault.error_code = errcode; + +#if PTTYPE == PTTYPE_EPT + /* + * Use PFERR_RSVD_MASK in error_code to tell if an EPT + * misconfiguration needs to be injected. The detection is + * done by is_rsvd_bits_set() above. + * + * We set up the value of exit_qualification to inject: + * [2:0] - Derived from [2:0] of real exit_qualification at EPT violation + * [5:3] - Calculated by the page walk of the guest EPT page tables + * [7:8] - Derived from [7:8] of real exit_qualification + * + * The other bits are set to 0. + */ + if (!(errcode & PFERR_RSVD_MASK)) { + vcpu->arch.exit_qualification &= 0x187; + vcpu->arch.exit_qualification |= ((pt_access & pte) & 0x7) << 3; + } +#endif walker->fault.address = addr; walker->fault.nested_page_fault = mmu != vcpu->arch.walk_mmu; @@ -293,6 +419,7 @@ static int FNAME(walk_addr)(struct guest_walker *walker, access); } +#if PTTYPE != PTTYPE_EPT static int FNAME(walk_addr_nested)(struct guest_walker *walker, struct kvm_vcpu *vcpu, gva_t addr, u32 access) @@ -300,6 +427,7 @@ static int FNAME(walk_addr_nested)(struct guest_walker *walker, return FNAME(walk_addr_generic)(walker, vcpu, &vcpu->arch.nested_mmu, addr, access); } +#endif static bool FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, @@ -309,14 +437,14 @@ FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, gfn_t gfn; pfn_t pfn; - if (prefetch_invalid_gpte(vcpu, sp, spte, gpte)) + if (FNAME(prefetch_invalid_gpte)(vcpu, sp, spte, gpte)) return false; pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte); gfn = gpte_to_gfn(gpte); - pte_access = sp->role.access & gpte_access(vcpu, gpte); - protect_clean_gpte(&pte_access, gpte); + pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte); + FNAME(protect_clean_gpte)(&pte_access, gpte); pfn = pte_prefetch_gfn_to_pfn(vcpu, gfn, no_dirty_log && (pte_access & ACC_WRITE_MASK)); if (is_error_pfn(pfn)) @@ -446,7 +574,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, goto out_gpte_changed; if (sp) - link_shadow_page(it.sptep, sp); + link_shadow_page(it.sptep, sp, PT_GUEST_ACCESSED_MASK); } for (; @@ -466,7 +594,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, sp = kvm_mmu_get_page(vcpu, direct_gfn, addr, it.level-1, true, direct_access, it.sptep); - link_shadow_page(it.sptep, sp); + link_shadow_page(it.sptep, sp, PT_GUEST_ACCESSED_MASK); } clear_sp_write_flooding_count(it.sptep); @@ -727,6 +855,7 @@ static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access, return gpa; } +#if PTTYPE != PTTYPE_EPT static gpa_t FNAME(gva_to_gpa_nested)(struct kvm_vcpu *vcpu,
gva_t vaddr, u32 access, struct x86_exception *exception) @@ -745,6 +874,7 @@ static gpa_t FNAME(gva_to_gpa_nested)(struct kvm_vcpu *vcpu, gva_t vaddr, return gpa; } +#endif /* * Using the cached information from sp->gfns is safe because: @@ -785,15 +915,15 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) sizeof(pt_element_t))) return -EINVAL; - if (prefetch_invalid_gpte(vcpu, sp, &sp->spt[i], gpte)) { + if (FNAME(prefetch_invalid_gpte)(vcpu, sp, &sp->spt[i], gpte)) { vcpu->kvm->tlbs_dirty++; continue; } gfn = gpte_to_gfn(gpte); pte_access = sp->role.access; - pte_access &= gpte_access(vcpu, gpte); - protect_clean_gpte(&pte_access, gpte); + pte_access &= FNAME(gpte_access)(vcpu, gpte); + FNAME(protect_clean_gpte)(&pte_access, gpte); if (sync_mmio_spte(vcpu->kvm, &sp->spt[i], gfn, pte_access, &nr_present)) @@ -830,3 +960,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) #undef gpte_to_gfn #undef gpte_to_gfn_lvl #undef CMPXCHG +#undef PT_GUEST_ACCESSED_MASK +#undef PT_GUEST_DIRTY_MASK +#undef PT_GUEST_DIRTY_SHIFT +#undef PT_GUEST_ACCESSED_SHIFT diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index c53e797e7369..5c4f63151b4d 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -160,7 +160,7 @@ static void stop_counter(struct kvm_pmc *pmc) static void reprogram_counter(struct kvm_pmc *pmc, u32 type, unsigned config, bool exclude_user, bool exclude_kernel, - bool intr) + bool intr, bool in_tx, bool in_tx_cp) { struct perf_event *event; struct perf_event_attr attr = { @@ -173,6 +173,10 @@ static void reprogram_counter(struct kvm_pmc *pmc, u32 type, .exclude_kernel = exclude_kernel, .config = config, }; + if (in_tx) + attr.config |= HSW_IN_TX; + if (in_tx_cp) + attr.config |= HSW_IN_TX_CHECKPOINTED; attr.sample_period = (-pmc->counter) & pmc_bitmask(pmc); @@ -226,7 +230,9 @@ static void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel) if (!(eventsel & (ARCH_PERFMON_EVENTSEL_EDGE | ARCH_PERFMON_EVENTSEL_INV | - ARCH_PERFMON_EVENTSEL_CMASK))) { + ARCH_PERFMON_EVENTSEL_CMASK | + HSW_IN_TX | + HSW_IN_TX_CHECKPOINTED))) { config = find_arch_event(&pmc->vcpu->arch.pmu, event_select, unit_mask); if (config != PERF_COUNT_HW_MAX) @@ -239,7 +245,9 @@ static void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel) reprogram_counter(pmc, type, config, !(eventsel & ARCH_PERFMON_EVENTSEL_USR), !(eventsel & ARCH_PERFMON_EVENTSEL_OS), - eventsel & ARCH_PERFMON_EVENTSEL_INT); + eventsel & ARCH_PERFMON_EVENTSEL_INT, + (eventsel & HSW_IN_TX), + (eventsel & HSW_IN_TX_CHECKPOINTED)); } static void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 en_pmi, int idx) @@ -256,7 +264,7 @@ static void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 en_pmi, int idx) arch_events[fixed_pmc_events[idx]].event_type, !(en & 0x2), /* exclude user */ !(en & 0x1), /* exclude kernel */ - pmi); + pmi, false, false); } static inline u8 fixed_en_pmi(u64 ctrl, int idx) @@ -408,7 +416,7 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) } else if ((pmc = get_gp_pmc(pmu, index, MSR_P6_EVNTSEL0))) { if (data == pmc->eventsel) return 0; - if (!(data & 0xffffffff00200000ull)) { + if (!(data & pmu->reserved_bits)) { reprogram_gp_counter(pmc, data); return 0; } @@ -450,6 +458,7 @@ void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu) pmu->counter_bitmask[KVM_PMC_GP] = 0; pmu->counter_bitmask[KVM_PMC_FIXED] = 0; pmu->version = 0; + pmu->reserved_bits = 0xffffffff00200000ull; entry = kvm_find_cpuid_entry(vcpu, 0xa, 0); if (!entry) @@ -478,6 +487,12 
@@ void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu) pmu->global_ctrl = ((1 << pmu->nr_arch_gp_counters) - 1) | (((1ull << pmu->nr_arch_fixed_counters) - 1) << INTEL_PMC_IDX_FIXED); pmu->global_ctrl_mask = ~pmu->global_ctrl; + + entry = kvm_find_cpuid_entry(vcpu, 7, 0); + if (entry && + (boot_cpu_has(X86_FEATURE_HLE) || boot_cpu_has(X86_FEATURE_RTM)) && + (entry->ebx & (X86_FEATURE_HLE|X86_FEATURE_RTM))) + pmu->reserved_bits ^= HSW_IN_TX|HSW_IN_TX_CHECKPOINTED; } void kvm_pmu_init(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 064d0be67ecc..1f1da43ff2a2 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -373,6 +373,7 @@ struct nested_vmx { * we must keep them pinned while L2 runs. */ struct page *apic_access_page; + u64 msr_ia32_feature_control; }; #define POSTED_INTR_ON 0 @@ -711,10 +712,10 @@ static void nested_release_page_clean(struct page *page) kvm_release_page_clean(page); } +static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu); static u64 construct_eptp(unsigned long root_hpa); static void kvm_cpu_vmxon(u64 addr); static void kvm_cpu_vmxoff(void); -static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3); static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr); static void vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); @@ -1039,12 +1040,16 @@ static inline bool nested_cpu_has2(struct vmcs12 *vmcs12, u32 bit) (vmcs12->secondary_vm_exec_control & bit); } -static inline bool nested_cpu_has_virtual_nmis(struct vmcs12 *vmcs12, - struct kvm_vcpu *vcpu) +static inline bool nested_cpu_has_virtual_nmis(struct vmcs12 *vmcs12) { return vmcs12->pin_based_vm_exec_control & PIN_BASED_VIRTUAL_NMIS; } +static inline int nested_cpu_has_ept(struct vmcs12 *vmcs12) +{ + return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_EPT); +} + static inline bool is_exception(u32 intr_info) { return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK)) @@ -2155,6 +2160,7 @@ static u32 nested_vmx_pinbased_ctls_low, nested_vmx_pinbased_ctls_high; static u32 nested_vmx_exit_ctls_low, nested_vmx_exit_ctls_high; static u32 nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high; static u32 nested_vmx_misc_low, nested_vmx_misc_high; +static u32 nested_vmx_ept_caps; static __init void nested_vmx_setup_ctls_msrs(void) { /* @@ -2190,14 +2196,17 @@ static __init void nested_vmx_setup_ctls_msrs(void) * If bit 55 of VMX_BASIC is off, bits 0-8 and 10, 11, 13, 14, 16 and * 17 must be 1. */ + rdmsr(MSR_IA32_VMX_EXIT_CTLS, + nested_vmx_exit_ctls_low, nested_vmx_exit_ctls_high); nested_vmx_exit_ctls_low = VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR; /* Note that guest use of VM_EXIT_ACK_INTR_ON_EXIT is not supported. */ + nested_vmx_exit_ctls_high &= #ifdef CONFIG_X86_64 - nested_vmx_exit_ctls_high = VM_EXIT_HOST_ADDR_SPACE_SIZE; -#else - nested_vmx_exit_ctls_high = 0; + VM_EXIT_HOST_ADDR_SPACE_SIZE | #endif - nested_vmx_exit_ctls_high |= VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR; + VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT; + nested_vmx_exit_ctls_high |= (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR | + VM_EXIT_LOAD_IA32_EFER); /* entry controls */ rdmsr(MSR_IA32_VMX_ENTRY_CTLS, @@ -2205,8 +2214,12 @@ static __init void nested_vmx_setup_ctls_msrs(void) /* If bit 55 of VMX_BASIC is off, bits 0-8 and 12 must be 1. 
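 */

The rdmsr() pairs in this function read VMX capability MSRs whose low word is the allowed-0 settings (bits that must be 1) and whose high word is the allowed-1 settings (bits that may be 1); the masking that follows narrows what gets advertised to L1. Validating a control value against such a pair reduces to two bit tests (a sketch, not an existing kernel helper; the example pair is invented):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* low = allowed-0 settings (must-be-1 bits), high = allowed-1 settings */
static bool vmx_ctl_allowed(uint32_t ctl, uint32_t low, uint32_t high)
{
	return (ctl & low) == low	/* every mandatory bit is set */
	    && (ctl & ~high) == 0;	/* no bit outside the permitted set */
}

int main(void)
{
	uint32_t low = 0x00000016, high = 0x0000fff6;	/* invented pair */

	printf("%d\n", vmx_ctl_allowed(0x00000016, low, high)); /* 1 */
	printf("%d\n", vmx_ctl_allowed(0x00010016, low, high)); /* 0: bit 16 forbidden */
	printf("%d\n", vmx_ctl_allowed(0x00000006, low, high)); /* 0: must-be-1 bit clear */
	return 0;
}

/*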
*/ nested_vmx_entry_ctls_low = VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR; nested_vmx_entry_ctls_high &= - VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_IA32E_MODE; - nested_vmx_entry_ctls_high |= VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR; +#ifdef CONFIG_X86_64 + VM_ENTRY_IA32E_MODE | +#endif + VM_ENTRY_LOAD_IA32_PAT; + nested_vmx_entry_ctls_high |= (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | + VM_ENTRY_LOAD_IA32_EFER); /* cpu-based controls */ rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, @@ -2241,6 +2254,22 @@ static __init void nested_vmx_setup_ctls_msrs(void) SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | SECONDARY_EXEC_WBINVD_EXITING; + if (enable_ept) { + /* nested EPT: emulate EPT also to L1 */ + nested_vmx_secondary_ctls_high |= SECONDARY_EXEC_ENABLE_EPT; + nested_vmx_ept_caps = VMX_EPT_PAGE_WALK_4_BIT | + VMX_EPTP_WB_BIT | VMX_EPT_INVEPT_BIT; + nested_vmx_ept_caps &= vmx_capability.ept; + /* + * Since invept is completely emulated we support both global + * and context invalidation independent of what host cpu + * supports + */ + nested_vmx_ept_caps |= VMX_EPT_EXTENT_GLOBAL_BIT | + VMX_EPT_EXTENT_CONTEXT_BIT; + } else + nested_vmx_ept_caps = 0; + /* miscellaneous data */ rdmsr(MSR_IA32_VMX_MISC, nested_vmx_misc_low, nested_vmx_misc_high); nested_vmx_misc_low &= VMX_MISC_PREEMPTION_TIMER_RATE_MASK | @@ -2282,8 +2311,11 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) switch (msr_index) { case MSR_IA32_FEATURE_CONTROL: - *pdata = 0; - break; + if (nested_vmx_allowed(vcpu)) { + *pdata = to_vmx(vcpu)->nested.msr_ia32_feature_control; + break; + } + return 0; case MSR_IA32_VMX_BASIC: /* * This MSR reports some information about VMX support. We @@ -2346,8 +2378,8 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) nested_vmx_secondary_ctls_high); break; case MSR_IA32_VMX_EPT_VPID_CAP: - /* Currently, no nested ept or nested vpid */ - *pdata = 0; + /* Currently, no nested vpid support */ + *pdata = nested_vmx_ept_caps; break; default: return 0; @@ -2356,14 +2388,24 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) return 1; } -static int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) +static int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) { + u32 msr_index = msr_info->index; + u64 data = msr_info->data; + bool host_initialized = msr_info->host_initiated; + if (!nested_vmx_allowed(vcpu)) return 0; - if (msr_index == MSR_IA32_FEATURE_CONTROL) - /* TODO: the right thing. */ + if (msr_index == MSR_IA32_FEATURE_CONTROL) { + if (!host_initialized && + to_vmx(vcpu)->nested.msr_ia32_feature_control + & FEATURE_CONTROL_LOCKED) + return 0; + to_vmx(vcpu)->nested.msr_ia32_feature_control = data; return 1; + } + /* * No need to treat VMX capability MSRs specially: If we don't handle * them, handle_wrmsr will #GP(0), which is correct (they are readonly) @@ -2494,7 +2536,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) return 1; /* Otherwise falls through */ default: - if (vmx_set_vmx_msr(vcpu, msr_index, data)) + if (vmx_set_vmx_msr(vcpu, msr_info)) break; msr = find_msr_entry(vmx, msr_index); if (msr) { @@ -5302,9 +5344,13 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu) /* It is a write fault? */ error_code = exit_qualification & (1U << 1); + /* It is a fetch fault? */ + error_code |= (exit_qualification & (1U << 2)) << 2; /* ept page table is present? 
*/ error_code |= (exit_qualification >> 3) & 0x1; + vcpu->arch.exit_qualification = exit_qualification; + return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL, 0); } @@ -5438,7 +5484,8 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) err = emulate_instruction(vcpu, EMULTYPE_NO_REEXECUTE); - if (err == EMULATE_DO_MMIO) { + if (err == EMULATE_USER_EXIT) { + ++vcpu->stat.mmio_exits; ret = 0; goto out; } @@ -5567,8 +5614,47 @@ static void nested_free_all_saved_vmcss(struct vcpu_vmx *vmx) free_loaded_vmcs(&vmx->vmcs01); } +/* + * The following 3 functions, nested_vmx_succeed()/failValid()/failInvalid(), + * set the success or error code of an emulated VMX instruction, as specified + * by Vol 2B, VMX Instruction Reference, "Conventions". + */ +static void nested_vmx_succeed(struct kvm_vcpu *vcpu) +{ + vmx_set_rflags(vcpu, vmx_get_rflags(vcpu) + & ~(X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF | + X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_OF)); +} + +static void nested_vmx_failInvalid(struct kvm_vcpu *vcpu) +{ + vmx_set_rflags(vcpu, (vmx_get_rflags(vcpu) + & ~(X86_EFLAGS_PF | X86_EFLAGS_AF | X86_EFLAGS_ZF | + X86_EFLAGS_SF | X86_EFLAGS_OF)) + | X86_EFLAGS_CF); +} + static void nested_vmx_failValid(struct kvm_vcpu *vcpu, - u32 vm_instruction_error); + u32 vm_instruction_error) +{ + if (to_vmx(vcpu)->nested.current_vmptr == -1ull) { + /* + * failValid writes the error number to the current VMCS, which + * can't be done if there isn't a current VMCS. + */ + nested_vmx_failInvalid(vcpu); + return; + } + vmx_set_rflags(vcpu, (vmx_get_rflags(vcpu) + & ~(X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF | + X86_EFLAGS_SF | X86_EFLAGS_OF)) + | X86_EFLAGS_ZF); + get_vmcs12(vcpu)->vm_instruction_error = vm_instruction_error; + /* + * We don't need to force a shadow sync because + * VM_INSTRUCTION_ERROR is not shadowed + */ +} /* * Emulate the VMXON instruction. @@ -5583,6 +5669,8 @@ static int handle_vmon(struct kvm_vcpu *vcpu) struct kvm_segment cs; struct vcpu_vmx *vmx = to_vmx(vcpu); struct vmcs *shadow_vmcs; + const u64 VMXON_NEEDED_FEATURES = FEATURE_CONTROL_LOCKED + | FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; /* The Intel VMX Instruction Reference lists a bunch of bits that * are prerequisite to running VMXON, most notably cr4.VMXE must be @@ -5611,6 +5699,13 @@ static int handle_vmon(struct kvm_vcpu *vcpu) skip_emulated_instruction(vcpu); return 1; } + + if ((vmx->nested.msr_ia32_feature_control & VMXON_NEEDED_FEATURES) + != VMXON_NEEDED_FEATURES) { + kvm_inject_gp(vcpu, 0); + return 1; + } + if (enable_shadow_vmcs) { shadow_vmcs = alloc_vmcs(); if (!shadow_vmcs) @@ -5628,6 +5723,7 @@ static int handle_vmon(struct kvm_vcpu *vcpu) vmx->nested.vmxon = true; skip_emulated_instruction(vcpu); + nested_vmx_succeed(vcpu); return 1; } @@ -5712,6 +5808,7 @@ static int handle_vmoff(struct kvm_vcpu *vcpu) return 1; free_nested(to_vmx(vcpu)); skip_emulated_instruction(vcpu); + nested_vmx_succeed(vcpu); return 1; } @@ -5768,48 +5865,6 @@ static int get_vmx_mem_address(struct kvm_vcpu *vcpu, return 0; } -/* - * The following 3 functions, nested_vmx_succeed()/failValid()/failInvalid(), - * set the success or error code of an emulated VMX instruction, as specified - * by Vol 2B, VMX Instruction Reference, "Conventions".
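 */

Per the convention the relocated helpers above implement, an emulated VMX instruction reports its outcome entirely through RFLAGS: all six arithmetic flags clear means VMsucceed, CF=1 means VMfailInvalid, and ZF=1 means VMfailValid with the error number stored in the current VMCS. Decoding that status is a two-bit test (a sketch; only the CF/ZF masks below are real x86 flag values):

#include <stdint.h>
#include <stdio.h>

#define X86_EFLAGS_CF 0x0001
#define X86_EFLAGS_ZF 0x0040

static const char *vmx_result(uint64_t rflags)
{
	if (rflags & X86_EFLAGS_CF)
		return "VMfailInvalid";	/* no current VMCS to report in */
	if (rflags & X86_EFLAGS_ZF)
		return "VMfailValid";	/* error number is in the VMCS */
	return "VMsucceed";
}

int main(void)
{
	printf("%s\n", vmx_result(0));			/* VMsucceed */
	printf("%s\n", vmx_result(X86_EFLAGS_CF));	/* VMfailInvalid */
	printf("%s\n", vmx_result(X86_EFLAGS_ZF));	/* VMfailValid */
	return 0;
}

/*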
- */ -static void nested_vmx_succeed(struct kvm_vcpu *vcpu) -{ - vmx_set_rflags(vcpu, vmx_get_rflags(vcpu) - & ~(X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF | - X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_OF)); -} - -static void nested_vmx_failInvalid(struct kvm_vcpu *vcpu) -{ - vmx_set_rflags(vcpu, (vmx_get_rflags(vcpu) - & ~(X86_EFLAGS_PF | X86_EFLAGS_AF | X86_EFLAGS_ZF | - X86_EFLAGS_SF | X86_EFLAGS_OF)) - | X86_EFLAGS_CF); -} - -static void nested_vmx_failValid(struct kvm_vcpu *vcpu, - u32 vm_instruction_error) -{ - if (to_vmx(vcpu)->nested.current_vmptr == -1ull) { - /* - * failValid writes the error number to the current VMCS, which - * can't be done there isn't a current VMCS. - */ - nested_vmx_failInvalid(vcpu); - return; - } - vmx_set_rflags(vcpu, (vmx_get_rflags(vcpu) - & ~(X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF | - X86_EFLAGS_SF | X86_EFLAGS_OF)) - | X86_EFLAGS_ZF); - get_vmcs12(vcpu)->vm_instruction_error = vm_instruction_error; - /* - * We don't need to force a shadow sync because - * VM_INSTRUCTION_ERROR is not shadowed - */ -} - /* Emulate the VMCLEAR instruction */ static int handle_vmclear(struct kvm_vcpu *vcpu) { @@ -5972,8 +6027,8 @@ static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx) unsigned long field; u64 field_value; struct vmcs *shadow_vmcs = vmx->nested.current_shadow_vmcs; - unsigned long *fields = (unsigned long *)shadow_read_write_fields; - int num_fields = max_shadow_read_write_fields; + const unsigned long *fields = shadow_read_write_fields; + const int num_fields = max_shadow_read_write_fields; vmcs_load(shadow_vmcs); @@ -6002,12 +6057,11 @@ static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx) static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx) { - unsigned long *fields[] = { - (unsigned long *)shadow_read_write_fields, - (unsigned long *)shadow_read_only_fields + const unsigned long *fields[] = { + shadow_read_write_fields, + shadow_read_only_fields }; - int num_lists = ARRAY_SIZE(fields); - int max_fields[] = { + const int max_fields[] = { max_shadow_read_write_fields, max_shadow_read_only_fields }; @@ -6018,7 +6072,7 @@ static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx) vmcs_load(shadow_vmcs); - for (q = 0; q < num_lists; q++) { + for (q = 0; q < ARRAY_SIZE(fields); q++) { for (i = 0; i < max_fields[q]; i++) { field = fields[q][i]; vmcs12_read_any(&vmx->vcpu, field, &field_value); @@ -6248,6 +6302,74 @@ static int handle_vmptrst(struct kvm_vcpu *vcpu) return 1; } +/* Emulate the INVEPT instruction */ +static int handle_invept(struct kvm_vcpu *vcpu) +{ + u32 vmx_instruction_info, types; + unsigned long type; + gva_t gva; + struct x86_exception e; + struct { + u64 eptp, gpa; + } operand; + u64 eptp_mask = ((1ull << 51) - 1) & PAGE_MASK; + + if (!(nested_vmx_secondary_ctls_high & SECONDARY_EXEC_ENABLE_EPT) || + !(nested_vmx_ept_caps & VMX_EPT_INVEPT_BIT)) { + kvm_queue_exception(vcpu, UD_VECTOR); + return 1; + } + + if (!nested_vmx_check_permission(vcpu)) + return 1; + + if (!kvm_read_cr0_bits(vcpu, X86_CR0_PE)) { + kvm_queue_exception(vcpu, UD_VECTOR); + return 1; + } + + vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); + type = kvm_register_read(vcpu, (vmx_instruction_info >> 28) & 0xf); + + types = (nested_vmx_ept_caps >> VMX_EPT_EXTENT_SHIFT) & 6; + + if (!(types & (1UL << type))) { + nested_vmx_failValid(vcpu, + VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID); + return 1; + } + + /* According to the Intel VMX instruction reference, the memory + * operand is read even if it isn't needed (e.g., for type==global) + */ + if 
(get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION), + vmx_instruction_info, &gva)) + return 1; + if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &operand, + sizeof(operand), &e)) { + kvm_inject_page_fault(vcpu, &e); + return 1; + } + + switch (type) { + case VMX_EPT_EXTENT_CONTEXT: + if ((operand.eptp & eptp_mask) != + (nested_ept_get_cr3(vcpu) & eptp_mask)) + break; + case VMX_EPT_EXTENT_GLOBAL: + kvm_mmu_sync_roots(vcpu); + kvm_mmu_flush_tlb(vcpu); + nested_vmx_succeed(vcpu); + break; + default: + BUG_ON(1); + break; + } + + skip_emulated_instruction(vcpu); + return 1; +} + /* * The exit handlers return 1 if the exit was handled fully and guest execution * may resume. Otherwise they set the kvm_run parameter to indicate what needs @@ -6292,6 +6414,7 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { [EXIT_REASON_PAUSE_INSTRUCTION] = handle_pause, [EXIT_REASON_MWAIT_INSTRUCTION] = handle_invalid_op, [EXIT_REASON_MONITOR_INSTRUCTION] = handle_invalid_op, + [EXIT_REASON_INVEPT] = handle_invept, }; static const int kvm_vmx_max_exit_handlers = @@ -6518,6 +6641,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) case EXIT_REASON_VMPTRST: case EXIT_REASON_VMREAD: case EXIT_REASON_VMRESUME: case EXIT_REASON_VMWRITE: case EXIT_REASON_VMOFF: case EXIT_REASON_VMON: + case EXIT_REASON_INVEPT: /* * VMX instructions trap unconditionally. This allows L1 to * emulate them for its L2 guest, i.e., allows 3-level nesting! @@ -6550,7 +6674,20 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) return nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES); case EXIT_REASON_EPT_VIOLATION: + /* + * L0 always deals with the EPT violation. If nested EPT is + * used, and the nested mmu code discovers that the address is + * missing in the guest EPT table (EPT12), the EPT violation + * will be injected with nested_ept_inject_page_fault() + */ + return 0; case EXIT_REASON_EPT_MISCONFIG: + /* + * L2 never directly uses L1's EPT, but rather L0's own EPT + * table (shadow on EPT) or a merged EPT table that L0 built + * (EPT on EPT). So any problems with the structure of the + * table are L0's fault.
+ */ return 0; case EXIT_REASON_PREEMPTION_TIMER: return vmcs12->pin_based_vm_exec_control & @@ -6638,7 +6775,7 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked && !(is_guest_mode(vcpu) && nested_cpu_has_virtual_nmis( - get_vmcs12(vcpu), vcpu)))) { + get_vmcs12(vcpu))))) { if (vmx_interrupt_allowed(vcpu)) { vmx->soft_vnmi_blocked = 0; } else if (vmx->vnmi_blocked_time > 1000000000LL && @@ -7326,6 +7463,48 @@ static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) entry->ecx |= bit(X86_FEATURE_VMX); } +static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu, + struct x86_exception *fault) +{ + struct vmcs12 *vmcs12; + nested_vmx_vmexit(vcpu); + vmcs12 = get_vmcs12(vcpu); + + if (fault->error_code & PFERR_RSVD_MASK) + vmcs12->vm_exit_reason = EXIT_REASON_EPT_MISCONFIG; + else + vmcs12->vm_exit_reason = EXIT_REASON_EPT_VIOLATION; + vmcs12->exit_qualification = vcpu->arch.exit_qualification; + vmcs12->guest_physical_address = fault->address; +} + +/* Callbacks for nested_ept_init_mmu_context: */ + +static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu) +{ + /* return the page table to be shadowed - in our case, EPT12 */ + return get_vmcs12(vcpu)->ept_pointer; +} + +static int nested_ept_init_mmu_context(struct kvm_vcpu *vcpu) +{ + int r = kvm_init_shadow_ept_mmu(vcpu, &vcpu->arch.mmu, + nested_vmx_ept_caps & VMX_EPT_EXECUTE_ONLY_BIT); + + vcpu->arch.mmu.set_cr3 = vmx_set_cr3; + vcpu->arch.mmu.get_cr3 = nested_ept_get_cr3; + vcpu->arch.mmu.inject_page_fault = nested_ept_inject_page_fault; + + vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu; + + return r; +} + +static void nested_ept_uninit_mmu_context(struct kvm_vcpu *vcpu) +{ + vcpu->arch.walk_mmu = &vcpu->arch.mmu; +} + /* * prepare_vmcs02 is called when the L1 guest hypervisor runs its nested * L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it @@ -7388,7 +7567,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) vmcs12->guest_interruptibility_info); vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs); kvm_set_dr(vcpu, 7, vmcs12->guest_dr7); - vmcs_writel(GUEST_RFLAGS, vmcs12->guest_rflags); + vmx_set_rflags(vcpu, vmcs12->guest_rflags); vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, vmcs12->guest_pending_dbg_exceptions); vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->guest_sysenter_esp); @@ -7508,15 +7687,24 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) vcpu->arch.cr0_guest_owned_bits &= ~vmcs12->cr0_guest_host_mask; vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits); - /* Note: IA32_MODE, LOAD_IA32_EFER are modified by vmx_set_efer below */ - vmcs_write32(VM_EXIT_CONTROLS, - vmcs12->vm_exit_controls | vmcs_config.vmexit_ctrl); - vmcs_write32(VM_ENTRY_CONTROLS, vmcs12->vm_entry_controls | + /* L2->L1 exit controls are emulated - the hardware exit is to L0 so + * we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER + * bits are further modified by vmx_set_efer() below. + */ + vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl); + + /* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are + * emulated by vmx_set_efer(), below. 
+ */ + vmcs_write32(VM_ENTRY_CONTROLS, + (vmcs12->vm_entry_controls & ~VM_ENTRY_LOAD_IA32_EFER & + ~VM_ENTRY_IA32E_MODE) | (vmcs_config.vmentry_ctrl & ~VM_ENTRY_IA32E_MODE)); - if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT) + if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT) { vmcs_write64(GUEST_IA32_PAT, vmcs12->guest_ia32_pat); - else if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) + vcpu->arch.pat = vmcs12->guest_ia32_pat; + } else if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat); @@ -7538,6 +7726,11 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) vmx_flush_tlb(vcpu); } + if (nested_cpu_has_ept(vmcs12)) { + kvm_mmu_unload(vcpu); + nested_ept_init_mmu_context(vcpu); + } + if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER) vcpu->arch.efer = vmcs12->guest_ia32_efer; else if (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) @@ -7565,6 +7758,16 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) kvm_set_cr3(vcpu, vmcs12->guest_cr3); kvm_mmu_reset_context(vcpu); + /* + * L1 may access the L2's PDPTR, so save them to construct vmcs12 + */ + if (enable_ept) { + vmcs_write64(GUEST_PDPTR0, vmcs12->guest_pdptr0); + vmcs_write64(GUEST_PDPTR1, vmcs12->guest_pdptr1); + vmcs_write64(GUEST_PDPTR2, vmcs12->guest_pdptr2); + vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3); + } + kvm_register_write(vcpu, VCPU_REGS_RSP, vmcs12->guest_rsp); kvm_register_write(vcpu, VCPU_REGS_RIP, vmcs12->guest_rip); } @@ -7887,6 +8090,22 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) vmcs12->guest_pending_dbg_exceptions = vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS); + /* + * In some cases (usually, nested EPT), L2 is allowed to change its + * own CR3 without exiting. If it has changed it, we must keep it. + * Of course, if L0 is using shadow page tables, GUEST_CR3 was defined + * by L0, not L1 or L2, so we mustn't unconditionally copy it to vmcs12. + * + * Additionally, restore L2's PDPTR to vmcs12. 
+ */ + if (enable_ept) { + vmcs12->guest_cr3 = vmcs_read64(GUEST_CR3); + vmcs12->guest_pdptr0 = vmcs_read64(GUEST_PDPTR0); + vmcs12->guest_pdptr1 = vmcs_read64(GUEST_PDPTR1); + vmcs12->guest_pdptr2 = vmcs_read64(GUEST_PDPTR2); + vmcs12->guest_pdptr3 = vmcs_read64(GUEST_PDPTR3); + } + vmcs12->vm_entry_controls = (vmcs12->vm_entry_controls & ~VM_ENTRY_IA32E_MODE) | (vmcs_read32(VM_ENTRY_CONTROLS) & VM_ENTRY_IA32E_MODE); @@ -7948,6 +8167,8 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) { + struct kvm_segment seg; + if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER) vcpu->arch.efer = vmcs12->host_ia32_efer; else if (vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE) @@ -7982,7 +8203,9 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK); kvm_set_cr4(vcpu, vmcs12->host_cr4); - /* shadow page tables on either EPT or shadow page tables */ + if (nested_cpu_has_ept(vmcs12)) + nested_ept_uninit_mmu_context(vcpu); + kvm_set_cr3(vcpu, vmcs12->host_cr3); kvm_mmu_reset_context(vcpu); @@ -8001,23 +8224,61 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->host_ia32_sysenter_eip); vmcs_writel(GUEST_IDTR_BASE, vmcs12->host_idtr_base); vmcs_writel(GUEST_GDTR_BASE, vmcs12->host_gdtr_base); - vmcs_writel(GUEST_TR_BASE, vmcs12->host_tr_base); - vmcs_writel(GUEST_GS_BASE, vmcs12->host_gs_base); - vmcs_writel(GUEST_FS_BASE, vmcs12->host_fs_base); - vmcs_write16(GUEST_ES_SELECTOR, vmcs12->host_es_selector); - vmcs_write16(GUEST_CS_SELECTOR, vmcs12->host_cs_selector); - vmcs_write16(GUEST_SS_SELECTOR, vmcs12->host_ss_selector); - vmcs_write16(GUEST_DS_SELECTOR, vmcs12->host_ds_selector); - vmcs_write16(GUEST_FS_SELECTOR, vmcs12->host_fs_selector); - vmcs_write16(GUEST_GS_SELECTOR, vmcs12->host_gs_selector); - vmcs_write16(GUEST_TR_SELECTOR, vmcs12->host_tr_selector); - - if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PAT) + + if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PAT) { vmcs_write64(GUEST_IA32_PAT, vmcs12->host_ia32_pat); + vcpu->arch.pat = vmcs12->host_ia32_pat; + } if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) vmcs_write64(GUEST_IA32_PERF_GLOBAL_CTRL, vmcs12->host_ia32_perf_global_ctrl); + /* Set L1 segment info according to Intel SDM + 27.5.2 Loading Host Segment and Descriptor-Table Registers */ + seg = (struct kvm_segment) { + .base = 0, + .limit = 0xFFFFFFFF, + .selector = vmcs12->host_cs_selector, + .type = 11, + .present = 1, + .s = 1, + .g = 1 + }; + if (vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE) + seg.l = 1; + else + seg.db = 1; + vmx_set_segment(vcpu, &seg, VCPU_SREG_CS); + seg = (struct kvm_segment) { + .base = 0, + .limit = 0xFFFFFFFF, + .type = 3, + .present = 1, + .s = 1, + .db = 1, + .g = 1 + }; + seg.selector = vmcs12->host_ds_selector; + vmx_set_segment(vcpu, &seg, VCPU_SREG_DS); + seg.selector = vmcs12->host_es_selector; + vmx_set_segment(vcpu, &seg, VCPU_SREG_ES); + seg.selector = vmcs12->host_ss_selector; + vmx_set_segment(vcpu, &seg, VCPU_SREG_SS); + seg.selector = vmcs12->host_fs_selector; + seg.base = vmcs12->host_fs_base; + vmx_set_segment(vcpu, &seg, VCPU_SREG_FS); + seg.selector = vmcs12->host_gs_selector; + seg.base = vmcs12->host_gs_base; + vmx_set_segment(vcpu, &seg, VCPU_SREG_GS); + seg = (struct kvm_segment) { + .base = vmcs12->host_tr_base, + .limit = 0x67, + .selector = 
vmcs12->host_tr_selector, + .type = 11, + .present = 1 + }; + vmx_set_segment(vcpu, &seg, VCPU_SREG_TR); + kvm_set_dr(vcpu, 7, 0x400); vmcs_write64(GUEST_IA32_DEBUGCTL, 0); } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index d21bce505315..e5ca72a5cdb6 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -682,17 +682,6 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) */ } - /* - * Does the new cr3 value map to physical memory? (Note, we - * catch an invalid cr3 even in real-mode, because it would - * cause trouble later on when we turn on paging anyway.) - * - * A real CPU would silently accept an invalid cr3 and would - * attempt to use it - with largely undefined (and often hard - * to debug) behavior on the guest side. - */ - if (unlikely(!gfn_to_memslot(vcpu->kvm, cr3 >> PAGE_SHIFT))) - return 1; vcpu->arch.cr3 = cr3; __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail); vcpu->arch.mmu.new_cr3(vcpu); @@ -850,7 +839,8 @@ static u32 msrs_to_save[] = { #ifdef CONFIG_X86_64 MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, #endif - MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA + MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA, + MSR_IA32_FEATURE_CONTROL }; static unsigned num_msrs_to_save; @@ -1457,6 +1447,29 @@ static void pvclock_update_vm_gtod_copy(struct kvm *kvm) #endif } +static void kvm_gen_update_masterclock(struct kvm *kvm) +{ +#ifdef CONFIG_X86_64 + int i; + struct kvm_vcpu *vcpu; + struct kvm_arch *ka = &kvm->arch; + + spin_lock(&ka->pvclock_gtod_sync_lock); + kvm_make_mclock_inprogress_request(kvm); + /* no guest entries from this point */ + pvclock_update_vm_gtod_copy(kvm); + + kvm_for_each_vcpu(i, vcpu, kvm) + set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests); + + /* guest entries allowed */ + kvm_for_each_vcpu(i, vcpu, kvm) + clear_bit(KVM_REQ_MCLOCK_INPROGRESS, &vcpu->requests); + + spin_unlock(&ka->pvclock_gtod_sync_lock); +#endif +} + static int kvm_guest_time_update(struct kvm_vcpu *v) { unsigned long flags, this_tsc_khz; @@ -3806,6 +3819,7 @@ long kvm_arch_vm_ioctl(struct file *filp, delta = user_ns.clock - now_ns; local_irq_enable(); kvm->arch.kvmclock_offset = delta; + kvm_gen_update_masterclock(kvm); break; } case KVM_GET_CLOCK: { @@ -4955,6 +4969,97 @@ static bool retry_instruction(struct x86_emulate_ctxt *ctxt, static int complete_emulated_mmio(struct kvm_vcpu *vcpu); static int complete_emulated_pio(struct kvm_vcpu *vcpu); +static int kvm_vcpu_check_hw_bp(unsigned long addr, u32 type, u32 dr7, + unsigned long *db) +{ + u32 dr6 = 0; + int i; + u32 enable, rwlen; + + enable = dr7; + rwlen = dr7 >> 16; + for (i = 0; i < 4; i++, enable >>= 2, rwlen >>= 4) + if ((enable & 3) && (rwlen & 15) == type && db[i] == addr) + dr6 |= (1 << i); + return dr6; +} + +static void kvm_vcpu_check_singlestep(struct kvm_vcpu *vcpu, int *r) +{ + struct kvm_run *kvm_run = vcpu->run; + + /* + * Use the "raw" value to see if TF was passed to the processor. + * Note that the new value of the flags has not been saved yet. + * + * This is correct even for TF set by the guest, because "the + * processor will not generate this exception after the instruction + * that sets the TF flag". 
+ */ + unsigned long rflags = kvm_x86_ops->get_rflags(vcpu); + + if (unlikely(rflags & X86_EFLAGS_TF)) { + if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) { + kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1; + kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip; + kvm_run->debug.arch.exception = DB_VECTOR; + kvm_run->exit_reason = KVM_EXIT_DEBUG; + *r = EMULATE_USER_EXIT; + } else { + vcpu->arch.emulate_ctxt.eflags &= ~X86_EFLAGS_TF; + /* + * "Certain debug exceptions may clear bit 0-3. The + * remaining contents of the DR6 register are never + * cleared by the processor". + */ + vcpu->arch.dr6 &= ~15; + vcpu->arch.dr6 |= DR6_BS; + kvm_queue_exception(vcpu, DB_VECTOR); + } + } +} + +static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r) +{ + struct kvm_run *kvm_run = vcpu->run; + unsigned long eip = vcpu->arch.emulate_ctxt.eip; + u32 dr6 = 0; + + if (unlikely(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) && + (vcpu->arch.guest_debug_dr7 & DR7_BP_EN_MASK)) { + dr6 = kvm_vcpu_check_hw_bp(eip, 0, + vcpu->arch.guest_debug_dr7, + vcpu->arch.eff_db); + + if (dr6 != 0) { + kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1; + kvm_run->debug.arch.pc = kvm_rip_read(vcpu) + + get_segment_base(vcpu, VCPU_SREG_CS); + + kvm_run->debug.arch.exception = DB_VECTOR; + kvm_run->exit_reason = KVM_EXIT_DEBUG; + *r = EMULATE_USER_EXIT; + return true; + } + } + + if (unlikely(vcpu->arch.dr7 & DR7_BP_EN_MASK)) { + dr6 = kvm_vcpu_check_hw_bp(eip, 0, + vcpu->arch.dr7, + vcpu->arch.db); + + if (dr6 != 0) { + vcpu->arch.dr6 &= ~15; + vcpu->arch.dr6 |= dr6; + kvm_queue_exception(vcpu, DB_VECTOR); + *r = EMULATE_DONE; + return true; + } + } + + return false; +} + int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2, int emulation_type, @@ -4975,6 +5080,16 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, if (!(emulation_type & EMULTYPE_NO_DECODE)) { init_emulate_ctxt(vcpu); + + /* + * We will reenter on the same instruction since + * we do not set complete_userspace_io. This does not + * handle watchpoints yet, those would be handled in + * the emulate_ops. + */ + if (kvm_vcpu_check_breakpoint(vcpu, &r)) + return r; + ctxt->interruptibility = 0; ctxt->have_exception = false; ctxt->perm_ok = false; @@ -5031,17 +5146,18 @@ restart: inject_emulated_exception(vcpu); r = EMULATE_DONE; } else if (vcpu->arch.pio.count) { - if (!vcpu->arch.pio.in) + if (!vcpu->arch.pio.in) { + /* FIXME: return into emulator if single-stepping. 
*/ vcpu->arch.pio.count = 0; - else { + } else { writeback = false; vcpu->arch.complete_userspace_io = complete_emulated_pio; } - r = EMULATE_DO_MMIO; + r = EMULATE_USER_EXIT; } else if (vcpu->mmio_needed) { if (!vcpu->mmio_is_write) writeback = false; - r = EMULATE_DO_MMIO; + r = EMULATE_USER_EXIT; vcpu->arch.complete_userspace_io = complete_emulated_mmio; } else if (r == EMULATION_RESTART) goto restart; @@ -5050,10 +5166,12 @@ restart: if (writeback) { toggle_interruptibility(vcpu, ctxt->interruptibility); - kvm_set_rflags(vcpu, ctxt->eflags); kvm_make_request(KVM_REQ_EVENT, vcpu); vcpu->arch.emulate_regs_need_sync_to_vcpu = false; kvm_rip_write(vcpu, ctxt->eip); + if (r == EMULATE_DONE) + kvm_vcpu_check_singlestep(vcpu, &r); + kvm_set_rflags(vcpu, ctxt->eflags); } else vcpu->arch.emulate_regs_need_sync_to_vcpu = true; @@ -5347,7 +5465,7 @@ static struct notifier_block pvclock_gtod_notifier = { int kvm_arch_init(void *opaque) { int r; - struct kvm_x86_ops *ops = (struct kvm_x86_ops *)opaque; + struct kvm_x86_ops *ops = opaque; if (kvm_x86_ops) { printk(KERN_ERR "kvm: already loaded the other module\n"); @@ -5495,6 +5613,23 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) return 1; } +/* + * kvm_pv_kick_cpu_op: Kick a vcpu. + * + * @apicid - apicid of vcpu to be kicked. + */ +static void kvm_pv_kick_cpu_op(struct kvm *kvm, unsigned long flags, int apicid) +{ + struct kvm_lapic_irq lapic_irq; + + lapic_irq.shorthand = 0; + lapic_irq.dest_mode = 0; + lapic_irq.dest_id = apicid; + + lapic_irq.delivery_mode = APIC_DM_REMRD; + kvm_irq_delivery_to_apic(kvm, 0, &lapic_irq, NULL); +} + int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) { unsigned long nr, a0, a1, a2, a3, ret; @@ -5528,6 +5663,10 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) case KVM_HC_VAPIC_POLL_IRQ: ret = 0; break; + case KVM_HC_KICK_CPU: + kvm_pv_kick_cpu_op(vcpu->kvm, a0, a1); + ret = 0; + break; default: ret = -KVM_ENOSYS; break; @@ -5689,29 +5828,6 @@ static void process_nmi(struct kvm_vcpu *vcpu) kvm_make_request(KVM_REQ_EVENT, vcpu); } -static void kvm_gen_update_masterclock(struct kvm *kvm) -{ -#ifdef CONFIG_X86_64 - int i; - struct kvm_vcpu *vcpu; - struct kvm_arch *ka = &kvm->arch; - - spin_lock(&ka->pvclock_gtod_sync_lock); - kvm_make_mclock_inprogress_request(kvm); - /* no guest entries from this point */ - pvclock_update_vm_gtod_copy(kvm); - - kvm_for_each_vcpu(i, vcpu, kvm) - set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests); - - /* guest entries allowed */ - kvm_for_each_vcpu(i, vcpu, kvm) - clear_bit(KVM_REQ_MCLOCK_INPROGRESS, &vcpu->requests); - - spin_unlock(&ka->pvclock_gtod_sync_lock); -#endif -} - static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu) { u64 eoi_exit_bitmap[4]; @@ -5950,6 +6066,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) kvm_apic_accept_events(vcpu); switch(vcpu->arch.mp_state) { case KVM_MP_STATE_HALTED: + vcpu->arch.pv.pv_unhalted = false; vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; case KVM_MP_STATE_RUNNABLE: @@ -6061,6 +6178,8 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu) if (vcpu->mmio_cur_fragment == vcpu->mmio_nr_fragments) { vcpu->mmio_needed = 0; + + /* FIXME: return into emulator if single-stepping. 
*/ if (vcpu->mmio_is_write) return 1; vcpu->mmio_read_completed = 1; @@ -6249,7 +6368,12 @@ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state) { kvm_apic_accept_events(vcpu); - mp_state->mp_state = vcpu->arch.mp_state; + if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED && + vcpu->arch.pv.pv_unhalted) + mp_state->mp_state = KVM_MP_STATE_RUNNABLE; + else + mp_state->mp_state = vcpu->arch.mp_state; + return 0; } @@ -6770,6 +6894,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) BUG_ON(vcpu->kvm == NULL); kvm = vcpu->kvm; + vcpu->arch.pv.pv_unhalted = false; vcpu->arch.emulate_ctxt.ops = &emulate_ops; if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_bsp(vcpu)) vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; @@ -7019,6 +7144,15 @@ out_free: return -ENOMEM; } +void kvm_arch_memslots_updated(struct kvm *kvm) +{ + /* + * memslots->generation has been incremented. + * mmio generation may have reached its maximum value. + */ + kvm_mmu_invalidate_mmio_sptes(kvm); +} + int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, struct kvm_userspace_memory_region *mem, @@ -7079,11 +7213,6 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, */ if ((change != KVM_MR_DELETE) && (mem->flags & KVM_MEM_LOG_DIRTY_PAGES)) kvm_mmu_slot_remove_write_access(kvm, mem->slot); - /* - * If memory slot is created, or moved, we need to clear all - * mmio sptes. - */ - kvm_mmu_invalidate_mmio_sptes(kvm); } void kvm_arch_flush_shadow_all(struct kvm *kvm) @@ -7103,6 +7232,7 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) !vcpu->arch.apf.halted) || !list_empty_careful(&vcpu->async_pf.done) || kvm_apic_has_events(vcpu) + || vcpu->arch.pv.pv_unhalted || atomic_read(&vcpu->arch.nmi_queued) || (kvm_arch_interrupt_allowed(vcpu) && kvm_cpu_has_interrupt(vcpu)); diff --git a/arch/x86/lib/csum-wrappers_64.c b/arch/x86/lib/csum-wrappers_64.c index 25b7ae8d058a..7609e0e421ec 100644 --- a/arch/x86/lib/csum-wrappers_64.c +++ b/arch/x86/lib/csum-wrappers_64.c @@ -6,6 +6,7 @@ */ #include <asm/checksum.h> #include <linux/module.h> +#include <asm/smap.h> /** * csum_partial_copy_from_user - Copy and checksum from user space. @@ -52,8 +53,10 @@ csum_partial_copy_from_user(const void __user *src, void *dst, len -= 2; } } + stac(); isum = csum_partial_copy_generic((__force const void *)src, dst, len, isum, errp, NULL); + clac(); if (unlikely(*errp)) goto out_err; @@ -82,6 +85,8 @@ __wsum csum_partial_copy_to_user(const void *src, void __user *dst, int len, __wsum isum, int *errp) { + __wsum ret; + might_sleep(); if (unlikely(!access_ok(VERIFY_WRITE, dst, len))) { @@ -105,8 +110,11 @@ csum_partial_copy_to_user(const void *src, void __user *dst, } *errp = 0; - return csum_partial_copy_generic(src, (void __force *)dst, - len, isum, NULL, errp); + stac(); + ret = csum_partial_copy_generic(src, (void __force *)dst, + len, isum, NULL, errp); + clac(); + return ret; } EXPORT_SYMBOL(csum_partial_copy_to_user); diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c index 906fea315791..c905e89e19fe 100644 --- a/arch/x86/lib/usercopy_64.c +++ b/arch/x86/lib/usercopy_64.c @@ -68,7 +68,7 @@ EXPORT_SYMBOL(copy_in_user); * Since protection fault in copy_from/to_user is not a normal situation, * it is not necessary to optimize tail handling. 
*/ -unsigned long +__visible unsigned long copy_user_handle_tail(char *to, char *from, unsigned len, unsigned zerorest) { char c; diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt index 5d7e51f3fd28..533a85e3a07e 100644 --- a/arch/x86/lib/x86-opcode-map.txt +++ b/arch/x86/lib/x86-opcode-map.txt @@ -1,10 +1,8 @@ # x86 Opcode Maps # # This is (mostly) based on following documentations. -# - Intel(R) 64 and IA-32 Architectures Software Developer's Manual Vol.2 -# (#325383-040US, October 2011) -# - Intel(R) Advanced Vector Extensions Programming Reference -# (#319433-011,JUNE 2011). +# - Intel(R) 64 and IA-32 Architectures Software Developer's Manual Vol.2C +# (#326018-047US, June 2013) # #<Opcode maps> # Table: table-name @@ -29,6 +27,7 @@ # - (F3): the last prefix is 0xF3 # - (F2): the last prefix is 0xF2 # - (!F3) : the last prefix is not 0xF3 (including non-last prefix case) +# - (66&F2): Both 0x66 and 0xF2 prefixes are specified. Table: one byte opcode Referrer: @@ -246,8 +245,8 @@ c2: RETN Iw (f64) c3: RETN c4: LES Gz,Mp (i64) | VEX+2byte (Prefix) c5: LDS Gz,Mp (i64) | VEX+1byte (Prefix) -c6: Grp11 Eb,Ib (1A) -c7: Grp11 Ev,Iz (1A) +c6: Grp11A Eb,Ib (1A) +c7: Grp11B Ev,Iz (1A) c8: ENTER Iw,Ib c9: LEAVE (d64) ca: RETF Iw @@ -293,8 +292,8 @@ ef: OUT DX,eAX # 0xf0 - 0xff f0: LOCK (Prefix) f1: -f2: REPNE (Prefix) -f3: REP/REPE (Prefix) +f2: REPNE (Prefix) | XACQUIRE (Prefix) +f3: REP/REPE (Prefix) | XRELEASE (Prefix) f4: HLT f5: CMC f6: Grp3_1 Eb (1A) @@ -326,7 +325,8 @@ AVXcode: 1 0a: 0b: UD2 (1B) 0c: -0d: NOP Ev | GrpP +# AMD's prefetch group. Intel supports prefetchw(/1) only. +0d: GrpP 0e: FEMMS # 3DNow! uses the last imm byte as opcode extension. 0f: 3DNow! Pq,Qq,Ib @@ -729,12 +729,12 @@ dc: VAESENC Vdq,Hdq,Wdq (66),(v1) dd: VAESENCLAST Vdq,Hdq,Wdq (66),(v1) de: VAESDEC Vdq,Hdq,Wdq (66),(v1) df: VAESDECLAST Vdq,Hdq,Wdq (66),(v1) -f0: MOVBE Gy,My | MOVBE Gw,Mw (66) | CRC32 Gd,Eb (F2) -f1: MOVBE My,Gy | MOVBE Mw,Gw (66) | CRC32 Gd,Ey (F2) +f0: MOVBE Gy,My | MOVBE Gw,Mw (66) | CRC32 Gd,Eb (F2) | CRC32 Gd,Eb (66&F2) +f1: MOVBE My,Gy | MOVBE Mw,Gw (66) | CRC32 Gd,Ey (F2) | CRC32 Gd,Ew (66&F2) f2: ANDN Gy,By,Ey (v) f3: Grp17 (1A) f5: BZHI Gy,Ey,By (v) | PEXT Gy,By,Ey (F3),(v) | PDEP Gy,By,Ey (F2),(v) -f6: MULX By,Gy,rDX,Ey (F2),(v) +f6: ADCX Gy,Ey (66) | ADOX Gy,Ey (F3) | MULX By,Gy,rDX,Ey (F2),(v) f7: BEXTR Gy,Ey,By (v) | SHLX Gy,Ey,By (66),(v) | SARX Gy,Ey,By (F3),(v) | SHRX Gy,Ey,By (F2),(v) EndTable @@ -861,8 +861,8 @@ EndTable GrpTable: Grp7 0: SGDT Ms | VMCALL (001),(11B) | VMLAUNCH (010),(11B) | VMRESUME (011),(11B) | VMXOFF (100),(11B) -1: SIDT Ms | MONITOR (000),(11B) | MWAIT (001) -2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B) +1: SIDT Ms | MONITOR (000),(11B) | MWAIT (001),(11B) | CLAC (010),(11B) | STAC (011),(11B) +2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B) | XEND (101)(11B) | XTEST (110)(11B) 3: LIDT Ms 4: SMSW Mw/Rv 5: @@ -880,15 +880,21 @@ EndTable GrpTable: Grp9 1: CMPXCHG8B/16B Mq/Mdq 6: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) | RDRAND Rv (11B) -7: VMPTRST Mq | VMPTRST Mq (F3) +7: VMPTRST Mq | VMPTRST Mq (F3) | RDSEED Rv (11B) EndTable GrpTable: Grp10 EndTable -GrpTable: Grp11 -# Note: the operands are given by group opcode -0: MOV +# Grp11A and Grp11B are expressed as Grp11 in Intel SDM +GrpTable: Grp11A +0: MOV Eb,Ib +7: XABORT Ib (000),(11B) +EndTable + +GrpTable: Grp11B +0: MOV Eb,Iz +7: XBEGIN Jz (000),(11B) EndTable GrpTable: Grp12 diff --git a/arch/x86/mm/init.c 
b/arch/x86/mm/init.c index 2ec29ac78ae6..04664cdb7fda 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -78,8 +78,8 @@ __ref void *alloc_low_pages(unsigned int num) return __va(pfn << PAGE_SHIFT); } -/* need 4 4k for initial PMD_SIZE, 4k for 0-ISA_END_ADDRESS */ -#define INIT_PGT_BUF_SIZE (5 * PAGE_SIZE) +/* need 3 4k for initial PMD_SIZE, 3 4k for 0-ISA_END_ADDRESS */ +#define INIT_PGT_BUF_SIZE (6 * PAGE_SIZE) RESERVE_BRK(early_pgt_alloc, INIT_PGT_BUF_SIZE); void __init early_alloc_pgt_buf(void) { diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 0215e2c563ef..799580cabc78 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -487,7 +487,7 @@ __early_ioremap(resource_size_t phys_addr, unsigned long size, pgprot_t prot) unsigned long offset; resource_size_t last_addr; unsigned int nrpages; - enum fixed_addresses idx0, idx; + enum fixed_addresses idx; int i, slot; WARN_ON(system_state != SYSTEM_BOOTING); @@ -540,8 +540,7 @@ __early_ioremap(resource_size_t phys_addr, unsigned long size, pgprot_t prot) /* * Ok, go for it.. */ - idx0 = FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*slot; - idx = idx0; + idx = FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*slot; while (nrpages > 0) { early_set_fixmap(idx, phys_addr, prot); phys_addr += PAGE_SIZE; diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c index cdd0da9dd530..266ca912f62e 100644 --- a/arch/x86/mm/srat.c +++ b/arch/x86/mm/srat.c @@ -146,6 +146,7 @@ int __init acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) { u64 start, end; + u32 hotpluggable; int node, pxm; if (srat_disabled()) @@ -154,7 +155,8 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) goto out_err_bad_srat; if ((ma->flags & ACPI_SRAT_MEM_ENABLED) == 0) goto out_err; - if ((ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) && !save_add_info()) + hotpluggable = ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE; + if (hotpluggable && !save_add_info()) goto out_err; start = ma->base_address; @@ -174,9 +176,10 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) node_set(node, numa_nodes_parsed); - printk(KERN_INFO "SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx]\n", - node, pxm, - (unsigned long long) start, (unsigned long long) end - 1); + pr_info("SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx]%s\n", + node, pxm, + (unsigned long long) start, (unsigned long long) end - 1, + hotpluggable ? 
" hotplug" : ""); return 0; out_err_bad_srat: diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 48768df2471a..6890d8498e0b 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -403,7 +403,7 @@ static void nmi_cpu_down(void *dummy) nmi_cpu_shutdown(dummy); } -static int nmi_create_files(struct super_block *sb, struct dentry *root) +static int nmi_create_files(struct dentry *root) { unsigned int i; @@ -420,14 +420,14 @@ static int nmi_create_files(struct super_block *sb, struct dentry *root) continue; snprintf(buf, sizeof(buf), "%d", i); - dir = oprofilefs_mkdir(sb, root, buf); - oprofilefs_create_ulong(sb, dir, "enabled", &counter_config[i].enabled); - oprofilefs_create_ulong(sb, dir, "event", &counter_config[i].event); - oprofilefs_create_ulong(sb, dir, "count", &counter_config[i].count); - oprofilefs_create_ulong(sb, dir, "unit_mask", &counter_config[i].unit_mask); - oprofilefs_create_ulong(sb, dir, "kernel", &counter_config[i].kernel); - oprofilefs_create_ulong(sb, dir, "user", &counter_config[i].user); - oprofilefs_create_ulong(sb, dir, "extra", &counter_config[i].extra); + dir = oprofilefs_mkdir(root, buf); + oprofilefs_create_ulong(dir, "enabled", &counter_config[i].enabled); + oprofilefs_create_ulong(dir, "event", &counter_config[i].event); + oprofilefs_create_ulong(dir, "count", &counter_config[i].count); + oprofilefs_create_ulong(dir, "unit_mask", &counter_config[i].unit_mask); + oprofilefs_create_ulong(dir, "kernel", &counter_config[i].kernel); + oprofilefs_create_ulong(dir, "user", &counter_config[i].user); + oprofilefs_create_ulong(dir, "extra", &counter_config[i].extra); } return 0; diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index b2b94438ff05..50d86c0e9ba4 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c @@ -454,16 +454,16 @@ static void init_ibs(void) printk(KERN_INFO "oprofile: AMD IBS detected (0x%08x)\n", ibs_caps); } -static int (*create_arch_files)(struct super_block *sb, struct dentry *root); +static int (*create_arch_files)(struct dentry *root); -static int setup_ibs_files(struct super_block *sb, struct dentry *root) +static int setup_ibs_files(struct dentry *root) { struct dentry *dir; int ret = 0; /* architecture specific files */ if (create_arch_files) - ret = create_arch_files(sb, root); + ret = create_arch_files(root); if (ret) return ret; @@ -479,26 +479,26 @@ static int setup_ibs_files(struct super_block *sb, struct dentry *root) ibs_config.max_cnt_op = 250000; if (ibs_caps & IBS_CAPS_FETCHSAM) { - dir = oprofilefs_mkdir(sb, root, "ibs_fetch"); - oprofilefs_create_ulong(sb, dir, "enable", + dir = oprofilefs_mkdir(root, "ibs_fetch"); + oprofilefs_create_ulong(dir, "enable", &ibs_config.fetch_enabled); - oprofilefs_create_ulong(sb, dir, "max_count", + oprofilefs_create_ulong(dir, "max_count", &ibs_config.max_cnt_fetch); - oprofilefs_create_ulong(sb, dir, "rand_enable", + oprofilefs_create_ulong(dir, "rand_enable", &ibs_config.rand_en); } if (ibs_caps & IBS_CAPS_OPSAM) { - dir = oprofilefs_mkdir(sb, root, "ibs_op"); - oprofilefs_create_ulong(sb, dir, "enable", + dir = oprofilefs_mkdir(root, "ibs_op"); + oprofilefs_create_ulong(dir, "enable", &ibs_config.op_enabled); - oprofilefs_create_ulong(sb, dir, "max_count", + oprofilefs_create_ulong(dir, "max_count", &ibs_config.max_cnt_op); if (ibs_caps & IBS_CAPS_OPCNT) - oprofilefs_create_ulong(sb, dir, "dispatched_ops", + oprofilefs_create_ulong(dir, "dispatched_ops", &ibs_config.dispatched_ops); 
if (ibs_caps & IBS_CAPS_BRNTRGT) - oprofilefs_create_ulong(sb, dir, "branch_target", + oprofilefs_create_ulong(dir, "branch_target", &ibs_config.branch_target); } diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index d641897a1f4e..b30e937689d6 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -568,13 +568,8 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root) */ if (bus) { struct pci_bus *child; - list_for_each_entry(child, &bus->children, node) { - struct pci_dev *self = child->self; - if (!self) - continue; - - pcie_bus_configure_settings(child, self->pcie_mpss); - } + list_for_each_entry(child, &bus->children, node) + pcie_bus_configure_settings(child); } if (bus && node != -1) { diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index 94919e307f8e..db6b1ab43255 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c @@ -210,6 +210,8 @@ static void pcibios_allocate_bridge_resources(struct pci_dev *dev) r = &dev->resource[idx]; if (!r->flags) continue; + if (r->parent) /* Already allocated */ + continue; if (!r->start || pci_claim_resource(dev, idx) < 0) { /* * Something is wrong with the region. @@ -318,6 +320,8 @@ static void pcibios_allocate_dev_rom_resource(struct pci_dev *dev) r = &dev->resource[PCI_ROM_RESOURCE]; if (!r->flags || !r->start) return; + if (r->parent) /* Already allocated */ + return; if (pci_claim_resource(dev, PCI_ROM_RESOURCE) < 0) { r->end -= r->start; diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 082e88129712..5596c7bdd327 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -700,7 +700,7 @@ int pci_mmconfig_insert(struct device *dev, u16 seg, u8 start, u8 end, if (!(pci_probe & PCI_PROBE_MMCONF) || pci_mmcfg_arch_init_failed) return -ENODEV; - if (start > end) + if (start > end || !addr) return -EINVAL; mutex_lock(&pci_mmcfg_lock); @@ -716,11 +716,6 @@ int pci_mmconfig_insert(struct device *dev, u16 seg, u8 start, u8 end, return -EEXIST; } - if (!addr) { - mutex_unlock(&pci_mmcfg_lock); - return -EINVAL; - } - rc = -EBUSY; cfg = pci_mmconfig_alloc(seg, start, end, addr); if (cfg == NULL) { diff --git a/arch/x86/pci/mrst.c b/arch/x86/pci/mrst.c index 6eb18c42a28a..903fded50786 100644 --- a/arch/x86/pci/mrst.c +++ b/arch/x86/pci/mrst.c @@ -23,11 +23,11 @@ #include <linux/ioport.h> #include <linux/init.h> #include <linux/dmi.h> +#include <linux/acpi.h> +#include <linux/io.h> +#include <linux/smp.h> -#include <asm/acpi.h> #include <asm/segment.h> -#include <asm/io.h> -#include <asm/smp.h> #include <asm/pci_x86.h> #include <asm/hw_irq.h> #include <asm/io_apic.h> @@ -43,7 +43,7 @@ #define PCI_FIXED_BAR_4_SIZE 0x14 #define PCI_FIXED_BAR_5_SIZE 0x1c -static int pci_soc_mode = 0; +static int pci_soc_mode; /** * fixed_bar_cap - return the offset of the fixed BAR cap if found @@ -141,7 +141,8 @@ static int pci_device_update_fixed(struct pci_bus *bus, unsigned int devfn, */ static bool type1_access_ok(unsigned int bus, unsigned int devfn, int reg) { - /* This is a workaround for A0 LNC bug where PCI status register does + /* + * This is a workaround for A0 LNC bug where PCI status register does * not have new CAP bit set. can not be written by SW either. 
* * PCI header type in real LNC indicates a single function device, this @@ -154,7 +155,7 @@ static bool type1_access_ok(unsigned int bus, unsigned int devfn, int reg) || devfn == PCI_DEVFN(0, 0) || devfn == PCI_DEVFN(3, 0))) return 1; - return 0; /* langwell on others */ + return 0; /* Langwell on others */ } static int pci_read(struct pci_bus *bus, unsigned int devfn, int where, @@ -172,7 +173,8 @@ static int pci_write(struct pci_bus *bus, unsigned int devfn, int where, { int offset; - /* On MRST, there is no PCI ROM BAR, this will cause a subsequent read + /* + * On MRST, there is no PCI ROM BAR, this will cause a subsequent read * to ROM BAR return 0 then being ignored. */ if (where == PCI_ROM_ADDRESS) @@ -210,7 +212,8 @@ static int mrst_pci_irq_enable(struct pci_dev *dev) pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin); - /* MRST only have IOAPIC, the PCI irq lines are 1:1 mapped to + /* + * MRST only have IOAPIC, the PCI irq lines are 1:1 mapped to * IOAPIC RTE entries, so we just enable RTE for the device. */ irq_attr.ioapic = mp_find_ioapic(dev->irq); @@ -235,7 +238,7 @@ struct pci_ops pci_mrst_ops = { */ int __init pci_mrst_init(void) { - printk(KERN_INFO "Intel MID platform detected, using MID PCI ops\n"); + pr_info("Intel MID platform detected, using MID PCI ops\n"); pci_mmcfg_late_init(); pcibios_enable_irq = mrst_pci_irq_enable; pci_root_ops = pci_mrst_ops; @@ -244,17 +247,21 @@ int __init pci_mrst_init(void) return 1; } -/* Langwell devices are not true pci devices, they are not subject to 10 ms - * d3 to d0 delay required by pci spec. +/* + * Langwell devices are not true PCI devices; they are not subject to 10 ms + * d3 to d0 delay required by PCI spec. */ static void pci_d3delay_fixup(struct pci_dev *dev) { - /* PCI fixups are effectively decided compile time. If we have a dual - SoC/non-SoC kernel we don't want to mangle d3 on non SoC devices */ - if (!pci_soc_mode) - return; - /* true pci devices in lincroft should allow type 1 access, the rest - * are langwell fake pci devices. + /* + * PCI fixups are effectively decided compile time. If we have a dual + * SoC/non-SoC kernel we don't want to mangle d3 on non-SoC devices. + */ + if (!pci_soc_mode) + return; + /* + * True PCI devices in Lincroft should allow type 1 access, the rest + * are Langwell fake PCI devices. 
*/ if (type1_access_ok(dev->bus->number, dev->devfn, PCI_DEVICE_ID)) return; diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index 1cf5b300305e..424f4c97a44d 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -25,10 +25,10 @@ #include <asm/cpu.h> #ifdef CONFIG_X86_32 -unsigned long saved_context_ebx; -unsigned long saved_context_esp, saved_context_ebp; -unsigned long saved_context_esi, saved_context_edi; -unsigned long saved_context_eflags; +__visible unsigned long saved_context_ebx; +__visible unsigned long saved_context_esp, saved_context_ebp; +__visible unsigned long saved_context_esi, saved_context_edi; +__visible unsigned long saved_context_eflags; #endif struct saved_context saved_context; diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c index a0fde91c16cf..304fca20d96e 100644 --- a/arch/x86/power/hibernate_64.c +++ b/arch/x86/power/hibernate_64.c @@ -20,26 +20,26 @@ #include <asm/suspend.h> /* References to section boundaries */ -extern const void __nosave_begin, __nosave_end; +extern __visible const void __nosave_begin, __nosave_end; /* Defined in hibernate_asm_64.S */ -extern int restore_image(void); +extern asmlinkage int restore_image(void); /* * Address to jump to in the last phase of restore in order to get to the image * kernel's text (this value is passed in the image header). */ -unsigned long restore_jump_address; +unsigned long restore_jump_address __visible; /* * Value of the cr3 register from before the hibernation (this value is passed * in the image header). */ -unsigned long restore_cr3; +unsigned long restore_cr3 __visible; -pgd_t *temp_level4_pgt; +pgd_t *temp_level4_pgt __visible; -void *relocated_restore_code; +void *relocated_restore_code __visible; static void *alloc_pgt_page(void *context) { diff --git a/arch/x86/tools/gen-insn-attr-x86.awk b/arch/x86/tools/gen-insn-attr-x86.awk index e6773dc8ac41..093a892026f9 100644 --- a/arch/x86/tools/gen-insn-attr-x86.awk +++ b/arch/x86/tools/gen-insn-attr-x86.awk @@ -68,7 +68,7 @@ BEGIN { lprefix1_expr = "\\((66|!F3)\\)" lprefix2_expr = "\\(F3\\)" - lprefix3_expr = "\\((F2|!F3)\\)" + lprefix3_expr = "\\((F2|!F3|66\\&F2)\\)" lprefix_expr = "\\((66|F2|F3)\\)" max_lprefix = 4 @@ -83,6 +83,8 @@ BEGIN { prefix_num["Operand-Size"] = "INAT_PFX_OPNDSZ" prefix_num["REPNE"] = "INAT_PFX_REPNE" prefix_num["REP/REPE"] = "INAT_PFX_REPE" + prefix_num["XACQUIRE"] = "INAT_PFX_REPNE" + prefix_num["XRELEASE"] = "INAT_PFX_REPE" prefix_num["LOCK"] = "INAT_PFX_LOCK" prefix_num["SEG=CS"] = "INAT_PFX_CS" prefix_num["SEG=DS"] = "INAT_PFX_DS" diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c index c74436e687bf..72074d528400 100644 --- a/arch/x86/vdso/vclock_gettime.c +++ b/arch/x86/vdso/vclock_gettime.c @@ -85,15 +85,18 @@ static notrace cycle_t vread_pvclock(int *mode) cycle_t ret; u64 last; u32 version; - u32 migrate_count; u8 flags; unsigned cpu, cpu1; /* - * When looping to get a consistent (time-info, tsc) pair, we - * also need to deal with the possibility we can switch vcpus, - * so make sure we always re-fetch time-info for the current vcpu. + * Note: hypervisor must guarantee that: + * 1. cpu ID number maps 1:1 to per-CPU pvclock time info. + * 2. that per-CPU pvclock time info is updated if the + * underlying CPU changes. + * 3. that version is increased whenever underlying CPU + * changes. 
+ * */ do { cpu = __getcpu() & VGETCPU_CPU_MASK; @@ -104,8 +107,6 @@ static notrace cycle_t vread_pvclock(int *mode) pvti = get_pvti(cpu); - migrate_count = pvti->migrate_count; - version = __pvclock_read_cycles(&pvti->pvti, &ret, &flags); /* @@ -117,8 +118,7 @@ static notrace cycle_t vread_pvclock(int *mode) cpu1 = __getcpu() & VGETCPU_CPU_MASK; } while (unlikely(cpu != cpu1 || (pvti->pvti.version & 1) || - pvti->pvti.version != version || - pvti->migrate_count != migrate_count)); + pvti->pvti.version != version)); if (unlikely(!(flags & PVCLOCK_TSC_STABLE_BIT))) *mode = VCLOCK_NONE; diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 193097ef3d7d..2fc216dfbd9c 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -427,8 +427,7 @@ static void __init xen_init_cpuid_mask(void) if (!xen_initial_domain()) cpuid_leaf1_edx_mask &= - ~((1 << X86_FEATURE_APIC) | /* disable local APIC */ - (1 << X86_FEATURE_ACPI)); /* disable ACPI */ + ~((1 << X86_FEATURE_ACPI)); /* disable ACPI */ cpuid_leaf1_ecx_mask &= ~(1 << (X86_FEATURE_X2APIC % 32)); @@ -735,8 +734,7 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val, addr = (unsigned long)xen_int3; else if (addr == (unsigned long)stack_segment) addr = (unsigned long)xen_stack_segment; - else if (addr == (unsigned long)double_fault || - addr == (unsigned long)nmi) { + else if (addr == (unsigned long)double_fault) { /* Don't need to handle these */ return 0; #ifdef CONFIG_X86_MCE @@ -747,7 +745,12 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val, */ ; #endif - } else { + } else if (addr == (unsigned long)nmi) + /* + * Use the native version as well. + */ + ; + else { /* Some other trap using IST? */ if (WARN_ON(val->ist != 0)) return 0; @@ -1710,6 +1713,8 @@ static void __init xen_hvm_guest_init(void) xen_hvm_init_shared_info(); + xen_panic_handler_init(); + if (xen_feature(XENFEAT_hvm_callback_vector)) xen_have_vector_callback = 1; xen_hvm_smp_init(); @@ -1720,15 +1725,12 @@ static void __init xen_hvm_guest_init(void) xen_hvm_init_mmu_ops(); } -static bool __init xen_hvm_platform(void) +static uint32_t __init xen_hvm_platform(void) { if (xen_pv_domain()) - return false; - - if (!xen_cpuid_base()) - return false; + return 0; - return true; + return xen_cpuid_base(); } bool xen_hvm_need_lapic(void) diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c index 01a4dc015ae1..0da7f863056f 100644 --- a/arch/x86/xen/irq.c +++ b/arch/x86/xen/irq.c @@ -47,23 +47,18 @@ static void xen_restore_fl(unsigned long flags) /* convert from IF type flag */ flags = !(flags & X86_EFLAGS_IF); - /* There's a one instruction preempt window here. We need to - make sure we're don't switch CPUs between getting the vcpu - pointer and updating the mask. */ + /* See xen_irq_enable() for why preemption must be disabled. */ preempt_disable(); vcpu = this_cpu_read(xen_vcpu); vcpu->evtchn_upcall_mask = flags; - preempt_enable_no_resched(); - - /* Doesn't matter if we get preempted here, because any - pending event will get dealt with anyway. 
*/ if (flags == 0) { - preempt_check_resched(); barrier(); /* unmask then check (avoid races) */ if (unlikely(vcpu->evtchn_upcall_pending)) xen_force_evtchn_callback(); - } + preempt_enable(); + } else + preempt_enable_no_resched(); } PV_CALLEE_SAVE_REGS_THUNK(xen_restore_fl); @@ -82,10 +77,12 @@ static void xen_irq_enable(void) { struct vcpu_info *vcpu; - /* We don't need to worry about being preempted here, since - either a) interrupts are disabled, so no preemption, or b) - the caller is confused and is trying to re-enable interrupts - on an indeterminate processor. */ + /* + * We may be preempted as soon as vcpu->evtchn_upcall_mask is + * cleared, so disable preemption to ensure we check for + * events on the VCPU we are still running on. + */ + preempt_disable(); vcpu = this_cpu_read(xen_vcpu); vcpu->evtchn_upcall_mask = 0; @@ -96,6 +93,8 @@ static void xen_irq_enable(void) barrier(); /* unmask then check (avoid races) */ if (unlikely(vcpu->evtchn_upcall_pending)) xen_force_evtchn_callback(); + + preempt_enable(); } PV_CALLEE_SAVE_REGS_THUNK(xen_irq_enable); diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 95fb2aa5927e..0d4ec35895d4 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -161,6 +161,7 @@ #include <asm/xen/page.h> #include <asm/xen/hypercall.h> #include <asm/xen/hypervisor.h> +#include <xen/balloon.h> #include <xen/grant_table.h> #include "multicalls.h" @@ -967,7 +968,10 @@ int m2p_remove_override(struct page *page, if (kmap_op != NULL) { if (!PageHighMem(page)) { struct multicall_space mcs; - struct gnttab_unmap_grant_ref *unmap_op; + struct gnttab_unmap_and_replace *unmap_op; + struct page *scratch_page = get_balloon_scratch_page(); + unsigned long scratch_page_address = (unsigned long) + __va(page_to_pfn(scratch_page) << PAGE_SHIFT); /* * It might be that we queued all the m2p grant table @@ -990,21 +994,25 @@ int m2p_remove_override(struct page *page, } mcs = xen_mc_entry( - sizeof(struct gnttab_unmap_grant_ref)); + sizeof(struct gnttab_unmap_and_replace)); unmap_op = mcs.args; unmap_op->host_addr = kmap_op->host_addr; + unmap_op->new_addr = scratch_page_address; unmap_op->handle = kmap_op->handle; - unmap_op->dev_bus_addr = 0; MULTI_grant_table_op(mcs.mc, - GNTTABOP_unmap_grant_ref, unmap_op, 1); + GNTTABOP_unmap_and_replace, unmap_op, 1); xen_mc_issue(PARAVIRT_LAZY_MMU); - set_pte_at(&init_mm, address, ptep, - pfn_pte(pfn, PAGE_KERNEL)); - __flush_tlb_single(address); + mcs = __xen_mc_entry(0); + MULTI_update_va_mapping(mcs.mc, scratch_page_address, + pfn_pte(page_to_pfn(get_balloon_scratch_page()), + PAGE_KERNEL_RO), 0); + xen_mc_issue(PARAVIRT_LAZY_MMU); + kmap_op->host_addr = 0; + put_balloon_scratch_page(); } } diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 8f3eea6b80c5..09f3059cb00b 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -33,6 +33,9 @@ /* These are code, but not functions. Defined in entry.S */ extern const char xen_hypervisor_callback[]; extern const char xen_failsafe_callback[]; +#ifdef CONFIG_X86_64 +extern const char nmi[]; +#endif extern void xen_sysenter_target(void); extern void xen_syscall_target(void); extern void xen_syscall32_target(void); @@ -215,13 +218,19 @@ static void __init xen_set_identity_and_release_chunk( unsigned long pfn; /* - * If the PFNs are currently mapped, the VA mapping also needs - * to be updated to be 1:1. 
+ * If the PFNs are currently mapped, clear the mappings + * (except for the ISA region which must be 1:1 mapped) to + * release the refcounts (in Xen) on the original frames. */ - for (pfn = start_pfn; pfn <= max_pfn_mapped && pfn < end_pfn; pfn++) + for (pfn = start_pfn; pfn <= max_pfn_mapped && pfn < end_pfn; pfn++) { + pte_t pte = __pte_ma(0); + + if (pfn < PFN_UP(ISA_END_ADDRESS)) + pte = mfn_pte(pfn, PAGE_KERNEL_IO); + (void)HYPERVISOR_update_va_mapping( - (unsigned long)__va(pfn << PAGE_SHIFT), - mfn_pte(pfn, PAGE_KERNEL_IO), 0); + (unsigned long)__va(pfn << PAGE_SHIFT), pte, 0); + } if (start_pfn < nr_pages) *released += xen_release_chunk( @@ -547,7 +556,13 @@ void xen_enable_syscall(void) } #endif /* CONFIG_X86_64 */ } - +void __cpuinit xen_enable_nmi(void) +{ +#ifdef CONFIG_X86_64 + if (register_callback(CALLBACKTYPE_nmi, nmi)) + BUG(); +#endif +} void __init xen_arch_setup(void) { xen_panic_handler_init(); @@ -565,7 +580,7 @@ void __init xen_arch_setup(void) xen_enable_sysenter(); xen_enable_syscall(); - + xen_enable_nmi(); #ifdef CONFIG_ACPI if (!(xen_start_info->flags & SIF_INITDOMAIN)) { printk(KERN_INFO "ACPI in unprivileged domain disabled\n"); diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index b81c88e51daa..9235842cd76a 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -279,6 +279,7 @@ static void __init xen_smp_prepare_boot_cpu(void) xen_filter_cpu_maps(); xen_setup_vcpu_info_placement(); + xen_init_spinlocks(); } static void __init xen_smp_prepare_cpus(unsigned int max_cpus) @@ -572,6 +573,12 @@ static inline int xen_map_vector(int vector) case IRQ_WORK_VECTOR: xen_vector = XEN_IRQ_WORK_VECTOR; break; +#ifdef CONFIG_X86_64 + case NMI_VECTOR: + case APIC_DM_NMI: /* Some use that instead of NMI_VECTOR */ + xen_vector = XEN_NMI_VECTOR; + break; +#endif default: xen_vector = -1; printk(KERN_ERR "xen: vector 0x%x is not implemented\n", @@ -680,7 +687,6 @@ void __init xen_smp_init(void) { smp_ops = xen_smp_ops; xen_fill_possible_map(); - xen_init_spinlocks(); } static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus) diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c index cf3caee356b3..0438b9324a72 100644 --- a/arch/x86/xen/spinlock.c +++ b/arch/x86/xen/spinlock.c @@ -17,45 +17,44 @@ #include "xen-ops.h" #include "debugfs.h" -#ifdef CONFIG_XEN_DEBUG_FS -static struct xen_spinlock_stats -{ - u64 taken; - u32 taken_slow; - u32 taken_slow_nested; - u32 taken_slow_pickup; - u32 taken_slow_spurious; - u32 taken_slow_irqenable; +enum xen_contention_stat { + TAKEN_SLOW, + TAKEN_SLOW_PICKUP, + TAKEN_SLOW_SPURIOUS, + RELEASED_SLOW, + RELEASED_SLOW_KICKED, + NR_CONTENTION_STATS +}; - u64 released; - u32 released_slow; - u32 released_slow_kicked; +#ifdef CONFIG_XEN_DEBUG_FS #define HISTO_BUCKETS 30 - u32 histo_spin_total[HISTO_BUCKETS+1]; - u32 histo_spin_spinning[HISTO_BUCKETS+1]; +static struct xen_spinlock_stats +{ + u32 contention_stats[NR_CONTENTION_STATS]; u32 histo_spin_blocked[HISTO_BUCKETS+1]; - - u64 time_total; - u64 time_spinning; u64 time_blocked; } spinlock_stats; static u8 zero_stats; -static unsigned lock_timeout = 1 << 10; -#define TIMEOUT lock_timeout - static inline void check_zero(void) { - if (unlikely(zero_stats)) { - memset(&spinlock_stats, 0, sizeof(spinlock_stats)); - zero_stats = 0; + u8 ret; + u8 old = ACCESS_ONCE(zero_stats); + if (unlikely(old)) { + ret = cmpxchg(&zero_stats, old, 0); + /* This ensures only one fellow resets the stat */ + if (ret == old) + memset(&spinlock_stats, 0, sizeof(spinlock_stats)); } } 
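The rewritten check_zero() above replaces an unsynchronized test-and-clear with cmpxchg(): only the CPU whose compare-and-exchange still observes the old nonzero value performs the memset(), so two racing callers cannot both reset the statistics. The same idea as a standalone sketch in portable C11 atomics (illustrative only; the kernel code uses ACCESS_ONCE() and cmpxchg()):

#include <stdatomic.h>
#include <string.h>

static _Atomic unsigned char zero_stats;
static struct { unsigned long taken_slow; } stats;

static void reset_stats_once(void)
{
	unsigned char old = atomic_load(&zero_stats);

	if (old) {
		/* Only the caller that wins the exchange resets. */
		if (atomic_compare_exchange_strong(&zero_stats, &old, 0))
			memset(&stats, 0, sizeof(stats));
	}
}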
-#define ADD_STATS(elem, val) \ - do { check_zero(); spinlock_stats.elem += (val); } while(0) +static inline void add_stats(enum xen_contention_stat var, u32 val) +{ + check_zero(); + spinlock_stats.contention_stats[var] += val; +} static inline u64 spin_time_start(void) { @@ -74,22 +73,6 @@ static void __spin_time_accum(u64 delta, u32 *array) array[HISTO_BUCKETS]++; } -static inline void spin_time_accum_spinning(u64 start) -{ - u32 delta = xen_clocksource_read() - start; - - __spin_time_accum(delta, spinlock_stats.histo_spin_spinning); - spinlock_stats.time_spinning += delta; -} - -static inline void spin_time_accum_total(u64 start) -{ - u32 delta = xen_clocksource_read() - start; - - __spin_time_accum(delta, spinlock_stats.histo_spin_total); - spinlock_stats.time_total += delta; -} - static inline void spin_time_accum_blocked(u64 start) { u32 delta = xen_clocksource_read() - start; @@ -99,19 +82,15 @@ static inline void spin_time_accum_blocked(u64 start) } #else /* !CONFIG_XEN_DEBUG_FS */ #define TIMEOUT (1 << 10) -#define ADD_STATS(elem, val) do { (void)(val); } while(0) +static inline void add_stats(enum xen_contention_stat var, u32 val) +{ +} static inline u64 spin_time_start(void) { return 0; } -static inline void spin_time_accum_total(u64 start) -{ -} -static inline void spin_time_accum_spinning(u64 start) -{ -} static inline void spin_time_accum_blocked(u64 start) { } @@ -134,227 +113,123 @@ typedef u16 xen_spinners_t; asm(LOCK_PREFIX " decw %0" : "+m" ((xl)->spinners) : : "memory"); #endif -struct xen_spinlock { - unsigned char lock; /* 0 -> free; 1 -> locked */ - xen_spinners_t spinners; /* count of waiting cpus */ +struct xen_lock_waiting { + struct arch_spinlock *lock; + __ticket_t want; }; -static int xen_spin_is_locked(struct arch_spinlock *lock) -{ - struct xen_spinlock *xl = (struct xen_spinlock *)lock; - - return xl->lock != 0; -} - -static int xen_spin_is_contended(struct arch_spinlock *lock) -{ - struct xen_spinlock *xl = (struct xen_spinlock *)lock; - - /* Not strictly true; this is only the count of contended - lock-takers entering the slow path. */ - return xl->spinners != 0; -} - -static int xen_spin_trylock(struct arch_spinlock *lock) -{ - struct xen_spinlock *xl = (struct xen_spinlock *)lock; - u8 old = 1; - - asm("xchgb %b0,%1" - : "+q" (old), "+m" (xl->lock) : : "memory"); - - return old == 0; -} - -static DEFINE_PER_CPU(char *, irq_name); static DEFINE_PER_CPU(int, lock_kicker_irq) = -1; -static DEFINE_PER_CPU(struct xen_spinlock *, lock_spinners); - -/* - * Mark a cpu as interested in a lock. Returns the CPU's previous - * lock of interest, in case we got preempted by an interrupt. - */ -static inline struct xen_spinlock *spinning_lock(struct xen_spinlock *xl) -{ - struct xen_spinlock *prev; - - prev = __this_cpu_read(lock_spinners); - __this_cpu_write(lock_spinners, xl); - - wmb(); /* set lock of interest before count */ - - inc_spinners(xl); - - return prev; -} - -/* - * Mark a cpu as no longer interested in a lock. Restores previous - * lock of interest (NULL for none). 
- */
-static inline void unspinning_lock(struct xen_spinlock *xl, struct xen_spinlock *prev)
-{
-	dec_spinners(xl);
-	wmb();			/* decrement count before restoring lock */
-	__this_cpu_write(lock_spinners, prev);
-}
+static DEFINE_PER_CPU(char *, irq_name);
+static DEFINE_PER_CPU(struct xen_lock_waiting, lock_waiting);
+static cpumask_t waiting_cpus;
 
-static noinline int xen_spin_lock_slow(struct arch_spinlock *lock, bool irq_enable)
+static void xen_lock_spinning(struct arch_spinlock *lock, __ticket_t want)
 {
-	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
-	struct xen_spinlock *prev;
 	int irq = __this_cpu_read(lock_kicker_irq);
-	int ret;
+	struct xen_lock_waiting *w = &__get_cpu_var(lock_waiting);
+	int cpu = smp_processor_id();
 	u64 start;
+	unsigned long flags;
 
 	/* If kicker interrupts not initialized yet, just spin */
 	if (irq == -1)
-		return 0;
+		return;
 
 	start = spin_time_start();
 
-	/* announce we're spinning */
-	prev = spinning_lock(xl);
+	/*
	 * Make sure an interrupt handler can't upset things in a
+	 * partially setup state.
+	 */
+	local_irq_save(flags);
+	/*
+	 * We don't really care if we're overwriting some other
+	 * (lock,want) pair, as that would mean that we're currently
+	 * in an interrupt context, and the outer context had
+	 * interrupts enabled.  That has already kicked the VCPU out
+	 * of xen_poll_irq(), so it will just return spuriously and
+	 * retry with newly setup (lock,want).
+	 *
+	 * The ordering protocol on this is that the "lock" pointer
+	 * may only be set non-NULL if the "want" ticket is correct.
+	 * If we're updating "want", we must first clear "lock".
+	 */
+	w->lock = NULL;
+	smp_wmb();
+	w->want = want;
+	smp_wmb();
+	w->lock = lock;
 
-	ADD_STATS(taken_slow, 1);
-	ADD_STATS(taken_slow_nested, prev != NULL);
+	/* This uses set_bit, which is atomic and therefore a barrier */
+	cpumask_set_cpu(cpu, &waiting_cpus);
+	add_stats(TAKEN_SLOW, 1);
 
-	do {
-		unsigned long flags;
+	/* clear pending */
+	xen_clear_irq_pending(irq);
 
-		/* clear pending */
-		xen_clear_irq_pending(irq);
+	/* Only check lock once pending cleared */
+	barrier();
 
-		/* check again make sure it didn't become free while
-		   we weren't looking  */
-		ret = xen_spin_trylock(lock);
-		if (ret) {
-			ADD_STATS(taken_slow_pickup, 1);
+	/*
+	 * Mark entry to slowpath before doing the pickup test to make
+	 * sure we don't deadlock with an unlocker.
+	 */
+	__ticket_enter_slowpath(lock);
 
-			/*
-			 * If we interrupted another spinlock while it
-			 * was blocking, make sure it doesn't block
-			 * without rechecking the lock.
-			 */
-			if (prev != NULL)
-				xen_set_irq_pending(irq);
-			goto out;
-		}
+	/*
+	 * Check again to make sure it didn't become free while
+	 * we weren't looking.
+	 */
+	if (ACCESS_ONCE(lock->tickets.head) == want) {
+		add_stats(TAKEN_SLOW_PICKUP, 1);
+		goto out;
+	}
 
-		flags = arch_local_save_flags();
-		if (irq_enable) {
-			ADD_STATS(taken_slow_irqenable, 1);
-			raw_local_irq_enable();
-		}
+	/* Allow interrupts while blocked */
+	local_irq_restore(flags);
 
-		/*
-		 * Block until irq becomes pending.  If we're
-		 * interrupted at this point (after the trylock but
-		 * before entering the block), then the nested lock
-		 * handler guarantees that the irq will be left
-		 * pending if there's any chance the lock became free;
-		 * xen_poll_irq() returns immediately if the irq is
-		 * pending.
-		 */
-		xen_poll_irq(irq);
+	/*
+	 * If an interrupt happens here, it will leave the wakeup irq
+	 * pending, which will cause xen_poll_irq() to return
+	 * immediately.
+	 */
 
-		raw_local_irq_restore(flags);
+	/* Block until irq becomes pending (or perhaps a spurious wakeup) */
+	xen_poll_irq(irq);
+	add_stats(TAKEN_SLOW_SPURIOUS, !xen_test_irq_pending(irq));
 
-		ADD_STATS(taken_slow_spurious, !xen_test_irq_pending(irq));
-	} while (!xen_test_irq_pending(irq)); /* check for spurious wakeups */
+	local_irq_save(flags);
 
 	kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq));
-
 out:
-	unspinning_lock(xl, prev);
-	spin_time_accum_blocked(start);
-
-	return ret;
-}
-
-static inline void __xen_spin_lock(struct arch_spinlock *lock, bool irq_enable)
-{
-	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
-	unsigned timeout;
-	u8 oldval;
-	u64 start_spin;
-
-	ADD_STATS(taken, 1);
-
-	start_spin = spin_time_start();
-
-	do {
-		u64 start_spin_fast = spin_time_start();
-
-		timeout = TIMEOUT;
-
-		asm("1: xchgb %1,%0\n"
-		    "   testb %1,%1\n"
-		    "   jz 3f\n"
-		    "2: rep;nop\n"
-		    "   cmpb $0,%0\n"
-		    "   je 1b\n"
-		    "   dec %2\n"
-		    "   jnz 2b\n"
-		    "3:\n"
-		    : "+m" (xl->lock), "=q" (oldval), "+r" (timeout)
-		    : "1" (1)
-		    : "memory");
+	cpumask_clear_cpu(cpu, &waiting_cpus);
+	w->lock = NULL;
 
-		spin_time_accum_spinning(start_spin_fast);
+	local_irq_restore(flags);
 
-	} while (unlikely(oldval != 0 &&
-			  (TIMEOUT == ~0 || !xen_spin_lock_slow(lock, irq_enable))));
-
-	spin_time_accum_total(start_spin);
-}
-
-static void xen_spin_lock(struct arch_spinlock *lock)
-{
-	__xen_spin_lock(lock, false);
-}
-
-static void xen_spin_lock_flags(struct arch_spinlock *lock, unsigned long flags)
-{
-	__xen_spin_lock(lock, !raw_irqs_disabled_flags(flags));
+	spin_time_accum_blocked(start);
 }
+PV_CALLEE_SAVE_REGS_THUNK(xen_lock_spinning);
 
-static noinline void xen_spin_unlock_slow(struct xen_spinlock *xl)
+static void xen_unlock_kick(struct arch_spinlock *lock, __ticket_t next)
 {
 	int cpu;
 
-	ADD_STATS(released_slow, 1);
+	add_stats(RELEASED_SLOW, 1);
+
+	for_each_cpu(cpu, &waiting_cpus) {
+		const struct xen_lock_waiting *w = &per_cpu(lock_waiting, cpu);
 
-	for_each_online_cpu(cpu) {
-		/* XXX should mix up next cpu selection */
-		if (per_cpu(lock_spinners, cpu) == xl) {
-			ADD_STATS(released_slow_kicked, 1);
+		/* Make sure we read lock before want */
+		if (ACCESS_ONCE(w->lock) == lock &&
+		    ACCESS_ONCE(w->want) == next) {
+			add_stats(RELEASED_SLOW_KICKED, 1);
 			xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR);
+			break;
 		}
 	}
 }
 
-static void xen_spin_unlock(struct arch_spinlock *lock)
-{
-	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
-
-	ADD_STATS(released, 1);
-
-	smp_wmb();		/* make sure no writes get moved after unlock */
-	xl->lock = 0;		/* release lock */
-
-	/*
-	 * Make sure unlock happens before checking for waiting
-	 * spinners.  We need a strong barrier to enforce the
-	 * write-read ordering to different memory locations, as the
-	 * CPU makes no implied guarantees about their ordering.
-	 */
-	mb();
-
-	if (unlikely(xl->spinners))
-		xen_spin_unlock_slow(xl);
-}
-
 static irqreturn_t dummy_handler(int irq, void *dev_id)
 {
 	BUG();
@@ -408,6 +283,8 @@ void xen_uninit_lock_cpu(int cpu)
 	per_cpu(irq_name, cpu) = NULL;
 }
 
+static bool xen_pvspin __initdata = true;
+
 void __init xen_init_spinlocks(void)
 {
 	/*
@@ -417,15 +294,23 @@ void __init xen_init_spinlocks(void)
 	if (xen_hvm_domain())
 		return;
 
-	BUILD_BUG_ON(sizeof(struct xen_spinlock) > sizeof(arch_spinlock_t));
+	if (!xen_pvspin) {
+		printk(KERN_DEBUG "xen: PV spinlocks disabled\n");
+		return;
+	}
 
-	pv_lock_ops.spin_is_locked = xen_spin_is_locked;
-	pv_lock_ops.spin_is_contended = xen_spin_is_contended;
-	pv_lock_ops.spin_lock = xen_spin_lock;
-	pv_lock_ops.spin_lock_flags = xen_spin_lock_flags;
-	pv_lock_ops.spin_trylock = xen_spin_trylock;
-	pv_lock_ops.spin_unlock = xen_spin_unlock;
+	static_key_slow_inc(&paravirt_ticketlocks_enabled);
+
+	pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(xen_lock_spinning);
+	pv_lock_ops.unlock_kick = xen_unlock_kick;
+}
+
+static __init int xen_parse_nopvspin(char *arg)
+{
+	xen_pvspin = false;
+	return 0;
 }
+early_param("xen_nopvspin", xen_parse_nopvspin);
 
 #ifdef CONFIG_XEN_DEBUG_FS
 
@@ -442,37 +327,21 @@ static int __init xen_spinlock_debugfs(void)
 
 	debugfs_create_u8("zero_stats", 0644, d_spin_debug, &zero_stats);
 
-	debugfs_create_u32("timeout", 0644, d_spin_debug, &lock_timeout);
-
-	debugfs_create_u64("taken", 0444, d_spin_debug, &spinlock_stats.taken);
 	debugfs_create_u32("taken_slow", 0444, d_spin_debug,
-			   &spinlock_stats.taken_slow);
-	debugfs_create_u32("taken_slow_nested", 0444, d_spin_debug,
-			   &spinlock_stats.taken_slow_nested);
+			   &spinlock_stats.contention_stats[TAKEN_SLOW]);
 	debugfs_create_u32("taken_slow_pickup", 0444, d_spin_debug,
-			   &spinlock_stats.taken_slow_pickup);
+			   &spinlock_stats.contention_stats[TAKEN_SLOW_PICKUP]);
 	debugfs_create_u32("taken_slow_spurious", 0444, d_spin_debug,
-			   &spinlock_stats.taken_slow_spurious);
-	debugfs_create_u32("taken_slow_irqenable", 0444, d_spin_debug,
-			   &spinlock_stats.taken_slow_irqenable);
+			   &spinlock_stats.contention_stats[TAKEN_SLOW_SPURIOUS]);
 
-	debugfs_create_u64("released", 0444, d_spin_debug, &spinlock_stats.released);
 	debugfs_create_u32("released_slow", 0444, d_spin_debug,
-			   &spinlock_stats.released_slow);
+			   &spinlock_stats.contention_stats[RELEASED_SLOW]);
 	debugfs_create_u32("released_slow_kicked", 0444, d_spin_debug,
-			   &spinlock_stats.released_slow_kicked);
+			   &spinlock_stats.contention_stats[RELEASED_SLOW_KICKED]);
 
-	debugfs_create_u64("time_spinning", 0444, d_spin_debug,
-			   &spinlock_stats.time_spinning);
 	debugfs_create_u64("time_blocked", 0444, d_spin_debug,
 			   &spinlock_stats.time_blocked);
 
-	debugfs_create_u64("time_total", 0444, d_spin_debug,
-			   &spinlock_stats.time_total);
-
-	debugfs_create_u32_array("histo_total", 0444, d_spin_debug,
-				 spinlock_stats.histo_spin_total, HISTO_BUCKETS + 1);
-	debugfs_create_u32_array("histo_spinning", 0444, d_spin_debug,
-				 spinlock_stats.histo_spin_spinning, HISTO_BUCKETS + 1);
 	debugfs_create_u32_array("histo_blocked", 0444, d_spin_debug,
 				 spinlock_stats.histo_spin_blocked, HISTO_BUCKETS + 1);
 
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 86782c5d7e2a..95f8c6142328 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -105,9 +105,9 @@ static inline void __init xen_init_apic(void)
 
 /* Declare an asm function, along with symbols needed to make it inlineable */
 #define DECL_ASM(ret, name, ...)		\
-	ret name(__VA_ARGS__);			\
-	extern char name##_end[];		\
-	extern char name##_reloc[]		\
+	__visible ret name(__VA_ARGS__);	\
+	extern char name##_end[] __visible;	\
+	extern char name##_reloc[] __visible
 
 DECL_ASM(void, xen_irq_enable_direct, void);
 DECL_ASM(void, xen_irq_disable_direct, void);
@@ -115,11 +115,11 @@ DECL_ASM(unsigned long, xen_save_fl_direct, void);
 DECL_ASM(void, xen_restore_fl_direct, unsigned long);
 
 /* These are not functions, and cannot be called normally */
-void xen_iret(void);
-void xen_sysexit(void);
-void xen_sysret32(void);
-void xen_sysret64(void);
-void xen_adjust_exception_frame(void);
+__visible void xen_iret(void);
+__visible void xen_sysexit(void);
+__visible void xen_sysret32(void);
+__visible void xen_sysret64(void);
+__visible void xen_adjust_exception_frame(void);
 
 extern int xen_panic_handler_init(void);
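__visible expands to __attribute__((externally_visible)); it marks symbols that are referenced from code the compiler cannot see, here the hand-written assembly entry stubs, so that whole-program optimization (-flto) neither localizes nor discards them. A stand-alone sketch of the pattern, assuming x86-64 Linux and GCC; the function name and the clobber list are illustrative, not taken from the patch:

	#include <stdio.h>

	/* Referenced only from the inline asm in main(); under -flto the
	 * optimizer cannot see that reference, so this attribute is what
	 * keeps the symbol emitted and globally visible. */
	__attribute__((externally_visible)) void from_asm_only(void)
	{
		puts("reached via an asm-level reference");
	}

	int main(void)
	{
		/* Call the function by symbol name from assembly, the way the
		 * Xen stubs reference xen_iret et al.; declare the registers a
		 * SysV x86-64 call may clobber to keep the sketch well defined. */
		asm volatile("call from_asm_only"
			     : : : "rax", "rcx", "rdx", "rsi", "rdi",
			       "r8", "r9", "r10", "r11", "memory", "cc");
		return 0;
	}

Built with plain gcc -O2 this runs as-is; under -flto, removing the attribute can let the optimizer drop or localize from_asm_only, after which the link fails with an undefined reference from the asm statement.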