977 files changed, 28039 insertions, 15518 deletions
diff --git a/arch/Kconfig b/arch/Kconfig
index 21d0089117fe..2520ca5b42eb 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -931,6 +931,18 @@ config STRICT_MODULE_RWX
 config ARCH_WANT_RELAX_ORDER
 	bool
 
+config ARCH_HAS_REFCOUNT
+	bool
+	help
+	  An architecture selects this when it has implemented refcount_t
+	  using open coded assembly primitives that provide an optimized
+	  refcount_t implementation, possibly at the expense of some full
+	  refcount state checks of CONFIG_REFCOUNT_FULL=y.
+
+	  The refcount overflow check behavior, however, must be retained.
+	  Catching overflows is the primary security concern for protecting
+	  against bugs in reference counts.
+
 config REFCOUNT_FULL
 	bool "Perform full reference count validation at the expense of speed"
 	help
diff --git a/arch/alpha/defconfig b/arch/alpha/defconfig
index 539e8b5a6cbd..f4ec420d7f2d 100644
--- a/arch/alpha/defconfig
+++ b/arch/alpha/defconfig
@@ -19,7 +19,6 @@ CONFIG_INET_AH=m
 CONFIG_INET_ESP=m
 # CONFIG_IPV6 is not set
 CONFIG_NETFILTER=y
-CONFIG_IP_NF_QUEUE=m
 CONFIG_IP_NF_IPTABLES=m
 CONFIG_IP_NF_FILTER=m
 CONFIG_VLAN_8021Q=m
@@ -57,7 +56,6 @@ CONFIG_SERIAL_8250_CONSOLE=y
 CONFIG_RTC=y
 CONFIG_EXT2_FS=y
 CONFIG_REISERFS_FS=m
-CONFIG_AUTOFS_FS=m
 CONFIG_ISO9660_FS=y
 CONFIG_MSDOS_FS=y
 CONFIG_VFAT_FS=y
diff --git a/arch/alpha/include/asm/Kbuild b/arch/alpha/include/asm/Kbuild
index d103db5af5ff..5b974ab8425c 100644
--- a/arch/alpha/include/asm/Kbuild
+++ b/arch/alpha/include/asm/Kbuild
@@ -3,6 +3,7 @@
 generic-y += clkdev.h
 generic-y += exec.h
 generic-y += export.h
+generic-y += fb.h
 generic-y += irq_work.h
 generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
diff --git a/arch/alpha/include/asm/asm-prototypes.h b/arch/alpha/include/asm/asm-prototypes.h
new file mode 100644
index 000000000000..d12c68ea340b
--- /dev/null
+++ b/arch/alpha/include/asm/asm-prototypes.h
@@ -0,0 +1,18 @@
+#include <linux/spinlock.h>
+
+#include <asm/checksum.h>
+#include <asm/console.h>
+#include <asm/page.h>
+#include <asm/string.h>
+#include <asm/uaccess.h>
+
+#include <asm-generic/asm-prototypes.h>
+
+extern void __divl(void);
+extern void __reml(void);
+extern void __divq(void);
+extern void __remq(void);
+extern void __divlu(void);
+extern void __remlu(void);
+extern void __divqu(void);
+extern void __remqu(void);
diff --git a/arch/alpha/include/asm/core_marvel.h b/arch/alpha/include/asm/core_marvel.h
index dad300fa14ce..8dcf9dbda618 100644
--- a/arch/alpha/include/asm/core_marvel.h
+++ b/arch/alpha/include/asm/core_marvel.h
@@ -312,7 +312,7 @@ struct io7 {
 	io7_port7_csrs *csrs;
 	struct io7_port ports[IO7_NUM_PORTS];
 
-	spinlock_t irq_lock;
+	raw_spinlock_t irq_lock;
 };
 
 #ifndef __EXTERN_INLINE
diff --git a/arch/alpha/include/asm/fb.h b/arch/alpha/include/asm/fb.h
deleted file mode 100644
index fa9bbb96b2b3..000000000000
--- a/arch/alpha/include/asm/fb.h
+++ /dev/null
@@ -1,13 +0,0 @@
-#ifndef _ASM_FB_H_
-#define _ASM_FB_H_
-#include <linux/device.h>
-
-/* Caching is off in the I/O space quadrant by design.  */
-#define fb_pgprotect(...) do {} while (0)
-
-static inline int fb_is_primary_device(struct fb_info *info)
-{
-	return 0;
-}
-
-#endif /* _ASM_FB_H_ */
diff --git a/arch/alpha/include/asm/futex.h b/arch/alpha/include/asm/futex.h
index fb01dfb760c2..05a70edd57b6 100644
--- a/arch/alpha/include/asm/futex.h
+++ b/arch/alpha/include/asm/futex.h
@@ -25,18 +25,10 @@
 	:	"r" (uaddr), "r"(oparg)				\
 	:	"memory")
 
-static inline int futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
+static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
+		u32 __user *uaddr)
 {
-	int op = (encoded_op >> 28) & 7;
-	int cmp = (encoded_op >> 24) & 15;
-	int oparg = (encoded_op << 8) >> 20;
-	int cmparg = (encoded_op << 20) >> 20;
 	int oldval = 0, ret;
-	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
-		oparg = 1 << oparg;
-
-	if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
-		return -EFAULT;
 
 	pagefault_disable();
 
@@ -62,17 +54,9 @@ static inline int futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
 
 	pagefault_enable();
 
-	if (!ret) {
-		switch (cmp) {
-		case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
-		case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
-		case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
-		case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
-		case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
-		case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
-		default: ret = -ENOSYS;
-		}
-	}
+	if (!ret)
+		*oval = oldval;
+
 	return ret;
 }
 
diff --git a/arch/alpha/include/asm/spinlock.h b/arch/alpha/include/asm/spinlock.h
index a40b9fc0c6c3..718ac0b64adf 100644
--- a/arch/alpha/include/asm/spinlock.h
+++ b/arch/alpha/include/asm/spinlock.h
@@ -16,11 +16,6 @@
 #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
 #define arch_spin_is_locked(x)	((x)->lock != 0)
 
-static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
-{
-	smp_cond_load_acquire(&lock->lock, !VAL);
-}
-
 static inline int arch_spin_value_unlocked(arch_spinlock_t lock)
 {
         return lock.lock == 0;
diff --git a/arch/alpha/include/uapi/asm/mman.h b/arch/alpha/include/uapi/asm/mman.h
index 02760f6e6ca4..3b26cc62dadb 100644
--- a/arch/alpha/include/uapi/asm/mman.h
+++ b/arch/alpha/include/uapi/asm/mman.h
@@ -64,20 +64,12 @@
 					   overrides the coredump filter bits */
 #define MADV_DODUMP	17		/* Clear the MADV_NODUMP flag */
 
+#define MADV_WIPEONFORK 18		/* Zero memory on fork, child only */
+#define MADV_KEEPONFORK 19		/* Undo MADV_WIPEONFORK */
+
 /* compatibility flags */
 #define MAP_FILE	0
 
-/*
- * When MAP_HUGETLB is set bits [26:31] encode the log2 of the huge page size.
- * This gives us 6 bits, which is enough until someone invents 128 bit address
- * spaces.
- *
- * Assume these are all power of twos.
- * When 0 use the default page size.
- */
-#define MAP_HUGE_SHIFT	26
-#define MAP_HUGE_MASK	0x3f
-
 #define PKEY_DISABLE_ACCESS	0x1
 #define PKEY_DISABLE_WRITE	0x2
 #define PKEY_ACCESS_MASK	(PKEY_DISABLE_ACCESS |\
diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h
index 7b285dd4fe05..c6133a045352 100644
--- a/arch/alpha/include/uapi/asm/socket.h
+++ b/arch/alpha/include/uapi/asm/socket.h
@@ -109,4 +109,6 @@
 
 #define SO_PEERGROUPS		59
 
+#define SO_ZEROCOPY		60
+
 #endif /* _UAPI_ASM_SOCKET_H */
diff --git a/arch/alpha/include/uapi/asm/unistd.h b/arch/alpha/include/uapi/asm/unistd.h
index a2945fea6c86..53de540e39a7 100644
--- a/arch/alpha/include/uapi/asm/unistd.h
+++ b/arch/alpha/include/uapi/asm/unistd.h
@@ -366,11 +366,6 @@
 #define __NR_epoll_create		407
 #define __NR_epoll_ctl			408
 #define __NR_epoll_wait			409
-/* Feb 2007: These three sys_epoll defines shouldn't be here but culling
- * them would break userspace apps ... we'll kill them off in 2010 :) */
-#define __NR_sys_epoll_create		__NR_epoll_create
-#define __NR_sys_epoll_ctl		__NR_epoll_ctl
-#define __NR_sys_epoll_wait		__NR_epoll_wait
 #define __NR_remap_file_pages		410
 #define __NR_set_tid_address		411
 #define __NR_restart_syscall		412
diff --git a/arch/alpha/kernel/core_marvel.c b/arch/alpha/kernel/core_marvel.c
index 03ff832b1cb4..b10c316475dd 100644
--- a/arch/alpha/kernel/core_marvel.c
+++ b/arch/alpha/kernel/core_marvel.c
@@ -118,7 +118,7 @@ alloc_io7(unsigned int pe)
 
 	io7 = alloc_bootmem(sizeof(*io7));
 	io7->pe = pe;
-	spin_lock_init(&io7->irq_lock);
+	raw_spin_lock_init(&io7->irq_lock);
 
 	for (h = 0; h < 4; h++) {
 		io7->ports[h].io7 = io7;
diff --git a/arch/alpha/kernel/pci-noop.c b/arch/alpha/kernel/pci-noop.c
index ffbdb3fb672f..676bab6e3123 100644
--- a/arch/alpha/kernel/pci-noop.c
+++ b/arch/alpha/kernel/pci-noop.c
@@ -42,11 +42,7 @@ alloc_pci_controller(void)
 struct resource * __init
 alloc_resource(void)
 {
-	struct resource *res;
-
-	res = alloc_bootmem(sizeof(*res));
-
-	return res;
+	return alloc_bootmem(sizeof(struct resource));
 }
 
 asmlinkage long
diff --git a/arch/alpha/kernel/pci-sysfs.c b/arch/alpha/kernel/pci-sysfs.c
index 92c0d460815b..cbecd527c696 100644
--- a/arch/alpha/kernel/pci-sysfs.c
+++ b/arch/alpha/kernel/pci-sysfs.c
@@ -38,7 +38,7 @@ static int __pci_mmap_fits(struct pci_dev *pdev, int num,
 	unsigned long nr, start, size;
 	int shift = sparse ? 5 : 0;
 
-	nr = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+	nr = vma_pages(vma);
 	start = vma->vm_pgoff;
 	size = ((pci_resource_len(pdev, num) - 1) >> (PAGE_SHIFT - shift)) + 1;
 
@@ -64,8 +64,7 @@ static int pci_mmap_resource(struct kobject *kobj,
 			     struct bin_attribute *attr,
 			     struct vm_area_struct *vma, int sparse)
 {
-	struct pci_dev *pdev = to_pci_dev(container_of(kobj,
-						       struct device, kobj));
+	struct pci_dev *pdev = to_pci_dev(kobj_to_dev(kobj));
 	struct resource *res = attr->private;
 	enum pci_mmap_state mmap_type;
 	struct pci_bus_region bar;
@@ -255,7 +254,7 @@ static int __legacy_mmap_fits(struct pci_controller *hose,
 {
 	unsigned long nr, start, size;
 
-	nr = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+	nr = vma_pages(vma);
 	start = vma->vm_pgoff;
 	size = ((res_size - 1) >> PAGE_SHIFT) + 1;
 
diff --git a/arch/alpha/kernel/pci.c b/arch/alpha/kernel/pci.c
index 5f387ee5b5c5..8322df174bbf 100644
--- a/arch/alpha/kernel/pci.c
+++ b/arch/alpha/kernel/pci.c
@@ -379,11 +379,7 @@ alloc_pci_controller(void)
 struct resource * __init
 alloc_resource(void)
 {
-	struct resource *res;
-
-	res = alloc_bootmem(sizeof(*res));
-
-	return res;
+	return alloc_bootmem(sizeof(struct resource));
 }
 
 
diff --git a/arch/alpha/kernel/setup.c b/arch/alpha/kernel/setup.c
index 491e6a604e82..249229ab4942 100644
--- a/arch/alpha/kernel/setup.c
+++ b/arch/alpha/kernel/setup.c
@@ -1094,8 +1094,9 @@ get_sysnames(unsigned long type, unsigned long variation, unsigned long cpu,
 	default: /* default to variation "0" for now */
 		break;
 	case ST_DEC_EB164:
-		if (member < ARRAY_SIZE(eb164_indices))
-			*variation_name = eb164_names[eb164_indices[member]];
+		if (member >= ARRAY_SIZE(eb164_indices))
+			break;
+		*variation_name = eb164_names[eb164_indices[member]];
 		/* PC164 may show as EB164 variation, but with EV56 CPU,
 		   so, since no true EB164 had anything but EV5... */
 		if (eb164_indices[member] == 0 && cpu == EV56_CPU)
diff --git a/arch/alpha/kernel/smc37c669.c b/arch/alpha/kernel/smc37c669.c
index c803fc76ae4f..4dbd4e415041 100644
--- a/arch/alpha/kernel/smc37c669.c
+++ b/arch/alpha/kernel/smc37c669.c
@@ -2007,11 +2007,8 @@ static void __init SMC37c669_config_mode(
 static unsigned char __init SMC37c669_read_config( 
     unsigned char index )
 {
-    unsigned char data;
-
-    wb( &SMC37c669->index_port, index );
-    data = rb( &SMC37c669->data_port );
-    return data;
+	wb(&SMC37c669->index_port, index);
+	return rb(&SMC37c669->data_port);
 }
 
 /*
diff --git a/arch/alpha/kernel/sys_marvel.c b/arch/alpha/kernel/sys_marvel.c
index 24e41bd7d3c9..3e533920371f 100644
--- a/arch/alpha/kernel/sys_marvel.c
+++ b/arch/alpha/kernel/sys_marvel.c
@@ -115,11 +115,11 @@ io7_enable_irq(struct irq_data *d)
 		return;
 	}
 
-	spin_lock(&io7->irq_lock);
+	raw_spin_lock(&io7->irq_lock);
 	*ctl |= 1UL << 24;
 	mb();
 	*ctl;
-	spin_unlock(&io7->irq_lock);
+	raw_spin_unlock(&io7->irq_lock);
 }
 
 static void
@@ -136,11 +136,11 @@ io7_disable_irq(struct irq_data *d)
 		return;
 	}
 
-	spin_lock(&io7->irq_lock);
+	raw_spin_lock(&io7->irq_lock);
 	*ctl &= ~(1UL << 24);
 	mb();
 	*ctl;
-	spin_unlock(&io7->irq_lock);
+	raw_spin_unlock(&io7->irq_lock);
 }
 
 static void
@@ -263,7 +263,7 @@ init_io7_irqs(struct io7 *io7,
 	 */
 	printk("  Interrupts reported to CPU at PE %u\n", boot_cpuid);
 
-	spin_lock(&io7->irq_lock);
+	raw_spin_lock(&io7->irq_lock);
 
 	/* set up the error irqs */
 	io7_redirect_irq(io7, &io7->csrs->HLT_CTL.csr, boot_cpuid);
@@ -295,7 +295,7 @@ init_io7_irqs(struct io7 *io7,
 	for (i = 0; i < 16; ++i)
 		init_one_io7_msi(io7, i, boot_cpuid);
 
-	spin_unlock(&io7->irq_lock);
+	raw_spin_unlock(&io7->irq_lock);
 }
 
 static void __init
diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c
index 65bb102d985b..ddb89a18cf26 100644
--- a/arch/alpha/kernel/traps.c
+++ b/arch/alpha/kernel/traps.c
@@ -193,8 +193,10 @@ die_if_kernel(char * str, struct pt_regs *regs, long err, unsigned long *r9_15)
 static long dummy_emul(void) { return 0; }
 long (*alpha_fp_emul_imprecise)(struct pt_regs *regs, unsigned long writemask)
   = (void *)dummy_emul;
+EXPORT_SYMBOL_GPL(alpha_fp_emul_imprecise);
 long (*alpha_fp_emul) (unsigned long pc)
   = (void *)dummy_emul;
+EXPORT_SYMBOL_GPL(alpha_fp_emul);
 #else
 long alpha_fp_emul_imprecise(struct pt_regs *regs, unsigned long writemask);
 long alpha_fp_emul (unsigned long pc);
diff --git a/arch/alpha/math-emu/math.c b/arch/alpha/math-emu/math.c
index d17d705f6545..1c2d456da7f2 100644
--- a/arch/alpha/math-emu/math.c
+++ b/arch/alpha/math-emu/math.c
@@ -53,6 +53,7 @@ extern void alpha_write_fp_reg_s (unsigned long reg, unsigned long val);
 #ifdef MODULE
 
 MODULE_DESCRIPTION("FP Software completion module");
+MODULE_LICENSE("GPL v2");
 
 extern long (*alpha_fp_emul_imprecise)(struct pt_regs *, unsigned long);
 extern long (*alpha_fp_emul) (unsigned long pc);
diff --git a/arch/arc/boot/dts/axs10x_mb.dtsi b/arch/arc/boot/dts/axs10x_mb.dtsi
index 0ff7e07edcd4..2367a67c5f10 100644
--- a/arch/arc/boot/dts/axs10x_mb.dtsi
+++ b/arch/arc/boot/dts/axs10x_mb.dtsi
@@ -101,7 +101,6 @@
 		mmc@0x15000 {
 			compatible = "altr,socfpga-dw-mshc";
 			reg = < 0x15000 0x400 >;
-			num-slots = < 1 >;
 			fifo-depth = < 16 >;
 			card-detect-delay = < 200 >;
 			clocks = <&apbclk>, <&mmcclk>;
diff --git a/arch/arc/boot/dts/vdk_axs10x_mb.dtsi b/arch/arc/boot/dts/vdk_axs10x_mb.dtsi
index 459fc656b759..48bb4b4cd234 100644
--- a/arch/arc/boot/dts/vdk_axs10x_mb.dtsi
+++ b/arch/arc/boot/dts/vdk_axs10x_mb.dtsi
@@ -104,7 +104,6 @@
 		mmc@0x15000 {
 			compatible = "snps,dw-mshc";
 			reg = <0x15000 0x400>;
-			num-slots = <1>;
 			fifo-depth = <1024>;
 			card-detect-delay = <200>;
 			clocks = <&apbclk>, <&mmcclk>;
diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h
index 54b54da6384c..11859287c52a 100644
--- a/arch/arc/include/asm/atomic.h
+++ b/arch/arc/include/asm/atomic.h
@@ -123,6 +123,8 @@ static inline void atomic_set(atomic_t *v, int i)
 	atomic_ops_unlock(flags);
 }
 
+#define atomic_set_release(v, i)	atomic_set((v), (i))
+
 #endif
 
 /*
diff --git a/arch/arc/include/asm/futex.h b/arch/arc/include/asm/futex.h
index 11e1b1f3acda..eb887dd13e74 100644
--- a/arch/arc/include/asm/futex.h
+++ b/arch/arc/include/asm/futex.h
@@ -73,20 +73,11 @@
 
 #endif
 
-static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
+static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
+		u32 __user *uaddr)
 {
-	int op = (encoded_op >> 28) & 7;
-	int cmp = (encoded_op >> 24) & 15;
-	int oparg = (encoded_op << 8) >> 20;
-	int cmparg = (encoded_op << 20) >> 20;
 	int oldval = 0, ret;
 
-	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
-		oparg = 1 << oparg;
-
-	if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
-		return -EFAULT;
-
 #ifndef CONFIG_ARC_HAS_LLSC
 	preempt_disable();	/* to guarantee atomic r-m-w of futex op */
 #endif
@@ -118,30 +109,9 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
 	preempt_enable();
 #endif
 
-	if (!ret) {
-		switch (cmp) {
-		case FUTEX_OP_CMP_EQ:
-			ret = (oldval == cmparg);
-			break;
-		case FUTEX_OP_CMP_NE:
-			ret = (oldval != cmparg);
-			break;
-		case FUTEX_OP_CMP_LT:
-			ret = (oldval < cmparg);
-			break;
-		case FUTEX_OP_CMP_GE:
-			ret = (oldval >= cmparg);
-			break;
-		case FUTEX_OP_CMP_LE:
-			ret = (oldval <= cmparg);
-			break;
-		case FUTEX_OP_CMP_GT:
-			ret = (oldval > cmparg);
-			break;
-		default:
-			ret = -ENOSYS;
-		}
-	}
+	if (!ret)
+		*oval = oldval;
+
 	return ret;
 }
 
diff --git a/arch/arc/include/asm/spinlock.h b/arch/arc/include/asm/spinlock.h
index 233d5ffe6ec7..a325e6a36523 100644
--- a/arch/arc/include/asm/spinlock.h
+++ b/arch/arc/include/asm/spinlock.h
@@ -16,11 +16,6 @@
 #define arch_spin_is_locked(x)	((x)->slock != __ARCH_SPIN_LOCK_UNLOCKED__)
 #define arch_spin_lock_flags(lock, flags)	arch_spin_lock(lock)
 
-static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
-{
-	smp_cond_load_acquire(&lock->slock, !VAL);
-}
-
 #ifdef CONFIG_ARC_HAS_LLSC
 
 static inline void arch_spin_lock(arch_spinlock_t *lock)
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 61a0cb15067e..f1b3f1d575d4 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -50,7 +50,7 @@ config ARM
 	select HAVE_ARCH_SECCOMP_FILTER if (AEABI && !OABI_COMPAT)
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_ARM_SMCCC if CPU_V7
-	select HAVE_CBPF_JIT
+	select HAVE_EBPF_JIT if !CPU_ENDIAN_BE32
 	select HAVE_CC_STACKPROTECTOR
 	select HAVE_CONTEXT_TRACKING
 	select HAVE_C_RECORDMCOUNT
diff --git a/arch/arm/boot/compressed/efi-header.S b/arch/arm/boot/compressed/efi-header.S
index a17ca8d78656..c94a88ae834d 100644
--- a/arch/arm/boot/compressed/efi-header.S
+++ b/arch/arm/boot/compressed/efi-header.S
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2013-2015 Linaro Ltd
+ * Copyright (C) 2013-2017 Linaro Ltd
  * Authors: Roy Franz <roy.franz@linaro.org>
  *          Ard Biesheuvel <ard.biesheuvel@linaro.org>
  *
@@ -8,6 +8,9 @@
  * published by the Free Software Foundation.
  */
 
+#include <linux/pe.h>
+#include <linux/sizes.h>
+
 		.macro	__nop
 #ifdef CONFIG_EFI_STUB
 		@ This is almost but not quite a NOP, since it does clobber the
@@ -15,7 +18,7 @@
 		@ PE/COFF expects the magic string "MZ" at offset 0, while the
 		@ ARM/Linux boot protocol expects an executable instruction
 		@ there.
-		.inst	'M' | ('Z' << 8) | (0x1310 << 16)   @ tstne r0, #0x4d000
+		.inst	MZ_MAGIC | (0x1310 << 16)	@ tstne r0, #0x4d000
 #else
  AR_CLASS(	mov	r0, r0		)
   M_CLASS(	nop.w			)
@@ -34,96 +37,97 @@
 		@ The only 2 fields of the MSDOS header that are used are this
 		@ PE/COFF offset, and the "MZ" bytes at offset 0x0.
 		@
-		.long	pe_header - start	@ Offset to the PE header.
+		.long	pe_header - start		@ Offset to the PE header.
 
 pe_header:
-		.ascii	"PE\0\0"
+		.long	PE_MAGIC
 
 coff_header:
-		.short	0x01c2			@ ARM or Thumb
-		.short	2			@ nr_sections
-		.long	0 			@ TimeDateStamp
-		.long	0			@ PointerToSymbolTable
-		.long	1			@ NumberOfSymbols
-		.short	section_table - optional_header
-						@ SizeOfOptionalHeader
-		.short	0x306			@ Characteristics.
-						@ IMAGE_FILE_32BIT_MACHINE |
-						@ IMAGE_FILE_DEBUG_STRIPPED |
-						@ IMAGE_FILE_EXECUTABLE_IMAGE |
-						@ IMAGE_FILE_LINE_NUMS_STRIPPED
+		.short	IMAGE_FILE_MACHINE_THUMB	@ Machine
+		.short	section_count			@ NumberOfSections
+		.long	0 				@ TimeDateStamp
+		.long	0				@ PointerToSymbolTable
+		.long	0				@ NumberOfSymbols
+		.short	section_table - optional_header	@ SizeOfOptionalHeader
+		.short	IMAGE_FILE_32BIT_MACHINE | \
+			IMAGE_FILE_DEBUG_STRIPPED | \
+			IMAGE_FILE_EXECUTABLE_IMAGE | \
+			IMAGE_FILE_LINE_NUMS_STRIPPED	@ Characteristics
+
+#define __pecoff_code_size (__pecoff_data_start - __efi_start)
 
 optional_header:
-		.short	0x10b			@ PE32 format
-		.byte	0x02			@ MajorLinkerVersion
-		.byte	0x14			@ MinorLinkerVersion
-		.long	_end - __efi_start	@ SizeOfCode
-		.long	0			@ SizeOfInitializedData
-		.long	0			@ SizeOfUninitializedData
-		.long	efi_stub_entry - start	@ AddressOfEntryPoint
-		.long	start_offset		@ BaseOfCode
-		.long	0			@ data
+		.short	PE_OPT_MAGIC_PE32		@ PE32 format
+		.byte	0x02				@ MajorLinkerVersion
+		.byte	0x14				@ MinorLinkerVersion
+		.long	__pecoff_code_size		@ SizeOfCode
+		.long	__pecoff_data_size		@ SizeOfInitializedData
+		.long	0				@ SizeOfUninitializedData
+		.long	efi_stub_entry - start		@ AddressOfEntryPoint
+		.long	start_offset			@ BaseOfCode
+		.long	__pecoff_data_start - start	@ BaseOfData
 
 extra_header_fields:
-		.long	0			@ ImageBase
-		.long	0x200			@ SectionAlignment
-		.long	0x200			@ FileAlignment
-		.short	0			@ MajorOperatingSystemVersion
-		.short	0			@ MinorOperatingSystemVersion
-		.short	0			@ MajorImageVersion
-		.short	0			@ MinorImageVersion
-		.short	0			@ MajorSubsystemVersion
-		.short	0			@ MinorSubsystemVersion
-		.long	0			@ Win32VersionValue
+		.long	0				@ ImageBase
+		.long	SZ_4K				@ SectionAlignment
+		.long	SZ_512				@ FileAlignment
+		.short	0				@ MajorOsVersion
+		.short	0				@ MinorOsVersion
+		.short	0				@ MajorImageVersion
+		.short	0				@ MinorImageVersion
+		.short	0				@ MajorSubsystemVersion
+		.short	0				@ MinorSubsystemVersion
+		.long	0				@ Win32VersionValue
 
-		.long	_end - start		@ SizeOfImage
-		.long	start_offset		@ SizeOfHeaders
-		.long	0			@ CheckSum
-		.short	0xa			@ Subsystem (EFI application)
-		.short	0			@ DllCharacteristics
-		.long	0			@ SizeOfStackReserve
-		.long	0			@ SizeOfStackCommit
-		.long	0			@ SizeOfHeapReserve
-		.long	0			@ SizeOfHeapCommit
-		.long	0			@ LoaderFlags
-		.long	0x6			@ NumberOfRvaAndSizes
+		.long	__pecoff_end - start		@ SizeOfImage
+		.long	start_offset			@ SizeOfHeaders
+		.long	0				@ CheckSum
+		.short	IMAGE_SUBSYSTEM_EFI_APPLICATION	@ Subsystem
+		.short	0				@ DllCharacteristics
+		.long	0				@ SizeOfStackReserve
+		.long	0				@ SizeOfStackCommit
+		.long	0				@ SizeOfHeapReserve
+		.long	0				@ SizeOfHeapCommit
+		.long	0				@ LoaderFlags
+		.long	(section_table - .) / 8		@ NumberOfRvaAndSizes
 
-		.quad	0			@ ExportTable
-		.quad	0			@ ImportTable
-		.quad	0			@ ResourceTable
-		.quad	0			@ ExceptionTable
-		.quad	0			@ CertificationTable
-		.quad	0			@ BaseRelocationTable
+		.quad	0				@ ExportTable
+		.quad	0				@ ImportTable
+		.quad	0				@ ResourceTable
+		.quad	0				@ ExceptionTable
+		.quad	0				@ CertificationTable
+		.quad	0				@ BaseRelocationTable
 
 section_table:
-		@
-		@ The EFI application loader requires a relocation section
-		@ because EFI applications must be relocatable. This is a
-		@ dummy section as far as we are concerned.
-		@
-		.ascii	".reloc\0\0"
-		.long	0			@ VirtualSize
-		.long	0			@ VirtualAddress
-		.long	0			@ SizeOfRawData
-		.long	0			@ PointerToRawData
-		.long	0			@ PointerToRelocations
-		.long	0			@ PointerToLineNumbers
-		.short	0			@ NumberOfRelocations
-		.short	0			@ NumberOfLineNumbers
-		.long	0x42100040		@ Characteristics
-
 		.ascii	".text\0\0\0"
-		.long	_end - __efi_start	@ VirtualSize
-		.long	__efi_start		@ VirtualAddress
-		.long	_edata - __efi_start	@ SizeOfRawData
-		.long	__efi_start		@ PointerToRawData
-		.long	0			@ PointerToRelocations
-		.long	0			@ PointerToLineNumbers
-		.short	0			@ NumberOfRelocations
-		.short	0			@ NumberOfLineNumbers
-		.long	0xe0500020		@ Characteristics
+		.long	__pecoff_code_size		@ VirtualSize
+		.long	__efi_start			@ VirtualAddress
+		.long	__pecoff_code_size		@ SizeOfRawData
+		.long	__efi_start			@ PointerToRawData
+		.long	0				@ PointerToRelocations
+		.long	0				@ PointerToLineNumbers
+		.short	0				@ NumberOfRelocations
+		.short	0				@ NumberOfLineNumbers
+		.long	IMAGE_SCN_CNT_CODE | \
+			IMAGE_SCN_MEM_READ | \
+			IMAGE_SCN_MEM_EXECUTE		@ Characteristics
+
+		.ascii	".data\0\0\0"
+		.long	__pecoff_data_size		@ VirtualSize
+		.long	__pecoff_data_start - start	@ VirtualAddress
+		.long	__pecoff_data_rawsize		@ SizeOfRawData
+		.long	__pecoff_data_start - start	@ PointerToRawData
+		.long	0				@ PointerToRelocations
+		.long	0				@ PointerToLineNumbers
+		.short	0				@ NumberOfRelocations
+		.short	0				@ NumberOfLineNumbers
+		.long	IMAGE_SCN_CNT_INITIALIZED_DATA | \
+			IMAGE_SCN_MEM_READ | \
+			IMAGE_SCN_MEM_WRITE		@ Characteristics
+
+		.set	section_count, (. - section_table) / 40
 
-		.align	9
+		.align	12
 __efi_start:
 #endif
 		.endm
diff --git a/arch/arm/boot/compressed/vmlinux.lds.S b/arch/arm/boot/compressed/vmlinux.lds.S
index 81c493156ce8..7a4c59154361 100644
--- a/arch/arm/boot/compressed/vmlinux.lds.S
+++ b/arch/arm/boot/compressed/vmlinux.lds.S
@@ -48,13 +48,6 @@ SECTIONS
     *(.rodata)
     *(.rodata.*)
   }
-  .data : {
-    /*
-     * The EFI stub always executes from RAM, and runs strictly before the
-     * decompressor, so we can make an exception for its r/w data, and keep it
-     */
-    *(.data.efistub)
-  }
   .piggydata : {
     *(.piggydata)
   }
@@ -70,6 +63,26 @@ SECTIONS
   /* ensure the zImage file size is always a multiple of 64 bits */
   /* (without a dummy byte, ld just ignores the empty section) */
   .pad			: { BYTE(0); . = ALIGN(8); }
+
+#ifdef CONFIG_EFI_STUB
+  .data : ALIGN(4096) {
+    __pecoff_data_start = .;
+    /*
+     * The EFI stub always executes from RAM, and runs strictly before the
+     * decompressor, so we can make an exception for its r/w data, and keep it
+     */
+    *(.data.efistub)
+    __pecoff_data_end = .;
+
+    /*
+     * PE/COFF mandates a file size which is a multiple of 512 bytes if the
+     * section size equals or exceeds 4 KB
+     */
+    . = ALIGN(512);
+  }
+  __pecoff_data_rawsize = . - ADDR(.data);
+#endif
+
   _edata = .;
 
   _magic_sig = ZIMAGE_MAGIC(0x016f2818);
@@ -84,6 +97,9 @@ SECTIONS
   . = ALIGN(8);		/* the stack must be 64-bit aligned */
   .stack		: { *(.stack) }
 
+  PROVIDE(__pecoff_data_size = ALIGN(512) - ADDR(.data));
+  PROVIDE(__pecoff_end = ALIGN(512));
+
   .stab 0		: { *(.stab) }
   .stabstr 0		: { *(.stabstr) }
   .stab.excl 0		: { *(.stab.excl) }
diff --git a/arch/arm/boot/dts/imx6q-evi.dts b/arch/arm/boot/dts/imx6q-evi.dts
index 1f0f950dc11e..e0aea782c666 100644
--- a/arch/arm/boot/dts/imx6q-evi.dts
+++ b/arch/arm/boot/dts/imx6q-evi.dts
@@ -94,6 +94,15 @@
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_ecspi1 &pinctrl_ecspi1cs>;
 	status = "okay";
+
+	fpga: fpga@0 {
+		compatible = "altr,fpga-passive-serial";
+		spi-max-frequency = <20000000>;
+		reg = <0>;
+		pinctrl-0 = <&pinctrl_fpgaspi>;
+		nconfig-gpios = <&gpio4 9 GPIO_ACTIVE_LOW>;
+		nstat-gpios = <&gpio4 11 GPIO_ACTIVE_LOW>;
+	};
 };
 
 &ecspi3 {
@@ -319,6 +328,13 @@
 		>;
 	};
 
+	pinctrl_fpgaspi: fpgaspigrp {
+		fsl,pins = <
+			MX6QDL_PAD_KEY_ROW1__GPIO4_IO09 0x1b0b0
+			MX6QDL_PAD_KEY_ROW2__GPIO4_IO11 0x1b0b0
+		>;
+	};
+
 	pinctrl_gpminand: gpminandgrp {
 		fsl,pins = <
 			MX6QDL_PAD_NANDF_CLE__NAND_CLE 0xb0b1
diff --git a/arch/arm/boot/dts/imx7ulp-pinfunc.h b/arch/arm/boot/dts/imx7ulp-pinfunc.h
new file mode 100644
index 000000000000..fe511775b518
--- /dev/null
+++ b/arch/arm/boot/dts/imx7ulp-pinfunc.h
@@ -0,0 +1,468 @@
+/*
+ * Copyright 2016 Freescale Semiconductor, Inc.
+ * Copyright 2017 NXP
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#ifndef __DTS_IMX7ULP_PINFUNC_H
+#define __DTS_IMX7ULP_PINFUNC_H
+
+/*
+ * The pin function ID is a tuple of
+ * <mux_conf_reg input_reg mux_mode input_val>
+ */
+
+#define IMX7ULP_PAD_PTC0__PTC0                                       0x0000 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTC0__TRACE_D15                                  0x0000 0x0000 0xa 0x0
+#define IMX7ULP_PAD_PTC0__LPUART4_CTS_B                              0x0000 0x0244 0x4 0x1
+#define IMX7ULP_PAD_PTC0__LPI2C4_SCL                                 0x0000 0x0278 0x5 0x1
+#define IMX7ULP_PAD_PTC0__TPM4_CLKIN                                 0x0000 0x0298 0x6 0x1
+#define IMX7ULP_PAD_PTC0__FB_AD0                                     0x0000 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTC1__PTC1                                       0x0004 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTC1__TRACE_D14                                  0x0004 0x0000 0xa 0x0
+#define IMX7ULP_PAD_PTC1__LPUART4_RTS_B                              0x0004 0x0000 0x4 0x0
+#define IMX7ULP_PAD_PTC1__LPI2C4_SDA                                 0x0004 0x027c 0x5 0x1
+#define IMX7ULP_PAD_PTC1__TPM4_CH0                                   0x0004 0x0280 0x6 0x1
+#define IMX7ULP_PAD_PTC1__FB_AD1                                     0x0004 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTC2__PTC2                                       0x0008 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTC2__TRACE_D13                                  0x0008 0x0000 0xa 0x0
+#define IMX7ULP_PAD_PTC2__LPUART4_TX                                 0x0008 0x024c 0x4 0x1
+#define IMX7ULP_PAD_PTC2__LPI2C4_HREQ                                0x0008 0x0274 0x5 0x1
+#define IMX7ULP_PAD_PTC2__TPM4_CH1                                   0x0008 0x0284 0x6 0x1
+#define IMX7ULP_PAD_PTC2__FB_AD2                                     0x0008 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTC3__PTC3                                       0x000c 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTC3__TRACE_D12                                  0x000c 0x0000 0xa 0x0
+#define IMX7ULP_PAD_PTC3__LPUART4_RX                                 0x000c 0x0248 0x4 0x1
+#define IMX7ULP_PAD_PTC3__TPM4_CH2                                   0x000c 0x0288 0x6 0x1
+#define IMX7ULP_PAD_PTC3__FB_AD3                                     0x000c 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTC4__PTC4                                       0x0010 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTC4__TRACE_D11                                  0x0010 0x0000 0xa 0x0
+#define IMX7ULP_PAD_PTC4__FXIO1_D0                                   0x0010 0x0204 0x2 0x1
+#define IMX7ULP_PAD_PTC4__LPSPI2_PCS1                                0x0010 0x02a0 0x3 0x1
+#define IMX7ULP_PAD_PTC4__LPUART5_CTS_B                              0x0010 0x0250 0x4 0x1
+#define IMX7ULP_PAD_PTC4__LPI2C5_SCL                                 0x0010 0x02bc 0x5 0x1
+#define IMX7ULP_PAD_PTC4__TPM4_CH3                                   0x0010 0x028c 0x6 0x1
+#define IMX7ULP_PAD_PTC4__FB_AD4                                     0x0010 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTC5__PTC5                                       0x0014 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTC5__TRACE_D10                                  0x0014 0x0000 0xa 0x0
+#define IMX7ULP_PAD_PTC5__FXIO1_D1                                   0x0014 0x0208 0x2 0x1
+#define IMX7ULP_PAD_PTC5__LPSPI2_PCS2                                0x0014 0x02a4 0x3 0x1
+#define IMX7ULP_PAD_PTC5__LPUART5_RTS_B                              0x0014 0x0000 0x4 0x0
+#define IMX7ULP_PAD_PTC5__LPI2C5_SDA                                 0x0014 0x02c0 0x5 0x1
+#define IMX7ULP_PAD_PTC5__TPM4_CH4                                   0x0014 0x0290 0x6 0x1
+#define IMX7ULP_PAD_PTC5__FB_AD5                                     0x0014 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTC6__PTC6                                       0x0018 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTC6__TRACE_D9                                   0x0018 0x0000 0xa 0x0
+#define IMX7ULP_PAD_PTC6__FXIO1_D2                                   0x0018 0x020c 0x2 0x1
+#define IMX7ULP_PAD_PTC6__LPSPI2_PCS3                                0x0018 0x02a8 0x3 0x1
+#define IMX7ULP_PAD_PTC6__LPUART5_TX                                 0x0018 0x0258 0x4 0x1
+#define IMX7ULP_PAD_PTC6__LPI2C5_HREQ                                0x0018 0x02b8 0x5 0x1
+#define IMX7ULP_PAD_PTC6__TPM4_CH5                                   0x0018 0x0294 0x6 0x1
+#define IMX7ULP_PAD_PTC6__FB_AD6                                     0x0018 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTC7__PTC7                                       0x001c 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTC7__TRACE_D8                                   0x001c 0x0000 0xa 0x0
+#define IMX7ULP_PAD_PTC7__FXIO1_D3                                   0x001c 0x0210 0x2 0x1
+#define IMX7ULP_PAD_PTC7__LPUART5_RX                                 0x001c 0x0254 0x4 0x1
+#define IMX7ULP_PAD_PTC7__TPM5_CH1                                   0x001c 0x02c8 0x6 0x1
+#define IMX7ULP_PAD_PTC7__FB_AD7                                     0x001c 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTC8__PTC8                                       0x0020 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTC8__TRACE_D7                                   0x0020 0x0000 0xa 0x0
+#define IMX7ULP_PAD_PTC8__FXIO1_D4                                   0x0020 0x0214 0x2 0x1
+#define IMX7ULP_PAD_PTC8__LPSPI2_SIN                                 0x0020 0x02b0 0x3 0x1
+#define IMX7ULP_PAD_PTC8__LPUART6_CTS_B                              0x0020 0x025c 0x4 0x1
+#define IMX7ULP_PAD_PTC8__LPI2C6_SCL                                 0x0020 0x02fc 0x5 0x1
+#define IMX7ULP_PAD_PTC8__TPM5_CLKIN                                 0x0020 0x02cc 0x6 0x1
+#define IMX7ULP_PAD_PTC8__FB_AD8                                     0x0020 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTC9__PTC9                                       0x0024 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTC9__TRACE_D6                                   0x0024 0x0000 0xa 0x0
+#define IMX7ULP_PAD_PTC9__FXIO1_D5                                   0x0024 0x0218 0x2 0x1
+#define IMX7ULP_PAD_PTC9__LPSPI2_SOUT                                0x0024 0x02b4 0x3 0x1
+#define IMX7ULP_PAD_PTC9__LPUART6_RTS_B                              0x0024 0x0000 0x4 0x0
+#define IMX7ULP_PAD_PTC9__LPI2C6_SDA                                 0x0024 0x0300 0x5 0x1
+#define IMX7ULP_PAD_PTC9__TPM5_CH0                                   0x0024 0x02c4 0x6 0x1
+#define IMX7ULP_PAD_PTC9__FB_AD9                                     0x0024 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTC10__PTC10                                     0x0028 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTC10__TRACE_D5                                  0x0028 0x0000 0xa 0x0
+#define IMX7ULP_PAD_PTC10__FXIO1_D6                                  0x0028 0x021c 0x2 0x1
+#define IMX7ULP_PAD_PTC10__LPSPI2_SCK                                0x0028 0x02ac 0x3 0x1
+#define IMX7ULP_PAD_PTC10__LPUART6_TX                                0x0028 0x0264 0x4 0x1
+#define IMX7ULP_PAD_PTC10__LPI2C6_HREQ                               0x0028 0x02f8 0x5 0x1
+#define IMX7ULP_PAD_PTC10__TPM7_CH3                                  0x0028 0x02e8 0x6 0x1
+#define IMX7ULP_PAD_PTC10__FB_AD10                                   0x0028 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTC11__PTC11                                     0x002c 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTC11__TRACE_D4                                  0x002c 0x0000 0xa 0x0
+#define IMX7ULP_PAD_PTC11__FXIO1_D7                                  0x002c 0x0220 0x2 0x1
+#define IMX7ULP_PAD_PTC11__LPSPI2_PCS0                               0x002c 0x029c 0x3 0x1
+#define IMX7ULP_PAD_PTC11__LPUART6_RX                                0x002c 0x0260 0x4 0x1
+#define IMX7ULP_PAD_PTC11__TPM7_CH4                                  0x002c 0x02ec 0x6 0x1
+#define IMX7ULP_PAD_PTC11__FB_AD11                                   0x002c 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTC12__PTC12                                     0x0030 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTC12__TRACE_D3                                  0x0030 0x0000 0xa 0x0
+#define IMX7ULP_PAD_PTC12__FXIO1_D8                                  0x0030 0x0224 0x2 0x1
+#define IMX7ULP_PAD_PTC12__LPSPI3_PCS1                               0x0030 0x0314 0x3 0x1
+#define IMX7ULP_PAD_PTC12__LPUART7_CTS_B                             0x0030 0x0268 0x4 0x1
+#define IMX7ULP_PAD_PTC12__LPI2C7_SCL                                0x0030 0x0308 0x5 0x1
+#define IMX7ULP_PAD_PTC12__TPM7_CH5                                  0x0030 0x02f0 0x6 0x1
+#define IMX7ULP_PAD_PTC12__FB_AD12                                   0x0030 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTC13__PTC13                                     0x0034 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTC13__TRACE_D2                                  0x0034 0x0000 0xa 0x0
+#define IMX7ULP_PAD_PTC13__FXIO1_D9                                  0x0034 0x0228 0x2 0x1
+#define IMX7ULP_PAD_PTC13__LPSPI3_PCS2                               0x0034 0x0318 0x3 0x1
+#define IMX7ULP_PAD_PTC13__LPUART7_RTS_B                             0x0034 0x0000 0x4 0x0
+#define IMX7ULP_PAD_PTC13__LPI2C7_SDA                                0x0034 0x030c 0x5 0x1
+#define IMX7ULP_PAD_PTC13__TPM7_CLKIN                                0x0034 0x02f4 0x6 0x1
+#define IMX7ULP_PAD_PTC13__FB_AD13                                   0x0034 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTC14__PTC14                                     0x0038 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTC14__TRACE_D1                                  0x0038 0x0000 0xa 0x0
+#define IMX7ULP_PAD_PTC14__FXIO1_D10                                 0x0038 0x022c 0x2 0x1
+#define IMX7ULP_PAD_PTC14__LPSPI3_PCS3                               0x0038 0x031c 0x3 0x1
+#define IMX7ULP_PAD_PTC14__LPUART7_TX                                0x0038 0x0270 0x4 0x1
+#define IMX7ULP_PAD_PTC14__LPI2C7_HREQ                               0x0038 0x0304 0x5 0x1
+#define IMX7ULP_PAD_PTC14__TPM7_CH0                                  0x0038 0x02dc 0x6 0x1
+#define IMX7ULP_PAD_PTC14__FB_AD14                                   0x0038 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTC15__PTC15                                     0x003c 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTC15__TRACE_D0                                  0x003c 0x0000 0xa 0x0
+#define IMX7ULP_PAD_PTC15__FXIO1_D11                                 0x003c 0x0230 0x2 0x1
+#define IMX7ULP_PAD_PTC15__LPUART7_RX                                0x003c 0x026c 0x4 0x1
+#define IMX7ULP_PAD_PTC15__TPM7_CH1                                  0x003c 0x02e0 0x6 0x1
+#define IMX7ULP_PAD_PTC15__FB_AD15                                   0x003c 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTC16__PTC16                                     0x0040 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTC16__TRACE_CLKOUT                              0x0040 0x0000 0xa 0x0
+#define IMX7ULP_PAD_PTC16__FXIO1_D12                                 0x0040 0x0234 0x2 0x1
+#define IMX7ULP_PAD_PTC16__LPSPI3_SIN                                0x0040 0x0324 0x3 0x1
+#define IMX7ULP_PAD_PTC16__TPM7_CH2                                  0x0040 0x02e4 0x6 0x1
+#define IMX7ULP_PAD_PTC16__FB_ALE_FB_CS1_B_FB_TS_B                   0x0040 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTC17__PTC17                                     0x0044 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTC17__FXIO1_D13                                 0x0044 0x0238 0x2 0x1
+#define IMX7ULP_PAD_PTC17__LPSPI3_SOUT                               0x0044 0x0328 0x3 0x1
+#define IMX7ULP_PAD_PTC17__TPM6_CLKIN                                0x0044 0x02d8 0x6 0x1
+#define IMX7ULP_PAD_PTC17__FB_CS0_B                                  0x0044 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTC18__PTC18                                     0x0048 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTC18__FXIO1_D14                                 0x0048 0x023c 0x2 0x1
+#define IMX7ULP_PAD_PTC18__LPSPI3_SCK                                0x0048 0x0320 0x3 0x1
+#define IMX7ULP_PAD_PTC18__TPM6_CH0                                  0x0048 0x02d0 0x6 0x1
+#define IMX7ULP_PAD_PTC18__FB_OE_B                                   0x0048 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTC19__PTC19                                     0x004c 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTC19__FXIO1_D15                                 0x004c 0x0240 0x2 0x1
+#define IMX7ULP_PAD_PTC19__LPSPI3_PCS0                               0x004c 0x0310 0x3 0x1
+#define IMX7ULP_PAD_PTC19__TPM6_CH1                                  0x004c 0x02d4 0x6 0x1
+#define IMX7ULP_PAD_PTC19__FB_A16                                    0x004c 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTD0__PTD0                                       0x0080 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTD0__SDHC0_RESET_B                              0x0080 0x0000 0x8 0x0
+#define IMX7ULP_PAD_PTD1__PTD1                                       0x0084 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTD1__SDHC0_CMD                                  0x0084 0x0000 0x8 0x0
+#define IMX7ULP_PAD_PTD2__PTD2                                       0x0088 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTD2__SDHC0_CLK                                  0x0088 0x0000 0x8 0x0
+#define IMX7ULP_PAD_PTD3__PTD3                                       0x008c 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTD3__SDHC0_D7                                   0x008c 0x0000 0x8 0x0
+#define IMX7ULP_PAD_PTD4__PTD4                                       0x0090 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTD4__SDHC0_D6                                   0x0090 0x0000 0x8 0x0
+#define IMX7ULP_PAD_PTD5__PTD5                                       0x0094 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTD5__SDHC0_D5                                   0x0094 0x0000 0x8 0x0
+#define IMX7ULP_PAD_PTD6__PTD6                                       0x0098 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTD6__SDHC0_D4                                   0x0098 0x0000 0x8 0x0
+#define IMX7ULP_PAD_PTD7__PTD7                                       0x009c 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTD7__SDHC0_D3                                   0x009c 0x0000 0x8 0x0
+#define IMX7ULP_PAD_PTD8__PTD8                                       0x00a0 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTD8__TPM4_CLKIN                                 0x00a0 0x0298 0x6 0x2
+#define IMX7ULP_PAD_PTD8__SDHC0_D2                                   0x00a0 0x0000 0x8 0x0
+#define IMX7ULP_PAD_PTD9__PTD9                                       0x00a4 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTD9__TPM4_CH0                                   0x00a4 0x0280 0x6 0x2
+#define IMX7ULP_PAD_PTD9__SDHC0_D1                                   0x00a4 0x0000 0x8 0x0
+#define IMX7ULP_PAD_PTD10__PTD10                                     0x00a8 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTD10__TPM4_CH1                                  0x00a8 0x0284 0x6 0x2
+#define IMX7ULP_PAD_PTD10__SDHC0_D0                                  0x00a8 0x0000 0x8 0x0
+#define IMX7ULP_PAD_PTD11__PTD11                                     0x00ac 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTD11__TPM4_CH2                                  0x00ac 0x0288 0x6 0x2
+#define IMX7ULP_PAD_PTD11__SDHC0_DQS                                 0x00ac 0x0000 0x8 0x0
+#define IMX7ULP_PAD_PTE0__PTE0                                       0x0100 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTE0__FXIO1_D31                                  0x0100 0x0000 0x2 0x0
+#define IMX7ULP_PAD_PTE0__LPSPI2_PCS1                                0x0100 0x02a0 0x3 0x2
+#define IMX7ULP_PAD_PTE0__LPUART4_CTS_B                              0x0100 0x0244 0x4 0x2
+#define IMX7ULP_PAD_PTE0__LPI2C4_SCL                                 0x0100 0x0278 0x5 0x2
+#define IMX7ULP_PAD_PTE0__SDHC1_D1                                   0x0100 0x0000 0x8 0x0
+#define IMX7ULP_PAD_PTE0__FB_A25                                     0x0100 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTE1__PTE1                                       0x0104 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTE1__FXIO1_D30                                  0x0104 0x0000 0x2 0x0
+#define IMX7ULP_PAD_PTE1__LPSPI2_PCS2                                0x0104 0x02a4 0x3 0x2
+#define IMX7ULP_PAD_PTE1__LPUART4_RTS_B                              0x0104 0x0000 0x4 0x0
+#define IMX7ULP_PAD_PTE1__LPI2C4_SDA                                 0x0104 0x027c 0x5 0x2
+#define IMX7ULP_PAD_PTE1__SDHC1_D0                                   0x0104 0x0000 0x8 0x0
+#define IMX7ULP_PAD_PTE1__FB_A26                                     0x0104 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTE2__PTE2                                       0x0108 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTE2__FXIO1_D29                                  0x0108 0x0000 0x2 0x0
+#define IMX7ULP_PAD_PTE2__LPSPI2_PCS3                                0x0108 0x02a8 0x3 0x2
+#define IMX7ULP_PAD_PTE2__LPUART4_TX                                 0x0108 0x024c 0x4 0x2
+#define IMX7ULP_PAD_PTE2__LPI2C4_HREQ                                0x0108 0x0274 0x5 0x2
+#define IMX7ULP_PAD_PTE2__SDHC1_CLK                                  0x0108 0x0000 0x8 0x0
+#define IMX7ULP_PAD_PTE3__PTE3                                       0x010c 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTE3__FXIO1_D28                                  0x010c 0x0000 0x2 0x0
+#define IMX7ULP_PAD_PTE3__LPUART4_RX                                 0x010c 0x0248 0x4 0x2
+#define IMX7ULP_PAD_PTE3__TPM5_CH1                                   0x010c 0x02c8 0x6 0x2
+#define IMX7ULP_PAD_PTE3__SDHC1_CMD                                  0x010c 0x0000 0x8 0x0
+#define IMX7ULP_PAD_PTE4__PTE4                                       0x0110 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTE4__FXIO1_D27                                  0x0110 0x0000 0x2 0x0
+#define IMX7ULP_PAD_PTE4__LPSPI2_SIN                                 0x0110 0x02b0 0x3 0x2
+#define IMX7ULP_PAD_PTE4__LPUART5_CTS_B                              0x0110 0x0250 0x4 0x2
+#define IMX7ULP_PAD_PTE4__LPI2C5_SCL                                 0x0110 0x02bc 0x5 0x2
+#define IMX7ULP_PAD_PTE4__TPM5_CLKIN                                 0x0110 0x02cc 0x6 0x2
+#define IMX7ULP_PAD_PTE4__SDHC1_D3                                   0x0110 0x0000 0x8 0x0
+#define IMX7ULP_PAD_PTE5__PTE5                                       0x0114 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTE5__FXIO1_D26                                  0x0114 0x0000 0x2 0x0
+#define IMX7ULP_PAD_PTE5__LPSPI2_SOUT                                0x0114 0x02b4 0x3 0x2
+#define IMX7ULP_PAD_PTE5__LPUART5_RTS_B                              0x0114 0x0000 0x4 0x0
+#define IMX7ULP_PAD_PTE5__LPI2C5_SDA                                 0x0114 0x02c0 0x5 0x2
+#define IMX7ULP_PAD_PTE5__TPM5_CH0                                   0x0114 0x02c4 0x6 0x2
+#define IMX7ULP_PAD_PTE5__SDHC1_D2                                   0x0114 0x0000 0x8 0x0
+#define IMX7ULP_PAD_PTE6__PTE6                                       0x0118 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTE6__FXIO1_D25                                  0x0118 0x0000 0x2 0x0
+#define IMX7ULP_PAD_PTE6__LPSPI2_SCK                                 0x0118 0x02ac 0x3 0x2
+#define IMX7ULP_PAD_PTE6__LPUART5_TX                                 0x0118 0x0258 0x4 0x2
+#define IMX7ULP_PAD_PTE6__LPI2C5_HREQ                                0x0118 0x02b8 0x5 0x2
+#define IMX7ULP_PAD_PTE6__TPM7_CH3                                   0x0118 0x02e8 0x6 0x2
+#define IMX7ULP_PAD_PTE6__SDHC1_D4                                   0x0118 0x0000 0x8 0x0
+#define IMX7ULP_PAD_PTE6__FB_A17                                     0x0118 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTE7__PTE7                                       0x011c 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTE7__TRACE_D7                                   0x011c 0x0000 0xa 0x0
+#define IMX7ULP_PAD_PTE7__VIU_FID                                    0x011c 0x0000 0xc 0x0
+#define IMX7ULP_PAD_PTE7__FXIO1_D24                                  0x011c 0x0000 0x2 0x0
+#define IMX7ULP_PAD_PTE7__LPSPI2_PCS0                                0x011c 0x029c 0x3 0x2
+#define IMX7ULP_PAD_PTE7__LPUART5_RX                                 0x011c 0x0254 0x4 0x2
+#define IMX7ULP_PAD_PTE7__TPM7_CH4                                   0x011c 0x02ec 0x6 0x2
+#define IMX7ULP_PAD_PTE7__SDHC1_D5                                   0x011c 0x0000 0x8 0x0
+#define IMX7ULP_PAD_PTE7__FB_A18                                     0x011c 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTE8__PTE8                                       0x0120 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTE8__TRACE_D6                                   0x0120 0x0000 0xa 0x0
+#define IMX7ULP_PAD_PTE8__VIU_D16                                    0x0120 0x0000 0xc 0x0
+#define IMX7ULP_PAD_PTE8__FXIO1_D23                                  0x0120 0x0000 0x2 0x0
+#define IMX7ULP_PAD_PTE8__LPSPI3_PCS1                                0x0120 0x0314 0x3 0x2
+#define IMX7ULP_PAD_PTE8__LPUART6_CTS_B                              0x0120 0x025c 0x4 0x2
+#define IMX7ULP_PAD_PTE8__LPI2C6_SCL                                 0x0120 0x02fc 0x5 0x2
+#define IMX7ULP_PAD_PTE8__TPM7_CH5                                   0x0120 0x02f0 0x6 0x2
+#define IMX7ULP_PAD_PTE8__SDHC1_WP                                   0x0120 0x0200 0x7 0x1
+#define IMX7ULP_PAD_PTE8__SDHC1_D6                                   0x0120 0x0000 0x8 0x0
+#define IMX7ULP_PAD_PTE8__FB_CS3_B_FB_BE7_0_BLS31_24_B               0x0120 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTE9__PTE9                                       0x0124 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTE9__TRACE_D5                                   0x0124 0x0000 0xa 0x0
+#define IMX7ULP_PAD_PTE9__VIU_D17                                    0x0124 0x0000 0xc 0x0
+#define IMX7ULP_PAD_PTE9__FXIO1_D22                                  0x0124 0x0000 0x2 0x0
+#define IMX7ULP_PAD_PTE9__LPSPI3_PCS2                                0x0124 0x0318 0x3 0x2
+#define IMX7ULP_PAD_PTE9__LPUART6_RTS_B                              0x0124 0x0000 0x4 0x0
+#define IMX7ULP_PAD_PTE9__LPI2C6_SDA                                 0x0124 0x0300 0x5 0x2
+#define IMX7ULP_PAD_PTE9__TPM7_CLKIN                                 0x0124 0x02f4 0x6 0x2
+#define IMX7ULP_PAD_PTE9__SDHC1_CD                                   0x0124 0x032c 0x7 0x1
+#define IMX7ULP_PAD_PTE9__SDHC1_D7                                   0x0124 0x0000 0x8 0x0
+#define IMX7ULP_PAD_PTE9__FB_TBST_B_FB_CS2_B_FB_BE15_8_BLS23_16_B    0x0124 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTE10__PTE10                                     0x0128 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTE10__TRACE_D4                                  0x0128 0x0000 0xa 0x0
+#define IMX7ULP_PAD_PTE10__VIU_D18                                   0x0128 0x0000 0xc 0x0
+#define IMX7ULP_PAD_PTE10__FXIO1_D21                                 0x0128 0x0000 0x2 0x0
+#define IMX7ULP_PAD_PTE10__LPSPI3_PCS3                               0x0128 0x031c 0x3 0x2
+#define IMX7ULP_PAD_PTE10__LPUART6_TX                                0x0128 0x0264 0x4 0x2
+#define IMX7ULP_PAD_PTE10__LPI2C6_HREQ                               0x0128 0x02f8 0x5 0x2
+#define IMX7ULP_PAD_PTE10__TPM7_CH0                                  0x0128 0x02dc 0x6 0x2
+#define IMX7ULP_PAD_PTE10__SDHC1_VS                                  0x0128 0x0000 0x7 0x0
+#define IMX7ULP_PAD_PTE10__SDHC1_DQS                                 0x0128 0x0000 0x8 0x0
+#define IMX7ULP_PAD_PTE10__FB_A19                                    0x0128 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTE11__PTE11                                     0x012c 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTE11__TRACE_D3                                  0x012c 0x0000 0xa 0x0
+#define IMX7ULP_PAD_PTE11__VIU_D19                                   0x012c 0x0000 0xc 0x0
+#define IMX7ULP_PAD_PTE11__FXIO1_D20                                 0x012c 0x0000 0x2 0x0
+#define IMX7ULP_PAD_PTE11__LPUART6_RX                                0x012c 0x0260 0x4 0x2
+#define IMX7ULP_PAD_PTE11__TPM7_CH1                                  0x012c 0x02e0 0x6 0x2
+#define IMX7ULP_PAD_PTE11__SDHC1_RESET_B                             0x012c 0x0000 0x8 0x0
+#define IMX7ULP_PAD_PTE11__FB_A20                                    0x012c 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTE12__PTE12                                     0x0130 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTE12__TRACE_D2                                  0x0130 0x0000 0xa 0x0
+#define IMX7ULP_PAD_PTE12__VIU_D20                                   0x0130 0x0000 0xc 0x0
+#define IMX7ULP_PAD_PTE12__FXIO1_D19                                 0x0130 0x0000 0x2 0x0
+#define IMX7ULP_PAD_PTE12__LPSPI3_SIN                                0x0130 0x0324 0x3 0x2
+#define IMX7ULP_PAD_PTE12__LPUART7_CTS_B                             0x0130 0x0268 0x4 0x2
+#define IMX7ULP_PAD_PTE12__LPI2C7_SCL                                0x0130 0x0308 0x5 0x2
+#define IMX7ULP_PAD_PTE12__TPM7_CH2                                  0x0130 0x02e4 0x6 0x2
+#define IMX7ULP_PAD_PTE12__SDHC1_WP                                  0x0130 0x0200 0x8 0x2
+#define IMX7ULP_PAD_PTE12__FB_A21                                    0x0130 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTE13__PTE13                                     0x0134 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTE13__TRACE_D1                                  0x0134 0x0000 0xa 0x0
+#define IMX7ULP_PAD_PTE13__VIU_D21                                   0x0134 0x0000 0xc 0x0
+#define IMX7ULP_PAD_PTE13__FXIO1_D18                                 0x0134 0x0000 0x2 0x0
+#define IMX7ULP_PAD_PTE13__LPSPI3_SOUT                               0x0134 0x0328 0x3 0x2
+#define IMX7ULP_PAD_PTE13__LPUART7_RTS_B                             0x0134 0x0000 0x4 0x0
+#define IMX7ULP_PAD_PTE13__LPI2C7_SDA                                0x0134 0x030c 0x5 0x2
+#define IMX7ULP_PAD_PTE13__TPM6_CLKIN                                0x0134 0x02d8 0x6 0x2
+#define IMX7ULP_PAD_PTE13__SDHC1_CD                                  0x0134 0x032c 0x8 0x2
+#define IMX7ULP_PAD_PTE13__FB_A22                                    0x0134 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTE14__PTE14                                     0x0138 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTE14__TRACE_D0                                  0x0138 0x0000 0xa 0x0
+#define IMX7ULP_PAD_PTE14__VIU_D22                                   0x0138 0x0000 0xc 0x0
+#define IMX7ULP_PAD_PTE14__FXIO1_D17                                 0x0138 0x0000 0x2 0x0
+#define IMX7ULP_PAD_PTE14__LPSPI3_SCK                                0x0138 0x0320 0x3 0x2
+#define IMX7ULP_PAD_PTE14__LPUART7_TX                                0x0138 0x0270 0x4 0x2
+#define IMX7ULP_PAD_PTE14__LPI2C7_HREQ                               0x0138 0x0304 0x5 0x2
+#define IMX7ULP_PAD_PTE14__TPM6_CH0                                  0x0138 0x02d0 0x6 0x2
+#define IMX7ULP_PAD_PTE14__SDHC1_VS                                  0x0138 0x0000 0x8 0x0
+#define IMX7ULP_PAD_PTE14__FB_A23                                    0x0138 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTE15__PTE15                                     0x013c 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTE15__TRACE_CLKOUT                              0x013c 0x0000 0xa 0x0
+#define IMX7ULP_PAD_PTE15__VIU_D23                                   0x013c 0x0000 0xc 0x0
+#define IMX7ULP_PAD_PTE15__FXIO1_D16                                 0x013c 0x0000 0x2 0x0
+#define IMX7ULP_PAD_PTE15__LPSPI3_PCS0                               0x013c 0x0310 0x3 0x2
+#define IMX7ULP_PAD_PTE15__LPUART7_RX                                0x013c 0x026c 0x4 0x2
+#define IMX7ULP_PAD_PTE15__TPM6_CH1                                  0x013c 0x02d4 0x6 0x2
+#define IMX7ULP_PAD_PTE15__FB_A24                                    0x013c 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTF0__PTF0                                       0x0180 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTF0__VIU_DE                                     0x0180 0x0000 0xc 0x0
+#define IMX7ULP_PAD_PTF0__LPUART4_CTS_B                              0x0180 0x0244 0x4 0x3
+#define IMX7ULP_PAD_PTF0__LPI2C4_SCL                                 0x0180 0x0278 0x5 0x3
+#define IMX7ULP_PAD_PTF0__TPM4_CLKIN                                 0x0180 0x0298 0x6 0x3
+#define IMX7ULP_PAD_PTF0__FB_RW_B                                    0x0180 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTF1__PTF1                                       0x0184 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTF1__VIU_HSYNC                                  0x0184 0x0000 0xc 0x0
+#define IMX7ULP_PAD_PTF1__LPUART4_RTS_B                              0x0184 0x0000 0x4 0x0
+#define IMX7ULP_PAD_PTF1__LPI2C4_SDA                                 0x0184 0x027c 0x5 0x3
+#define IMX7ULP_PAD_PTF1__TPM4_CH0                                   0x0184 0x0280 0x6 0x3
+#define IMX7ULP_PAD_PTF1__CLKOUT                                     0x0184 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTF2__PTF2                                       0x0188 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTF2__VIU_VSYNC                                  0x0188 0x0000 0xc 0x0
+#define IMX7ULP_PAD_PTF2__LPUART4_TX                                 0x0188 0x024c 0x4 0x3
+#define IMX7ULP_PAD_PTF2__LPI2C4_HREQ                                0x0188 0x0274 0x5 0x3
+#define IMX7ULP_PAD_PTF2__TPM4_CH1                                   0x0188 0x0284 0x6 0x3
+#define IMX7ULP_PAD_PTF2__FB_TSIZ1_FB_CS5_B_FB_BE23_16_BLS15_8_B     0x0188 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTF3__PTF3                                       0x018c 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTF3__VIU_PCLK                                   0x018c 0x0000 0xc 0x0
+#define IMX7ULP_PAD_PTF3__LPUART4_RX                                 0x018c 0x0248 0x4 0x3
+#define IMX7ULP_PAD_PTF3__TPM4_CH2                                   0x018c 0x0288 0x6 0x3
+#define IMX7ULP_PAD_PTF3__FB_AD16                                    0x018c 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTF4__PTF4                                       0x0190 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTF4__VIU_D0                                     0x0190 0x0000 0xc 0x0
+#define IMX7ULP_PAD_PTF4__FXIO1_D0                                   0x0190 0x0204 0x2 0x2
+#define IMX7ULP_PAD_PTF4__LPSPI2_PCS1                                0x0190 0x02a0 0x3 0x3
+#define IMX7ULP_PAD_PTF4__LPUART5_CTS_B                              0x0190 0x0250 0x4 0x3
+#define IMX7ULP_PAD_PTF4__LPI2C5_SCL                                 0x0190 0x02bc 0x5 0x3
+#define IMX7ULP_PAD_PTF4__TPM4_CH3                                   0x0190 0x028c 0x6 0x2
+#define IMX7ULP_PAD_PTF4__FB_AD17                                    0x0190 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTF5__PTF5                                       0x0194 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTF5__VIU_D1                                     0x0194 0x0000 0xc 0x0
+#define IMX7ULP_PAD_PTF5__FXIO1_D1                                   0x0194 0x0208 0x2 0x2
+#define IMX7ULP_PAD_PTF5__LPSPI2_PCS2                                0x0194 0x02a4 0x3 0x3
+#define IMX7ULP_PAD_PTF5__LPUART5_RTS_B                              0x0194 0x0000 0x4 0x0
+#define IMX7ULP_PAD_PTF5__LPI2C5_SDA                                 0x0194 0x02c0 0x5 0x3
+#define IMX7ULP_PAD_PTF5__TPM4_CH4                                   0x0194 0x0290 0x6 0x2
+#define IMX7ULP_PAD_PTF5__FB_AD18                                    0x0194 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTF6__PTF6                                       0x0198 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTF6__VIU_D2                                     0x0198 0x0000 0xc 0x0
+#define IMX7ULP_PAD_PTF6__FXIO1_D2                                   0x0198 0x020c 0x2 0x2
+#define IMX7ULP_PAD_PTF6__LPSPI2_PCS3                                0x0198 0x02a8 0x3 0x3
+#define IMX7ULP_PAD_PTF6__LPUART5_TX                                 0x0198 0x0258 0x4 0x3
+#define IMX7ULP_PAD_PTF6__LPI2C5_HREQ                                0x0198 0x02b8 0x5 0x3
+#define IMX7ULP_PAD_PTF6__TPM4_CH5                                   0x0198 0x0294 0x6 0x2
+#define IMX7ULP_PAD_PTF6__FB_AD19                                    0x0198 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTF7__PTF7                                       0x019c 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTF7__VIU_D3                                     0x019c 0x0000 0xc 0x0
+#define IMX7ULP_PAD_PTF7__FXIO1_D3                                   0x019c 0x0210 0x2 0x2
+#define IMX7ULP_PAD_PTF7__LPUART5_RX                                 0x019c 0x0254 0x4 0x3
+#define IMX7ULP_PAD_PTF7__TPM5_CH1                                   0x019c 0x02c8 0x6 0x3
+#define IMX7ULP_PAD_PTF7__FB_AD20                                    0x019c 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTF8__PTF8                                       0x01a0 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTF8__USB1_ULPI_CLK                              0x01a0 0x0000 0xb 0x0
+#define IMX7ULP_PAD_PTF8__VIU_D4                                     0x01a0 0x0000 0xc 0x0
+#define IMX7ULP_PAD_PTF8__FXIO1_D4                                   0x01a0 0x0214 0x2 0x2
+#define IMX7ULP_PAD_PTF8__LPSPI2_SIN                                 0x01a0 0x02b0 0x3 0x3
+#define IMX7ULP_PAD_PTF8__LPUART6_CTS_B                              0x01a0 0x025c 0x4 0x3
+#define IMX7ULP_PAD_PTF8__LPI2C6_SCL                                 0x01a0 0x02fc 0x5 0x3
+#define IMX7ULP_PAD_PTF8__TPM5_CLKIN                                 0x01a0 0x02cc 0x6 0x3
+#define IMX7ULP_PAD_PTF8__FB_AD21                                    0x01a0 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTF9__PTF9                                       0x01a4 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTF9__USB1_ULPI_NXT                              0x01a4 0x0000 0xb 0x0
+#define IMX7ULP_PAD_PTF9__VIU_D5                                     0x01a4 0x0000 0xc 0x0
+#define IMX7ULP_PAD_PTF9__FXIO1_D5                                   0x01a4 0x0218 0x2 0x2
+#define IMX7ULP_PAD_PTF9__LPSPI2_SOUT                                0x01a4 0x02b4 0x3 0x3
+#define IMX7ULP_PAD_PTF9__LPUART6_RTS_B                              0x01a4 0x0000 0x4 0x0
+#define IMX7ULP_PAD_PTF9__LPI2C6_SDA                                 0x01a4 0x0300 0x5 0x3
+#define IMX7ULP_PAD_PTF9__TPM5_CH0                                   0x01a4 0x02c4 0x6 0x3
+#define IMX7ULP_PAD_PTF9__FB_AD22                                    0x01a4 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTF10__PTF10                                     0x01a8 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTF10__USB1_ULPI_STP                             0x01a8 0x0000 0xb 0x0
+#define IMX7ULP_PAD_PTF10__VIU_D6                                    0x01a8 0x0000 0xc 0x0
+#define IMX7ULP_PAD_PTF10__FXIO1_D6                                  0x01a8 0x021c 0x2 0x2
+#define IMX7ULP_PAD_PTF10__LPSPI2_SCK                                0x01a8 0x02ac 0x3 0x3
+#define IMX7ULP_PAD_PTF10__LPUART6_TX                                0x01a8 0x0264 0x4 0x3
+#define IMX7ULP_PAD_PTF10__LPI2C6_HREQ                               0x01a8 0x02f8 0x5 0x3
+#define IMX7ULP_PAD_PTF10__TPM7_CH3                                  0x01a8 0x02e8 0x6 0x3
+#define IMX7ULP_PAD_PTF10__FB_AD23                                   0x01a8 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTF11__PTF11                                     0x01ac 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTF11__USB1_ULPI_DIR                             0x01ac 0x0000 0xb 0x0
+#define IMX7ULP_PAD_PTF11__VIU_D7                                    0x01ac 0x0000 0xc 0x0
+#define IMX7ULP_PAD_PTF11__FXIO1_D7                                  0x01ac 0x0220 0x2 0x2
+#define IMX7ULP_PAD_PTF11__LPSPI2_PCS0                               0x01ac 0x029c 0x3 0x3
+#define IMX7ULP_PAD_PTF11__LPUART6_RX                                0x01ac 0x0260 0x4 0x3
+#define IMX7ULP_PAD_PTF11__TPM7_CH4                                  0x01ac 0x02ec 0x6 0x3
+#define IMX7ULP_PAD_PTF11__FB_CS4_B_FB_TSIZ0_FB_BE31_24_BLS7_0_B     0x01ac 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTF12__PTF12                                     0x01b0 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTF12__USB1_ULPI_DATA0                           0x01b0 0x0000 0xb 0x0
+#define IMX7ULP_PAD_PTF12__VIU_D8                                    0x01b0 0x0000 0xc 0x0
+#define IMX7ULP_PAD_PTF12__FXIO1_D8                                  0x01b0 0x0224 0x2 0x2
+#define IMX7ULP_PAD_PTF12__LPSPI3_PCS1                               0x01b0 0x0314 0x3 0x3
+#define IMX7ULP_PAD_PTF12__LPUART7_CTS_B                             0x01b0 0x0268 0x4 0x3
+#define IMX7ULP_PAD_PTF12__LPI2C7_SCL                                0x01b0 0x0308 0x5 0x3
+#define IMX7ULP_PAD_PTF12__TPM7_CH5                                  0x01b0 0x02f0 0x6 0x3
+#define IMX7ULP_PAD_PTF12__FB_AD24                                   0x01b0 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTF13__PTF13                                     0x01b4 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTF13__USB1_ULPI_DATA1                           0x01b4 0x0000 0xb 0x0
+#define IMX7ULP_PAD_PTF13__VIU_D9                                    0x01b4 0x0000 0xc 0x0
+#define IMX7ULP_PAD_PTF13__FXIO1_D9                                  0x01b4 0x0228 0x2 0x2
+#define IMX7ULP_PAD_PTF13__LPSPI3_PCS2                               0x01b4 0x0318 0x3 0x3
+#define IMX7ULP_PAD_PTF13__LPUART7_RTS_B                             0x01b4 0x0000 0x4 0x0
+#define IMX7ULP_PAD_PTF13__LPI2C7_SDA                                0x01b4 0x030c 0x5 0x3
+#define IMX7ULP_PAD_PTF13__TPM7_CLKIN                                0x01b4 0x02f4 0x6 0x3
+#define IMX7ULP_PAD_PTF13__FB_AD25                                   0x01b4 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTF14__PTF14                                     0x01b8 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTF14__USB1_ULPI_DATA2                           0x01b8 0x0000 0xb 0x0
+#define IMX7ULP_PAD_PTF14__VIU_D10                                   0x01b8 0x0000 0xc 0x0
+#define IMX7ULP_PAD_PTF14__FXIO1_D10                                 0x01b8 0x022c 0x2 0x2
+#define IMX7ULP_PAD_PTF14__LPSPI3_PCS3                               0x01b8 0x031c 0x3 0x3
+#define IMX7ULP_PAD_PTF14__LPUART7_TX                                0x01b8 0x0270 0x4 0x3
+#define IMX7ULP_PAD_PTF14__LPI2C7_HREQ                               0x01b8 0x0304 0x5 0x3
+#define IMX7ULP_PAD_PTF14__TPM7_CH0                                  0x01b8 0x02dc 0x6 0x3
+#define IMX7ULP_PAD_PTF14__FB_AD26                                   0x01b8 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTF15__PTF15                                     0x01bc 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTF15__USB1_ULPI_DATA3                           0x01bc 0x0000 0xb 0x0
+#define IMX7ULP_PAD_PTF15__VIU_D11                                   0x01bc 0x0000 0xc 0x0
+#define IMX7ULP_PAD_PTF15__FXIO1_D11                                 0x01bc 0x0230 0x2 0x2
+#define IMX7ULP_PAD_PTF15__LPUART7_RX                                0x01bc 0x026c 0x4 0x3
+#define IMX7ULP_PAD_PTF15__TPM7_CH1                                  0x01bc 0x02e0 0x6 0x3
+#define IMX7ULP_PAD_PTF15__FB_AD27                                   0x01bc 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTF16__PTF16                                     0x01c0 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTF16__USB1_ULPI_DATA4                           0x01c0 0x0000 0xb 0x0
+#define IMX7ULP_PAD_PTF16__VIU_D12                                   0x01c0 0x0000 0xc 0x0
+#define IMX7ULP_PAD_PTF16__FXIO1_D12                                 0x01c0 0x0234 0x2 0x2
+#define IMX7ULP_PAD_PTF16__LPSPI3_SIN                                0x01c0 0x0324 0x3 0x3
+#define IMX7ULP_PAD_PTF16__TPM7_CH2                                  0x01c0 0x02e4 0x6 0x3
+#define IMX7ULP_PAD_PTF16__FB_AD28                                   0x01c0 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTF17__PTF17                                     0x01c4 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTF17__USB1_ULPI_DATA5                           0x01c4 0x0000 0xb 0x0
+#define IMX7ULP_PAD_PTF17__VIU_D13                                   0x01c4 0x0000 0xc 0x0
+#define IMX7ULP_PAD_PTF17__FXIO1_D13                                 0x01c4 0x0238 0x2 0x2
+#define IMX7ULP_PAD_PTF17__LPSPI3_SOUT                               0x01c4 0x0328 0x3 0x3
+#define IMX7ULP_PAD_PTF17__TPM6_CLKIN                                0x01c4 0x02d8 0x6 0x3
+#define IMX7ULP_PAD_PTF17__FB_AD29                                   0x01c4 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTF18__PTF18                                     0x01c8 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTF18__USB1_ULPI_DATA6                           0x01c8 0x0000 0xb 0x0
+#define IMX7ULP_PAD_PTF18__VIU_D14                                   0x01c8 0x0000 0xc 0x0
+#define IMX7ULP_PAD_PTF18__FXIO1_D14                                 0x01c8 0x023c 0x2 0x2
+#define IMX7ULP_PAD_PTF18__LPSPI3_SCK                                0x01c8 0x0320 0x3 0x3
+#define IMX7ULP_PAD_PTF18__TPM6_CH0                                  0x01c8 0x02d0 0x6 0x3
+#define IMX7ULP_PAD_PTF18__FB_AD30                                   0x01c8 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTF19__PTF19                                     0x01cc 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTF19__USB1_ULPI_DATA7                           0x01cc 0x0000 0xb 0x0
+#define IMX7ULP_PAD_PTF19__VIU_D15                                   0x01cc 0x0000 0xc 0x0
+#define IMX7ULP_PAD_PTF19__FXIO1_D15                                 0x01cc 0x0240 0x2 0x2
+#define IMX7ULP_PAD_PTF19__LPSPI3_PCS0                               0x01cc 0x0310 0x3 0x3
+#define IMX7ULP_PAD_PTF19__TPM6_CH1                                  0x01cc 0x02d4 0x6 0x3
+#define IMX7ULP_PAD_PTF19__FB_AD31                                   0x01cc 0x0000 0x9 0x0
+
+#endif /* __DTS_IMX7ULP_PINFUNC_H */
diff --git a/arch/arm/boot/dts/ls1021a.dtsi b/arch/arm/boot/dts/ls1021a.dtsi
index 7bb9df2c1460..9319e1f0f1d8 100644
--- a/arch/arm/boot/dts/ls1021a.dtsi
+++ b/arch/arm/boot/dts/ls1021a.dtsi
@@ -129,14 +129,14 @@
 		};
 
 		msi1: msi-controller@1570e00 {
-			compatible = "fsl,1s1021a-msi";
+			compatible = "fsl,ls1021a-msi";
 			reg = <0x0 0x1570e00 0x0 0x8>;
 			msi-controller;
 			interrupts =  <GIC_SPI 179 IRQ_TYPE_LEVEL_HIGH>;
 		};
 
 		msi2: msi-controller@1570e08 {
-			compatible = "fsl,1s1021a-msi";
+			compatible = "fsl,ls1021a-msi";
 			reg = <0x0 0x1570e08 0x0 0x8>;
 			msi-controller;
 			interrupts = <GIC_SPI 180 IRQ_TYPE_LEVEL_HIGH>;
@@ -699,7 +699,7 @@
 			bus-range = <0x0 0xff>;
 			ranges = <0x81000000 0x0 0x00000000 0x40 0x00010000 0x0 0x00010000   /* downstream I/O */
 				  0x82000000 0x0 0x40000000 0x40 0x40000000 0x0 0x40000000>; /* non-prefetchable memory */
-			msi-parent = <&msi1>;
+			msi-parent = <&msi1>, <&msi2>;
 			#interrupt-cells = <1>;
 			interrupt-map-mask = <0 0 0 7>;
 			interrupt-map = <0000 0 0 1 &gic GIC_SPI 91  IRQ_TYPE_LEVEL_HIGH>,
@@ -722,7 +722,7 @@
 			bus-range = <0x0 0xff>;
 			ranges = <0x81000000 0x0 0x00000000 0x48 0x00010000 0x0 0x00010000   /* downstream I/O */
 				  0x82000000 0x0 0x40000000 0x48 0x40000000 0x0 0x40000000>; /* non-prefetchable memory */
-			msi-parent = <&msi2>;
+			msi-parent = <&msi1>, <&msi2>;
 			#interrupt-cells = <1>;
 			interrupt-map-mask = <0 0 0 7>;
 			interrupt-map = <0000 0 0 1 &gic GIC_SPI 92  IRQ_TYPE_LEVEL_HIGH>,
diff --git a/arch/arm/boot/dts/omap3-n950-n9.dtsi b/arch/arm/boot/dts/omap3-n950-n9.dtsi
index df3366fa5409..cb47ae79a5f9 100644
--- a/arch/arm/boot/dts/omap3-n950-n9.dtsi
+++ b/arch/arm/boot/dts/omap3-n950-n9.dtsi
@@ -265,6 +265,20 @@
 
 &i2c2 {
 	clock-frequency = <400000>;
+
+	as3645a@30 {
+		reg = <0x30>;
+		compatible = "ams,as3645a";
+		flash {
+			flash-timeout-us = <150000>;
+			flash-max-microamp = <320000>;
+			led-max-microamp = <60000>;
+			peak-current-limit = <1750000>;
+		};
+		indicator {
+			led-max-microamp = <10000>;
+		};
+	};
 };
 
 &i2c3 {
diff --git a/arch/arm/boot/dts/rk3228-evb.dts b/arch/arm/boot/dts/rk3228-evb.dts
index 58834330a5ba..1be9daacc4f9 100644
--- a/arch/arm/boot/dts/rk3228-evb.dts
+++ b/arch/arm/boot/dts/rk3228-evb.dts
@@ -50,6 +50,16 @@
 		device_type = "memory";
 		reg = <0x60000000 0x40000000>;
 	};
+
+	vcc_phy: vcc-phy-regulator {
+		compatible = "regulator-fixed";
+		enable-active-high;
+		regulator-name = "vcc_phy";
+		regulator-min-microvolt = <1800000>;
+		regulator-max-microvolt = <1800000>;
+		regulator-always-on;
+		regulator-boot-on;
+	};
 };
 
 &emmc {
@@ -60,6 +70,30 @@
 	status = "okay";
 };
 
+&gmac {
+	assigned-clocks = <&cru SCLK_MAC_SRC>;
+	assigned-clock-rates = <50000000>;
+	clock_in_out = "output";
+	phy-supply = <&vcc_phy>;
+	phy-mode = "rmii";
+	phy-handle = <&phy>;
+	status = "okay";
+
+	mdio {
+		compatible = "snps,dwmac-mdio";
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		phy: phy@0 {
+			compatible = "ethernet-phy-id1234.d400", "ethernet-phy-ieee802.3-c22";
+			reg = <0>;
+			clocks = <&cru SCLK_MAC_PHY>;
+			resets = <&cru SRST_MACPHY>;
+			phy-is-integrated;
+		};
+	};
+};
+
 &tsadc {
 	status = "okay";
 
diff --git a/arch/arm/boot/dts/tango4-smp8758.dtsi b/arch/arm/boot/dts/tango4-smp8758.dtsi
index d2e65c46bcc7..eca33d568690 100644
--- a/arch/arm/boot/dts/tango4-smp8758.dtsi
+++ b/arch/arm/boot/dts/tango4-smp8758.dtsi
@@ -13,7 +13,6 @@
 			reg = <0>;
 			clocks = <&clkgen CPU_CLK>;
 			clock-latency = <1>;
-			operating-points = <1215000 0 607500 0 405000 0 243000 0 135000 0>;
 		};
 
 		cpu1: cpu@1 {
diff --git a/arch/arm/configs/imx_v6_v7_defconfig b/arch/arm/configs/imx_v6_v7_defconfig
index e74de69caeab..1736813bdea7 100644
--- a/arch/arm/configs/imx_v6_v7_defconfig
+++ b/arch/arm/configs/imx_v6_v7_defconfig
@@ -226,7 +226,7 @@ CONFIG_REGULATOR_MC13892=y
 CONFIG_REGULATOR_PFUZE100=y
 CONFIG_MEDIA_SUPPORT=y
 CONFIG_MEDIA_CAMERA_SUPPORT=y
-CONFIG_MEDIA_RC_SUPPORT=y
+CONFIG_RC_CORE=y
 CONFIG_RC_DEVICES=y
 CONFIG_IR_GPIO_CIR=y
 CONFIG_MEDIA_USB_SUPPORT=y
diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig
index 4d19c1b4b8e7..94d7e71c69c4 100644
--- a/arch/arm/configs/multi_v7_defconfig
+++ b/arch/arm/configs/multi_v7_defconfig
@@ -270,6 +270,7 @@ CONFIG_ICPLUS_PHY=y
 CONFIG_REALTEK_PHY=y
 CONFIG_MICREL_PHY=y
 CONFIG_FIXED_PHY=y
+CONFIG_ROCKCHIP_PHY=y
 CONFIG_USB_PEGASUS=y
 CONFIG_USB_RTL8152=m
 CONFIG_USB_USBNET=y
diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig
index a120ae816260..0414acf731ce 100644
--- a/arch/arm/configs/omap2plus_defconfig
+++ b/arch/arm/configs/omap2plus_defconfig
@@ -304,7 +304,7 @@ CONFIG_REGULATOR_TPS65910=y
 CONFIG_REGULATOR_TWL4030=y
 CONFIG_MEDIA_SUPPORT=m
 CONFIG_MEDIA_CAMERA_SUPPORT=y
-CONFIG_MEDIA_RC_SUPPORT=y
+CONFIG_RC_CORE=m
 CONFIG_MEDIA_CONTROLLER=y
 CONFIG_VIDEO_V4L2_SUBDEV_API=y
 CONFIG_LIRC=m
diff --git a/arch/arm/configs/sunxi_defconfig b/arch/arm/configs/sunxi_defconfig
index 0ec1d1ec130f..22cd559531a9 100644
--- a/arch/arm/configs/sunxi_defconfig
+++ b/arch/arm/configs/sunxi_defconfig
@@ -95,7 +95,7 @@ CONFIG_REGULATOR_FIXED_VOLTAGE=y
 CONFIG_REGULATOR_AXP20X=y
 CONFIG_REGULATOR_GPIO=y
 CONFIG_MEDIA_SUPPORT=y
-CONFIG_MEDIA_RC_SUPPORT=y
+CONFIG_RC_CORE=y
 CONFIG_RC_DEVICES=y
 CONFIG_IR_SUNXI=y
 CONFIG_DRM=y
diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig
index b9adedcc5b2e..ec72752d5668 100644
--- a/arch/arm/crypto/Kconfig
+++ b/arch/arm/crypto/Kconfig
@@ -94,14 +94,15 @@ config CRYPTO_AES_ARM_CE
 	  ARMv8 Crypto Extensions
 
 config CRYPTO_GHASH_ARM_CE
-	tristate "PMULL-accelerated GHASH using ARMv8 Crypto Extensions"
+	tristate "PMULL-accelerated GHASH using NEON/ARMv8 Crypto Extensions"
 	depends on KERNEL_MODE_NEON
 	select CRYPTO_HASH
 	select CRYPTO_CRYPTD
 	help
 	  Use an implementation of GHASH (used by the GCM AEAD chaining mode)
 	  that uses the 64x64 to 128 bit polynomial multiplication (vmull.p64)
-	  that is part of the ARMv8 Crypto Extensions
+	  that is part of the ARMv8 Crypto Extensions, or a slower variant that
+	  uses the vmull.p8 instruction that is part of the basic NEON ISA.
 
 config CRYPTO_CRCT10DIF_ARM_CE
 	tristate "CRCT10DIF digest algorithm using PMULL instructions"
diff --git a/arch/arm/crypto/aes-ce-glue.c b/arch/arm/crypto/aes-ce-glue.c
index 0f966a8ca1ce..d0a9cec73707 100644
--- a/arch/arm/crypto/aes-ce-glue.c
+++ b/arch/arm/crypto/aes-ce-glue.c
@@ -285,9 +285,7 @@ static int ctr_encrypt(struct skcipher_request *req)
 
 		ce_aes_ctr_encrypt(tail, NULL, (u8 *)ctx->key_enc,
 				   num_rounds(ctx), blocks, walk.iv);
-		if (tdst != tsrc)
-			memcpy(tdst, tsrc, nbytes);
-		crypto_xor(tdst, tail, nbytes);
+		crypto_xor_cpy(tdst, tsrc, tail, nbytes);
 		err = skcipher_walk_done(&walk, 0);
 	}
 	kernel_neon_end();
diff --git a/arch/arm/crypto/aes-cipher-core.S b/arch/arm/crypto/aes-cipher-core.S
index c817a86c4ca8..54b384084637 100644
--- a/arch/arm/crypto/aes-cipher-core.S
+++ b/arch/arm/crypto/aes-cipher-core.S
@@ -10,6 +10,7 @@
  */
 
 #include <linux/linkage.h>
+#include <asm/cache.h>
 
 	.text
 	.align		5
@@ -32,19 +33,19 @@
 	.endif
 	.endm
 
-	.macro		__load, out, in, idx
+	.macro		__load, out, in, idx, sz, op
 	.if		__LINUX_ARM_ARCH__ < 7 && \idx > 0
-	ldr		\out, [ttab, \in, lsr #(8 * \idx) - 2]
+	ldr\op		\out, [ttab, \in, lsr #(8 * \idx) - \sz]
 	.else
-	ldr		\out, [ttab, \in, lsl #2]
+	ldr\op		\out, [ttab, \in, lsl #\sz]
 	.endif
 	.endm
 
-	.macro		__hround, out0, out1, in0, in1, in2, in3, t3, t4, enc
+	.macro		__hround, out0, out1, in0, in1, in2, in3, t3, t4, enc, sz, op
 	__select	\out0, \in0, 0
 	__select	t0, \in1, 1
-	__load		\out0, \out0, 0
-	__load		t0, t0, 1
+	__load		\out0, \out0, 0, \sz, \op
+	__load		t0, t0, 1, \sz, \op
 
 	.if		\enc
 	__select	\out1, \in1, 0
@@ -53,10 +54,10 @@
 	__select	\out1, \in3, 0
 	__select	t1, \in0, 1
 	.endif
-	__load		\out1, \out1, 0
+	__load		\out1, \out1, 0, \sz, \op
 	__select	t2, \in2, 2
-	__load		t1, t1, 1
-	__load		t2, t2, 2
+	__load		t1, t1, 1, \sz, \op
+	__load		t2, t2, 2, \sz, \op
 
 	eor		\out0, \out0, t0, ror #24
 
@@ -68,9 +69,9 @@
 	__select	\t3, \in1, 2
 	__select	\t4, \in2, 3
 	.endif
-	__load		\t3, \t3, 2
-	__load		t0, t0, 3
-	__load		\t4, \t4, 3
+	__load		\t3, \t3, 2, \sz, \op
+	__load		t0, t0, 3, \sz, \op
+	__load		\t4, \t4, 3, \sz, \op
 
 	eor		\out1, \out1, t1, ror #24
 	eor		\out0, \out0, t2, ror #16
@@ -82,14 +83,14 @@
 	eor		\out1, \out1, t2
 	.endm
 
-	.macro		fround, out0, out1, out2, out3, in0, in1, in2, in3
-	__hround	\out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1
-	__hround	\out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1
+	.macro		fround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op
+	__hround	\out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1, \sz, \op
+	__hround	\out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1, \sz, \op
 	.endm
 
-	.macro		iround, out0, out1, out2, out3, in0, in1, in2, in3
-	__hround	\out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0
-	__hround	\out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0
+	.macro		iround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op
+	__hround	\out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0, \sz, \op
+	__hround	\out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op
 	.endm
 
 	.macro		__rev, out, in
@@ -114,7 +115,7 @@
 	.endif
 	.endm
 
-	.macro		do_crypt, round, ttab, ltab
+	.macro		do_crypt, round, ttab, ltab, bsz
 	push		{r3-r11, lr}
 
 	ldr		r4, [in]
@@ -146,9 +147,12 @@
 
 1:	subs		rounds, rounds, #4
 	\round		r8, r9, r10, r11, r4, r5, r6, r7
-	__adrl		ttab, \ltab, ls
+	bls		2f
 	\round		r4, r5, r6, r7, r8, r9, r10, r11
-	bhi		0b
+	b		0b
+
+2:	__adrl		ttab, \ltab
+	\round		r4, r5, r6, r7, r8, r9, r10, r11, \bsz, b
 
 #ifdef CONFIG_CPU_BIG_ENDIAN
 	__rev		r4, r4
@@ -170,10 +174,48 @@
 	.ltorg
 	.endm
 
+	.align		L1_CACHE_SHIFT
+	.type		__aes_arm_inverse_sbox, %object
+__aes_arm_inverse_sbox:
+	.byte		0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
+	.byte		0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
+	.byte		0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
+	.byte		0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
+	.byte		0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
+	.byte		0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
+	.byte		0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
+	.byte		0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
+	.byte		0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
+	.byte		0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
+	.byte		0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
+	.byte		0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
+	.byte		0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
+	.byte		0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
+	.byte		0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
+	.byte		0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
+	.byte		0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
+	.byte		0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
+	.byte		0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
+	.byte		0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
+	.byte		0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
+	.byte		0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
+	.byte		0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
+	.byte		0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
+	.byte		0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
+	.byte		0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
+	.byte		0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
+	.byte		0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
+	.byte		0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
+	.byte		0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
+	.byte		0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
+	.byte		0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
+	.size		__aes_arm_inverse_sbox, . - __aes_arm_inverse_sbox
+
 ENTRY(__aes_arm_encrypt)
-	do_crypt	fround, crypto_ft_tab, crypto_fl_tab
+	do_crypt	fround, crypto_ft_tab, crypto_ft_tab + 1, 2
 ENDPROC(__aes_arm_encrypt)
 
+	.align		5
 ENTRY(__aes_arm_decrypt)
-	do_crypt	iround, crypto_it_tab, crypto_il_tab
+	do_crypt	iround, crypto_it_tab, __aes_arm_inverse_sbox, 0
 ENDPROC(__aes_arm_decrypt)
diff --git a/arch/arm/crypto/aes-neonbs-glue.c b/arch/arm/crypto/aes-neonbs-glue.c
index c76377961444..18768f330449 100644
--- a/arch/arm/crypto/aes-neonbs-glue.c
+++ b/arch/arm/crypto/aes-neonbs-glue.c
@@ -221,9 +221,8 @@ static int ctr_encrypt(struct skcipher_request *req)
 			u8 *dst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE;
 			u8 *src = walk.src.virt.addr + blocks * AES_BLOCK_SIZE;
 
-			if (dst != src)
-				memcpy(dst, src, walk.total % AES_BLOCK_SIZE);
-			crypto_xor(dst, final, walk.total % AES_BLOCK_SIZE);
+			crypto_xor_cpy(dst, src, final,
+				       walk.total % AES_BLOCK_SIZE);
 
 			err = skcipher_walk_done(&walk, 0);
 			break;
diff --git a/arch/arm/crypto/ghash-ce-core.S b/arch/arm/crypto/ghash-ce-core.S
index f6ab8bcc9efe..2f78c10b1881 100644
--- a/arch/arm/crypto/ghash-ce-core.S
+++ b/arch/arm/crypto/ghash-ce-core.S
@@ -1,7 +1,7 @@
 /*
- * Accelerated GHASH implementation with ARMv8 vmull.p64 instructions.
+ * Accelerated GHASH implementation with NEON/ARMv8 vmull.p8/64 instructions.
  *
- * Copyright (C) 2015 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2015 - 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License version 2 as published
@@ -12,40 +12,162 @@
 #include <asm/assembler.h>
 
 	SHASH		.req	q0
-	SHASH2		.req	q1
-	T1		.req	q2
-	T2		.req	q3
-	MASK		.req	q4
-	XL		.req	q5
-	XM		.req	q6
-	XH		.req	q7
-	IN1		.req	q7
+	T1		.req	q1
+	XL		.req	q2
+	XM		.req	q3
+	XH		.req	q4
+	IN1		.req	q4
 
 	SHASH_L		.req	d0
 	SHASH_H		.req	d1
-	SHASH2_L	.req	d2
-	T1_L		.req	d4
-	MASK_L		.req	d8
-	XL_L		.req	d10
-	XL_H		.req	d11
-	XM_L		.req	d12
-	XM_H		.req	d13
-	XH_L		.req	d14
+	T1_L		.req	d2
+	T1_H		.req	d3
+	XL_L		.req	d4
+	XL_H		.req	d5
+	XM_L		.req	d6
+	XM_H		.req	d7
+	XH_L		.req	d8
+
+	t0l		.req	d10
+	t0h		.req	d11
+	t1l		.req	d12
+	t1h		.req	d13
+	t2l		.req	d14
+	t2h		.req	d15
+	t3l		.req	d16
+	t3h		.req	d17
+	t4l		.req	d18
+	t4h		.req	d19
+
+	t0q		.req	q5
+	t1q		.req	q6
+	t2q		.req	q7
+	t3q		.req	q8
+	t4q		.req	q9
+	T2		.req	q9
+
+	s1l		.req	d20
+	s1h		.req	d21
+	s2l		.req	d22
+	s2h		.req	d23
+	s3l		.req	d24
+	s3h		.req	d25
+	s4l		.req	d26
+	s4h		.req	d27
+
+	MASK		.req	d28
+	SHASH2_p8	.req	d28
+
+	k16		.req	d29
+	k32		.req	d30
+	k48		.req	d31
+	SHASH2_p64	.req	d31
 
 	.text
 	.fpu		crypto-neon-fp-armv8
 
+	.macro		__pmull_p64, rd, rn, rm, b1, b2, b3, b4
+	vmull.p64	\rd, \rn, \rm
+	.endm
+
 	/*
-	 * void pmull_ghash_update(int blocks, u64 dg[], const char *src,
-	 *			   struct ghash_key const *k, const char *head)
+	 * This implementation of 64x64 -> 128 bit polynomial multiplication
+	 * using vmull.p8 instructions (8x8 -> 16) is taken from the paper
+	 * "Fast Software Polynomial Multiplication on ARM Processors Using
+	 * the NEON Engine" by Danilo Camara, Conrado Gouvea, Julio Lopez and
+	 * Ricardo Dahab (https://hal.inria.fr/hal-01506572)
+	 *
+	 * It has been slightly tweaked for in-order performance, and to allow
+	 * 'rq' to overlap with 'ad' or 'bd'.
 	 */
-ENTRY(pmull_ghash_update)
-	vld1.64		{SHASH}, [r3]
+	.macro		__pmull_p8, rq, ad, bd, b1=t4l, b2=t3l, b3=t4l, b4=t3l
+	vext.8		t0l, \ad, \ad, #1	@ A1
+	.ifc		\b1, t4l
+	vext.8		t4l, \bd, \bd, #1	@ B1
+	.endif
+	vmull.p8	t0q, t0l, \bd		@ F = A1*B
+	vext.8		t1l, \ad, \ad, #2	@ A2
+	vmull.p8	t4q, \ad, \b1		@ E = A*B1
+	.ifc		\b2, t3l
+	vext.8		t3l, \bd, \bd, #2	@ B2
+	.endif
+	vmull.p8	t1q, t1l, \bd		@ H = A2*B
+	vext.8		t2l, \ad, \ad, #3	@ A3
+	vmull.p8	t3q, \ad, \b2		@ G = A*B2
+	veor		t0q, t0q, t4q		@ L = E + F
+	.ifc		\b3, t4l
+	vext.8		t4l, \bd, \bd, #3	@ B3
+	.endif
+	vmull.p8	t2q, t2l, \bd		@ J = A3*B
+	veor		t0l, t0l, t0h		@ t0 = (L) (P0 + P1) << 8
+	veor		t1q, t1q, t3q		@ M = G + H
+	.ifc		\b4, t3l
+	vext.8		t3l, \bd, \bd, #4	@ B4
+	.endif
+	vmull.p8	t4q, \ad, \b3		@ I = A*B3
+	veor		t1l, t1l, t1h		@ t1 = (M) (P2 + P3) << 16
+	vmull.p8	t3q, \ad, \b4		@ K = A*B4
+	vand		t0h, t0h, k48
+	vand		t1h, t1h, k32
+	veor		t2q, t2q, t4q		@ N = I + J
+	veor		t0l, t0l, t0h
+	veor		t1l, t1l, t1h
+	veor		t2l, t2l, t2h		@ t2 = (N) (P4 + P5) << 24
+	vand		t2h, t2h, k16
+	veor		t3l, t3l, t3h		@ t3 = (K) (P6 + P7) << 32
+	vmov.i64	t3h, #0
+	vext.8		t0q, t0q, t0q, #15
+	veor		t2l, t2l, t2h
+	vext.8		t1q, t1q, t1q, #14
+	vmull.p8	\rq, \ad, \bd		@ D = A*B
+	vext.8		t2q, t2q, t2q, #13
+	vext.8		t3q, t3q, t3q, #12
+	veor		t0q, t0q, t1q
+	veor		t2q, t2q, t3q
+	veor		\rq, \rq, t0q
+	veor		\rq, \rq, t2q
+	.endm
+
+	//
+	// PMULL (64x64->128) based reduction for CPUs that can do
+	// it in a single instruction.
+	//
+	.macro		__pmull_reduce_p64
+	vmull.p64	T1, XL_L, MASK
+
+	veor		XH_L, XH_L, XM_H
+	vext.8		T1, T1, T1, #8
+	veor		XL_H, XL_H, XM_L
+	veor		T1, T1, XL
+
+	vmull.p64	XL, T1_H, MASK
+	.endm
+
+	//
+	// Alternative reduction for CPUs that lack support for the
+	// 64x64->128 PMULL instruction
+	//
+	.macro		__pmull_reduce_p8
+	veor		XL_H, XL_H, XM_L
+	veor		XH_L, XH_L, XM_H
+
+	vshl.i64	T1, XL, #57
+	vshl.i64	T2, XL, #62
+	veor		T1, T1, T2
+	vshl.i64	T2, XL, #63
+	veor		T1, T1, T2
+	veor		XL_H, XL_H, T1_L
+	veor		XH_L, XH_L, T1_H
+
+	vshr.u64	T1, XL, #1
+	veor		XH, XH, XL
+	veor		XL, XL, T1
+	vshr.u64	T1, T1, #6
+	vshr.u64	XL, XL, #1
+	.endm
+
+	.macro		ghash_update, pn
 	vld1.64		{XL}, [r1]
-	vmov.i8		MASK, #0xe1
-	vext.8		SHASH2, SHASH, SHASH, #8
-	vshl.u64	MASK, MASK, #57
-	veor		SHASH2, SHASH2, SHASH
 
 	/* do the head block first, if supplied */
 	ldr		ip, [sp]
@@ -62,33 +184,59 @@ ENTRY(pmull_ghash_update)
 #ifndef CONFIG_CPU_BIG_ENDIAN
 	vrev64.8	T1, T1
 #endif
-	vext.8		T2, XL, XL, #8
 	vext.8		IN1, T1, T1, #8
-	veor		T1, T1, T2
+	veor		T1_L, T1_L, XL_H
 	veor		XL, XL, IN1
 
-	vmull.p64	XH, SHASH_H, XL_H		@ a1 * b1
+	__pmull_\pn	XH, XL_H, SHASH_H, s1h, s2h, s3h, s4h	@ a1 * b1
 	veor		T1, T1, XL
-	vmull.p64	XL, SHASH_L, XL_L		@ a0 * b0
-	vmull.p64	XM, SHASH2_L, T1_L		@ (a1 + a0)(b1 + b0)
+	__pmull_\pn	XL, XL_L, SHASH_L, s1l, s2l, s3l, s4l	@ a0 * b0
+	__pmull_\pn	XM, T1_L, SHASH2_\pn			@ (a1+a0)(b1+b0)
 
-	vext.8		T1, XL, XH, #8
-	veor		T2, XL, XH
+	veor		T1, XL, XH
 	veor		XM, XM, T1
-	veor		XM, XM, T2
-	vmull.p64	T2, XL_L, MASK_L
 
-	vmov		XH_L, XM_H
-	vmov		XM_H, XL_L
+	__pmull_reduce_\pn
 
-	veor		XL, XM, T2
-	vext.8		T2, XL, XL, #8
-	vmull.p64	XL, XL_L, MASK_L
-	veor		T2, T2, XH
-	veor		XL, XL, T2
+	veor		T1, T1, XH
+	veor		XL, XL, T1
 
 	bne		0b
 
 	vst1.64		{XL}, [r1]
 	bx		lr
-ENDPROC(pmull_ghash_update)
+	.endm
+
+	/*
+	 * void pmull_ghash_update(int blocks, u64 dg[], const char *src,
+	 *			   struct ghash_key const *k, const char *head)
+	 */
+ENTRY(pmull_ghash_update_p64)
+	vld1.64		{SHASH}, [r3]
+	veor		SHASH2_p64, SHASH_L, SHASH_H
+
+	vmov.i8		MASK, #0xe1
+	vshl.u64	MASK, MASK, #57
+
+	ghash_update	p64
+ENDPROC(pmull_ghash_update_p64)
+
+ENTRY(pmull_ghash_update_p8)
+	vld1.64		{SHASH}, [r3]
+	veor		SHASH2_p8, SHASH_L, SHASH_H
+
+	vext.8		s1l, SHASH_L, SHASH_L, #1
+	vext.8		s2l, SHASH_L, SHASH_L, #2
+	vext.8		s3l, SHASH_L, SHASH_L, #3
+	vext.8		s4l, SHASH_L, SHASH_L, #4
+	vext.8		s1h, SHASH_H, SHASH_H, #1
+	vext.8		s2h, SHASH_H, SHASH_H, #2
+	vext.8		s3h, SHASH_H, SHASH_H, #3
+	vext.8		s4h, SHASH_H, SHASH_H, #4
+
+	vmov.i64	k16, #0xffff
+	vmov.i64	k32, #0xffffffff
+	vmov.i64	k48, #0xffffffffffff
+
+	ghash_update	p8
+ENDPROC(pmull_ghash_update_p8)
diff --git a/arch/arm/crypto/ghash-ce-glue.c b/arch/arm/crypto/ghash-ce-glue.c
index 6bac8bea9f1e..d9bb52cae2ac 100644
--- a/arch/arm/crypto/ghash-ce-glue.c
+++ b/arch/arm/crypto/ghash-ce-glue.c
@@ -22,6 +22,7 @@
 MODULE_DESCRIPTION("GHASH secure hash using ARMv8 Crypto Extensions");
 MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
 MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_CRYPTO("ghash");
 
 #define GHASH_BLOCK_SIZE	16
 #define GHASH_DIGEST_SIZE	16
@@ -41,8 +42,17 @@ struct ghash_async_ctx {
 	struct cryptd_ahash *cryptd_tfm;
 };
 
-asmlinkage void pmull_ghash_update(int blocks, u64 dg[], const char *src,
-				   struct ghash_key const *k, const char *head);
+asmlinkage void pmull_ghash_update_p64(int blocks, u64 dg[], const char *src,
+				       struct ghash_key const *k,
+				       const char *head);
+
+asmlinkage void pmull_ghash_update_p8(int blocks, u64 dg[], const char *src,
+				      struct ghash_key const *k,
+				      const char *head);
+
+static void (*pmull_ghash_update)(int blocks, u64 dg[], const char *src,
+				  struct ghash_key const *k,
+				  const char *head);
 
 static int ghash_init(struct shash_desc *desc)
 {
@@ -312,6 +322,14 @@ static int __init ghash_ce_mod_init(void)
 {
 	int err;
 
+	if (!(elf_hwcap & HWCAP_NEON))
+		return -ENODEV;
+
+	if (elf_hwcap2 & HWCAP2_PMULL)
+		pmull_ghash_update = pmull_ghash_update_p64;
+	else
+		pmull_ghash_update = pmull_ghash_update_p8;
+
 	err = crypto_register_shash(&ghash_alg);
 	if (err)
 		return err;
@@ -332,5 +350,5 @@ static void __exit ghash_ce_mod_exit(void)
 	crypto_unregister_shash(&ghash_alg);
 }
 
-module_cpu_feature_match(PMULL, ghash_ce_mod_init);
+module_init(ghash_ce_mod_init);
 module_exit(ghash_ce_mod_exit);
diff --git a/arch/arm/include/asm/arch_gicv3.h b/arch/arm/include/asm/arch_gicv3.h
index 27475904e096..eee269321923 100644
--- a/arch/arm/include/asm/arch_gicv3.h
+++ b/arch/arm/include/asm/arch_gicv3.h
@@ -276,6 +276,12 @@ static inline u64 __gic_readq_nonatomic(const volatile void __iomem *addr)
 #define gicr_write_pendbaser(v, c)	__gic_writeq_nonatomic(v, c)
 
 /*
+ * GICR_xLPIR - only the lower bits are significant
+ */
+#define gic_read_lpir(c)		readl_relaxed(c)
+#define gic_write_lpir(v, c)		writel_relaxed(lower_32_bits(v), c)
+
+/*
  * GITS_TYPER is an ID register and doesn't need atomicity.
  */
 #define gits_read_typer(c)		__gic_readq_nonatomic(c)
@@ -291,5 +297,33 @@ static inline u64 __gic_readq_nonatomic(const volatile void __iomem *addr)
  */
 #define gits_write_cwriter(v, c)	__gic_writeq_nonatomic(v, c)
 
+/*
+ * GITS_VPROPBASER - hi and lo bits may be accessed independently.
+ */
+#define gits_write_vpropbaser(v, c)	__gic_writeq_nonatomic(v, c)
+
+/*
+ * GITS_VPENDBASER - the Valid bit must be cleared before changing
+ * anything else.
+ */
+static inline void gits_write_vpendbaser(u64 val, void * __iomem addr)
+{
+	u32 tmp;
+
+	tmp = readl_relaxed(addr + 4);
+	if (tmp & (GICR_VPENDBASER_Valid >> 32)) {
+		tmp &= ~(GICR_VPENDBASER_Valid >> 32);
+		writel_relaxed(tmp, addr + 4);
+	}
+
+	/*
+	 * Use the fact that __gic_writeq_nonatomic writes the second
+	 * half of the 64bit quantity after the first.
+	 */
+	__gic_writeq_nonatomic(val, addr);
+}
+
+#define gits_read_vpendbaser(c)		__gic_readq_nonatomic(c)
+
 #endif /* !__ASSEMBLY__ */
 #endif /* !__ASM_ARCH_GICV3_H */
diff --git a/arch/arm/include/asm/futex.h b/arch/arm/include/asm/futex.h
index 6795368ad023..cc414382dab4 100644
--- a/arch/arm/include/asm/futex.h
+++ b/arch/arm/include/asm/futex.h
@@ -128,20 +128,10 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
 #endif /* !SMP */
 
 static inline int
-futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
+arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
 {
-	int op = (encoded_op >> 28) & 7;
-	int cmp = (encoded_op >> 24) & 15;
-	int oparg = (encoded_op << 8) >> 20;
-	int cmparg = (encoded_op << 20) >> 20;
 	int oldval = 0, ret, tmp;
 
-	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
-		oparg = 1 << oparg;
-
-	if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
-		return -EFAULT;
-
 #ifndef CONFIG_SMP
 	preempt_disable();
 #endif
@@ -172,17 +162,9 @@ futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
 	preempt_enable();
 #endif
 
-	if (!ret) {
-		switch (cmp) {
-		case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
-		case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
-		case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
-		case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
-		case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
-		case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
-		default: ret = -ENOSYS;
-		}
-	}
+	if (!ret)
+		*oval = oldval;
+
 	return ret;
 }
 
diff --git a/arch/arm/include/asm/spinlock.h b/arch/arm/include/asm/spinlock.h
index 4bec45442072..c030143c18c6 100644
--- a/arch/arm/include/asm/spinlock.h
+++ b/arch/arm/include/asm/spinlock.h
@@ -52,22 +52,6 @@ static inline void dsb_sev(void)
  * memory.
  */
 
-static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
-{
-	u16 owner = READ_ONCE(lock->tickets.owner);
-
-	for (;;) {
-		arch_spinlock_t tmp = READ_ONCE(*lock);
-
-		if (tmp.tickets.owner == tmp.tickets.next ||
-		    tmp.tickets.owner != owner)
-			break;
-
-		wfe();
-	}
-	smp_acquire__after_ctrl_dep();
-}
-
 #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
 
 static inline void arch_spin_lock(arch_spinlock_t *lock)
diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
index 776757d1604a..1d468b527b7b 100644
--- a/arch/arm/include/asm/thread_info.h
+++ b/arch/arm/include/asm/thread_info.h
@@ -139,10 +139,11 @@ extern int vfp_restore_user_hwstate(struct user_vfp __user *,
 #define TIF_NEED_RESCHED	1	/* rescheduling necessary */
 #define TIF_NOTIFY_RESUME	2	/* callback before returning to user */
 #define TIF_UPROBE		3	/* breakpointed or singlestepping */
-#define TIF_SYSCALL_TRACE	4	/* syscall trace active */
-#define TIF_SYSCALL_AUDIT	5	/* syscall auditing active */
-#define TIF_SYSCALL_TRACEPOINT	6	/* syscall tracepoint instrumentation */
-#define TIF_SECCOMP		7	/* seccomp syscall filtering active */
+#define TIF_FSCHECK		4	/* Check FS is USER_DS on return */
+#define TIF_SYSCALL_TRACE	5	/* syscall trace active */
+#define TIF_SYSCALL_AUDIT	6	/* syscall auditing active */
+#define TIF_SYSCALL_TRACEPOINT	7	/* syscall tracepoint instrumentation */
+#define TIF_SECCOMP		8	/* seccomp syscall filtering active */
 
 #define TIF_NOHZ		12	/* in adaptive nohz mode */
 #define TIF_USING_IWMMXT	17
@@ -153,6 +154,7 @@ extern int vfp_restore_user_hwstate(struct user_vfp __user *,
 #define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
 #define _TIF_NOTIFY_RESUME	(1 << TIF_NOTIFY_RESUME)
 #define _TIF_UPROBE		(1 << TIF_UPROBE)
+#define _TIF_FSCHECK		(1 << TIF_FSCHECK)
 #define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
 #define _TIF_SYSCALL_AUDIT	(1 << TIF_SYSCALL_AUDIT)
 #define _TIF_SYSCALL_TRACEPOINT	(1 << TIF_SYSCALL_TRACEPOINT)
@@ -166,8 +168,9 @@ extern int vfp_restore_user_hwstate(struct user_vfp __user *,
 /*
  * Change these and you break ASM code in entry-common.S
  */
-#define _TIF_WORK_MASK		(_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
-				 _TIF_NOTIFY_RESUME | _TIF_UPROBE)
+#define _TIF_WORK_MASK		(_TIF_NEED_RESCHED | _TIF_SIGPENDING |	\
+				 _TIF_NOTIFY_RESUME | _TIF_UPROBE |	\
+				 _TIF_FSCHECK)
 
 #endif /* __KERNEL__ */
 #endif /* __ASM_ARM_THREAD_INFO_H */
diff --git a/arch/arm/include/asm/traps.h b/arch/arm/include/asm/traps.h
index f555bb3664dc..683d9230984a 100644
--- a/arch/arm/include/asm/traps.h
+++ b/arch/arm/include/asm/traps.h
@@ -18,7 +18,6 @@ struct undef_hook {
 void register_undef_hook(struct undef_hook *hook);
 void unregister_undef_hook(struct undef_hook *hook);
 
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
 static inline int __in_irqentry_text(unsigned long ptr)
 {
 	extern char __irqentry_text_start[];
@@ -27,12 +26,6 @@ static inline int __in_irqentry_text(unsigned long ptr)
 	return ptr >= (unsigned long)&__irqentry_text_start &&
 	       ptr < (unsigned long)&__irqentry_text_end;
 }
-#else
-static inline int __in_irqentry_text(unsigned long ptr)
-{
-	return 0;
-}
-#endif
 
 static inline int in_exception_text(unsigned long ptr)
 {
diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h
index 0bf2347495f1..87936dd5d151 100644
--- a/arch/arm/include/asm/uaccess.h
+++ b/arch/arm/include/asm/uaccess.h
@@ -70,6 +70,8 @@ static inline void set_fs(mm_segment_t fs)
 {
 	current_thread_info()->addr_limit = fs;
 	modify_domain(DOMAIN_KERNEL, fs ? DOMAIN_CLIENT : DOMAIN_MANAGER);
+	/* On user-mode return, check fs is correct */
+	set_thread_flag(TIF_FSCHECK);
 }
 
 #define segment_eq(a, b)	((a) == (b))
diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index eb5cd77bf1d8..e33c32d56193 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -41,7 +41,9 @@ ret_fast_syscall:
  UNWIND(.cantunwind	)
 	disable_irq_notrace			@ disable interrupts
 	ldr	r1, [tsk, #TI_FLAGS]		@ re-check for syscall tracing
-	tst	r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK
+	tst	r1, #_TIF_SYSCALL_WORK
+	bne	fast_work_pending
+	tst	r1, #_TIF_WORK_MASK
 	bne	fast_work_pending
 
 	/* perform architecture specific actions before user return */
@@ -67,12 +69,15 @@ ret_fast_syscall:
 	str	r0, [sp, #S_R0 + S_OFF]!	@ save returned r0
 	disable_irq_notrace			@ disable interrupts
 	ldr	r1, [tsk, #TI_FLAGS]		@ re-check for syscall tracing
-	tst	r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK
+	tst	r1, #_TIF_SYSCALL_WORK
+	bne	fast_work_pending
+	tst	r1, #_TIF_WORK_MASK
 	beq	no_work_pending
  UNWIND(.fnend		)
 ENDPROC(ret_fast_syscall)
 
 	/* Slower path - fall through to work_pending */
+fast_work_pending:
 #endif
 
 	tst	r1, #_TIF_SYSCALL_WORK
diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c
index 5814298ef0b7..e2de50bf8742 100644
--- a/arch/arm/kernel/signal.c
+++ b/arch/arm/kernel/signal.c
@@ -14,6 +14,7 @@
 #include <linux/uaccess.h>
 #include <linux/tracehook.h>
 #include <linux/uprobes.h>
+#include <linux/syscalls.h>
 
 #include <asm/elf.h>
 #include <asm/cacheflush.h>
@@ -613,6 +614,10 @@ do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall)
 	 * Update the trace code with the current status.
 	 */
 	trace_hardirqs_off();
+
+	/* Check valid user FS if needed */
+	addr_limit_user_check();
+
 	do {
 		if (likely(thread_flags & _TIF_NEED_RESCHED)) {
 			schedule();
diff --git a/arch/arm/mach-hisi/Kconfig b/arch/arm/mach-hisi/Kconfig
index a3b091a4d344..65a048fa08ec 100644
--- a/arch/arm/mach-hisi/Kconfig
+++ b/arch/arm/mach-hisi/Kconfig
@@ -39,6 +39,7 @@ config ARCH_HIP04
 	select HAVE_ARM_ARCH_TIMER
 	select MCPM if SMP
 	select MCPM_QUAD_CLUSTER if SMP
+	select GENERIC_IRQ_EFFECTIVE_AFF_MASK
 	help
 	  Support for Hisilicon HiP04 SoC family
 
diff --git a/arch/arm/mach-omap2/Makefile b/arch/arm/mach-omap2/Makefile
index 779fb1f680b3..b3b3b3a19183 100644
--- a/arch/arm/mach-omap2/Makefile
+++ b/arch/arm/mach-omap2/Makefile
@@ -8,7 +8,7 @@ ccflags-y := -I$(srctree)/$(src)/include \
 # Common support
 obj-y := id.o io.o control.o devices.o fb.o timer.o pm.o \
 	 common.o dma.o wd_timer.o display.o i2c.o hdq1w.o omap_hwmod.o \
-	 omap_device.o omap-headsmp.o sram.o drm.o
+	 omap_device.o omap-headsmp.o sram.o
 
 hwmod-common				= omap_hwmod.o omap_hwmod_reset.o \
 					  omap_hwmod_common_data.o
diff --git a/arch/arm/mach-omap2/board-generic.c b/arch/arm/mach-omap2/board-generic.c
index b1e661bb5521..583fc39d84cd 100644
--- a/arch/arm/mach-omap2/board-generic.c
+++ b/arch/arm/mach-omap2/board-generic.c
@@ -33,6 +33,7 @@ static void __init __maybe_unused omap_generic_init(void)
 	pdata_quirks_init(omap_dt_match_table);
 
 	omapdss_init_of();
+	omap_soc_device_init();
 }
 
 #ifdef CONFIG_SOC_OMAP2420
diff --git a/arch/arm/mach-omap2/display.c b/arch/arm/mach-omap2/display.c
index 8fa01c0ecdb2..b3f6eb5d04a2 100644
--- a/arch/arm/mach-omap2/display.c
+++ b/arch/arm/mach-omap2/display.c
@@ -66,6 +66,7 @@
  */
 #define FRAMEDONE_IRQ_TIMEOUT		100
 
+#if defined(CONFIG_FB_OMAP2)
 static struct platform_device omap_display_device = {
 	.name          = "omapdss",
 	.id            = -1,
@@ -163,6 +164,65 @@ static enum omapdss_version __init omap_display_get_version(void)
 		return OMAPDSS_VER_UNKNOWN;
 }
 
+static int __init omapdss_init_fbdev(void)
+{
+	static struct omap_dss_board_info board_data = {
+		.dsi_enable_pads = omap_dsi_enable_pads,
+		.dsi_disable_pads = omap_dsi_disable_pads,
+		.set_min_bus_tput = omap_dss_set_min_bus_tput,
+	};
+	struct device_node *node;
+	int r;
+
+	board_data.version = omap_display_get_version();
+	if (board_data.version == OMAPDSS_VER_UNKNOWN) {
+		pr_err("DSS not supported on this SoC\n");
+		return -ENODEV;
+	}
+
+	omap_display_device.dev.platform_data = &board_data;
+
+	r = platform_device_register(&omap_display_device);
+	if (r < 0) {
+		pr_err("Unable to register omapdss device\n");
+		return r;
+	}
+
+	/* create vrfb device */
+	r = omap_init_vrfb();
+	if (r < 0) {
+		pr_err("Unable to register omapvrfb device\n");
+		return r;
+	}
+
+	/* create FB device */
+	r = omap_init_fb();
+	if (r < 0) {
+		pr_err("Unable to register omapfb device\n");
+		return r;
+	}
+
+	/* create V4L2 display device */
+	r = omap_init_vout();
+	if (r < 0) {
+		pr_err("Unable to register omap_vout device\n");
+		return r;
+	}
+
+	/* add DSI info for omap4 */
+	node = of_find_node_by_name(NULL, "omap4_padconf_global");
+	if (node)
+		omap4_dsi_mux_syscon = syscon_node_to_regmap(node);
+
+	return 0;
+}
+#else
+static inline int omapdss_init_fbdev(void)
+{
+	return 0;
+}
+#endif /* CONFIG_FB_OMAP2 */
+
 static void dispc_disable_outputs(void)
 {
 	u32 v, irq_mask = 0;
@@ -335,16 +395,9 @@ static struct device_node * __init omapdss_find_dss_of_node(void)
 int __init omapdss_init_of(void)
 {
 	int r;
-	enum omapdss_version ver;
 	struct device_node *node;
 	struct platform_device *pdev;
 
-	static struct omap_dss_board_info board_data = {
-		.dsi_enable_pads = omap_dsi_enable_pads,
-		.dsi_disable_pads = omap_dsi_disable_pads,
-		.set_min_bus_tput = omap_dss_set_min_bus_tput,
-	};
-
 	/* only create dss helper devices if dss is enabled in the .dts */
 
 	node = omapdss_find_dss_of_node();
@@ -354,13 +407,6 @@ int __init omapdss_init_of(void)
 	if (!of_device_is_available(node))
 		return 0;
 
-	ver = omap_display_get_version();
-
-	if (ver == OMAPDSS_VER_UNKNOWN) {
-		pr_err("DSS not supported on this SoC\n");
-		return -ENODEV;
-	}
-
 	pdev = of_find_device_by_node(node);
 
 	if (!pdev) {
@@ -374,48 +420,5 @@ int __init omapdss_init_of(void)
 		return r;
 	}
 
-	board_data.version = ver;
-
-	omap_display_device.dev.platform_data = &board_data;
-
-	r = platform_device_register(&omap_display_device);
-	if (r < 0) {
-		pr_err("Unable to register omapdss device\n");
-		return r;
-	}
-
-	/* create DRM device */
-	r = omap_init_drm();
-	if (r < 0) {
-		pr_err("Unable to register omapdrm device\n");
-		return r;
-	}
-
-	/* create vrfb device */
-	r = omap_init_vrfb();
-	if (r < 0) {
-		pr_err("Unable to register omapvrfb device\n");
-		return r;
-	}
-
-	/* create FB device */
-	r = omap_init_fb();
-	if (r < 0) {
-		pr_err("Unable to register omapfb device\n");
-		return r;
-	}
-
-	/* create V4L2 display device */
-	r = omap_init_vout();
-	if (r < 0) {
-		pr_err("Unable to register omap_vout device\n");
-		return r;
-	}
-
-	/* add DSI info for omap4 */
-	node = of_find_node_by_name(NULL, "omap4_padconf_global");
-	if (node)
-		omap4_dsi_mux_syscon = syscon_node_to_regmap(node);
-
-	return 0;
+	return omapdss_init_fbdev();
 }
diff --git a/arch/arm/mach-omap2/display.h b/arch/arm/mach-omap2/display.h
index 9a39646d4316..42ec2e99a2f4 100644
--- a/arch/arm/mach-omap2/display.h
+++ b/arch/arm/mach-omap2/display.h
@@ -26,7 +26,6 @@ struct omap_dss_dispc_dev_attr {
 	bool	has_framedonetv_irq;
 };
 
-int omap_init_drm(void);
 int omap_init_vrfb(void);
 int omap_init_fb(void);
 int omap_init_vout(void);
diff --git a/arch/arm/mach-omap2/drm.c b/arch/arm/mach-omap2/drm.c
deleted file mode 100644
index 44fef961bb70..000000000000
--- a/arch/arm/mach-omap2/drm.c
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * DRM/KMS device registration for TI OMAP platforms
- *
- * Copyright (C) 2012 Texas Instruments
- * Author: Rob Clark <rob.clark@linaro.org>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/init.h>
-#include <linux/platform_device.h>
-#include <linux/dma-mapping.h>
-#include <linux/platform_data/omap_drm.h>
-
-#include "soc.h"
-#include "display.h"
-
-#if IS_ENABLED(CONFIG_DRM_OMAP)
-
-static struct omap_drm_platform_data platform_data;
-
-static struct platform_device omap_drm_device = {
-	.dev = {
-		.coherent_dma_mask = DMA_BIT_MASK(32),
-		.platform_data = &platform_data,
-	},
-	.name = "omapdrm",
-	.id = 0,
-};
-
-int __init omap_init_drm(void)
-{
-	platform_data.omaprev = GET_OMAP_TYPE;
-
-	return platform_device_register(&omap_drm_device);
-
-}
-#else
-int __init omap_init_drm(void) { return 0; }
-#endif
diff --git a/arch/arm/mach-omap2/io.c b/arch/arm/mach-omap2/io.c
index 1cd20e4d56b0..cb5d7314cf99 100644
--- a/arch/arm/mach-omap2/io.c
+++ b/arch/arm/mach-omap2/io.c
@@ -428,7 +428,6 @@ static void __init __maybe_unused omap_hwmod_init_postsetup(void)
 static void __init __maybe_unused omap_common_late_init(void)
 {
 	omap2_common_pm_late_init();
-	omap_soc_device_init();
 }
 
 #ifdef CONFIG_SOC_OMAP2420
diff --git a/arch/arm/mach-pxa/raumfeld.c b/arch/arm/mach-pxa/raumfeld.c
index e2c97728b3c6..9d662fed03ec 100644
--- a/arch/arm/mach-pxa/raumfeld.c
+++ b/arch/arm/mach-pxa/raumfeld.c
@@ -377,7 +377,7 @@ static struct gpiod_lookup_table raumfeld_rotary_gpios_table = {
 	},
 };
 
-static struct property_entry raumfeld_rotary_properties[] = {
+static const struct property_entry raumfeld_rotary_properties[] __initconst = {
 	PROPERTY_ENTRY_INTEGER("rotary-encoder,steps-per-period", u32, 24),
 	PROPERTY_ENTRY_INTEGER("linux,axis",			  u32, REL_X),
 	PROPERTY_ENTRY_INTEGER("rotary-encoder,relative_axis",	  u32, 1),
diff --git a/arch/arm/mach-tegra/cpuidle-tegra114.c b/arch/arm/mach-tegra/cpuidle-tegra114.c
index d3aa9be16621..e3fbcfedf845 100644
--- a/arch/arm/mach-tegra/cpuidle-tegra114.c
+++ b/arch/arm/mach-tegra/cpuidle-tegra114.c
@@ -60,7 +60,7 @@ static int tegra114_idle_power_down(struct cpuidle_device *dev,
 	return index;
 }
 
-static void tegra114_idle_enter_freeze(struct cpuidle_device *dev,
+static void tegra114_idle_enter_s2idle(struct cpuidle_device *dev,
 				       struct cpuidle_driver *drv,
 				       int index)
 {
@@ -77,7 +77,7 @@ static struct cpuidle_driver tegra_idle_driver = {
 #ifdef CONFIG_PM_SLEEP
 		[1] = {
 			.enter			= tegra114_idle_power_down,
-			.enter_freeze		= tegra114_idle_enter_freeze,
+			.enter_s2idle		= tegra114_idle_enter_s2idle,
 			.exit_latency		= 500,
 			.target_residency	= 1000,
 			.flags			= CPUIDLE_FLAG_TIMER_STOP,
diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c
index d5b9fa19b684..c199990e12b6 100644
--- a/arch/arm/net/bpf_jit_32.c
+++ b/arch/arm/net/bpf_jit_32.c
@@ -1,6 +1,7 @@
 /*
- * Just-In-Time compiler for BPF filters on 32bit ARM
+ * Just-In-Time compiler for eBPF filters on 32bit ARM
  *
+ * Copyright (c) 2017 Shubham Bansal <illusionist.neo@gmail.com>
  * Copyright (c) 2011 Mircea Gherzan <mgherzan@gmail.com>
  *
  * This program is free software; you can redistribute it and/or modify it
@@ -8,6 +9,7 @@
  * Free Software Foundation; version 2 of the License.
  */
 
+#include <linux/bpf.h>
 #include <linux/bitops.h>
 #include <linux/compiler.h>
 #include <linux/errno.h>
@@ -18,54 +20,101 @@
 #include <linux/if_vlan.h>
 
 #include <asm/cacheflush.h>
-#include <asm/set_memory.h>
 #include <asm/hwcap.h>
 #include <asm/opcodes.h>
 
 #include "bpf_jit_32.h"
 
+int bpf_jit_enable __read_mostly;
+
+#define STACK_OFFSET(k)	(k)
+#define TMP_REG_1	(MAX_BPF_JIT_REG + 0)	/* TEMP Register 1 */
+#define TMP_REG_2	(MAX_BPF_JIT_REG + 1)	/* TEMP Register 2 */
+#define TCALL_CNT	(MAX_BPF_JIT_REG + 2)	/* Tail Call Count */
+
+/* Flags used for JIT optimization */
+#define SEEN_CALL	(1 << 0)
+
+#define FLAG_IMM_OVERFLOW	(1 << 0)
+
 /*
- * ABI:
+ * Map eBPF registers to ARM 32bit registers or stack scratch space.
+ *
+ * 1. First argument is passed using the arm 32bit registers and rest of the
+ * arguments are passed on stack scratch space.
+ * 2. First callee-saved arugument is mapped to arm 32 bit registers and rest
+ * arguments are mapped to scratch space on stack.
+ * 3. We need two 64 bit temp registers to do complex operations on eBPF
+ * registers.
+ *
+ * As the eBPF registers are all 64 bit registers and arm has only 32 bit
+ * registers, we have to map each eBPF registers with two arm 32 bit regs or
+ * scratch memory space and we have to build eBPF 64 bit register from those.
  *
- * r0	scratch register
- * r4	BPF register A
- * r5	BPF register X
- * r6	pointer to the skb
- * r7	skb->data
- * r8	skb_headlen(skb)
  */
+static const u8 bpf2a32[][2] = {
+	/* return value from in-kernel function, and exit value from eBPF */
+	[BPF_REG_0] = {ARM_R1, ARM_R0},
+	/* arguments from eBPF program to in-kernel function */
+	[BPF_REG_1] = {ARM_R3, ARM_R2},
+	/* Stored on stack scratch space */
+	[BPF_REG_2] = {STACK_OFFSET(0), STACK_OFFSET(4)},
+	[BPF_REG_3] = {STACK_OFFSET(8), STACK_OFFSET(12)},
+	[BPF_REG_4] = {STACK_OFFSET(16), STACK_OFFSET(20)},
+	[BPF_REG_5] = {STACK_OFFSET(24), STACK_OFFSET(28)},
+	/* callee saved registers that in-kernel function will preserve */
+	[BPF_REG_6] = {ARM_R5, ARM_R4},
+	/* Stored on stack scratch space */
+	[BPF_REG_7] = {STACK_OFFSET(32), STACK_OFFSET(36)},
+	[BPF_REG_8] = {STACK_OFFSET(40), STACK_OFFSET(44)},
+	[BPF_REG_9] = {STACK_OFFSET(48), STACK_OFFSET(52)},
+	/* Read only Frame Pointer to access Stack */
+	[BPF_REG_FP] = {STACK_OFFSET(56), STACK_OFFSET(60)},
+	/* Temporary Register for internal BPF JIT, can be used
+	 * for constant blindings and others.
+	 */
+	[TMP_REG_1] = {ARM_R7, ARM_R6},
+	[TMP_REG_2] = {ARM_R10, ARM_R8},
+	/* Tail call count. Stored on stack scratch space. */
+	[TCALL_CNT] = {STACK_OFFSET(64), STACK_OFFSET(68)},
+	/* temporary register for blinding constants.
+	 * Stored on stack scratch space.
+	 */
+	[BPF_REG_AX] = {STACK_OFFSET(72), STACK_OFFSET(76)},
+};
 
-#define r_scratch	ARM_R0
-/* r1-r3 are (also) used for the unaligned loads on the non-ARMv7 slowpath */
-#define r_off		ARM_R1
-#define r_A		ARM_R4
-#define r_X		ARM_R5
-#define r_skb		ARM_R6
-#define r_skb_data	ARM_R7
-#define r_skb_hl	ARM_R8
-
-#define SCRATCH_SP_OFFSET	0
-#define SCRATCH_OFF(k)		(SCRATCH_SP_OFFSET + 4 * (k))
-
-#define SEEN_MEM		((1 << BPF_MEMWORDS) - 1)
-#define SEEN_MEM_WORD(k)	(1 << (k))
-#define SEEN_X			(1 << BPF_MEMWORDS)
-#define SEEN_CALL		(1 << (BPF_MEMWORDS + 1))
-#define SEEN_SKB		(1 << (BPF_MEMWORDS + 2))
-#define SEEN_DATA		(1 << (BPF_MEMWORDS + 3))
+#define	dst_lo	dst[1]
+#define dst_hi	dst[0]
+#define src_lo	src[1]
+#define src_hi	src[0]
 
-#define FLAG_NEED_X_RESET	(1 << 0)
-#define FLAG_IMM_OVERFLOW	(1 << 1)
+/*
+ * JIT Context:
+ *
+ * prog			:	bpf_prog
+ * idx			:	index of current last JITed instruction.
+ * prologue_bytes	:	bytes used in prologue.
+ * epilogue_offset	:	offset of epilogue starting.
+ * seen			:	bit mask used for JIT optimization.
+ * offsets		:	array of eBPF instruction offsets in
+ *				JITed code.
+ * target		:	final JITed code.
+ * epilogue_bytes	:	no of bytes used in epilogue.
+ * imm_count		:	no of immediate counts used for global
+ *				variables.
+ * imms			:	array of global variable addresses.
+ */
 
 struct jit_ctx {
-	const struct bpf_prog *skf;
-	unsigned idx;
-	unsigned prologue_bytes;
-	int ret0_fp_idx;
+	const struct bpf_prog *prog;
+	unsigned int idx;
+	unsigned int prologue_bytes;
+	unsigned int epilogue_offset;
 	u32 seen;
 	u32 flags;
 	u32 *offsets;
 	u32 *target;
+	u32 stack_size;
 #if __LINUX_ARM_ARCH__ < 7
 	u16 epilogue_bytes;
 	u16 imm_count;
@@ -73,68 +122,16 @@ struct jit_ctx {
 #endif
 };
 
-int bpf_jit_enable __read_mostly;
-
-static inline int call_neg_helper(struct sk_buff *skb, int offset, void *ret,
-		      unsigned int size)
-{
-	void *ptr = bpf_internal_load_pointer_neg_helper(skb, offset, size);
-
-	if (!ptr)
-		return -EFAULT;
-	memcpy(ret, ptr, size);
-	return 0;
-}
-
-static u64 jit_get_skb_b(struct sk_buff *skb, int offset)
-{
-	u8 ret;
-	int err;
-
-	if (offset < 0)
-		err = call_neg_helper(skb, offset, &ret, 1);
-	else
-		err = skb_copy_bits(skb, offset, &ret, 1);
-
-	return (u64)err << 32 | ret;
-}
-
-static u64 jit_get_skb_h(struct sk_buff *skb, int offset)
-{
-	u16 ret;
-	int err;
-
-	if (offset < 0)
-		err = call_neg_helper(skb, offset, &ret, 2);
-	else
-		err = skb_copy_bits(skb, offset, &ret, 2);
-
-	return (u64)err << 32 | ntohs(ret);
-}
-
-static u64 jit_get_skb_w(struct sk_buff *skb, int offset)
-{
-	u32 ret;
-	int err;
-
-	if (offset < 0)
-		err = call_neg_helper(skb, offset, &ret, 4);
-	else
-		err = skb_copy_bits(skb, offset, &ret, 4);
-
-	return (u64)err << 32 | ntohl(ret);
-}
-
 /*
  * Wrappers which handle both OABI and EABI and assures Thumb2 interworking
  * (where the assembly routines like __aeabi_uidiv could cause problems).
  */
-static u32 jit_udiv(u32 dividend, u32 divisor)
+static u32 jit_udiv32(u32 dividend, u32 divisor)
 {
 	return dividend / divisor;
 }
 
-static u32 jit_mod(u32 dividend, u32 divisor)
+static u32 jit_mod32(u32 dividend, u32 divisor)
 {
 	return dividend % divisor;
 }
@@ -158,36 +155,22 @@ static inline void emit(u32 inst, struct jit_ctx *ctx)
 	_emit(ARM_COND_AL, inst, ctx);
 }
 
-static u16 saved_regs(struct jit_ctx *ctx)
+/*
+ * Checks if immediate value can be converted to imm12(12 bits) value.
+ */
+static int16_t imm8m(u32 x)
 {
-	u16 ret = 0;
-
-	if ((ctx->skf->len > 1) ||
-	    (ctx->skf->insns[0].code == (BPF_RET | BPF_A)))
-		ret |= 1 << r_A;
-
-#ifdef CONFIG_FRAME_POINTER
-	ret |= (1 << ARM_FP) | (1 << ARM_IP) | (1 << ARM_LR) | (1 << ARM_PC);
-#else
-	if (ctx->seen & SEEN_CALL)
-		ret |= 1 << ARM_LR;
-#endif
-	if (ctx->seen & (SEEN_DATA | SEEN_SKB))
-		ret |= 1 << r_skb;
-	if (ctx->seen & SEEN_DATA)
-		ret |= (1 << r_skb_data) | (1 << r_skb_hl);
-	if (ctx->seen & SEEN_X)
-		ret |= 1 << r_X;
-
-	return ret;
-}
+	u32 rot;
 
-static inline int mem_words_used(struct jit_ctx *ctx)
-{
-	/* yes, we do waste some stack space IF there are "holes" in the set" */
-	return fls(ctx->seen & SEEN_MEM);
+	for (rot = 0; rot < 16; rot++)
+		if ((x & ~ror32(0xff, 2 * rot)) == 0)
+			return rol32(x, 2 * rot) | (rot << 8);
+	return -1;
 }
 
+/*
+ * Initializes the JIT space with undefined instructions.
+ */
 static void jit_fill_hole(void *area, unsigned int size)
 {
 	u32 *ptr;
@@ -196,88 +179,34 @@ static void jit_fill_hole(void *area, unsigned int size)
 		*ptr++ = __opcode_to_mem_arm(ARM_INST_UDF);
 }
 
-static void build_prologue(struct jit_ctx *ctx)
-{
-	u16 reg_set = saved_regs(ctx);
-	u16 off;
-
-#ifdef CONFIG_FRAME_POINTER
-	emit(ARM_MOV_R(ARM_IP, ARM_SP), ctx);
-	emit(ARM_PUSH(reg_set), ctx);
-	emit(ARM_SUB_I(ARM_FP, ARM_IP, 4), ctx);
-#else
-	if (reg_set)
-		emit(ARM_PUSH(reg_set), ctx);
-#endif
-
-	if (ctx->seen & (SEEN_DATA | SEEN_SKB))
-		emit(ARM_MOV_R(r_skb, ARM_R0), ctx);
-
-	if (ctx->seen & SEEN_DATA) {
-		off = offsetof(struct sk_buff, data);
-		emit(ARM_LDR_I(r_skb_data, r_skb, off), ctx);
-		/* headlen = len - data_len */
-		off = offsetof(struct sk_buff, len);
-		emit(ARM_LDR_I(r_skb_hl, r_skb, off), ctx);
-		off = offsetof(struct sk_buff, data_len);
-		emit(ARM_LDR_I(r_scratch, r_skb, off), ctx);
-		emit(ARM_SUB_R(r_skb_hl, r_skb_hl, r_scratch), ctx);
-	}
-
-	if (ctx->flags & FLAG_NEED_X_RESET)
-		emit(ARM_MOV_I(r_X, 0), ctx);
-
-	/* do not leak kernel data to userspace */
-	if (bpf_needs_clear_a(&ctx->skf->insns[0]))
-		emit(ARM_MOV_I(r_A, 0), ctx);
-
-	/* stack space for the BPF_MEM words */
-	if (ctx->seen & SEEN_MEM)
-		emit(ARM_SUB_I(ARM_SP, ARM_SP, mem_words_used(ctx) * 4), ctx);
-}
-
-static void build_epilogue(struct jit_ctx *ctx)
-{
-	u16 reg_set = saved_regs(ctx);
-
-	if (ctx->seen & SEEN_MEM)
-		emit(ARM_ADD_I(ARM_SP, ARM_SP, mem_words_used(ctx) * 4), ctx);
-
-	reg_set &= ~(1 << ARM_LR);
+/* Stack must be multiples of 16 Bytes */
+#define STACK_ALIGN(sz) (((sz) + 3) & ~3)
 
-#ifdef CONFIG_FRAME_POINTER
-	/* the first instruction of the prologue was: mov ip, sp */
-	reg_set &= ~(1 << ARM_IP);
-	reg_set |= (1 << ARM_SP);
-	emit(ARM_LDM(ARM_SP, reg_set), ctx);
-#else
-	if (reg_set) {
-		if (ctx->seen & SEEN_CALL)
-			reg_set |= 1 << ARM_PC;
-		emit(ARM_POP(reg_set), ctx);
-	}
+/* Stack space for BPF_REG_2, BPF_REG_3, BPF_REG_4,
+ * BPF_REG_5, BPF_REG_7, BPF_REG_8, BPF_REG_9,
+ * BPF_REG_FP and Tail call counts.
+ */
+#define SCRATCH_SIZE 80
 
-	if (!(ctx->seen & SEEN_CALL))
-		emit(ARM_BX(ARM_LR), ctx);
-#endif
-}
+/* total stack size used in JITed code */
+#define _STACK_SIZE \
+	(ctx->prog->aux->stack_depth + \
+	 + SCRATCH_SIZE + \
+	 + 4 /* extra for skb_copy_bits buffer */)
 
-static int16_t imm8m(u32 x)
-{
-	u32 rot;
+#define STACK_SIZE STACK_ALIGN(_STACK_SIZE)
 
-	for (rot = 0; rot < 16; rot++)
-		if ((x & ~ror32(0xff, 2 * rot)) == 0)
-			return rol32(x, 2 * rot) | (rot << 8);
+/* Get the offset of eBPF REGISTERs stored on scratch space. */
+#define STACK_VAR(off) (STACK_SIZE-off-4)
 
-	return -1;
-}
+/* Offset of skb_copy_bits buffer */
+#define SKB_BUFFER STACK_VAR(SCRATCH_SIZE)
 
 #if __LINUX_ARM_ARCH__ < 7
 
 static u16 imm_offset(u32 k, struct jit_ctx *ctx)
 {
-	unsigned i = 0, offset;
+	unsigned int i = 0, offset;
 	u16 imm;
 
 	/* on the "fake" run we just count them (duplicates included) */
@@ -296,7 +225,7 @@ static u16 imm_offset(u32 k, struct jit_ctx *ctx)
 		ctx->imms[i] = k;
 
 	/* constants go just after the epilogue */
-	offset =  ctx->offsets[ctx->skf->len];
+	offset =  ctx->offsets[ctx->prog->len - 1] * 4;
 	offset += ctx->prologue_bytes;
 	offset += ctx->epilogue_bytes;
 	offset += i * 4;
@@ -320,10 +249,22 @@ static u16 imm_offset(u32 k, struct jit_ctx *ctx)
 
 #endif /* __LINUX_ARM_ARCH__ */
 
+static inline int bpf2a32_offset(int bpf_to, int bpf_from,
+				 const struct jit_ctx *ctx) {
+	int to, from;
+
+	if (ctx->target == NULL)
+		return 0;
+	to = ctx->offsets[bpf_to];
+	from = ctx->offsets[bpf_from];
+
+	return to - from - 1;
+}
+
 /*
  * Move an immediate that's not an imm8m to a core register.
  */
-static inline void emit_mov_i_no8m(int rd, u32 val, struct jit_ctx *ctx)
+static inline void emit_mov_i_no8m(const u8 rd, u32 val, struct jit_ctx *ctx)
 {
 #if __LINUX_ARM_ARCH__ < 7
 	emit(ARM_LDR_I(rd, ARM_PC, imm_offset(val, ctx)), ctx);
@@ -334,7 +275,7 @@ static inline void emit_mov_i_no8m(int rd, u32 val, struct jit_ctx *ctx)
 #endif
 }
 
-static inline void emit_mov_i(int rd, u32 val, struct jit_ctx *ctx)
+static inline void emit_mov_i(const u8 rd, u32 val, struct jit_ctx *ctx)
 {
 	int imm12 = imm8m(val);
 
@@ -344,676 +285,1594 @@ static inline void emit_mov_i(int rd, u32 val, struct jit_ctx *ctx)
 		emit_mov_i_no8m(rd, val, ctx);
 }
 
-#if __LINUX_ARM_ARCH__ < 6
-
-static void emit_load_be32(u8 cond, u8 r_res, u8 r_addr, struct jit_ctx *ctx)
+static inline void emit_blx_r(u8 tgt_reg, struct jit_ctx *ctx)
 {
-	_emit(cond, ARM_LDRB_I(ARM_R3, r_addr, 1), ctx);
-	_emit(cond, ARM_LDRB_I(ARM_R1, r_addr, 0), ctx);
-	_emit(cond, ARM_LDRB_I(ARM_R2, r_addr, 3), ctx);
-	_emit(cond, ARM_LSL_I(ARM_R3, ARM_R3, 16), ctx);
-	_emit(cond, ARM_LDRB_I(ARM_R0, r_addr, 2), ctx);
-	_emit(cond, ARM_ORR_S(ARM_R3, ARM_R3, ARM_R1, SRTYPE_LSL, 24), ctx);
-	_emit(cond, ARM_ORR_R(ARM_R3, ARM_R3, ARM_R2), ctx);
-	_emit(cond, ARM_ORR_S(r_res, ARM_R3, ARM_R0, SRTYPE_LSL, 8), ctx);
+	ctx->seen |= SEEN_CALL;
+#if __LINUX_ARM_ARCH__ < 5
+	emit(ARM_MOV_R(ARM_LR, ARM_PC), ctx);
+
+	if (elf_hwcap & HWCAP_THUMB)
+		emit(ARM_BX(tgt_reg), ctx);
+	else
+		emit(ARM_MOV_R(ARM_PC, tgt_reg), ctx);
+#else
+	emit(ARM_BLX_R(tgt_reg), ctx);
+#endif
 }
 
-static void emit_load_be16(u8 cond, u8 r_res, u8 r_addr, struct jit_ctx *ctx)
+static inline int epilogue_offset(const struct jit_ctx *ctx)
 {
-	_emit(cond, ARM_LDRB_I(ARM_R1, r_addr, 0), ctx);
-	_emit(cond, ARM_LDRB_I(ARM_R2, r_addr, 1), ctx);
-	_emit(cond, ARM_ORR_S(r_res, ARM_R2, ARM_R1, SRTYPE_LSL, 8), ctx);
+	int to, from;
+	/* No need for 1st dummy run */
+	if (ctx->target == NULL)
+		return 0;
+	to = ctx->epilogue_offset;
+	from = ctx->idx;
+
+	return to - from - 2;
 }
 
-static inline void emit_swap16(u8 r_dst, u8 r_src, struct jit_ctx *ctx)
+static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op)
 {
-	/* r_dst = (r_src << 8) | (r_src >> 8) */
-	emit(ARM_LSL_I(ARM_R1, r_src, 8), ctx);
-	emit(ARM_ORR_S(r_dst, ARM_R1, r_src, SRTYPE_LSR, 8), ctx);
+	const u8 *tmp = bpf2a32[TMP_REG_1];
+	s32 jmp_offset;
+
+	/* checks if divisor is zero or not. If it is, then
+	 * exit directly.
+	 */
+	emit(ARM_CMP_I(rn, 0), ctx);
+	_emit(ARM_COND_EQ, ARM_MOV_I(ARM_R0, 0), ctx);
+	jmp_offset = epilogue_offset(ctx);
+	_emit(ARM_COND_EQ, ARM_B(jmp_offset), ctx);
+#if __LINUX_ARM_ARCH__ == 7
+	if (elf_hwcap & HWCAP_IDIVA) {
+		if (op == BPF_DIV)
+			emit(ARM_UDIV(rd, rm, rn), ctx);
+		else {
+			emit(ARM_UDIV(ARM_IP, rm, rn), ctx);
+			emit(ARM_MLS(rd, rn, ARM_IP, rm), ctx);
+		}
+		return;
+	}
+#endif
 
 	/*
-	 * we need to mask out the bits set in r_dst[23:16] due to
-	 * the first shift instruction.
-	 *
-	 * note that 0x8ff is the encoded immediate 0x00ff0000.
+	 * For BPF_ALU | BPF_DIV | BPF_K instructions
+	 * As ARM_R1 and ARM_R0 contains 1st argument of bpf
+	 * function, we need to save it on caller side to save
+	 * it from getting destroyed within callee.
+	 * After the return from the callee, we restore ARM_R0
+	 * ARM_R1.
 	 */
-	emit(ARM_BIC_I(r_dst, r_dst, 0x8ff), ctx);
-}
+	if (rn != ARM_R1) {
+		emit(ARM_MOV_R(tmp[0], ARM_R1), ctx);
+		emit(ARM_MOV_R(ARM_R1, rn), ctx);
+	}
+	if (rm != ARM_R0) {
+		emit(ARM_MOV_R(tmp[1], ARM_R0), ctx);
+		emit(ARM_MOV_R(ARM_R0, rm), ctx);
+	}
 
-#else  /* ARMv6+ */
+	/* Call appropriate function */
+	ctx->seen |= SEEN_CALL;
+	emit_mov_i(ARM_IP, op == BPF_DIV ?
+		   (u32)jit_udiv32 : (u32)jit_mod32, ctx);
+	emit_blx_r(ARM_IP, ctx);
 
-static void emit_load_be32(u8 cond, u8 r_res, u8 r_addr, struct jit_ctx *ctx)
-{
-	_emit(cond, ARM_LDR_I(r_res, r_addr, 0), ctx);
-#ifdef __LITTLE_ENDIAN
-	_emit(cond, ARM_REV(r_res, r_res), ctx);
-#endif
+	/* Save return value */
+	if (rd != ARM_R0)
+		emit(ARM_MOV_R(rd, ARM_R0), ctx);
+
+	/* Restore ARM_R0 and ARM_R1 */
+	if (rn != ARM_R1)
+		emit(ARM_MOV_R(ARM_R1, tmp[0]), ctx);
+	if (rm != ARM_R0)
+		emit(ARM_MOV_R(ARM_R0, tmp[1]), ctx);
 }
 
-static void emit_load_be16(u8 cond, u8 r_res, u8 r_addr, struct jit_ctx *ctx)
+/* Checks whether BPF register is on scratch stack space or not. */
+static inline bool is_on_stack(u8 bpf_reg)
 {
-	_emit(cond, ARM_LDRH_I(r_res, r_addr, 0), ctx);
-#ifdef __LITTLE_ENDIAN
-	_emit(cond, ARM_REV16(r_res, r_res), ctx);
-#endif
+	static u8 stack_regs[] = {BPF_REG_AX, BPF_REG_3, BPF_REG_4, BPF_REG_5,
+				BPF_REG_7, BPF_REG_8, BPF_REG_9, TCALL_CNT,
+				BPF_REG_2, BPF_REG_FP};
+	int i, reg_len = sizeof(stack_regs);
+
+	for (i = 0 ; i < reg_len ; i++) {
+		if (bpf_reg == stack_regs[i])
+			return true;
+	}
+	return false;
 }
 
-static inline void emit_swap16(u8 r_dst __maybe_unused,
-			       u8 r_src __maybe_unused,
-			       struct jit_ctx *ctx __maybe_unused)
+static inline void emit_a32_mov_i(const u8 dst, const u32 val,
+				  bool dstk, struct jit_ctx *ctx)
 {
-#ifdef __LITTLE_ENDIAN
-	emit(ARM_REV16(r_dst, r_src), ctx);
-#endif
+	const u8 *tmp = bpf2a32[TMP_REG_1];
+
+	if (dstk) {
+		emit_mov_i(tmp[1], val, ctx);
+		emit(ARM_STR_I(tmp[1], ARM_SP, STACK_VAR(dst)), ctx);
+	} else {
+		emit_mov_i(dst, val, ctx);
+	}
 }
 
-#endif /* __LINUX_ARM_ARCH__ < 6 */
+/* Sign extended move */
+static inline void emit_a32_mov_i64(const bool is64, const u8 dst[],
+				  const u32 val, bool dstk,
+				  struct jit_ctx *ctx) {
+	u32 hi = 0;
 
+	if (is64 && (val & (1<<31)))
+		hi = (u32)~0;
+	emit_a32_mov_i(dst_lo, val, dstk, ctx);
+	emit_a32_mov_i(dst_hi, hi, dstk, ctx);
+}
 
-/* Compute the immediate value for a PC-relative branch. */
-static inline u32 b_imm(unsigned tgt, struct jit_ctx *ctx)
-{
-	u32 imm;
+static inline void emit_a32_add_r(const u8 dst, const u8 src,
+			      const bool is64, const bool hi,
+			      struct jit_ctx *ctx) {
+	/* 64 bit :
+	 *	adds dst_lo, dst_lo, src_lo
+	 *	adc dst_hi, dst_hi, src_hi
+	 * 32 bit :
+	 *	add dst_lo, dst_lo, src_lo
+	 */
+	if (!hi && is64)
+		emit(ARM_ADDS_R(dst, dst, src), ctx);
+	else if (hi && is64)
+		emit(ARM_ADC_R(dst, dst, src), ctx);
+	else
+		emit(ARM_ADD_R(dst, dst, src), ctx);
+}
 
-	if (ctx->target == NULL)
-		return 0;
-	/*
-	 * BPF allows only forward jumps and the offset of the target is
-	 * still the one computed during the first pass.
+static inline void emit_a32_sub_r(const u8 dst, const u8 src,
+				  const bool is64, const bool hi,
+				  struct jit_ctx *ctx) {
+	/* 64 bit :
+	 *	subs dst_lo, dst_lo, src_lo
+	 *	sbc dst_hi, dst_hi, src_hi
+	 * 32 bit :
+	 *	sub dst_lo, dst_lo, src_lo
 	 */
-	imm  = ctx->offsets[tgt] + ctx->prologue_bytes - (ctx->idx * 4 + 8);
+	if (!hi && is64)
+		emit(ARM_SUBS_R(dst, dst, src), ctx);
+	else if (hi && is64)
+		emit(ARM_SBC_R(dst, dst, src), ctx);
+	else
+		emit(ARM_SUB_R(dst, dst, src), ctx);
+}
 
-	return imm >> 2;
+static inline void emit_alu_r(const u8 dst, const u8 src, const bool is64,
+			      const bool hi, const u8 op, struct jit_ctx *ctx){
+	switch (BPF_OP(op)) {
+	/* dst = dst + src */
+	case BPF_ADD:
+		emit_a32_add_r(dst, src, is64, hi, ctx);
+		break;
+	/* dst = dst - src */
+	case BPF_SUB:
+		emit_a32_sub_r(dst, src, is64, hi, ctx);
+		break;
+	/* dst = dst | src */
+	case BPF_OR:
+		emit(ARM_ORR_R(dst, dst, src), ctx);
+		break;
+	/* dst = dst & src */
+	case BPF_AND:
+		emit(ARM_AND_R(dst, dst, src), ctx);
+		break;
+	/* dst = dst ^ src */
+	case BPF_XOR:
+		emit(ARM_EOR_R(dst, dst, src), ctx);
+		break;
+	/* dst = dst * src */
+	case BPF_MUL:
+		emit(ARM_MUL(dst, dst, src), ctx);
+		break;
+	/* dst = dst << src */
+	case BPF_LSH:
+		emit(ARM_LSL_R(dst, dst, src), ctx);
+		break;
+	/* dst = dst >> src */
+	case BPF_RSH:
+		emit(ARM_LSR_R(dst, dst, src), ctx);
+		break;
+	/* dst = dst >> src (signed)*/
+	case BPF_ARSH:
+		emit(ARM_MOV_SR(dst, dst, SRTYPE_ASR, src), ctx);
+		break;
+	}
 }
 
-#define OP_IMM3(op, r1, r2, imm_val, ctx)				\
-	do {								\
-		imm12 = imm8m(imm_val);					\
-		if (imm12 < 0) {					\
-			emit_mov_i_no8m(r_scratch, imm_val, ctx);	\
-			emit(op ## _R((r1), (r2), r_scratch), ctx);	\
-		} else {						\
-			emit(op ## _I((r1), (r2), imm12), ctx);		\
-		}							\
-	} while (0)
-
-static inline void emit_err_ret(u8 cond, struct jit_ctx *ctx)
-{
-	if (ctx->ret0_fp_idx >= 0) {
-		_emit(cond, ARM_B(b_imm(ctx->ret0_fp_idx, ctx)), ctx);
-		/* NOP to keep the size constant between passes */
-		emit(ARM_MOV_R(ARM_R0, ARM_R0), ctx);
+/* ALU operation (32 bit)
+ * dst = dst (op) src
+ */
+static inline void emit_a32_alu_r(const u8 dst, const u8 src,
+				  bool dstk, bool sstk,
+				  struct jit_ctx *ctx, const bool is64,
+				  const bool hi, const u8 op) {
+	const u8 *tmp = bpf2a32[TMP_REG_1];
+	u8 rn = sstk ? tmp[1] : src;
+
+	if (sstk)
+		emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src)), ctx);
+
+	/* ALU operation */
+	if (dstk) {
+		emit(ARM_LDR_I(tmp[0], ARM_SP, STACK_VAR(dst)), ctx);
+		emit_alu_r(tmp[0], rn, is64, hi, op, ctx);
+		emit(ARM_STR_I(tmp[0], ARM_SP, STACK_VAR(dst)), ctx);
 	} else {
-		_emit(cond, ARM_MOV_I(ARM_R0, 0), ctx);
-		_emit(cond, ARM_B(b_imm(ctx->skf->len, ctx)), ctx);
+		emit_alu_r(dst, rn, is64, hi, op, ctx);
 	}
 }
 
-static inline void emit_blx_r(u8 tgt_reg, struct jit_ctx *ctx)
-{
-#if __LINUX_ARM_ARCH__ < 5
-	emit(ARM_MOV_R(ARM_LR, ARM_PC), ctx);
+/* ALU operation (64 bit) */
+static inline void emit_a32_alu_r64(const bool is64, const u8 dst[],
+				  const u8 src[], bool dstk,
+				  bool sstk, struct jit_ctx *ctx,
+				  const u8 op) {
+	emit_a32_alu_r(dst_lo, src_lo, dstk, sstk, ctx, is64, false, op);
+	if (is64)
+		emit_a32_alu_r(dst_hi, src_hi, dstk, sstk, ctx, is64, true, op);
+	else
+		emit_a32_mov_i(dst_hi, 0, dstk, ctx);
+}
 
-	if (elf_hwcap & HWCAP_THUMB)
-		emit(ARM_BX(tgt_reg), ctx);
+/* dst = imm (4 bytes)*/
+static inline void emit_a32_mov_r(const u8 dst, const u8 src,
+				  bool dstk, bool sstk,
+				  struct jit_ctx *ctx) {
+	const u8 *tmp = bpf2a32[TMP_REG_1];
+	u8 rt = sstk ? tmp[0] : src;
+
+	if (sstk)
+		emit(ARM_LDR_I(tmp[0], ARM_SP, STACK_VAR(src)), ctx);
+	if (dstk)
+		emit(ARM_STR_I(rt, ARM_SP, STACK_VAR(dst)), ctx);
 	else
-		emit(ARM_MOV_R(ARM_PC, tgt_reg), ctx);
-#else
-	emit(ARM_BLX_R(tgt_reg), ctx);
-#endif
+		emit(ARM_MOV_R(dst, rt), ctx);
 }
 
-static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx,
-				int bpf_op)
-{
-#if __LINUX_ARM_ARCH__ == 7
-	if (elf_hwcap & HWCAP_IDIVA) {
-		if (bpf_op == BPF_DIV)
-			emit(ARM_UDIV(rd, rm, rn), ctx);
-		else {
-			emit(ARM_UDIV(ARM_R3, rm, rn), ctx);
-			emit(ARM_MLS(rd, rn, ARM_R3, rm), ctx);
-		}
-		return;
+/* dst = src */
+static inline void emit_a32_mov_r64(const bool is64, const u8 dst[],
+				  const u8 src[], bool dstk,
+				  bool sstk, struct jit_ctx *ctx) {
+	emit_a32_mov_r(dst_lo, src_lo, dstk, sstk, ctx);
+	if (is64) {
+		/* complete 8 byte move */
+		emit_a32_mov_r(dst_hi, src_hi, dstk, sstk, ctx);
+	} else {
+		/* Zero out high 4 bytes */
+		emit_a32_mov_i(dst_hi, 0, dstk, ctx);
 	}
-#endif
+}
 
-	/*
-	 * For BPF_ALU | BPF_DIV | BPF_K instructions, rm is ARM_R4
-	 * (r_A) and rn is ARM_R0 (r_scratch) so load rn first into
-	 * ARM_R1 to avoid accidentally overwriting ARM_R0 with rm
-	 * before using it as a source for ARM_R1.
-	 *
-	 * For BPF_ALU | BPF_DIV | BPF_X rm is ARM_R4 (r_A) and rn is
-	 * ARM_R5 (r_X) so there is no particular register overlap
-	 * issues.
-	 */
-	if (rn != ARM_R1)
-		emit(ARM_MOV_R(ARM_R1, rn), ctx);
-	if (rm != ARM_R0)
-		emit(ARM_MOV_R(ARM_R0, rm), ctx);
+/* Shift operations */
+static inline void emit_a32_alu_i(const u8 dst, const u32 val, bool dstk,
+				struct jit_ctx *ctx, const u8 op) {
+	const u8 *tmp = bpf2a32[TMP_REG_1];
+	u8 rd = dstk ? tmp[0] : dst;
+
+	if (dstk)
+		emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst)), ctx);
+
+	/* Do shift operation */
+	switch (op) {
+	case BPF_LSH:
+		emit(ARM_LSL_I(rd, rd, val), ctx);
+		break;
+	case BPF_RSH:
+		emit(ARM_LSR_I(rd, rd, val), ctx);
+		break;
+	case BPF_NEG:
+		emit(ARM_RSB_I(rd, rd, val), ctx);
+		break;
+	}
 
+	if (dstk)
+		emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst)), ctx);
+}
+
+/* dst = ~dst (64 bit) */
+static inline void emit_a32_neg64(const u8 dst[], bool dstk,
+				struct jit_ctx *ctx){
+	const u8 *tmp = bpf2a32[TMP_REG_1];
+	u8 rd = dstk ? tmp[1] : dst[1];
+	u8 rm = dstk ? tmp[0] : dst[0];
+
+	/* Setup Operand */
+	if (dstk) {
+		emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
+		emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
+	}
+
+	/* Do Negate Operation */
+	emit(ARM_RSBS_I(rd, rd, 0), ctx);
+	emit(ARM_RSC_I(rm, rm, 0), ctx);
+
+	if (dstk) {
+		emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
+		emit(ARM_STR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
+	}
+}
+
+/* dst = dst << src */
+static inline void emit_a32_lsh_r64(const u8 dst[], const u8 src[], bool dstk,
+				    bool sstk, struct jit_ctx *ctx) {
+	const u8 *tmp = bpf2a32[TMP_REG_1];
+	const u8 *tmp2 = bpf2a32[TMP_REG_2];
+
+	/* Setup Operands */
+	u8 rt = sstk ? tmp2[1] : src_lo;
+	u8 rd = dstk ? tmp[1] : dst_lo;
+	u8 rm = dstk ? tmp[0] : dst_hi;
+
+	if (sstk)
+		emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(src_lo)), ctx);
+	if (dstk) {
+		emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
+		emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
+	}
+
+	/* Do LSH operation */
+	emit(ARM_SUB_I(ARM_IP, rt, 32), ctx);
+	emit(ARM_RSB_I(tmp2[0], rt, 32), ctx);
+	/* As we are using ARM_LR */
 	ctx->seen |= SEEN_CALL;
-	emit_mov_i(ARM_R3, bpf_op == BPF_DIV ? (u32)jit_udiv : (u32)jit_mod,
-		   ctx);
-	emit_blx_r(ARM_R3, ctx);
+	emit(ARM_MOV_SR(ARM_LR, rm, SRTYPE_ASL, rt), ctx);
+	emit(ARM_ORR_SR(ARM_LR, ARM_LR, rd, SRTYPE_ASL, ARM_IP), ctx);
+	emit(ARM_ORR_SR(ARM_IP, ARM_LR, rd, SRTYPE_LSR, tmp2[0]), ctx);
+	emit(ARM_MOV_SR(ARM_LR, rd, SRTYPE_ASL, rt), ctx);
+
+	if (dstk) {
+		emit(ARM_STR_I(ARM_LR, ARM_SP, STACK_VAR(dst_lo)), ctx);
+		emit(ARM_STR_I(ARM_IP, ARM_SP, STACK_VAR(dst_hi)), ctx);
+	} else {
+		emit(ARM_MOV_R(rd, ARM_LR), ctx);
+		emit(ARM_MOV_R(rm, ARM_IP), ctx);
+	}
+}
 
-	if (rd != ARM_R0)
-		emit(ARM_MOV_R(rd, ARM_R0), ctx);
+/* dst = dst >> src (signed)*/
+static inline void emit_a32_arsh_r64(const u8 dst[], const u8 src[], bool dstk,
+				    bool sstk, struct jit_ctx *ctx) {
+	const u8 *tmp = bpf2a32[TMP_REG_1];
+	const u8 *tmp2 = bpf2a32[TMP_REG_2];
+	/* Setup Operands */
+	u8 rt = sstk ? tmp2[1] : src_lo;
+	u8 rd = dstk ? tmp[1] : dst_lo;
+	u8 rm = dstk ? tmp[0] : dst_hi;
+
+	if (sstk)
+		emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(src_lo)), ctx);
+	if (dstk) {
+		emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
+		emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
+	}
+
+	/* Do the ARSH operation */
+	emit(ARM_RSB_I(ARM_IP, rt, 32), ctx);
+	emit(ARM_SUBS_I(tmp2[0], rt, 32), ctx);
+	/* As we are using ARM_LR */
+	ctx->seen |= SEEN_CALL;
+	emit(ARM_MOV_SR(ARM_LR, rd, SRTYPE_LSR, rt), ctx);
+	emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_ASL, ARM_IP), ctx);
+	_emit(ARM_COND_MI, ARM_B(0), ctx);
+	emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_ASR, tmp2[0]), ctx);
+	emit(ARM_MOV_SR(ARM_IP, rm, SRTYPE_ASR, rt), ctx);
+	if (dstk) {
+		emit(ARM_STR_I(ARM_LR, ARM_SP, STACK_VAR(dst_lo)), ctx);
+		emit(ARM_STR_I(ARM_IP, ARM_SP, STACK_VAR(dst_hi)), ctx);
+	} else {
+		emit(ARM_MOV_R(rd, ARM_LR), ctx);
+		emit(ARM_MOV_R(rm, ARM_IP), ctx);
+	}
+}
+
+/* dst = dst >> src */
+static inline void emit_a32_lsr_r64(const u8 dst[], const u8 src[], bool dstk,
+				     bool sstk, struct jit_ctx *ctx) {
+	const u8 *tmp = bpf2a32[TMP_REG_1];
+	const u8 *tmp2 = bpf2a32[TMP_REG_2];
+	/* Setup Operands */
+	u8 rt = sstk ? tmp2[1] : src_lo;
+	u8 rd = dstk ? tmp[1] : dst_lo;
+	u8 rm = dstk ? tmp[0] : dst_hi;
+
+	if (sstk)
+		emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(src_lo)), ctx);
+	if (dstk) {
+		emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
+		emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
+	}
+
+	/* Do LSH operation */
+	emit(ARM_RSB_I(ARM_IP, rt, 32), ctx);
+	emit(ARM_SUBS_I(tmp2[0], rt, 32), ctx);
+	/* As we are using ARM_LR */
+	ctx->seen |= SEEN_CALL;
+	emit(ARM_MOV_SR(ARM_LR, rd, SRTYPE_LSR, rt), ctx);
+	emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_ASL, ARM_IP), ctx);
+	emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_LSR, tmp2[0]), ctx);
+	emit(ARM_MOV_SR(ARM_IP, rm, SRTYPE_LSR, rt), ctx);
+	if (dstk) {
+		emit(ARM_STR_I(ARM_LR, ARM_SP, STACK_VAR(dst_lo)), ctx);
+		emit(ARM_STR_I(ARM_IP, ARM_SP, STACK_VAR(dst_hi)), ctx);
+	} else {
+		emit(ARM_MOV_R(rd, ARM_LR), ctx);
+		emit(ARM_MOV_R(rm, ARM_IP), ctx);
+	}
 }
 
-static inline void update_on_xread(struct jit_ctx *ctx)
+/* dst = dst << val */
+static inline void emit_a32_lsh_i64(const u8 dst[], bool dstk,
+				     const u32 val, struct jit_ctx *ctx){
+	const u8 *tmp = bpf2a32[TMP_REG_1];
+	const u8 *tmp2 = bpf2a32[TMP_REG_2];
+	/* Setup operands */
+	u8 rd = dstk ? tmp[1] : dst_lo;
+	u8 rm = dstk ? tmp[0] : dst_hi;
+
+	if (dstk) {
+		emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
+		emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
+	}
+
+	/* Do LSH operation */
+	if (val < 32) {
+		emit(ARM_MOV_SI(tmp2[0], rm, SRTYPE_ASL, val), ctx);
+		emit(ARM_ORR_SI(rm, tmp2[0], rd, SRTYPE_LSR, 32 - val), ctx);
+		emit(ARM_MOV_SI(rd, rd, SRTYPE_ASL, val), ctx);
+	} else {
+		if (val == 32)
+			emit(ARM_MOV_R(rm, rd), ctx);
+		else
+			emit(ARM_MOV_SI(rm, rd, SRTYPE_ASL, val - 32), ctx);
+		emit(ARM_EOR_R(rd, rd, rd), ctx);
+	}
+
+	if (dstk) {
+		emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
+		emit(ARM_STR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
+	}
+}
+
+/* dst = dst >> val */
+static inline void emit_a32_lsr_i64(const u8 dst[], bool dstk,
+				    const u32 val, struct jit_ctx *ctx) {
+	const u8 *tmp = bpf2a32[TMP_REG_1];
+	const u8 *tmp2 = bpf2a32[TMP_REG_2];
+	/* Setup operands */
+	u8 rd = dstk ? tmp[1] : dst_lo;
+	u8 rm = dstk ? tmp[0] : dst_hi;
+
+	if (dstk) {
+		emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
+		emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
+	}
+
+	/* Do LSR operation */
+	if (val < 32) {
+		emit(ARM_MOV_SI(tmp2[1], rd, SRTYPE_LSR, val), ctx);
+		emit(ARM_ORR_SI(rd, tmp2[1], rm, SRTYPE_ASL, 32 - val), ctx);
+		emit(ARM_MOV_SI(rm, rm, SRTYPE_LSR, val), ctx);
+	} else if (val == 32) {
+		emit(ARM_MOV_R(rd, rm), ctx);
+		emit(ARM_MOV_I(rm, 0), ctx);
+	} else {
+		emit(ARM_MOV_SI(rd, rm, SRTYPE_LSR, val - 32), ctx);
+		emit(ARM_MOV_I(rm, 0), ctx);
+	}
+
+	if (dstk) {
+		emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
+		emit(ARM_STR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
+	}
+}
+
+/* dst = dst >> val (signed) */
+static inline void emit_a32_arsh_i64(const u8 dst[], bool dstk,
+				     const u32 val, struct jit_ctx *ctx){
+	const u8 *tmp = bpf2a32[TMP_REG_1];
+	const u8 *tmp2 = bpf2a32[TMP_REG_2];
+	 /* Setup operands */
+	u8 rd = dstk ? tmp[1] : dst_lo;
+	u8 rm = dstk ? tmp[0] : dst_hi;
+
+	if (dstk) {
+		emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
+		emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
+	}
+
+	/* Do ARSH operation */
+	if (val < 32) {
+		emit(ARM_MOV_SI(tmp2[1], rd, SRTYPE_LSR, val), ctx);
+		emit(ARM_ORR_SI(rd, tmp2[1], rm, SRTYPE_ASL, 32 - val), ctx);
+		emit(ARM_MOV_SI(rm, rm, SRTYPE_ASR, val), ctx);
+	} else if (val == 32) {
+		emit(ARM_MOV_R(rd, rm), ctx);
+		emit(ARM_MOV_SI(rm, rm, SRTYPE_ASR, 31), ctx);
+	} else {
+		emit(ARM_MOV_SI(rd, rm, SRTYPE_ASR, val - 32), ctx);
+		emit(ARM_MOV_SI(rm, rm, SRTYPE_ASR, 31), ctx);
+	}
+
+	if (dstk) {
+		emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
+		emit(ARM_STR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
+	}
+}
+
+static inline void emit_a32_mul_r64(const u8 dst[], const u8 src[], bool dstk,
+				    bool sstk, struct jit_ctx *ctx) {
+	const u8 *tmp = bpf2a32[TMP_REG_1];
+	const u8 *tmp2 = bpf2a32[TMP_REG_2];
+	/* Setup operands for multiplication */
+	u8 rd = dstk ? tmp[1] : dst_lo;
+	u8 rm = dstk ? tmp[0] : dst_hi;
+	u8 rt = sstk ? tmp2[1] : src_lo;
+	u8 rn = sstk ? tmp2[0] : src_hi;
+
+	if (dstk) {
+		emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
+		emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
+	}
+	if (sstk) {
+		emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(src_lo)), ctx);
+		emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src_hi)), ctx);
+	}
+
+	/* Do Multiplication */
+	emit(ARM_MUL(ARM_IP, rd, rn), ctx);
+	emit(ARM_MUL(ARM_LR, rm, rt), ctx);
+	/* As we are using ARM_LR */
+	ctx->seen |= SEEN_CALL;
+	emit(ARM_ADD_R(ARM_LR, ARM_IP, ARM_LR), ctx);
+
+	emit(ARM_UMULL(ARM_IP, rm, rd, rt), ctx);
+	emit(ARM_ADD_R(rm, ARM_LR, rm), ctx);
+	if (dstk) {
+		emit(ARM_STR_I(ARM_IP, ARM_SP, STACK_VAR(dst_lo)), ctx);
+		emit(ARM_STR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
+	} else {
+		emit(ARM_MOV_R(rd, ARM_IP), ctx);
+	}
+}
+
+/* *(size *)(dst + off) = src */
+static inline void emit_str_r(const u8 dst, const u8 src, bool dstk,
+			      const s32 off, struct jit_ctx *ctx, const u8 sz){
+	const u8 *tmp = bpf2a32[TMP_REG_1];
+	u8 rd = dstk ? tmp[1] : dst;
+
+	if (dstk)
+		emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst)), ctx);
+	if (off) {
+		emit_a32_mov_i(tmp[0], off, false, ctx);
+		emit(ARM_ADD_R(tmp[0], rd, tmp[0]), ctx);
+		rd = tmp[0];
+	}
+	switch (sz) {
+	case BPF_W:
+		/* Store a Word */
+		emit(ARM_STR_I(src, rd, 0), ctx);
+		break;
+	case BPF_H:
+		/* Store a HalfWord */
+		emit(ARM_STRH_I(src, rd, 0), ctx);
+		break;
+	case BPF_B:
+		/* Store a Byte */
+		emit(ARM_STRB_I(src, rd, 0), ctx);
+		break;
+	}
+}
+
+/* dst = *(size*)(src + off) */
+static inline void emit_ldx_r(const u8 dst, const u8 src, bool dstk,
+			      const s32 off, struct jit_ctx *ctx, const u8 sz){
+	const u8 *tmp = bpf2a32[TMP_REG_1];
+	u8 rd = dstk ? tmp[1] : dst;
+	u8 rm = src;
+
+	if (off) {
+		emit_a32_mov_i(tmp[0], off, false, ctx);
+		emit(ARM_ADD_R(tmp[0], tmp[0], src), ctx);
+		rm = tmp[0];
+	}
+	switch (sz) {
+	case BPF_W:
+		/* Load a Word */
+		emit(ARM_LDR_I(rd, rm, 0), ctx);
+		break;
+	case BPF_H:
+		/* Load a HalfWord */
+		emit(ARM_LDRH_I(rd, rm, 0), ctx);
+		break;
+	case BPF_B:
+		/* Load a Byte */
+		emit(ARM_LDRB_I(rd, rm, 0), ctx);
+		break;
+	}
+	if (dstk)
+		emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst)), ctx);
+}
+
+/* Arithmatic Operation */
+static inline void emit_ar_r(const u8 rd, const u8 rt, const u8 rm,
+			     const u8 rn, struct jit_ctx *ctx, u8 op) {
+	switch (op) {
+	case BPF_JSET:
+		ctx->seen |= SEEN_CALL;
+		emit(ARM_AND_R(ARM_IP, rt, rn), ctx);
+		emit(ARM_AND_R(ARM_LR, rd, rm), ctx);
+		emit(ARM_ORRS_R(ARM_IP, ARM_LR, ARM_IP), ctx);
+		break;
+	case BPF_JEQ:
+	case BPF_JNE:
+	case BPF_JGT:
+	case BPF_JGE:
+	case BPF_JLE:
+	case BPF_JLT:
+		emit(ARM_CMP_R(rd, rm), ctx);
+		_emit(ARM_COND_EQ, ARM_CMP_R(rt, rn), ctx);
+		break;
+	case BPF_JSLE:
+	case BPF_JSGT:
+		emit(ARM_CMP_R(rn, rt), ctx);
+		emit(ARM_SBCS_R(ARM_IP, rm, rd), ctx);
+		break;
+	case BPF_JSLT:
+	case BPF_JSGE:
+		emit(ARM_CMP_R(rt, rn), ctx);
+		emit(ARM_SBCS_R(ARM_IP, rd, rm), ctx);
+		break;
+	}
+}
+
+static int out_offset = -1; /* initialized on the first pass of build_body() */
+static int emit_bpf_tail_call(struct jit_ctx *ctx)
 {
-	if (!(ctx->seen & SEEN_X))
-		ctx->flags |= FLAG_NEED_X_RESET;
 
-	ctx->seen |= SEEN_X;
+	/* bpf_tail_call(void *prog_ctx, struct bpf_array *array, u64 index) */
+	const u8 *r2 = bpf2a32[BPF_REG_2];
+	const u8 *r3 = bpf2a32[BPF_REG_3];
+	const u8 *tmp = bpf2a32[TMP_REG_1];
+	const u8 *tmp2 = bpf2a32[TMP_REG_2];
+	const u8 *tcc = bpf2a32[TCALL_CNT];
+	const int idx0 = ctx->idx;
+#define cur_offset (ctx->idx - idx0)
+#define jmp_offset (out_offset - (cur_offset))
+	u32 off, lo, hi;
+
+	/* if (index >= array->map.max_entries)
+	 *	goto out;
+	 */
+	off = offsetof(struct bpf_array, map.max_entries);
+	/* array->map.max_entries */
+	emit_a32_mov_i(tmp[1], off, false, ctx);
+	emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(r2[1])), ctx);
+	emit(ARM_LDR_R(tmp[1], tmp2[1], tmp[1]), ctx);
+	/* index (64 bit) */
+	emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(r3[1])), ctx);
+	/* index >= array->map.max_entries */
+	emit(ARM_CMP_R(tmp2[1], tmp[1]), ctx);
+	_emit(ARM_COND_CS, ARM_B(jmp_offset), ctx);
+
+	/* if (tail_call_cnt > MAX_TAIL_CALL_CNT)
+	 *	goto out;
+	 * tail_call_cnt++;
+	 */
+	lo = (u32)MAX_TAIL_CALL_CNT;
+	hi = (u32)((u64)MAX_TAIL_CALL_CNT >> 32);
+	emit(ARM_LDR_I(tmp[1], ARM_SP, STACK_VAR(tcc[1])), ctx);
+	emit(ARM_LDR_I(tmp[0], ARM_SP, STACK_VAR(tcc[0])), ctx);
+	emit(ARM_CMP_I(tmp[0], hi), ctx);
+	_emit(ARM_COND_EQ, ARM_CMP_I(tmp[1], lo), ctx);
+	_emit(ARM_COND_HI, ARM_B(jmp_offset), ctx);
+	emit(ARM_ADDS_I(tmp[1], tmp[1], 1), ctx);
+	emit(ARM_ADC_I(tmp[0], tmp[0], 0), ctx);
+	emit(ARM_STR_I(tmp[1], ARM_SP, STACK_VAR(tcc[1])), ctx);
+	emit(ARM_STR_I(tmp[0], ARM_SP, STACK_VAR(tcc[0])), ctx);
+
+	/* prog = array->ptrs[index]
+	 * if (prog == NULL)
+	 *	goto out;
+	 */
+	off = offsetof(struct bpf_array, ptrs);
+	emit_a32_mov_i(tmp[1], off, false, ctx);
+	emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(r2[1])), ctx);
+	emit(ARM_ADD_R(tmp[1], tmp2[1], tmp[1]), ctx);
+	emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(r3[1])), ctx);
+	emit(ARM_MOV_SI(tmp[0], tmp2[1], SRTYPE_ASL, 2), ctx);
+	emit(ARM_LDR_R(tmp[1], tmp[1], tmp[0]), ctx);
+	emit(ARM_CMP_I(tmp[1], 0), ctx);
+	_emit(ARM_COND_EQ, ARM_B(jmp_offset), ctx);
+
+	/* goto *(prog->bpf_func + prologue_size); */
+	off = offsetof(struct bpf_prog, bpf_func);
+	emit_a32_mov_i(tmp2[1], off, false, ctx);
+	emit(ARM_LDR_R(tmp[1], tmp[1], tmp2[1]), ctx);
+	emit(ARM_ADD_I(tmp[1], tmp[1], ctx->prologue_bytes), ctx);
+	emit(ARM_BX(tmp[1]), ctx);
+
+	/* out: */
+	if (out_offset == -1)
+		out_offset = cur_offset;
+	if (cur_offset != out_offset) {
+		pr_err_once("tail_call out_offset = %d, expected %d!\n",
+			    cur_offset, out_offset);
+		return -1;
+	}
+	return 0;
+#undef cur_offset
+#undef jmp_offset
 }
 
-static int build_body(struct jit_ctx *ctx)
+/* 0xabcd => 0xcdab */
+static inline void emit_rev16(const u8 rd, const u8 rn, struct jit_ctx *ctx)
 {
-	void *load_func[] = {jit_get_skb_b, jit_get_skb_h, jit_get_skb_w};
-	const struct bpf_prog *prog = ctx->skf;
-	const struct sock_filter *inst;
-	unsigned i, load_order, off, condt;
-	int imm12;
-	u32 k;
+#if __LINUX_ARM_ARCH__ < 6
+	const u8 *tmp2 = bpf2a32[TMP_REG_2];
+
+	emit(ARM_AND_I(tmp2[1], rn, 0xff), ctx);
+	emit(ARM_MOV_SI(tmp2[0], rn, SRTYPE_LSR, 8), ctx);
+	emit(ARM_AND_I(tmp2[0], tmp2[0], 0xff), ctx);
+	emit(ARM_ORR_SI(rd, tmp2[0], tmp2[1], SRTYPE_LSL, 8), ctx);
+#else /* ARMv6+ */
+	emit(ARM_REV16(rd, rn), ctx);
+#endif
+}
 
-	for (i = 0; i < prog->len; i++) {
-		u16 code;
+/* 0xabcdefgh => 0xghefcdab */
+static inline void emit_rev32(const u8 rd, const u8 rn, struct jit_ctx *ctx)
+{
+#if __LINUX_ARM_ARCH__ < 6
+	const u8 *tmp2 = bpf2a32[TMP_REG_2];
+
+	emit(ARM_AND_I(tmp2[1], rn, 0xff), ctx);
+	emit(ARM_MOV_SI(tmp2[0], rn, SRTYPE_LSR, 24), ctx);
+	emit(ARM_ORR_SI(ARM_IP, tmp2[0], tmp2[1], SRTYPE_LSL, 24), ctx);
+
+	emit(ARM_MOV_SI(tmp2[1], rn, SRTYPE_LSR, 8), ctx);
+	emit(ARM_AND_I(tmp2[1], tmp2[1], 0xff), ctx);
+	emit(ARM_MOV_SI(tmp2[0], rn, SRTYPE_LSR, 16), ctx);
+	emit(ARM_AND_I(tmp2[0], tmp2[0], 0xff), ctx);
+	emit(ARM_MOV_SI(tmp2[0], tmp2[0], SRTYPE_LSL, 8), ctx);
+	emit(ARM_ORR_SI(tmp2[0], tmp2[0], tmp2[1], SRTYPE_LSL, 16), ctx);
+	emit(ARM_ORR_R(rd, ARM_IP, tmp2[0]), ctx);
+
+#else /* ARMv6+ */
+	emit(ARM_REV(rd, rn), ctx);
+#endif
+}
 
-		inst = &(prog->insns[i]);
-		/* K as an immediate value operand */
-		k = inst->k;
-		code = bpf_anc_helper(inst);
+// push the scratch stack register on top of the stack
+static inline void emit_push_r64(const u8 src[], const u8 shift,
+		struct jit_ctx *ctx)
+{
+	const u8 *tmp2 = bpf2a32[TMP_REG_2];
+	u16 reg_set = 0;
 
-		/* compute offsets only in the fake pass */
-		if (ctx->target == NULL)
-			ctx->offsets[i] = ctx->idx * 4;
+	emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(src[1]+shift)), ctx);
+	emit(ARM_LDR_I(tmp2[0], ARM_SP, STACK_VAR(src[0]+shift)), ctx);
+
+	reg_set = (1 << tmp2[1]) | (1 << tmp2[0]);
+	emit(ARM_PUSH(reg_set), ctx);
+}
+
+static void build_prologue(struct jit_ctx *ctx)
+{
+	const u8 r0 = bpf2a32[BPF_REG_0][1];
+	const u8 r2 = bpf2a32[BPF_REG_1][1];
+	const u8 r3 = bpf2a32[BPF_REG_1][0];
+	const u8 r4 = bpf2a32[BPF_REG_6][1];
+	const u8 r5 = bpf2a32[BPF_REG_6][0];
+	const u8 r6 = bpf2a32[TMP_REG_1][1];
+	const u8 r7 = bpf2a32[TMP_REG_1][0];
+	const u8 r8 = bpf2a32[TMP_REG_2][1];
+	const u8 r10 = bpf2a32[TMP_REG_2][0];
+	const u8 fplo = bpf2a32[BPF_REG_FP][1];
+	const u8 fphi = bpf2a32[BPF_REG_FP][0];
+	const u8 sp = ARM_SP;
+	const u8 *tcc = bpf2a32[TCALL_CNT];
+
+	u16 reg_set = 0;
+
+	/*
+	 * eBPF prog stack layout
+	 *
+	 *                         high
+	 * original ARM_SP =>     +-----+ eBPF prologue
+	 *                        |FP/LR|
+	 * current ARM_FP =>      +-----+
+	 *                        | ... | callee saved registers
+	 * eBPF fp register =>    +-----+ <= (BPF_FP)
+	 *                        | ... | eBPF JIT scratch space
+	 *                        |     | eBPF prog stack
+	 *                        +-----+
+	 *			  |RSVD | JIT scratchpad
+	 * current A64_SP =>      +-----+ <= (BPF_FP - STACK_SIZE)
+	 *                        |     |
+	 *                        | ... | Function call stack
+	 *                        |     |
+	 *                        +-----+
+	 *                          low
+	 */
+
+	/* Save callee saved registers. */
+	reg_set |= (1<<r4) | (1<<r5) | (1<<r6) | (1<<r7) | (1<<r8) | (1<<r10);
+#ifdef CONFIG_FRAME_POINTER
+	reg_set |= (1<<ARM_FP) | (1<<ARM_IP) | (1<<ARM_LR) | (1<<ARM_PC);
+	emit(ARM_MOV_R(ARM_IP, sp), ctx);
+	emit(ARM_PUSH(reg_set), ctx);
+	emit(ARM_SUB_I(ARM_FP, ARM_IP, 4), ctx);
+#else
+	/* Check if call instruction exists in BPF body */
+	if (ctx->seen & SEEN_CALL)
+		reg_set |= (1<<ARM_LR);
+	emit(ARM_PUSH(reg_set), ctx);
+#endif
+	/* Save frame pointer for later */
+	emit(ARM_SUB_I(ARM_IP, sp, SCRATCH_SIZE), ctx);
+
+	ctx->stack_size = imm8m(STACK_SIZE);
+
+	/* Set up function call stack */
+	emit(ARM_SUB_I(ARM_SP, ARM_SP, ctx->stack_size), ctx);
 
-		switch (code) {
-		case BPF_LD | BPF_IMM:
-			emit_mov_i(r_A, k, ctx);
+	/* Set up BPF prog stack base register */
+	emit_a32_mov_r(fplo, ARM_IP, true, false, ctx);
+	emit_a32_mov_i(fphi, 0, true, ctx);
+
+	/* mov r4, 0 */
+	emit(ARM_MOV_I(r4, 0), ctx);
+
+	/* Move BPF_CTX to BPF_R1 */
+	emit(ARM_MOV_R(r3, r4), ctx);
+	emit(ARM_MOV_R(r2, r0), ctx);
+	/* Initialize Tail Count */
+	emit(ARM_STR_I(r4, ARM_SP, STACK_VAR(tcc[0])), ctx);
+	emit(ARM_STR_I(r4, ARM_SP, STACK_VAR(tcc[1])), ctx);
+	/* end of prologue */
+}
+
+static void build_epilogue(struct jit_ctx *ctx)
+{
+	const u8 r4 = bpf2a32[BPF_REG_6][1];
+	const u8 r5 = bpf2a32[BPF_REG_6][0];
+	const u8 r6 = bpf2a32[TMP_REG_1][1];
+	const u8 r7 = bpf2a32[TMP_REG_1][0];
+	const u8 r8 = bpf2a32[TMP_REG_2][1];
+	const u8 r10 = bpf2a32[TMP_REG_2][0];
+	u16 reg_set = 0;
+
+	/* unwind function call stack */
+	emit(ARM_ADD_I(ARM_SP, ARM_SP, ctx->stack_size), ctx);
+
+	/* restore callee saved registers. */
+	reg_set |= (1<<r4) | (1<<r5) | (1<<r6) | (1<<r7) | (1<<r8) | (1<<r10);
+#ifdef CONFIG_FRAME_POINTER
+	/* the first instruction of the prologue was: mov ip, sp */
+	reg_set |= (1<<ARM_FP) | (1<<ARM_SP) | (1<<ARM_PC);
+	emit(ARM_LDM(ARM_SP, reg_set), ctx);
+#else
+	if (ctx->seen & SEEN_CALL)
+		reg_set |= (1<<ARM_PC);
+	/* Restore callee saved registers. */
+	emit(ARM_POP(reg_set), ctx);
+	/* Return back to the callee function */
+	if (!(ctx->seen & SEEN_CALL))
+		emit(ARM_BX(ARM_LR), ctx);
+#endif
+}
+
+/*
+ * Convert an eBPF instruction to native instruction, i.e
+ * JITs an eBPF instruction.
+ * Returns :
+ *	0  - Successfully JITed an 8-byte eBPF instruction
+ *	>0 - Successfully JITed a 16-byte eBPF instruction
+ *	<0 - Failed to JIT.
+ */
+static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
+{
+	const u8 code = insn->code;
+	const u8 *dst = bpf2a32[insn->dst_reg];
+	const u8 *src = bpf2a32[insn->src_reg];
+	const u8 *tmp = bpf2a32[TMP_REG_1];
+	const u8 *tmp2 = bpf2a32[TMP_REG_2];
+	const s16 off = insn->off;
+	const s32 imm = insn->imm;
+	const int i = insn - ctx->prog->insnsi;
+	const bool is64 = BPF_CLASS(code) == BPF_ALU64;
+	const bool dstk = is_on_stack(insn->dst_reg);
+	const bool sstk = is_on_stack(insn->src_reg);
+	u8 rd, rt, rm, rn;
+	s32 jmp_offset;
+
+#define check_imm(bits, imm) do {				\
+	if ((((imm) > 0) && ((imm) >> (bits))) ||		\
+	    (((imm) < 0) && (~(imm) >> (bits)))) {		\
+		pr_info("[%2d] imm=%d(0x%x) out of range\n",	\
+			i, imm, imm);				\
+		return -EINVAL;					\
+	}							\
+} while (0)
+#define check_imm24(imm) check_imm(24, imm)
+
+	switch (code) {
+	/* ALU operations */
+
+	/* dst = src */
+	case BPF_ALU | BPF_MOV | BPF_K:
+	case BPF_ALU | BPF_MOV | BPF_X:
+	case BPF_ALU64 | BPF_MOV | BPF_K:
+	case BPF_ALU64 | BPF_MOV | BPF_X:
+		switch (BPF_SRC(code)) {
+		case BPF_X:
+			emit_a32_mov_r64(is64, dst, src, dstk, sstk, ctx);
 			break;
-		case BPF_LD | BPF_W | BPF_LEN:
-			ctx->seen |= SEEN_SKB;
-			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4);
-			emit(ARM_LDR_I(r_A, r_skb,
-				       offsetof(struct sk_buff, len)), ctx);
+		case BPF_K:
+			/* Sign-extend immediate value to destination reg */
+			emit_a32_mov_i64(is64, dst, imm, dstk, ctx);
 			break;
-		case BPF_LD | BPF_MEM:
-			/* A = scratch[k] */
-			ctx->seen |= SEEN_MEM_WORD(k);
-			emit(ARM_LDR_I(r_A, ARM_SP, SCRATCH_OFF(k)), ctx);
+		}
+		break;
+	/* dst = dst + src/imm */
+	/* dst = dst - src/imm */
+	/* dst = dst | src/imm */
+	/* dst = dst & src/imm */
+	/* dst = dst ^ src/imm */
+	/* dst = dst * src/imm */
+	/* dst = dst << src */
+	/* dst = dst >> src */
+	case BPF_ALU | BPF_ADD | BPF_K:
+	case BPF_ALU | BPF_ADD | BPF_X:
+	case BPF_ALU | BPF_SUB | BPF_K:
+	case BPF_ALU | BPF_SUB | BPF_X:
+	case BPF_ALU | BPF_OR | BPF_K:
+	case BPF_ALU | BPF_OR | BPF_X:
+	case BPF_ALU | BPF_AND | BPF_K:
+	case BPF_ALU | BPF_AND | BPF_X:
+	case BPF_ALU | BPF_XOR | BPF_K:
+	case BPF_ALU | BPF_XOR | BPF_X:
+	case BPF_ALU | BPF_MUL | BPF_K:
+	case BPF_ALU | BPF_MUL | BPF_X:
+	case BPF_ALU | BPF_LSH | BPF_X:
+	case BPF_ALU | BPF_RSH | BPF_X:
+	case BPF_ALU | BPF_ARSH | BPF_K:
+	case BPF_ALU | BPF_ARSH | BPF_X:
+	case BPF_ALU64 | BPF_ADD | BPF_K:
+	case BPF_ALU64 | BPF_ADD | BPF_X:
+	case BPF_ALU64 | BPF_SUB | BPF_K:
+	case BPF_ALU64 | BPF_SUB | BPF_X:
+	case BPF_ALU64 | BPF_OR | BPF_K:
+	case BPF_ALU64 | BPF_OR | BPF_X:
+	case BPF_ALU64 | BPF_AND | BPF_K:
+	case BPF_ALU64 | BPF_AND | BPF_X:
+	case BPF_ALU64 | BPF_XOR | BPF_K:
+	case BPF_ALU64 | BPF_XOR | BPF_X:
+		switch (BPF_SRC(code)) {
+		case BPF_X:
+			emit_a32_alu_r64(is64, dst, src, dstk, sstk,
+					 ctx, BPF_OP(code));
 			break;
-		case BPF_LD | BPF_W | BPF_ABS:
-			load_order = 2;
-			goto load;
-		case BPF_LD | BPF_H | BPF_ABS:
-			load_order = 1;
-			goto load;
-		case BPF_LD | BPF_B | BPF_ABS:
-			load_order = 0;
-load:
-			emit_mov_i(r_off, k, ctx);
-load_common:
-			ctx->seen |= SEEN_DATA | SEEN_CALL;
-
-			if (load_order > 0) {
-				emit(ARM_SUB_I(r_scratch, r_skb_hl,
-					       1 << load_order), ctx);
-				emit(ARM_CMP_R(r_scratch, r_off), ctx);
-				condt = ARM_COND_GE;
-			} else {
-				emit(ARM_CMP_R(r_skb_hl, r_off), ctx);
-				condt = ARM_COND_HI;
-			}
-
-			/*
-			 * test for negative offset, only if we are
-			 * currently scheduled to take the fast
-			 * path. this will update the flags so that
-			 * the slowpath instruction are ignored if the
-			 * offset is negative.
-			 *
-			 * for loard_order == 0 the HI condition will
-			 * make loads at offset 0 take the slow path too.
+		case BPF_K:
+			/* Move immediate value to the temporary register
+			 * and then do the ALU operation on the temporary
+			 * register as this will sign-extend the immediate
+			 * value into temporary reg and then it would be
+			 * safe to do the operation on it.
 			 */
-			_emit(condt, ARM_CMP_I(r_off, 0), ctx);
-
-			_emit(condt, ARM_ADD_R(r_scratch, r_off, r_skb_data),
-			      ctx);
-
-			if (load_order == 0)
-				_emit(condt, ARM_LDRB_I(r_A, r_scratch, 0),
-				      ctx);
-			else if (load_order == 1)
-				emit_load_be16(condt, r_A, r_scratch, ctx);
-			else if (load_order == 2)
-				emit_load_be32(condt, r_A, r_scratch, ctx);
-
-			_emit(condt, ARM_B(b_imm(i + 1, ctx)), ctx);
-
-			/* the slowpath */
-			emit_mov_i(ARM_R3, (u32)load_func[load_order], ctx);
-			emit(ARM_MOV_R(ARM_R0, r_skb), ctx);
-			/* the offset is already in R1 */
-			emit_blx_r(ARM_R3, ctx);
-			/* check the result of skb_copy_bits */
-			emit(ARM_CMP_I(ARM_R1, 0), ctx);
-			emit_err_ret(ARM_COND_NE, ctx);
-			emit(ARM_MOV_R(r_A, ARM_R0), ctx);
+			emit_a32_mov_i64(is64, tmp2, imm, false, ctx);
+			emit_a32_alu_r64(is64, dst, tmp2, dstk, false,
+					 ctx, BPF_OP(code));
 			break;
-		case BPF_LD | BPF_W | BPF_IND:
-			load_order = 2;
-			goto load_ind;
-		case BPF_LD | BPF_H | BPF_IND:
-			load_order = 1;
-			goto load_ind;
-		case BPF_LD | BPF_B | BPF_IND:
-			load_order = 0;
-load_ind:
-			update_on_xread(ctx);
-			OP_IMM3(ARM_ADD, r_off, r_X, k, ctx);
-			goto load_common;
-		case BPF_LDX | BPF_IMM:
-			ctx->seen |= SEEN_X;
-			emit_mov_i(r_X, k, ctx);
+		}
+		break;
+	/* dst = dst / src(imm) */
+	/* dst = dst % src(imm) */
+	case BPF_ALU | BPF_DIV | BPF_K:
+	case BPF_ALU | BPF_DIV | BPF_X:
+	case BPF_ALU | BPF_MOD | BPF_K:
+	case BPF_ALU | BPF_MOD | BPF_X:
+		rt = src_lo;
+		rd = dstk ? tmp2[1] : dst_lo;
+		if (dstk)
+			emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
+		switch (BPF_SRC(code)) {
+		case BPF_X:
+			rt = sstk ? tmp2[0] : rt;
+			if (sstk)
+				emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(src_lo)),
+				     ctx);
 			break;
-		case BPF_LDX | BPF_W | BPF_LEN:
-			ctx->seen |= SEEN_X | SEEN_SKB;
-			emit(ARM_LDR_I(r_X, r_skb,
-				       offsetof(struct sk_buff, len)), ctx);
+		case BPF_K:
+			rt = tmp2[0];
+			emit_a32_mov_i(rt, imm, false, ctx);
 			break;
-		case BPF_LDX | BPF_MEM:
-			ctx->seen |= SEEN_X | SEEN_MEM_WORD(k);
-			emit(ARM_LDR_I(r_X, ARM_SP, SCRATCH_OFF(k)), ctx);
+		}
+		emit_udivmod(rd, rd, rt, ctx, BPF_OP(code));
+		if (dstk)
+			emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
+		emit_a32_mov_i(dst_hi, 0, dstk, ctx);
+		break;
+	case BPF_ALU64 | BPF_DIV | BPF_K:
+	case BPF_ALU64 | BPF_DIV | BPF_X:
+	case BPF_ALU64 | BPF_MOD | BPF_K:
+	case BPF_ALU64 | BPF_MOD | BPF_X:
+		goto notyet;
+	/* dst = dst >> imm */
+	/* dst = dst << imm */
+	case BPF_ALU | BPF_RSH | BPF_K:
+	case BPF_ALU | BPF_LSH | BPF_K:
+		if (unlikely(imm > 31))
+			return -EINVAL;
+		if (imm)
+			emit_a32_alu_i(dst_lo, imm, dstk, ctx, BPF_OP(code));
+		emit_a32_mov_i(dst_hi, 0, dstk, ctx);
+		break;
+	/* dst = dst << imm */
+	case BPF_ALU64 | BPF_LSH | BPF_K:
+		if (unlikely(imm > 63))
+			return -EINVAL;
+		emit_a32_lsh_i64(dst, dstk, imm, ctx);
+		break;
+	/* dst = dst >> imm */
+	case BPF_ALU64 | BPF_RSH | BPF_K:
+		if (unlikely(imm > 63))
+			return -EINVAL;
+		emit_a32_lsr_i64(dst, dstk, imm, ctx);
+		break;
+	/* dst = dst << src */
+	case BPF_ALU64 | BPF_LSH | BPF_X:
+		emit_a32_lsh_r64(dst, src, dstk, sstk, ctx);
+		break;
+	/* dst = dst >> src */
+	case BPF_ALU64 | BPF_RSH | BPF_X:
+		emit_a32_lsr_r64(dst, src, dstk, sstk, ctx);
+		break;
+	/* dst = dst >> src (signed) */
+	case BPF_ALU64 | BPF_ARSH | BPF_X:
+		emit_a32_arsh_r64(dst, src, dstk, sstk, ctx);
+		break;
+	/* dst = dst >> imm (signed) */
+	case BPF_ALU64 | BPF_ARSH | BPF_K:
+		if (unlikely(imm > 63))
+			return -EINVAL;
+		emit_a32_arsh_i64(dst, dstk, imm, ctx);
+		break;
+	/* dst = ~dst */
+	case BPF_ALU | BPF_NEG:
+		emit_a32_alu_i(dst_lo, 0, dstk, ctx, BPF_OP(code));
+		emit_a32_mov_i(dst_hi, 0, dstk, ctx);
+		break;
+	/* dst = ~dst (64 bit) */
+	case BPF_ALU64 | BPF_NEG:
+		emit_a32_neg64(dst, dstk, ctx);
+		break;
+	/* dst = dst * src/imm */
+	case BPF_ALU64 | BPF_MUL | BPF_X:
+	case BPF_ALU64 | BPF_MUL | BPF_K:
+		switch (BPF_SRC(code)) {
+		case BPF_X:
+			emit_a32_mul_r64(dst, src, dstk, sstk, ctx);
 			break;
-		case BPF_LDX | BPF_B | BPF_MSH:
-			/* x = ((*(frame + k)) & 0xf) << 2; */
-			ctx->seen |= SEEN_X | SEEN_DATA | SEEN_CALL;
-			/* the interpreter should deal with the negative K */
-			if ((int)k < 0)
-				return -1;
-			/* offset in r1: we might have to take the slow path */
-			emit_mov_i(r_off, k, ctx);
-			emit(ARM_CMP_R(r_skb_hl, r_off), ctx);
-
-			/* load in r0: common with the slowpath */
-			_emit(ARM_COND_HI, ARM_LDRB_R(ARM_R0, r_skb_data,
-						      ARM_R1), ctx);
-			/*
-			 * emit_mov_i() might generate one or two instructions,
-			 * the same holds for emit_blx_r()
+		case BPF_K:
+			/* Move immediate value to the temporary register
+			 * and then do the multiplication on it as this
+			 * will sign-extend the immediate value into temp
+			 * reg then it would be safe to do the operation
+			 * on it.
 			 */
-			_emit(ARM_COND_HI, ARM_B(b_imm(i + 1, ctx) - 2), ctx);
-
-			emit(ARM_MOV_R(ARM_R0, r_skb), ctx);
-			/* r_off is r1 */
-			emit_mov_i(ARM_R3, (u32)jit_get_skb_b, ctx);
-			emit_blx_r(ARM_R3, ctx);
-			/* check the return value of skb_copy_bits */
-			emit(ARM_CMP_I(ARM_R1, 0), ctx);
-			emit_err_ret(ARM_COND_NE, ctx);
-
-			emit(ARM_AND_I(r_X, ARM_R0, 0x00f), ctx);
-			emit(ARM_LSL_I(r_X, r_X, 2), ctx);
-			break;
-		case BPF_ST:
-			ctx->seen |= SEEN_MEM_WORD(k);
-			emit(ARM_STR_I(r_A, ARM_SP, SCRATCH_OFF(k)), ctx);
-			break;
-		case BPF_STX:
-			update_on_xread(ctx);
-			ctx->seen |= SEEN_MEM_WORD(k);
-			emit(ARM_STR_I(r_X, ARM_SP, SCRATCH_OFF(k)), ctx);
-			break;
-		case BPF_ALU | BPF_ADD | BPF_K:
-			/* A += K */
-			OP_IMM3(ARM_ADD, r_A, r_A, k, ctx);
-			break;
-		case BPF_ALU | BPF_ADD | BPF_X:
-			update_on_xread(ctx);
-			emit(ARM_ADD_R(r_A, r_A, r_X), ctx);
-			break;
-		case BPF_ALU | BPF_SUB | BPF_K:
-			/* A -= K */
-			OP_IMM3(ARM_SUB, r_A, r_A, k, ctx);
-			break;
-		case BPF_ALU | BPF_SUB | BPF_X:
-			update_on_xread(ctx);
-			emit(ARM_SUB_R(r_A, r_A, r_X), ctx);
-			break;
-		case BPF_ALU | BPF_MUL | BPF_K:
-			/* A *= K */
-			emit_mov_i(r_scratch, k, ctx);
-			emit(ARM_MUL(r_A, r_A, r_scratch), ctx);
-			break;
-		case BPF_ALU | BPF_MUL | BPF_X:
-			update_on_xread(ctx);
-			emit(ARM_MUL(r_A, r_A, r_X), ctx);
+			emit_a32_mov_i64(is64, tmp2, imm, false, ctx);
+			emit_a32_mul_r64(dst, tmp2, dstk, false, ctx);
 			break;
-		case BPF_ALU | BPF_DIV | BPF_K:
-			if (k == 1)
-				break;
-			emit_mov_i(r_scratch, k, ctx);
-			emit_udivmod(r_A, r_A, r_scratch, ctx, BPF_DIV);
-			break;
-		case BPF_ALU | BPF_DIV | BPF_X:
-			update_on_xread(ctx);
-			emit(ARM_CMP_I(r_X, 0), ctx);
-			emit_err_ret(ARM_COND_EQ, ctx);
-			emit_udivmod(r_A, r_A, r_X, ctx, BPF_DIV);
+		}
+		break;
+	/* dst = htole(dst) */
+	/* dst = htobe(dst) */
+	case BPF_ALU | BPF_END | BPF_FROM_LE:
+	case BPF_ALU | BPF_END | BPF_FROM_BE:
+		rd = dstk ? tmp[0] : dst_hi;
+		rt = dstk ? tmp[1] : dst_lo;
+		if (dstk) {
+			emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(dst_lo)), ctx);
+			emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_hi)), ctx);
+		}
+		if (BPF_SRC(code) == BPF_FROM_LE)
+			goto emit_bswap_uxt;
+		switch (imm) {
+		case 16:
+			emit_rev16(rt, rt, ctx);
+			goto emit_bswap_uxt;
+		case 32:
+			emit_rev32(rt, rt, ctx);
+			goto emit_bswap_uxt;
+		case 64:
+			/* Because of the usage of ARM_LR */
+			ctx->seen |= SEEN_CALL;
+			emit_rev32(ARM_LR, rt, ctx);
+			emit_rev32(rt, rd, ctx);
+			emit(ARM_MOV_R(rd, ARM_LR), ctx);
 			break;
-		case BPF_ALU | BPF_MOD | BPF_K:
-			if (k == 1) {
-				emit_mov_i(r_A, 0, ctx);
-				break;
-			}
-			emit_mov_i(r_scratch, k, ctx);
-			emit_udivmod(r_A, r_A, r_scratch, ctx, BPF_MOD);
+		}
+		goto exit;
+emit_bswap_uxt:
+		switch (imm) {
+		case 16:
+			/* zero-extend 16 bits into 64 bits */
+#if __LINUX_ARM_ARCH__ < 6
+			emit_a32_mov_i(tmp2[1], 0xffff, false, ctx);
+			emit(ARM_AND_R(rt, rt, tmp2[1]), ctx);
+#else /* ARMv6+ */
+			emit(ARM_UXTH(rt, rt), ctx);
+#endif
+			emit(ARM_EOR_R(rd, rd, rd), ctx);
 			break;
-		case BPF_ALU | BPF_MOD | BPF_X:
-			update_on_xread(ctx);
-			emit(ARM_CMP_I(r_X, 0), ctx);
-			emit_err_ret(ARM_COND_EQ, ctx);
-			emit_udivmod(r_A, r_A, r_X, ctx, BPF_MOD);
+		case 32:
+			/* zero-extend 32 bits into 64 bits */
+			emit(ARM_EOR_R(rd, rd, rd), ctx);
 			break;
-		case BPF_ALU | BPF_OR | BPF_K:
-			/* A |= K */
-			OP_IMM3(ARM_ORR, r_A, r_A, k, ctx);
+		case 64:
+			/* nop */
 			break;
-		case BPF_ALU | BPF_OR | BPF_X:
-			update_on_xread(ctx);
-			emit(ARM_ORR_R(r_A, r_A, r_X), ctx);
+		}
+exit:
+		if (dstk) {
+			emit(ARM_STR_I(rt, ARM_SP, STACK_VAR(dst_lo)), ctx);
+			emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst_hi)), ctx);
+		}
+		break;
+	/* dst = imm64 */
+	case BPF_LD | BPF_IMM | BPF_DW:
+	{
+		const struct bpf_insn insn1 = insn[1];
+		u32 hi, lo = imm;
+
+		hi = insn1.imm;
+		emit_a32_mov_i(dst_lo, lo, dstk, ctx);
+		emit_a32_mov_i(dst_hi, hi, dstk, ctx);
+
+		return 1;
+	}
+	/* LDX: dst = *(size *)(src + off) */
+	case BPF_LDX | BPF_MEM | BPF_W:
+	case BPF_LDX | BPF_MEM | BPF_H:
+	case BPF_LDX | BPF_MEM | BPF_B:
+	case BPF_LDX | BPF_MEM | BPF_DW:
+		rn = sstk ? tmp2[1] : src_lo;
+		if (sstk)
+			emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src_lo)), ctx);
+		switch (BPF_SIZE(code)) {
+		case BPF_W:
+			/* Load a Word */
+		case BPF_H:
+			/* Load a Half-Word */
+		case BPF_B:
+			/* Load a Byte */
+			emit_ldx_r(dst_lo, rn, dstk, off, ctx, BPF_SIZE(code));
+			emit_a32_mov_i(dst_hi, 0, dstk, ctx);
 			break;
-		case BPF_ALU | BPF_XOR | BPF_K:
-			/* A ^= K; */
-			OP_IMM3(ARM_EOR, r_A, r_A, k, ctx);
+		case BPF_DW:
+			/* Load a double word */
+			emit_ldx_r(dst_lo, rn, dstk, off, ctx, BPF_W);
+			emit_ldx_r(dst_hi, rn, dstk, off+4, ctx, BPF_W);
 			break;
-		case BPF_ANC | SKF_AD_ALU_XOR_X:
-		case BPF_ALU | BPF_XOR | BPF_X:
-			/* A ^= X */
-			update_on_xread(ctx);
-			emit(ARM_EOR_R(r_A, r_A, r_X), ctx);
+		}
+		break;
+	/* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + imm)) */
+	case BPF_LD | BPF_ABS | BPF_W:
+	case BPF_LD | BPF_ABS | BPF_H:
+	case BPF_LD | BPF_ABS | BPF_B:
+	/* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + src + imm)) */
+	case BPF_LD | BPF_IND | BPF_W:
+	case BPF_LD | BPF_IND | BPF_H:
+	case BPF_LD | BPF_IND | BPF_B:
+	{
+		const u8 r4 = bpf2a32[BPF_REG_6][1]; /* r4 = ptr to sk_buff */
+		const u8 r0 = bpf2a32[BPF_REG_0][1]; /*r0: struct sk_buff *skb*/
+						     /* rtn value */
+		const u8 r1 = bpf2a32[BPF_REG_0][0]; /* r1: int k */
+		const u8 r2 = bpf2a32[BPF_REG_1][1]; /* r2: unsigned int size */
+		const u8 r3 = bpf2a32[BPF_REG_1][0]; /* r3: void *buffer */
+		const u8 r6 = bpf2a32[TMP_REG_1][1]; /* r6: void *(*func)(..) */
+		int size;
+
+		/* Setting up first argument */
+		emit(ARM_MOV_R(r0, r4), ctx);
+
+		/* Setting up second argument */
+		emit_a32_mov_i(r1, imm, false, ctx);
+		if (BPF_MODE(code) == BPF_IND)
+			emit_a32_alu_r(r1, src_lo, false, sstk, ctx,
+				       false, false, BPF_ADD);
+
+		/* Setting up third argument */
+		switch (BPF_SIZE(code)) {
+		case BPF_W:
+			size = 4;
 			break;
-		case BPF_ALU | BPF_AND | BPF_K:
-			/* A &= K */
-			OP_IMM3(ARM_AND, r_A, r_A, k, ctx);
+		case BPF_H:
+			size = 2;
 			break;
-		case BPF_ALU | BPF_AND | BPF_X:
-			update_on_xread(ctx);
-			emit(ARM_AND_R(r_A, r_A, r_X), ctx);
+		case BPF_B:
+			size = 1;
 			break;
-		case BPF_ALU | BPF_LSH | BPF_K:
-			if (unlikely(k > 31))
-				return -1;
-			emit(ARM_LSL_I(r_A, r_A, k), ctx);
+		default:
+			return -EINVAL;
+		}
+		emit_a32_mov_i(r2, size, false, ctx);
+
+		/* Setting up fourth argument */
+		emit(ARM_ADD_I(r3, ARM_SP, imm8m(SKB_BUFFER)), ctx);
+
+		/* Setting up function pointer to call */
+		emit_a32_mov_i(r6, (unsigned int)bpf_load_pointer, false, ctx);
+		emit_blx_r(r6, ctx);
+
+		emit(ARM_EOR_R(r1, r1, r1), ctx);
+		/* Check if return address is NULL or not.
+		 * if NULL then jump to epilogue
+		 * else continue to load the value from retn address
+		 */
+		emit(ARM_CMP_I(r0, 0), ctx);
+		jmp_offset = epilogue_offset(ctx);
+		check_imm24(jmp_offset);
+		_emit(ARM_COND_EQ, ARM_B(jmp_offset), ctx);
+
+		/* Load value from the address */
+		switch (BPF_SIZE(code)) {
+		case BPF_W:
+			emit(ARM_LDR_I(r0, r0, 0), ctx);
+			emit_rev32(r0, r0, ctx);
 			break;
-		case BPF_ALU | BPF_LSH | BPF_X:
-			update_on_xread(ctx);
-			emit(ARM_LSL_R(r_A, r_A, r_X), ctx);
+		case BPF_H:
+			emit(ARM_LDRH_I(r0, r0, 0), ctx);
+			emit_rev16(r0, r0, ctx);
 			break;
-		case BPF_ALU | BPF_RSH | BPF_K:
-			if (unlikely(k > 31))
-				return -1;
-			if (k)
-				emit(ARM_LSR_I(r_A, r_A, k), ctx);
+		case BPF_B:
+			emit(ARM_LDRB_I(r0, r0, 0), ctx);
+			/* No need to reverse */
 			break;
-		case BPF_ALU | BPF_RSH | BPF_X:
-			update_on_xread(ctx);
-			emit(ARM_LSR_R(r_A, r_A, r_X), ctx);
+		}
+		break;
+	}
+	/* ST: *(size *)(dst + off) = imm */
+	case BPF_ST | BPF_MEM | BPF_W:
+	case BPF_ST | BPF_MEM | BPF_H:
+	case BPF_ST | BPF_MEM | BPF_B:
+	case BPF_ST | BPF_MEM | BPF_DW:
+		switch (BPF_SIZE(code)) {
+		case BPF_DW:
+			/* Sign-extend immediate value into temp reg */
+			emit_a32_mov_i64(true, tmp2, imm, false, ctx);
+			emit_str_r(dst_lo, tmp2[1], dstk, off, ctx, BPF_W);
+			emit_str_r(dst_lo, tmp2[0], dstk, off+4, ctx, BPF_W);
 			break;
-		case BPF_ALU | BPF_NEG:
-			/* A = -A */
-			emit(ARM_RSB_I(r_A, r_A, 0), ctx);
+		case BPF_W:
+		case BPF_H:
+		case BPF_B:
+			emit_a32_mov_i(tmp2[1], imm, false, ctx);
+			emit_str_r(dst_lo, tmp2[1], dstk, off, ctx,
+				   BPF_SIZE(code));
 			break;
-		case BPF_JMP | BPF_JA:
-			/* pc += K */
-			emit(ARM_B(b_imm(i + k + 1, ctx)), ctx);
+		}
+		break;
+	/* STX XADD: lock *(u32 *)(dst + off) += src */
+	case BPF_STX | BPF_XADD | BPF_W:
+	/* STX XADD: lock *(u64 *)(dst + off) += src */
+	case BPF_STX | BPF_XADD | BPF_DW:
+		goto notyet;
+	/* STX: *(size *)(dst + off) = src */
+	case BPF_STX | BPF_MEM | BPF_W:
+	case BPF_STX | BPF_MEM | BPF_H:
+	case BPF_STX | BPF_MEM | BPF_B:
+	case BPF_STX | BPF_MEM | BPF_DW:
+	{
+		u8 sz = BPF_SIZE(code);
+
+		rn = sstk ? tmp2[1] : src_lo;
+		rm = sstk ? tmp2[0] : src_hi;
+		if (sstk) {
+			emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src_lo)), ctx);
+			emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(src_hi)), ctx);
+		}
+
+		/* Store the value */
+		if (BPF_SIZE(code) == BPF_DW) {
+			emit_str_r(dst_lo, rn, dstk, off, ctx, BPF_W);
+			emit_str_r(dst_lo, rm, dstk, off+4, ctx, BPF_W);
+		} else {
+			emit_str_r(dst_lo, rn, dstk, off, ctx, sz);
+		}
+		break;
+	}
+	/* PC += off if dst == src */
+	/* PC += off if dst > src */
+	/* PC += off if dst >= src */
+	/* PC += off if dst < src */
+	/* PC += off if dst <= src */
+	/* PC += off if dst != src */
+	/* PC += off if dst > src (signed) */
+	/* PC += off if dst >= src (signed) */
+	/* PC += off if dst < src (signed) */
+	/* PC += off if dst <= src (signed) */
+	/* PC += off if dst & src */
+	case BPF_JMP | BPF_JEQ | BPF_X:
+	case BPF_JMP | BPF_JGT | BPF_X:
+	case BPF_JMP | BPF_JGE | BPF_X:
+	case BPF_JMP | BPF_JNE | BPF_X:
+	case BPF_JMP | BPF_JSGT | BPF_X:
+	case BPF_JMP | BPF_JSGE | BPF_X:
+	case BPF_JMP | BPF_JSET | BPF_X:
+	case BPF_JMP | BPF_JLE | BPF_X:
+	case BPF_JMP | BPF_JLT | BPF_X:
+	case BPF_JMP | BPF_JSLT | BPF_X:
+	case BPF_JMP | BPF_JSLE | BPF_X:
+		/* Setup source registers */
+		rm = sstk ? tmp2[0] : src_hi;
+		rn = sstk ? tmp2[1] : src_lo;
+		if (sstk) {
+			emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src_lo)), ctx);
+			emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(src_hi)), ctx);
+		}
+		goto go_jmp;
+	/* PC += off if dst == imm */
+	/* PC += off if dst > imm */
+	/* PC += off if dst >= imm */
+	/* PC += off if dst < imm */
+	/* PC += off if dst <= imm */
+	/* PC += off if dst != imm */
+	/* PC += off if dst > imm (signed) */
+	/* PC += off if dst >= imm (signed) */
+	/* PC += off if dst < imm (signed) */
+	/* PC += off if dst <= imm (signed) */
+	/* PC += off if dst & imm */
+	case BPF_JMP | BPF_JEQ | BPF_K:
+	case BPF_JMP | BPF_JGT | BPF_K:
+	case BPF_JMP | BPF_JGE | BPF_K:
+	case BPF_JMP | BPF_JNE | BPF_K:
+	case BPF_JMP | BPF_JSGT | BPF_K:
+	case BPF_JMP | BPF_JSGE | BPF_K:
+	case BPF_JMP | BPF_JSET | BPF_K:
+	case BPF_JMP | BPF_JLT | BPF_K:
+	case BPF_JMP | BPF_JLE | BPF_K:
+	case BPF_JMP | BPF_JSLT | BPF_K:
+	case BPF_JMP | BPF_JSLE | BPF_K:
+		if (off == 0)
 			break;
-		case BPF_JMP | BPF_JEQ | BPF_K:
-			/* pc += (A == K) ? pc->jt : pc->jf */
-			condt  = ARM_COND_EQ;
-			goto cmp_imm;
-		case BPF_JMP | BPF_JGT | BPF_K:
-			/* pc += (A > K) ? pc->jt : pc->jf */
-			condt  = ARM_COND_HI;
-			goto cmp_imm;
-		case BPF_JMP | BPF_JGE | BPF_K:
-			/* pc += (A >= K) ? pc->jt : pc->jf */
-			condt  = ARM_COND_HS;
-cmp_imm:
-			imm12 = imm8m(k);
-			if (imm12 < 0) {
-				emit_mov_i_no8m(r_scratch, k, ctx);
-				emit(ARM_CMP_R(r_A, r_scratch), ctx);
-			} else {
-				emit(ARM_CMP_I(r_A, imm12), ctx);
-			}
-cond_jump:
-			if (inst->jt)
-				_emit(condt, ARM_B(b_imm(i + inst->jt + 1,
-						   ctx)), ctx);
-			if (inst->jf)
-				_emit(condt ^ 1, ARM_B(b_imm(i + inst->jf + 1,
-							     ctx)), ctx);
+		rm = tmp2[0];
+		rn = tmp2[1];
+		/* Sign-extend immediate value */
+		emit_a32_mov_i64(true, tmp2, imm, false, ctx);
+go_jmp:
+		/* Setup destination register */
+		rd = dstk ? tmp[0] : dst_hi;
+		rt = dstk ? tmp[1] : dst_lo;
+		if (dstk) {
+			emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(dst_lo)), ctx);
+			emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_hi)), ctx);
+		}
+
+		/* Check for the condition */
+		emit_ar_r(rd, rt, rm, rn, ctx, BPF_OP(code));
+
+		/* Setup JUMP instruction */
+		jmp_offset = bpf2a32_offset(i+off, i, ctx);
+		switch (BPF_OP(code)) {
+		case BPF_JNE:
+		case BPF_JSET:
+			_emit(ARM_COND_NE, ARM_B(jmp_offset), ctx);
 			break;
-		case BPF_JMP | BPF_JEQ | BPF_X:
-			/* pc += (A == X) ? pc->jt : pc->jf */
-			condt   = ARM_COND_EQ;
-			goto cmp_x;
-		case BPF_JMP | BPF_JGT | BPF_X:
-			/* pc += (A > X) ? pc->jt : pc->jf */
-			condt   = ARM_COND_HI;
-			goto cmp_x;
-		case BPF_JMP | BPF_JGE | BPF_X:
-			/* pc += (A >= X) ? pc->jt : pc->jf */
-			condt   = ARM_COND_CS;
-cmp_x:
-			update_on_xread(ctx);
-			emit(ARM_CMP_R(r_A, r_X), ctx);
-			goto cond_jump;
-		case BPF_JMP | BPF_JSET | BPF_K:
-			/* pc += (A & K) ? pc->jt : pc->jf */
-			condt  = ARM_COND_NE;
-			/* not set iff all zeroes iff Z==1 iff EQ */
-
-			imm12 = imm8m(k);
-			if (imm12 < 0) {
-				emit_mov_i_no8m(r_scratch, k, ctx);
-				emit(ARM_TST_R(r_A, r_scratch), ctx);
-			} else {
-				emit(ARM_TST_I(r_A, imm12), ctx);
-			}
-			goto cond_jump;
-		case BPF_JMP | BPF_JSET | BPF_X:
-			/* pc += (A & X) ? pc->jt : pc->jf */
-			update_on_xread(ctx);
-			condt  = ARM_COND_NE;
-			emit(ARM_TST_R(r_A, r_X), ctx);
-			goto cond_jump;
-		case BPF_RET | BPF_A:
-			emit(ARM_MOV_R(ARM_R0, r_A), ctx);
-			goto b_epilogue;
-		case BPF_RET | BPF_K:
-			if ((k == 0) && (ctx->ret0_fp_idx < 0))
-				ctx->ret0_fp_idx = i;
-			emit_mov_i(ARM_R0, k, ctx);
-b_epilogue:
-			if (i != ctx->skf->len - 1)
-				emit(ARM_B(b_imm(prog->len, ctx)), ctx);
+		case BPF_JEQ:
+			_emit(ARM_COND_EQ, ARM_B(jmp_offset), ctx);
 			break;
-		case BPF_MISC | BPF_TAX:
-			/* X = A */
-			ctx->seen |= SEEN_X;
-			emit(ARM_MOV_R(r_X, r_A), ctx);
+		case BPF_JGT:
+			_emit(ARM_COND_HI, ARM_B(jmp_offset), ctx);
 			break;
-		case BPF_MISC | BPF_TXA:
-			/* A = X */
-			update_on_xread(ctx);
-			emit(ARM_MOV_R(r_A, r_X), ctx);
+		case BPF_JGE:
+			_emit(ARM_COND_CS, ARM_B(jmp_offset), ctx);
 			break;
-		case BPF_ANC | SKF_AD_PROTOCOL:
-			/* A = ntohs(skb->protocol) */
-			ctx->seen |= SEEN_SKB;
-			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff,
-						  protocol) != 2);
-			off = offsetof(struct sk_buff, protocol);
-			emit(ARM_LDRH_I(r_scratch, r_skb, off), ctx);
-			emit_swap16(r_A, r_scratch, ctx);
+		case BPF_JSGT:
+			_emit(ARM_COND_LT, ARM_B(jmp_offset), ctx);
 			break;
-		case BPF_ANC | SKF_AD_CPU:
-			/* r_scratch = current_thread_info() */
-			OP_IMM3(ARM_BIC, r_scratch, ARM_SP, THREAD_SIZE - 1, ctx);
-			/* A = current_thread_info()->cpu */
-			BUILD_BUG_ON(FIELD_SIZEOF(struct thread_info, cpu) != 4);
-			off = offsetof(struct thread_info, cpu);
-			emit(ARM_LDR_I(r_A, r_scratch, off), ctx);
+		case BPF_JSGE:
+			_emit(ARM_COND_GE, ARM_B(jmp_offset), ctx);
 			break;
-		case BPF_ANC | SKF_AD_IFINDEX:
-		case BPF_ANC | SKF_AD_HATYPE:
-			/* A = skb->dev->ifindex */
-			/* A = skb->dev->type */
-			ctx->seen |= SEEN_SKB;
-			off = offsetof(struct sk_buff, dev);
-			emit(ARM_LDR_I(r_scratch, r_skb, off), ctx);
-
-			emit(ARM_CMP_I(r_scratch, 0), ctx);
-			emit_err_ret(ARM_COND_EQ, ctx);
-
-			BUILD_BUG_ON(FIELD_SIZEOF(struct net_device,
-						  ifindex) != 4);
-			BUILD_BUG_ON(FIELD_SIZEOF(struct net_device,
-						  type) != 2);
-
-			if (code == (BPF_ANC | SKF_AD_IFINDEX)) {
-				off = offsetof(struct net_device, ifindex);
-				emit(ARM_LDR_I(r_A, r_scratch, off), ctx);
-			} else {
-				/*
-				 * offset of field "type" in "struct
-				 * net_device" is above what can be
-				 * used in the ldrh rd, [rn, #imm]
-				 * instruction, so load the offset in
-				 * a register and use ldrh rd, [rn, rm]
-				 */
-				off = offsetof(struct net_device, type);
-				emit_mov_i(ARM_R3, off, ctx);
-				emit(ARM_LDRH_R(r_A, r_scratch, ARM_R3), ctx);
-			}
+		case BPF_JLE:
+			_emit(ARM_COND_LS, ARM_B(jmp_offset), ctx);
 			break;
-		case BPF_ANC | SKF_AD_MARK:
-			ctx->seen |= SEEN_SKB;
-			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
-			off = offsetof(struct sk_buff, mark);
-			emit(ARM_LDR_I(r_A, r_skb, off), ctx);
+		case BPF_JLT:
+			_emit(ARM_COND_CC, ARM_B(jmp_offset), ctx);
 			break;
-		case BPF_ANC | SKF_AD_RXHASH:
-			ctx->seen |= SEEN_SKB;
-			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4);
-			off = offsetof(struct sk_buff, hash);
-			emit(ARM_LDR_I(r_A, r_skb, off), ctx);
+		case BPF_JSLT:
+			_emit(ARM_COND_LT, ARM_B(jmp_offset), ctx);
 			break;
-		case BPF_ANC | SKF_AD_VLAN_TAG:
-		case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT:
-			ctx->seen |= SEEN_SKB;
-			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2);
-			off = offsetof(struct sk_buff, vlan_tci);
-			emit(ARM_LDRH_I(r_A, r_skb, off), ctx);
-			if (code == (BPF_ANC | SKF_AD_VLAN_TAG))
-				OP_IMM3(ARM_AND, r_A, r_A, ~VLAN_TAG_PRESENT, ctx);
-			else {
-				OP_IMM3(ARM_LSR, r_A, r_A, 12, ctx);
-				OP_IMM3(ARM_AND, r_A, r_A, 0x1, ctx);
-			}
+		case BPF_JSLE:
+			_emit(ARM_COND_GE, ARM_B(jmp_offset), ctx);
 			break;
-		case BPF_ANC | SKF_AD_PKTTYPE:
-			ctx->seen |= SEEN_SKB;
-			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff,
-						  __pkt_type_offset[0]) != 1);
-			off = PKT_TYPE_OFFSET();
-			emit(ARM_LDRB_I(r_A, r_skb, off), ctx);
-			emit(ARM_AND_I(r_A, r_A, PKT_TYPE_MAX), ctx);
-#ifdef __BIG_ENDIAN_BITFIELD
-			emit(ARM_LSR_I(r_A, r_A, 5), ctx);
-#endif
+		}
+		break;
+	/* JMP OFF */
+	case BPF_JMP | BPF_JA:
+	{
+		if (off == 0)
 			break;
-		case BPF_ANC | SKF_AD_QUEUE:
-			ctx->seen |= SEEN_SKB;
-			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff,
-						  queue_mapping) != 2);
-			BUILD_BUG_ON(offsetof(struct sk_buff,
-					      queue_mapping) > 0xff);
-			off = offsetof(struct sk_buff, queue_mapping);
-			emit(ARM_LDRH_I(r_A, r_skb, off), ctx);
+		jmp_offset = bpf2a32_offset(i+off, i, ctx);
+		check_imm24(jmp_offset);
+		emit(ARM_B(jmp_offset), ctx);
+		break;
+	}
+	/* tail call */
+	case BPF_JMP | BPF_TAIL_CALL:
+		if (emit_bpf_tail_call(ctx))
+			return -EFAULT;
+		break;
+	/* function call */
+	case BPF_JMP | BPF_CALL:
+	{
+		const u8 *r0 = bpf2a32[BPF_REG_0];
+		const u8 *r1 = bpf2a32[BPF_REG_1];
+		const u8 *r2 = bpf2a32[BPF_REG_2];
+		const u8 *r3 = bpf2a32[BPF_REG_3];
+		const u8 *r4 = bpf2a32[BPF_REG_4];
+		const u8 *r5 = bpf2a32[BPF_REG_5];
+		const u32 func = (u32)__bpf_call_base + (u32)imm;
+
+		emit_a32_mov_r64(true, r0, r1, false, false, ctx);
+		emit_a32_mov_r64(true, r1, r2, false, true, ctx);
+		emit_push_r64(r5, 0, ctx);
+		emit_push_r64(r4, 8, ctx);
+		emit_push_r64(r3, 16, ctx);
+
+		emit_a32_mov_i(tmp[1], func, false, ctx);
+		emit_blx_r(tmp[1], ctx);
+
+		emit(ARM_ADD_I(ARM_SP, ARM_SP, imm8m(24)), ctx); // callee clean
+		break;
+	}
+	/* function return */
+	case BPF_JMP | BPF_EXIT:
+		/* Optimization: when last instruction is EXIT
+		 * simply fallthrough to epilogue.
+		 */
+		if (i == ctx->prog->len - 1)
 			break;
-		case BPF_ANC | SKF_AD_PAY_OFFSET:
-			ctx->seen |= SEEN_SKB | SEEN_CALL;
+		jmp_offset = epilogue_offset(ctx);
+		check_imm24(jmp_offset);
+		emit(ARM_B(jmp_offset), ctx);
+		break;
+notyet:
+		pr_info_once("*** NOT YET: opcode %02x ***\n", code);
+		return -EFAULT;
+	default:
+		pr_err_once("unknown opcode %02x\n", code);
+		return -EINVAL;
+	}
 
-			emit(ARM_MOV_R(ARM_R0, r_skb), ctx);
-			emit_mov_i(ARM_R3, (unsigned int)skb_get_poff, ctx);
-			emit_blx_r(ARM_R3, ctx);
-			emit(ARM_MOV_R(r_A, ARM_R0), ctx);
-			break;
-		case BPF_LDX | BPF_W | BPF_ABS:
-			/*
-			 * load a 32bit word from struct seccomp_data.
-			 * seccomp_check_filter() will already have checked
-			 * that k is 32bit aligned and lies within the
-			 * struct seccomp_data.
-			 */
-			ctx->seen |= SEEN_SKB;
-			emit(ARM_LDR_I(r_A, r_skb, k), ctx);
-			break;
-		default:
-			return -1;
+	if (ctx->flags & FLAG_IMM_OVERFLOW)
+		/*
+		 * this instruction generated an overflow when
+		 * trying to access the literal pool, so
+		 * delegate this filter to the kernel interpreter.
+		 */
+		return -1;
+	return 0;
+}
+
+static int build_body(struct jit_ctx *ctx)
+{
+	const struct bpf_prog *prog = ctx->prog;
+	unsigned int i;
+
+	for (i = 0; i < prog->len; i++) {
+		const struct bpf_insn *insn = &(prog->insnsi[i]);
+		int ret;
+
+		ret = build_insn(insn, ctx);
+
+		/* It's used with loading the 64 bit immediate value. */
+		if (ret > 0) {
+			i++;
+			if (ctx->target == NULL)
+				ctx->offsets[i] = ctx->idx;
+			continue;
 		}
 
-		if (ctx->flags & FLAG_IMM_OVERFLOW)
-			/*
-			 * this instruction generated an overflow when
-			 * trying to access the literal pool, so
-			 * delegate this filter to the kernel interpreter.
-			 */
-			return -1;
+		if (ctx->target == NULL)
+			ctx->offsets[i] = ctx->idx;
+
+		/* If unsuccesfull, return with error code */
+		if (ret)
+			return ret;
 	}
+	return 0;
+}
 
-	/* compute offsets only during the first pass */
-	if (ctx->target == NULL)
-		ctx->offsets[i] = ctx->idx * 4;
+static int validate_code(struct jit_ctx *ctx)
+{
+	int i;
+
+	for (i = 0; i < ctx->idx; i++) {
+		if (ctx->target[i] == __opcode_to_mem_arm(ARM_INST_UDF))
+			return -1;
+	}
 
 	return 0;
 }
 
+void bpf_jit_compile(struct bpf_prog *prog)
+{
+	/* Nothing to do here. We support Internal BPF. */
+}
 
-void bpf_jit_compile(struct bpf_prog *fp)
+struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 {
+	struct bpf_prog *tmp, *orig_prog = prog;
 	struct bpf_binary_header *header;
+	bool tmp_blinded = false;
 	struct jit_ctx ctx;
-	unsigned tmp_idx;
-	unsigned alloc_size;
-	u8 *target_ptr;
+	unsigned int tmp_idx;
+	unsigned int image_size;
+	u8 *image_ptr;
 
+	/* If BPF JIT was not enabled then we must fall back to
+	 * the interpreter.
+	 */
 	if (!bpf_jit_enable)
-		return;
+		return orig_prog;
 
-	memset(&ctx, 0, sizeof(ctx));
-	ctx.skf		= fp;
-	ctx.ret0_fp_idx = -1;
+	/* If constant blinding was enabled and we failed during blinding
+	 * then we must fall back to the interpreter. Otherwise, we save
+	 * the new JITed code.
+	 */
+	tmp = bpf_jit_blind_constants(prog);
 
-	ctx.offsets = kzalloc(4 * (ctx.skf->len + 1), GFP_KERNEL);
-	if (ctx.offsets == NULL)
-		return;
+	if (IS_ERR(tmp))
+		return orig_prog;
+	if (tmp != prog) {
+		tmp_blinded = true;
+		prog = tmp;
+	}
 
-	/* fake pass to fill in the ctx->seen */
-	if (unlikely(build_body(&ctx)))
+	memset(&ctx, 0, sizeof(ctx));
+	ctx.prog = prog;
+
+	/* Not able to allocate memory for offsets[] , then
+	 * we must fall back to the interpreter
+	 */
+	ctx.offsets = kcalloc(prog->len, sizeof(int), GFP_KERNEL);
+	if (ctx.offsets == NULL) {
+		prog = orig_prog;
 		goto out;
+	}
+
+	/* 1) fake pass to find in the length of the JITed code,
+	 * to compute ctx->offsets and other context variables
+	 * needed to compute final JITed code.
+	 * Also, calculate random starting pointer/start of JITed code
+	 * which is prefixed by random number of fault instructions.
+	 *
+	 * If the first pass fails then there is no chance of it
+	 * being successful in the second pass, so just fall back
+	 * to the interpreter.
+	 */
+	if (build_body(&ctx)) {
+		prog = orig_prog;
+		goto out_off;
+	}
 
 	tmp_idx = ctx.idx;
 	build_prologue(&ctx);
 	ctx.prologue_bytes = (ctx.idx - tmp_idx) * 4;
 
+	ctx.epilogue_offset = ctx.idx;
+
 #if __LINUX_ARM_ARCH__ < 7
 	tmp_idx = ctx.idx;
 	build_epilogue(&ctx);
@@ -1021,64 +1880,83 @@ void bpf_jit_compile(struct bpf_prog *fp)
 
 	ctx.idx += ctx.imm_count;
 	if (ctx.imm_count) {
-		ctx.imms = kzalloc(4 * ctx.imm_count, GFP_KERNEL);
-		if (ctx.imms == NULL)
-			goto out;
+		ctx.imms = kcalloc(ctx.imm_count, sizeof(u32), GFP_KERNEL);
+		if (ctx.imms == NULL) {
+			prog = orig_prog;
+			goto out_off;
+		}
 	}
 #else
-	/* there's nothing after the epilogue on ARMv7 */
+	/* there's nothing about the epilogue on ARMv7 */
 	build_epilogue(&ctx);
 #endif
-	alloc_size = 4 * ctx.idx;
-	header = bpf_jit_binary_alloc(alloc_size, &target_ptr,
-				      4, jit_fill_hole);
-	if (header == NULL)
-		goto out;
+	/* Now we can get the actual image size of the JITed arm code.
+	 * Currently, we are not considering the THUMB-2 instructions
+	 * for jit, although it can decrease the size of the image.
+	 *
+	 * As each arm instruction is of length 32bit, we are translating
+	 * number of JITed intructions into the size required to store these
+	 * JITed code.
+	 */
+	image_size = sizeof(u32) * ctx.idx;
 
-	ctx.target = (u32 *) target_ptr;
+	/* Now we know the size of the structure to make */
+	header = bpf_jit_binary_alloc(image_size, &image_ptr,
+				      sizeof(u32), jit_fill_hole);
+	/* Not able to allocate memory for the structure then
+	 * we must fall back to the interpretation
+	 */
+	if (header == NULL) {
+		prog = orig_prog;
+		goto out_imms;
+	}
+
+	/* 2.) Actual pass to generate final JIT code */
+	ctx.target = (u32 *) image_ptr;
 	ctx.idx = 0;
 
 	build_prologue(&ctx);
+
+	/* If building the body of the JITed code fails somehow,
+	 * we fall back to the interpretation.
+	 */
 	if (build_body(&ctx) < 0) {
-#if __LINUX_ARM_ARCH__ < 7
-		if (ctx.imm_count)
-			kfree(ctx.imms);
-#endif
+		image_ptr = NULL;
 		bpf_jit_binary_free(header);
-		goto out;
+		prog = orig_prog;
+		goto out_imms;
 	}
 	build_epilogue(&ctx);
 
+	/* 3.) Extra pass to validate JITed Code */
+	if (validate_code(&ctx)) {
+		image_ptr = NULL;
+		bpf_jit_binary_free(header);
+		prog = orig_prog;
+		goto out_imms;
+	}
 	flush_icache_range((u32)header, (u32)(ctx.target + ctx.idx));
 
-#if __LINUX_ARM_ARCH__ < 7
-	if (ctx.imm_count)
-		kfree(ctx.imms);
-#endif
-
 	if (bpf_jit_enable > 1)
 		/* there are 2 passes here */
-		bpf_jit_dump(fp->len, alloc_size, 2, ctx.target);
+		bpf_jit_dump(prog->len, image_size, 2, ctx.target);
 
 	set_memory_ro((unsigned long)header, header->pages);
-	fp->bpf_func = (void *)ctx.target;
-	fp->jited = 1;
-out:
+	prog->bpf_func = (void *)ctx.target;
+	prog->jited = 1;
+	prog->jited_len = image_size;
+
+out_imms:
+#if __LINUX_ARM_ARCH__ < 7
+	if (ctx.imm_count)
+		kfree(ctx.imms);
+#endif
+out_off:
 	kfree(ctx.offsets);
-	return;
+out:
+	if (tmp_blinded)
+		bpf_jit_prog_release_other(prog, prog == orig_prog ?
+					   tmp : orig_prog);
+	return prog;
 }
 
-void bpf_jit_free(struct bpf_prog *fp)
-{
-	unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK;
-	struct bpf_binary_header *header = (void *)addr;
-
-	if (!fp->jited)
-		goto free_filter;
-
-	set_memory_rw(addr, header->pages);
-	bpf_jit_binary_free(header);
-
-free_filter:
-	bpf_prog_unlock_free(fp);
-}
diff --git a/arch/arm/net/bpf_jit_32.h b/arch/arm/net/bpf_jit_32.h
index c46fca2972f7..d5cf5f6208aa 100644
--- a/arch/arm/net/bpf_jit_32.h
+++ b/arch/arm/net/bpf_jit_32.h
@@ -11,6 +11,7 @@
 #ifndef PFILTER_OPCODES_ARM_H
 #define PFILTER_OPCODES_ARM_H
 
+/* ARM 32bit Registers */
 #define ARM_R0	0
 #define ARM_R1	1
 #define ARM_R2	2
@@ -22,38 +23,43 @@
 #define ARM_R8	8
 #define ARM_R9	9
 #define ARM_R10	10
-#define ARM_FP	11
-#define ARM_IP	12
-#define ARM_SP	13
-#define ARM_LR	14
-#define ARM_PC	15
-
-#define ARM_COND_EQ		0x0
-#define ARM_COND_NE		0x1
-#define ARM_COND_CS		0x2
+#define ARM_FP	11	/* Frame Pointer */
+#define ARM_IP	12	/* Intra-procedure scratch register */
+#define ARM_SP	13	/* Stack pointer: as load/store base reg */
+#define ARM_LR	14	/* Link Register */
+#define ARM_PC	15	/* Program counter */
+
+#define ARM_COND_EQ		0x0	/* == */
+#define ARM_COND_NE		0x1	/* != */
+#define ARM_COND_CS		0x2	/* unsigned >= */
 #define ARM_COND_HS		ARM_COND_CS
-#define ARM_COND_CC		0x3
+#define ARM_COND_CC		0x3	/* unsigned < */
 #define ARM_COND_LO		ARM_COND_CC
-#define ARM_COND_MI		0x4
-#define ARM_COND_PL		0x5
-#define ARM_COND_VS		0x6
-#define ARM_COND_VC		0x7
-#define ARM_COND_HI		0x8
-#define ARM_COND_LS		0x9
-#define ARM_COND_GE		0xa
-#define ARM_COND_LT		0xb
-#define ARM_COND_GT		0xc
-#define ARM_COND_LE		0xd
-#define ARM_COND_AL		0xe
+#define ARM_COND_MI		0x4	/* < 0 */
+#define ARM_COND_PL		0x5	/* >= 0 */
+#define ARM_COND_VS		0x6	/* Signed Overflow */
+#define ARM_COND_VC		0x7	/* No Signed Overflow */
+#define ARM_COND_HI		0x8	/* unsigned > */
+#define ARM_COND_LS		0x9	/* unsigned <= */
+#define ARM_COND_GE		0xa	/* Signed >= */
+#define ARM_COND_LT		0xb	/* Signed < */
+#define ARM_COND_GT		0xc	/* Signed > */
+#define ARM_COND_LE		0xd	/* Signed <= */
+#define ARM_COND_AL		0xe	/* None */
 
 /* register shift types */
 #define SRTYPE_LSL		0
 #define SRTYPE_LSR		1
 #define SRTYPE_ASR		2
 #define SRTYPE_ROR		3
+#define SRTYPE_ASL		(SRTYPE_LSL)
 
 #define ARM_INST_ADD_R		0x00800000
+#define ARM_INST_ADDS_R		0x00900000
+#define ARM_INST_ADC_R		0x00a00000
+#define ARM_INST_ADC_I		0x02a00000
 #define ARM_INST_ADD_I		0x02800000
+#define ARM_INST_ADDS_I		0x02900000
 
 #define ARM_INST_AND_R		0x00000000
 #define ARM_INST_AND_I		0x02000000
@@ -76,8 +82,10 @@
 #define ARM_INST_LDRH_I		0x01d000b0
 #define ARM_INST_LDRH_R		0x019000b0
 #define ARM_INST_LDR_I		0x05900000
+#define ARM_INST_LDR_R		0x07900000
 
 #define ARM_INST_LDM		0x08900000
+#define ARM_INST_LDM_IA		0x08b00000
 
 #define ARM_INST_LSL_I		0x01a00000
 #define ARM_INST_LSL_R		0x01a00010
@@ -86,6 +94,7 @@
 #define ARM_INST_LSR_R		0x01a00030
 
 #define ARM_INST_MOV_R		0x01a00000
+#define ARM_INST_MOVS_R		0x01b00000
 #define ARM_INST_MOV_I		0x03a00000
 #define ARM_INST_MOVW		0x03000000
 #define ARM_INST_MOVT		0x03400000
@@ -96,17 +105,28 @@
 #define ARM_INST_PUSH		0x092d0000
 
 #define ARM_INST_ORR_R		0x01800000
+#define ARM_INST_ORRS_R		0x01900000
 #define ARM_INST_ORR_I		0x03800000
 
 #define ARM_INST_REV		0x06bf0f30
 #define ARM_INST_REV16		0x06bf0fb0
 
 #define ARM_INST_RSB_I		0x02600000
+#define ARM_INST_RSBS_I		0x02700000
+#define ARM_INST_RSC_I		0x02e00000
 
 #define ARM_INST_SUB_R		0x00400000
+#define ARM_INST_SUBS_R		0x00500000
+#define ARM_INST_RSB_R		0x00600000
 #define ARM_INST_SUB_I		0x02400000
+#define ARM_INST_SUBS_I		0x02500000
+#define ARM_INST_SBC_I		0x02c00000
+#define ARM_INST_SBC_R		0x00c00000
+#define ARM_INST_SBCS_R		0x00d00000
 
 #define ARM_INST_STR_I		0x05800000
+#define ARM_INST_STRB_I		0x05c00000
+#define ARM_INST_STRH_I		0x01c000b0
 
 #define ARM_INST_TST_R		0x01100000
 #define ARM_INST_TST_I		0x03100000
@@ -117,6 +137,8 @@
 
 #define ARM_INST_MLS		0x00600090
 
+#define ARM_INST_UXTH		0x06ff0070
+
 /*
  * Use a suitable undefined instruction to use for ARM/Thumb2 faulting.
  * We need to be careful not to conflict with those used by other modules
@@ -135,9 +157,15 @@
 #define _AL3_R(op, rd, rn, rm)	((op ## _R) | (rd) << 12 | (rn) << 16 | (rm))
 /* immediate */
 #define _AL3_I(op, rd, rn, imm)	((op ## _I) | (rd) << 12 | (rn) << 16 | (imm))
+/* register with register-shift */
+#define _AL3_SR(inst)	(inst | (1 << 4))
 
 #define ARM_ADD_R(rd, rn, rm)	_AL3_R(ARM_INST_ADD, rd, rn, rm)
+#define ARM_ADDS_R(rd, rn, rm)	_AL3_R(ARM_INST_ADDS, rd, rn, rm)
 #define ARM_ADD_I(rd, rn, imm)	_AL3_I(ARM_INST_ADD, rd, rn, imm)
+#define ARM_ADDS_I(rd, rn, imm)	_AL3_I(ARM_INST_ADDS, rd, rn, imm)
+#define ARM_ADC_R(rd, rn, rm)	_AL3_R(ARM_INST_ADC, rd, rn, rm)
+#define ARM_ADC_I(rd, rn, imm)	_AL3_I(ARM_INST_ADC, rd, rn, imm)
 
 #define ARM_AND_R(rd, rn, rm)	_AL3_R(ARM_INST_AND, rd, rn, rm)
 #define ARM_AND_I(rd, rn, imm)	_AL3_I(ARM_INST_AND, rd, rn, imm)
@@ -156,7 +184,9 @@
 #define ARM_EOR_I(rd, rn, imm)	_AL3_I(ARM_INST_EOR, rd, rn, imm)
 
 #define ARM_LDR_I(rt, rn, off)	(ARM_INST_LDR_I | (rt) << 12 | (rn) << 16 \
-				 | (off))
+				 | ((off) & 0xfff))
+#define ARM_LDR_R(rt, rn, rm)	(ARM_INST_LDR_R | (rt) << 12 | (rn) << 16 \
+				 | (rm))
 #define ARM_LDRB_I(rt, rn, off)	(ARM_INST_LDRB_I | (rt) << 12 | (rn) << 16 \
 				 | (off))
 #define ARM_LDRB_R(rt, rn, rm)	(ARM_INST_LDRB_R | (rt) << 12 | (rn) << 16 \
@@ -167,15 +197,23 @@
 				 | (rm))
 
 #define ARM_LDM(rn, regs)	(ARM_INST_LDM | (rn) << 16 | (regs))
+#define ARM_LDM_IA(rn, regs)	(ARM_INST_LDM_IA | (rn) << 16 | (regs))
 
 #define ARM_LSL_R(rd, rn, rm)	(_AL3_R(ARM_INST_LSL, rd, 0, rn) | (rm) << 8)
 #define ARM_LSL_I(rd, rn, imm)	(_AL3_I(ARM_INST_LSL, rd, 0, rn) | (imm) << 7)
 
 #define ARM_LSR_R(rd, rn, rm)	(_AL3_R(ARM_INST_LSR, rd, 0, rn) | (rm) << 8)
 #define ARM_LSR_I(rd, rn, imm)	(_AL3_I(ARM_INST_LSR, rd, 0, rn) | (imm) << 7)
+#define ARM_ASR_R(rd, rn, rm)   (_AL3_R(ARM_INST_ASR, rd, 0, rn) | (rm) << 8)
+#define ARM_ASR_I(rd, rn, imm)  (_AL3_I(ARM_INST_ASR, rd, 0, rn) | (imm) << 7)
 
 #define ARM_MOV_R(rd, rm)	_AL3_R(ARM_INST_MOV, rd, 0, rm)
+#define ARM_MOVS_R(rd, rm)	_AL3_R(ARM_INST_MOVS, rd, 0, rm)
 #define ARM_MOV_I(rd, imm)	_AL3_I(ARM_INST_MOV, rd, 0, imm)
+#define ARM_MOV_SR(rd, rm, type, rs)	\
+	(_AL3_SR(ARM_MOV_R(rd, rm)) | (type) << 5 | (rs) << 8)
+#define ARM_MOV_SI(rd, rm, type, imm6)	\
+	(ARM_MOV_R(rd, rm) | (type) << 5 | (imm6) << 7)
 
 #define ARM_MOVW(rd, imm)	\
 	(ARM_INST_MOVW | ((imm) >> 12) << 16 | (rd) << 12 | ((imm) & 0x0fff))
@@ -190,19 +228,38 @@
 
 #define ARM_ORR_R(rd, rn, rm)	_AL3_R(ARM_INST_ORR, rd, rn, rm)
 #define ARM_ORR_I(rd, rn, imm)	_AL3_I(ARM_INST_ORR, rd, rn, imm)
-#define ARM_ORR_S(rd, rn, rm, type, rs)	\
-	(ARM_ORR_R(rd, rn, rm) | (type) << 5 | (rs) << 7)
+#define ARM_ORR_SR(rd, rn, rm, type, rs)	\
+	(_AL3_SR(ARM_ORR_R(rd, rn, rm)) | (type) << 5 | (rs) << 8)
+#define ARM_ORRS_R(rd, rn, rm)	_AL3_R(ARM_INST_ORRS, rd, rn, rm)
+#define ARM_ORRS_SR(rd, rn, rm, type, rs)	\
+	(_AL3_SR(ARM_ORRS_R(rd, rn, rm)) | (type) << 5 | (rs) << 8)
+#define ARM_ORR_SI(rd, rn, rm, type, imm6)	\
+	(ARM_ORR_R(rd, rn, rm) | (type) << 5 | (imm6) << 7)
+#define ARM_ORRS_SI(rd, rn, rm, type, imm6)	\
+	(ARM_ORRS_R(rd, rn, rm) | (type) << 5 | (imm6) << 7)
 
 #define ARM_REV(rd, rm)		(ARM_INST_REV | (rd) << 12 | (rm))
 #define ARM_REV16(rd, rm)	(ARM_INST_REV16 | (rd) << 12 | (rm))
 
 #define ARM_RSB_I(rd, rn, imm)	_AL3_I(ARM_INST_RSB, rd, rn, imm)
+#define ARM_RSBS_I(rd, rn, imm)	_AL3_I(ARM_INST_RSBS, rd, rn, imm)
+#define ARM_RSC_I(rd, rn, imm)	_AL3_I(ARM_INST_RSC, rd, rn, imm)
 
 #define ARM_SUB_R(rd, rn, rm)	_AL3_R(ARM_INST_SUB, rd, rn, rm)
+#define ARM_SUBS_R(rd, rn, rm)	_AL3_R(ARM_INST_SUBS, rd, rn, rm)
+#define ARM_RSB_R(rd, rn, rm)	_AL3_R(ARM_INST_RSB, rd, rn, rm)
+#define ARM_SBC_R(rd, rn, rm)	_AL3_R(ARM_INST_SBC, rd, rn, rm)
+#define ARM_SBCS_R(rd, rn, rm)	_AL3_R(ARM_INST_SBCS, rd, rn, rm)
 #define ARM_SUB_I(rd, rn, imm)	_AL3_I(ARM_INST_SUB, rd, rn, imm)
+#define ARM_SUBS_I(rd, rn, imm)	_AL3_I(ARM_INST_SUBS, rd, rn, imm)
+#define ARM_SBC_I(rd, rn, imm)	_AL3_I(ARM_INST_SBC, rd, rn, imm)
 
 #define ARM_STR_I(rt, rn, off)	(ARM_INST_STR_I | (rt) << 12 | (rn) << 16 \
-				 | (off))
+				 | ((off) & 0xfff))
+#define ARM_STRH_I(rt, rn, off)	(ARM_INST_STRH_I | (rt) << 12 | (rn) << 16 \
+				 | (((off) & 0xf0) << 4) | ((off) & 0xf))
+#define ARM_STRB_I(rt, rn, off)	(ARM_INST_STRB_I | (rt) << 12 | (rn) << 16 \
+				 | (((off) & 0xf0) << 4) | ((off) & 0xf))
 
 #define ARM_TST_R(rn, rm)	_AL3_R(ARM_INST_TST, 0, rn, rm)
 #define ARM_TST_I(rn, imm)	_AL3_I(ARM_INST_TST, 0, rn, imm)
@@ -214,5 +271,6 @@
 
 #define ARM_MLS(rd, rn, rm, ra)	(ARM_INST_MLS | (rd) << 16 | (rn) | (rm) << 8 \
 				 | (ra) << 12)
+#define ARM_UXTH(rd, rm)	(ARM_INST_UXTH | (rd) << 12 | (rm))
 
 #endif /* PFILTER_OPCODES_ARM_H */
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index dfd908630631..0df64a6a56d4 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -75,6 +75,7 @@ config ARM64
 	select HAVE_ARCH_SECCOMP_FILTER
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_ARCH_TRANSPARENT_HUGEPAGE
+	select HAVE_ARCH_VMAP_STACK
 	select HAVE_ARM_SMCCC
 	select HAVE_EBPF_JIT
 	select HAVE_C_RECORDMCOUNT
@@ -960,6 +961,18 @@ config ARM64_UAO
 	  regular load/store instructions if the cpu does not implement the
 	  feature.
 
+config ARM64_PMEM
+	bool "Enable support for persistent memory"
+	select ARCH_HAS_PMEM_API
+	select ARCH_HAS_UACCESS_FLUSHCACHE
+	help
+	  Say Y to enable support for the persistent memory API based on the
+	  ARMv8.2 DCPoP feature.
+
+	  The feature is detected at runtime, and the kernel will use DC CVAC
+	  operations if DC CVAP is not supported (following the behaviour of
+	  DC CVAP itself if the system does not define a point of persistence).
+
 endmenu
 
 config ARM64_MODULE_CMODEL_LARGE
diff --git a/arch/arm64/boot/dts/exynos/exynos5433-tm2-common.dtsi b/arch/arm64/boot/dts/exynos/exynos5433-tm2-common.dtsi
index e2b0da2c0bc7..105b2938082f 100644
--- a/arch/arm64/boot/dts/exynos/exynos5433-tm2-common.dtsi
+++ b/arch/arm64/boot/dts/exynos/exynos5433-tm2-common.dtsi
@@ -280,9 +280,6 @@
 
 &decon {
 	status = "okay";
-
-	i80-if-timings {
-	};
 };
 
 &decon_tv {
@@ -1116,9 +1113,6 @@
 
 &mic {
 	status = "okay";
-
-	i80-if-timings {
-	};
 };
 
 &pmu_system_controller {
diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi
index 31fd77f82ced..d16b9cc1e825 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi
+++ b/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi
@@ -653,21 +653,21 @@
 		};
 
 		msi1: msi-controller1@1571000 {
-			compatible = "fsl,1s1043a-msi";
+			compatible = "fsl,ls1043a-msi";
 			reg = <0x0 0x1571000 0x0 0x8>;
 			msi-controller;
 			interrupts = <0 116 0x4>;
 		};
 
 		msi2: msi-controller2@1572000 {
-			compatible = "fsl,1s1043a-msi";
+			compatible = "fsl,ls1043a-msi";
 			reg = <0x0 0x1572000 0x0 0x8>;
 			msi-controller;
 			interrupts = <0 126 0x4>;
 		};
 
 		msi3: msi-controller3@1573000 {
-			compatible = "fsl,1s1043a-msi";
+			compatible = "fsl,ls1043a-msi";
 			reg = <0x0 0x1573000 0x0 0x8>;
 			msi-controller;
 			interrupts = <0 160 0x4>;
@@ -689,7 +689,7 @@
 			bus-range = <0x0 0xff>;
 			ranges = <0x81000000 0x0 0x00000000 0x40 0x00010000 0x0 0x00010000   /* downstream I/O */
 				  0x82000000 0x0 0x40000000 0x40 0x40000000 0x0 0x40000000>; /* non-prefetchable memory */
-			msi-parent = <&msi1>;
+			msi-parent = <&msi1>, <&msi2>, <&msi3>;
 			#interrupt-cells = <1>;
 			interrupt-map-mask = <0 0 0 7>;
 			interrupt-map = <0000 0 0 1 &gic 0 110 0x4>,
@@ -714,7 +714,7 @@
 			bus-range = <0x0 0xff>;
 			ranges = <0x81000000 0x0 0x00000000 0x48 0x00010000 0x0 0x00010000   /* downstream I/O */
 				  0x82000000 0x0 0x40000000 0x48 0x40000000 0x0 0x40000000>; /* non-prefetchable memory */
-			msi-parent = <&msi2>;
+			msi-parent = <&msi1>, <&msi2>, <&msi3>;
 			#interrupt-cells = <1>;
 			interrupt-map-mask = <0 0 0 7>;
 			interrupt-map = <0000 0 0 1 &gic 0 120  0x4>,
@@ -739,7 +739,7 @@
 			bus-range = <0x0 0xff>;
 			ranges = <0x81000000 0x0 0x00000000 0x50 0x00010000 0x0 0x00010000   /* downstream I/O */
 				  0x82000000 0x0 0x40000000 0x50 0x40000000 0x0 0x40000000>; /* non-prefetchable memory */
-			msi-parent = <&msi3>;
+			msi-parent = <&msi1>, <&msi2>, <&msi3>;
 			#interrupt-cells = <1>;
 			interrupt-map-mask = <0 0 0 7>;
 			interrupt-map = <0000 0 0 1 &gic 0 154 0x4>,
diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi
index dc1640be0345..c8ff0baddf1d 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi
+++ b/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi
@@ -630,6 +630,37 @@
 			interrupts = <GIC_SPI 69 IRQ_TYPE_LEVEL_HIGH>;
 			clocks = <&clockgen 4 1>;
 		};
+
+		msi1: msi-controller@1580000 {
+			compatible = "fsl,ls1046a-msi";
+			msi-controller;
+			reg = <0x0 0x1580000 0x0 0x10000>;
+			interrupts = <GIC_SPI 116 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 111 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 112 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 113 IRQ_TYPE_LEVEL_HIGH>;
+		};
+
+		msi2: msi-controller@1590000 {
+			compatible = "fsl,ls1046a-msi";
+			msi-controller;
+			reg = <0x0 0x1590000 0x0 0x10000>;
+			interrupts = <GIC_SPI 126 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 121 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 122 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 123 IRQ_TYPE_LEVEL_HIGH>;
+		};
+
+		msi3: msi-controller@15a0000 {
+			compatible = "fsl,ls1046a-msi";
+			msi-controller;
+			reg = <0x0 0x15a0000 0x0 0x10000>;
+			interrupts = <GIC_SPI 160 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 155 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 156 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 157 IRQ_TYPE_LEVEL_HIGH>;
+		};
+
 	};
 
 	reserved-memory {
diff --git a/arch/arm64/boot/dts/rockchip/rk3328-evb.dts b/arch/arm64/boot/dts/rockchip/rk3328-evb.dts
index cf272392cebf..b9f36dad17e6 100644
--- a/arch/arm64/boot/dts/rockchip/rk3328-evb.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3328-evb.dts
@@ -50,6 +50,23 @@
 	chosen {
 		stdout-path = "serial2:1500000n8";
 	};
+
+	vcc_phy: vcc-phy-regulator {
+		compatible = "regulator-fixed";
+		regulator-name = "vcc_phy";
+		regulator-always-on;
+		regulator-boot-on;
+	};
+};
+
+&gmac2phy {
+	phy-supply = <&vcc_phy>;
+	clock_in_out = "output";
+	assigned-clocks = <&cru SCLK_MAC2PHY_SRC>;
+	assigned-clock-rate = <50000000>;
+	assigned-clocks = <&cru SCLK_MAC2PHY>;
+	assigned-clock-parents = <&cru SCLK_MAC2PHY_SRC>;
+	status = "okay";
 };
 
 &uart2 {
diff --git a/arch/arm64/boot/dts/rockchip/rk3328.dtsi b/arch/arm64/boot/dts/rockchip/rk3328.dtsi
index 0be96cee27bd..d48bf5d9f8bd 100644
--- a/arch/arm64/boot/dts/rockchip/rk3328.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3328.dtsi
@@ -63,6 +63,8 @@
 		i2c1 = &i2c1;
 		i2c2 = &i2c2;
 		i2c3 = &i2c3;
+		ethernet0 = &gmac2io;
+		ethernet1 = &gmac2phy;
 	};
 
 	cpus {
@@ -424,6 +426,43 @@
 		status = "disabled";
 	};
 
+	gmac2phy: ethernet@ff550000 {
+		compatible = "rockchip,rk3328-gmac";
+		reg = <0x0 0xff550000 0x0 0x10000>;
+		rockchip,grf = <&grf>;
+		interrupts = <GIC_SPI 21 IRQ_TYPE_LEVEL_HIGH>;
+		interrupt-names = "macirq";
+		clocks = <&cru SCLK_MAC2PHY_SRC>, <&cru SCLK_MAC2PHY_RXTX>,
+			 <&cru SCLK_MAC2PHY_RXTX>, <&cru SCLK_MAC2PHY_REF>,
+			 <&cru ACLK_MAC2PHY>, <&cru PCLK_MAC2PHY>,
+			 <&cru SCLK_MAC2PHY_OUT>;
+		clock-names = "stmmaceth", "mac_clk_rx",
+			      "mac_clk_tx", "clk_mac_ref",
+			      "aclk_mac", "pclk_mac",
+			      "clk_macphy";
+		resets = <&cru SRST_GMAC2PHY_A>, <&cru SRST_MACPHY>;
+		reset-names = "stmmaceth", "mac-phy";
+		phy-mode = "rmii";
+		phy-handle = <&phy>;
+		status = "disabled";
+
+		mdio {
+			compatible = "snps,dwmac-mdio";
+			#address-cells = <1>;
+			#size-cells = <0>;
+
+			phy: phy@0 {
+				compatible = "ethernet-phy-id1234.d400", "ethernet-phy-ieee802.3-c22";
+				reg = <0>;
+				clocks = <&cru SCLK_MAC2PHY_OUT>;
+				resets = <&cru SRST_MACPHY>;
+				pinctrl-names = "default";
+				pinctrl-0 = <&fephyled_rxm1 &fephyled_linkm1>;
+				phy-is-integrated;
+			};
+		};
+	};
+
 	gic: interrupt-controller@ff811000 {
 		compatible = "arm,gic-400";
 		#interrupt-cells = <3>;
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index b4ca115b3be1..cdde4f56a281 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -203,6 +203,7 @@ CONFIG_MARVELL_PHY=m
 CONFIG_MESON_GXL_PHY=m
 CONFIG_MICREL_PHY=y
 CONFIG_REALTEK_PHY=m
+CONFIG_ROCKCHIP_PHY=y
 CONFIG_USB_PEGASUS=m
 CONFIG_USB_RTL8150=m
 CONFIG_USB_RTL8152=m
diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
index d92293747d63..7ca54a76f6b9 100644
--- a/arch/arm64/crypto/Kconfig
+++ b/arch/arm64/crypto/Kconfig
@@ -18,18 +18,23 @@ config CRYPTO_SHA512_ARM64
 
 config CRYPTO_SHA1_ARM64_CE
 	tristate "SHA-1 digest algorithm (ARMv8 Crypto Extensions)"
-	depends on ARM64 && KERNEL_MODE_NEON
+	depends on KERNEL_MODE_NEON
 	select CRYPTO_HASH
+	select CRYPTO_SHA1
 
 config CRYPTO_SHA2_ARM64_CE
 	tristate "SHA-224/SHA-256 digest algorithm (ARMv8 Crypto Extensions)"
-	depends on ARM64 && KERNEL_MODE_NEON
+	depends on KERNEL_MODE_NEON
 	select CRYPTO_HASH
+	select CRYPTO_SHA256_ARM64
 
 config CRYPTO_GHASH_ARM64_CE
-	tristate "GHASH (for GCM chaining mode) using ARMv8 Crypto Extensions"
-	depends on ARM64 && KERNEL_MODE_NEON
+	tristate "GHASH/AES-GCM using ARMv8 Crypto Extensions"
+	depends on KERNEL_MODE_NEON
 	select CRYPTO_HASH
+	select CRYPTO_GF128MUL
+	select CRYPTO_AES
+	select CRYPTO_AES_ARM64
 
 config CRYPTO_CRCT10DIF_ARM64_CE
 	tristate "CRCT10DIF digest algorithm using PMULL instructions"
@@ -49,25 +54,29 @@ config CRYPTO_AES_ARM64_CE
 	tristate "AES core cipher using ARMv8 Crypto Extensions"
 	depends on ARM64 && KERNEL_MODE_NEON
 	select CRYPTO_ALGAPI
+	select CRYPTO_AES_ARM64
 
 config CRYPTO_AES_ARM64_CE_CCM
 	tristate "AES in CCM mode using ARMv8 Crypto Extensions"
 	depends on ARM64 && KERNEL_MODE_NEON
 	select CRYPTO_ALGAPI
 	select CRYPTO_AES_ARM64_CE
+	select CRYPTO_AES_ARM64
 	select CRYPTO_AEAD
 
 config CRYPTO_AES_ARM64_CE_BLK
 	tristate "AES in ECB/CBC/CTR/XTS modes using ARMv8 Crypto Extensions"
-	depends on ARM64 && KERNEL_MODE_NEON
+	depends on KERNEL_MODE_NEON
 	select CRYPTO_BLKCIPHER
 	select CRYPTO_AES_ARM64_CE
+	select CRYPTO_AES_ARM64
 	select CRYPTO_SIMD
 
 config CRYPTO_AES_ARM64_NEON_BLK
 	tristate "AES in ECB/CBC/CTR/XTS modes using NEON instructions"
-	depends on ARM64 && KERNEL_MODE_NEON
+	depends on KERNEL_MODE_NEON
 	select CRYPTO_BLKCIPHER
+	select CRYPTO_AES_ARM64
 	select CRYPTO_AES
 	select CRYPTO_SIMD
 
@@ -82,6 +91,7 @@ config CRYPTO_AES_ARM64_BS
 	depends on KERNEL_MODE_NEON
 	select CRYPTO_BLKCIPHER
 	select CRYPTO_AES_ARM64_NEON_BLK
+	select CRYPTO_AES_ARM64
 	select CRYPTO_SIMD
 
 endif
diff --git a/arch/arm64/crypto/aes-ce-ccm-core.S b/arch/arm64/crypto/aes-ce-ccm-core.S
index 3363560c79b7..e3a375c4cb83 100644
--- a/arch/arm64/crypto/aes-ce-ccm-core.S
+++ b/arch/arm64/crypto/aes-ce-ccm-core.S
@@ -1,7 +1,7 @@
 /*
  * aesce-ccm-core.S - AES-CCM transform for ARMv8 with Crypto Extensions
  *
- * Copyright (C) 2013 - 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -32,7 +32,7 @@ ENTRY(ce_aes_ccm_auth_data)
 	beq	8f				/* out of input? */
 	cbnz	w8, 0b
 	eor	v0.16b, v0.16b, v1.16b
-1:	ld1	{v3.16b}, [x4]			/* load first round key */
+1:	ld1	{v3.4s}, [x4]			/* load first round key */
 	prfm	pldl1strm, [x1]
 	cmp	w5, #12				/* which key size? */
 	add	x6, x4, #16
@@ -42,17 +42,17 @@ ENTRY(ce_aes_ccm_auth_data)
 	mov	v5.16b, v3.16b
 	b	4f
 2:	mov	v4.16b, v3.16b
-	ld1	{v5.16b}, [x6], #16		/* load 2nd round key */
+	ld1	{v5.4s}, [x6], #16		/* load 2nd round key */
 3:	aese	v0.16b, v4.16b
 	aesmc	v0.16b, v0.16b
-4:	ld1	{v3.16b}, [x6], #16		/* load next round key */
+4:	ld1	{v3.4s}, [x6], #16		/* load next round key */
 	aese	v0.16b, v5.16b
 	aesmc	v0.16b, v0.16b
-5:	ld1	{v4.16b}, [x6], #16		/* load next round key */
+5:	ld1	{v4.4s}, [x6], #16		/* load next round key */
 	subs	w7, w7, #3
 	aese	v0.16b, v3.16b
 	aesmc	v0.16b, v0.16b
-	ld1	{v5.16b}, [x6], #16		/* load next round key */
+	ld1	{v5.4s}, [x6], #16		/* load next round key */
 	bpl	3b
 	aese	v0.16b, v4.16b
 	subs	w2, w2, #16			/* last data? */
@@ -90,7 +90,7 @@ ENDPROC(ce_aes_ccm_auth_data)
 	 * 			 u32 rounds);
 	 */
 ENTRY(ce_aes_ccm_final)
-	ld1	{v3.16b}, [x2], #16		/* load first round key */
+	ld1	{v3.4s}, [x2], #16		/* load first round key */
 	ld1	{v0.16b}, [x0]			/* load mac */
 	cmp	w3, #12				/* which key size? */
 	sub	w3, w3, #2			/* modified # of rounds */
@@ -100,17 +100,17 @@ ENTRY(ce_aes_ccm_final)
 	mov	v5.16b, v3.16b
 	b	2f
 0:	mov	v4.16b, v3.16b
-1:	ld1	{v5.16b}, [x2], #16		/* load next round key */
+1:	ld1	{v5.4s}, [x2], #16		/* load next round key */
 	aese	v0.16b, v4.16b
 	aesmc	v0.16b, v0.16b
 	aese	v1.16b, v4.16b
 	aesmc	v1.16b, v1.16b
-2:	ld1	{v3.16b}, [x2], #16		/* load next round key */
+2:	ld1	{v3.4s}, [x2], #16		/* load next round key */
 	aese	v0.16b, v5.16b
 	aesmc	v0.16b, v0.16b
 	aese	v1.16b, v5.16b
 	aesmc	v1.16b, v1.16b
-3:	ld1	{v4.16b}, [x2], #16		/* load next round key */
+3:	ld1	{v4.4s}, [x2], #16		/* load next round key */
 	subs	w3, w3, #3
 	aese	v0.16b, v3.16b
 	aesmc	v0.16b, v0.16b
@@ -137,31 +137,31 @@ CPU_LE(	rev	x8, x8			)	/* keep swabbed ctr in reg */
 	cmp	w4, #12				/* which key size? */
 	sub	w7, w4, #2			/* get modified # of rounds */
 	ins	v1.d[1], x9			/* no carry in lower ctr */
-	ld1	{v3.16b}, [x3]			/* load first round key */
+	ld1	{v3.4s}, [x3]			/* load first round key */
 	add	x10, x3, #16
 	bmi	1f
 	bne	4f
 	mov	v5.16b, v3.16b
 	b	3f
 1:	mov	v4.16b, v3.16b
-	ld1	{v5.16b}, [x10], #16		/* load 2nd round key */
+	ld1	{v5.4s}, [x10], #16		/* load 2nd round key */
 2:	/* inner loop: 3 rounds, 2x interleaved */
 	aese	v0.16b, v4.16b
 	aesmc	v0.16b, v0.16b
 	aese	v1.16b, v4.16b
 	aesmc	v1.16b, v1.16b
-3:	ld1	{v3.16b}, [x10], #16		/* load next round key */
+3:	ld1	{v3.4s}, [x10], #16		/* load next round key */
 	aese	v0.16b, v5.16b
 	aesmc	v0.16b, v0.16b
 	aese	v1.16b, v5.16b
 	aesmc	v1.16b, v1.16b
-4:	ld1	{v4.16b}, [x10], #16		/* load next round key */
+4:	ld1	{v4.4s}, [x10], #16		/* load next round key */
 	subs	w7, w7, #3
 	aese	v0.16b, v3.16b
 	aesmc	v0.16b, v0.16b
 	aese	v1.16b, v3.16b
 	aesmc	v1.16b, v1.16b
-	ld1	{v5.16b}, [x10], #16		/* load next round key */
+	ld1	{v5.4s}, [x10], #16		/* load next round key */
 	bpl	2b
 	aese	v0.16b, v4.16b
 	aese	v1.16b, v4.16b
diff --git a/arch/arm64/crypto/aes-ce-ccm-glue.c b/arch/arm64/crypto/aes-ce-ccm-glue.c
index 6a7dbc7c83a6..a1254036f2b1 100644
--- a/arch/arm64/crypto/aes-ce-ccm-glue.c
+++ b/arch/arm64/crypto/aes-ce-ccm-glue.c
@@ -1,7 +1,7 @@
 /*
  * aes-ccm-glue.c - AES-CCM transform for ARMv8 with Crypto Extensions
  *
- * Copyright (C) 2013 - 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -9,6 +9,7 @@
  */
 
 #include <asm/neon.h>
+#include <asm/simd.h>
 #include <asm/unaligned.h>
 #include <crypto/aes.h>
 #include <crypto/scatterwalk.h>
@@ -44,6 +45,8 @@ asmlinkage void ce_aes_ccm_decrypt(u8 out[], u8 const in[], u32 cbytes,
 asmlinkage void ce_aes_ccm_final(u8 mac[], u8 const ctr[], u32 const rk[],
 				 u32 rounds);
 
+asmlinkage void __aes_arm64_encrypt(u32 *rk, u8 *out, const u8 *in, int rounds);
+
 static int ccm_setkey(struct crypto_aead *tfm, const u8 *in_key,
 		      unsigned int key_len)
 {
@@ -103,7 +106,45 @@ static int ccm_init_mac(struct aead_request *req, u8 maciv[], u32 msglen)
 	return 0;
 }
 
-static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[])
+static void ccm_update_mac(struct crypto_aes_ctx *key, u8 mac[], u8 const in[],
+			   u32 abytes, u32 *macp, bool use_neon)
+{
+	if (likely(use_neon)) {
+		ce_aes_ccm_auth_data(mac, in, abytes, macp, key->key_enc,
+				     num_rounds(key));
+	} else {
+		if (*macp > 0 && *macp < AES_BLOCK_SIZE) {
+			int added = min(abytes, AES_BLOCK_SIZE - *macp);
+
+			crypto_xor(&mac[*macp], in, added);
+
+			*macp += added;
+			in += added;
+			abytes -= added;
+		}
+
+		while (abytes > AES_BLOCK_SIZE) {
+			__aes_arm64_encrypt(key->key_enc, mac, mac,
+					    num_rounds(key));
+			crypto_xor(mac, in, AES_BLOCK_SIZE);
+
+			in += AES_BLOCK_SIZE;
+			abytes -= AES_BLOCK_SIZE;
+		}
+
+		if (abytes > 0) {
+			__aes_arm64_encrypt(key->key_enc, mac, mac,
+					    num_rounds(key));
+			crypto_xor(mac, in, abytes);
+			*macp = abytes;
+		} else {
+			*macp = 0;
+		}
+	}
+}
+
+static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[],
+				   bool use_neon)
 {
 	struct crypto_aead *aead = crypto_aead_reqtfm(req);
 	struct crypto_aes_ctx *ctx = crypto_aead_ctx(aead);
@@ -122,8 +163,7 @@ static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[])
 		ltag.len = 6;
 	}
 
-	ce_aes_ccm_auth_data(mac, (u8 *)&ltag, ltag.len, &macp, ctx->key_enc,
-			     num_rounds(ctx));
+	ccm_update_mac(ctx, mac, (u8 *)&ltag, ltag.len, &macp, use_neon);
 	scatterwalk_start(&walk, req->src);
 
 	do {
@@ -135,8 +175,7 @@ static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[])
 			n = scatterwalk_clamp(&walk, len);
 		}
 		p = scatterwalk_map(&walk);
-		ce_aes_ccm_auth_data(mac, p, n, &macp, ctx->key_enc,
-				     num_rounds(ctx));
+		ccm_update_mac(ctx, mac, p, n, &macp, use_neon);
 		len -= n;
 
 		scatterwalk_unmap(p);
@@ -145,6 +184,56 @@ static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[])
 	} while (len);
 }
 
+static int ccm_crypt_fallback(struct skcipher_walk *walk, u8 mac[], u8 iv0[],
+			      struct crypto_aes_ctx *ctx, bool enc)
+{
+	u8 buf[AES_BLOCK_SIZE];
+	int err = 0;
+
+	while (walk->nbytes) {
+		int blocks = walk->nbytes / AES_BLOCK_SIZE;
+		u32 tail = walk->nbytes % AES_BLOCK_SIZE;
+		u8 *dst = walk->dst.virt.addr;
+		u8 *src = walk->src.virt.addr;
+		u32 nbytes = walk->nbytes;
+
+		if (nbytes == walk->total && tail > 0) {
+			blocks++;
+			tail = 0;
+		}
+
+		do {
+			u32 bsize = AES_BLOCK_SIZE;
+
+			if (nbytes < AES_BLOCK_SIZE)
+				bsize = nbytes;
+
+			crypto_inc(walk->iv, AES_BLOCK_SIZE);
+			__aes_arm64_encrypt(ctx->key_enc, buf, walk->iv,
+					    num_rounds(ctx));
+			__aes_arm64_encrypt(ctx->key_enc, mac, mac,
+					    num_rounds(ctx));
+			if (enc)
+				crypto_xor(mac, src, bsize);
+			crypto_xor_cpy(dst, src, buf, bsize);
+			if (!enc)
+				crypto_xor(mac, dst, bsize);
+			dst += bsize;
+			src += bsize;
+			nbytes -= bsize;
+		} while (--blocks);
+
+		err = skcipher_walk_done(walk, tail);
+	}
+
+	if (!err) {
+		__aes_arm64_encrypt(ctx->key_enc, buf, iv0, num_rounds(ctx));
+		__aes_arm64_encrypt(ctx->key_enc, mac, mac, num_rounds(ctx));
+		crypto_xor(mac, buf, AES_BLOCK_SIZE);
+	}
+	return err;
+}
+
 static int ccm_encrypt(struct aead_request *req)
 {
 	struct crypto_aead *aead = crypto_aead_reqtfm(req);
@@ -153,39 +242,46 @@ static int ccm_encrypt(struct aead_request *req)
 	u8 __aligned(8) mac[AES_BLOCK_SIZE];
 	u8 buf[AES_BLOCK_SIZE];
 	u32 len = req->cryptlen;
+	bool use_neon = may_use_simd();
 	int err;
 
 	err = ccm_init_mac(req, mac, len);
 	if (err)
 		return err;
 
-	kernel_neon_begin_partial(6);
+	if (likely(use_neon))
+		kernel_neon_begin();
 
 	if (req->assoclen)
-		ccm_calculate_auth_mac(req, mac);
+		ccm_calculate_auth_mac(req, mac, use_neon);
 
 	/* preserve the original iv for the final round */
 	memcpy(buf, req->iv, AES_BLOCK_SIZE);
 
 	err = skcipher_walk_aead_encrypt(&walk, req, true);
 
-	while (walk.nbytes) {
-		u32 tail = walk.nbytes % AES_BLOCK_SIZE;
-
-		if (walk.nbytes == walk.total)
-			tail = 0;
+	if (likely(use_neon)) {
+		while (walk.nbytes) {
+			u32 tail = walk.nbytes % AES_BLOCK_SIZE;
 
-		ce_aes_ccm_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
-				   walk.nbytes - tail, ctx->key_enc,
-				   num_rounds(ctx), mac, walk.iv);
+			if (walk.nbytes == walk.total)
+				tail = 0;
 
-		err = skcipher_walk_done(&walk, tail);
-	}
-	if (!err)
-		ce_aes_ccm_final(mac, buf, ctx->key_enc, num_rounds(ctx));
+			ce_aes_ccm_encrypt(walk.dst.virt.addr,
+					   walk.src.virt.addr,
+					   walk.nbytes - tail, ctx->key_enc,
+					   num_rounds(ctx), mac, walk.iv);
 
-	kernel_neon_end();
+			err = skcipher_walk_done(&walk, tail);
+		}
+		if (!err)
+			ce_aes_ccm_final(mac, buf, ctx->key_enc,
+					 num_rounds(ctx));
 
+		kernel_neon_end();
+	} else {
+		err = ccm_crypt_fallback(&walk, mac, buf, ctx, true);
+	}
 	if (err)
 		return err;
 
@@ -205,38 +301,46 @@ static int ccm_decrypt(struct aead_request *req)
 	u8 __aligned(8) mac[AES_BLOCK_SIZE];
 	u8 buf[AES_BLOCK_SIZE];
 	u32 len = req->cryptlen - authsize;
+	bool use_neon = may_use_simd();
 	int err;
 
 	err = ccm_init_mac(req, mac, len);
 	if (err)
 		return err;
 
-	kernel_neon_begin_partial(6);
+	if (likely(use_neon))
+		kernel_neon_begin();
 
 	if (req->assoclen)
-		ccm_calculate_auth_mac(req, mac);
+		ccm_calculate_auth_mac(req, mac, use_neon);
 
 	/* preserve the original iv for the final round */
 	memcpy(buf, req->iv, AES_BLOCK_SIZE);
 
 	err = skcipher_walk_aead_decrypt(&walk, req, true);
 
-	while (walk.nbytes) {
-		u32 tail = walk.nbytes % AES_BLOCK_SIZE;
+	if (likely(use_neon)) {
+		while (walk.nbytes) {
+			u32 tail = walk.nbytes % AES_BLOCK_SIZE;
 
-		if (walk.nbytes == walk.total)
-			tail = 0;
+			if (walk.nbytes == walk.total)
+				tail = 0;
 
-		ce_aes_ccm_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
-				   walk.nbytes - tail, ctx->key_enc,
-				   num_rounds(ctx), mac, walk.iv);
+			ce_aes_ccm_decrypt(walk.dst.virt.addr,
+					   walk.src.virt.addr,
+					   walk.nbytes - tail, ctx->key_enc,
+					   num_rounds(ctx), mac, walk.iv);
 
-		err = skcipher_walk_done(&walk, tail);
-	}
-	if (!err)
-		ce_aes_ccm_final(mac, buf, ctx->key_enc, num_rounds(ctx));
+			err = skcipher_walk_done(&walk, tail);
+		}
+		if (!err)
+			ce_aes_ccm_final(mac, buf, ctx->key_enc,
+					 num_rounds(ctx));
 
-	kernel_neon_end();
+		kernel_neon_end();
+	} else {
+		err = ccm_crypt_fallback(&walk, mac, buf, ctx, false);
+	}
 
 	if (err)
 		return err;
diff --git a/arch/arm64/crypto/aes-ce-cipher.c b/arch/arm64/crypto/aes-ce-cipher.c
index 50d9fe11d0c8..6a75cd75ed11 100644
--- a/arch/arm64/crypto/aes-ce-cipher.c
+++ b/arch/arm64/crypto/aes-ce-cipher.c
@@ -1,7 +1,7 @@
 /*
  * aes-ce-cipher.c - core AES cipher using ARMv8 Crypto Extensions
  *
- * Copyright (C) 2013 - 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -9,6 +9,8 @@
  */
 
 #include <asm/neon.h>
+#include <asm/simd.h>
+#include <asm/unaligned.h>
 #include <crypto/aes.h>
 #include <linux/cpufeature.h>
 #include <linux/crypto.h>
@@ -20,6 +22,9 @@ MODULE_DESCRIPTION("Synchronous AES cipher using ARMv8 Crypto Extensions");
 MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
 MODULE_LICENSE("GPL v2");
 
+asmlinkage void __aes_arm64_encrypt(u32 *rk, u8 *out, const u8 *in, int rounds);
+asmlinkage void __aes_arm64_decrypt(u32 *rk, u8 *out, const u8 *in, int rounds);
+
 struct aes_block {
 	u8 b[AES_BLOCK_SIZE];
 };
@@ -44,27 +49,32 @@ static void aes_cipher_encrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[])
 	void *dummy0;
 	int dummy1;
 
-	kernel_neon_begin_partial(4);
+	if (!may_use_simd()) {
+		__aes_arm64_encrypt(ctx->key_enc, dst, src, num_rounds(ctx));
+		return;
+	}
+
+	kernel_neon_begin();
 
 	__asm__("	ld1	{v0.16b}, %[in]			;"
-		"	ld1	{v1.16b}, [%[key]], #16		;"
+		"	ld1	{v1.4s}, [%[key]], #16		;"
 		"	cmp	%w[rounds], #10			;"
 		"	bmi	0f				;"
 		"	bne	3f				;"
 		"	mov	v3.16b, v1.16b			;"
 		"	b	2f				;"
 		"0:	mov	v2.16b, v1.16b			;"
-		"	ld1	{v3.16b}, [%[key]], #16		;"
+		"	ld1	{v3.4s}, [%[key]], #16		;"
 		"1:	aese	v0.16b, v2.16b			;"
 		"	aesmc	v0.16b, v0.16b			;"
-		"2:	ld1	{v1.16b}, [%[key]], #16		;"
+		"2:	ld1	{v1.4s}, [%[key]], #16		;"
 		"	aese	v0.16b, v3.16b			;"
 		"	aesmc	v0.16b, v0.16b			;"
-		"3:	ld1	{v2.16b}, [%[key]], #16		;"
+		"3:	ld1	{v2.4s}, [%[key]], #16		;"
 		"	subs	%w[rounds], %w[rounds], #3	;"
 		"	aese	v0.16b, v1.16b			;"
 		"	aesmc	v0.16b, v0.16b			;"
-		"	ld1	{v3.16b}, [%[key]], #16		;"
+		"	ld1	{v3.4s}, [%[key]], #16		;"
 		"	bpl	1b				;"
 		"	aese	v0.16b, v2.16b			;"
 		"	eor	v0.16b, v0.16b, v3.16b		;"
@@ -89,27 +99,32 @@ static void aes_cipher_decrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[])
 	void *dummy0;
 	int dummy1;
 
-	kernel_neon_begin_partial(4);
+	if (!may_use_simd()) {
+		__aes_arm64_decrypt(ctx->key_dec, dst, src, num_rounds(ctx));
+		return;
+	}
+
+	kernel_neon_begin();
 
 	__asm__("	ld1	{v0.16b}, %[in]			;"
-		"	ld1	{v1.16b}, [%[key]], #16		;"
+		"	ld1	{v1.4s}, [%[key]], #16		;"
 		"	cmp	%w[rounds], #10			;"
 		"	bmi	0f				;"
 		"	bne	3f				;"
 		"	mov	v3.16b, v1.16b			;"
 		"	b	2f				;"
 		"0:	mov	v2.16b, v1.16b			;"
-		"	ld1	{v3.16b}, [%[key]], #16		;"
+		"	ld1	{v3.4s}, [%[key]], #16		;"
 		"1:	aesd	v0.16b, v2.16b			;"
 		"	aesimc	v0.16b, v0.16b			;"
-		"2:	ld1	{v1.16b}, [%[key]], #16		;"
+		"2:	ld1	{v1.4s}, [%[key]], #16		;"
 		"	aesd	v0.16b, v3.16b			;"
 		"	aesimc	v0.16b, v0.16b			;"
-		"3:	ld1	{v2.16b}, [%[key]], #16		;"
+		"3:	ld1	{v2.4s}, [%[key]], #16		;"
 		"	subs	%w[rounds], %w[rounds], #3	;"
 		"	aesd	v0.16b, v1.16b			;"
 		"	aesimc	v0.16b, v0.16b			;"
-		"	ld1	{v3.16b}, [%[key]], #16		;"
+		"	ld1	{v3.4s}, [%[key]], #16		;"
 		"	bpl	1b				;"
 		"	aesd	v0.16b, v2.16b			;"
 		"	eor	v0.16b, v0.16b, v3.16b		;"
@@ -165,20 +180,16 @@ int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
 	    key_len != AES_KEYSIZE_256)
 		return -EINVAL;
 
-	memcpy(ctx->key_enc, in_key, key_len);
 	ctx->key_length = key_len;
+	for (i = 0; i < kwords; i++)
+		ctx->key_enc[i] = get_unaligned_le32(in_key + i * sizeof(u32));
 
-	kernel_neon_begin_partial(2);
+	kernel_neon_begin();
 	for (i = 0; i < sizeof(rcon); i++) {
 		u32 *rki = ctx->key_enc + (i * kwords);
 		u32 *rko = rki + kwords;
 
-#ifndef CONFIG_CPU_BIG_ENDIAN
 		rko[0] = ror32(aes_sub(rki[kwords - 1]), 8) ^ rcon[i] ^ rki[0];
-#else
-		rko[0] = rol32(aes_sub(rki[kwords - 1]), 8) ^ (rcon[i] << 24) ^
-			 rki[0];
-#endif
 		rko[1] = rko[0] ^ rki[1];
 		rko[2] = rko[1] ^ rki[2];
 		rko[3] = rko[2] ^ rki[3];
@@ -210,9 +221,9 @@ int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
 
 	key_dec[0] = key_enc[j];
 	for (i = 1, j--; j > 0; i++, j--)
-		__asm__("ld1	{v0.16b}, %[in]		;"
+		__asm__("ld1	{v0.4s}, %[in]		;"
 			"aesimc	v1.16b, v0.16b		;"
-			"st1	{v1.16b}, %[out]	;"
+			"st1	{v1.4s}, %[out]	;"
 
 		:	[out]	"=Q"(key_dec[i])
 		:	[in]	"Q"(key_enc[j])
diff --git a/arch/arm64/crypto/aes-ce.S b/arch/arm64/crypto/aes-ce.S
index b46093d567e5..50330f5c3adc 100644
--- a/arch/arm64/crypto/aes-ce.S
+++ b/arch/arm64/crypto/aes-ce.S
@@ -2,7 +2,7 @@
  * linux/arch/arm64/crypto/aes-ce.S - AES cipher for ARMv8 with
  *                                    Crypto Extensions
  *
- * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -22,11 +22,11 @@
 	cmp		\rounds, #12
 	blo		2222f		/* 128 bits */
 	beq		1111f		/* 192 bits */
-	ld1		{v17.16b-v18.16b}, [\rk], #32
-1111:	ld1		{v19.16b-v20.16b}, [\rk], #32
-2222:	ld1		{v21.16b-v24.16b}, [\rk], #64
-	ld1		{v25.16b-v28.16b}, [\rk], #64
-	ld1		{v29.16b-v31.16b}, [\rk]
+	ld1		{v17.4s-v18.4s}, [\rk], #32
+1111:	ld1		{v19.4s-v20.4s}, [\rk], #32
+2222:	ld1		{v21.4s-v24.4s}, [\rk], #64
+	ld1		{v25.4s-v28.4s}, [\rk], #64
+	ld1		{v29.4s-v31.4s}, [\rk]
 	.endm
 
 	/* prepare for encryption with key in rk[] */
diff --git a/arch/arm64/crypto/aes-cipher-core.S b/arch/arm64/crypto/aes-cipher-core.S
index f2f9cc519309..6d2445d603cc 100644
--- a/arch/arm64/crypto/aes-cipher-core.S
+++ b/arch/arm64/crypto/aes-cipher-core.S
@@ -10,6 +10,7 @@
 
 #include <linux/linkage.h>
 #include <asm/assembler.h>
+#include <asm/cache.h>
 
 	.text
 
@@ -17,94 +18,155 @@
 	out		.req	x1
 	in		.req	x2
 	rounds		.req	x3
-	tt		.req	x4
-	lt		.req	x2
+	tt		.req	x2
 
-	.macro		__pair, enc, reg0, reg1, in0, in1e, in1d, shift
+	.macro		__pair1, sz, op, reg0, reg1, in0, in1e, in1d, shift
+	.ifc		\op\shift, b0
+	ubfiz		\reg0, \in0, #2, #8
+	ubfiz		\reg1, \in1e, #2, #8
+	.else
 	ubfx		\reg0, \in0, #\shift, #8
-	.if		\enc
 	ubfx		\reg1, \in1e, #\shift, #8
-	.else
-	ubfx		\reg1, \in1d, #\shift, #8
 	.endif
+
+	/*
+	 * AArch64 cannot do byte size indexed loads from a table containing
+	 * 32-bit quantities, i.e., 'ldrb w12, [tt, w12, uxtw #2]' is not a
+	 * valid instruction. So perform the shift explicitly first for the
+	 * high bytes (the low byte is shifted implicitly by using ubfiz rather
+	 * than ubfx above)
+	 */
+	.ifnc		\op, b
 	ldr		\reg0, [tt, \reg0, uxtw #2]
 	ldr		\reg1, [tt, \reg1, uxtw #2]
+	.else
+	.if		\shift > 0
+	lsl		\reg0, \reg0, #2
+	lsl		\reg1, \reg1, #2
+	.endif
+	ldrb		\reg0, [tt, \reg0, uxtw]
+	ldrb		\reg1, [tt, \reg1, uxtw]
+	.endif
 	.endm
 
-	.macro		__hround, out0, out1, in0, in1, in2, in3, t0, t1, enc
+	.macro		__pair0, sz, op, reg0, reg1, in0, in1e, in1d, shift
+	ubfx		\reg0, \in0, #\shift, #8
+	ubfx		\reg1, \in1d, #\shift, #8
+	ldr\op		\reg0, [tt, \reg0, uxtw #\sz]
+	ldr\op		\reg1, [tt, \reg1, uxtw #\sz]
+	.endm
+
+	.macro		__hround, out0, out1, in0, in1, in2, in3, t0, t1, enc, sz, op
 	ldp		\out0, \out1, [rk], #8
 
-	__pair		\enc, w13, w14, \in0, \in1, \in3, 0
-	__pair		\enc, w15, w16, \in1, \in2, \in0, 8
-	__pair		\enc, w17, w18, \in2, \in3, \in1, 16
-	__pair		\enc, \t0, \t1, \in3, \in0, \in2, 24
-
-	eor		\out0, \out0, w13
-	eor		\out1, \out1, w14
-	eor		\out0, \out0, w15, ror #24
-	eor		\out1, \out1, w16, ror #24
-	eor		\out0, \out0, w17, ror #16
-	eor		\out1, \out1, w18, ror #16
+	__pair\enc	\sz, \op, w12, w13, \in0, \in1, \in3, 0
+	__pair\enc	\sz, \op, w14, w15, \in1, \in2, \in0, 8
+	__pair\enc	\sz, \op, w16, w17, \in2, \in3, \in1, 16
+	__pair\enc	\sz, \op, \t0, \t1, \in3, \in0, \in2, 24
+
+	eor		\out0, \out0, w12
+	eor		\out1, \out1, w13
+	eor		\out0, \out0, w14, ror #24
+	eor		\out1, \out1, w15, ror #24
+	eor		\out0, \out0, w16, ror #16
+	eor		\out1, \out1, w17, ror #16
 	eor		\out0, \out0, \t0, ror #8
 	eor		\out1, \out1, \t1, ror #8
 	.endm
 
-	.macro		fround, out0, out1, out2, out3, in0, in1, in2, in3
-	__hround	\out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1
-	__hround	\out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1
+	.macro		fround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op
+	__hround	\out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1, \sz, \op
+	__hround	\out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1, \sz, \op
 	.endm
 
-	.macro		iround, out0, out1, out2, out3, in0, in1, in2, in3
-	__hround	\out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0
-	__hround	\out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0
+	.macro		iround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op
+	__hround	\out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0, \sz, \op
+	__hround	\out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op
 	.endm
 
-	.macro		do_crypt, round, ttab, ltab
-	ldp		w5, w6, [in]
-	ldp		w7, w8, [in, #8]
-	ldp		w9, w10, [rk], #16
-	ldp		w11, w12, [rk, #-8]
+	.macro		do_crypt, round, ttab, ltab, bsz
+	ldp		w4, w5, [in]
+	ldp		w6, w7, [in, #8]
+	ldp		w8, w9, [rk], #16
+	ldp		w10, w11, [rk, #-8]
 
+CPU_BE(	rev		w4, w4		)
 CPU_BE(	rev		w5, w5		)
 CPU_BE(	rev		w6, w6		)
 CPU_BE(	rev		w7, w7		)
-CPU_BE(	rev		w8, w8		)
 
+	eor		w4, w4, w8
 	eor		w5, w5, w9
 	eor		w6, w6, w10
 	eor		w7, w7, w11
-	eor		w8, w8, w12
 
 	adr_l		tt, \ttab
-	adr_l		lt, \ltab
 
 	tbnz		rounds, #1, 1f
 
-0:	\round		w9, w10, w11, w12, w5, w6, w7, w8
-	\round		w5, w6, w7, w8, w9, w10, w11, w12
+0:	\round		w8, w9, w10, w11, w4, w5, w6, w7
+	\round		w4, w5, w6, w7, w8, w9, w10, w11
 
 1:	subs		rounds, rounds, #4
-	\round		w9, w10, w11, w12, w5, w6, w7, w8
-	csel		tt, tt, lt, hi
-	\round		w5, w6, w7, w8, w9, w10, w11, w12
-	b.hi		0b
-
+	\round		w8, w9, w10, w11, w4, w5, w6, w7
+	b.ls		3f
+2:	\round		w4, w5, w6, w7, w8, w9, w10, w11
+	b		0b
+3:	adr_l		tt, \ltab
+	\round		w4, w5, w6, w7, w8, w9, w10, w11, \bsz, b
+
+CPU_BE(	rev		w4, w4		)
 CPU_BE(	rev		w5, w5		)
 CPU_BE(	rev		w6, w6		)
 CPU_BE(	rev		w7, w7		)
-CPU_BE(	rev		w8, w8		)
 
-	stp		w5, w6, [out]
-	stp		w7, w8, [out, #8]
+	stp		w4, w5, [out]
+	stp		w6, w7, [out, #8]
 	ret
 	.endm
 
-	.align		5
+	.align		L1_CACHE_SHIFT
+	.type		__aes_arm64_inverse_sbox, %object
+__aes_arm64_inverse_sbox:
+	.byte		0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
+	.byte		0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
+	.byte		0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
+	.byte		0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
+	.byte		0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
+	.byte		0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
+	.byte		0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
+	.byte		0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
+	.byte		0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
+	.byte		0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
+	.byte		0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
+	.byte		0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
+	.byte		0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
+	.byte		0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
+	.byte		0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
+	.byte		0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
+	.byte		0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
+	.byte		0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
+	.byte		0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
+	.byte		0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
+	.byte		0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
+	.byte		0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
+	.byte		0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
+	.byte		0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
+	.byte		0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
+	.byte		0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
+	.byte		0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
+	.byte		0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
+	.byte		0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
+	.byte		0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
+	.byte		0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
+	.byte		0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
+	.size		__aes_arm64_inverse_sbox, . - __aes_arm64_inverse_sbox
+
 ENTRY(__aes_arm64_encrypt)
-	do_crypt	fround, crypto_ft_tab, crypto_fl_tab
+	do_crypt	fround, crypto_ft_tab, crypto_ft_tab + 1, 2
 ENDPROC(__aes_arm64_encrypt)
 
 	.align		5
 ENTRY(__aes_arm64_decrypt)
-	do_crypt	iround, crypto_it_tab, crypto_il_tab
+	do_crypt	iround, crypto_it_tab, __aes_arm64_inverse_sbox, 0
 ENDPROC(__aes_arm64_decrypt)
diff --git a/arch/arm64/crypto/aes-ctr-fallback.h b/arch/arm64/crypto/aes-ctr-fallback.h
new file mode 100644
index 000000000000..c9285717b6b5
--- /dev/null
+++ b/arch/arm64/crypto/aes-ctr-fallback.h
@@ -0,0 +1,53 @@
+/*
+ * Fallback for sync aes(ctr) in contexts where kernel mode NEON
+ * is not allowed
+ *
+ * Copyright (C) 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <crypto/aes.h>
+#include <crypto/internal/skcipher.h>
+
+asmlinkage void __aes_arm64_encrypt(u32 *rk, u8 *out, const u8 *in, int rounds);
+
+static inline int aes_ctr_encrypt_fallback(struct crypto_aes_ctx *ctx,
+					   struct skcipher_request *req)
+{
+	struct skcipher_walk walk;
+	u8 buf[AES_BLOCK_SIZE];
+	int err;
+
+	err = skcipher_walk_virt(&walk, req, true);
+
+	while (walk.nbytes > 0) {
+		u8 *dst = walk.dst.virt.addr;
+		u8 *src = walk.src.virt.addr;
+		int nbytes = walk.nbytes;
+		int tail = 0;
+
+		if (nbytes < walk.total) {
+			nbytes = round_down(nbytes, AES_BLOCK_SIZE);
+			tail = walk.nbytes % AES_BLOCK_SIZE;
+		}
+
+		do {
+			int bsize = min(nbytes, AES_BLOCK_SIZE);
+
+			__aes_arm64_encrypt(ctx->key_enc, buf, walk.iv,
+					    6 + ctx->key_length / 4);
+			crypto_xor_cpy(dst, src, buf, bsize);
+			crypto_inc(walk.iv, AES_BLOCK_SIZE);
+
+			dst += AES_BLOCK_SIZE;
+			src += AES_BLOCK_SIZE;
+			nbytes -= AES_BLOCK_SIZE;
+		} while (nbytes > 0);
+
+		err = skcipher_walk_done(&walk, tail);
+	}
+	return err;
+}
diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c
index bcf596b0197e..998ba519a026 100644
--- a/arch/arm64/crypto/aes-glue.c
+++ b/arch/arm64/crypto/aes-glue.c
@@ -10,6 +10,7 @@
 
 #include <asm/neon.h>
 #include <asm/hwcap.h>
+#include <asm/simd.h>
 #include <crypto/aes.h>
 #include <crypto/internal/hash.h>
 #include <crypto/internal/simd.h>
@@ -19,6 +20,7 @@
 #include <crypto/xts.h>
 
 #include "aes-ce-setkey.h"
+#include "aes-ctr-fallback.h"
 
 #ifdef USE_V8_CRYPTO_EXTENSIONS
 #define MODE			"ce"
@@ -241,9 +243,7 @@ static int ctr_encrypt(struct skcipher_request *req)
 
 		aes_ctr_encrypt(tail, NULL, (u8 *)ctx->key_enc, rounds,
 				blocks, walk.iv, first);
-		if (tdst != tsrc)
-			memcpy(tdst, tsrc, nbytes);
-		crypto_xor(tdst, tail, nbytes);
+		crypto_xor_cpy(tdst, tsrc, tail, nbytes);
 		err = skcipher_walk_done(&walk, 0);
 	}
 	kernel_neon_end();
@@ -251,6 +251,17 @@ static int ctr_encrypt(struct skcipher_request *req)
 	return err;
 }
 
+static int ctr_encrypt_sync(struct skcipher_request *req)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+	if (!may_use_simd())
+		return aes_ctr_encrypt_fallback(ctx, req);
+
+	return ctr_encrypt(req);
+}
+
 static int xts_encrypt(struct skcipher_request *req)
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
@@ -357,8 +368,8 @@ static struct skcipher_alg aes_algs[] = { {
 	.ivsize		= AES_BLOCK_SIZE,
 	.chunksize	= AES_BLOCK_SIZE,
 	.setkey		= skcipher_aes_setkey,
-	.encrypt	= ctr_encrypt,
-	.decrypt	= ctr_encrypt,
+	.encrypt	= ctr_encrypt_sync,
+	.decrypt	= ctr_encrypt_sync,
 }, {
 	.base = {
 		.cra_name		= "__xts(aes)",
@@ -460,11 +471,35 @@ static int mac_init(struct shash_desc *desc)
 	return 0;
 }
 
+static void mac_do_update(struct crypto_aes_ctx *ctx, u8 const in[], int blocks,
+			  u8 dg[], int enc_before, int enc_after)
+{
+	int rounds = 6 + ctx->key_length / 4;
+
+	if (may_use_simd()) {
+		kernel_neon_begin();
+		aes_mac_update(in, ctx->key_enc, rounds, blocks, dg, enc_before,
+			       enc_after);
+		kernel_neon_end();
+	} else {
+		if (enc_before)
+			__aes_arm64_encrypt(ctx->key_enc, dg, dg, rounds);
+
+		while (blocks--) {
+			crypto_xor(dg, in, AES_BLOCK_SIZE);
+			in += AES_BLOCK_SIZE;
+
+			if (blocks || enc_after)
+				__aes_arm64_encrypt(ctx->key_enc, dg, dg,
+						    rounds);
+		}
+	}
+}
+
 static int mac_update(struct shash_desc *desc, const u8 *p, unsigned int len)
 {
 	struct mac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
 	struct mac_desc_ctx *ctx = shash_desc_ctx(desc);
-	int rounds = 6 + tctx->key.key_length / 4;
 
 	while (len > 0) {
 		unsigned int l;
@@ -476,10 +511,8 @@ static int mac_update(struct shash_desc *desc, const u8 *p, unsigned int len)
 
 			len %= AES_BLOCK_SIZE;
 
-			kernel_neon_begin();
-			aes_mac_update(p, tctx->key.key_enc, rounds, blocks,
-				       ctx->dg, (ctx->len != 0), (len != 0));
-			kernel_neon_end();
+			mac_do_update(&tctx->key, p, blocks, ctx->dg,
+				      (ctx->len != 0), (len != 0));
 
 			p += blocks * AES_BLOCK_SIZE;
 
@@ -507,11 +540,8 @@ static int cbcmac_final(struct shash_desc *desc, u8 *out)
 {
 	struct mac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
 	struct mac_desc_ctx *ctx = shash_desc_ctx(desc);
-	int rounds = 6 + tctx->key.key_length / 4;
 
-	kernel_neon_begin();
-	aes_mac_update(NULL, tctx->key.key_enc, rounds, 0, ctx->dg, 1, 0);
-	kernel_neon_end();
+	mac_do_update(&tctx->key, NULL, 0, ctx->dg, 1, 0);
 
 	memcpy(out, ctx->dg, AES_BLOCK_SIZE);
 
@@ -522,7 +552,6 @@ static int cmac_final(struct shash_desc *desc, u8 *out)
 {
 	struct mac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
 	struct mac_desc_ctx *ctx = shash_desc_ctx(desc);
-	int rounds = 6 + tctx->key.key_length / 4;
 	u8 *consts = tctx->consts;
 
 	if (ctx->len != AES_BLOCK_SIZE) {
@@ -530,9 +559,7 @@ static int cmac_final(struct shash_desc *desc, u8 *out)
 		consts += AES_BLOCK_SIZE;
 	}
 
-	kernel_neon_begin();
-	aes_mac_update(consts, tctx->key.key_enc, rounds, 1, ctx->dg, 0, 1);
-	kernel_neon_end();
+	mac_do_update(&tctx->key, consts, 1, ctx->dg, 0, 1);
 
 	memcpy(out, ctx->dg, AES_BLOCK_SIZE);
 
diff --git a/arch/arm64/crypto/aes-neonbs-glue.c b/arch/arm64/crypto/aes-neonbs-glue.c
index db2501d93550..c55d68ccb89f 100644
--- a/arch/arm64/crypto/aes-neonbs-glue.c
+++ b/arch/arm64/crypto/aes-neonbs-glue.c
@@ -1,7 +1,7 @@
 /*
  * Bit sliced AES using NEON instructions
  *
- * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2016 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -9,12 +9,15 @@
  */
 
 #include <asm/neon.h>
+#include <asm/simd.h>
 #include <crypto/aes.h>
 #include <crypto/internal/simd.h>
 #include <crypto/internal/skcipher.h>
 #include <crypto/xts.h>
 #include <linux/module.h>
 
+#include "aes-ctr-fallback.h"
+
 MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
 MODULE_LICENSE("GPL v2");
 
@@ -58,6 +61,11 @@ struct aesbs_cbc_ctx {
 	u32			enc[AES_MAX_KEYLENGTH_U32];
 };
 
+struct aesbs_ctr_ctx {
+	struct aesbs_ctx	key;		/* must be first member */
+	struct crypto_aes_ctx	fallback;
+};
+
 struct aesbs_xts_ctx {
 	struct aesbs_ctx	key;
 	u32			twkey[AES_MAX_KEYLENGTH_U32];
@@ -196,6 +204,25 @@ static int cbc_decrypt(struct skcipher_request *req)
 	return err;
 }
 
+static int aesbs_ctr_setkey_sync(struct crypto_skcipher *tfm, const u8 *in_key,
+				 unsigned int key_len)
+{
+	struct aesbs_ctr_ctx *ctx = crypto_skcipher_ctx(tfm);
+	int err;
+
+	err = crypto_aes_expand_key(&ctx->fallback, in_key, key_len);
+	if (err)
+		return err;
+
+	ctx->key.rounds = 6 + key_len / 4;
+
+	kernel_neon_begin();
+	aesbs_convert_key(ctx->key.rk, ctx->fallback.key_enc, ctx->key.rounds);
+	kernel_neon_end();
+
+	return 0;
+}
+
 static int ctr_encrypt(struct skcipher_request *req)
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
@@ -224,9 +251,8 @@ static int ctr_encrypt(struct skcipher_request *req)
 			u8 *dst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE;
 			u8 *src = walk.src.virt.addr + blocks * AES_BLOCK_SIZE;
 
-			if (dst != src)
-				memcpy(dst, src, walk.total % AES_BLOCK_SIZE);
-			crypto_xor(dst, final, walk.total % AES_BLOCK_SIZE);
+			crypto_xor_cpy(dst, src, final,
+				       walk.total % AES_BLOCK_SIZE);
 
 			err = skcipher_walk_done(&walk, 0);
 			break;
@@ -260,6 +286,17 @@ static int aesbs_xts_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
 	return aesbs_setkey(tfm, in_key, key_len);
 }
 
+static int ctr_encrypt_sync(struct skcipher_request *req)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct aesbs_ctr_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+	if (!may_use_simd())
+		return aes_ctr_encrypt_fallback(&ctx->fallback, req);
+
+	return ctr_encrypt(req);
+}
+
 static int __xts_crypt(struct skcipher_request *req,
 		       void (*fn)(u8 out[], u8 const in[], u8 const rk[],
 				  int rounds, int blocks, u8 iv[]))
@@ -356,7 +393,7 @@ static struct skcipher_alg aes_algs[] = { {
 	.base.cra_driver_name	= "ctr-aes-neonbs",
 	.base.cra_priority	= 250 - 1,
 	.base.cra_blocksize	= 1,
-	.base.cra_ctxsize	= sizeof(struct aesbs_ctx),
+	.base.cra_ctxsize	= sizeof(struct aesbs_ctr_ctx),
 	.base.cra_module	= THIS_MODULE,
 
 	.min_keysize		= AES_MIN_KEY_SIZE,
@@ -364,9 +401,9 @@ static struct skcipher_alg aes_algs[] = { {
 	.chunksize		= AES_BLOCK_SIZE,
 	.walksize		= 8 * AES_BLOCK_SIZE,
 	.ivsize			= AES_BLOCK_SIZE,
-	.setkey			= aesbs_setkey,
-	.encrypt		= ctr_encrypt,
-	.decrypt		= ctr_encrypt,
+	.setkey			= aesbs_ctr_setkey_sync,
+	.encrypt		= ctr_encrypt_sync,
+	.decrypt		= ctr_encrypt_sync,
 }, {
 	.base.cra_name		= "__xts(aes)",
 	.base.cra_driver_name	= "__xts-aes-neonbs",
diff --git a/arch/arm64/crypto/chacha20-neon-glue.c b/arch/arm64/crypto/chacha20-neon-glue.c
index a7cd575ea223..cbdb75d15cd0 100644
--- a/arch/arm64/crypto/chacha20-neon-glue.c
+++ b/arch/arm64/crypto/chacha20-neon-glue.c
@@ -1,7 +1,7 @@
 /*
  * ChaCha20 256-bit cipher algorithm, RFC7539, arm64 NEON functions
  *
- * Copyright (C) 2016 Linaro, Ltd. <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2016 - 2017 Linaro, Ltd. <ard.biesheuvel@linaro.org>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -26,6 +26,7 @@
 
 #include <asm/hwcap.h>
 #include <asm/neon.h>
+#include <asm/simd.h>
 
 asmlinkage void chacha20_block_xor_neon(u32 *state, u8 *dst, const u8 *src);
 asmlinkage void chacha20_4block_xor_neon(u32 *state, u8 *dst, const u8 *src);
@@ -64,7 +65,7 @@ static int chacha20_neon(struct skcipher_request *req)
 	u32 state[16];
 	int err;
 
-	if (req->cryptlen <= CHACHA20_BLOCK_SIZE)
+	if (!may_use_simd() || req->cryptlen <= CHACHA20_BLOCK_SIZE)
 		return crypto_chacha20_crypt(req);
 
 	err = skcipher_walk_virt(&walk, req, true);
diff --git a/arch/arm64/crypto/crc32-ce-glue.c b/arch/arm64/crypto/crc32-ce-glue.c
index eccb1ae90064..624f4137918c 100644
--- a/arch/arm64/crypto/crc32-ce-glue.c
+++ b/arch/arm64/crypto/crc32-ce-glue.c
@@ -1,7 +1,7 @@
 /*
  * Accelerated CRC32(C) using arm64 NEON and Crypto Extensions instructions
  *
- * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2016 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -19,6 +19,7 @@
 
 #include <asm/hwcap.h>
 #include <asm/neon.h>
+#include <asm/simd.h>
 #include <asm/unaligned.h>
 
 #define PMULL_MIN_LEN		64L	/* minimum size of buffer
@@ -105,10 +106,10 @@ static int crc32_pmull_update(struct shash_desc *desc, const u8 *data,
 		length -= l;
 	}
 
-	if (length >= PMULL_MIN_LEN) {
+	if (length >= PMULL_MIN_LEN && may_use_simd()) {
 		l = round_down(length, SCALE_F);
 
-		kernel_neon_begin_partial(10);
+		kernel_neon_begin();
 		*crc = crc32_pmull_le(data, l, *crc);
 		kernel_neon_end();
 
@@ -137,10 +138,10 @@ static int crc32c_pmull_update(struct shash_desc *desc, const u8 *data,
 		length -= l;
 	}
 
-	if (length >= PMULL_MIN_LEN) {
+	if (length >= PMULL_MIN_LEN && may_use_simd()) {
 		l = round_down(length, SCALE_F);
 
-		kernel_neon_begin_partial(10);
+		kernel_neon_begin();
 		*crc = crc32c_pmull_le(data, l, *crc);
 		kernel_neon_end();
 
diff --git a/arch/arm64/crypto/crct10dif-ce-glue.c b/arch/arm64/crypto/crct10dif-ce-glue.c
index 60cb590c2590..96f0cae4a022 100644
--- a/arch/arm64/crypto/crct10dif-ce-glue.c
+++ b/arch/arm64/crypto/crct10dif-ce-glue.c
@@ -1,7 +1,7 @@
 /*
  * Accelerated CRC-T10DIF using arm64 NEON and Crypto Extensions instructions
  *
- * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2016 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -18,6 +18,7 @@
 #include <crypto/internal/hash.h>
 
 #include <asm/neon.h>
+#include <asm/simd.h>
 
 #define CRC_T10DIF_PMULL_CHUNK_SIZE	16U
 
@@ -48,9 +49,13 @@ static int crct10dif_update(struct shash_desc *desc, const u8 *data,
 	}
 
 	if (length > 0) {
-		kernel_neon_begin_partial(14);
-		*crc = crc_t10dif_pmull(*crc, data, length);
-		kernel_neon_end();
+		if (may_use_simd()) {
+			kernel_neon_begin();
+			*crc = crc_t10dif_pmull(*crc, data, length);
+			kernel_neon_end();
+		} else {
+			*crc = crc_t10dif_generic(*crc, data, length);
+		}
 	}
 
 	return 0;
diff --git a/arch/arm64/crypto/ghash-ce-core.S b/arch/arm64/crypto/ghash-ce-core.S
index f0bb9f0b524f..11ebf1ae248a 100644
--- a/arch/arm64/crypto/ghash-ce-core.S
+++ b/arch/arm64/crypto/ghash-ce-core.S
@@ -1,7 +1,7 @@
 /*
  * Accelerated GHASH implementation with ARMv8 PMULL instructions.
  *
- * Copyright (C) 2014 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2014 - 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License version 2 as published
@@ -11,31 +11,215 @@
 #include <linux/linkage.h>
 #include <asm/assembler.h>
 
-	SHASH	.req	v0
-	SHASH2	.req	v1
-	T1	.req	v2
-	T2	.req	v3
-	MASK	.req	v4
-	XL	.req	v5
-	XM	.req	v6
-	XH	.req	v7
-	IN1	.req	v7
+	SHASH		.req	v0
+	SHASH2		.req	v1
+	T1		.req	v2
+	T2		.req	v3
+	MASK		.req	v4
+	XL		.req	v5
+	XM		.req	v6
+	XH		.req	v7
+	IN1		.req	v7
+
+	k00_16		.req	v8
+	k32_48		.req	v9
+
+	t3		.req	v10
+	t4		.req	v11
+	t5		.req	v12
+	t6		.req	v13
+	t7		.req	v14
+	t8		.req	v15
+	t9		.req	v16
+
+	perm1		.req	v17
+	perm2		.req	v18
+	perm3		.req	v19
+
+	sh1		.req	v20
+	sh2		.req	v21
+	sh3		.req	v22
+	sh4		.req	v23
+
+	ss1		.req	v24
+	ss2		.req	v25
+	ss3		.req	v26
+	ss4		.req	v27
 
 	.text
 	.arch		armv8-a+crypto
 
-	/*
-	 * void pmull_ghash_update(int blocks, u64 dg[], const char *src,
-	 *			   struct ghash_key const *k, const char *head)
-	 */
-ENTRY(pmull_ghash_update)
+	.macro		__pmull_p64, rd, rn, rm
+	pmull		\rd\().1q, \rn\().1d, \rm\().1d
+	.endm
+
+	.macro		__pmull2_p64, rd, rn, rm
+	pmull2		\rd\().1q, \rn\().2d, \rm\().2d
+	.endm
+
+	.macro		__pmull_p8, rq, ad, bd
+	ext		t3.8b, \ad\().8b, \ad\().8b, #1		// A1
+	ext		t5.8b, \ad\().8b, \ad\().8b, #2		// A2
+	ext		t7.8b, \ad\().8b, \ad\().8b, #3		// A3
+
+	__pmull_p8_\bd	\rq, \ad
+	.endm
+
+	.macro		__pmull2_p8, rq, ad, bd
+	tbl		t3.16b, {\ad\().16b}, perm1.16b		// A1
+	tbl		t5.16b, {\ad\().16b}, perm2.16b		// A2
+	tbl		t7.16b, {\ad\().16b}, perm3.16b		// A3
+
+	__pmull2_p8_\bd	\rq, \ad
+	.endm
+
+	.macro		__pmull_p8_SHASH, rq, ad
+	__pmull_p8_tail	\rq, \ad\().8b, SHASH.8b, 8b,, sh1, sh2, sh3, sh4
+	.endm
+
+	.macro		__pmull_p8_SHASH2, rq, ad
+	__pmull_p8_tail	\rq, \ad\().8b, SHASH2.8b, 8b,, ss1, ss2, ss3, ss4
+	.endm
+
+	.macro		__pmull2_p8_SHASH, rq, ad
+	__pmull_p8_tail	\rq, \ad\().16b, SHASH.16b, 16b, 2, sh1, sh2, sh3, sh4
+	.endm
+
+	.macro		__pmull_p8_tail, rq, ad, bd, nb, t, b1, b2, b3, b4
+	pmull\t		t3.8h, t3.\nb, \bd			// F = A1*B
+	pmull\t		t4.8h, \ad, \b1\().\nb			// E = A*B1
+	pmull\t		t5.8h, t5.\nb, \bd			// H = A2*B
+	pmull\t		t6.8h, \ad, \b2\().\nb			// G = A*B2
+	pmull\t		t7.8h, t7.\nb, \bd			// J = A3*B
+	pmull\t		t8.8h, \ad, \b3\().\nb			// I = A*B3
+	pmull\t		t9.8h, \ad, \b4\().\nb			// K = A*B4
+	pmull\t		\rq\().8h, \ad, \bd			// D = A*B
+
+	eor		t3.16b, t3.16b, t4.16b			// L = E + F
+	eor		t5.16b, t5.16b, t6.16b			// M = G + H
+	eor		t7.16b, t7.16b, t8.16b			// N = I + J
+
+	uzp1		t4.2d, t3.2d, t5.2d
+	uzp2		t3.2d, t3.2d, t5.2d
+	uzp1		t6.2d, t7.2d, t9.2d
+	uzp2		t7.2d, t7.2d, t9.2d
+
+	// t3 = (L) (P0 + P1) << 8
+	// t5 = (M) (P2 + P3) << 16
+	eor		t4.16b, t4.16b, t3.16b
+	and		t3.16b, t3.16b, k32_48.16b
+
+	// t7 = (N) (P4 + P5) << 24
+	// t9 = (K) (P6 + P7) << 32
+	eor		t6.16b, t6.16b, t7.16b
+	and		t7.16b, t7.16b, k00_16.16b
+
+	eor		t4.16b, t4.16b, t3.16b
+	eor		t6.16b, t6.16b, t7.16b
+
+	zip2		t5.2d, t4.2d, t3.2d
+	zip1		t3.2d, t4.2d, t3.2d
+	zip2		t9.2d, t6.2d, t7.2d
+	zip1		t7.2d, t6.2d, t7.2d
+
+	ext		t3.16b, t3.16b, t3.16b, #15
+	ext		t5.16b, t5.16b, t5.16b, #14
+	ext		t7.16b, t7.16b, t7.16b, #13
+	ext		t9.16b, t9.16b, t9.16b, #12
+
+	eor		t3.16b, t3.16b, t5.16b
+	eor		t7.16b, t7.16b, t9.16b
+	eor		\rq\().16b, \rq\().16b, t3.16b
+	eor		\rq\().16b, \rq\().16b, t7.16b
+	.endm
+
+	.macro		__pmull_pre_p64
+	movi		MASK.16b, #0xe1
+	shl		MASK.2d, MASK.2d, #57
+	.endm
+
+	.macro		__pmull_pre_p8
+	// k00_16 := 0x0000000000000000_000000000000ffff
+	// k32_48 := 0x00000000ffffffff_0000ffffffffffff
+	movi		k32_48.2d, #0xffffffff
+	mov		k32_48.h[2], k32_48.h[0]
+	ushr		k00_16.2d, k32_48.2d, #32
+
+	// prepare the permutation vectors
+	mov_q		x5, 0x080f0e0d0c0b0a09
+	movi		T1.8b, #8
+	dup		perm1.2d, x5
+	eor		perm1.16b, perm1.16b, T1.16b
+	ushr		perm2.2d, perm1.2d, #8
+	ushr		perm3.2d, perm1.2d, #16
+	ushr		T1.2d, perm1.2d, #24
+	sli		perm2.2d, perm1.2d, #56
+	sli		perm3.2d, perm1.2d, #48
+	sli		T1.2d, perm1.2d, #40
+
+	// precompute loop invariants
+	tbl		sh1.16b, {SHASH.16b}, perm1.16b
+	tbl		sh2.16b, {SHASH.16b}, perm2.16b
+	tbl		sh3.16b, {SHASH.16b}, perm3.16b
+	tbl		sh4.16b, {SHASH.16b}, T1.16b
+	ext		ss1.8b, SHASH2.8b, SHASH2.8b, #1
+	ext		ss2.8b, SHASH2.8b, SHASH2.8b, #2
+	ext		ss3.8b, SHASH2.8b, SHASH2.8b, #3
+	ext		ss4.8b, SHASH2.8b, SHASH2.8b, #4
+	.endm
+
+	//
+	// PMULL (64x64->128) based reduction for CPUs that can do
+	// it in a single instruction.
+	//
+	.macro		__pmull_reduce_p64
+	pmull		T2.1q, XL.1d, MASK.1d
+	eor		XM.16b, XM.16b, T1.16b
+
+	mov		XH.d[0], XM.d[1]
+	mov		XM.d[1], XL.d[0]
+
+	eor		XL.16b, XM.16b, T2.16b
+	ext		T2.16b, XL.16b, XL.16b, #8
+	pmull		XL.1q, XL.1d, MASK.1d
+	.endm
+
+	//
+	// Alternative reduction for CPUs that lack support for the
+	// 64x64->128 PMULL instruction
+	//
+	.macro		__pmull_reduce_p8
+	eor		XM.16b, XM.16b, T1.16b
+
+	mov		XL.d[1], XM.d[0]
+	mov		XH.d[0], XM.d[1]
+
+	shl		T1.2d, XL.2d, #57
+	shl		T2.2d, XL.2d, #62
+	eor		T2.16b, T2.16b, T1.16b
+	shl		T1.2d, XL.2d, #63
+	eor		T2.16b, T2.16b, T1.16b
+	ext		T1.16b, XL.16b, XH.16b, #8
+	eor		T2.16b, T2.16b, T1.16b
+
+	mov		XL.d[1], T2.d[0]
+	mov		XH.d[0], T2.d[1]
+
+	ushr		T2.2d, XL.2d, #1
+	eor		XH.16b, XH.16b, XL.16b
+	eor		XL.16b, XL.16b, T2.16b
+	ushr		T2.2d, T2.2d, #6
+	ushr		XL.2d, XL.2d, #1
+	.endm
+
+	.macro		__pmull_ghash, pn
 	ld1		{SHASH.2d}, [x3]
 	ld1		{XL.2d}, [x1]
-	movi		MASK.16b, #0xe1
 	ext		SHASH2.16b, SHASH.16b, SHASH.16b, #8
-	shl		MASK.2d, MASK.2d, #57
 	eor		SHASH2.16b, SHASH2.16b, SHASH.16b
 
+	__pmull_pre_\pn
+
 	/* do the head block first, if supplied */
 	cbz		x4, 0f
 	ld1		{T1.2d}, [x4]
@@ -52,28 +236,209 @@ CPU_LE(	rev64		T1.16b, T1.16b	)
 	eor		T1.16b, T1.16b, T2.16b
 	eor		XL.16b, XL.16b, IN1.16b
 
+	__pmull2_\pn	XH, XL, SHASH			// a1 * b1
+	eor		T1.16b, T1.16b, XL.16b
+	__pmull_\pn 	XL, XL, SHASH			// a0 * b0
+	__pmull_\pn	XM, T1, SHASH2			// (a1 + a0)(b1 + b0)
+
+	eor		T2.16b, XL.16b, XH.16b
+	ext		T1.16b, XL.16b, XH.16b, #8
+	eor		XM.16b, XM.16b, T2.16b
+
+	__pmull_reduce_\pn
+
+	eor		T2.16b, T2.16b, XH.16b
+	eor		XL.16b, XL.16b, T2.16b
+
+	cbnz		w0, 0b
+
+	st1		{XL.2d}, [x1]
+	ret
+	.endm
+
+	/*
+	 * void pmull_ghash_update(int blocks, u64 dg[], const char *src,
+	 *			   struct ghash_key const *k, const char *head)
+	 */
+ENTRY(pmull_ghash_update_p64)
+	__pmull_ghash	p64
+ENDPROC(pmull_ghash_update_p64)
+
+ENTRY(pmull_ghash_update_p8)
+	__pmull_ghash	p8
+ENDPROC(pmull_ghash_update_p8)
+
+	KS		.req	v8
+	CTR		.req	v9
+	INP		.req	v10
+
+	.macro		load_round_keys, rounds, rk
+	cmp		\rounds, #12
+	blo		2222f		/* 128 bits */
+	beq		1111f		/* 192 bits */
+	ld1		{v17.4s-v18.4s}, [\rk], #32
+1111:	ld1		{v19.4s-v20.4s}, [\rk], #32
+2222:	ld1		{v21.4s-v24.4s}, [\rk], #64
+	ld1		{v25.4s-v28.4s}, [\rk], #64
+	ld1		{v29.4s-v31.4s}, [\rk]
+	.endm
+
+	.macro		enc_round, state, key
+	aese		\state\().16b, \key\().16b
+	aesmc		\state\().16b, \state\().16b
+	.endm
+
+	.macro		enc_block, state, rounds
+	cmp		\rounds, #12
+	b.lo		2222f		/* 128 bits */
+	b.eq		1111f		/* 192 bits */
+	enc_round	\state, v17
+	enc_round	\state, v18
+1111:	enc_round	\state, v19
+	enc_round	\state, v20
+2222:	.irp		key, v21, v22, v23, v24, v25, v26, v27, v28, v29
+	enc_round	\state, \key
+	.endr
+	aese		\state\().16b, v30.16b
+	eor		\state\().16b, \state\().16b, v31.16b
+	.endm
+
+	.macro		pmull_gcm_do_crypt, enc
+	ld1		{SHASH.2d}, [x4]
+	ld1		{XL.2d}, [x1]
+	ldr		x8, [x5, #8]			// load lower counter
+
+	movi		MASK.16b, #0xe1
+	ext		SHASH2.16b, SHASH.16b, SHASH.16b, #8
+CPU_LE(	rev		x8, x8		)
+	shl		MASK.2d, MASK.2d, #57
+	eor		SHASH2.16b, SHASH2.16b, SHASH.16b
+
+	.if		\enc == 1
+	ld1		{KS.16b}, [x7]
+	.endif
+
+0:	ld1		{CTR.8b}, [x5]			// load upper counter
+	ld1		{INP.16b}, [x3], #16
+	rev		x9, x8
+	add		x8, x8, #1
+	sub		w0, w0, #1
+	ins		CTR.d[1], x9			// set lower counter
+
+	.if		\enc == 1
+	eor		INP.16b, INP.16b, KS.16b	// encrypt input
+	st1		{INP.16b}, [x2], #16
+	.endif
+
+	rev64		T1.16b, INP.16b
+
+	cmp		w6, #12
+	b.ge		2f				// AES-192/256?
+
+1:	enc_round	CTR, v21
+
+	ext		T2.16b, XL.16b, XL.16b, #8
+	ext		IN1.16b, T1.16b, T1.16b, #8
+
+	enc_round	CTR, v22
+
+	eor		T1.16b, T1.16b, T2.16b
+	eor		XL.16b, XL.16b, IN1.16b
+
+	enc_round	CTR, v23
+
 	pmull2		XH.1q, SHASH.2d, XL.2d		// a1 * b1
 	eor		T1.16b, T1.16b, XL.16b
+
+	enc_round	CTR, v24
+
 	pmull		XL.1q, SHASH.1d, XL.1d		// a0 * b0
 	pmull		XM.1q, SHASH2.1d, T1.1d		// (a1 + a0)(b1 + b0)
 
+	enc_round	CTR, v25
+
 	ext		T1.16b, XL.16b, XH.16b, #8
 	eor		T2.16b, XL.16b, XH.16b
 	eor		XM.16b, XM.16b, T1.16b
+
+	enc_round	CTR, v26
+
 	eor		XM.16b, XM.16b, T2.16b
 	pmull		T2.1q, XL.1d, MASK.1d
 
+	enc_round	CTR, v27
+
 	mov		XH.d[0], XM.d[1]
 	mov		XM.d[1], XL.d[0]
 
+	enc_round	CTR, v28
+
 	eor		XL.16b, XM.16b, T2.16b
+
+	enc_round	CTR, v29
+
 	ext		T2.16b, XL.16b, XL.16b, #8
+
+	aese		CTR.16b, v30.16b
+
 	pmull		XL.1q, XL.1d, MASK.1d
 	eor		T2.16b, T2.16b, XH.16b
+
+	eor		KS.16b, CTR.16b, v31.16b
+
 	eor		XL.16b, XL.16b, T2.16b
 
+	.if		\enc == 0
+	eor		INP.16b, INP.16b, KS.16b
+	st1		{INP.16b}, [x2], #16
+	.endif
+
 	cbnz		w0, 0b
 
+CPU_LE(	rev		x8, x8		)
 	st1		{XL.2d}, [x1]
+	str		x8, [x5, #8]			// store lower counter
+
+	.if		\enc == 1
+	st1		{KS.16b}, [x7]
+	.endif
+
+	ret
+
+2:	b.eq		3f				// AES-192?
+	enc_round	CTR, v17
+	enc_round	CTR, v18
+3:	enc_round	CTR, v19
+	enc_round	CTR, v20
+	b		1b
+	.endm
+
+	/*
+	 * void pmull_gcm_encrypt(int blocks, u64 dg[], u8 dst[], const u8 src[],
+	 *			  struct ghash_key const *k, u8 ctr[],
+	 *			  int rounds, u8 ks[])
+	 */
+ENTRY(pmull_gcm_encrypt)
+	pmull_gcm_do_crypt	1
+ENDPROC(pmull_gcm_encrypt)
+
+	/*
+	 * void pmull_gcm_decrypt(int blocks, u64 dg[], u8 dst[], const u8 src[],
+	 *			  struct ghash_key const *k, u8 ctr[],
+	 *			  int rounds)
+	 */
+ENTRY(pmull_gcm_decrypt)
+	pmull_gcm_do_crypt	0
+ENDPROC(pmull_gcm_decrypt)
+
+	/*
+	 * void pmull_gcm_encrypt_block(u8 dst[], u8 src[], u8 rk[], int rounds)
+	 */
+ENTRY(pmull_gcm_encrypt_block)
+	cbz		x2, 0f
+	load_round_keys	w3, x2
+0:	ld1		{v0.16b}, [x1]
+	enc_block	v0, w3
+	st1		{v0.16b}, [x0]
 	ret
-ENDPROC(pmull_ghash_update)
+ENDPROC(pmull_gcm_encrypt_block)
diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c
index 833ec1e3f3e9..cfc9c92814fd 100644
--- a/arch/arm64/crypto/ghash-ce-glue.c
+++ b/arch/arm64/crypto/ghash-ce-glue.c
@@ -1,7 +1,7 @@
 /*
  * Accelerated GHASH implementation with ARMv8 PMULL instructions.
  *
- * Copyright (C) 2014 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2014 - 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License version 2 as published
@@ -9,22 +9,33 @@
  */
 
 #include <asm/neon.h>
+#include <asm/simd.h>
 #include <asm/unaligned.h>
+#include <crypto/aes.h>
+#include <crypto/algapi.h>
+#include <crypto/b128ops.h>
+#include <crypto/gf128mul.h>
+#include <crypto/internal/aead.h>
 #include <crypto/internal/hash.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/scatterwalk.h>
 #include <linux/cpufeature.h>
 #include <linux/crypto.h>
 #include <linux/module.h>
 
-MODULE_DESCRIPTION("GHASH secure hash using ARMv8 Crypto Extensions");
+MODULE_DESCRIPTION("GHASH and AES-GCM using ARMv8 Crypto Extensions");
 MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
 MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_CRYPTO("ghash");
 
 #define GHASH_BLOCK_SIZE	16
 #define GHASH_DIGEST_SIZE	16
+#define GCM_IV_SIZE		12
 
 struct ghash_key {
 	u64 a;
 	u64 b;
+	be128 k;
 };
 
 struct ghash_desc_ctx {
@@ -33,8 +44,35 @@ struct ghash_desc_ctx {
 	u32 count;
 };
 
-asmlinkage void pmull_ghash_update(int blocks, u64 dg[], const char *src,
-				   struct ghash_key const *k, const char *head);
+struct gcm_aes_ctx {
+	struct crypto_aes_ctx	aes_key;
+	struct ghash_key	ghash_key;
+};
+
+asmlinkage void pmull_ghash_update_p64(int blocks, u64 dg[], const char *src,
+				       struct ghash_key const *k,
+				       const char *head);
+
+asmlinkage void pmull_ghash_update_p8(int blocks, u64 dg[], const char *src,
+				      struct ghash_key const *k,
+				      const char *head);
+
+static void (*pmull_ghash_update)(int blocks, u64 dg[], const char *src,
+				  struct ghash_key const *k,
+				  const char *head);
+
+asmlinkage void pmull_gcm_encrypt(int blocks, u64 dg[], u8 dst[],
+				  const u8 src[], struct ghash_key const *k,
+				  u8 ctr[], int rounds, u8 ks[]);
+
+asmlinkage void pmull_gcm_decrypt(int blocks, u64 dg[], u8 dst[],
+				  const u8 src[], struct ghash_key const *k,
+				  u8 ctr[], int rounds);
+
+asmlinkage void pmull_gcm_encrypt_block(u8 dst[], u8 const src[],
+					u32 const rk[], int rounds);
+
+asmlinkage void __aes_arm64_encrypt(u32 *rk, u8 *out, const u8 *in, int rounds);
 
 static int ghash_init(struct shash_desc *desc)
 {
@@ -44,6 +82,36 @@ static int ghash_init(struct shash_desc *desc)
 	return 0;
 }
 
+static void ghash_do_update(int blocks, u64 dg[], const char *src,
+			    struct ghash_key *key, const char *head)
+{
+	if (likely(may_use_simd())) {
+		kernel_neon_begin();
+		pmull_ghash_update(blocks, dg, src, key, head);
+		kernel_neon_end();
+	} else {
+		be128 dst = { cpu_to_be64(dg[1]), cpu_to_be64(dg[0]) };
+
+		do {
+			const u8 *in = src;
+
+			if (head) {
+				in = head;
+				blocks++;
+				head = NULL;
+			} else {
+				src += GHASH_BLOCK_SIZE;
+			}
+
+			crypto_xor((u8 *)&dst, in, GHASH_BLOCK_SIZE);
+			gf128mul_lle(&dst, &key->k);
+		} while (--blocks);
+
+		dg[0] = be64_to_cpu(dst.b);
+		dg[1] = be64_to_cpu(dst.a);
+	}
+}
+
 static int ghash_update(struct shash_desc *desc, const u8 *src,
 			unsigned int len)
 {
@@ -67,10 +135,9 @@ static int ghash_update(struct shash_desc *desc, const u8 *src,
 		blocks = len / GHASH_BLOCK_SIZE;
 		len %= GHASH_BLOCK_SIZE;
 
-		kernel_neon_begin_partial(8);
-		pmull_ghash_update(blocks, ctx->digest, src, key,
-				   partial ? ctx->buf : NULL);
-		kernel_neon_end();
+		ghash_do_update(blocks, ctx->digest, src, key,
+				partial ? ctx->buf : NULL);
+
 		src += blocks * GHASH_BLOCK_SIZE;
 		partial = 0;
 	}
@@ -89,9 +156,7 @@ static int ghash_final(struct shash_desc *desc, u8 *dst)
 
 		memset(ctx->buf + partial, 0, GHASH_BLOCK_SIZE - partial);
 
-		kernel_neon_begin_partial(8);
-		pmull_ghash_update(1, ctx->digest, ctx->buf, key, NULL);
-		kernel_neon_end();
+		ghash_do_update(1, ctx->digest, ctx->buf, key, NULL);
 	}
 	put_unaligned_be64(ctx->digest[1], dst);
 	put_unaligned_be64(ctx->digest[0], dst + 8);
@@ -100,16 +165,13 @@ static int ghash_final(struct shash_desc *desc, u8 *dst)
 	return 0;
 }
 
-static int ghash_setkey(struct crypto_shash *tfm,
-			const u8 *inkey, unsigned int keylen)
+static int __ghash_setkey(struct ghash_key *key,
+			  const u8 *inkey, unsigned int keylen)
 {
-	struct ghash_key *key = crypto_shash_ctx(tfm);
 	u64 a, b;
 
-	if (keylen != GHASH_BLOCK_SIZE) {
-		crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
-		return -EINVAL;
-	}
+	/* needed for the fallback */
+	memcpy(&key->k, inkey, GHASH_BLOCK_SIZE);
 
 	/* perform multiplication by 'x' in GF(2^128) */
 	b = get_unaligned_be64(inkey);
@@ -124,33 +186,418 @@ static int ghash_setkey(struct crypto_shash *tfm,
 	return 0;
 }
 
+static int ghash_setkey(struct crypto_shash *tfm,
+			const u8 *inkey, unsigned int keylen)
+{
+	struct ghash_key *key = crypto_shash_ctx(tfm);
+
+	if (keylen != GHASH_BLOCK_SIZE) {
+		crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+
+	return __ghash_setkey(key, inkey, keylen);
+}
+
 static struct shash_alg ghash_alg = {
-	.digestsize	= GHASH_DIGEST_SIZE,
-	.init		= ghash_init,
-	.update		= ghash_update,
-	.final		= ghash_final,
-	.setkey		= ghash_setkey,
-	.descsize	= sizeof(struct ghash_desc_ctx),
-	.base		= {
-		.cra_name		= "ghash",
-		.cra_driver_name	= "ghash-ce",
-		.cra_priority		= 200,
-		.cra_flags		= CRYPTO_ALG_TYPE_SHASH,
-		.cra_blocksize		= GHASH_BLOCK_SIZE,
-		.cra_ctxsize		= sizeof(struct ghash_key),
-		.cra_module		= THIS_MODULE,
-	},
+	.base.cra_name		= "ghash",
+	.base.cra_driver_name	= "ghash-ce",
+	.base.cra_priority	= 200,
+	.base.cra_flags		= CRYPTO_ALG_TYPE_SHASH,
+	.base.cra_blocksize	= GHASH_BLOCK_SIZE,
+	.base.cra_ctxsize	= sizeof(struct ghash_key),
+	.base.cra_module	= THIS_MODULE,
+
+	.digestsize		= GHASH_DIGEST_SIZE,
+	.init			= ghash_init,
+	.update			= ghash_update,
+	.final			= ghash_final,
+	.setkey			= ghash_setkey,
+	.descsize		= sizeof(struct ghash_desc_ctx),
+};
+
+static int num_rounds(struct crypto_aes_ctx *ctx)
+{
+	/*
+	 * # of rounds specified by AES:
+	 * 128 bit key		10 rounds
+	 * 192 bit key		12 rounds
+	 * 256 bit key		14 rounds
+	 * => n byte key	=> 6 + (n/4) rounds
+	 */
+	return 6 + ctx->key_length / 4;
+}
+
+static int gcm_setkey(struct crypto_aead *tfm, const u8 *inkey,
+		      unsigned int keylen)
+{
+	struct gcm_aes_ctx *ctx = crypto_aead_ctx(tfm);
+	u8 key[GHASH_BLOCK_SIZE];
+	int ret;
+
+	ret = crypto_aes_expand_key(&ctx->aes_key, inkey, keylen);
+	if (ret) {
+		tfm->base.crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+		return -EINVAL;
+	}
+
+	__aes_arm64_encrypt(ctx->aes_key.key_enc, key, (u8[AES_BLOCK_SIZE]){},
+			    num_rounds(&ctx->aes_key));
+
+	return __ghash_setkey(&ctx->ghash_key, key, sizeof(key));
+}
+
+static int gcm_setauthsize(struct crypto_aead *tfm, unsigned int authsize)
+{
+	switch (authsize) {
+	case 4:
+	case 8:
+	case 12 ... 16:
+		break;
+	default:
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static void gcm_update_mac(u64 dg[], const u8 *src, int count, u8 buf[],
+			   int *buf_count, struct gcm_aes_ctx *ctx)
+{
+	if (*buf_count > 0) {
+		int buf_added = min(count, GHASH_BLOCK_SIZE - *buf_count);
+
+		memcpy(&buf[*buf_count], src, buf_added);
+
+		*buf_count += buf_added;
+		src += buf_added;
+		count -= buf_added;
+	}
+
+	if (count >= GHASH_BLOCK_SIZE || *buf_count == GHASH_BLOCK_SIZE) {
+		int blocks = count / GHASH_BLOCK_SIZE;
+
+		ghash_do_update(blocks, dg, src, &ctx->ghash_key,
+				*buf_count ? buf : NULL);
+
+		src += blocks * GHASH_BLOCK_SIZE;
+		count %= GHASH_BLOCK_SIZE;
+		*buf_count = 0;
+	}
+
+	if (count > 0) {
+		memcpy(buf, src, count);
+		*buf_count = count;
+	}
+}
+
+static void gcm_calculate_auth_mac(struct aead_request *req, u64 dg[])
+{
+	struct crypto_aead *aead = crypto_aead_reqtfm(req);
+	struct gcm_aes_ctx *ctx = crypto_aead_ctx(aead);
+	u8 buf[GHASH_BLOCK_SIZE];
+	struct scatter_walk walk;
+	u32 len = req->assoclen;
+	int buf_count = 0;
+
+	scatterwalk_start(&walk, req->src);
+
+	do {
+		u32 n = scatterwalk_clamp(&walk, len);
+		u8 *p;
+
+		if (!n) {
+			scatterwalk_start(&walk, sg_next(walk.sg));
+			n = scatterwalk_clamp(&walk, len);
+		}
+		p = scatterwalk_map(&walk);
+
+		gcm_update_mac(dg, p, n, buf, &buf_count, ctx);
+		len -= n;
+
+		scatterwalk_unmap(p);
+		scatterwalk_advance(&walk, n);
+		scatterwalk_done(&walk, 0, len);
+	} while (len);
+
+	if (buf_count) {
+		memset(&buf[buf_count], 0, GHASH_BLOCK_SIZE - buf_count);
+		ghash_do_update(1, dg, buf, &ctx->ghash_key, NULL);
+	}
+}
+
+static void gcm_final(struct aead_request *req, struct gcm_aes_ctx *ctx,
+		      u64 dg[], u8 tag[], int cryptlen)
+{
+	u8 mac[AES_BLOCK_SIZE];
+	u128 lengths;
+
+	lengths.a = cpu_to_be64(req->assoclen * 8);
+	lengths.b = cpu_to_be64(cryptlen * 8);
+
+	ghash_do_update(1, dg, (void *)&lengths, &ctx->ghash_key, NULL);
+
+	put_unaligned_be64(dg[1], mac);
+	put_unaligned_be64(dg[0], mac + 8);
+
+	crypto_xor(tag, mac, AES_BLOCK_SIZE);
+}
+
+static int gcm_encrypt(struct aead_request *req)
+{
+	struct crypto_aead *aead = crypto_aead_reqtfm(req);
+	struct gcm_aes_ctx *ctx = crypto_aead_ctx(aead);
+	struct skcipher_walk walk;
+	u8 iv[AES_BLOCK_SIZE];
+	u8 ks[AES_BLOCK_SIZE];
+	u8 tag[AES_BLOCK_SIZE];
+	u64 dg[2] = {};
+	int err;
+
+	if (req->assoclen)
+		gcm_calculate_auth_mac(req, dg);
+
+	memcpy(iv, req->iv, GCM_IV_SIZE);
+	put_unaligned_be32(1, iv + GCM_IV_SIZE);
+
+	if (likely(may_use_simd())) {
+		kernel_neon_begin();
+
+		pmull_gcm_encrypt_block(tag, iv, ctx->aes_key.key_enc,
+					num_rounds(&ctx->aes_key));
+		put_unaligned_be32(2, iv + GCM_IV_SIZE);
+		pmull_gcm_encrypt_block(ks, iv, NULL,
+					num_rounds(&ctx->aes_key));
+		put_unaligned_be32(3, iv + GCM_IV_SIZE);
+
+		err = skcipher_walk_aead_encrypt(&walk, req, true);
+
+		while (walk.nbytes >= AES_BLOCK_SIZE) {
+			int blocks = walk.nbytes / AES_BLOCK_SIZE;
+
+			pmull_gcm_encrypt(blocks, dg, walk.dst.virt.addr,
+					  walk.src.virt.addr, &ctx->ghash_key,
+					  iv, num_rounds(&ctx->aes_key), ks);
+
+			err = skcipher_walk_done(&walk,
+						 walk.nbytes % AES_BLOCK_SIZE);
+		}
+		kernel_neon_end();
+	} else {
+		__aes_arm64_encrypt(ctx->aes_key.key_enc, tag, iv,
+				    num_rounds(&ctx->aes_key));
+		put_unaligned_be32(2, iv + GCM_IV_SIZE);
+
+		err = skcipher_walk_aead_encrypt(&walk, req, true);
+
+		while (walk.nbytes >= AES_BLOCK_SIZE) {
+			int blocks = walk.nbytes / AES_BLOCK_SIZE;
+			u8 *dst = walk.dst.virt.addr;
+			u8 *src = walk.src.virt.addr;
+
+			do {
+				__aes_arm64_encrypt(ctx->aes_key.key_enc,
+						    ks, iv,
+						    num_rounds(&ctx->aes_key));
+				crypto_xor_cpy(dst, src, ks, AES_BLOCK_SIZE);
+				crypto_inc(iv, AES_BLOCK_SIZE);
+
+				dst += AES_BLOCK_SIZE;
+				src += AES_BLOCK_SIZE;
+			} while (--blocks > 0);
+
+			ghash_do_update(walk.nbytes / AES_BLOCK_SIZE, dg,
+					walk.dst.virt.addr, &ctx->ghash_key,
+					NULL);
+
+			err = skcipher_walk_done(&walk,
+						 walk.nbytes % AES_BLOCK_SIZE);
+		}
+		if (walk.nbytes)
+			__aes_arm64_encrypt(ctx->aes_key.key_enc, ks, iv,
+					    num_rounds(&ctx->aes_key));
+	}
+
+	/* handle the tail */
+	if (walk.nbytes) {
+		u8 buf[GHASH_BLOCK_SIZE];
+
+		crypto_xor_cpy(walk.dst.virt.addr, walk.src.virt.addr, ks,
+			       walk.nbytes);
+
+		memcpy(buf, walk.dst.virt.addr, walk.nbytes);
+		memset(buf + walk.nbytes, 0, GHASH_BLOCK_SIZE - walk.nbytes);
+		ghash_do_update(1, dg, buf, &ctx->ghash_key, NULL);
+
+		err = skcipher_walk_done(&walk, 0);
+	}
+
+	if (err)
+		return err;
+
+	gcm_final(req, ctx, dg, tag, req->cryptlen);
+
+	/* copy authtag to end of dst */
+	scatterwalk_map_and_copy(tag, req->dst, req->assoclen + req->cryptlen,
+				 crypto_aead_authsize(aead), 1);
+
+	return 0;
+}
+
+static int gcm_decrypt(struct aead_request *req)
+{
+	struct crypto_aead *aead = crypto_aead_reqtfm(req);
+	struct gcm_aes_ctx *ctx = crypto_aead_ctx(aead);
+	unsigned int authsize = crypto_aead_authsize(aead);
+	struct skcipher_walk walk;
+	u8 iv[AES_BLOCK_SIZE];
+	u8 tag[AES_BLOCK_SIZE];
+	u8 buf[GHASH_BLOCK_SIZE];
+	u64 dg[2] = {};
+	int err;
+
+	if (req->assoclen)
+		gcm_calculate_auth_mac(req, dg);
+
+	memcpy(iv, req->iv, GCM_IV_SIZE);
+	put_unaligned_be32(1, iv + GCM_IV_SIZE);
+
+	if (likely(may_use_simd())) {
+		kernel_neon_begin();
+
+		pmull_gcm_encrypt_block(tag, iv, ctx->aes_key.key_enc,
+					num_rounds(&ctx->aes_key));
+		put_unaligned_be32(2, iv + GCM_IV_SIZE);
+
+		err = skcipher_walk_aead_decrypt(&walk, req, true);
+
+		while (walk.nbytes >= AES_BLOCK_SIZE) {
+			int blocks = walk.nbytes / AES_BLOCK_SIZE;
+
+			pmull_gcm_decrypt(blocks, dg, walk.dst.virt.addr,
+					  walk.src.virt.addr, &ctx->ghash_key,
+					  iv, num_rounds(&ctx->aes_key));
+
+			err = skcipher_walk_done(&walk,
+						 walk.nbytes % AES_BLOCK_SIZE);
+		}
+		if (walk.nbytes)
+			pmull_gcm_encrypt_block(iv, iv, NULL,
+						num_rounds(&ctx->aes_key));
+
+		kernel_neon_end();
+	} else {
+		__aes_arm64_encrypt(ctx->aes_key.key_enc, tag, iv,
+				    num_rounds(&ctx->aes_key));
+		put_unaligned_be32(2, iv + GCM_IV_SIZE);
+
+		err = skcipher_walk_aead_decrypt(&walk, req, true);
+
+		while (walk.nbytes >= AES_BLOCK_SIZE) {
+			int blocks = walk.nbytes / AES_BLOCK_SIZE;
+			u8 *dst = walk.dst.virt.addr;
+			u8 *src = walk.src.virt.addr;
+
+			ghash_do_update(blocks, dg, walk.src.virt.addr,
+					&ctx->ghash_key, NULL);
+
+			do {
+				__aes_arm64_encrypt(ctx->aes_key.key_enc,
+						    buf, iv,
+						    num_rounds(&ctx->aes_key));
+				crypto_xor_cpy(dst, src, buf, AES_BLOCK_SIZE);
+				crypto_inc(iv, AES_BLOCK_SIZE);
+
+				dst += AES_BLOCK_SIZE;
+				src += AES_BLOCK_SIZE;
+			} while (--blocks > 0);
+
+			err = skcipher_walk_done(&walk,
+						 walk.nbytes % AES_BLOCK_SIZE);
+		}
+		if (walk.nbytes)
+			__aes_arm64_encrypt(ctx->aes_key.key_enc, iv, iv,
+					    num_rounds(&ctx->aes_key));
+	}
+
+	/* handle the tail */
+	if (walk.nbytes) {
+		memcpy(buf, walk.src.virt.addr, walk.nbytes);
+		memset(buf + walk.nbytes, 0, GHASH_BLOCK_SIZE - walk.nbytes);
+		ghash_do_update(1, dg, buf, &ctx->ghash_key, NULL);
+
+		crypto_xor_cpy(walk.dst.virt.addr, walk.src.virt.addr, iv,
+			       walk.nbytes);
+
+		err = skcipher_walk_done(&walk, 0);
+	}
+
+	if (err)
+		return err;
+
+	gcm_final(req, ctx, dg, tag, req->cryptlen - authsize);
+
+	/* compare calculated auth tag with the stored one */
+	scatterwalk_map_and_copy(buf, req->src,
+				 req->assoclen + req->cryptlen - authsize,
+				 authsize, 0);
+
+	if (crypto_memneq(tag, buf, authsize))
+		return -EBADMSG;
+	return 0;
+}
+
+static struct aead_alg gcm_aes_alg = {
+	.ivsize			= GCM_IV_SIZE,
+	.chunksize		= AES_BLOCK_SIZE,
+	.maxauthsize		= AES_BLOCK_SIZE,
+	.setkey			= gcm_setkey,
+	.setauthsize		= gcm_setauthsize,
+	.encrypt		= gcm_encrypt,
+	.decrypt		= gcm_decrypt,
+
+	.base.cra_name		= "gcm(aes)",
+	.base.cra_driver_name	= "gcm-aes-ce",
+	.base.cra_priority	= 300,
+	.base.cra_blocksize	= 1,
+	.base.cra_ctxsize	= sizeof(struct gcm_aes_ctx),
+	.base.cra_module	= THIS_MODULE,
 };
 
 static int __init ghash_ce_mod_init(void)
 {
-	return crypto_register_shash(&ghash_alg);
+	int ret;
+
+	if (!(elf_hwcap & HWCAP_ASIMD))
+		return -ENODEV;
+
+	if (elf_hwcap & HWCAP_PMULL)
+		pmull_ghash_update = pmull_ghash_update_p64;
+
+	else
+		pmull_ghash_update = pmull_ghash_update_p8;
+
+	ret = crypto_register_shash(&ghash_alg);
+	if (ret)
+		return ret;
+
+	if (elf_hwcap & HWCAP_PMULL) {
+		ret = crypto_register_aead(&gcm_aes_alg);
+		if (ret)
+			crypto_unregister_shash(&ghash_alg);
+	}
+	return ret;
 }
 
 static void __exit ghash_ce_mod_exit(void)
 {
 	crypto_unregister_shash(&ghash_alg);
+	crypto_unregister_aead(&gcm_aes_alg);
 }
 
-module_cpu_feature_match(PMULL, ghash_ce_mod_init);
+static const struct cpu_feature ghash_cpu_feature[] = {
+	{ cpu_feature(PMULL) }, { }
+};
+MODULE_DEVICE_TABLE(cpu, ghash_cpu_feature);
+
+module_init(ghash_ce_mod_init);
 module_exit(ghash_ce_mod_exit);
diff --git a/arch/arm64/crypto/sha1-ce-glue.c b/arch/arm64/crypto/sha1-ce-glue.c
index ea319c055f5d..efbeb3e0dcfb 100644
--- a/arch/arm64/crypto/sha1-ce-glue.c
+++ b/arch/arm64/crypto/sha1-ce-glue.c
@@ -1,7 +1,7 @@
 /*
  * sha1-ce-glue.c - SHA-1 secure hash using ARMv8 Crypto Extensions
  *
- * Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2014 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -9,6 +9,7 @@
  */
 
 #include <asm/neon.h>
+#include <asm/simd.h>
 #include <asm/unaligned.h>
 #include <crypto/internal/hash.h>
 #include <crypto/sha.h>
@@ -37,8 +38,11 @@ static int sha1_ce_update(struct shash_desc *desc, const u8 *data,
 {
 	struct sha1_ce_state *sctx = shash_desc_ctx(desc);
 
+	if (!may_use_simd())
+		return crypto_sha1_update(desc, data, len);
+
 	sctx->finalize = 0;
-	kernel_neon_begin_partial(16);
+	kernel_neon_begin();
 	sha1_base_do_update(desc, data, len,
 			    (sha1_block_fn *)sha1_ce_transform);
 	kernel_neon_end();
@@ -52,13 +56,16 @@ static int sha1_ce_finup(struct shash_desc *desc, const u8 *data,
 	struct sha1_ce_state *sctx = shash_desc_ctx(desc);
 	bool finalize = !sctx->sst.count && !(len % SHA1_BLOCK_SIZE);
 
+	if (!may_use_simd())
+		return crypto_sha1_finup(desc, data, len, out);
+
 	/*
 	 * Allow the asm code to perform the finalization if there is no
 	 * partial data and the input is a round multiple of the block size.
 	 */
 	sctx->finalize = finalize;
 
-	kernel_neon_begin_partial(16);
+	kernel_neon_begin();
 	sha1_base_do_update(desc, data, len,
 			    (sha1_block_fn *)sha1_ce_transform);
 	if (!finalize)
@@ -71,8 +78,11 @@ static int sha1_ce_final(struct shash_desc *desc, u8 *out)
 {
 	struct sha1_ce_state *sctx = shash_desc_ctx(desc);
 
+	if (!may_use_simd())
+		return crypto_sha1_finup(desc, NULL, 0, out);
+
 	sctx->finalize = 0;
-	kernel_neon_begin_partial(16);
+	kernel_neon_begin();
 	sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_ce_transform);
 	kernel_neon_end();
 	return sha1_base_finish(desc, out);
diff --git a/arch/arm64/crypto/sha2-ce-glue.c b/arch/arm64/crypto/sha2-ce-glue.c
index 0ed9486f75dd..fd1ff2b13dfa 100644
--- a/arch/arm64/crypto/sha2-ce-glue.c
+++ b/arch/arm64/crypto/sha2-ce-glue.c
@@ -1,7 +1,7 @@
 /*
  * sha2-ce-glue.c - SHA-224/SHA-256 using ARMv8 Crypto Extensions
  *
- * Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2014 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -9,6 +9,7 @@
  */
 
 #include <asm/neon.h>
+#include <asm/simd.h>
 #include <asm/unaligned.h>
 #include <crypto/internal/hash.h>
 #include <crypto/sha.h>
@@ -34,13 +35,19 @@ const u32 sha256_ce_offsetof_count = offsetof(struct sha256_ce_state,
 const u32 sha256_ce_offsetof_finalize = offsetof(struct sha256_ce_state,
 						 finalize);
 
+asmlinkage void sha256_block_data_order(u32 *digest, u8 const *src, int blocks);
+
 static int sha256_ce_update(struct shash_desc *desc, const u8 *data,
 			    unsigned int len)
 {
 	struct sha256_ce_state *sctx = shash_desc_ctx(desc);
 
+	if (!may_use_simd())
+		return sha256_base_do_update(desc, data, len,
+				(sha256_block_fn *)sha256_block_data_order);
+
 	sctx->finalize = 0;
-	kernel_neon_begin_partial(28);
+	kernel_neon_begin();
 	sha256_base_do_update(desc, data, len,
 			      (sha256_block_fn *)sha2_ce_transform);
 	kernel_neon_end();
@@ -54,13 +61,22 @@ static int sha256_ce_finup(struct shash_desc *desc, const u8 *data,
 	struct sha256_ce_state *sctx = shash_desc_ctx(desc);
 	bool finalize = !sctx->sst.count && !(len % SHA256_BLOCK_SIZE);
 
+	if (!may_use_simd()) {
+		if (len)
+			sha256_base_do_update(desc, data, len,
+				(sha256_block_fn *)sha256_block_data_order);
+		sha256_base_do_finalize(desc,
+				(sha256_block_fn *)sha256_block_data_order);
+		return sha256_base_finish(desc, out);
+	}
+
 	/*
 	 * Allow the asm code to perform the finalization if there is no
 	 * partial data and the input is a round multiple of the block size.
 	 */
 	sctx->finalize = finalize;
 
-	kernel_neon_begin_partial(28);
+	kernel_neon_begin();
 	sha256_base_do_update(desc, data, len,
 			      (sha256_block_fn *)sha2_ce_transform);
 	if (!finalize)
@@ -74,8 +90,14 @@ static int sha256_ce_final(struct shash_desc *desc, u8 *out)
 {
 	struct sha256_ce_state *sctx = shash_desc_ctx(desc);
 
+	if (!may_use_simd()) {
+		sha256_base_do_finalize(desc,
+				(sha256_block_fn *)sha256_block_data_order);
+		return sha256_base_finish(desc, out);
+	}
+
 	sctx->finalize = 0;
-	kernel_neon_begin_partial(28);
+	kernel_neon_begin();
 	sha256_base_do_finalize(desc, (sha256_block_fn *)sha2_ce_transform);
 	kernel_neon_end();
 	return sha256_base_finish(desc, out);
diff --git a/arch/arm64/crypto/sha256-glue.c b/arch/arm64/crypto/sha256-glue.c
index a2226f841960..b064d925fe2a 100644
--- a/arch/arm64/crypto/sha256-glue.c
+++ b/arch/arm64/crypto/sha256-glue.c
@@ -29,6 +29,7 @@ MODULE_ALIAS_CRYPTO("sha256");
 
 asmlinkage void sha256_block_data_order(u32 *digest, const void *data,
 					unsigned int num_blks);
+EXPORT_SYMBOL(sha256_block_data_order);
 
 asmlinkage void sha256_block_neon(u32 *digest, const void *data,
 				  unsigned int num_blks);
diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild
index f81c7b685fc6..2326e39d5892 100644
--- a/arch/arm64/include/asm/Kbuild
+++ b/arch/arm64/include/asm/Kbuild
@@ -20,7 +20,6 @@ generic-y += rwsem.h
 generic-y += segment.h
 generic-y += serial.h
 generic-y += set_memory.h
-generic-y += simd.h
 generic-y += sizes.h
 generic-y += switch_to.h
 generic-y += trace_clock.h
diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h
index 8cef47fa2218..b7e3f74822da 100644
--- a/arch/arm64/include/asm/arch_gicv3.h
+++ b/arch/arm64/include/asm/arch_gicv3.h
@@ -116,6 +116,8 @@ static inline void gic_write_bpr1(u32 val)
 
 #define gic_read_typer(c)		readq_relaxed(c)
 #define gic_write_irouter(v, c)		writeq_relaxed(v, c)
+#define gic_read_lpir(c)		readq_relaxed(c)
+#define gic_write_lpir(v, c)		writeq_relaxed(v, c)
 
 #define gic_flush_dcache_to_poc(a,l)	__flush_dcache_area((a), (l))
 
@@ -133,5 +135,10 @@ static inline void gic_write_bpr1(u32 val)
 #define gicr_write_pendbaser(v, c)	writeq_relaxed(v, c)
 #define gicr_read_pendbaser(c)		readq_relaxed(c)
 
+#define gits_write_vpropbaser(v, c)	writeq_relaxed(v, c)
+
+#define gits_write_vpendbaser(v, c)	writeq_relaxed(v, c)
+#define gits_read_vpendbaser(c)		readq_relaxed(c)
+
 #endif /* __ASSEMBLY__ */
 #endif /* __ASM_ARCH_GICV3_H */
diff --git a/arch/arm64/include/asm/asm-bug.h b/arch/arm64/include/asm/asm-bug.h
new file mode 100644
index 000000000000..636e755bcdca
--- /dev/null
+++ b/arch/arm64/include/asm/asm-bug.h
@@ -0,0 +1,54 @@
+#ifndef __ASM_ASM_BUG_H
+/*
+ * Copyright (C) 2017  ARM Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#define __ASM_ASM_BUG_H
+
+#include <asm/brk-imm.h>
+
+#ifdef CONFIG_DEBUG_BUGVERBOSE
+#define _BUGVERBOSE_LOCATION(file, line) __BUGVERBOSE_LOCATION(file, line)
+#define __BUGVERBOSE_LOCATION(file, line)			\
+		.pushsection .rodata.str,"aMS",@progbits,1;	\
+	2:	.string file;					\
+		.popsection;					\
+								\
+		.long 2b - 0b;					\
+		.short line;
+#else
+#define _BUGVERBOSE_LOCATION(file, line)
+#endif
+
+#ifdef CONFIG_GENERIC_BUG
+
+#define __BUG_ENTRY(flags) 				\
+		.pushsection __bug_table,"aw";		\
+		.align 2;				\
+	0:	.long 1f - 0b;				\
+_BUGVERBOSE_LOCATION(__FILE__, __LINE__)		\
+		.short flags; 				\
+		.popsection;				\
+	1:
+#else
+#define __BUG_ENTRY(flags)
+#endif
+
+#define ASM_BUG_FLAGS(flags)				\
+	__BUG_ENTRY(flags)				\
+	brk	BUG_BRK_IMM
+
+#define ASM_BUG()	ASM_BUG_FLAGS(0)
+
+#endif /* __ASM_ASM_BUG_H */
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index 1b67c3782d00..d58a6253c6ab 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -230,12 +230,18 @@ lr	.req	x30		// link register
 	.endm
 
 	/*
-	 * @dst: Result of per_cpu(sym, smp_processor_id())
+	 * @dst: Result of per_cpu(sym, smp_processor_id()), can be SP for
+	 *       non-module code
 	 * @sym: The name of the per-cpu variable
 	 * @tmp: scratch register
 	 */
 	.macro adr_this_cpu, dst, sym, tmp
+#ifndef MODULE
+	adrp	\tmp, \sym
+	add	\dst, \tmp, #:lo12:\sym
+#else
 	adr_l	\dst, \sym
+#endif
 	mrs	\tmp, tpidr_el1
 	add	\dst, \dst, \tmp
 	.endm
@@ -353,6 +359,12 @@ alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE
 alternative_else
 	dc	civac, \kaddr
 alternative_endif
+	.elseif	(\op == cvap)
+alternative_if ARM64_HAS_DCPOP
+	sys 3, c7, c12, 1, \kaddr	// dc cvap
+alternative_else
+	dc	cvac, \kaddr
+alternative_endif
 	.else
 	dc	\op, \kaddr
 	.endif
@@ -403,6 +415,17 @@ alternative_endif
 	.size	__pi_##x, . - x;	\
 	ENDPROC(x)
 
+/*
+ * Annotate a function as being unsuitable for kprobes.
+ */
+#ifdef CONFIG_KPROBES
+#define NOKPROBE(x)				\
+	.pushsection "_kprobe_blacklist", "aw";	\
+	.quad	x;				\
+	.popsection;
+#else
+#define NOKPROBE(x)
+#endif
 	/*
 	 * Emit a 64-bit absolute little endian symbol reference in a way that
 	 * ensures that it will be resolved at build time, even when building a
diff --git a/arch/arm64/include/asm/bug.h b/arch/arm64/include/asm/bug.h
index a02a57186f56..d7dc43752705 100644
--- a/arch/arm64/include/asm/bug.h
+++ b/arch/arm64/include/asm/bug.h
@@ -18,41 +18,12 @@
 #ifndef _ARCH_ARM64_ASM_BUG_H
 #define _ARCH_ARM64_ASM_BUG_H
 
-#include <asm/brk-imm.h>
+#include <linux/stringify.h>
 
-#ifdef CONFIG_DEBUG_BUGVERBOSE
-#define _BUGVERBOSE_LOCATION(file, line) __BUGVERBOSE_LOCATION(file, line)
-#define __BUGVERBOSE_LOCATION(file, line)				\
-		".pushsection .rodata.str,\"aMS\",@progbits,1\n"	\
-	"2:	.string \"" file "\"\n\t"				\
-		".popsection\n\t"					\
-									\
-		".long 2b - 0b\n\t"					\
-		".short " #line "\n\t"
-#else
-#define _BUGVERBOSE_LOCATION(file, line)
-#endif
-
-#ifdef CONFIG_GENERIC_BUG
-
-#define __BUG_ENTRY(flags) 				\
-		".pushsection __bug_table,\"aw\"\n\t"	\
-		".align 2\n\t"				\
-	"0:	.long 1f - 0b\n\t"			\
-_BUGVERBOSE_LOCATION(__FILE__, __LINE__)		\
-		".short " #flags "\n\t"			\
-		".popsection\n"				\
-	"1:	"
-#else
-#define __BUG_ENTRY(flags) ""
-#endif
+#include <asm/asm-bug.h>
 
 #define __BUG_FLAGS(flags)				\
-	asm volatile (					\
-		__BUG_ENTRY(flags)			\
-		"brk %[imm]" :: [imm] "i" (BUG_BRK_IMM)	\
-	);
-
+	asm volatile (__stringify(ASM_BUG_FLAGS(flags)));
 
 #define BUG() do {					\
 	__BUG_FLAGS(0);					\
diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
index d74a284abdc2..76d1cc85d5b1 100644
--- a/arch/arm64/include/asm/cacheflush.h
+++ b/arch/arm64/include/asm/cacheflush.h
@@ -67,7 +67,9 @@
  */
 extern void flush_icache_range(unsigned long start, unsigned long end);
 extern void __flush_dcache_area(void *addr, size_t len);
+extern void __inval_dcache_area(void *addr, size_t len);
 extern void __clean_dcache_area_poc(void *addr, size_t len);
+extern void __clean_dcache_area_pop(void *addr, size_t len);
 extern void __clean_dcache_area_pou(void *addr, size_t len);
 extern long __flush_cache_user_range(unsigned long start, unsigned long end);
 extern void sync_icache_aliases(void *kaddr, unsigned long len);
@@ -150,6 +152,6 @@ static inline void flush_cache_vunmap(unsigned long start, unsigned long end)
 {
 }
 
-int set_memory_valid(unsigned long addr, unsigned long size, int enable);
+int set_memory_valid(unsigned long addr, int numpages, int enable);
 
 #endif
diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
index 8d2272c6822c..8da621627d7c 100644
--- a/arch/arm64/include/asm/cpucaps.h
+++ b/arch/arm64/include/asm/cpucaps.h
@@ -39,7 +39,8 @@
 #define ARM64_WORKAROUND_QCOM_FALKOR_E1003	18
 #define ARM64_WORKAROUND_858921			19
 #define ARM64_WORKAROUND_CAVIUM_30115		20
+#define ARM64_HAS_DCPOP				21
 
-#define ARM64_NCAPS				21
+#define ARM64_NCAPS				22
 
 #endif /* __ASM_CPUCAPS_H */
diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h
index 8f3043aba873..b93904b16fc2 100644
--- a/arch/arm64/include/asm/efi.h
+++ b/arch/arm64/include/asm/efi.h
@@ -3,7 +3,9 @@
 
 #include <asm/boot.h>
 #include <asm/cpufeature.h>
+#include <asm/fpsimd.h>
 #include <asm/io.h>
+#include <asm/memory.h>
 #include <asm/mmu_context.h>
 #include <asm/neon.h>
 #include <asm/ptrace.h>
@@ -20,8 +22,8 @@ int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md);
 
 #define arch_efi_call_virt_setup()					\
 ({									\
-	kernel_neon_begin();						\
 	efi_virtmap_load();						\
+	__efi_fpsimd_begin();						\
 })
 
 #define arch_efi_call_virt(p, f, args...)				\
@@ -33,8 +35,8 @@ int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md);
 
 #define arch_efi_call_virt_teardown()					\
 ({									\
+	__efi_fpsimd_end();						\
 	efi_virtmap_unload();						\
-	kernel_neon_end();						\
 })
 
 #define ARCH_EFI_IRQ_FLAGS_MASK (PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT)
@@ -48,6 +50,13 @@ int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md);
  */
 #define EFI_FDT_ALIGN	SZ_2M   /* used by allocate_new_fdt_and_exit_boot() */
 
+/*
+ * In some configurations (e.g. VMAP_STACK && 64K pages), stacks built into the
+ * kernel need greater alignment than we require the segments to be padded to.
+ */
+#define EFI_KIMG_ALIGN	\
+	(SEGMENT_ALIGN > THREAD_ALIGN ? SEGMENT_ALIGN : THREAD_ALIGN)
+
 /* on arm64, the FDT may be located anywhere in system RAM */
 static inline unsigned long efi_get_max_fdt_addr(unsigned long dram_base)
 {
@@ -81,6 +90,9 @@ static inline unsigned long efi_get_max_initrd_addr(unsigned long dram_base,
 #define alloc_screen_info(x...)		&screen_info
 #define free_screen_info(x...)
 
+/* redeclare as 'hidden' so the compiler will generate relative references */
+extern struct screen_info screen_info __attribute__((__visibility__("hidden")));
+
 static inline void efifb_setup_from_dmi(struct screen_info *si, const char *opt)
 {
 }
diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h
index 3288c2b36731..33be513ef24c 100644
--- a/arch/arm64/include/asm/elf.h
+++ b/arch/arm64/include/asm/elf.h
@@ -139,7 +139,6 @@ typedef struct user_fpsimd_state elf_fpregset_t;
 
 #define SET_PERSONALITY(ex)						\
 ({									\
-	clear_bit(TIF_32BIT, &current->mm->context.flags);		\
 	clear_thread_flag(TIF_32BIT);					\
 	current->personality &= ~READ_IMPLIES_EXEC;			\
 })
@@ -195,7 +194,6 @@ typedef compat_elf_greg_t		compat_elf_gregset_t[COMPAT_ELF_NGREG];
  */
 #define COMPAT_SET_PERSONALITY(ex)					\
 ({									\
-	set_bit(TIF_32BIT, &current->mm->context.flags);		\
 	set_thread_flag(TIF_32BIT);					\
  })
 #define COMPAT_ARCH_DLINFO
diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h
index 8cabd57b6348..66ed8b6b9976 100644
--- a/arch/arm64/include/asm/esr.h
+++ b/arch/arm64/include/asm/esr.h
@@ -77,16 +77,23 @@
 #define ESR_ELx_EC_MASK		(UL(0x3F) << ESR_ELx_EC_SHIFT)
 #define ESR_ELx_EC(esr)		(((esr) & ESR_ELx_EC_MASK) >> ESR_ELx_EC_SHIFT)
 
-#define ESR_ELx_IL		(UL(1) << 25)
+#define ESR_ELx_IL_SHIFT	(25)
+#define ESR_ELx_IL		(UL(1) << ESR_ELx_IL_SHIFT)
 #define ESR_ELx_ISS_MASK	(ESR_ELx_IL - 1)
 
 /* ISS field definitions shared by different classes */
-#define ESR_ELx_WNR		(UL(1) << 6)
+#define ESR_ELx_WNR_SHIFT	(6)
+#define ESR_ELx_WNR		(UL(1) << ESR_ELx_WNR_SHIFT)
 
 /* Shared ISS field definitions for Data/Instruction aborts */
-#define ESR_ELx_FnV		(UL(1) << 10)
-#define ESR_ELx_EA		(UL(1) << 9)
-#define ESR_ELx_S1PTW		(UL(1) << 7)
+#define ESR_ELx_SET_SHIFT	(11)
+#define ESR_ELx_SET_MASK	(UL(3) << ESR_ELx_SET_SHIFT)
+#define ESR_ELx_FnV_SHIFT	(10)
+#define ESR_ELx_FnV		(UL(1) << ESR_ELx_FnV_SHIFT)
+#define ESR_ELx_EA_SHIFT	(9)
+#define ESR_ELx_EA		(UL(1) << ESR_ELx_EA_SHIFT)
+#define ESR_ELx_S1PTW_SHIFT	(7)
+#define ESR_ELx_S1PTW		(UL(1) << ESR_ELx_S1PTW_SHIFT)
 
 /* Shared ISS fault status code(IFSC/DFSC) for Data/Instruction aborts */
 #define ESR_ELx_FSC		(0x3F)
@@ -97,15 +104,20 @@
 #define ESR_ELx_FSC_PERM	(0x0C)
 
 /* ISS field definitions for Data Aborts */
-#define ESR_ELx_ISV		(UL(1) << 24)
+#define ESR_ELx_ISV_SHIFT	(24)
+#define ESR_ELx_ISV		(UL(1) << ESR_ELx_ISV_SHIFT)
 #define ESR_ELx_SAS_SHIFT	(22)
 #define ESR_ELx_SAS		(UL(3) << ESR_ELx_SAS_SHIFT)
-#define ESR_ELx_SSE		(UL(1) << 21)
+#define ESR_ELx_SSE_SHIFT	(21)
+#define ESR_ELx_SSE		(UL(1) << ESR_ELx_SSE_SHIFT)
 #define ESR_ELx_SRT_SHIFT	(16)
 #define ESR_ELx_SRT_MASK	(UL(0x1F) << ESR_ELx_SRT_SHIFT)
-#define ESR_ELx_SF 		(UL(1) << 15)
-#define ESR_ELx_AR 		(UL(1) << 14)
-#define ESR_ELx_CM 		(UL(1) << 8)
+#define ESR_ELx_SF_SHIFT	(15)
+#define ESR_ELx_SF 		(UL(1) << ESR_ELx_SF_SHIFT)
+#define ESR_ELx_AR_SHIFT	(14)
+#define ESR_ELx_AR 		(UL(1) << ESR_ELx_AR_SHIFT)
+#define ESR_ELx_CM_SHIFT	(8)
+#define ESR_ELx_CM 		(UL(1) << ESR_ELx_CM_SHIFT)
 
 /* ISS field definitions for exceptions taken in to Hyp */
 #define ESR_ELx_CV		(UL(1) << 24)
@@ -157,9 +169,10 @@
 /*
  * User space cache operations have the following sysreg encoding
  * in System instructions.
- * op0=1, op1=3, op2=1, crn=7, crm={ 5, 10, 11, 14 }, WRITE (L=0)
+ * op0=1, op1=3, op2=1, crn=7, crm={ 5, 10, 11, 12, 14 }, WRITE (L=0)
  */
 #define ESR_ELx_SYS64_ISS_CRM_DC_CIVAC	14
+#define ESR_ELx_SYS64_ISS_CRM_DC_CVAP	12
 #define ESR_ELx_SYS64_ISS_CRM_DC_CVAU	11
 #define ESR_ELx_SYS64_ISS_CRM_DC_CVAC	10
 #define ESR_ELx_SYS64_ISS_CRM_IC_IVAU	5
@@ -209,6 +222,13 @@
 #ifndef __ASSEMBLY__
 #include <asm/types.h>
 
+static inline bool esr_is_data_abort(u32 esr)
+{
+	const u32 ec = ESR_ELx_EC(esr);
+
+	return ec == ESR_ELx_EC_DABT_LOW || ec == ESR_ELx_EC_DABT_CUR;
+}
+
 const char *esr_get_class_string(u32 esr);
 #endif /* __ASSEMBLY */
 
diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
index 50f559f574fe..410c48163c6a 100644
--- a/arch/arm64/include/asm/fpsimd.h
+++ b/arch/arm64/include/asm/fpsimd.h
@@ -41,16 +41,6 @@ struct fpsimd_state {
 	unsigned int cpu;
 };
 
-/*
- * Struct for stacking the bottom 'n' FP/SIMD registers.
- */
-struct fpsimd_partial_state {
-	u32		fpsr;
-	u32		fpcr;
-	u32		num_regs;
-	__uint128_t	vregs[32];
-};
-
 
 #if defined(__KERNEL__) && defined(CONFIG_COMPAT)
 /* Masks for extracting the FPSR and FPCR from the FPSCR */
@@ -77,9 +67,9 @@ extern void fpsimd_update_current_state(struct fpsimd_state *state);
 
 extern void fpsimd_flush_task_state(struct task_struct *target);
 
-extern void fpsimd_save_partial_state(struct fpsimd_partial_state *state,
-				      u32 num_regs);
-extern void fpsimd_load_partial_state(struct fpsimd_partial_state *state);
+/* For use by EFI runtime services calls only */
+extern void __efi_fpsimd_begin(void);
+extern void __efi_fpsimd_end(void);
 
 #endif
 
diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h
index a2daf1293028..0f5fdd388b0d 100644
--- a/arch/arm64/include/asm/fpsimdmacros.h
+++ b/arch/arm64/include/asm/fpsimdmacros.h
@@ -75,59 +75,3 @@
 	ldr	w\tmpnr, [\state, #16 * 2 + 4]
 	fpsimd_restore_fpcr x\tmpnr, \state
 .endm
-
-.macro fpsimd_save_partial state, numnr, tmpnr1, tmpnr2
-	mrs	x\tmpnr1, fpsr
-	str	w\numnr, [\state, #8]
-	mrs	x\tmpnr2, fpcr
-	stp	w\tmpnr1, w\tmpnr2, [\state]
-	adr	x\tmpnr1, 0f
-	add	\state, \state, x\numnr, lsl #4
-	sub	x\tmpnr1, x\tmpnr1, x\numnr, lsl #1
-	br	x\tmpnr1
-	stp	q30, q31, [\state, #-16 * 30 - 16]
-	stp	q28, q29, [\state, #-16 * 28 - 16]
-	stp	q26, q27, [\state, #-16 * 26 - 16]
-	stp	q24, q25, [\state, #-16 * 24 - 16]
-	stp	q22, q23, [\state, #-16 * 22 - 16]
-	stp	q20, q21, [\state, #-16 * 20 - 16]
-	stp	q18, q19, [\state, #-16 * 18 - 16]
-	stp	q16, q17, [\state, #-16 * 16 - 16]
-	stp	q14, q15, [\state, #-16 * 14 - 16]
-	stp	q12, q13, [\state, #-16 * 12 - 16]
-	stp	q10, q11, [\state, #-16 * 10 - 16]
-	stp	q8, q9, [\state, #-16 * 8 - 16]
-	stp	q6, q7, [\state, #-16 * 6 - 16]
-	stp	q4, q5, [\state, #-16 * 4 - 16]
-	stp	q2, q3, [\state, #-16 * 2 - 16]
-	stp	q0, q1, [\state, #-16 * 0 - 16]
-0:
-.endm
-
-.macro fpsimd_restore_partial state, tmpnr1, tmpnr2
-	ldp	w\tmpnr1, w\tmpnr2, [\state]
-	msr	fpsr, x\tmpnr1
-	fpsimd_restore_fpcr x\tmpnr2, x\tmpnr1
-	adr	x\tmpnr1, 0f
-	ldr	w\tmpnr2, [\state, #8]
-	add	\state, \state, x\tmpnr2, lsl #4
-	sub	x\tmpnr1, x\tmpnr1, x\tmpnr2, lsl #1
-	br	x\tmpnr1
-	ldp	q30, q31, [\state, #-16 * 30 - 16]
-	ldp	q28, q29, [\state, #-16 * 28 - 16]
-	ldp	q26, q27, [\state, #-16 * 26 - 16]
-	ldp	q24, q25, [\state, #-16 * 24 - 16]
-	ldp	q22, q23, [\state, #-16 * 22 - 16]
-	ldp	q20, q21, [\state, #-16 * 20 - 16]
-	ldp	q18, q19, [\state, #-16 * 18 - 16]
-	ldp	q16, q17, [\state, #-16 * 16 - 16]
-	ldp	q14, q15, [\state, #-16 * 14 - 16]
-	ldp	q12, q13, [\state, #-16 * 12 - 16]
-	ldp	q10, q11, [\state, #-16 * 10 - 16]
-	ldp	q8, q9, [\state, #-16 * 8 - 16]
-	ldp	q6, q7, [\state, #-16 * 6 - 16]
-	ldp	q4, q5, [\state, #-16 * 4 - 16]
-	ldp	q2, q3, [\state, #-16 * 2 - 16]
-	ldp	q0, q1, [\state, #-16 * 0 - 16]
-0:
-.endm
diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h
index f32b42e8725d..5bb2fd4674e7 100644
--- a/arch/arm64/include/asm/futex.h
+++ b/arch/arm64/include/asm/futex.h
@@ -48,20 +48,10 @@ do {									\
 } while (0)
 
 static inline int
-futex_atomic_op_inuser(unsigned int encoded_op, u32 __user *uaddr)
+arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
 {
-	int op = (encoded_op >> 28) & 7;
-	int cmp = (encoded_op >> 24) & 15;
-	int oparg = (int)(encoded_op << 8) >> 20;
-	int cmparg = (int)(encoded_op << 20) >> 20;
 	int oldval = 0, ret, tmp;
 
-	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
-		oparg = 1U << (oparg & 0x1f);
-
-	if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
-		return -EFAULT;
-
 	pagefault_disable();
 
 	switch (op) {
@@ -91,17 +81,9 @@ futex_atomic_op_inuser(unsigned int encoded_op, u32 __user *uaddr)
 
 	pagefault_enable();
 
-	if (!ret) {
-		switch (cmp) {
-		case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
-		case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
-		case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
-		case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
-		case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
-		case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
-		default: ret = -ENOSYS;
-		}
-	}
+	if (!ret)
+		*oval = oldval;
+
 	return ret;
 }
 
diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h
index 793bd73b0d07..1dca41bea16a 100644
--- a/arch/arm64/include/asm/hugetlb.h
+++ b/arch/arm64/include/asm/hugetlb.h
@@ -18,7 +18,6 @@
 #ifndef __ASM_HUGETLB_H
 #define __ASM_HUGETLB_H
 
-#include <asm-generic/hugetlb.h>
 #include <asm/page.h>
 
 static inline pte_t huge_ptep_get(pte_t *ptep)
@@ -82,6 +81,14 @@ extern void huge_ptep_set_wrprotect(struct mm_struct *mm,
 				    unsigned long addr, pte_t *ptep);
 extern void huge_ptep_clear_flush(struct vm_area_struct *vma,
 				  unsigned long addr, pte_t *ptep);
+extern void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
+			   pte_t *ptep, unsigned long sz);
+#define huge_pte_clear huge_pte_clear
+extern void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr,
+				 pte_t *ptep, pte_t pte, unsigned long sz);
+#define set_huge_swap_pte_at set_huge_swap_pte_at
+
+#include <asm-generic/hugetlb.h>
 
 #ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE
 static inline bool gigantic_page_supported(void) { return true; }
diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h
index b77197d941fc..5e6f77239064 100644
--- a/arch/arm64/include/asm/irq.h
+++ b/arch/arm64/include/asm/irq.h
@@ -1,45 +1,12 @@
 #ifndef __ASM_IRQ_H
 #define __ASM_IRQ_H
 
-#define IRQ_STACK_SIZE			THREAD_SIZE
-#define IRQ_STACK_START_SP		THREAD_START_SP
-
 #ifndef __ASSEMBLER__
 
-#include <linux/percpu.h>
-
 #include <asm-generic/irq.h>
-#include <asm/thread_info.h>
 
 struct pt_regs;
 
-DECLARE_PER_CPU(unsigned long [IRQ_STACK_SIZE/sizeof(long)], irq_stack);
-
-/*
- * The highest address on the stack, and the first to be used. Used to
- * find the dummy-stack frame put down by el?_irq() in entry.S, which
- * is structured as follows:
- *
- *       ------------
- *       |          |  <- irq_stack_ptr
- *   top ------------
- *       |   x19    | <- irq_stack_ptr - 0x08
- *       ------------
- *       |   x29    | <- irq_stack_ptr - 0x10
- *       ------------
- *
- * where x19 holds a copy of the task stack pointer where the struct pt_regs
- * from kernel_entry can be found.
- *
- */
-#define IRQ_STACK_PTR(cpu) ((unsigned long)per_cpu(irq_stack, cpu) + IRQ_STACK_START_SP)
-
-/*
- * The offset from irq_stack_ptr where entry.S will store the original
- * stack pointer. Used by unwind_frame() and dump_backtrace().
- */
-#define IRQ_STACK_TO_TASK_STACK(ptr) (*((unsigned long *)((ptr) - 0x08)))
-
 extern void set_handle_irq(void (*handle_irq)(struct pt_regs *));
 
 static inline int nr_legacy_irqs(void)
@@ -47,14 +14,5 @@ static inline int nr_legacy_irqs(void)
 	return 0;
 }
 
-static inline bool on_irq_stack(unsigned long sp, int cpu)
-{
-	/* variable names the same as kernel/stacktrace.c */
-	unsigned long low = (unsigned long)per_cpu(irq_stack, cpu);
-	unsigned long high = low + IRQ_STACK_START_SP;
-
-	return (low <= sp && sp <= high);
-}
-
 #endif /* !__ASSEMBLER__ */
 #endif
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index a89cc22abadc..672c8684d5c2 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -175,18 +175,15 @@ static inline pmd_t kvm_s2pmd_mkwrite(pmd_t pmd)
 
 static inline void kvm_set_s2pte_readonly(pte_t *pte)
 {
-	pteval_t pteval;
-	unsigned long tmp;
-
-	asm volatile("//	kvm_set_s2pte_readonly\n"
-	"	prfm	pstl1strm, %2\n"
-	"1:	ldxr	%0, %2\n"
-	"	and	%0, %0, %3		// clear PTE_S2_RDWR\n"
-	"	orr	%0, %0, %4		// set PTE_S2_RDONLY\n"
-	"	stxr	%w1, %0, %2\n"
-	"	cbnz	%w1, 1b\n"
-	: "=&r" (pteval), "=&r" (tmp), "+Q" (pte_val(*pte))
-	: "L" (~PTE_S2_RDWR), "L" (PTE_S2_RDONLY));
+	pteval_t old_pteval, pteval;
+
+	pteval = READ_ONCE(pte_val(*pte));
+	do {
+		old_pteval = pteval;
+		pteval &= ~PTE_S2_RDWR;
+		pteval |= PTE_S2_RDONLY;
+		pteval = cmpxchg_relaxed(&pte_val(*pte), old_pteval, pteval);
+	} while (pteval != old_pteval);
 }
 
 static inline bool kvm_s2pte_readonly(pte_t *pte)
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index ef39dcb9ca6a..3585a5e26151 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -25,6 +25,7 @@
 #include <linux/const.h>
 #include <linux/types.h>
 #include <asm/bug.h>
+#include <asm/page-def.h>
 #include <asm/sizes.h>
 
 /*
@@ -103,6 +104,58 @@
 #define KASAN_SHADOW_SIZE	(0)
 #endif
 
+#define MIN_THREAD_SHIFT	14
+
+/*
+ * VMAP'd stacks are allocated at page granularity, so we must ensure that such
+ * stacks are a multiple of page size.
+ */
+#if defined(CONFIG_VMAP_STACK) && (MIN_THREAD_SHIFT < PAGE_SHIFT)
+#define THREAD_SHIFT		PAGE_SHIFT
+#else
+#define THREAD_SHIFT		MIN_THREAD_SHIFT
+#endif
+
+#if THREAD_SHIFT >= PAGE_SHIFT
+#define THREAD_SIZE_ORDER	(THREAD_SHIFT - PAGE_SHIFT)
+#endif
+
+#define THREAD_SIZE		(UL(1) << THREAD_SHIFT)
+
+/*
+ * By aligning VMAP'd stacks to 2 * THREAD_SIZE, we can detect overflow by
+ * checking sp & (1 << THREAD_SHIFT), which we can do cheaply in the entry
+ * assembly.
+ */
+#ifdef CONFIG_VMAP_STACK
+#define THREAD_ALIGN		(2 * THREAD_SIZE)
+#else
+#define THREAD_ALIGN		THREAD_SIZE
+#endif
+
+#define IRQ_STACK_SIZE		THREAD_SIZE
+
+#define OVERFLOW_STACK_SIZE	SZ_4K
+
+/*
+ * Alignment of kernel segments (e.g. .text, .data).
+ */
+#if defined(CONFIG_DEBUG_ALIGN_RODATA)
+/*
+ *  4 KB granule:   1 level 2 entry
+ * 16 KB granule: 128 level 3 entries, with contiguous bit
+ * 64 KB granule:  32 level 3 entries, with contiguous bit
+ */
+#define SEGMENT_ALIGN			SZ_2M
+#else
+/*
+ *  4 KB granule:  16 level 3 entries, with contiguous bit
+ * 16 KB granule:   4 level 3 entries, without contiguous bit
+ * 64 KB granule:   1 level 3 entry
+ */
+#define SEGMENT_ALIGN			SZ_64K
+#endif
+
 /*
  * Memory types available.
  */
diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h
index 5468c834b072..0d34bf0a89c7 100644
--- a/arch/arm64/include/asm/mmu.h
+++ b/arch/arm64/include/asm/mmu.h
@@ -16,6 +16,8 @@
 #ifndef __ASM_MMU_H
 #define __ASM_MMU_H
 
+#define MMCF_AARCH32	0x1	/* mm context flag for AArch32 executables */
+
 typedef struct {
 	atomic64_t	id;
 	void		*vdso;
diff --git a/arch/arm64/include/asm/neon.h b/arch/arm64/include/asm/neon.h
index ad4cdc966c0f..f922eaf780f9 100644
--- a/arch/arm64/include/asm/neon.h
+++ b/arch/arm64/include/asm/neon.h
@@ -8,12 +8,22 @@
  * published by the Free Software Foundation.
  */
 
+#ifndef __ASM_NEON_H
+#define __ASM_NEON_H
+
 #include <linux/types.h>
 #include <asm/fpsimd.h>
 
 #define cpu_has_neon()		system_supports_fpsimd()
 
-#define kernel_neon_begin()	kernel_neon_begin_partial(32)
-
-void kernel_neon_begin_partial(u32 num_regs);
+void kernel_neon_begin(void);
 void kernel_neon_end(void);
+
+/*
+ * Temporary macro to allow the crypto code to compile. Note that the
+ * semantics of kernel_neon_begin_partial() are now different from the
+ * original as it does not allow being called in an interrupt context.
+ */
+#define kernel_neon_begin_partial(num_regs)	kernel_neon_begin()
+
+#endif /* ! __ASM_NEON_H */
diff --git a/arch/arm64/include/asm/numa.h b/arch/arm64/include/asm/numa.h
index bf466d1876e3..ef7b23863a7c 100644
--- a/arch/arm64/include/asm/numa.h
+++ b/arch/arm64/include/asm/numa.h
@@ -7,9 +7,6 @@
 
 #define NR_NODE_MEMBLKS		(MAX_NUMNODES * 2)
 
-/* currently, arm64 implements flat NUMA topology */
-#define parent_node(node)	(node)
-
 int __node_distance(int from, int to);
 #define node_distance(a, b) __node_distance(a, b)
 
diff --git a/arch/arm64/include/asm/page-def.h b/arch/arm64/include/asm/page-def.h
new file mode 100644
index 000000000000..01591a29dc2e
--- /dev/null
+++ b/arch/arm64/include/asm/page-def.h
@@ -0,0 +1,34 @@
+/*
+ * Based on arch/arm/include/asm/page.h
+ *
+ * Copyright (C) 1995-2003 Russell King
+ * Copyright (C) 2017 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_PAGE_DEF_H
+#define __ASM_PAGE_DEF_H
+
+#include <linux/const.h>
+
+/* PAGE_SHIFT determines the page size */
+/* CONT_SHIFT determines the number of pages which can be tracked together  */
+#define PAGE_SHIFT		CONFIG_ARM64_PAGE_SHIFT
+#define CONT_SHIFT		CONFIG_ARM64_CONT_SHIFT
+#define PAGE_SIZE		(_AC(1, UL) << PAGE_SHIFT)
+#define PAGE_MASK		(~(PAGE_SIZE-1))
+
+#define CONT_SIZE		(_AC(1, UL) << (CONT_SHIFT + PAGE_SHIFT))
+#define CONT_MASK		(~(CONT_SIZE-1))
+
+#endif /* __ASM_PAGE_DEF_H */
diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h
index 8472c6def5ef..60d02c81a3a2 100644
--- a/arch/arm64/include/asm/page.h
+++ b/arch/arm64/include/asm/page.h
@@ -19,17 +19,7 @@
 #ifndef __ASM_PAGE_H
 #define __ASM_PAGE_H
 
-#include <linux/const.h>
-
-/* PAGE_SHIFT determines the page size */
-/* CONT_SHIFT determines the number of pages which can be tracked together  */
-#define PAGE_SHIFT		CONFIG_ARM64_PAGE_SHIFT
-#define CONT_SHIFT		CONFIG_ARM64_CONT_SHIFT
-#define PAGE_SIZE		(_AC(1, UL) << PAGE_SHIFT)
-#define PAGE_MASK		(~(PAGE_SIZE-1))
-
-#define CONT_SIZE		(_AC(1, UL) << (CONT_SHIFT + PAGE_SHIFT))
-#define CONT_MASK		(~(CONT_SIZE-1))
+#include <asm/page-def.h>
 
 #ifndef __ASSEMBLY__
 
diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h
index 2142c7726e76..0a5635fb0ef9 100644
--- a/arch/arm64/include/asm/pgtable-prot.h
+++ b/arch/arm64/include/asm/pgtable-prot.h
@@ -63,23 +63,21 @@
 #define PAGE_S2			__pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_NORMAL) | PTE_S2_RDONLY)
 #define PAGE_S2_DEVICE		__pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_DEVICE_nGnRE) | PTE_S2_RDONLY | PTE_UXN)
 
-#define PAGE_NONE		__pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_PXN | PTE_UXN)
+#define PAGE_NONE		__pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_RDONLY | PTE_PXN | PTE_UXN)
 #define PAGE_SHARED		__pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE)
 #define PAGE_SHARED_EXEC	__pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_WRITE)
-#define PAGE_COPY		__pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN)
-#define PAGE_COPY_EXEC		__pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN)
-#define PAGE_READONLY		__pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN)
-#define PAGE_READONLY_EXEC	__pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN)
-#define PAGE_EXECONLY		__pgprot(_PAGE_DEFAULT | PTE_NG | PTE_PXN)
+#define PAGE_READONLY		__pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN)
+#define PAGE_READONLY_EXEC	__pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN)
+#define PAGE_EXECONLY		__pgprot(_PAGE_DEFAULT | PTE_RDONLY | PTE_NG | PTE_PXN)
 
 #define __P000  PAGE_NONE
 #define __P001  PAGE_READONLY
-#define __P010  PAGE_COPY
-#define __P011  PAGE_COPY
+#define __P010  PAGE_READONLY
+#define __P011  PAGE_READONLY
 #define __P100  PAGE_EXECONLY
 #define __P101  PAGE_READONLY_EXEC
-#define __P110  PAGE_COPY_EXEC
-#define __P111  PAGE_COPY_EXEC
+#define __P110  PAGE_READONLY_EXEC
+#define __P111  PAGE_READONLY_EXEC
 
 #define __S000  PAGE_NONE
 #define __S001  PAGE_READONLY
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 6eae342ced6b..bc4e92337d16 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -39,6 +39,7 @@
 
 #ifndef __ASSEMBLY__
 
+#include <asm/cmpxchg.h>
 #include <asm/fixmap.h>
 #include <linux/mmdebug.h>
 
@@ -84,11 +85,7 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
 	(__boundary - 1 < (end) - 1) ? __boundary : (end);			\
 })
 
-#ifdef CONFIG_ARM64_HW_AFDBM
 #define pte_hw_dirty(pte)	(pte_write(pte) && !(pte_val(pte) & PTE_RDONLY))
-#else
-#define pte_hw_dirty(pte)	(0)
-#endif
 #define pte_sw_dirty(pte)	(!!(pte_val(pte) & PTE_DIRTY))
 #define pte_dirty(pte)		(pte_sw_dirty(pte) || pte_hw_dirty(pte))
 
@@ -124,12 +121,16 @@ static inline pte_t set_pte_bit(pte_t pte, pgprot_t prot)
 
 static inline pte_t pte_wrprotect(pte_t pte)
 {
-	return clear_pte_bit(pte, __pgprot(PTE_WRITE));
+	pte = clear_pte_bit(pte, __pgprot(PTE_WRITE));
+	pte = set_pte_bit(pte, __pgprot(PTE_RDONLY));
+	return pte;
 }
 
 static inline pte_t pte_mkwrite(pte_t pte)
 {
-	return set_pte_bit(pte, __pgprot(PTE_WRITE));
+	pte = set_pte_bit(pte, __pgprot(PTE_WRITE));
+	pte = clear_pte_bit(pte, __pgprot(PTE_RDONLY));
+	return pte;
 }
 
 static inline pte_t pte_mkclean(pte_t pte)
@@ -168,11 +169,6 @@ static inline pte_t pte_mknoncont(pte_t pte)
 	return clear_pte_bit(pte, __pgprot(PTE_CONT));
 }
 
-static inline pte_t pte_clear_rdonly(pte_t pte)
-{
-	return clear_pte_bit(pte, __pgprot(PTE_RDONLY));
-}
-
 static inline pte_t pte_mkpresent(pte_t pte)
 {
 	return set_pte_bit(pte, __pgprot(PTE_VALID));
@@ -220,22 +216,15 @@ extern void __sync_icache_dcache(pte_t pteval, unsigned long addr);
 static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
 			      pte_t *ptep, pte_t pte)
 {
-	if (pte_present(pte)) {
-		if (pte_sw_dirty(pte) && pte_write(pte))
-			pte_val(pte) &= ~PTE_RDONLY;
-		else
-			pte_val(pte) |= PTE_RDONLY;
-		if (pte_user_exec(pte) && !pte_special(pte))
-			__sync_icache_dcache(pte, addr);
-	}
+	if (pte_present(pte) && pte_user_exec(pte) && !pte_special(pte))
+		__sync_icache_dcache(pte, addr);
 
 	/*
 	 * If the existing pte is valid, check for potential race with
 	 * hardware updates of the pte (ptep_set_access_flags safely changes
 	 * valid ptes without going through an invalid entry).
 	 */
-	if (IS_ENABLED(CONFIG_ARM64_HW_AFDBM) &&
-	    pte_valid(*ptep) && pte_valid(pte)) {
+	if (pte_valid(*ptep) && pte_valid(pte)) {
 		VM_WARN_ONCE(!pte_young(pte),
 			     "%s: racy access flag clearing: 0x%016llx -> 0x%016llx",
 			     __func__, pte_val(*ptep), pte_val(pte));
@@ -571,7 +560,6 @@ static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
 	return pte_pmd(pte_modify(pmd_pte(pmd), newprot));
 }
 
-#ifdef CONFIG_ARM64_HW_AFDBM
 #define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
 extern int ptep_set_access_flags(struct vm_area_struct *vma,
 				 unsigned long address, pte_t *ptep,
@@ -593,20 +581,17 @@ static inline int pmdp_set_access_flags(struct vm_area_struct *vma,
 #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
 static inline int __ptep_test_and_clear_young(pte_t *ptep)
 {
-	pteval_t pteval;
-	unsigned int tmp, res;
+	pte_t old_pte, pte;
 
-	asm volatile("//	__ptep_test_and_clear_young\n"
-	"	prfm	pstl1strm, %2\n"
-	"1:	ldxr	%0, %2\n"
-	"	ubfx	%w3, %w0, %5, #1	// extract PTE_AF (young)\n"
-	"	and	%0, %0, %4		// clear PTE_AF\n"
-	"	stxr	%w1, %0, %2\n"
-	"	cbnz	%w1, 1b\n"
-	: "=&r" (pteval), "=&r" (tmp), "+Q" (pte_val(*ptep)), "=&r" (res)
-	: "L" (~PTE_AF), "I" (ilog2(PTE_AF)));
+	pte = READ_ONCE(*ptep);
+	do {
+		old_pte = pte;
+		pte = pte_mkold(pte);
+		pte_val(pte) = cmpxchg_relaxed(&pte_val(*ptep),
+					       pte_val(old_pte), pte_val(pte));
+	} while (pte_val(pte) != pte_val(old_pte));
 
-	return res;
+	return pte_young(pte);
 }
 
 static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
@@ -630,17 +615,7 @@ static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
 static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
 				       unsigned long address, pte_t *ptep)
 {
-	pteval_t old_pteval;
-	unsigned int tmp;
-
-	asm volatile("//	ptep_get_and_clear\n"
-	"	prfm	pstl1strm, %2\n"
-	"1:	ldxr	%0, %2\n"
-	"	stxr	%w1, xzr, %2\n"
-	"	cbnz	%w1, 1b\n"
-	: "=&r" (old_pteval), "=&r" (tmp), "+Q" (pte_val(*ptep)));
-
-	return __pte(old_pteval);
+	return __pte(xchg_relaxed(&pte_val(*ptep), 0));
 }
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
@@ -653,27 +628,32 @@ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
 /*
- * ptep_set_wrprotect - mark read-only while trasferring potential hardware
- * dirty status (PTE_DBM && !PTE_RDONLY) to the software PTE_DIRTY bit.
+ * ptep_set_wrprotect - mark read-only while preserving the hardware update of
+ * the Access Flag.
  */
 #define __HAVE_ARCH_PTEP_SET_WRPROTECT
 static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long address, pte_t *ptep)
 {
-	pteval_t pteval;
-	unsigned long tmp;
+	pte_t old_pte, pte;
 
-	asm volatile("//	ptep_set_wrprotect\n"
-	"	prfm	pstl1strm, %2\n"
-	"1:	ldxr	%0, %2\n"
-	"	tst	%0, %4			// check for hw dirty (!PTE_RDONLY)\n"
-	"	csel	%1, %3, xzr, eq		// set PTE_DIRTY|PTE_RDONLY if dirty\n"
-	"	orr	%0, %0, %1		// if !dirty, PTE_RDONLY is already set\n"
-	"	and	%0, %0, %5		// clear PTE_WRITE/PTE_DBM\n"
-	"	stxr	%w1, %0, %2\n"
-	"	cbnz	%w1, 1b\n"
-	: "=&r" (pteval), "=&r" (tmp), "+Q" (pte_val(*ptep))
-	: "r" (PTE_DIRTY|PTE_RDONLY), "L" (PTE_RDONLY), "L" (~PTE_WRITE)
-	: "cc");
+	/*
+	 * ptep_set_wrprotect() is only called on CoW mappings which are
+	 * private (!VM_SHARED) with the pte either read-only (!PTE_WRITE &&
+	 * PTE_RDONLY) or writable and software-dirty (PTE_WRITE &&
+	 * !PTE_RDONLY && PTE_DIRTY); see is_cow_mapping() and
+	 * protection_map[]. There is no race with the hardware update of the
+	 * dirty state: clearing of PTE_RDONLY when PTE_WRITE (a.k.a. PTE_DBM)
+	 * is set.
+	 */
+	VM_WARN_ONCE(pte_write(*ptep) && !pte_dirty(*ptep),
+		     "%s: potential race with hardware DBM", __func__);
+	pte = READ_ONCE(*ptep);
+	do {
+		old_pte = pte;
+		pte = pte_wrprotect(pte);
+		pte_val(pte) = cmpxchg_relaxed(&pte_val(*ptep),
+					       pte_val(old_pte), pte_val(pte));
+	} while (pte_val(pte) != pte_val(old_pte));
 }
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
@@ -684,7 +664,6 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm,
 	ptep_set_wrprotect(mm, address, (pte_t *)pmdp);
 }
 #endif
-#endif	/* CONFIG_ARM64_HW_AFDBM */
 
 extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
 extern pgd_t idmap_pg_dir[PTRS_PER_PGD];
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 64c9e78f9882..29adab8138c3 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -112,7 +112,7 @@ void tls_preserve_current_state(void);
 static inline void start_thread_common(struct pt_regs *regs, unsigned long pc)
 {
 	memset(regs, 0, sizeof(*regs));
-	regs->syscallno = ~0UL;
+	forget_syscall(regs);
 	regs->pc = pc;
 }
 
@@ -159,7 +159,7 @@ extern struct task_struct *cpu_switch_to(struct task_struct *prev,
 					 struct task_struct *next);
 
 #define task_pt_regs(p) \
-	((struct pt_regs *)(THREAD_START_SP + task_stack_page(p)) - 1)
+	((struct pt_regs *)(THREAD_SIZE + task_stack_page(p)) - 1)
 
 #define KSTK_EIP(tsk)	((unsigned long)task_pt_regs(tsk)->pc)
 #define KSTK_ESP(tsk)	user_stack_pointer(task_pt_regs(tsk))
diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h
index 11403fdd0a50..6069d66e0bc2 100644
--- a/arch/arm64/include/asm/ptrace.h
+++ b/arch/arm64/include/asm/ptrace.h
@@ -72,8 +72,19 @@
 #define COMPAT_PT_TEXT_ADDR		0x10000
 #define COMPAT_PT_DATA_ADDR		0x10004
 #define COMPAT_PT_TEXT_END_ADDR		0x10008
+
+/*
+ * If pt_regs.syscallno == NO_SYSCALL, then the thread is not executing
+ * a syscall -- i.e., its most recent entry into the kernel from
+ * userspace was not via SVC, or otherwise a tracer cancelled the syscall.
+ *
+ * This must have the value -1, for ABI compatibility with ptrace etc.
+ */
+#define NO_SYSCALL (-1)
+
 #ifndef __ASSEMBLY__
 #include <linux/bug.h>
+#include <linux/types.h>
 
 /* sizeof(struct user) for AArch32 */
 #define COMPAT_USER_SZ	296
@@ -116,11 +127,29 @@ struct pt_regs {
 		};
 	};
 	u64 orig_x0;
-	u64 syscallno;
+#ifdef __AARCH64EB__
+	u32 unused2;
+	s32 syscallno;
+#else
+	s32 syscallno;
+	u32 unused2;
+#endif
+
 	u64 orig_addr_limit;
 	u64 unused;	// maintain 16 byte alignment
+	u64 stackframe[2];
 };
 
+static inline bool in_syscall(struct pt_regs const *regs)
+{
+	return regs->syscallno != NO_SYSCALL;
+}
+
+static inline void forget_syscall(struct pt_regs *regs)
+{
+	regs->syscallno = NO_SYSCALL;
+}
+
 #define MAX_REG_OFFSET offsetof(struct pt_regs, pstate)
 
 #define arch_has_single_step()	(1)
diff --git a/arch/arm64/include/asm/signal32.h b/arch/arm64/include/asm/signal32.h
index eeaa97559bab..81abea0b7650 100644
--- a/arch/arm64/include/asm/signal32.h
+++ b/arch/arm64/include/asm/signal32.h
@@ -22,8 +22,6 @@
 
 #define AARCH32_KERN_SIGRET_CODE_OFFSET	0x500
 
-extern const compat_ulong_t aarch32_sigret_code[6];
-
 int compat_setup_frame(int usig, struct ksignal *ksig, sigset_t *set,
 		       struct pt_regs *regs);
 int compat_setup_rt_frame(int usig, struct ksignal *ksig, sigset_t *set,
diff --git a/arch/arm64/include/asm/simd.h b/arch/arm64/include/asm/simd.h
new file mode 100644
index 000000000000..fa8b3fe932e6
--- /dev/null
+++ b/arch/arm64/include/asm/simd.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#ifndef __ASM_SIMD_H
+#define __ASM_SIMD_H
+
+#include <linux/compiler.h>
+#include <linux/irqflags.h>
+#include <linux/percpu.h>
+#include <linux/preempt.h>
+#include <linux/types.h>
+
+#ifdef CONFIG_KERNEL_MODE_NEON
+
+DECLARE_PER_CPU(bool, kernel_neon_busy);
+
+/*
+ * may_use_simd - whether it is allowable at this time to issue SIMD
+ *                instructions or access the SIMD register file
+ *
+ * Callers must not assume that the result remains true beyond the next
+ * preempt_enable() or return from softirq context.
+ */
+static __must_check inline bool may_use_simd(void)
+{
+	/*
+	 * The raw_cpu_read() is racy if called with preemption enabled.
+	 * This is not a bug: kernel_neon_busy is only set when
+	 * preemption is disabled, so we cannot migrate to another CPU
+	 * while it is set, nor can we migrate to a CPU where it is set.
+	 * So, if we find it clear on some CPU then we're guaranteed to
+	 * find it clear on any CPU we could migrate to.
+	 *
+	 * If we are in between kernel_neon_begin()...kernel_neon_end(),
+	 * the flag will be set, but preemption is also disabled, so we
+	 * can't migrate to another CPU and spuriously see it become
+	 * false.
+	 */
+	return !in_irq() && !irqs_disabled() && !in_nmi() &&
+		!raw_cpu_read(kernel_neon_busy);
+}
+
+#else /* ! CONFIG_KERNEL_MODE_NEON */
+
+static __must_check inline bool may_use_simd(void) {
+	return false;
+}
+
+#endif /* ! CONFIG_KERNEL_MODE_NEON */
+
+#endif
diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h
index 55f08c5acfad..f82b447bd34f 100644
--- a/arch/arm64/include/asm/smp.h
+++ b/arch/arm64/include/asm/smp.h
@@ -148,7 +148,7 @@ static inline void cpu_panic_kernel(void)
  */
 bool cpus_are_stuck_in_kernel(void);
 
-extern void smp_send_crash_stop(void);
+extern void crash_smp_send_stop(void);
 extern bool smp_crash_stop_failed(void);
 
 #endif /* ifndef __ASSEMBLY__ */
diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h
index cae331d553f8..95ad7102b63c 100644
--- a/arch/arm64/include/asm/spinlock.h
+++ b/arch/arm64/include/asm/spinlock.h
@@ -26,58 +26,6 @@
  * The memory barriers are implicit with the load-acquire and store-release
  * instructions.
  */
-static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
-{
-	unsigned int tmp;
-	arch_spinlock_t lockval;
-	u32 owner;
-
-	/*
-	 * Ensure prior spin_lock operations to other locks have completed
-	 * on this CPU before we test whether "lock" is locked.
-	 */
-	smp_mb();
-	owner = READ_ONCE(lock->owner) << 16;
-
-	asm volatile(
-"	sevl\n"
-"1:	wfe\n"
-"2:	ldaxr	%w0, %2\n"
-	/* Is the lock free? */
-"	eor	%w1, %w0, %w0, ror #16\n"
-"	cbz	%w1, 3f\n"
-	/* Lock taken -- has there been a subsequent unlock->lock transition? */
-"	eor	%w1, %w3, %w0, lsl #16\n"
-"	cbz	%w1, 1b\n"
-	/*
-	 * The owner has been updated, so there was an unlock->lock
-	 * transition that we missed. That means we can rely on the
-	 * store-release of the unlock operation paired with the
-	 * load-acquire of the lock operation to publish any of our
-	 * previous stores to the new lock owner and therefore don't
-	 * need to bother with the writeback below.
-	 */
-"	b	4f\n"
-"3:\n"
-	/*
-	 * Serialise against any concurrent lockers by writing back the
-	 * unlocked lock value
-	 */
-	ARM64_LSE_ATOMIC_INSN(
-	/* LL/SC */
-"	stxr	%w1, %w0, %2\n"
-	__nops(2),
-	/* LSE atomics */
-"	mov	%w1, %w0\n"
-"	cas	%w0, %w0, %2\n"
-"	eor	%w1, %w1, %w0\n")
-	/* Somebody else wrote to the lock, GOTO 10 and reload the value */
-"	cbnz	%w1, 2b\n"
-"4:"
-	: "=&r" (lockval), "=&r" (tmp), "+Q" (*lock)
-	: "r" (owner)
-	: "memory");
-}
 
 #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
 
@@ -176,7 +124,11 @@ static inline int arch_spin_value_unlocked(arch_spinlock_t lock)
 
 static inline int arch_spin_is_locked(arch_spinlock_t *lock)
 {
-	smp_mb(); /* See arch_spin_unlock_wait */
+	/*
+	 * Ensure prior spin_lock operations to other locks have completed
+	 * on this CPU before we test whether "lock" is locked.
+	 */
+	smp_mb(); /* ^^^ */
 	return !arch_spin_value_unlocked(READ_ONCE(*lock));
 }
 
@@ -358,14 +310,7 @@ static inline int arch_read_trylock(arch_rwlock_t *rw)
 #define arch_read_relax(lock)	cpu_relax()
 #define arch_write_relax(lock)	cpu_relax()
 
-/*
- * Accesses appearing in program order before a spin_lock() operation
- * can be reordered with accesses inside the critical section, by virtue
- * of arch_spin_lock being constructed using acquire semantics.
- *
- * In cases where this is problematic (e.g. try_to_wake_up), an
- * smp_mb__before_spinlock() can restore the required ordering.
- */
-#define smp_mb__before_spinlock()	smp_mb()
+/* See include/linux/spinlock.h */
+#define smp_mb__after_spinlock()	smp_mb()
 
 #endif /* __ASM_SPINLOCK_H */
diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h
index 5b6eafccc5d8..6ad30776e984 100644
--- a/arch/arm64/include/asm/stacktrace.h
+++ b/arch/arm64/include/asm/stacktrace.h
@@ -16,11 +16,15 @@
 #ifndef __ASM_STACKTRACE_H
 #define __ASM_STACKTRACE_H
 
-struct task_struct;
+#include <linux/percpu.h>
+#include <linux/sched.h>
+#include <linux/sched/task_stack.h>
+
+#include <asm/memory.h>
+#include <asm/ptrace.h>
 
 struct stackframe {
 	unsigned long fp;
-	unsigned long sp;
 	unsigned long pc;
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 	unsigned int graph;
@@ -32,4 +36,57 @@ extern void walk_stackframe(struct task_struct *tsk, struct stackframe *frame,
 			    int (*fn)(struct stackframe *, void *), void *data);
 extern void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk);
 
+DECLARE_PER_CPU(unsigned long *, irq_stack_ptr);
+
+static inline bool on_irq_stack(unsigned long sp)
+{
+	unsigned long low = (unsigned long)raw_cpu_read(irq_stack_ptr);
+	unsigned long high = low + IRQ_STACK_SIZE;
+
+	if (!low)
+		return false;
+
+	return (low <= sp && sp < high);
+}
+
+static inline bool on_task_stack(struct task_struct *tsk, unsigned long sp)
+{
+	unsigned long low = (unsigned long)task_stack_page(tsk);
+	unsigned long high = low + THREAD_SIZE;
+
+	return (low <= sp && sp < high);
+}
+
+#ifdef CONFIG_VMAP_STACK
+DECLARE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack);
+
+static inline bool on_overflow_stack(unsigned long sp)
+{
+	unsigned long low = (unsigned long)raw_cpu_ptr(overflow_stack);
+	unsigned long high = low + OVERFLOW_STACK_SIZE;
+
+	return (low <= sp && sp < high);
+}
+#else
+static inline bool on_overflow_stack(unsigned long sp) { return false; }
+#endif
+
+/*
+ * We can only safely access per-cpu stacks from current in a non-preemptible
+ * context.
+ */
+static inline bool on_accessible_stack(struct task_struct *tsk, unsigned long sp)
+{
+	if (on_task_stack(tsk, sp))
+		return true;
+	if (tsk != current || preemptible())
+		return false;
+	if (on_irq_stack(sp))
+		return true;
+	if (on_overflow_stack(sp))
+		return true;
+
+	return false;
+}
+
 #endif	/* __ASM_STACKTRACE_H */
diff --git a/arch/arm64/include/asm/string.h b/arch/arm64/include/asm/string.h
index d0aa42907569..dd95d33a5bd5 100644
--- a/arch/arm64/include/asm/string.h
+++ b/arch/arm64/include/asm/string.h
@@ -52,6 +52,10 @@ extern void *__memset(void *, int, __kernel_size_t);
 #define __HAVE_ARCH_MEMCMP
 extern int memcmp(const void *, const void *, size_t);
 
+#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
+#define __HAVE_ARCH_MEMCPY_FLUSHCACHE
+void memcpy_flushcache(void *dst, const void *src, size_t cnt);
+#endif
 
 #if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__)
 
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 248339e4aaf5..f707fed5886f 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -329,6 +329,7 @@
 #define ID_AA64ISAR1_LRCPC_SHIFT	20
 #define ID_AA64ISAR1_FCMA_SHIFT		16
 #define ID_AA64ISAR1_JSCVT_SHIFT	12
+#define ID_AA64ISAR1_DPB_SHIFT		0
 
 /* id_aa64pfr0 */
 #define ID_AA64PFR0_GIC_SHIFT		24
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index 46c3b93cf865..ddded6497a8a 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -23,19 +23,11 @@
 
 #include <linux/compiler.h>
 
-#ifdef CONFIG_ARM64_4K_PAGES
-#define THREAD_SIZE_ORDER	2
-#elif defined(CONFIG_ARM64_16K_PAGES)
-#define THREAD_SIZE_ORDER	0
-#endif
-
-#define THREAD_SIZE		16384
-#define THREAD_START_SP		(THREAD_SIZE - 16)
-
 #ifndef __ASSEMBLY__
 
 struct task_struct;
 
+#include <asm/memory.h>
 #include <asm/stack_pointer.h>
 #include <asm/types.h>
 
@@ -68,6 +60,9 @@ struct thread_info {
 #define thread_saved_fp(tsk)	\
 	((unsigned long)(tsk->thread.cpu_context.fp))
 
+void arch_setup_new_exec(void);
+#define arch_setup_new_exec     arch_setup_new_exec
+
 #endif
 
 /*
@@ -86,6 +81,7 @@ struct thread_info {
 #define TIF_NOTIFY_RESUME	2	/* callback before returning to user */
 #define TIF_FOREIGN_FPSTATE	3	/* CPU's FP state is not current's */
 #define TIF_UPROBE		4	/* uprobe breakpoint or singlestep */
+#define TIF_FSCHECK		5	/* Check FS is USER_DS on return */
 #define TIF_NOHZ		7
 #define TIF_SYSCALL_TRACE	8
 #define TIF_SYSCALL_AUDIT	9
@@ -107,11 +103,12 @@ struct thread_info {
 #define _TIF_SYSCALL_TRACEPOINT	(1 << TIF_SYSCALL_TRACEPOINT)
 #define _TIF_SECCOMP		(1 << TIF_SECCOMP)
 #define _TIF_UPROBE		(1 << TIF_UPROBE)
+#define _TIF_FSCHECK		(1 << TIF_FSCHECK)
 #define _TIF_32BIT		(1 << TIF_32BIT)
 
 #define _TIF_WORK_MASK		(_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
 				 _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \
-				 _TIF_UPROBE)
+				 _TIF_UPROBE | _TIF_FSCHECK)
 
 #define _TIF_SYSCALL_WORK	(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
 				 _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \
diff --git a/arch/arm64/include/asm/traps.h b/arch/arm64/include/asm/traps.h
index 02e9035b0685..d131501c6222 100644
--- a/arch/arm64/include/asm/traps.h
+++ b/arch/arm64/include/asm/traps.h
@@ -37,18 +37,11 @@ void unregister_undef_hook(struct undef_hook *hook);
 
 void arm64_notify_segfault(struct pt_regs *regs, unsigned long addr);
 
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
 static inline int __in_irqentry_text(unsigned long ptr)
 {
 	return ptr >= (unsigned long)&__irqentry_text_start &&
 	       ptr < (unsigned long)&__irqentry_text_end;
 }
-#else
-static inline int __in_irqentry_text(unsigned long ptr)
-{
-	return 0;
-}
-#endif
 
 static inline int in_exception_text(unsigned long ptr)
 {
@@ -60,4 +53,9 @@ static inline int in_exception_text(unsigned long ptr)
 	return in ? : __in_irqentry_text(ptr);
 }
 
+static inline int in_entry_text(unsigned long ptr)
+{
+	return ptr >= (unsigned long)&__entry_text_start &&
+	       ptr < (unsigned long)&__entry_text_end;
+}
 #endif
diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
index fab46a0ea223..fc0f9eb66039 100644
--- a/arch/arm64/include/asm/uaccess.h
+++ b/arch/arm64/include/asm/uaccess.h
@@ -45,6 +45,9 @@ static inline void set_fs(mm_segment_t fs)
 {
 	current_thread_info()->addr_limit = fs;
 
+	/* On user-mode return, check fs is correct */
+	set_thread_flag(TIF_FSCHECK);
+
 	/*
 	 * Enable/disable UAO so that copy_to_user() etc can access
 	 * kernel memory with the unprivileged instructions.
@@ -347,4 +350,16 @@ extern long strncpy_from_user(char *dest, const char __user *src, long count);
 
 extern __must_check long strnlen_user(const char __user *str, long n);
 
+#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
+struct page;
+void memcpy_page_flushcache(char *to, struct page *page, size_t offset, size_t len);
+extern unsigned long __must_check __copy_user_flushcache(void *to, const void __user *from, unsigned long n);
+
+static inline int __copy_from_user_flushcache(void *dst, const void __user *src, unsigned size)
+{
+	kasan_check_write(dst, size);
+	return __copy_user_flushcache(dst, src, size);
+}
+#endif
+
 #endif /* __ASM_UACCESS_H */
diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h
index 4e187ce2a811..4b9344cba83a 100644
--- a/arch/arm64/include/uapi/asm/hwcap.h
+++ b/arch/arm64/include/uapi/asm/hwcap.h
@@ -35,5 +35,6 @@
 #define HWCAP_JSCVT		(1 << 13)
 #define HWCAP_FCMA		(1 << 14)
 #define HWCAP_LRCPC		(1 << 15)
+#define HWCAP_DCPOP		(1 << 16)
 
 #endif /* _UAPI__ASM_HWCAP_H */
diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c
index e25c11e727fe..b3162715ed78 100644
--- a/arch/arm64/kernel/acpi.c
+++ b/arch/arm64/kernel/acpi.c
@@ -95,7 +95,7 @@ static int __init dt_scan_depth1_nodes(unsigned long node,
  * __acpi_map_table() will be called before page_init(), so early_ioremap()
  * or early_memremap() should be called here to for ACPI table mapping.
  */
-char *__init __acpi_map_table(unsigned long phys, unsigned long size)
+void __init __iomem *__acpi_map_table(unsigned long phys, unsigned long size)
 {
 	if (!size)
 		return NULL;
@@ -103,7 +103,7 @@ char *__init __acpi_map_table(unsigned long phys, unsigned long size)
 	return early_memremap(phys, size);
 }
 
-void __init __acpi_unmap_table(char *map, unsigned long size)
+void __init __acpi_unmap_table(void __iomem *map, unsigned long size)
 {
 	if (!map || !size)
 		return;
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index b3bb7ef97bc8..71bf088f1e4b 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -75,6 +75,7 @@ int main(void)
   DEFINE(S_ORIG_X0,		offsetof(struct pt_regs, orig_x0));
   DEFINE(S_SYSCALLNO,		offsetof(struct pt_regs, syscallno));
   DEFINE(S_ORIG_ADDR_LIMIT,	offsetof(struct pt_regs, orig_addr_limit));
+  DEFINE(S_STACKFRAME,		offsetof(struct pt_regs, stackframe));
   DEFINE(S_FRAME_SIZE,		sizeof(struct pt_regs));
   BLANK();
   DEFINE(MM_CONTEXT_ID,		offsetof(struct mm_struct, context.id.counter));
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 9f9e0064c8c1..cd52d365d1f0 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -120,6 +120,7 @@ static const struct arm64_ftr_bits ftr_id_aa64isar1[] = {
 	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_LRCPC_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_FCMA_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_JSCVT_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_DPB_SHIFT, 4, 0),
 	ARM64_FTR_END,
 };
 
@@ -888,6 +889,17 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.min_field_value = 0,
 		.matches = has_no_fpsimd,
 	},
+#ifdef CONFIG_ARM64_PMEM
+	{
+		.desc = "Data cache clean to Point of Persistence",
+		.capability = ARM64_HAS_DCPOP,
+		.def_scope = SCOPE_SYSTEM,
+		.matches = has_cpuid_feature,
+		.sys_reg = SYS_ID_AA64ISAR1_EL1,
+		.field_pos = ID_AA64ISAR1_DPB_SHIFT,
+		.min_field_value = 1,
+	},
+#endif
 	{},
 };
 
@@ -916,6 +928,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
 	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_FPHP),
 	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_ASIMD),
 	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_ASIMDHP),
+	HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DPB_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_DCPOP),
 	HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_JSCVT_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_JSCVT),
 	HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FCMA_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_FCMA),
 	HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_LRCPC),
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index f495ee5049fd..311885962830 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -68,6 +68,7 @@ static const char *const hwcap_str[] = {
 	"jscvt",
 	"fcma",
 	"lrcpc",
+	"dcpop",
 	NULL
 };
 
diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S
index c44a82f146b1..6a27cd6dbfa6 100644
--- a/arch/arm64/kernel/entry-fpsimd.S
+++ b/arch/arm64/kernel/entry-fpsimd.S
@@ -41,27 +41,3 @@ ENTRY(fpsimd_load_state)
 	fpsimd_restore x0, 8
 	ret
 ENDPROC(fpsimd_load_state)
-
-#ifdef CONFIG_KERNEL_MODE_NEON
-
-/*
- * Save the bottom n FP registers.
- *
- * x0 - pointer to struct fpsimd_partial_state
- */
-ENTRY(fpsimd_save_partial_state)
-	fpsimd_save_partial x0, 1, 8, 9
-	ret
-ENDPROC(fpsimd_save_partial_state)
-
-/*
- * Load the bottom n FP registers.
- *
- * x0 - pointer to struct fpsimd_partial_state
- */
-ENTRY(fpsimd_load_partial_state)
-	fpsimd_restore_partial x0, 8, 9
-	ret
-ENDPROC(fpsimd_load_partial_state)
-
-#endif
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index b738880350f9..e1c59d4008a8 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -69,8 +69,55 @@
 #define BAD_FIQ		2
 #define BAD_ERROR	3
 
-	.macro	kernel_entry, el, regsize = 64
+	.macro kernel_ventry	label
+	.align 7
 	sub	sp, sp, #S_FRAME_SIZE
+#ifdef CONFIG_VMAP_STACK
+	/*
+	 * Test whether the SP has overflowed, without corrupting a GPR.
+	 * Task and IRQ stacks are aligned to (1 << THREAD_SHIFT).
+	 */
+	add	sp, sp, x0			// sp' = sp + x0
+	sub	x0, sp, x0			// x0' = sp' - x0 = (sp + x0) - x0 = sp
+	tbnz	x0, #THREAD_SHIFT, 0f
+	sub	x0, sp, x0			// x0'' = sp' - x0' = (sp + x0) - sp = x0
+	sub	sp, sp, x0			// sp'' = sp' - x0 = (sp + x0) - x0 = sp
+	b	\label
+
+0:
+	/*
+	 * Either we've just detected an overflow, or we've taken an exception
+	 * while on the overflow stack. Either way, we won't return to
+	 * userspace, and can clobber EL0 registers to free up GPRs.
+	 */
+
+	/* Stash the original SP (minus S_FRAME_SIZE) in tpidr_el0. */
+	msr	tpidr_el0, x0
+
+	/* Recover the original x0 value and stash it in tpidrro_el0 */
+	sub	x0, sp, x0
+	msr	tpidrro_el0, x0
+
+	/* Switch to the overflow stack */
+	adr_this_cpu sp, overflow_stack + OVERFLOW_STACK_SIZE, x0
+
+	/*
+	 * Check whether we were already on the overflow stack. This may happen
+	 * after panic() re-enables interrupts.
+	 */
+	mrs	x0, tpidr_el0			// sp of interrupted context
+	sub	x0, sp, x0			// delta with top of overflow stack
+	tst	x0, #~(OVERFLOW_STACK_SIZE - 1)	// within range?
+	b.ne	__bad_stack			// no? -> bad stack pointer
+
+	/* We were already on the overflow stack. Restore sp/x0 and carry on. */
+	sub	sp, sp, x0
+	mrs	x0, tpidrro_el0
+#endif
+	b	\label
+	.endm
+
+	.macro	kernel_entry, el, regsize = 64
 	.if	\regsize == 32
 	mov	w0, w0				// zero upper 32 bits of x0
 	.endif
@@ -111,6 +158,18 @@
 	mrs	x23, spsr_el1
 	stp	lr, x21, [sp, #S_LR]
 
+	/*
+	 * In order to be able to dump the contents of struct pt_regs at the
+	 * time the exception was taken (in case we attempt to walk the call
+	 * stack later), chain it together with the stack frames.
+	 */
+	.if \el == 0
+	stp	xzr, xzr, [sp, #S_STACKFRAME]
+	.else
+	stp	x29, x22, [sp, #S_STACKFRAME]
+	.endif
+	add	x29, sp, #S_STACKFRAME
+
 #ifdef CONFIG_ARM64_SW_TTBR0_PAN
 	/*
 	 * Set the TTBR0 PAN bit in SPSR. When the exception is taken from
@@ -138,12 +197,10 @@ alternative_else_nop_endif
 
 	stp	x22, x23, [sp, #S_PC]
 
-	/*
-	 * Set syscallno to -1 by default (overridden later if real syscall).
-	 */
+	/* Not in a syscall by default (el0_svc overwrites for real syscall) */
 	.if	\el == 0
-	mvn	x21, xzr
-	str	x21, [sp, #S_SYSCALLNO]
+	mov	w21, #NO_SYSCALL
+	str	w21, [sp, #S_SYSCALLNO]
 	.endif
 
 	/*
@@ -259,20 +316,12 @@ alternative_else_nop_endif
 	and	x25, x25, #~(THREAD_SIZE - 1)
 	cbnz	x25, 9998f
 
-	adr_this_cpu x25, irq_stack, x26
-	mov	x26, #IRQ_STACK_START_SP
+	ldr_this_cpu x25, irq_stack_ptr, x26
+	mov	x26, #IRQ_STACK_SIZE
 	add	x26, x25, x26
 
 	/* switch to the irq stack */
 	mov	sp, x26
-
-	/*
-	 * Add a dummy stack frame, this non-standard format is fixed up
-	 * by unwind_frame()
-	 */
-	stp     x29, x19, [sp, #-16]!
-	mov	x29, sp
-
 9998:
 	.endm
 
@@ -290,8 +339,9 @@ alternative_else_nop_endif
  *
  * x7 is reserved for the system call number in 32-bit mode.
  */
-sc_nr	.req	x25		// number of system calls
-scno	.req	x26		// syscall number
+wsc_nr	.req	w25		// number of system calls
+wscno	.req	w26		// syscall number
+xscno	.req	x26		// syscall number (zero-extended)
 stbl	.req	x27		// syscall table pointer
 tsk	.req	x28		// current thread_info
 
@@ -315,34 +365,62 @@ tsk	.req	x28		// current thread_info
 
 	.align	11
 ENTRY(vectors)
-	ventry	el1_sync_invalid		// Synchronous EL1t
-	ventry	el1_irq_invalid			// IRQ EL1t
-	ventry	el1_fiq_invalid			// FIQ EL1t
-	ventry	el1_error_invalid		// Error EL1t
+	kernel_ventry	el1_sync_invalid		// Synchronous EL1t
+	kernel_ventry	el1_irq_invalid			// IRQ EL1t
+	kernel_ventry	el1_fiq_invalid			// FIQ EL1t
+	kernel_ventry	el1_error_invalid		// Error EL1t
 
-	ventry	el1_sync			// Synchronous EL1h
-	ventry	el1_irq				// IRQ EL1h
-	ventry	el1_fiq_invalid			// FIQ EL1h
-	ventry	el1_error_invalid		// Error EL1h
+	kernel_ventry	el1_sync			// Synchronous EL1h
+	kernel_ventry	el1_irq				// IRQ EL1h
+	kernel_ventry	el1_fiq_invalid			// FIQ EL1h
+	kernel_ventry	el1_error_invalid		// Error EL1h
 
-	ventry	el0_sync			// Synchronous 64-bit EL0
-	ventry	el0_irq				// IRQ 64-bit EL0
-	ventry	el0_fiq_invalid			// FIQ 64-bit EL0
-	ventry	el0_error_invalid		// Error 64-bit EL0
+	kernel_ventry	el0_sync			// Synchronous 64-bit EL0
+	kernel_ventry	el0_irq				// IRQ 64-bit EL0
+	kernel_ventry	el0_fiq_invalid			// FIQ 64-bit EL0
+	kernel_ventry	el0_error_invalid		// Error 64-bit EL0
 
 #ifdef CONFIG_COMPAT
-	ventry	el0_sync_compat			// Synchronous 32-bit EL0
-	ventry	el0_irq_compat			// IRQ 32-bit EL0
-	ventry	el0_fiq_invalid_compat		// FIQ 32-bit EL0
-	ventry	el0_error_invalid_compat	// Error 32-bit EL0
+	kernel_ventry	el0_sync_compat			// Synchronous 32-bit EL0
+	kernel_ventry	el0_irq_compat			// IRQ 32-bit EL0
+	kernel_ventry	el0_fiq_invalid_compat		// FIQ 32-bit EL0
+	kernel_ventry	el0_error_invalid_compat	// Error 32-bit EL0
 #else
-	ventry	el0_sync_invalid		// Synchronous 32-bit EL0
-	ventry	el0_irq_invalid			// IRQ 32-bit EL0
-	ventry	el0_fiq_invalid			// FIQ 32-bit EL0
-	ventry	el0_error_invalid		// Error 32-bit EL0
+	kernel_ventry	el0_sync_invalid		// Synchronous 32-bit EL0
+	kernel_ventry	el0_irq_invalid			// IRQ 32-bit EL0
+	kernel_ventry	el0_fiq_invalid			// FIQ 32-bit EL0
+	kernel_ventry	el0_error_invalid		// Error 32-bit EL0
 #endif
 END(vectors)
 
+#ifdef CONFIG_VMAP_STACK
+	/*
+	 * We detected an overflow in kernel_ventry, which switched to the
+	 * overflow stack. Stash the exception regs, and head to our overflow
+	 * handler.
+	 */
+__bad_stack:
+	/* Restore the original x0 value */
+	mrs	x0, tpidrro_el0
+
+	/*
+	 * Store the original GPRs to the new stack. The orginal SP (minus
+	 * S_FRAME_SIZE) was stashed in tpidr_el0 by kernel_ventry.
+	 */
+	sub	sp, sp, #S_FRAME_SIZE
+	kernel_entry 1
+	mrs	x0, tpidr_el0
+	add	x0, x0, #S_FRAME_SIZE
+	str	x0, [sp, #S_SP]
+
+	/* Stash the regs for handle_bad_stack */
+	mov	x0, sp
+
+	/* Time to die */
+	bl	handle_bad_stack
+	ASM_BUG()
+#endif /* CONFIG_VMAP_STACK */
+
 /*
  * Invalid mode handlers
  */
@@ -351,7 +429,8 @@ END(vectors)
 	mov	x0, sp
 	mov	x1, #\reason
 	mrs	x2, esr_el1
-	b	bad_mode
+	bl	bad_mode
+	ASM_BUG()
 	.endm
 
 el0_sync_invalid:
@@ -448,14 +527,16 @@ el1_sp_pc:
 	mrs	x0, far_el1
 	enable_dbg
 	mov	x2, sp
-	b	do_sp_pc_abort
+	bl	do_sp_pc_abort
+	ASM_BUG()
 el1_undef:
 	/*
 	 * Undefined instruction
 	 */
 	enable_dbg
 	mov	x0, sp
-	b	do_undefinstr
+	bl	do_undefinstr
+	ASM_BUG()
 el1_dbg:
 	/*
 	 * Debug exception handling
@@ -473,7 +554,8 @@ el1_inv:
 	mov	x0, sp
 	mov	x2, x1
 	mov	x1, #BAD_SYNC
-	b	bad_mode
+	bl	bad_mode
+	ASM_BUG()
 ENDPROC(el1_sync)
 
 	.align	6
@@ -577,8 +659,8 @@ el0_svc_compat:
 	 * AArch32 syscall handling
 	 */
 	adrp	stbl, compat_sys_call_table	// load compat syscall table pointer
-	uxtw	scno, w7			// syscall number in w7 (r7)
-	mov     sc_nr, #__NR_compat_syscalls
+	mov	wscno, w7			// syscall number in w7 (r7)
+	mov     wsc_nr, #__NR_compat_syscalls
 	b	el0_svc_naked
 
 	.align	6
@@ -707,38 +789,6 @@ el0_irq_naked:
 ENDPROC(el0_irq)
 
 /*
- * Register switch for AArch64. The callee-saved registers need to be saved
- * and restored. On entry:
- *   x0 = previous task_struct (must be preserved across the switch)
- *   x1 = next task_struct
- * Previous and next are guaranteed not to be the same.
- *
- */
-ENTRY(cpu_switch_to)
-	mov	x10, #THREAD_CPU_CONTEXT
-	add	x8, x0, x10
-	mov	x9, sp
-	stp	x19, x20, [x8], #16		// store callee-saved registers
-	stp	x21, x22, [x8], #16
-	stp	x23, x24, [x8], #16
-	stp	x25, x26, [x8], #16
-	stp	x27, x28, [x8], #16
-	stp	x29, x9, [x8], #16
-	str	lr, [x8]
-	add	x8, x1, x10
-	ldp	x19, x20, [x8], #16		// restore callee-saved registers
-	ldp	x21, x22, [x8], #16
-	ldp	x23, x24, [x8], #16
-	ldp	x25, x26, [x8], #16
-	ldp	x27, x28, [x8], #16
-	ldp	x29, x9, [x8], #16
-	ldr	lr, [x8]
-	mov	sp, x9
-	msr	sp_el0, x1
-	ret
-ENDPROC(cpu_switch_to)
-
-/*
  * This is the fast syscall return path.  We do as little as possible here,
  * and this includes saving x0 back into the kernel stack.
  */
@@ -781,36 +831,24 @@ finish_ret_to_user:
 ENDPROC(ret_to_user)
 
 /*
- * This is how we return from a fork.
- */
-ENTRY(ret_from_fork)
-	bl	schedule_tail
-	cbz	x19, 1f				// not a kernel thread
-	mov	x0, x20
-	blr	x19
-1:	get_thread_info tsk
-	b	ret_to_user
-ENDPROC(ret_from_fork)
-
-/*
  * SVC handler.
  */
 	.align	6
 el0_svc:
 	adrp	stbl, sys_call_table		// load syscall table pointer
-	uxtw	scno, w8			// syscall number in w8
-	mov	sc_nr, #__NR_syscalls
+	mov	wscno, w8			// syscall number in w8
+	mov	wsc_nr, #__NR_syscalls
 el0_svc_naked:					// compat entry point
-	stp	x0, scno, [sp, #S_ORIG_X0]	// save the original x0 and syscall number
+	stp	x0, xscno, [sp, #S_ORIG_X0]	// save the original x0 and syscall number
 	enable_dbg_and_irq
 	ct_user_exit 1
 
 	ldr	x16, [tsk, #TSK_TI_FLAGS]	// check for syscall hooks
 	tst	x16, #_TIF_SYSCALL_WORK
 	b.ne	__sys_trace
-	cmp     scno, sc_nr                     // check upper syscall limit
+	cmp     wscno, wsc_nr			// check upper syscall limit
 	b.hs	ni_sys
-	ldr	x16, [stbl, scno, lsl #3]	// address in the syscall table
+	ldr	x16, [stbl, xscno, lsl #3]	// address in the syscall table
 	blr	x16				// call sys_* routine
 	b	ret_fast_syscall
 ni_sys:
@@ -824,24 +862,23 @@ ENDPROC(el0_svc)
 	 * switches, and waiting for our parent to respond.
 	 */
 __sys_trace:
-	mov	w0, #-1				// set default errno for
-	cmp     scno, x0			// user-issued syscall(-1)
+	cmp     wscno, #NO_SYSCALL		// user-issued syscall(-1)?
 	b.ne	1f
-	mov	x0, #-ENOSYS
+	mov	x0, #-ENOSYS			// set default errno if so
 	str	x0, [sp, #S_X0]
 1:	mov	x0, sp
 	bl	syscall_trace_enter
-	cmp	w0, #-1				// skip the syscall?
+	cmp	w0, #NO_SYSCALL			// skip the syscall?
 	b.eq	__sys_trace_return_skipped
-	uxtw	scno, w0			// syscall number (possibly new)
+	mov	wscno, w0			// syscall number (possibly new)
 	mov	x1, sp				// pointer to regs
-	cmp	scno, sc_nr			// check upper syscall limit
+	cmp	wscno, wsc_nr			// check upper syscall limit
 	b.hs	__ni_sys_trace
 	ldp	x0, x1, [sp]			// restore the syscall args
 	ldp	x2, x3, [sp, #S_X2]
 	ldp	x4, x5, [sp, #S_X4]
 	ldp	x6, x7, [sp, #S_X6]
-	ldr	x16, [stbl, scno, lsl #3]	// address in the syscall table
+	ldr	x16, [stbl, xscno, lsl #3]	// address in the syscall table
 	blr	x16				// call sys_* routine
 
 __sys_trace_return:
@@ -865,3 +902,49 @@ ENTRY(sys_rt_sigreturn_wrapper)
 	mov	x0, sp
 	b	sys_rt_sigreturn
 ENDPROC(sys_rt_sigreturn_wrapper)
+
+/*
+ * Register switch for AArch64. The callee-saved registers need to be saved
+ * and restored. On entry:
+ *   x0 = previous task_struct (must be preserved across the switch)
+ *   x1 = next task_struct
+ * Previous and next are guaranteed not to be the same.
+ *
+ */
+ENTRY(cpu_switch_to)
+	mov	x10, #THREAD_CPU_CONTEXT
+	add	x8, x0, x10
+	mov	x9, sp
+	stp	x19, x20, [x8], #16		// store callee-saved registers
+	stp	x21, x22, [x8], #16
+	stp	x23, x24, [x8], #16
+	stp	x25, x26, [x8], #16
+	stp	x27, x28, [x8], #16
+	stp	x29, x9, [x8], #16
+	str	lr, [x8]
+	add	x8, x1, x10
+	ldp	x19, x20, [x8], #16		// restore callee-saved registers
+	ldp	x21, x22, [x8], #16
+	ldp	x23, x24, [x8], #16
+	ldp	x25, x26, [x8], #16
+	ldp	x27, x28, [x8], #16
+	ldp	x29, x9, [x8], #16
+	ldr	lr, [x8]
+	mov	sp, x9
+	msr	sp_el0, x1
+	ret
+ENDPROC(cpu_switch_to)
+NOKPROBE(cpu_switch_to)
+
+/*
+ * This is how we return from a fork.
+ */
+ENTRY(ret_from_fork)
+	bl	schedule_tail
+	cbz	x19, 1f				// not a kernel thread
+	mov	x0, x20
+	blr	x19
+1:	get_thread_info tsk
+	b	ret_to_user
+ENDPROC(ret_from_fork)
+NOKPROBE(ret_from_fork)
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index c7b4995868e1..3a68cf38a6b3 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -17,16 +17,19 @@
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
+#include <linux/bottom_half.h>
 #include <linux/cpu.h>
 #include <linux/cpu_pm.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
+#include <linux/percpu.h>
+#include <linux/preempt.h>
 #include <linux/sched/signal.h>
 #include <linux/signal.h>
-#include <linux/hardirq.h>
 
 #include <asm/fpsimd.h>
 #include <asm/cputype.h>
+#include <asm/simd.h>
 
 #define FPEXC_IOF	(1 << 0)
 #define FPEXC_DZF	(1 << 1)
@@ -62,6 +65,13 @@
  * CPU currently contain the most recent userland FPSIMD state of the current
  * task.
  *
+ * In order to allow softirq handlers to use FPSIMD, kernel_neon_begin() may
+ * save the task's FPSIMD context back to task_struct from softirq context.
+ * To prevent this from racing with the manipulation of the task's FPSIMD state
+ * from task context and thereby corrupting the state, it is necessary to
+ * protect any manipulation of a task's fpsimd_state or TIF_FOREIGN_FPSTATE
+ * flag with local_bh_disable() unless softirqs are already masked.
+ *
  * For a certain task, the sequence may look something like this:
  * - the task gets scheduled in; if both the task's fpsimd_state.cpu field
  *   contains the id of the current CPU, and the CPU's fpsimd_last_state per-cpu
@@ -161,11 +171,14 @@ void fpsimd_flush_thread(void)
 {
 	if (!system_supports_fpsimd())
 		return;
-	preempt_disable();
+
+	local_bh_disable();
+
 	memset(&current->thread.fpsimd_state, 0, sizeof(struct fpsimd_state));
 	fpsimd_flush_task_state(current);
 	set_thread_flag(TIF_FOREIGN_FPSTATE);
-	preempt_enable();
+
+	local_bh_enable();
 }
 
 /*
@@ -176,10 +189,13 @@ void fpsimd_preserve_current_state(void)
 {
 	if (!system_supports_fpsimd())
 		return;
-	preempt_disable();
+
+	local_bh_disable();
+
 	if (!test_thread_flag(TIF_FOREIGN_FPSTATE))
 		fpsimd_save_state(&current->thread.fpsimd_state);
-	preempt_enable();
+
+	local_bh_enable();
 }
 
 /*
@@ -191,15 +207,18 @@ void fpsimd_restore_current_state(void)
 {
 	if (!system_supports_fpsimd())
 		return;
-	preempt_disable();
+
+	local_bh_disable();
+
 	if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
 		struct fpsimd_state *st = &current->thread.fpsimd_state;
 
 		fpsimd_load_state(st);
-		this_cpu_write(fpsimd_last_state, st);
+		__this_cpu_write(fpsimd_last_state, st);
 		st->cpu = smp_processor_id();
 	}
-	preempt_enable();
+
+	local_bh_enable();
 }
 
 /*
@@ -211,15 +230,18 @@ void fpsimd_update_current_state(struct fpsimd_state *state)
 {
 	if (!system_supports_fpsimd())
 		return;
-	preempt_disable();
+
+	local_bh_disable();
+
 	fpsimd_load_state(state);
 	if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
 		struct fpsimd_state *st = &current->thread.fpsimd_state;
 
-		this_cpu_write(fpsimd_last_state, st);
+		__this_cpu_write(fpsimd_last_state, st);
 		st->cpu = smp_processor_id();
 	}
-	preempt_enable();
+
+	local_bh_enable();
 }
 
 /*
@@ -232,52 +254,122 @@ void fpsimd_flush_task_state(struct task_struct *t)
 
 #ifdef CONFIG_KERNEL_MODE_NEON
 
-static DEFINE_PER_CPU(struct fpsimd_partial_state, hardirq_fpsimdstate);
-static DEFINE_PER_CPU(struct fpsimd_partial_state, softirq_fpsimdstate);
+DEFINE_PER_CPU(bool, kernel_neon_busy);
+EXPORT_PER_CPU_SYMBOL(kernel_neon_busy);
 
 /*
  * Kernel-side NEON support functions
  */
-void kernel_neon_begin_partial(u32 num_regs)
+
+/*
+ * kernel_neon_begin(): obtain the CPU FPSIMD registers for use by the calling
+ * context
+ *
+ * Must not be called unless may_use_simd() returns true.
+ * Task context in the FPSIMD registers is saved back to memory as necessary.
+ *
+ * A matching call to kernel_neon_end() must be made before returning from the
+ * calling context.
+ *
+ * The caller may freely use the FPSIMD registers until kernel_neon_end() is
+ * called.
+ */
+void kernel_neon_begin(void)
 {
 	if (WARN_ON(!system_supports_fpsimd()))
 		return;
-	if (in_interrupt()) {
-		struct fpsimd_partial_state *s = this_cpu_ptr(
-			in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate);
 
-		BUG_ON(num_regs > 32);
-		fpsimd_save_partial_state(s, roundup(num_regs, 2));
-	} else {
-		/*
-		 * Save the userland FPSIMD state if we have one and if we
-		 * haven't done so already. Clear fpsimd_last_state to indicate
-		 * that there is no longer userland FPSIMD state in the
-		 * registers.
-		 */
-		preempt_disable();
-		if (current->mm &&
-		    !test_and_set_thread_flag(TIF_FOREIGN_FPSTATE))
-			fpsimd_save_state(&current->thread.fpsimd_state);
-		this_cpu_write(fpsimd_last_state, NULL);
-	}
+	BUG_ON(!may_use_simd());
+
+	local_bh_disable();
+
+	__this_cpu_write(kernel_neon_busy, true);
+
+	/* Save unsaved task fpsimd state, if any: */
+	if (current->mm && !test_and_set_thread_flag(TIF_FOREIGN_FPSTATE))
+		fpsimd_save_state(&current->thread.fpsimd_state);
+
+	/* Invalidate any task state remaining in the fpsimd regs: */
+	__this_cpu_write(fpsimd_last_state, NULL);
+
+	preempt_disable();
+
+	local_bh_enable();
 }
-EXPORT_SYMBOL(kernel_neon_begin_partial);
+EXPORT_SYMBOL(kernel_neon_begin);
 
+/*
+ * kernel_neon_end(): give the CPU FPSIMD registers back to the current task
+ *
+ * Must be called from a context in which kernel_neon_begin() was previously
+ * called, with no call to kernel_neon_end() in the meantime.
+ *
+ * The caller must not use the FPSIMD registers after this function is called,
+ * unless kernel_neon_begin() is called again in the meantime.
+ */
 void kernel_neon_end(void)
 {
+	bool busy;
+
 	if (!system_supports_fpsimd())
 		return;
-	if (in_interrupt()) {
-		struct fpsimd_partial_state *s = this_cpu_ptr(
-			in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate);
-		fpsimd_load_partial_state(s);
-	} else {
-		preempt_enable();
-	}
+
+	busy = __this_cpu_xchg(kernel_neon_busy, false);
+	WARN_ON(!busy);	/* No matching kernel_neon_begin()? */
+
+	preempt_enable();
 }
 EXPORT_SYMBOL(kernel_neon_end);
 
+static DEFINE_PER_CPU(struct fpsimd_state, efi_fpsimd_state);
+static DEFINE_PER_CPU(bool, efi_fpsimd_state_used);
+
+/*
+ * EFI runtime services support functions
+ *
+ * The ABI for EFI runtime services allows EFI to use FPSIMD during the call.
+ * This means that for EFI (and only for EFI), we have to assume that FPSIMD
+ * is always used rather than being an optional accelerator.
+ *
+ * These functions provide the necessary support for ensuring FPSIMD
+ * save/restore in the contexts from which EFI is used.
+ *
+ * Do not use them for any other purpose -- if tempted to do so, you are
+ * either doing something wrong or you need to propose some refactoring.
+ */
+
+/*
+ * __efi_fpsimd_begin(): prepare FPSIMD for making an EFI runtime services call
+ */
+void __efi_fpsimd_begin(void)
+{
+	if (!system_supports_fpsimd())
+		return;
+
+	WARN_ON(preemptible());
+
+	if (may_use_simd())
+		kernel_neon_begin();
+	else {
+		fpsimd_save_state(this_cpu_ptr(&efi_fpsimd_state));
+		__this_cpu_write(efi_fpsimd_state_used, true);
+	}
+}
+
+/*
+ * __efi_fpsimd_end(): clean up FPSIMD after an EFI runtime services call
+ */
+void __efi_fpsimd_end(void)
+{
+	if (!system_supports_fpsimd())
+		return;
+
+	if (__this_cpu_xchg(efi_fpsimd_state_used, false))
+		fpsimd_load_state(this_cpu_ptr(&efi_fpsimd_state));
+	else
+		kernel_neon_end();
+}
+
 #endif /* CONFIG_KERNEL_MODE_NEON */
 
 #ifdef CONFIG_CPU_PM
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index adb0910b88f5..7434ec0c7a27 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -143,8 +143,8 @@ preserve_boot_args:
 	dmb	sy				// needed before dc ivac with
 						// MMU off
 
-	add	x1, x0, #0x20			// 4 x 8 bytes
-	b	__inval_cache_range		// tail call
+	mov	x1, #0x20			// 4 x 8 bytes
+	b	__inval_dcache_area		// tail call
 ENDPROC(preserve_boot_args)
 
 /*
@@ -221,20 +221,20 @@ __create_page_tables:
 	 * dirty cache lines being evicted.
 	 */
 	adrp	x0, idmap_pg_dir
-	adrp	x1, swapper_pg_dir + SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE
-	bl	__inval_cache_range
+	ldr	x1, =(IDMAP_DIR_SIZE + SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE)
+	bl	__inval_dcache_area
 
 	/*
 	 * Clear the idmap and swapper page tables.
 	 */
 	adrp	x0, idmap_pg_dir
-	adrp	x6, swapper_pg_dir + SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE
+	ldr	x1, =(IDMAP_DIR_SIZE + SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE)
 1:	stp	xzr, xzr, [x0], #16
 	stp	xzr, xzr, [x0], #16
 	stp	xzr, xzr, [x0], #16
 	stp	xzr, xzr, [x0], #16
-	cmp	x0, x6
-	b.lo	1b
+	subs	x1, x1, #64
+	b.ne	1b
 
 	mov	x7, SWAPPER_MM_MMUFLAGS
 
@@ -307,9 +307,9 @@ __create_page_tables:
 	 * tables again to remove any speculatively loaded cache lines.
 	 */
 	adrp	x0, idmap_pg_dir
-	adrp	x1, swapper_pg_dir + SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE
+	ldr	x1, =(IDMAP_DIR_SIZE + SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE)
 	dmb	sy
-	bl	__inval_cache_range
+	bl	__inval_dcache_area
 
 	ret	x28
 ENDPROC(__create_page_tables)
@@ -361,6 +361,9 @@ __primary_switched:
 	ret					// to __primary_switch()
 0:
 #endif
+	add	sp, sp, #16
+	mov	x29, #0
+	mov	x30, #0
 	b	start_kernel
 ENDPROC(__primary_switched)
 
@@ -616,6 +619,7 @@ __secondary_switched:
 	ldr	x2, [x0, #CPU_BOOT_TASK]
 	msr	sp_el0, x2
 	mov	x29, #0
+	mov	x30, #0
 	b	secondary_start_kernel
 ENDPROC(__secondary_switched)
 
diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c
index a44e13942d30..095d3c170f5d 100644
--- a/arch/arm64/kernel/hibernate.c
+++ b/arch/arm64/kernel/hibernate.c
@@ -330,7 +330,7 @@ static void _copy_pte(pte_t *dst_pte, pte_t *src_pte, unsigned long addr)
 		 * read only (code, rodata). Clear the RDONLY bit from
 		 * the temporary mappings we use during restore.
 		 */
-		set_pte(dst_pte, pte_clear_rdonly(pte));
+		set_pte(dst_pte, pte_mkwrite(pte));
 	} else if (debug_pagealloc_enabled() && !pte_none(pte)) {
 		/*
 		 * debug_pagealloc will removed the PTE_VALID bit if
@@ -343,7 +343,7 @@ static void _copy_pte(pte_t *dst_pte, pte_t *src_pte, unsigned long addr)
 		 */
 		BUG_ON(!pfn_valid(pte_pfn(pte)));
 
-		set_pte(dst_pte, pte_mkpresent(pte_clear_rdonly(pte)));
+		set_pte(dst_pte, pte_mkpresent(pte_mkwrite(pte)));
 	}
 }
 
diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c
index 2386b26c0712..713561e5bcab 100644
--- a/arch/arm64/kernel/irq.c
+++ b/arch/arm64/kernel/irq.c
@@ -23,15 +23,16 @@
 
 #include <linux/kernel_stat.h>
 #include <linux/irq.h>
+#include <linux/memory.h>
 #include <linux/smp.h>
 #include <linux/init.h>
 #include <linux/irqchip.h>
 #include <linux/seq_file.h>
+#include <linux/vmalloc.h>
 
 unsigned long irq_err_count;
 
-/* irq stack only needs to be 16 byte aligned - not IRQ_STACK_SIZE aligned. */
-DEFINE_PER_CPU(unsigned long [IRQ_STACK_SIZE/sizeof(long)], irq_stack) __aligned(16);
+DEFINE_PER_CPU(unsigned long *, irq_stack_ptr);
 
 int arch_show_interrupts(struct seq_file *p, int prec)
 {
@@ -50,8 +51,43 @@ void __init set_handle_irq(void (*handle_irq)(struct pt_regs *))
 	handle_arch_irq = handle_irq;
 }
 
+#ifdef CONFIG_VMAP_STACK
+static void init_irq_stacks(void)
+{
+	int cpu;
+	unsigned long *p;
+
+	for_each_possible_cpu(cpu) {
+		/*
+		* To ensure that VMAP'd stack overflow detection works
+		* correctly, the IRQ stacks need to have the same
+		* alignment as other stacks.
+		*/
+		p = __vmalloc_node_range(IRQ_STACK_SIZE, THREAD_ALIGN,
+					 VMALLOC_START, VMALLOC_END,
+					 THREADINFO_GFP, PAGE_KERNEL,
+					 0, cpu_to_node(cpu),
+					 __builtin_return_address(0));
+
+		per_cpu(irq_stack_ptr, cpu) = p;
+	}
+}
+#else
+/* irq stack only needs to be 16 byte aligned - not IRQ_STACK_SIZE aligned. */
+DEFINE_PER_CPU_ALIGNED(unsigned long [IRQ_STACK_SIZE/sizeof(long)], irq_stack);
+
+static void init_irq_stacks(void)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu)
+		per_cpu(irq_stack_ptr, cpu) = per_cpu(irq_stack, cpu);
+}
+#endif
+
 void __init init_IRQ(void)
 {
+	init_irq_stacks();
 	irqchip_init();
 	if (!handle_arch_irq)
 		panic("No interrupt controller found.");
diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c
index 481f54a866c5..11121f608eb5 100644
--- a/arch/arm64/kernel/machine_kexec.c
+++ b/arch/arm64/kernel/machine_kexec.c
@@ -252,7 +252,7 @@ void machine_crash_shutdown(struct pt_regs *regs)
 	local_irq_disable();
 
 	/* shutdown non-crashing cpus */
-	smp_send_crash_stop();
+	crash_smp_send_stop();
 
 	/* for crashing cpu */
 	crash_save_cpu(regs, smp_processor_id());
diff --git a/arch/arm64/kernel/perf_callchain.c b/arch/arm64/kernel/perf_callchain.c
index 713ca824f266..bcafd7dcfe8b 100644
--- a/arch/arm64/kernel/perf_callchain.c
+++ b/arch/arm64/kernel/perf_callchain.c
@@ -162,7 +162,6 @@ void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
 	}
 
 	frame.fp = regs->regs[29];
-	frame.sp = regs->sp;
 	frame.pc = regs->pc;
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 	frame.graph = current->curr_ret_stack;
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index b5798ba21189..9eaef51f83ff 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -202,55 +202,6 @@ static const unsigned armv8_pmuv3_perf_map[PERF_COUNT_HW_MAX] = {
 	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND]	= ARMV8_PMUV3_PERFCTR_STALL_BACKEND,
 };
 
-/* ARM Cortex-A53 HW events mapping. */
-static const unsigned armv8_a53_perf_map[PERF_COUNT_HW_MAX] = {
-	PERF_MAP_ALL_UNSUPPORTED,
-	[PERF_COUNT_HW_CPU_CYCLES]		= ARMV8_PMUV3_PERFCTR_CPU_CYCLES,
-	[PERF_COUNT_HW_INSTRUCTIONS]		= ARMV8_PMUV3_PERFCTR_INST_RETIRED,
-	[PERF_COUNT_HW_CACHE_REFERENCES]	= ARMV8_PMUV3_PERFCTR_L1D_CACHE,
-	[PERF_COUNT_HW_CACHE_MISSES]		= ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL,
-	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED,
-	[PERF_COUNT_HW_BRANCH_MISSES]		= ARMV8_PMUV3_PERFCTR_BR_MIS_PRED,
-	[PERF_COUNT_HW_BUS_CYCLES]		= ARMV8_PMUV3_PERFCTR_BUS_CYCLES,
-};
-
-/* ARM Cortex-A57 and Cortex-A72 events mapping. */
-static const unsigned armv8_a57_perf_map[PERF_COUNT_HW_MAX] = {
-	PERF_MAP_ALL_UNSUPPORTED,
-	[PERF_COUNT_HW_CPU_CYCLES]		= ARMV8_PMUV3_PERFCTR_CPU_CYCLES,
-	[PERF_COUNT_HW_INSTRUCTIONS]		= ARMV8_PMUV3_PERFCTR_INST_RETIRED,
-	[PERF_COUNT_HW_CACHE_REFERENCES]	= ARMV8_PMUV3_PERFCTR_L1D_CACHE,
-	[PERF_COUNT_HW_CACHE_MISSES]		= ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL,
-	[PERF_COUNT_HW_BRANCH_MISSES]		= ARMV8_PMUV3_PERFCTR_BR_MIS_PRED,
-	[PERF_COUNT_HW_BUS_CYCLES]		= ARMV8_PMUV3_PERFCTR_BUS_CYCLES,
-};
-
-static const unsigned armv8_thunder_perf_map[PERF_COUNT_HW_MAX] = {
-	PERF_MAP_ALL_UNSUPPORTED,
-	[PERF_COUNT_HW_CPU_CYCLES]		= ARMV8_PMUV3_PERFCTR_CPU_CYCLES,
-	[PERF_COUNT_HW_INSTRUCTIONS]		= ARMV8_PMUV3_PERFCTR_INST_RETIRED,
-	[PERF_COUNT_HW_CACHE_REFERENCES]	= ARMV8_PMUV3_PERFCTR_L1D_CACHE,
-	[PERF_COUNT_HW_CACHE_MISSES]		= ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL,
-	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED,
-	[PERF_COUNT_HW_BRANCH_MISSES]		= ARMV8_PMUV3_PERFCTR_BR_MIS_PRED,
-	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = ARMV8_PMUV3_PERFCTR_STALL_FRONTEND,
-	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND]	= ARMV8_PMUV3_PERFCTR_STALL_BACKEND,
-};
-
-/* Broadcom Vulcan events mapping */
-static const unsigned armv8_vulcan_perf_map[PERF_COUNT_HW_MAX] = {
-	PERF_MAP_ALL_UNSUPPORTED,
-	[PERF_COUNT_HW_CPU_CYCLES]		= ARMV8_PMUV3_PERFCTR_CPU_CYCLES,
-	[PERF_COUNT_HW_INSTRUCTIONS]		= ARMV8_PMUV3_PERFCTR_INST_RETIRED,
-	[PERF_COUNT_HW_CACHE_REFERENCES]	= ARMV8_PMUV3_PERFCTR_L1D_CACHE,
-	[PERF_COUNT_HW_CACHE_MISSES]		= ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL,
-	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= ARMV8_PMUV3_PERFCTR_BR_RETIRED,
-	[PERF_COUNT_HW_BRANCH_MISSES]		= ARMV8_PMUV3_PERFCTR_BR_MIS_PRED,
-	[PERF_COUNT_HW_BUS_CYCLES]		= ARMV8_PMUV3_PERFCTR_BUS_CYCLES,
-	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND]	= ARMV8_PMUV3_PERFCTR_STALL_FRONTEND,
-	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND]	= ARMV8_PMUV3_PERFCTR_STALL_BACKEND,
-};
-
 static const unsigned armv8_pmuv3_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
 						[PERF_COUNT_HW_CACHE_OP_MAX]
 						[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
@@ -281,27 +232,10 @@ static const unsigned armv8_a53_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
 					      [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
 	PERF_CACHE_MAP_ALL_UNSUPPORTED,
 
-	[C(L1D)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_L1D_CACHE,
-	[C(L1D)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL,
-	[C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_L1D_CACHE,
-	[C(L1D)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL,
 	[C(L1D)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV8_A53_PERFCTR_PREF_LINEFILL,
 
-	[C(L1I)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_L1I_CACHE,
-	[C(L1I)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL,
-
-	[C(LL)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_L2D_CACHE,
-	[C(LL)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_L2D_CACHE_REFILL,
-	[C(LL)][C(OP_WRITE)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_L2D_CACHE,
-	[C(LL)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_L2D_CACHE_REFILL,
-
-	[C(DTLB)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_L1D_TLB_REFILL,
-	[C(ITLB)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL,
-
-	[C(BPU)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_BR_PRED,
-	[C(BPU)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_BR_MIS_PRED,
-	[C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_BR_PRED,
-	[C(BPU)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_BR_MIS_PRED,
+	[C(NODE)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD,
+	[C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR,
 };
 
 static const unsigned armv8_a57_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
@@ -314,18 +248,26 @@ static const unsigned armv8_a57_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
 	[C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)]	= ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR,
 	[C(L1D)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_WR,
 
-	[C(L1I)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_L1I_CACHE,
-	[C(L1I)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL,
-
 	[C(DTLB)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD,
 	[C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR,
 
-	[C(ITLB)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL,
+	[C(NODE)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD,
+	[C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR,
+};
 
-	[C(BPU)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_BR_PRED,
-	[C(BPU)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_BR_MIS_PRED,
-	[C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_BR_PRED,
-	[C(BPU)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_BR_MIS_PRED,
+static const unsigned armv8_a73_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+					      [PERF_COUNT_HW_CACHE_OP_MAX]
+					      [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+	PERF_CACHE_MAP_ALL_UNSUPPORTED,
+
+	[C(L1D)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD,
+	[C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)]	= ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR,
+
+	[C(NODE)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD,
+	[C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR,
+
+	[C(NODE)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD,
+	[C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR,
 };
 
 static const unsigned armv8_thunder_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
@@ -340,8 +282,6 @@ static const unsigned armv8_thunder_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
 	[C(L1D)][C(OP_PREFETCH)][C(RESULT_ACCESS)] = ARMV8_THUNDER_PERFCTR_L1D_CACHE_PREF_ACCESS,
 	[C(L1D)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV8_THUNDER_PERFCTR_L1D_CACHE_PREF_MISS,
 
-	[C(L1I)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_L1I_CACHE,
-	[C(L1I)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL,
 	[C(L1I)][C(OP_PREFETCH)][C(RESULT_ACCESS)] = ARMV8_THUNDER_PERFCTR_L1I_CACHE_PREF_ACCESS,
 	[C(L1I)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV8_THUNDER_PERFCTR_L1I_CACHE_PREF_MISS,
 
@@ -349,13 +289,6 @@ static const unsigned armv8_thunder_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
 	[C(DTLB)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD,
 	[C(DTLB)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_WR,
 	[C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR,
-
-	[C(ITLB)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL,
-
-	[C(BPU)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_BR_PRED,
-	[C(BPU)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_BR_MIS_PRED,
-	[C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_BR_PRED,
-	[C(BPU)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_BR_MIS_PRED,
 };
 
 static const unsigned armv8_vulcan_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
@@ -368,22 +301,11 @@ static const unsigned armv8_vulcan_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
 	[C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)]	= ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR,
 	[C(L1D)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_WR,
 
-	[C(L1I)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_L1I_CACHE,
-	[C(L1I)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL,
-
-	[C(ITLB)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL,
-	[C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_L1I_TLB,
-
 	[C(DTLB)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_IMPDEF_PERFCTR_L1D_TLB_RD,
 	[C(DTLB)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_WR,
 	[C(DTLB)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD,
 	[C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR,
 
-	[C(BPU)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_BR_PRED,
-	[C(BPU)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_BR_MIS_PRED,
-	[C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_BR_PRED,
-	[C(BPU)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_BR_MIS_PRED,
-
 	[C(NODE)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD,
 	[C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR,
 };
@@ -846,17 +768,14 @@ static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc,
 	struct hw_perf_event *hwc = &event->hw;
 	unsigned long evtype = hwc->config_base & ARMV8_PMU_EVTYPE_EVENT;
 
-	/* Always place a cycle counter into the cycle counter. */
+	/* Always prefer to place a cycle counter into the cycle counter. */
 	if (evtype == ARMV8_PMUV3_PERFCTR_CPU_CYCLES) {
-		if (test_and_set_bit(ARMV8_IDX_CYCLE_COUNTER, cpuc->used_mask))
-			return -EAGAIN;
-
-		return ARMV8_IDX_CYCLE_COUNTER;
+		if (!test_and_set_bit(ARMV8_IDX_CYCLE_COUNTER, cpuc->used_mask))
+			return ARMV8_IDX_CYCLE_COUNTER;
 	}
 
 	/*
-	 * For anything other than a cycle counter, try and use
-	 * the events counters
+	 * Otherwise use events counters
 	 */
 	for (idx = ARMV8_IDX_COUNTER0; idx < cpu_pmu->num_events; ++idx) {
 		if (!test_and_set_bit(idx, cpuc->used_mask))
@@ -924,7 +843,13 @@ static void armv8pmu_reset(void *info)
 			    ARMV8_PMU_PMCR_LC);
 }
 
-static int armv8_pmuv3_map_event(struct perf_event *event)
+static int __armv8_pmuv3_map_event(struct perf_event *event,
+				   const unsigned (*extra_event_map)
+						  [PERF_COUNT_HW_MAX],
+				   const unsigned (*extra_cache_map)
+						  [PERF_COUNT_HW_CACHE_MAX]
+						  [PERF_COUNT_HW_CACHE_OP_MAX]
+						  [PERF_COUNT_HW_CACHE_RESULT_MAX])
 {
 	int hw_event_id;
 	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
@@ -932,44 +857,47 @@ static int armv8_pmuv3_map_event(struct perf_event *event)
 	hw_event_id = armpmu_map_event(event, &armv8_pmuv3_perf_map,
 				       &armv8_pmuv3_perf_cache_map,
 				       ARMV8_PMU_EVTYPE_EVENT);
-	if (hw_event_id < 0)
-		return hw_event_id;
 
-	/* disable micro/arch events not supported by this PMU */
-	if ((hw_event_id < ARMV8_PMUV3_MAX_COMMON_EVENTS) &&
-		!test_bit(hw_event_id, armpmu->pmceid_bitmap)) {
-			return -EOPNOTSUPP;
+	/* Onl expose micro/arch events supported by this PMU */
+	if ((hw_event_id > 0) && (hw_event_id < ARMV8_PMUV3_MAX_COMMON_EVENTS)
+	    && test_bit(hw_event_id, armpmu->pmceid_bitmap)) {
+		return hw_event_id;
 	}
 
-	return hw_event_id;
+	return armpmu_map_event(event, extra_event_map, extra_cache_map,
+				ARMV8_PMU_EVTYPE_EVENT);
+}
+
+static int armv8_pmuv3_map_event(struct perf_event *event)
+{
+	return __armv8_pmuv3_map_event(event, NULL, NULL);
 }
 
 static int armv8_a53_map_event(struct perf_event *event)
 {
-	return armpmu_map_event(event, &armv8_a53_perf_map,
-				&armv8_a53_perf_cache_map,
-				ARMV8_PMU_EVTYPE_EVENT);
+	return __armv8_pmuv3_map_event(event, NULL, &armv8_a53_perf_cache_map);
 }
 
 static int armv8_a57_map_event(struct perf_event *event)
 {
-	return armpmu_map_event(event, &armv8_a57_perf_map,
-				&armv8_a57_perf_cache_map,
-				ARMV8_PMU_EVTYPE_EVENT);
+	return __armv8_pmuv3_map_event(event, NULL, &armv8_a57_perf_cache_map);
+}
+
+static int armv8_a73_map_event(struct perf_event *event)
+{
+	return __armv8_pmuv3_map_event(event, NULL, &armv8_a73_perf_cache_map);
 }
 
 static int armv8_thunder_map_event(struct perf_event *event)
 {
-	return armpmu_map_event(event, &armv8_thunder_perf_map,
-				&armv8_thunder_perf_cache_map,
-				ARMV8_PMU_EVTYPE_EVENT);
+	return __armv8_pmuv3_map_event(event, NULL,
+				       &armv8_thunder_perf_cache_map);
 }
 
 static int armv8_vulcan_map_event(struct perf_event *event)
 {
-	return armpmu_map_event(event, &armv8_vulcan_perf_map,
-				&armv8_vulcan_perf_cache_map,
-				ARMV8_PMU_EVTYPE_EVENT);
+	return __armv8_pmuv3_map_event(event, NULL,
+				       &armv8_vulcan_perf_cache_map);
 }
 
 struct armv8pmu_probe_info {
@@ -1062,6 +990,22 @@ static int armv8_pmuv3_init(struct arm_pmu *cpu_pmu)
 	return 0;
 }
 
+static int armv8_a35_pmu_init(struct arm_pmu *cpu_pmu)
+{
+	int ret = armv8_pmu_init(cpu_pmu);
+	if (ret)
+		return ret;
+
+	cpu_pmu->name			= "armv8_cortex_a35";
+	cpu_pmu->map_event		= armv8_a53_map_event;
+	cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
+		&armv8_pmuv3_events_attr_group;
+	cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
+		&armv8_pmuv3_format_attr_group;
+
+	return 0;
+}
+
 static int armv8_a53_pmu_init(struct arm_pmu *cpu_pmu)
 {
 	int ret = armv8_pmu_init(cpu_pmu);
@@ -1110,6 +1054,22 @@ static int armv8_a72_pmu_init(struct arm_pmu *cpu_pmu)
 	return 0;
 }
 
+static int armv8_a73_pmu_init(struct arm_pmu *cpu_pmu)
+{
+	int ret = armv8_pmu_init(cpu_pmu);
+	if (ret)
+		return ret;
+
+	cpu_pmu->name			= "armv8_cortex_a73";
+	cpu_pmu->map_event		= armv8_a73_map_event;
+	cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
+		&armv8_pmuv3_events_attr_group;
+	cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
+		&armv8_pmuv3_format_attr_group;
+
+	return 0;
+}
+
 static int armv8_thunder_pmu_init(struct arm_pmu *cpu_pmu)
 {
 	int ret = armv8_pmu_init(cpu_pmu);
@@ -1144,9 +1104,11 @@ static int armv8_vulcan_pmu_init(struct arm_pmu *cpu_pmu)
 
 static const struct of_device_id armv8_pmu_of_device_ids[] = {
 	{.compatible = "arm,armv8-pmuv3",	.data = armv8_pmuv3_init},
+	{.compatible = "arm,cortex-a35-pmu",	.data = armv8_a35_pmu_init},
 	{.compatible = "arm,cortex-a53-pmu",	.data = armv8_a53_pmu_init},
 	{.compatible = "arm,cortex-a57-pmu",	.data = armv8_a57_pmu_init},
 	{.compatible = "arm,cortex-a72-pmu",	.data = armv8_a72_pmu_init},
+	{.compatible = "arm,cortex-a73-pmu",	.data = armv8_a73_pmu_init},
 	{.compatible = "cavium,thunder-pmu",	.data = armv8_thunder_pmu_init},
 	{.compatible = "brcm,vulcan-pmu",	.data = armv8_vulcan_pmu_init},
 	{},
diff --git a/arch/arm64/kernel/probes/uprobes.c b/arch/arm64/kernel/probes/uprobes.c
index 26c998534dca..636ca0119c0e 100644
--- a/arch/arm64/kernel/probes/uprobes.c
+++ b/arch/arm64/kernel/probes/uprobes.c
@@ -40,7 +40,7 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm,
 	probe_opcode_t insn;
 
 	/* TODO: Currently we do not support AARCH32 instruction probing */
-	if (test_bit(TIF_32BIT, &mm->context.flags))
+	if (mm->context.flags & MMCF_AARCH32)
 		return -ENOTSUPP;
 	else if (!IS_ALIGNED(addr, AARCH64_INSN_SIZE))
 		return -EINVAL;
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 659ae8094ed5..2dc0f8482210 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -360,6 +360,8 @@ __notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev,
 	/*
 	 * Complete any pending TLB or cache maintenance on this CPU in case
 	 * the thread migrates to a different CPU.
+	 * This full barrier is also required by the membarrier system
+	 * call.
 	 */
 	dsb(ish);
 
@@ -382,15 +384,12 @@ unsigned long get_wchan(struct task_struct *p)
 		return 0;
 
 	frame.fp = thread_saved_fp(p);
-	frame.sp = thread_saved_sp(p);
 	frame.pc = thread_saved_pc(p);
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 	frame.graph = p->curr_ret_stack;
 #endif
 	do {
-		if (frame.sp < stack_page ||
-		    frame.sp >= stack_page + THREAD_SIZE ||
-		    unwind_frame(p, &frame))
+		if (unwind_frame(p, &frame))
 			goto out;
 		if (!in_sched_functions(frame.pc)) {
 			ret = frame.pc;
@@ -417,3 +416,11 @@ unsigned long arch_randomize_brk(struct mm_struct *mm)
 	else
 		return randomize_page(mm->brk, SZ_1G);
 }
+
+/*
+ * Called from setup_new_exec() after (COMPAT_)SET_PERSONALITY.
+ */
+void arch_setup_new_exec(void)
+{
+	current->mm->context.flags = is_compat_task() ? MMCF_AARCH32 : 0;
+}
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index 1b38c0150aec..9cbb6123208f 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -42,6 +42,7 @@
 #include <asm/compat.h>
 #include <asm/debug-monitors.h>
 #include <asm/pgtable.h>
+#include <asm/stacktrace.h>
 #include <asm/syscall.h>
 #include <asm/traps.h>
 #include <asm/system_misc.h>
@@ -127,7 +128,7 @@ static bool regs_within_kernel_stack(struct pt_regs *regs, unsigned long addr)
 {
 	return ((addr & ~(THREAD_SIZE - 1))  ==
 		(kernel_stack_pointer(regs) & ~(THREAD_SIZE - 1))) ||
-		on_irq_stack(addr, raw_smp_processor_id());
+		on_irq_stack(addr);
 }
 
 /**
@@ -1363,7 +1364,7 @@ static void tracehook_report_syscall(struct pt_regs *regs,
 	if (dir == PTRACE_SYSCALL_EXIT)
 		tracehook_report_syscall_exit(regs, 0);
 	else if (tracehook_report_syscall_entry(regs))
-		regs->syscallno = ~0UL;
+		forget_syscall(regs);
 
 	regs->regs[regno] = saved_reg;
 }
diff --git a/arch/arm64/kernel/return_address.c b/arch/arm64/kernel/return_address.c
index 12a87f2600f2..933adbc0f654 100644
--- a/arch/arm64/kernel/return_address.c
+++ b/arch/arm64/kernel/return_address.c
@@ -42,7 +42,6 @@ void *return_address(unsigned int level)
 	data.addr = NULL;
 
 	frame.fp = (unsigned long)__builtin_frame_address(0);
-	frame.sp = current_stack_pointer;
 	frame.pc = (unsigned long)return_address; /* dummy */
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 	frame.graph = current->curr_ret_stack;
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index 089c3747995d..c45214f8fb54 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -29,6 +29,7 @@
 #include <linux/string.h>
 #include <linux/tracehook.h>
 #include <linux/ratelimit.h>
+#include <linux/syscalls.h>
 
 #include <asm/debug-monitors.h>
 #include <asm/elf.h>
@@ -36,6 +37,7 @@
 #include <asm/ucontext.h>
 #include <asm/unistd.h>
 #include <asm/fpsimd.h>
+#include <asm/ptrace.h>
 #include <asm/signal32.h>
 #include <asm/vdso.h>
 
@@ -387,7 +389,7 @@ static int restore_sigframe(struct pt_regs *regs,
 	/*
 	 * Avoid sys_rt_sigreturn() restarting.
 	 */
-	regs->syscallno = ~0UL;
+	forget_syscall(regs);
 
 	err |= !valid_user_regs(&regs->user_regs, current);
 	if (err == 0)
@@ -673,13 +675,12 @@ static void do_signal(struct pt_regs *regs)
 {
 	unsigned long continue_addr = 0, restart_addr = 0;
 	int retval = 0;
-	int syscall = (int)regs->syscallno;
 	struct ksignal ksig;
 
 	/*
 	 * If we were from a system call, check for system call restarting...
 	 */
-	if (syscall >= 0) {
+	if (in_syscall(regs)) {
 		continue_addr = regs->pc;
 		restart_addr = continue_addr - (compat_thumb_mode(regs) ? 2 : 4);
 		retval = regs->regs[0];
@@ -687,7 +688,7 @@ static void do_signal(struct pt_regs *regs)
 		/*
 		 * Avoid additional syscall restarting via ret_to_user.
 		 */
-		regs->syscallno = ~0UL;
+		forget_syscall(regs);
 
 		/*
 		 * Prepare for system call restart. We do this here so that a
@@ -731,7 +732,7 @@ static void do_signal(struct pt_regs *regs)
 	 * Handle restarting a different system call. As above, if a debugger
 	 * has chosen to restart at a different PC, ignore the restart.
 	 */
-	if (syscall >= 0 && regs->pc == restart_addr) {
+	if (in_syscall(regs) && regs->pc == restart_addr) {
 		if (retval == -ERESTART_RESTARTBLOCK)
 			setup_restart_syscall(regs);
 		user_rewind_single_step(current);
@@ -749,6 +750,10 @@ asmlinkage void do_notify_resume(struct pt_regs *regs,
 	 * Update the trace code with the current status.
 	 */
 	trace_hardirqs_off();
+
+	/* Check valid user FS if needed */
+	addr_limit_user_check();
+
 	do {
 		if (thread_flags & _TIF_NEED_RESCHED) {
 			schedule();
diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c
index c747a0fc5d7d..4e5a664be04b 100644
--- a/arch/arm64/kernel/signal32.c
+++ b/arch/arm64/kernel/signal32.c
@@ -354,7 +354,7 @@ static int compat_restore_sigframe(struct pt_regs *regs,
 	/*
 	 * Avoid compat_sys_sigreturn() restarting.
 	 */
-	regs->syscallno = ~0UL;
+	forget_syscall(regs);
 
 	err |= !valid_user_regs(&regs->user_regs, current);
 
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index dc66e6ec3a99..ffe089942ac4 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -154,7 +154,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
 	 * page tables.
 	 */
 	secondary_data.task = idle;
-	secondary_data.stack = task_stack_page(idle) + THREAD_START_SP;
+	secondary_data.stack = task_stack_page(idle) + THREAD_SIZE;
 	update_cpu_boot_status(CPU_MMU_OFF);
 	__flush_dcache_area(&secondary_data, sizeof(secondary_data));
 
@@ -977,11 +977,21 @@ void smp_send_stop(void)
 }
 
 #ifdef CONFIG_KEXEC_CORE
-void smp_send_crash_stop(void)
+void crash_smp_send_stop(void)
 {
+	static int cpus_stopped;
 	cpumask_t mask;
 	unsigned long timeout;
 
+	/*
+	 * This function can be called twice in panic path, but obviously
+	 * we execute this only once.
+	 */
+	if (cpus_stopped)
+		return;
+
+	cpus_stopped = 1;
+
 	if (num_online_cpus() == 1)
 		return;
 
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index 09d37d66b630..3144584617e7 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -42,33 +42,17 @@
  */
 int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
 {
-	unsigned long high, low;
 	unsigned long fp = frame->fp;
-	unsigned long irq_stack_ptr;
+
+	if (fp & 0xf)
+		return -EINVAL;
 
 	if (!tsk)
 		tsk = current;
 
-	/*
-	 * Switching between stacks is valid when tracing current and in
-	 * non-preemptible context.
-	 */
-	if (tsk == current && !preemptible())
-		irq_stack_ptr = IRQ_STACK_PTR(smp_processor_id());
-	else
-		irq_stack_ptr = 0;
-
-	low  = frame->sp;
-	/* irq stacks are not THREAD_SIZE aligned */
-	if (on_irq_stack(frame->sp, raw_smp_processor_id()))
-		high = irq_stack_ptr;
-	else
-		high = ALIGN(low, THREAD_SIZE) - 0x20;
-
-	if (fp < low || fp > high || fp & 0xf)
+	if (!on_accessible_stack(tsk, fp))
 		return -EINVAL;
 
-	frame->sp = fp + 0x10;
 	frame->fp = READ_ONCE_NOCHECK(*(unsigned long *)(fp));
 	frame->pc = READ_ONCE_NOCHECK(*(unsigned long *)(fp + 8));
 
@@ -86,34 +70,13 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
 
 	/*
-	 * Check whether we are going to walk through from interrupt stack
-	 * to task stack.
-	 * If we reach the end of the stack - and its an interrupt stack,
-	 * unpack the dummy frame to find the original elr.
-	 *
-	 * Check the frame->fp we read from the bottom of the irq_stack,
-	 * and the original task stack pointer are both in current->stack.
+	 * Frames created upon entry from EL0 have NULL FP and PC values, so
+	 * don't bother reporting these. Frames created by __noreturn functions
+	 * might have a valid FP even if PC is bogus, so only terminate where
+	 * both are NULL.
 	 */
-	if (frame->sp == irq_stack_ptr) {
-		struct pt_regs *irq_args;
-		unsigned long orig_sp = IRQ_STACK_TO_TASK_STACK(irq_stack_ptr);
-
-		if (object_is_on_stack((void *)orig_sp) &&
-		   object_is_on_stack((void *)frame->fp)) {
-			frame->sp = orig_sp;
-
-			/* orig_sp is the saved pt_regs, find the elr */
-			irq_args = (struct pt_regs *)orig_sp;
-			frame->pc = irq_args->pc;
-		} else {
-			/*
-			 * This frame has a non-standard format, and we
-			 * didn't fix it, because the data looked wrong.
-			 * Refuse to output this frame.
-			 */
-			return -EINVAL;
-		}
-	}
+	if (!frame->fp && !frame->pc)
+		return -EINVAL;
 
 	return 0;
 }
@@ -167,7 +130,6 @@ void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace)
 	data.no_sched_functions = 0;
 
 	frame.fp = regs->regs[29];
-	frame.sp = regs->sp;
 	frame.pc = regs->pc;
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 	frame.graph = current->curr_ret_stack;
@@ -192,12 +154,10 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
 	if (tsk != current) {
 		data.no_sched_functions = 1;
 		frame.fp = thread_saved_fp(tsk);
-		frame.sp = thread_saved_sp(tsk);
 		frame.pc = thread_saved_pc(tsk);
 	} else {
 		data.no_sched_functions = 0;
 		frame.fp = (unsigned long)__builtin_frame_address(0);
-		frame.sp = current_stack_pointer;
 		frame.pc = (unsigned long)save_stack_trace_tsk;
 	}
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
diff --git a/arch/arm64/kernel/time.c b/arch/arm64/kernel/time.c
index da33c90248e9..a4391280fba9 100644
--- a/arch/arm64/kernel/time.c
+++ b/arch/arm64/kernel/time.c
@@ -50,7 +50,6 @@ unsigned long profile_pc(struct pt_regs *regs)
 		return regs->pc;
 
 	frame.fp = regs->regs[29];
-	frame.sp = regs->sp;
 	frame.pc = regs->pc;
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 	frame.graph = -1; /* no task info */
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 8a62648848e5..5ea4b85aee0e 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -32,6 +32,7 @@
 #include <linux/sched/signal.h>
 #include <linux/sched/debug.h>
 #include <linux/sched/task_stack.h>
+#include <linux/sizes.h>
 #include <linux/syscalls.h>
 #include <linux/mm_types.h>
 
@@ -41,6 +42,7 @@
 #include <asm/esr.h>
 #include <asm/insn.h>
 #include <asm/traps.h>
+#include <asm/smp.h>
 #include <asm/stack_pointer.h>
 #include <asm/stacktrace.h>
 #include <asm/exception.h>
@@ -143,7 +145,6 @@ static void dump_instr(const char *lvl, struct pt_regs *regs)
 void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
 {
 	struct stackframe frame;
-	unsigned long irq_stack_ptr;
 	int skip;
 
 	pr_debug("%s(regs = %p tsk = %p)\n", __func__, regs, tsk);
@@ -154,25 +155,14 @@ void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
 	if (!try_get_task_stack(tsk))
 		return;
 
-	/*
-	 * Switching between stacks is valid when tracing current and in
-	 * non-preemptible context.
-	 */
-	if (tsk == current && !preemptible())
-		irq_stack_ptr = IRQ_STACK_PTR(smp_processor_id());
-	else
-		irq_stack_ptr = 0;
-
 	if (tsk == current) {
 		frame.fp = (unsigned long)__builtin_frame_address(0);
-		frame.sp = current_stack_pointer;
 		frame.pc = (unsigned long)dump_backtrace;
 	} else {
 		/*
 		 * task blocked in __switch_to
 		 */
 		frame.fp = thread_saved_fp(tsk);
-		frame.sp = thread_saved_sp(tsk);
 		frame.pc = thread_saved_pc(tsk);
 	}
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
@@ -182,13 +172,12 @@ void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
 	skip = !!regs;
 	printk("Call trace:\n");
 	while (1) {
-		unsigned long where = frame.pc;
 		unsigned long stack;
 		int ret;
 
 		/* skip until specified stack frame */
 		if (!skip) {
-			dump_backtrace_entry(where);
+			dump_backtrace_entry(frame.pc);
 		} else if (frame.fp == regs->regs[29]) {
 			skip = 0;
 			/*
@@ -203,20 +192,12 @@ void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
 		ret = unwind_frame(tsk, &frame);
 		if (ret < 0)
 			break;
-		stack = frame.sp;
-		if (in_exception_text(where)) {
-			/*
-			 * If we switched to the irq_stack before calling this
-			 * exception handler, then the pt_regs will be on the
-			 * task stack. The easiest way to tell is if the large
-			 * pt_regs would overlap with the end of the irq_stack.
-			 */
-			if (stack < irq_stack_ptr &&
-			    (stack + sizeof(struct pt_regs)) > irq_stack_ptr)
-				stack = IRQ_STACK_TO_TASK_STACK(irq_stack_ptr);
+		if (in_entry_text(frame.pc)) {
+			stack = frame.fp - offsetof(struct pt_regs, stackframe);
 
-			dump_mem("", "Exception stack", stack,
-				 stack + sizeof(struct pt_regs));
+			if (on_accessible_stack(tsk, stack))
+				dump_mem("", "Exception stack", stack,
+					 stack + sizeof(struct pt_regs));
 		}
 	}
 
@@ -257,8 +238,6 @@ static int __die(const char *str, int err, struct pt_regs *regs)
 		 end_of_stack(tsk));
 
 	if (!user_mode(regs)) {
-		dump_mem(KERN_EMERG, "Stack: ", regs->sp,
-			 THREAD_SIZE + (unsigned long)task_stack_page(tsk));
 		dump_backtrace(regs, tsk);
 		dump_instr(KERN_EMERG, regs);
 	}
@@ -484,6 +463,9 @@ static void user_cache_maint_handler(unsigned int esr, struct pt_regs *regs)
 	case ESR_ELx_SYS64_ISS_CRM_DC_CVAC:	/* DC CVAC, gets promoted */
 		__user_cache_maint("dc civac", address, ret);
 		break;
+	case ESR_ELx_SYS64_ISS_CRM_DC_CVAP:	/* DC CVAP */
+		__user_cache_maint("sys 3, c7, c12, 1", address, ret);
+		break;
 	case ESR_ELx_SYS64_ISS_CRM_DC_CIVAC:	/* DC CIVAC */
 		__user_cache_maint("dc civac", address, ret);
 		break;
@@ -593,7 +575,7 @@ asmlinkage long do_ni_syscall(struct pt_regs *regs)
 
 	if (show_unhandled_signals_ratelimited()) {
 		pr_info("%s[%d]: syscall %d\n", current->comm,
-			task_pid_nr(current), (int)regs->syscallno);
+			task_pid_nr(current), regs->syscallno);
 		dump_instr("", regs);
 		if (user_mode(regs))
 			__show_regs(regs);
@@ -689,6 +671,43 @@ asmlinkage void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr)
 	force_sig_info(info.si_signo, &info, current);
 }
 
+#ifdef CONFIG_VMAP_STACK
+
+DEFINE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack)
+	__aligned(16);
+
+asmlinkage void handle_bad_stack(struct pt_regs *regs)
+{
+	unsigned long tsk_stk = (unsigned long)current->stack;
+	unsigned long irq_stk = (unsigned long)this_cpu_read(irq_stack_ptr);
+	unsigned long ovf_stk = (unsigned long)this_cpu_ptr(overflow_stack);
+	unsigned int esr = read_sysreg(esr_el1);
+	unsigned long far = read_sysreg(far_el1);
+
+	console_verbose();
+	pr_emerg("Insufficient stack space to handle exception!");
+
+	pr_emerg("ESR: 0x%08x -- %s\n", esr, esr_get_class_string(esr));
+	pr_emerg("FAR: 0x%016lx\n", far);
+
+	pr_emerg("Task stack:     [0x%016lx..0x%016lx]\n",
+		 tsk_stk, tsk_stk + THREAD_SIZE);
+	pr_emerg("IRQ stack:      [0x%016lx..0x%016lx]\n",
+		 irq_stk, irq_stk + THREAD_SIZE);
+	pr_emerg("Overflow stack: [0x%016lx..0x%016lx]\n",
+		 ovf_stk, ovf_stk + OVERFLOW_STACK_SIZE);
+
+	__show_regs(regs);
+
+	/*
+	 * We use nmi_panic to limit the potential for recusive overflows, and
+	 * to get a better stack trace.
+	 */
+	nmi_panic(NULL, "kernel stack overflow");
+	cpu_park_loop();
+}
+#endif
+
 void __pte_error(const char *file, int line, unsigned long val)
 {
 	pr_err("%s:%d: bad pte %016lx.\n", file, line, val);
diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c
index e8f759f764f2..2d419006ad43 100644
--- a/arch/arm64/kernel/vdso.c
+++ b/arch/arm64/kernel/vdso.c
@@ -110,12 +110,27 @@ int aarch32_setup_vectors_page(struct linux_binprm *bprm, int uses_interp)
 }
 #endif /* CONFIG_COMPAT */
 
+static int vdso_mremap(const struct vm_special_mapping *sm,
+		struct vm_area_struct *new_vma)
+{
+	unsigned long new_size = new_vma->vm_end - new_vma->vm_start;
+	unsigned long vdso_size = vdso_end - vdso_start;
+
+	if (vdso_size != new_size)
+		return -EINVAL;
+
+	current->mm->context.vdso = (void *)new_vma->vm_start;
+
+	return 0;
+}
+
 static struct vm_special_mapping vdso_spec[2] __ro_after_init = {
 	{
 		.name	= "[vvar]",
 	},
 	{
 		.name	= "[vdso]",
+		.mremap = vdso_mremap,
 	},
 };
 
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 987a00ee446c..fe56c268a7d9 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -72,22 +72,6 @@ PECOFF_FILE_ALIGNMENT = 0x200;
 #define PECOFF_EDATA_PADDING
 #endif
 
-#if defined(CONFIG_DEBUG_ALIGN_RODATA)
-/*
- *  4 KB granule:   1 level 2 entry
- * 16 KB granule: 128 level 3 entries, with contiguous bit
- * 64 KB granule:  32 level 3 entries, with contiguous bit
- */
-#define SEGMENT_ALIGN			SZ_2M
-#else
-/*
- *  4 KB granule:  16 level 3 entries, with contiguous bit
- * 16 KB granule:   4 level 3 entries, without contiguous bit
- * 64 KB granule:   1 level 3 entry
- */
-#define SEGMENT_ALIGN			SZ_64K
-#endif
-
 SECTIONS
 {
 	/*
@@ -192,7 +176,7 @@ SECTIONS
 
 	_data = .;
 	_sdata = .;
-	RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
+	RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_ALIGN)
 
 	/*
 	 * Data written with the MMU off but read with the MMU on requires
diff --git a/arch/arm64/kvm/hyp/s2-setup.c b/arch/arm64/kvm/hyp/s2-setup.c
index b81f4091c909..a81f5e10fc8c 100644
--- a/arch/arm64/kvm/hyp/s2-setup.c
+++ b/arch/arm64/kvm/hyp/s2-setup.c
@@ -70,7 +70,7 @@ u32 __hyp_text __init_stage2_translation(void)
 	 * Management in ID_AA64MMFR1_EL1 and enable the feature in VTCR_EL2.
 	 */
 	tmp = (read_sysreg(id_aa64mmfr1_el1) >> ID_AA64MMFR1_HADBS_SHIFT) & 0xf;
-	if (IS_ENABLED(CONFIG_ARM64_HW_AFDBM) && tmp)
+	if (tmp)
 		val |= VTCR_EL2_HA;
 
 	/*
diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile
index c86b7909ef31..a0abc142c92b 100644
--- a/arch/arm64/lib/Makefile
+++ b/arch/arm64/lib/Makefile
@@ -17,3 +17,5 @@ CFLAGS_atomic_ll_sc.o	:= -fcall-used-x0 -ffixed-x1 -ffixed-x2		\
 		   -fcall-saved-x10 -fcall-saved-x11 -fcall-saved-x12	\
 		   -fcall-saved-x13 -fcall-saved-x14 -fcall-saved-x15	\
 		   -fcall-saved-x18
+
+lib-$(CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE) += uaccess_flushcache.o
diff --git a/arch/arm64/lib/uaccess_flushcache.c b/arch/arm64/lib/uaccess_flushcache.c
new file mode 100644
index 000000000000..b6ceafdb8b72
--- /dev/null
+++ b/arch/arm64/lib/uaccess_flushcache.c
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2017 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/uaccess.h>
+#include <asm/barrier.h>
+#include <asm/cacheflush.h>
+
+void memcpy_flushcache(void *dst, const void *src, size_t cnt)
+{
+	/*
+	 * We assume this should not be called with @dst pointing to
+	 * non-cacheable memory, such that we don't need an explicit
+	 * barrier to order the cache maintenance against the memcpy.
+	 */
+	memcpy(dst, src, cnt);
+	__clean_dcache_area_pop(dst, cnt);
+}
+EXPORT_SYMBOL_GPL(memcpy_flushcache);
+
+void memcpy_page_flushcache(char *to, struct page *page, size_t offset,
+			    size_t len)
+{
+	memcpy_flushcache(to, page_address(page) + offset, len);
+}
+
+unsigned long __copy_user_flushcache(void *to, const void __user *from,
+				     unsigned long n)
+{
+	unsigned long rc = __arch_copy_from_user(to, from, n);
+
+	/* See above */
+	__clean_dcache_area_pop(to, n - rc);
+	return rc;
+}
diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
index 83c27b6e6dca..7f1dbe962cf5 100644
--- a/arch/arm64/mm/cache.S
+++ b/arch/arm64/mm/cache.S
@@ -109,20 +109,25 @@ ENTRY(__clean_dcache_area_pou)
 ENDPROC(__clean_dcache_area_pou)
 
 /*
- *	__dma_inv_area(start, size)
- *	- start   - virtual start address of region
+ *	__inval_dcache_area(kaddr, size)
+ *
+ * 	Ensure that any D-cache lines for the interval [kaddr, kaddr+size)
+ * 	are invalidated. Any partial lines at the ends of the interval are
+ *	also cleaned to PoC to prevent data loss.
+ *
+ *	- kaddr   - kernel address
  *	- size    - size in question
  */
-__dma_inv_area:
-	add	x1, x1, x0
+ENTRY(__inval_dcache_area)
 	/* FALLTHROUGH */
 
 /*
- *	__inval_cache_range(start, end)
- *	- start   - start address of region
- *	- end     - end address of region
+ *	__dma_inv_area(start, size)
+ *	- start   - virtual start address of region
+ *	- size    - size in question
  */
-ENTRY(__inval_cache_range)
+__dma_inv_area:
+	add	x1, x1, x0
 	dcache_line_size x2, x3
 	sub	x3, x2, #1
 	tst	x1, x3				// end cache line aligned?
@@ -140,7 +145,7 @@ ENTRY(__inval_cache_range)
 	b.lo	2b
 	dsb	sy
 	ret
-ENDPIPROC(__inval_cache_range)
+ENDPIPROC(__inval_dcache_area)
 ENDPROC(__dma_inv_area)
 
 /*
@@ -167,6 +172,20 @@ ENDPIPROC(__clean_dcache_area_poc)
 ENDPROC(__dma_clean_area)
 
 /*
+ *	__clean_dcache_area_pop(kaddr, size)
+ *
+ * 	Ensure that any D-cache lines for the interval [kaddr, kaddr+size)
+ * 	are cleaned to the PoP.
+ *
+ *	- kaddr   - kernel address
+ *	- size    - size in question
+ */
+ENTRY(__clean_dcache_area_pop)
+	dcache_by_line_op cvap, sy, x0, x1, x2, x3
+	ret
+ENDPIPROC(__clean_dcache_area_pop)
+
+/*
  *	__dma_flush_area(start, size)
  *
  *	clean & invalidate D / U line
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index f27d4dd04384..614af886b7ef 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -42,7 +42,7 @@ static pgprot_t __get_dma_pgprot(unsigned long attrs, pgprot_t prot,
 	return prot;
 }
 
-static struct gen_pool *atomic_pool;
+static struct gen_pool *atomic_pool __ro_after_init;
 
 #define DEFAULT_DMA_COHERENT_POOL_SIZE  SZ_256K
 static size_t atomic_pool_size __initdata = DEFAULT_DMA_COHERENT_POOL_SIZE;
@@ -425,7 +425,7 @@ static int __init atomic_pool_init(void)
 
 		gen_pool_set_algo(atomic_pool,
 				  gen_pool_first_fit_order_align,
-				  (void *)PAGE_SHIFT);
+				  NULL);
 
 		pr_info("DMA: preallocated %zu KiB pool for atomic allocations\n",
 			atomic_pool_size / 1024);
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 1f22a41565a3..89993c4be1be 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -34,6 +34,7 @@
 #include <linux/hugetlb.h>
 
 #include <asm/bug.h>
+#include <asm/cmpxchg.h>
 #include <asm/cpufeature.h>
 #include <asm/exception.h>
 #include <asm/debug-monitors.h>
@@ -82,6 +83,49 @@ static inline int notify_page_fault(struct pt_regs *regs, unsigned int esr)
 }
 #endif
 
+static void data_abort_decode(unsigned int esr)
+{
+	pr_alert("Data abort info:\n");
+
+	if (esr & ESR_ELx_ISV) {
+		pr_alert("  Access size = %u byte(s)\n",
+			 1U << ((esr & ESR_ELx_SAS) >> ESR_ELx_SAS_SHIFT));
+		pr_alert("  SSE = %lu, SRT = %lu\n",
+			 (esr & ESR_ELx_SSE) >> ESR_ELx_SSE_SHIFT,
+			 (esr & ESR_ELx_SRT_MASK) >> ESR_ELx_SRT_SHIFT);
+		pr_alert("  SF = %lu, AR = %lu\n",
+			 (esr & ESR_ELx_SF) >> ESR_ELx_SF_SHIFT,
+			 (esr & ESR_ELx_AR) >> ESR_ELx_AR_SHIFT);
+	} else {
+		pr_alert("  ISV = 0, ISS = 0x%08lu\n", esr & ESR_ELx_ISS_MASK);
+	}
+
+	pr_alert("  CM = %lu, WnR = %lu\n",
+		 (esr & ESR_ELx_CM) >> ESR_ELx_CM_SHIFT,
+		 (esr & ESR_ELx_WNR) >> ESR_ELx_WNR_SHIFT);
+}
+
+/*
+ * Decode mem abort information
+ */
+static void mem_abort_decode(unsigned int esr)
+{
+	pr_alert("Mem abort info:\n");
+
+	pr_alert("  Exception class = %s, IL = %u bits\n",
+		 esr_get_class_string(esr),
+		 (esr & ESR_ELx_IL) ? 32 : 16);
+	pr_alert("  SET = %lu, FnV = %lu\n",
+		 (esr & ESR_ELx_SET_MASK) >> ESR_ELx_SET_SHIFT,
+		 (esr & ESR_ELx_FnV) >> ESR_ELx_FnV_SHIFT);
+	pr_alert("  EA = %lu, S1PTW = %lu\n",
+		 (esr & ESR_ELx_EA) >> ESR_ELx_EA_SHIFT,
+		 (esr & ESR_ELx_S1PTW) >> ESR_ELx_S1PTW_SHIFT);
+
+	if (esr_is_data_abort(esr))
+		data_abort_decode(esr);
+}
+
 /*
  * Dump out the page tables associated with 'addr' in the currently active mm.
  */
@@ -139,7 +183,6 @@ void show_pte(unsigned long addr)
 	pr_cont("\n");
 }
 
-#ifdef CONFIG_ARM64_HW_AFDBM
 /*
  * This function sets the access flags (dirty, accessed), as well as write
  * permission, and only to a more permissive setting.
@@ -154,18 +197,13 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
 			  unsigned long address, pte_t *ptep,
 			  pte_t entry, int dirty)
 {
-	pteval_t old_pteval;
-	unsigned int tmp;
+	pteval_t old_pteval, pteval;
 
 	if (pte_same(*ptep, entry))
 		return 0;
 
 	/* only preserve the access flags and write permission */
-	pte_val(entry) &= PTE_AF | PTE_WRITE | PTE_DIRTY;
-
-	/* set PTE_RDONLY if actual read-only or clean PTE */
-	if (!pte_write(entry) || !pte_sw_dirty(entry))
-		pte_val(entry) |= PTE_RDONLY;
+	pte_val(entry) &= PTE_RDONLY | PTE_AF | PTE_WRITE | PTE_DIRTY;
 
 	/*
 	 * Setting the flags must be done atomically to avoid racing with the
@@ -174,21 +212,18 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
 	 * (calculated as: a & b == ~(~a | ~b)).
 	 */
 	pte_val(entry) ^= PTE_RDONLY;
-	asm volatile("//	ptep_set_access_flags\n"
-	"	prfm	pstl1strm, %2\n"
-	"1:	ldxr	%0, %2\n"
-	"	eor	%0, %0, %3		// negate PTE_RDONLY in *ptep\n"
-	"	orr	%0, %0, %4		// set flags\n"
-	"	eor	%0, %0, %3		// negate final PTE_RDONLY\n"
-	"	stxr	%w1, %0, %2\n"
-	"	cbnz	%w1, 1b\n"
-	: "=&r" (old_pteval), "=&r" (tmp), "+Q" (pte_val(*ptep))
-	: "L" (PTE_RDONLY), "r" (pte_val(entry)));
+	pteval = READ_ONCE(pte_val(*ptep));
+	do {
+		old_pteval = pteval;
+		pteval ^= PTE_RDONLY;
+		pteval |= pte_val(entry);
+		pteval ^= PTE_RDONLY;
+		pteval = cmpxchg_relaxed(&pte_val(*ptep), old_pteval, pteval);
+	} while (pteval != old_pteval);
 
 	flush_tlb_fix_spurious_fault(vma, address);
 	return 1;
 }
-#endif
 
 static bool is_el1_instruction_abort(unsigned int esr)
 {
@@ -248,6 +283,8 @@ static void __do_kernel_fault(unsigned long addr, unsigned int esr,
 	pr_alert("Unable to handle kernel %s at virtual address %08lx\n", msg,
 		 addr);
 
+	mem_abort_decode(esr);
+
 	show_pte(addr);
 	die("Oops", regs, esr);
 	bust_spinlocks(0);
@@ -705,6 +742,8 @@ asmlinkage void __exception do_mem_abort(unsigned long addr, unsigned int esr,
 	pr_alert("Unhandled fault: %s (0x%08x) at 0x%016lx\n",
 		 inf->name, esr, addr);
 
+	mem_abort_decode(esr);
+
 	info.si_signo = inf->sig;
 	info.si_errno = 0;
 	info.si_code  = inf->code;
diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c
index 21a8d828cbf4..e36ed5087b5c 100644
--- a/arch/arm64/mm/flush.c
+++ b/arch/arm64/mm/flush.c
@@ -83,3 +83,19 @@ EXPORT_SYMBOL(flush_dcache_page);
  * Additional functions defined in assembly.
  */
 EXPORT_SYMBOL(flush_icache_range);
+
+#ifdef CONFIG_ARCH_HAS_PMEM_API
+void arch_wb_cache_pmem(void *addr, size_t size)
+{
+	/* Ensure order against any prior non-cacheable writes */
+	dmb(osh);
+	__clean_dcache_area_pop(addr, size);
+}
+EXPORT_SYMBOL_GPL(arch_wb_cache_pmem);
+
+void arch_invalidate_pmem(void *addr, size_t size)
+{
+	__inval_dcache_area(addr, size);
+}
+EXPORT_SYMBOL_GPL(arch_invalidate_pmem);
+#endif
diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
index 656e0ece2289..6cb0fa92a651 100644
--- a/arch/arm64/mm/hugetlbpage.c
+++ b/arch/arm64/mm/hugetlbpage.c
@@ -41,6 +41,16 @@ int pud_huge(pud_t pud)
 #endif
 }
 
+/*
+ * Select all bits except the pfn
+ */
+static inline pgprot_t pte_pgprot(pte_t pte)
+{
+	unsigned long pfn = pte_pfn(pte);
+
+	return __pgprot(pte_val(pfn_pte(pfn, __pgprot(0))) ^ pte_val(pte));
+}
+
 static int find_num_contig(struct mm_struct *mm, unsigned long addr,
 			   pte_t *ptep, size_t *pgsize)
 {
@@ -58,15 +68,107 @@ static int find_num_contig(struct mm_struct *mm, unsigned long addr,
 	return CONT_PTES;
 }
 
+static inline int num_contig_ptes(unsigned long size, size_t *pgsize)
+{
+	int contig_ptes = 0;
+
+	*pgsize = size;
+
+	switch (size) {
+#ifdef CONFIG_ARM64_4K_PAGES
+	case PUD_SIZE:
+#endif
+	case PMD_SIZE:
+		contig_ptes = 1;
+		break;
+	case CONT_PMD_SIZE:
+		*pgsize = PMD_SIZE;
+		contig_ptes = CONT_PMDS;
+		break;
+	case CONT_PTE_SIZE:
+		*pgsize = PAGE_SIZE;
+		contig_ptes = CONT_PTES;
+		break;
+	}
+
+	return contig_ptes;
+}
+
+/*
+ * Changing some bits of contiguous entries requires us to follow a
+ * Break-Before-Make approach, breaking the whole contiguous set
+ * before we can change any entries. See ARM DDI 0487A.k_iss10775,
+ * "Misprogramming of the Contiguous bit", page D4-1762.
+ *
+ * This helper performs the break step.
+ */
+static pte_t get_clear_flush(struct mm_struct *mm,
+			     unsigned long addr,
+			     pte_t *ptep,
+			     unsigned long pgsize,
+			     unsigned long ncontig)
+{
+	struct vm_area_struct vma = { .vm_mm = mm };
+	pte_t orig_pte = huge_ptep_get(ptep);
+	bool valid = pte_valid(orig_pte);
+	unsigned long i, saddr = addr;
+
+	for (i = 0; i < ncontig; i++, addr += pgsize, ptep++) {
+		pte_t pte = ptep_get_and_clear(mm, addr, ptep);
+
+		/*
+		 * If HW_AFDBM is enabled, then the HW could turn on
+		 * the dirty bit for any page in the set, so check
+		 * them all.  All hugetlb entries are already young.
+		 */
+		if (pte_dirty(pte))
+			orig_pte = pte_mkdirty(orig_pte);
+	}
+
+	if (valid)
+		flush_tlb_range(&vma, saddr, addr);
+	return orig_pte;
+}
+
+/*
+ * Changing some bits of contiguous entries requires us to follow a
+ * Break-Before-Make approach, breaking the whole contiguous set
+ * before we can change any entries. See ARM DDI 0487A.k_iss10775,
+ * "Misprogramming of the Contiguous bit", page D4-1762.
+ *
+ * This helper performs the break step for use cases where the
+ * original pte is not needed.
+ */
+static void clear_flush(struct mm_struct *mm,
+			     unsigned long addr,
+			     pte_t *ptep,
+			     unsigned long pgsize,
+			     unsigned long ncontig)
+{
+	struct vm_area_struct vma = { .vm_mm = mm };
+	unsigned long i, saddr = addr;
+
+	for (i = 0; i < ncontig; i++, addr += pgsize, ptep++)
+		pte_clear(mm, addr, ptep);
+
+	flush_tlb_range(&vma, saddr, addr);
+}
+
 void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
 			    pte_t *ptep, pte_t pte)
 {
 	size_t pgsize;
 	int i;
 	int ncontig;
-	unsigned long pfn;
+	unsigned long pfn, dpfn;
 	pgprot_t hugeprot;
 
+	/*
+	 * Code needs to be expanded to handle huge swap and migration
+	 * entries. Needed for HUGETLB and MEMORY_FAILURE.
+	 */
+	WARN_ON(!pte_present(pte));
+
 	if (!pte_cont(pte)) {
 		set_pte_at(mm, addr, ptep, pte);
 		return;
@@ -74,17 +176,30 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
 
 	ncontig = find_num_contig(mm, addr, ptep, &pgsize);
 	pfn = pte_pfn(pte);
-	hugeprot = __pgprot(pte_val(pfn_pte(pfn, __pgprot(0))) ^ pte_val(pte));
-	for (i = 0; i < ncontig; i++) {
+	dpfn = pgsize >> PAGE_SHIFT;
+	hugeprot = pte_pgprot(pte);
+
+	clear_flush(mm, addr, ptep, pgsize, ncontig);
+
+	for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn) {
 		pr_debug("%s: set pte %p to 0x%llx\n", __func__, ptep,
 			 pte_val(pfn_pte(pfn, hugeprot)));
 		set_pte_at(mm, addr, ptep, pfn_pte(pfn, hugeprot));
-		ptep++;
-		pfn += pgsize >> PAGE_SHIFT;
-		addr += pgsize;
 	}
 }
 
+void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr,
+			  pte_t *ptep, pte_t pte, unsigned long sz)
+{
+	int i, ncontig;
+	size_t pgsize;
+
+	ncontig = num_contig_ptes(sz, &pgsize);
+
+	for (i = 0; i < ncontig; i++, ptep++)
+		set_pte(ptep, pte);
+}
+
 pte_t *huge_pte_alloc(struct mm_struct *mm,
 		      unsigned long addr, unsigned long sz)
 {
@@ -144,19 +259,28 @@ pte_t *huge_pte_offset(struct mm_struct *mm,
 		return NULL;
 
 	pud = pud_offset(pgd, addr);
-	if (pud_none(*pud))
+	if (sz != PUD_SIZE && pud_none(*pud))
 		return NULL;
-	/* swap or huge page */
-	if (!pud_present(*pud) || pud_huge(*pud))
+	/* hugepage or swap? */
+	if (pud_huge(*pud) || !pud_present(*pud))
 		return (pte_t *)pud;
 	/* table; check the next level */
 
+	if (sz == CONT_PMD_SIZE)
+		addr &= CONT_PMD_MASK;
+
 	pmd = pmd_offset(pud, addr);
-	if (pmd_none(*pmd))
+	if (!(sz == PMD_SIZE || sz == CONT_PMD_SIZE) &&
+	    pmd_none(*pmd))
 		return NULL;
-	if (!pmd_present(*pmd) || pmd_huge(*pmd))
+	if (pmd_huge(*pmd) || !pmd_present(*pmd))
 		return (pte_t *)pmd;
 
+	if (sz == CONT_PTE_SIZE) {
+		pte_t *pte = pte_offset_kernel(pmd, (addr & CONT_PTE_MASK));
+		return pte;
+	}
+
 	return NULL;
 }
 
@@ -176,111 +300,133 @@ pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
 	return entry;
 }
 
+void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
+		    pte_t *ptep, unsigned long sz)
+{
+	int i, ncontig;
+	size_t pgsize;
+
+	ncontig = num_contig_ptes(sz, &pgsize);
+
+	for (i = 0; i < ncontig; i++, addr += pgsize, ptep++)
+		pte_clear(mm, addr, ptep);
+}
+
 pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
 			      unsigned long addr, pte_t *ptep)
 {
-	pte_t pte;
+	int ncontig;
+	size_t pgsize;
+	pte_t orig_pte = huge_ptep_get(ptep);
 
-	if (pte_cont(*ptep)) {
-		int ncontig, i;
-		size_t pgsize;
-		bool is_dirty = false;
-
-		ncontig = find_num_contig(mm, addr, ptep, &pgsize);
-		/* save the 1st pte to return */
-		pte = ptep_get_and_clear(mm, addr, ptep);
-		for (i = 1, addr += pgsize; i < ncontig; ++i, addr += pgsize) {
-			/*
-			 * If HW_AFDBM is enabled, then the HW could
-			 * turn on the dirty bit for any of the page
-			 * in the set, so check them all.
-			 */
-			++ptep;
-			if (pte_dirty(ptep_get_and_clear(mm, addr, ptep)))
-				is_dirty = true;
-		}
-		if (is_dirty)
-			return pte_mkdirty(pte);
-		else
-			return pte;
-	} else {
+	if (!pte_cont(orig_pte))
 		return ptep_get_and_clear(mm, addr, ptep);
-	}
+
+	ncontig = find_num_contig(mm, addr, ptep, &pgsize);
+
+	return get_clear_flush(mm, addr, ptep, pgsize, ncontig);
 }
 
 int huge_ptep_set_access_flags(struct vm_area_struct *vma,
 			       unsigned long addr, pte_t *ptep,
 			       pte_t pte, int dirty)
 {
-	if (pte_cont(pte)) {
-		int ncontig, i, changed = 0;
-		size_t pgsize = 0;
-		unsigned long pfn = pte_pfn(pte);
-		/* Select all bits except the pfn */
-		pgprot_t hugeprot =
-			__pgprot(pte_val(pfn_pte(pfn, __pgprot(0))) ^
-				 pte_val(pte));
-
-		pfn = pte_pfn(pte);
-		ncontig = find_num_contig(vma->vm_mm, addr, ptep,
-					  &pgsize);
-		for (i = 0; i < ncontig; ++i, ++ptep, addr += pgsize) {
-			changed |= ptep_set_access_flags(vma, addr, ptep,
-							pfn_pte(pfn,
-								hugeprot),
-							dirty);
-			pfn += pgsize >> PAGE_SHIFT;
-		}
-		return changed;
-	} else {
+	int ncontig, i, changed = 0;
+	size_t pgsize = 0;
+	unsigned long pfn = pte_pfn(pte), dpfn;
+	pgprot_t hugeprot;
+	pte_t orig_pte;
+
+	if (!pte_cont(pte))
 		return ptep_set_access_flags(vma, addr, ptep, pte, dirty);
-	}
+
+	ncontig = find_num_contig(vma->vm_mm, addr, ptep, &pgsize);
+	dpfn = pgsize >> PAGE_SHIFT;
+
+	orig_pte = get_clear_flush(vma->vm_mm, addr, ptep, pgsize, ncontig);
+	if (!pte_same(orig_pte, pte))
+		changed = 1;
+
+	/* Make sure we don't lose the dirty state */
+	if (pte_dirty(orig_pte))
+		pte = pte_mkdirty(pte);
+
+	hugeprot = pte_pgprot(pte);
+	for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn)
+		set_pte_at(vma->vm_mm, addr, ptep, pfn_pte(pfn, hugeprot));
+
+	return changed;
 }
 
 void huge_ptep_set_wrprotect(struct mm_struct *mm,
 			     unsigned long addr, pte_t *ptep)
 {
-	if (pte_cont(*ptep)) {
-		int ncontig, i;
-		size_t pgsize = 0;
-
-		ncontig = find_num_contig(mm, addr, ptep, &pgsize);
-		for (i = 0; i < ncontig; ++i, ++ptep, addr += pgsize)
-			ptep_set_wrprotect(mm, addr, ptep);
-	} else {
+	unsigned long pfn, dpfn;
+	pgprot_t hugeprot;
+	int ncontig, i;
+	size_t pgsize;
+	pte_t pte;
+
+	if (!pte_cont(*ptep)) {
 		ptep_set_wrprotect(mm, addr, ptep);
+		return;
 	}
+
+	ncontig = find_num_contig(mm, addr, ptep, &pgsize);
+	dpfn = pgsize >> PAGE_SHIFT;
+
+	pte = get_clear_flush(mm, addr, ptep, pgsize, ncontig);
+	pte = pte_wrprotect(pte);
+
+	hugeprot = pte_pgprot(pte);
+	pfn = pte_pfn(pte);
+
+	for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn)
+		set_pte_at(mm, addr, ptep, pfn_pte(pfn, hugeprot));
 }
 
 void huge_ptep_clear_flush(struct vm_area_struct *vma,
 			   unsigned long addr, pte_t *ptep)
 {
-	if (pte_cont(*ptep)) {
-		int ncontig, i;
-		size_t pgsize = 0;
-
-		ncontig = find_num_contig(vma->vm_mm, addr, ptep,
-					  &pgsize);
-		for (i = 0; i < ncontig; ++i, ++ptep, addr += pgsize)
-			ptep_clear_flush(vma, addr, ptep);
-	} else {
+	size_t pgsize;
+	int ncontig;
+
+	if (!pte_cont(*ptep)) {
 		ptep_clear_flush(vma, addr, ptep);
+		return;
 	}
+
+	ncontig = find_num_contig(vma->vm_mm, addr, ptep, &pgsize);
+	clear_flush(vma->vm_mm, addr, ptep, pgsize, ncontig);
 }
 
 static __init int setup_hugepagesz(char *opt)
 {
 	unsigned long ps = memparse(opt, &opt);
 
-	if (ps == PMD_SIZE) {
-		hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT);
-	} else if (ps == PUD_SIZE) {
-		hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
-	} else {
-		hugetlb_bad_size();
-		pr_err("hugepagesz: Unsupported page size %lu K\n", ps >> 10);
-		return 0;
+	switch (ps) {
+#ifdef CONFIG_ARM64_4K_PAGES
+	case PUD_SIZE:
+#endif
+	case PMD_SIZE * CONT_PMDS:
+	case PMD_SIZE:
+	case PAGE_SIZE * CONT_PTES:
+		hugetlb_add_hstate(ilog2(ps) - PAGE_SHIFT);
+		return 1;
 	}
-	return 1;
+
+	hugetlb_bad_size();
+	pr_err("hugepagesz: Unsupported page size %lu K\n", ps >> 10);
+	return 0;
 }
 __setup("hugepagesz=", setup_hugepagesz);
+
+#ifdef CONFIG_ARM64_64K_PAGES
+static __init int add_default_hugepagesz(void)
+{
+	if (size_to_hstate(CONT_PTES * PAGE_SIZE) == NULL)
+		hugetlb_add_hstate(CONT_PTE_SHIFT);
+	return 0;
+}
+arch_initcall(add_default_hugepagesz);
+#endif
diff --git a/arch/arm64/net/bpf_jit.h b/arch/arm64/net/bpf_jit.h
index b02a9268dfbf..783de51a6c4e 100644
--- a/arch/arm64/net/bpf_jit.h
+++ b/arch/arm64/net/bpf_jit.h
@@ -44,8 +44,12 @@
 #define A64_COND_NE	AARCH64_INSN_COND_NE /* != */
 #define A64_COND_CS	AARCH64_INSN_COND_CS /* unsigned >= */
 #define A64_COND_HI	AARCH64_INSN_COND_HI /* unsigned > */
+#define A64_COND_LS	AARCH64_INSN_COND_LS /* unsigned <= */
+#define A64_COND_CC	AARCH64_INSN_COND_CC /* unsigned < */
 #define A64_COND_GE	AARCH64_INSN_COND_GE /* signed >= */
 #define A64_COND_GT	AARCH64_INSN_COND_GT /* signed > */
+#define A64_COND_LE	AARCH64_INSN_COND_LE /* signed <= */
+#define A64_COND_LT	AARCH64_INSN_COND_LT /* signed < */
 #define A64_B_(cond, imm19) A64_COND_BRANCH(cond, (imm19) << 2)
 
 /* Unconditional branch (immediate) */
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index f32144b2e07f..ba38d403abb2 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -527,10 +527,14 @@ emit_bswap_uxt:
 	/* IF (dst COND src) JUMP off */
 	case BPF_JMP | BPF_JEQ | BPF_X:
 	case BPF_JMP | BPF_JGT | BPF_X:
+	case BPF_JMP | BPF_JLT | BPF_X:
 	case BPF_JMP | BPF_JGE | BPF_X:
+	case BPF_JMP | BPF_JLE | BPF_X:
 	case BPF_JMP | BPF_JNE | BPF_X:
 	case BPF_JMP | BPF_JSGT | BPF_X:
+	case BPF_JMP | BPF_JSLT | BPF_X:
 	case BPF_JMP | BPF_JSGE | BPF_X:
+	case BPF_JMP | BPF_JSLE | BPF_X:
 		emit(A64_CMP(1, dst, src), ctx);
 emit_cond_jmp:
 		jmp_offset = bpf2a64_offset(i + off, i, ctx);
@@ -542,9 +546,15 @@ emit_cond_jmp:
 		case BPF_JGT:
 			jmp_cond = A64_COND_HI;
 			break;
+		case BPF_JLT:
+			jmp_cond = A64_COND_CC;
+			break;
 		case BPF_JGE:
 			jmp_cond = A64_COND_CS;
 			break;
+		case BPF_JLE:
+			jmp_cond = A64_COND_LS;
+			break;
 		case BPF_JSET:
 		case BPF_JNE:
 			jmp_cond = A64_COND_NE;
@@ -552,9 +562,15 @@ emit_cond_jmp:
 		case BPF_JSGT:
 			jmp_cond = A64_COND_GT;
 			break;
+		case BPF_JSLT:
+			jmp_cond = A64_COND_LT;
+			break;
 		case BPF_JSGE:
 			jmp_cond = A64_COND_GE;
 			break;
+		case BPF_JSLE:
+			jmp_cond = A64_COND_LE;
+			break;
 		default:
 			return -EFAULT;
 		}
@@ -566,10 +582,14 @@ emit_cond_jmp:
 	/* IF (dst COND imm) JUMP off */
 	case BPF_JMP | BPF_JEQ | BPF_K:
 	case BPF_JMP | BPF_JGT | BPF_K:
+	case BPF_JMP | BPF_JLT | BPF_K:
 	case BPF_JMP | BPF_JGE | BPF_K:
+	case BPF_JMP | BPF_JLE | BPF_K:
 	case BPF_JMP | BPF_JNE | BPF_K:
 	case BPF_JMP | BPF_JSGT | BPF_K:
+	case BPF_JMP | BPF_JSLT | BPF_K:
 	case BPF_JMP | BPF_JSGE | BPF_K:
+	case BPF_JMP | BPF_JSLE | BPF_K:
 		emit_a64_mov_i(1, tmp, imm, ctx);
 		emit(A64_CMP(1, dst, tmp), ctx);
 		goto emit_cond_jmp;
diff --git a/arch/blackfin/include/asm/spinlock.h b/arch/blackfin/include/asm/spinlock.h
index c58f4a83ed6f..f6431439d15d 100644
--- a/arch/blackfin/include/asm/spinlock.h
+++ b/arch/blackfin/include/asm/spinlock.h
@@ -48,11 +48,6 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock)
 	__raw_spin_unlock_asm(&lock->lock);
 }
 
-static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
-{
-	smp_cond_load_acquire(&lock->lock, !VAL);
-}
-
 static inline int arch_read_can_lock(arch_rwlock_t *rw)
 {
 	return __raw_uncached_fetch_asm(&rw->lock) > 0;
diff --git a/arch/blackfin/kernel/module.c b/arch/blackfin/kernel/module.c
index 0188c933b155..15af5768c403 100644
--- a/arch/blackfin/kernel/module.c
+++ b/arch/blackfin/kernel/module.c
@@ -4,8 +4,6 @@
  * Licensed under the GPL-2 or later
  */
 
-#define pr_fmt(fmt) "module %s: " fmt, mod->name
-
 #include <linux/moduleloader.h>
 #include <linux/elf.h>
 #include <linux/vmalloc.h>
@@ -16,6 +14,11 @@
 #include <asm/cacheflush.h>
 #include <linux/uaccess.h>
 
+#define mod_err(mod, fmt, ...)						\
+	pr_err("module %s: " fmt, (mod)->name, ##__VA_ARGS__)
+#define mod_debug(mod, fmt, ...)					\
+	pr_debug("module %s: " fmt, (mod)->name, ##__VA_ARGS__)
+
 /* Transfer the section to the L1 memory */
 int
 module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
@@ -44,7 +47,7 @@ module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
 			dest = l1_inst_sram_alloc(s->sh_size);
 			mod->arch.text_l1 = dest;
 			if (dest == NULL) {
-				pr_err("L1 inst memory allocation failed\n");
+				mod_err(mod, "L1 inst memory allocation failed\n");
 				return -1;
 			}
 			dma_memcpy(dest, (void *)s->sh_addr, s->sh_size);
@@ -56,7 +59,7 @@ module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
 			dest = l1_data_sram_alloc(s->sh_size);
 			mod->arch.data_a_l1 = dest;
 			if (dest == NULL) {
-				pr_err("L1 data memory allocation failed\n");
+				mod_err(mod, "L1 data memory allocation failed\n");
 				return -1;
 			}
 			memcpy(dest, (void *)s->sh_addr, s->sh_size);
@@ -68,7 +71,7 @@ module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
 			dest = l1_data_sram_zalloc(s->sh_size);
 			mod->arch.bss_a_l1 = dest;
 			if (dest == NULL) {
-				pr_err("L1 data memory allocation failed\n");
+				mod_err(mod, "L1 data memory allocation failed\n");
 				return -1;
 			}
 
@@ -77,7 +80,7 @@ module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
 			dest = l1_data_B_sram_alloc(s->sh_size);
 			mod->arch.data_b_l1 = dest;
 			if (dest == NULL) {
-				pr_err("L1 data memory allocation failed\n");
+				mod_err(mod, "L1 data memory allocation failed\n");
 				return -1;
 			}
 			memcpy(dest, (void *)s->sh_addr, s->sh_size);
@@ -87,7 +90,7 @@ module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
 			dest = l1_data_B_sram_alloc(s->sh_size);
 			mod->arch.bss_b_l1 = dest;
 			if (dest == NULL) {
-				pr_err("L1 data memory allocation failed\n");
+				mod_err(mod, "L1 data memory allocation failed\n");
 				return -1;
 			}
 			memset(dest, 0, s->sh_size);
@@ -99,7 +102,7 @@ module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
 			dest = l2_sram_alloc(s->sh_size);
 			mod->arch.text_l2 = dest;
 			if (dest == NULL) {
-				pr_err("L2 SRAM allocation failed\n");
+				mod_err(mod, "L2 SRAM allocation failed\n");
 				return -1;
 			}
 			memcpy(dest, (void *)s->sh_addr, s->sh_size);
@@ -111,7 +114,7 @@ module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
 			dest = l2_sram_alloc(s->sh_size);
 			mod->arch.data_l2 = dest;
 			if (dest == NULL) {
-				pr_err("L2 SRAM allocation failed\n");
+				mod_err(mod, "L2 SRAM allocation failed\n");
 				return -1;
 			}
 			memcpy(dest, (void *)s->sh_addr, s->sh_size);
@@ -123,7 +126,7 @@ module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
 			dest = l2_sram_zalloc(s->sh_size);
 			mod->arch.bss_l2 = dest;
 			if (dest == NULL) {
-				pr_err("L2 SRAM allocation failed\n");
+				mod_err(mod, "L2 SRAM allocation failed\n");
 				return -1;
 			}
 
@@ -157,8 +160,8 @@ apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
 	Elf32_Sym *sym;
 	unsigned long location, value, size;
 
-	pr_debug("applying relocate section %u to %u\n",
-		relsec, sechdrs[relsec].sh_info);
+	mod_debug(mod, "applying relocate section %u to %u\n",
+		  relsec, sechdrs[relsec].sh_info);
 
 	for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) {
 		/* This is where to make the change */
@@ -174,14 +177,14 @@ apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
 
 #ifdef CONFIG_SMP
 		if (location >= COREB_L1_DATA_A_START) {
-			pr_err("cannot relocate in L1: %u (SMP kernel)\n",
+			mod_err(mod, "cannot relocate in L1: %u (SMP kernel)\n",
 				ELF32_R_TYPE(rel[i].r_info));
 			return -ENOEXEC;
 		}
 #endif
 
-		pr_debug("location is %lx, value is %lx type is %d\n",
-			location, value, ELF32_R_TYPE(rel[i].r_info));
+		mod_debug(mod, "location is %lx, value is %lx type is %d\n",
+			  location, value, ELF32_R_TYPE(rel[i].r_info));
 
 		switch (ELF32_R_TYPE(rel[i].r_info)) {
 
@@ -200,12 +203,12 @@ apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
 		case R_BFIN_PCREL12_JUMP:
 		case R_BFIN_PCREL12_JUMP_S:
 		case R_BFIN_PCREL10:
-			pr_err("unsupported relocation: %u (no -mlong-calls?)\n",
+			mod_err(mod, "unsupported relocation: %u (no -mlong-calls?)\n",
 				ELF32_R_TYPE(rel[i].r_info));
 			return -ENOEXEC;
 
 		default:
-			pr_err("unknown relocation: %u\n",
+			mod_err(mod, "unknown relocation: %u\n",
 				ELF32_R_TYPE(rel[i].r_info));
 			return -ENOEXEC;
 		}
@@ -222,7 +225,7 @@ apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
 			isram_memcpy((void *)location, &value, size);
 			break;
 		default:
-			pr_err("invalid relocation for %#lx\n", location);
+			mod_err(mod, "invalid relocation for %#lx\n", location);
 			return -ENOEXEC;
 		}
 	}
diff --git a/arch/cris/arch-v32/mach-a3/arbiter.c b/arch/cris/arch-v32/mach-a3/arbiter.c
index ab5c421a4de8..735a9b0abdb8 100644
--- a/arch/cris/arch-v32/mach-a3/arbiter.c
+++ b/arch/cris/arch-v32/mach-a3/arbiter.c
@@ -227,7 +227,7 @@ static void crisv32_arbiter_config(int arbiter, int region, int unused_slots)
 	}
 }
 
-extern char _stext, _etext;
+extern char _stext[], _etext[];
 
 static void crisv32_arbiter_init(void)
 {
@@ -265,7 +265,7 @@ static void crisv32_arbiter_init(void)
 
 #ifndef CONFIG_ETRAX_KGDB
 	/* Global watch for writes to kernel text segment. */
-	crisv32_arbiter_watch(virt_to_phys(&_stext), &_etext - &_stext,
+	crisv32_arbiter_watch(virt_to_phys(_stext), _etext - _stext,
 		MARB_CLIENTS(arbiter_all_clients, arbiter_bar_all_clients),
 			      arbiter_all_write, NULL);
 #endif
diff --git a/arch/cris/arch-v32/mach-fs/arbiter.c b/arch/cris/arch-v32/mach-fs/arbiter.c
index c97f4d8120f9..047c70bdbb23 100644
--- a/arch/cris/arch-v32/mach-fs/arbiter.c
+++ b/arch/cris/arch-v32/mach-fs/arbiter.c
@@ -158,7 +158,7 @@ static void crisv32_arbiter_config(int region, int unused_slots)
 	}
 }
 
-extern char _stext, _etext;
+extern char _stext[], _etext[];
 
 static void crisv32_arbiter_init(void)
 {
@@ -190,7 +190,7 @@ static void crisv32_arbiter_init(void)
 
 #ifndef CONFIG_ETRAX_KGDB
 	/* Global watch for writes to kernel text segment. */
-	crisv32_arbiter_watch(virt_to_phys(&_stext), &_etext - &_stext,
+	crisv32_arbiter_watch(virt_to_phys(_stext), _etext - _stext,
 			      arbiter_all_clients, arbiter_all_write, NULL);
 #endif
 }
diff --git a/arch/cris/kernel/traps.c b/arch/cris/kernel/traps.c
index a01636a12a6e..d98131c45bb5 100644
--- a/arch/cris/kernel/traps.c
+++ b/arch/cris/kernel/traps.c
@@ -42,7 +42,7 @@ void (*nmi_handler)(struct pt_regs *);
 void show_trace(unsigned long *stack)
 {
 	unsigned long addr, module_start, module_end;
-	extern char _stext, _etext;
+	extern char _stext[], _etext[];
 	int i;
 
 	pr_err("\nCall Trace: ");
@@ -69,8 +69,8 @@ void show_trace(unsigned long *stack)
 		 * down the cause of the crash will be able to figure
 		 * out the call path that was taken.
 		 */
-		if (((addr >= (unsigned long)&_stext) &&
-		     (addr <= (unsigned long)&_etext)) ||
+		if (((addr >= (unsigned long)_stext) &&
+		     (addr <= (unsigned long)_etext)) ||
 		    ((addr >= module_start) && (addr <= module_end))) {
 #ifdef CONFIG_KALLSYMS
 			print_ip_sym(addr);
diff --git a/arch/frv/include/asm/futex.h b/arch/frv/include/asm/futex.h
index 2e1da71e27a4..ab346f5f8820 100644
--- a/arch/frv/include/asm/futex.h
+++ b/arch/frv/include/asm/futex.h
@@ -7,7 +7,8 @@
 #include <asm/errno.h>
 #include <linux/uaccess.h>
 
-extern int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr);
+extern int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
+		u32 __user *uaddr);
 
 static inline int
 futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
diff --git a/arch/frv/include/uapi/asm/socket.h b/arch/frv/include/uapi/asm/socket.h
index f1e3b20dce9f..9abf02d6855a 100644
--- a/arch/frv/include/uapi/asm/socket.h
+++ b/arch/frv/include/uapi/asm/socket.h
@@ -102,5 +102,7 @@
 
 #define SO_PEERGROUPS		59
 
+#define SO_ZEROCOPY		60
+
 #endif /* _ASM_SOCKET_H */
 
diff --git a/arch/frv/kernel/futex.c b/arch/frv/kernel/futex.c
index d155ca9e5098..37f7b2bf7f73 100644
--- a/arch/frv/kernel/futex.c
+++ b/arch/frv/kernel/futex.c
@@ -186,20 +186,10 @@ static inline int atomic_futex_op_xchg_xor(int oparg, u32 __user *uaddr, int *_o
 /*
  * do the futex operations
  */
-int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
+int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
 {
-	int op = (encoded_op >> 28) & 7;
-	int cmp = (encoded_op >> 24) & 15;
-	int oparg = (encoded_op << 8) >> 20;
-	int cmparg = (encoded_op << 20) >> 20;
 	int oldval = 0, ret;
 
-	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
-		oparg = 1 << oparg;
-
-	if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
-		return -EFAULT;
-
 	pagefault_disable();
 
 	switch (op) {
@@ -225,18 +215,9 @@ int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
 
 	pagefault_enable();
 
-	if (!ret) {
-		switch (cmp) {
-		case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
-		case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
-		case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
-		case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
-		case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
-		case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
-		default: ret = -ENOSYS; break;
-		}
-	}
+	if (!ret)
+		*oval = oldval;
 
 	return ret;
 
-} /* end futex_atomic_op_inuser() */
+} /* end arch_futex_atomic_op_inuser() */
diff --git a/arch/h8300/include/asm/traps.h b/arch/h8300/include/asm/traps.h
index 15e701130b27..1c5a30ec2df8 100644
--- a/arch/h8300/include/asm/traps.h
+++ b/arch/h8300/include/asm/traps.h
@@ -33,9 +33,9 @@ extern unsigned long *_interrupt_redirect_table;
 #define TRAP2_VEC 10
 #define TRAP3_VEC 11
 
-extern char _start, _etext;
+extern char _start[], _etext[];
 #define check_kernel_text(addr) \
-	((addr >= (unsigned long)(&_start)) && \
-	 (addr <  (unsigned long)(&_etext)) && !(addr & 1))
+	((addr >= (unsigned long)(_start)) && \
+	 (addr <  (unsigned long)(_etext)) && !(addr & 1))
 
 #endif /* _H8300_TRAPS_H */
diff --git a/arch/hexagon/include/asm/atomic.h b/arch/hexagon/include/asm/atomic.h
index a62ba368b27d..fb3dfb2a667e 100644
--- a/arch/hexagon/include/asm/atomic.h
+++ b/arch/hexagon/include/asm/atomic.h
@@ -42,6 +42,8 @@ static inline void atomic_set(atomic_t *v, int new)
 	);
 }
 
+#define atomic_set_release(v, i)	atomic_set((v), (i))
+
 /**
  * atomic_read - reads a word, atomically
  * @v: pointer to atomic value
diff --git a/arch/hexagon/include/asm/futex.h b/arch/hexagon/include/asm/futex.h
index 7e597f8434da..c607b77c8215 100644
--- a/arch/hexagon/include/asm/futex.h
+++ b/arch/hexagon/include/asm/futex.h
@@ -31,18 +31,9 @@
 
 
 static inline int
-futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
+arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
 {
-	int op = (encoded_op >> 28) & 7;
-	int cmp = (encoded_op >> 24) & 15;
-	int oparg = (encoded_op << 8) >> 20;
-	int cmparg = (encoded_op << 20) >> 20;
 	int oldval = 0, ret;
-	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
-		oparg = 1 << oparg;
-
-	if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
-		return -EFAULT;
 
 	pagefault_disable();
 
@@ -72,30 +63,9 @@ futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
 
 	pagefault_enable();
 
-	if (!ret) {
-		switch (cmp) {
-		case FUTEX_OP_CMP_EQ:
-			ret = (oldval == cmparg);
-			break;
-		case FUTEX_OP_CMP_NE:
-			ret = (oldval != cmparg);
-			break;
-		case FUTEX_OP_CMP_LT:
-			ret = (oldval < cmparg);
-			break;
-		case FUTEX_OP_CMP_GE:
-			ret = (oldval >= cmparg);
-			break;
-		case FUTEX_OP_CMP_LE:
-			ret = (oldval <= cmparg);
-			break;
-		case FUTEX_OP_CMP_GT:
-			ret = (oldval > cmparg);
-			break;
-		default:
-			ret = -ENOSYS;
-		}
-	}
+	if (!ret)
+		*oval = oldval;
+
 	return ret;
 }
 
diff --git a/arch/hexagon/include/asm/spinlock.h b/arch/hexagon/include/asm/spinlock.h
index a1c55788c5d6..53a8d5885887 100644
--- a/arch/hexagon/include/asm/spinlock.h
+++ b/arch/hexagon/include/asm/spinlock.h
@@ -179,11 +179,6 @@ static inline unsigned int arch_spin_trylock(arch_spinlock_t *lock)
  */
 #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
 
-static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
-{
-	smp_cond_load_acquire(&lock->lock, !VAL);
-}
-
 #define arch_spin_is_locked(x) ((x)->lock != 0)
 
 #define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
diff --git a/arch/ia64/include/asm/acpi.h b/arch/ia64/include/asm/acpi.h
index a3d0211970e9..c86a947f5368 100644
--- a/arch/ia64/include/asm/acpi.h
+++ b/arch/ia64/include/asm/acpi.h
@@ -112,8 +112,6 @@ static inline void arch_acpi_set_pdc_bits(u32 *buf)
 	buf[2] |= ACPI_PDC_EST_CAPABILITY_SMP;
 }
 
-#define acpi_unlazy_tlb(x)
-
 #ifdef CONFIG_ACPI_NUMA
 extern cpumask_t early_cpu_possible_map;
 #define for_each_possible_early_cpu(cpu)  \
diff --git a/arch/ia64/include/asm/futex.h b/arch/ia64/include/asm/futex.h
index 76acbcd5c060..6d67dc1eaf2b 100644
--- a/arch/ia64/include/asm/futex.h
+++ b/arch/ia64/include/asm/futex.h
@@ -45,18 +45,9 @@ do {									\
 } while (0)
 
 static inline int
-futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
+arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
 {
-	int op = (encoded_op >> 28) & 7;
-	int cmp = (encoded_op >> 24) & 15;
-	int oparg = (encoded_op << 8) >> 20;
-	int cmparg = (encoded_op << 20) >> 20;
 	int oldval = 0, ret;
-	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
-		oparg = 1 << oparg;
-
-	if (! access_ok (VERIFY_WRITE, uaddr, sizeof(u32)))
-		return -EFAULT;
 
 	pagefault_disable();
 
@@ -84,17 +75,9 @@ futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
 
 	pagefault_enable();
 
-	if (!ret) {
-		switch (cmp) {
-		case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
-		case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
-		case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
-		case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
-		case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
-		case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
-		default: ret = -ENOSYS;
-		}
-	}
+	if (!ret)
+		*oval = oldval;
+
 	return ret;
 }
 
diff --git a/arch/ia64/include/asm/spinlock.h b/arch/ia64/include/asm/spinlock.h
index ca9e76149a4a..df2c121164b8 100644
--- a/arch/ia64/include/asm/spinlock.h
+++ b/arch/ia64/include/asm/spinlock.h
@@ -76,22 +76,6 @@ static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock)
 	ACCESS_ONCE(*p) = (tmp + 2) & ~1;
 }
 
-static __always_inline void __ticket_spin_unlock_wait(arch_spinlock_t *lock)
-{
-	int	*p = (int *)&lock->lock, ticket;
-
-	ia64_invala();
-
-	for (;;) {
-		asm volatile ("ld4.c.nc %0=[%1]" : "=r"(ticket) : "r"(p) : "memory");
-		if (!(((ticket >> TICKET_SHIFT) ^ ticket) & TICKET_MASK))
-			return;
-		cpu_relax();
-	}
-
-	smp_acquire__after_ctrl_dep();
-}
-
 static inline int __ticket_spin_is_locked(arch_spinlock_t *lock)
 {
 	long tmp = ACCESS_ONCE(lock->lock);
@@ -143,11 +127,6 @@ static __always_inline void arch_spin_lock_flags(arch_spinlock_t *lock,
 	arch_spin_lock(lock);
 }
 
-static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
-{
-	__ticket_spin_unlock_wait(lock);
-}
-
 #define arch_read_can_lock(rw)		(*(volatile int *)(rw) >= 0)
 #define arch_write_can_lock(rw)	(*(volatile int *)(rw) == 0)
 
diff --git a/arch/ia64/include/uapi/asm/socket.h b/arch/ia64/include/uapi/asm/socket.h
index 5dd5c5d0d642..002eb85a6941 100644
--- a/arch/ia64/include/uapi/asm/socket.h
+++ b/arch/ia64/include/uapi/asm/socket.h
@@ -111,4 +111,6 @@
 
 #define SO_PEERGROUPS		59
 
+#define SO_ZEROCOPY		60
+
 #endif /* _ASM_IA64_SOCKET_H */
diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c
index 7508c306aa9e..1d29b2f8726b 100644
--- a/arch/ia64/kernel/acpi.c
+++ b/arch/ia64/kernel/acpi.c
@@ -159,12 +159,12 @@ int acpi_request_vector(u32 int_type)
 	return vector;
 }
 
-char *__init __acpi_map_table(unsigned long phys_addr, unsigned long size)
+void __init __iomem *__acpi_map_table(unsigned long phys, unsigned long size)
 {
-	return __va(phys_addr);
+	return __va(phys);
 }
 
-void __init __acpi_unmap_table(char *map, unsigned long size)
+void __init __acpi_unmap_table(void __iomem *map, unsigned long size)
 {
 }
 
diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c
index 121295637d0d..81416000c5e0 100644
--- a/arch/ia64/kernel/efi.c
+++ b/arch/ia64/kernel/efi.c
@@ -757,14 +757,14 @@ efi_memmap_intersects (unsigned long phys_addr, unsigned long size)
 	return 0;
 }
 
-u32
+int
 efi_mem_type (unsigned long phys_addr)
 {
 	efi_memory_desc_t *md = efi_memory_descriptor(phys_addr);
 
 	if (md)
 		return md->type;
-	return 0;
+	return -EINVAL;
 }
 
 u64
diff --git a/arch/m32r/include/asm/flat.h b/arch/m32r/include/asm/flat.h
index 455ce7ddbf14..dfcb0e4eb256 100644
--- a/arch/m32r/include/asm/flat.h
+++ b/arch/m32r/include/asm/flat.h
@@ -95,7 +95,7 @@ static inline unsigned long m32r_flat_get_addr_from_rp (u32 *rp,
 	return ~0;      /* bogus value */
 }
 
-static inline void flat_put_addr_at_rp(u32 *rp, u32 addr, u32 relval)
+static inline int flat_put_addr_at_rp(u32 *rp, u32 addr, u32 relval)
 {
         unsigned int reloc = flat_m32r_get_reloc_type (relval);
 	if (reloc & 0xf0) {
@@ -133,6 +133,7 @@ static inline void flat_put_addr_at_rp(u32 *rp, u32 addr, u32 relval)
 			break;
 		}
 	}
+	return 0;
 }
 
 // kludge - text_len is a local variable in the only user.
diff --git a/arch/m32r/include/asm/spinlock.h b/arch/m32r/include/asm/spinlock.h
index 323c7fc953cd..a56825592b90 100644
--- a/arch/m32r/include/asm/spinlock.h
+++ b/arch/m32r/include/asm/spinlock.h
@@ -30,11 +30,6 @@
 #define arch_spin_is_locked(x)		(*(volatile int *)(&(x)->slock) <= 0)
 #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
 
-static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
-{
-	smp_cond_load_acquire(&lock->slock, VAL > 0);
-}
-
 /**
  * arch_spin_trylock - Try spin lock and return a result
  * @lock: Pointer to the lock variable
diff --git a/arch/m32r/include/uapi/asm/socket.h b/arch/m32r/include/uapi/asm/socket.h
index f8f7b47e247f..e268e51a38d1 100644
--- a/arch/m32r/include/uapi/asm/socket.h
+++ b/arch/m32r/include/uapi/asm/socket.h
@@ -102,4 +102,6 @@
 
 #define SO_PEERGROUPS		59
 
+#define SO_ZEROCOPY		60
+
 #endif /* _ASM_M32R_SOCKET_H */
diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig
index ddff1164aff0..54191f6fc715 100644
--- a/arch/m68k/configs/amiga_defconfig
+++ b/arch/m68k/configs/amiga_defconfig
@@ -49,6 +49,7 @@ CONFIG_PACKET=y
 CONFIG_PACKET_DIAG=m
 CONFIG_UNIX=y
 CONFIG_UNIX_DIAG=m
+CONFIG_TLS=m
 CONFIG_XFRM_MIGRATE=y
 CONFIG_NET_KEY=y
 CONFIG_INET=y
@@ -465,8 +466,10 @@ CONFIG_HID=m
 CONFIG_HIDRAW=y
 CONFIG_UHID=m
 # CONFIG_HID_GENERIC is not set
+# CONFIG_HID_ITE is not set
 # CONFIG_USB_SUPPORT is not set
 CONFIG_RTC_CLASS=y
+# CONFIG_RTC_NVMEM is not set
 CONFIG_RTC_DRV_MSM6242=m
 CONFIG_RTC_DRV_RP5C01=m
 # CONFIG_IOMMU_SUPPORT is not set
@@ -477,7 +480,6 @@ CONFIG_SERIAL_CONSOLE=y
 CONFIG_EXT4_FS=y
 CONFIG_REISERFS_FS=m
 CONFIG_JFS_FS=m
-CONFIG_XFS_FS=m
 CONFIG_OCFS2_FS=m
 # CONFIG_OCFS2_DEBUG_MASKLOG is not set
 CONFIG_FS_ENCRYPTION=m
@@ -587,12 +589,13 @@ CONFIG_TEST_BITMAP=m
 CONFIG_TEST_UUID=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_HASH=m
-CONFIG_TEST_LKM=m
 CONFIG_TEST_USER_COPY=m
 CONFIG_TEST_BPF=m
 CONFIG_TEST_FIRMWARE=m
+CONFIG_TEST_SYSCTL=m
 CONFIG_TEST_UDELAY=m
 CONFIG_TEST_STATIC_KEYS=m
+CONFIG_TEST_KMOD=m
 CONFIG_EARLY_PRINTK=y
 CONFIG_ENCRYPTED_KEYS=m
 CONFIG_HARDENED_USERCOPY=y
diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig
index 17384dc959a5..fb4663904428 100644
--- a/arch/m68k/configs/apollo_defconfig
+++ b/arch/m68k/configs/apollo_defconfig
@@ -47,6 +47,7 @@ CONFIG_PACKET=y
 CONFIG_PACKET_DIAG=m
 CONFIG_UNIX=y
 CONFIG_UNIX_DIAG=m
+CONFIG_TLS=m
 CONFIG_XFRM_MIGRATE=y
 CONFIG_NET_KEY=y
 CONFIG_INET=y
@@ -427,8 +428,10 @@ CONFIG_HID=m
 CONFIG_HIDRAW=y
 CONFIG_UHID=m
 # CONFIG_HID_GENERIC is not set
+# CONFIG_HID_ITE is not set
 # CONFIG_USB_SUPPORT is not set
 CONFIG_RTC_CLASS=y
+# CONFIG_RTC_NVMEM is not set
 CONFIG_RTC_DRV_GENERIC=m
 # CONFIG_IOMMU_SUPPORT is not set
 CONFIG_HEARTBEAT=y
@@ -436,7 +439,6 @@ CONFIG_PROC_HARDWARE=y
 CONFIG_EXT4_FS=y
 CONFIG_REISERFS_FS=m
 CONFIG_JFS_FS=m
-CONFIG_XFS_FS=m
 CONFIG_OCFS2_FS=m
 # CONFIG_OCFS2_DEBUG_MASKLOG is not set
 CONFIG_FS_ENCRYPTION=m
@@ -546,12 +548,13 @@ CONFIG_TEST_BITMAP=m
 CONFIG_TEST_UUID=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_HASH=m
-CONFIG_TEST_LKM=m
 CONFIG_TEST_USER_COPY=m
 CONFIG_TEST_BPF=m
 CONFIG_TEST_FIRMWARE=m
+CONFIG_TEST_SYSCTL=m
 CONFIG_TEST_UDELAY=m
 CONFIG_TEST_STATIC_KEYS=m
+CONFIG_TEST_KMOD=m
 CONFIG_EARLY_PRINTK=y
 CONFIG_ENCRYPTED_KEYS=m
 CONFIG_HARDENED_USERCOPY=y
diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig
index 53a641d62f85..4ab393e86e52 100644
--- a/arch/m68k/configs/atari_defconfig
+++ b/arch/m68k/configs/atari_defconfig
@@ -47,6 +47,7 @@ CONFIG_PACKET=y
 CONFIG_PACKET_DIAG=m
 CONFIG_UNIX=y
 CONFIG_UNIX_DIAG=m
+CONFIG_TLS=m
 CONFIG_XFRM_MIGRATE=y
 CONFIG_NET_KEY=y
 CONFIG_INET=y
@@ -442,7 +443,10 @@ CONFIG_DMASOUND_ATARI=m
 CONFIG_HID=m
 CONFIG_HIDRAW=y
 CONFIG_UHID=m
+# CONFIG_HID_GENERIC is not set
+# CONFIG_HID_ITE is not set
 CONFIG_RTC_CLASS=y
+# CONFIG_RTC_NVMEM is not set
 CONFIG_RTC_DRV_GENERIC=m
 # CONFIG_IOMMU_SUPPORT is not set
 CONFIG_HEARTBEAT=y
@@ -457,7 +461,6 @@ CONFIG_ATARI_DSP56K=m
 CONFIG_EXT4_FS=y
 CONFIG_REISERFS_FS=m
 CONFIG_JFS_FS=m
-CONFIG_XFS_FS=m
 CONFIG_OCFS2_FS=m
 # CONFIG_OCFS2_DEBUG_MASKLOG is not set
 CONFIG_FS_ENCRYPTION=m
@@ -567,12 +570,13 @@ CONFIG_TEST_BITMAP=m
 CONFIG_TEST_UUID=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_HASH=m
-CONFIG_TEST_LKM=m
 CONFIG_TEST_USER_COPY=m
 CONFIG_TEST_BPF=m
 CONFIG_TEST_FIRMWARE=m
+CONFIG_TEST_SYSCTL=m
 CONFIG_TEST_UDELAY=m
 CONFIG_TEST_STATIC_KEYS=m
+CONFIG_TEST_KMOD=m
 CONFIG_EARLY_PRINTK=y
 CONFIG_ENCRYPTED_KEYS=m
 CONFIG_HARDENED_USERCOPY=y
diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig
index 3925ae3a5eb3..1dd8d697545b 100644
--- a/arch/m68k/configs/bvme6000_defconfig
+++ b/arch/m68k/configs/bvme6000_defconfig
@@ -45,6 +45,7 @@ CONFIG_PACKET=y
 CONFIG_PACKET_DIAG=m
 CONFIG_UNIX=y
 CONFIG_UNIX_DIAG=m
+CONFIG_TLS=m
 CONFIG_XFRM_MIGRATE=y
 CONFIG_NET_KEY=y
 CONFIG_INET=y
@@ -420,15 +421,16 @@ CONFIG_HID=m
 CONFIG_HIDRAW=y
 CONFIG_UHID=m
 # CONFIG_HID_GENERIC is not set
+# CONFIG_HID_ITE is not set
 # CONFIG_USB_SUPPORT is not set
 CONFIG_RTC_CLASS=y
+# CONFIG_RTC_NVMEM is not set
 CONFIG_RTC_DRV_GENERIC=m
 # CONFIG_IOMMU_SUPPORT is not set
 CONFIG_PROC_HARDWARE=y
 CONFIG_EXT4_FS=y
 CONFIG_REISERFS_FS=m
 CONFIG_JFS_FS=m
-CONFIG_XFS_FS=m
 CONFIG_OCFS2_FS=m
 # CONFIG_OCFS2_DEBUG_MASKLOG is not set
 CONFIG_FS_ENCRYPTION=m
@@ -538,12 +540,13 @@ CONFIG_TEST_BITMAP=m
 CONFIG_TEST_UUID=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_HASH=m
-CONFIG_TEST_LKM=m
 CONFIG_TEST_USER_COPY=m
 CONFIG_TEST_BPF=m
 CONFIG_TEST_FIRMWARE=m
+CONFIG_TEST_SYSCTL=m
 CONFIG_TEST_UDELAY=m
 CONFIG_TEST_STATIC_KEYS=m
+CONFIG_TEST_KMOD=m
 CONFIG_EARLY_PRINTK=y
 CONFIG_ENCRYPTED_KEYS=m
 CONFIG_HARDENED_USERCOPY=y
diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig
index f4a134b390b4..02b39f50076e 100644
--- a/arch/m68k/configs/hp300_defconfig
+++ b/arch/m68k/configs/hp300_defconfig
@@ -47,6 +47,7 @@ CONFIG_PACKET=y
 CONFIG_PACKET_DIAG=m
 CONFIG_UNIX=y
 CONFIG_UNIX_DIAG=m
+CONFIG_TLS=m
 CONFIG_XFRM_MIGRATE=y
 CONFIG_NET_KEY=y
 CONFIG_INET=y
@@ -430,15 +431,16 @@ CONFIG_HID=m
 CONFIG_HIDRAW=y
 CONFIG_UHID=m
 # CONFIG_HID_GENERIC is not set
+# CONFIG_HID_ITE is not set
 # CONFIG_USB_SUPPORT is not set
 CONFIG_RTC_CLASS=y
+# CONFIG_RTC_NVMEM is not set
 CONFIG_RTC_DRV_GENERIC=m
 # CONFIG_IOMMU_SUPPORT is not set
 CONFIG_PROC_HARDWARE=y
 CONFIG_EXT4_FS=y
 CONFIG_REISERFS_FS=m
 CONFIG_JFS_FS=m
-CONFIG_XFS_FS=m
 CONFIG_OCFS2_FS=m
 # CONFIG_OCFS2_DEBUG_MASKLOG is not set
 CONFIG_FS_ENCRYPTION=m
@@ -548,12 +550,13 @@ CONFIG_TEST_BITMAP=m
 CONFIG_TEST_UUID=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_HASH=m
-CONFIG_TEST_LKM=m
 CONFIG_TEST_USER_COPY=m
 CONFIG_TEST_BPF=m
 CONFIG_TEST_FIRMWARE=m
+CONFIG_TEST_SYSCTL=m
 CONFIG_TEST_UDELAY=m
 CONFIG_TEST_STATIC_KEYS=m
+CONFIG_TEST_KMOD=m
 CONFIG_EARLY_PRINTK=y
 CONFIG_ENCRYPTED_KEYS=m
 CONFIG_HARDENED_USERCOPY=y
diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig
index 9ed0cef632b7..044dcb2bf8fb 100644
--- a/arch/m68k/configs/mac_defconfig
+++ b/arch/m68k/configs/mac_defconfig
@@ -46,6 +46,7 @@ CONFIG_PACKET=y
 CONFIG_PACKET_DIAG=m
 CONFIG_UNIX=y
 CONFIG_UNIX_DIAG=m
+CONFIG_TLS=m
 CONFIG_XFRM_MIGRATE=y
 CONFIG_NET_KEY=y
 CONFIG_INET=y
@@ -452,15 +453,16 @@ CONFIG_HID=m
 CONFIG_HIDRAW=y
 CONFIG_UHID=m
 # CONFIG_HID_GENERIC is not set
+# CONFIG_HID_ITE is not set
 # CONFIG_USB_SUPPORT is not set
 CONFIG_RTC_CLASS=y
+# CONFIG_RTC_NVMEM is not set
 CONFIG_RTC_DRV_GENERIC=m
 # CONFIG_IOMMU_SUPPORT is not set
 CONFIG_PROC_HARDWARE=y
 CONFIG_EXT4_FS=y
 CONFIG_REISERFS_FS=m
 CONFIG_JFS_FS=m
-CONFIG_XFS_FS=m
 CONFIG_OCFS2_FS=m
 # CONFIG_OCFS2_DEBUG_MASKLOG is not set
 CONFIG_FS_ENCRYPTION=m
@@ -570,12 +572,13 @@ CONFIG_TEST_BITMAP=m
 CONFIG_TEST_UUID=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_HASH=m
-CONFIG_TEST_LKM=m
 CONFIG_TEST_USER_COPY=m
 CONFIG_TEST_BPF=m
 CONFIG_TEST_FIRMWARE=m
+CONFIG_TEST_SYSCTL=m
 CONFIG_TEST_UDELAY=m
 CONFIG_TEST_STATIC_KEYS=m
+CONFIG_TEST_KMOD=m
 CONFIG_EARLY_PRINTK=y
 CONFIG_ENCRYPTED_KEYS=m
 CONFIG_HARDENED_USERCOPY=y
diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig
index efed0d48fd53..3ad04682077a 100644
--- a/arch/m68k/configs/multi_defconfig
+++ b/arch/m68k/configs/multi_defconfig
@@ -56,6 +56,7 @@ CONFIG_PACKET=y
 CONFIG_PACKET_DIAG=m
 CONFIG_UNIX=y
 CONFIG_UNIX_DIAG=m
+CONFIG_TLS=m
 CONFIG_XFRM_MIGRATE=y
 CONFIG_NET_KEY=y
 CONFIG_INET=y
@@ -520,8 +521,10 @@ CONFIG_HID=m
 CONFIG_HIDRAW=y
 CONFIG_UHID=m
 # CONFIG_HID_GENERIC is not set
+# CONFIG_HID_ITE is not set
 # CONFIG_USB_SUPPORT is not set
 CONFIG_RTC_CLASS=y
+# CONFIG_RTC_NVMEM is not set
 CONFIG_RTC_DRV_MSM6242=m
 CONFIG_RTC_DRV_RP5C01=m
 CONFIG_RTC_DRV_GENERIC=m
@@ -540,7 +543,6 @@ CONFIG_SERIAL_CONSOLE=y
 CONFIG_EXT4_FS=y
 CONFIG_REISERFS_FS=m
 CONFIG_JFS_FS=m
-CONFIG_XFS_FS=m
 CONFIG_OCFS2_FS=m
 # CONFIG_OCFS2_DEBUG_MASKLOG is not set
 CONFIG_FS_ENCRYPTION=m
@@ -650,12 +652,13 @@ CONFIG_TEST_BITMAP=m
 CONFIG_TEST_UUID=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_HASH=m
-CONFIG_TEST_LKM=m
 CONFIG_TEST_USER_COPY=m
 CONFIG_TEST_BPF=m
 CONFIG_TEST_FIRMWARE=m
+CONFIG_TEST_SYSCTL=m
 CONFIG_TEST_UDELAY=m
 CONFIG_TEST_STATIC_KEYS=m
+CONFIG_TEST_KMOD=m
 CONFIG_EARLY_PRINTK=y
 CONFIG_ENCRYPTED_KEYS=m
 CONFIG_HARDENED_USERCOPY=y
diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig
index 9040457c7f9c..dc2dd61948cd 100644
--- a/arch/m68k/configs/mvme147_defconfig
+++ b/arch/m68k/configs/mvme147_defconfig
@@ -44,6 +44,7 @@ CONFIG_PACKET=y
 CONFIG_PACKET_DIAG=m
 CONFIG_UNIX=y
 CONFIG_UNIX_DIAG=m
+CONFIG_TLS=m
 CONFIG_XFRM_MIGRATE=y
 CONFIG_NET_KEY=y
 CONFIG_INET=y
@@ -420,15 +421,16 @@ CONFIG_HID=m
 CONFIG_HIDRAW=y
 CONFIG_UHID=m
 # CONFIG_HID_GENERIC is not set
+# CONFIG_HID_ITE is not set
 # CONFIG_USB_SUPPORT is not set
 CONFIG_RTC_CLASS=y
+# CONFIG_RTC_NVMEM is not set
 CONFIG_RTC_DRV_GENERIC=m
 # CONFIG_IOMMU_SUPPORT is not set
 CONFIG_PROC_HARDWARE=y
 CONFIG_EXT4_FS=y
 CONFIG_REISERFS_FS=m
 CONFIG_JFS_FS=m
-CONFIG_XFS_FS=m
 CONFIG_OCFS2_FS=m
 # CONFIG_OCFS2_DEBUG_MASKLOG is not set
 CONFIG_FS_ENCRYPTION=m
@@ -538,12 +540,13 @@ CONFIG_TEST_BITMAP=m
 CONFIG_TEST_UUID=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_HASH=m
-CONFIG_TEST_LKM=m
 CONFIG_TEST_USER_COPY=m
 CONFIG_TEST_BPF=m
 CONFIG_TEST_FIRMWARE=m
+CONFIG_TEST_SYSCTL=m
 CONFIG_TEST_UDELAY=m
 CONFIG_TEST_STATIC_KEYS=m
+CONFIG_TEST_KMOD=m
 CONFIG_EARLY_PRINTK=y
 CONFIG_ENCRYPTED_KEYS=m
 CONFIG_HARDENED_USERCOPY=y
diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig
index 8b17f00e0484..54e7b523fc3d 100644
--- a/arch/m68k/configs/mvme16x_defconfig
+++ b/arch/m68k/configs/mvme16x_defconfig
@@ -45,6 +45,7 @@ CONFIG_PACKET=y
 CONFIG_PACKET_DIAG=m
 CONFIG_UNIX=y
 CONFIG_UNIX_DIAG=m
+CONFIG_TLS=m
 CONFIG_XFRM_MIGRATE=y
 CONFIG_NET_KEY=y
 CONFIG_INET=y
@@ -420,15 +421,16 @@ CONFIG_HID=m
 CONFIG_HIDRAW=y
 CONFIG_UHID=m
 # CONFIG_HID_GENERIC is not set
+# CONFIG_HID_ITE is not set
 # CONFIG_USB_SUPPORT is not set
 CONFIG_RTC_CLASS=y
+# CONFIG_RTC_NVMEM is not set
 CONFIG_RTC_DRV_GENERIC=m
 # CONFIG_IOMMU_SUPPORT is not set
 CONFIG_PROC_HARDWARE=y
 CONFIG_EXT4_FS=y
 CONFIG_REISERFS_FS=m
 CONFIG_JFS_FS=m
-CONFIG_XFS_FS=m
 CONFIG_OCFS2_FS=m
 # CONFIG_OCFS2_DEBUG_MASKLOG is not set
 CONFIG_FS_ENCRYPTION=m
@@ -538,12 +540,13 @@ CONFIG_TEST_BITMAP=m
 CONFIG_TEST_UUID=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_HASH=m
-CONFIG_TEST_LKM=m
 CONFIG_TEST_USER_COPY=m
 CONFIG_TEST_BPF=m
 CONFIG_TEST_FIRMWARE=m
+CONFIG_TEST_SYSCTL=m
 CONFIG_TEST_UDELAY=m
 CONFIG_TEST_STATIC_KEYS=m
+CONFIG_TEST_KMOD=m
 CONFIG_EARLY_PRINTK=y
 CONFIG_ENCRYPTED_KEYS=m
 CONFIG_HARDENED_USERCOPY=y
diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig
index 5f3718c62c85..d63d8a15f6db 100644
--- a/arch/m68k/configs/q40_defconfig
+++ b/arch/m68k/configs/q40_defconfig
@@ -45,6 +45,7 @@ CONFIG_PACKET=y
 CONFIG_PACKET_DIAG=m
 CONFIG_UNIX=y
 CONFIG_UNIX_DIAG=m
+CONFIG_TLS=m
 CONFIG_XFRM_MIGRATE=y
 CONFIG_NET_KEY=y
 CONFIG_INET=y
@@ -442,8 +443,10 @@ CONFIG_HID=m
 CONFIG_HIDRAW=y
 CONFIG_UHID=m
 # CONFIG_HID_GENERIC is not set
+# CONFIG_HID_ITE is not set
 # CONFIG_USB_SUPPORT is not set
 CONFIG_RTC_CLASS=y
+# CONFIG_RTC_NVMEM is not set
 CONFIG_RTC_DRV_GENERIC=m
 # CONFIG_IOMMU_SUPPORT is not set
 CONFIG_HEARTBEAT=y
@@ -451,7 +454,6 @@ CONFIG_PROC_HARDWARE=y
 CONFIG_EXT4_FS=y
 CONFIG_REISERFS_FS=m
 CONFIG_JFS_FS=m
-CONFIG_XFS_FS=m
 CONFIG_OCFS2_FS=m
 # CONFIG_OCFS2_DEBUG_MASKLOG is not set
 CONFIG_FS_ENCRYPTION=m
@@ -561,12 +563,13 @@ CONFIG_TEST_BITMAP=m
 CONFIG_TEST_UUID=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_HASH=m
-CONFIG_TEST_LKM=m
 CONFIG_TEST_USER_COPY=m
 CONFIG_TEST_BPF=m
 CONFIG_TEST_FIRMWARE=m
+CONFIG_TEST_SYSCTL=m
 CONFIG_TEST_UDELAY=m
 CONFIG_TEST_STATIC_KEYS=m
+CONFIG_TEST_KMOD=m
 CONFIG_EARLY_PRINTK=y
 CONFIG_ENCRYPTED_KEYS=m
 CONFIG_HARDENED_USERCOPY=y
diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig
index 8c979a68fca5..d0924c22f52a 100644
--- a/arch/m68k/configs/sun3_defconfig
+++ b/arch/m68k/configs/sun3_defconfig
@@ -42,6 +42,7 @@ CONFIG_PACKET=y
 CONFIG_PACKET_DIAG=m
 CONFIG_UNIX=y
 CONFIG_UNIX_DIAG=m
+CONFIG_TLS=m
 CONFIG_XFRM_MIGRATE=y
 CONFIG_NET_KEY=y
 CONFIG_INET=y
@@ -422,15 +423,16 @@ CONFIG_HID=m
 CONFIG_HIDRAW=y
 CONFIG_UHID=m
 # CONFIG_HID_GENERIC is not set
+# CONFIG_HID_ITE is not set
 # CONFIG_USB_SUPPORT is not set
 CONFIG_RTC_CLASS=y
+# CONFIG_RTC_NVMEM is not set
 CONFIG_RTC_DRV_GENERIC=m
 # CONFIG_IOMMU_SUPPORT is not set
 CONFIG_PROC_HARDWARE=y
 CONFIG_EXT4_FS=y
 CONFIG_REISERFS_FS=m
 CONFIG_JFS_FS=m
-CONFIG_XFS_FS=m
 CONFIG_OCFS2_FS=m
 # CONFIG_OCFS2_DEBUG_MASKLOG is not set
 CONFIG_FS_ENCRYPTION=m
@@ -540,12 +542,13 @@ CONFIG_TEST_BITMAP=m
 CONFIG_TEST_UUID=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_HASH=m
-CONFIG_TEST_LKM=m
 CONFIG_TEST_USER_COPY=m
 CONFIG_TEST_BPF=m
 CONFIG_TEST_FIRMWARE=m
+CONFIG_TEST_SYSCTL=m
 CONFIG_TEST_UDELAY=m
 CONFIG_TEST_STATIC_KEYS=m
+CONFIG_TEST_KMOD=m
 CONFIG_ENCRYPTED_KEYS=m
 CONFIG_HARDENED_USERCOPY=y
 CONFIG_CRYPTO_RSA=m
diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig
index a1e79530e806..3001ee1e5dc5 100644
--- a/arch/m68k/configs/sun3x_defconfig
+++ b/arch/m68k/configs/sun3x_defconfig
@@ -42,6 +42,7 @@ CONFIG_PACKET=y
 CONFIG_PACKET_DIAG=m
 CONFIG_UNIX=y
 CONFIG_UNIX_DIAG=m
+CONFIG_TLS=m
 CONFIG_XFRM_MIGRATE=y
 CONFIG_NET_KEY=y
 CONFIG_INET=y
@@ -422,15 +423,16 @@ CONFIG_HID=m
 CONFIG_HIDRAW=y
 CONFIG_UHID=m
 # CONFIG_HID_GENERIC is not set
+# CONFIG_HID_ITE is not set
 # CONFIG_USB_SUPPORT is not set
 CONFIG_RTC_CLASS=y
+# CONFIG_RTC_NVMEM is not set
 CONFIG_RTC_DRV_GENERIC=m
 # CONFIG_IOMMU_SUPPORT is not set
 CONFIG_PROC_HARDWARE=y
 CONFIG_EXT4_FS=y
 CONFIG_REISERFS_FS=m
 CONFIG_JFS_FS=m
-CONFIG_XFS_FS=m
 CONFIG_OCFS2_FS=m
 # CONFIG_OCFS2_DEBUG_MASKLOG is not set
 CONFIG_FS_ENCRYPTION=m
@@ -540,12 +542,13 @@ CONFIG_TEST_BITMAP=m
 CONFIG_TEST_UUID=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_HASH=m
-CONFIG_TEST_LKM=m
 CONFIG_TEST_USER_COPY=m
 CONFIG_TEST_BPF=m
 CONFIG_TEST_FIRMWARE=m
+CONFIG_TEST_SYSCTL=m
 CONFIG_TEST_UDELAY=m
 CONFIG_TEST_STATIC_KEYS=m
+CONFIG_TEST_KMOD=m
 CONFIG_EARLY_PRINTK=y
 CONFIG_ENCRYPTED_KEYS=m
 CONFIG_HARDENED_USERCOPY=y
diff --git a/arch/m68k/include/asm/asm-prototypes.h b/arch/m68k/include/asm/asm-prototypes.h
new file mode 100644
index 000000000000..22ccb9c97576
--- /dev/null
+++ b/arch/m68k/include/asm/asm-prototypes.h
@@ -0,0 +1,5 @@
+extern int __divsi3(int, int);
+extern int __modsi3(int, int);
+extern int __mulsi3(int, int);
+extern unsigned int __udivsi3(unsigned int, unsigned int);
+extern unsigned int __umodsi3(unsigned int, unsigned int);
diff --git a/arch/m68k/mac/misc.c b/arch/m68k/mac/misc.c
index 8aa8792e3174..d96348a52362 100644
--- a/arch/m68k/mac/misc.c
+++ b/arch/m68k/mac/misc.c
@@ -357,6 +357,17 @@ static void cuda_shutdown(void)
 	struct adb_request req;
 	if (cuda_request(&req, NULL, 2, CUDA_PACKET, CUDA_POWERDOWN) < 0)
 		return;
+
+	/* Avoid infinite polling loop when PSU is not under Cuda control */
+	switch (macintosh_config->ident) {
+	case MAC_MODEL_C660:
+	case MAC_MODEL_Q605:
+	case MAC_MODEL_Q605_ACC:
+	case MAC_MODEL_P475:
+	case MAC_MODEL_P475F:
+		return;
+	}
+
 	while (!req.complete)
 		cuda_poll();
 }
@@ -463,8 +474,9 @@ void mac_poweroff(void)
 		pmu_shutdown();
 #endif
 	}
-	local_irq_enable();
+
 	pr_crit("It is now safe to turn off your Macintosh.\n");
+	local_irq_disable();
 	while(1);
 }
 
@@ -554,8 +566,8 @@ void mac_reset(void)
 	}
 
 	/* should never get here */
-	local_irq_enable();
 	pr_crit("Restart failed. Please restart manually.\n");
+	local_irq_disable();
 	while(1);
 }
 
diff --git a/arch/metag/Kconfig b/arch/metag/Kconfig
index 5b7a45d99cfb..7d8b322e5101 100644
--- a/arch/metag/Kconfig
+++ b/arch/metag/Kconfig
@@ -26,6 +26,7 @@ config METAG
 	select HAVE_SYSCALL_TRACEPOINTS
 	select HAVE_UNDERSCORE_SYMBOL_PREFIX
 	select IRQ_DOMAIN
+	select GENERIC_IRQ_EFFECTIVE_AFF_MASK
 	select MODULES_USE_ELF_RELA
 	select OF
 	select OF_EARLY_FLATTREE
diff --git a/arch/metag/include/asm/atomic_lock1.h b/arch/metag/include/asm/atomic_lock1.h
index 6c1380a8a0d4..eee779f26cc4 100644
--- a/arch/metag/include/asm/atomic_lock1.h
+++ b/arch/metag/include/asm/atomic_lock1.h
@@ -37,6 +37,8 @@ static inline int atomic_set(atomic_t *v, int i)
 	return i;
 }
 
+#define atomic_set_release(v, i) atomic_set((v), (i))
+
 #define ATOMIC_OP(op, c_op)						\
 static inline void atomic_##op(int i, atomic_t *v)			\
 {									\
diff --git a/arch/metag/include/asm/spinlock.h b/arch/metag/include/asm/spinlock.h
index c0c7a22be1ae..ddf7fe5708a6 100644
--- a/arch/metag/include/asm/spinlock.h
+++ b/arch/metag/include/asm/spinlock.h
@@ -15,11 +15,6 @@
  * locked.
  */
 
-static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
-{
-	smp_cond_load_acquire(&lock->lock, !VAL);
-}
-
 #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
 
 #define	arch_read_lock_flags(lock, flags) arch_read_lock(lock)
diff --git a/arch/metag/include/asm/topology.h b/arch/metag/include/asm/topology.h
index e95f874ded1b..707c7f7b6bea 100644
--- a/arch/metag/include/asm/topology.h
+++ b/arch/metag/include/asm/topology.h
@@ -4,7 +4,6 @@
 #ifdef CONFIG_NUMA
 
 #define cpu_to_node(cpu)	((void)(cpu), 0)
-#define parent_node(node)	((void)(node), 0)
 
 #define cpumask_of_node(node)	((void)node, cpu_online_mask)
 
diff --git a/arch/microblaze/include/asm/flat.h b/arch/microblaze/include/asm/flat.h
index f23c3d266bae..3d2747d4c967 100644
--- a/arch/microblaze/include/asm/flat.h
+++ b/arch/microblaze/include/asm/flat.h
@@ -60,7 +60,7 @@ static inline int flat_get_addr_from_rp(u32 __user *rp, u32 relval, u32 flags,
  * unaligned.
  */
 
-static inline void
+static inline int
 flat_put_addr_at_rp(u32 __user *rp, u32 addr, u32 relval)
 {
 	u32 *p = (__force u32 *)rp;
diff --git a/arch/microblaze/include/asm/futex.h b/arch/microblaze/include/asm/futex.h
index 01848f056f43..a9dad9e5e132 100644
--- a/arch/microblaze/include/asm/futex.h
+++ b/arch/microblaze/include/asm/futex.h
@@ -29,18 +29,9 @@
 })
 
 static inline int
-futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
+arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
 {
-	int op = (encoded_op >> 28) & 7;
-	int cmp = (encoded_op >> 24) & 15;
-	int oparg = (encoded_op << 8) >> 20;
-	int cmparg = (encoded_op << 20) >> 20;
 	int oldval = 0, ret;
-	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
-		oparg = 1 << oparg;
-
-	if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
-		return -EFAULT;
 
 	pagefault_disable();
 
@@ -66,30 +57,9 @@ futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
 
 	pagefault_enable();
 
-	if (!ret) {
-		switch (cmp) {
-		case FUTEX_OP_CMP_EQ:
-			ret = (oldval == cmparg);
-			break;
-		case FUTEX_OP_CMP_NE:
-			ret = (oldval != cmparg);
-			break;
-		case FUTEX_OP_CMP_LT:
-			ret = (oldval < cmparg);
-			break;
-		case FUTEX_OP_CMP_GE:
-			ret = (oldval >= cmparg);
-			break;
-		case FUTEX_OP_CMP_LE:
-			ret = (oldval <= cmparg);
-			break;
-		case FUTEX_OP_CMP_GT:
-			ret = (oldval > cmparg);
-			break;
-		default:
-			ret = -ENOSYS;
-		}
-	}
+	if (!ret)
+		*oval = oldval;
+
 	return ret;
 }
 
diff --git a/arch/mips/configs/pistachio_defconfig b/arch/mips/configs/pistachio_defconfig
index 7d32fbbca962..3598d58aac30 100644
--- a/arch/mips/configs/pistachio_defconfig
+++ b/arch/mips/configs/pistachio_defconfig
@@ -207,7 +207,7 @@ CONFIG_IMGPDC_WDT=y
 CONFIG_REGULATOR_FIXED_VOLTAGE=y
 CONFIG_REGULATOR_GPIO=y
 CONFIG_MEDIA_SUPPORT=y
-CONFIG_MEDIA_RC_SUPPORT=y
+CONFIG_RC_CORE=y
 # CONFIG_RC_DECODERS is not set
 CONFIG_RC_DEVICES=y
 CONFIG_IR_IMG=y
diff --git a/arch/mips/include/asm/futex.h b/arch/mips/include/asm/futex.h
index 1de190bdfb9c..a9e61ea54ca9 100644
--- a/arch/mips/include/asm/futex.h
+++ b/arch/mips/include/asm/futex.h
@@ -83,18 +83,9 @@
 }
 
 static inline int
-futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
+arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
 {
-	int op = (encoded_op >> 28) & 7;
-	int cmp = (encoded_op >> 24) & 15;
-	int oparg = (encoded_op << 8) >> 20;
-	int cmparg = (encoded_op << 20) >> 20;
 	int oldval = 0, ret;
-	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
-		oparg = 1 << oparg;
-
-	if (! access_ok (VERIFY_WRITE, uaddr, sizeof(u32)))
-		return -EFAULT;
 
 	pagefault_disable();
 
@@ -125,17 +116,9 @@ futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
 
 	pagefault_enable();
 
-	if (!ret) {
-		switch (cmp) {
-		case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
-		case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
-		case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
-		case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
-		case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
-		case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
-		default: ret = -ENOSYS;
-		}
-	}
+	if (!ret)
+		*oval = oldval;
+
 	return ret;
 }
 
diff --git a/arch/mips/include/uapi/asm/mman.h b/arch/mips/include/uapi/asm/mman.h
index 655e2fb5395b..da3216007fe0 100644
--- a/arch/mips/include/uapi/asm/mman.h
+++ b/arch/mips/include/uapi/asm/mman.h
@@ -91,20 +91,12 @@
 					   overrides the coredump filter bits */
 #define MADV_DODUMP	17		/* Clear the MADV_NODUMP flag */
 
+#define MADV_WIPEONFORK 18		/* Zero memory on fork, child only */
+#define MADV_KEEPONFORK 19		/* Undo MADV_WIPEONFORK */
+
 /* compatibility flags */
 #define MAP_FILE	0
 
-/*
- * When MAP_HUGETLB is set bits [26:31] encode the log2 of the huge page size.
- * This gives us 6 bits, which is enough until someone invents 128 bit address
- * spaces.
- *
- * Assume these are all power of twos.
- * When 0 use the default page size.
- */
-#define MAP_HUGE_SHIFT	26
-#define MAP_HUGE_MASK	0x3f
-
 #define PKEY_DISABLE_ACCESS	0x1
 #define PKEY_DISABLE_WRITE	0x2
 #define PKEY_ACCESS_MASK	(PKEY_DISABLE_ACCESS |\
diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h
index 882823bec153..6c755bc07975 100644
--- a/arch/mips/include/uapi/asm/socket.h
+++ b/arch/mips/include/uapi/asm/socket.h
@@ -120,4 +120,6 @@
 
 #define SO_PEERGROUPS		59
 
+#define SO_ZEROCOPY		60
+
 #endif /* _UAPI_ASM_SOCKET_H */
diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
index 6bace7695788..c7cbddfcdc3b 100644
--- a/arch/mips/kernel/smp.c
+++ b/arch/mips/kernel/smp.c
@@ -648,12 +648,12 @@ EXPORT_SYMBOL(flush_tlb_one);
 #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
 
 static DEFINE_PER_CPU(atomic_t, tick_broadcast_count);
-static DEFINE_PER_CPU(struct call_single_data, tick_broadcast_csd);
+static DEFINE_PER_CPU(call_single_data_t, tick_broadcast_csd);
 
 void tick_broadcast(const struct cpumask *mask)
 {
 	atomic_t *count;
-	struct call_single_data *csd;
+	call_single_data_t *csd;
 	int cpu;
 
 	for_each_cpu(cpu, mask) {
@@ -674,7 +674,7 @@ static void tick_broadcast_callee(void *info)
 
 static int __init tick_broadcast_init(void)
 {
-	struct call_single_data *csd;
+	call_single_data_t *csd;
 	int cpu;
 
 	for (cpu = 0; cpu < NR_CPUS; cpu++) {
diff --git a/arch/mips/net/ebpf_jit.c b/arch/mips/net/ebpf_jit.c
index 3f87b96da5c4..7646891c4e9b 100644
--- a/arch/mips/net/ebpf_jit.c
+++ b/arch/mips/net/ebpf_jit.c
@@ -113,6 +113,7 @@ struct jit_ctx {
 	u64 *reg_val_types;
 	unsigned int long_b_conversion:1;
 	unsigned int gen_b_offsets:1;
+	unsigned int use_bbit_insns:1;
 };
 
 static void set_reg_val_type(u64 *rvt, int reg, enum reg_val_type type)
@@ -655,19 +656,6 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx, int this_idx)
 	return build_int_epilogue(ctx, MIPS_R_T9);
 }
 
-static bool use_bbit_insns(void)
-{
-	switch (current_cpu_type()) {
-	case CPU_CAVIUM_OCTEON:
-	case CPU_CAVIUM_OCTEON_PLUS:
-	case CPU_CAVIUM_OCTEON2:
-	case CPU_CAVIUM_OCTEON3:
-		return true;
-	default:
-		return false;
-	}
-}
-
 static bool is_bad_offset(int b_off)
 {
 	return b_off > 0x1ffff || b_off < -0x20000;
@@ -682,6 +670,7 @@ static int build_one_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
 	unsigned int target;
 	u64 t64;
 	s64 t64s;
+	int bpf_op = BPF_OP(insn->code);
 
 	switch (insn->code) {
 	case BPF_ALU64 | BPF_ADD | BPF_K: /* ALU64_IMM */
@@ -770,13 +759,13 @@ static int build_one_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
 			emit_instr(ctx, sll, dst, dst, 0);
 		if (insn->imm == 1) {
 			/* div by 1 is a nop, mod by 1 is zero */
-			if (BPF_OP(insn->code) == BPF_MOD)
+			if (bpf_op == BPF_MOD)
 				emit_instr(ctx, addu, dst, MIPS_R_ZERO, MIPS_R_ZERO);
 			break;
 		}
 		gen_imm_to_reg(insn, MIPS_R_AT, ctx);
 		emit_instr(ctx, divu, dst, MIPS_R_AT);
-		if (BPF_OP(insn->code) == BPF_DIV)
+		if (bpf_op == BPF_DIV)
 			emit_instr(ctx, mflo, dst);
 		else
 			emit_instr(ctx, mfhi, dst);
@@ -798,13 +787,13 @@ static int build_one_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
 
 		if (insn->imm == 1) {
 			/* div by 1 is a nop, mod by 1 is zero */
-			if (BPF_OP(insn->code) == BPF_MOD)
+			if (bpf_op == BPF_MOD)
 				emit_instr(ctx, addu, dst, MIPS_R_ZERO, MIPS_R_ZERO);
 			break;
 		}
 		gen_imm_to_reg(insn, MIPS_R_AT, ctx);
 		emit_instr(ctx, ddivu, dst, MIPS_R_AT);
-		if (BPF_OP(insn->code) == BPF_DIV)
+		if (bpf_op == BPF_DIV)
 			emit_instr(ctx, mflo, dst);
 		else
 			emit_instr(ctx, mfhi, dst);
@@ -829,7 +818,7 @@ static int build_one_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
 			emit_instr(ctx, dinsu, dst, MIPS_R_ZERO, 32, 32);
 		did_move = false;
 		if (insn->src_reg == BPF_REG_10) {
-			if (BPF_OP(insn->code) == BPF_MOV) {
+			if (bpf_op == BPF_MOV) {
 				emit_instr(ctx, daddiu, dst, MIPS_R_SP, MAX_BPF_STACK);
 				did_move = true;
 			} else {
@@ -839,7 +828,7 @@ static int build_one_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
 		} else if (get_reg_val_type(ctx, this_idx, insn->src_reg) == REG_32BIT) {
 			int tmp_reg = MIPS_R_AT;
 
-			if (BPF_OP(insn->code) == BPF_MOV) {
+			if (bpf_op == BPF_MOV) {
 				tmp_reg = dst;
 				did_move = true;
 			}
@@ -847,7 +836,7 @@ static int build_one_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
 			emit_instr(ctx, dinsu, tmp_reg, MIPS_R_ZERO, 32, 32);
 			src = MIPS_R_AT;
 		}
-		switch (BPF_OP(insn->code)) {
+		switch (bpf_op) {
 		case BPF_MOV:
 			if (!did_move)
 				emit_instr(ctx, daddu, dst, src, MIPS_R_ZERO);
@@ -879,7 +868,7 @@ static int build_one_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
 			emit_instr(ctx, beq, src, MIPS_R_ZERO, b_off);
 			emit_instr(ctx, movz, MIPS_R_V0, MIPS_R_ZERO, src);
 			emit_instr(ctx, ddivu, dst, src);
-			if (BPF_OP(insn->code) == BPF_DIV)
+			if (bpf_op == BPF_DIV)
 				emit_instr(ctx, mflo, dst);
 			else
 				emit_instr(ctx, mfhi, dst);
@@ -923,7 +912,7 @@ static int build_one_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
 		if (ts == REG_64BIT || ts == REG_32BIT_ZERO_EX) {
 			int tmp_reg = MIPS_R_AT;
 
-			if (BPF_OP(insn->code) == BPF_MOV) {
+			if (bpf_op == BPF_MOV) {
 				tmp_reg = dst;
 				did_move = true;
 			}
@@ -931,7 +920,7 @@ static int build_one_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
 			emit_instr(ctx, sll, tmp_reg, src, 0);
 			src = MIPS_R_AT;
 		}
-		switch (BPF_OP(insn->code)) {
+		switch (bpf_op) {
 		case BPF_MOV:
 			if (!did_move)
 				emit_instr(ctx, addu, dst, src, MIPS_R_ZERO);
@@ -962,7 +951,7 @@ static int build_one_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
 			emit_instr(ctx, beq, src, MIPS_R_ZERO, b_off);
 			emit_instr(ctx, movz, MIPS_R_V0, MIPS_R_ZERO, src);
 			emit_instr(ctx, divu, dst, src);
-			if (BPF_OP(insn->code) == BPF_DIV)
+			if (bpf_op == BPF_DIV)
 				emit_instr(ctx, mflo, dst);
 			else
 				emit_instr(ctx, mfhi, dst);
@@ -989,7 +978,7 @@ static int build_one_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
 		break;
 	case BPF_JMP | BPF_JEQ | BPF_K: /* JMP_IMM */
 	case BPF_JMP | BPF_JNE | BPF_K: /* JMP_IMM */
-		cmp_eq = (BPF_OP(insn->code) == BPF_JEQ);
+		cmp_eq = (bpf_op == BPF_JEQ);
 		dst = ebpf_to_mips_reg(ctx, insn, dst_reg_fp_ok);
 		if (dst < 0)
 			return dst;
@@ -1002,8 +991,12 @@ static int build_one_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
 		goto jeq_common;
 	case BPF_JMP | BPF_JEQ | BPF_X: /* JMP_REG */
 	case BPF_JMP | BPF_JNE | BPF_X:
+	case BPF_JMP | BPF_JSLT | BPF_X:
+	case BPF_JMP | BPF_JSLE | BPF_X:
 	case BPF_JMP | BPF_JSGT | BPF_X:
 	case BPF_JMP | BPF_JSGE | BPF_X:
+	case BPF_JMP | BPF_JLT | BPF_X:
+	case BPF_JMP | BPF_JLE | BPF_X:
 	case BPF_JMP | BPF_JGT | BPF_X:
 	case BPF_JMP | BPF_JGE | BPF_X:
 	case BPF_JMP | BPF_JSET | BPF_X:
@@ -1020,33 +1013,39 @@ static int build_one_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
 			emit_instr(ctx, sll, MIPS_R_AT, dst, 0);
 			dst = MIPS_R_AT;
 		}
-		if (BPF_OP(insn->code) == BPF_JSET) {
+		if (bpf_op == BPF_JSET) {
 			emit_instr(ctx, and, MIPS_R_AT, dst, src);
 			cmp_eq = false;
 			dst = MIPS_R_AT;
 			src = MIPS_R_ZERO;
-		} else if (BPF_OP(insn->code) == BPF_JSGT) {
+		} else if (bpf_op == BPF_JSGT || bpf_op == BPF_JSLE) {
 			emit_instr(ctx, dsubu, MIPS_R_AT, dst, src);
 			if ((insn + 1)->code == (BPF_JMP | BPF_EXIT) && insn->off == 1) {
 				b_off = b_imm(exit_idx, ctx);
 				if (is_bad_offset(b_off))
 					return -E2BIG;
-				emit_instr(ctx, blez, MIPS_R_AT, b_off);
+				if (bpf_op == BPF_JSGT)
+					emit_instr(ctx, blez, MIPS_R_AT, b_off);
+				else
+					emit_instr(ctx, bgtz, MIPS_R_AT, b_off);
 				emit_instr(ctx, nop);
 				return 2; /* We consumed the exit. */
 			}
 			b_off = b_imm(this_idx + insn->off + 1, ctx);
 			if (is_bad_offset(b_off))
 				return -E2BIG;
-			emit_instr(ctx, bgtz, MIPS_R_AT, b_off);
+			if (bpf_op == BPF_JSGT)
+				emit_instr(ctx, bgtz, MIPS_R_AT, b_off);
+			else
+				emit_instr(ctx, blez, MIPS_R_AT, b_off);
 			emit_instr(ctx, nop);
 			break;
-		} else if (BPF_OP(insn->code) == BPF_JSGE) {
+		} else if (bpf_op == BPF_JSGE || bpf_op == BPF_JSLT) {
 			emit_instr(ctx, slt, MIPS_R_AT, dst, src);
-			cmp_eq = true;
+			cmp_eq = bpf_op == BPF_JSGE;
 			dst = MIPS_R_AT;
 			src = MIPS_R_ZERO;
-		} else if (BPF_OP(insn->code) == BPF_JGT) {
+		} else if (bpf_op == BPF_JGT || bpf_op == BPF_JLE) {
 			/* dst or src could be AT */
 			emit_instr(ctx, dsubu, MIPS_R_T8, dst, src);
 			emit_instr(ctx, sltu, MIPS_R_AT, dst, src);
@@ -1054,16 +1053,16 @@ static int build_one_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
 			emit_instr(ctx, movz, MIPS_R_T9, MIPS_R_SP, MIPS_R_T8);
 			emit_instr(ctx, movn, MIPS_R_T9, MIPS_R_ZERO, MIPS_R_T8);
 			emit_instr(ctx, or, MIPS_R_AT, MIPS_R_T9, MIPS_R_AT);
-			cmp_eq = true;
+			cmp_eq = bpf_op == BPF_JGT;
 			dst = MIPS_R_AT;
 			src = MIPS_R_ZERO;
-		} else if (BPF_OP(insn->code) == BPF_JGE) {
+		} else if (bpf_op == BPF_JGE || bpf_op == BPF_JLT) {
 			emit_instr(ctx, sltu, MIPS_R_AT, dst, src);
-			cmp_eq = true;
+			cmp_eq = bpf_op == BPF_JGE;
 			dst = MIPS_R_AT;
 			src = MIPS_R_ZERO;
 		} else { /* JNE/JEQ case */
-			cmp_eq = (BPF_OP(insn->code) == BPF_JEQ);
+			cmp_eq = (bpf_op == BPF_JEQ);
 		}
 jeq_common:
 		/*
@@ -1122,7 +1121,9 @@ jeq_common:
 		break;
 	case BPF_JMP | BPF_JSGT | BPF_K: /* JMP_IMM */
 	case BPF_JMP | BPF_JSGE | BPF_K: /* JMP_IMM */
-		cmp_eq = (BPF_OP(insn->code) == BPF_JSGE);
+	case BPF_JMP | BPF_JSLT | BPF_K: /* JMP_IMM */
+	case BPF_JMP | BPF_JSLE | BPF_K: /* JMP_IMM */
+		cmp_eq = (bpf_op == BPF_JSGE);
 		dst = ebpf_to_mips_reg(ctx, insn, dst_reg_fp_ok);
 		if (dst < 0)
 			return dst;
@@ -1132,65 +1133,92 @@ jeq_common:
 				b_off = b_imm(exit_idx, ctx);
 				if (is_bad_offset(b_off))
 					return -E2BIG;
-				if (cmp_eq)
-					emit_instr(ctx, bltz, dst, b_off);
-				else
+				switch (bpf_op) {
+				case BPF_JSGT:
 					emit_instr(ctx, blez, dst, b_off);
+					break;
+				case BPF_JSGE:
+					emit_instr(ctx, bltz, dst, b_off);
+					break;
+				case BPF_JSLT:
+					emit_instr(ctx, bgez, dst, b_off);
+					break;
+				case BPF_JSLE:
+					emit_instr(ctx, bgtz, dst, b_off);
+					break;
+				}
 				emit_instr(ctx, nop);
 				return 2; /* We consumed the exit. */
 			}
 			b_off = b_imm(this_idx + insn->off + 1, ctx);
 			if (is_bad_offset(b_off))
 				return -E2BIG;
-			if (cmp_eq)
-				emit_instr(ctx, bgez, dst, b_off);
-			else
+			switch (bpf_op) {
+			case BPF_JSGT:
 				emit_instr(ctx, bgtz, dst, b_off);
+				break;
+			case BPF_JSGE:
+				emit_instr(ctx, bgez, dst, b_off);
+				break;
+			case BPF_JSLT:
+				emit_instr(ctx, bltz, dst, b_off);
+				break;
+			case BPF_JSLE:
+				emit_instr(ctx, blez, dst, b_off);
+				break;
+			}
 			emit_instr(ctx, nop);
 			break;
 		}
 		/*
 		 * only "LT" compare available, so we must use imm + 1
-		 * to generate "GT"
+		 * to generate "GT" and imm -1 to generate LE
 		 */
-		t64s = insn->imm + (cmp_eq ? 0 : 1);
+		if (bpf_op == BPF_JSGT)
+			t64s = insn->imm + 1;
+		else if (bpf_op == BPF_JSLE)
+			t64s = insn->imm + 1;
+		else
+			t64s = insn->imm;
+
+		cmp_eq = bpf_op == BPF_JSGT || bpf_op == BPF_JSGE;
 		if (t64s >= S16_MIN && t64s <= S16_MAX) {
 			emit_instr(ctx, slti, MIPS_R_AT, dst, (int)t64s);
 			src = MIPS_R_AT;
 			dst = MIPS_R_ZERO;
-			cmp_eq = true;
 			goto jeq_common;
 		}
 		emit_const_to_reg(ctx, MIPS_R_AT, (u64)t64s);
 		emit_instr(ctx, slt, MIPS_R_AT, dst, MIPS_R_AT);
 		src = MIPS_R_AT;
 		dst = MIPS_R_ZERO;
-		cmp_eq = true;
 		goto jeq_common;
 
 	case BPF_JMP | BPF_JGT | BPF_K:
 	case BPF_JMP | BPF_JGE | BPF_K:
-		cmp_eq = (BPF_OP(insn->code) == BPF_JGE);
+	case BPF_JMP | BPF_JLT | BPF_K:
+	case BPF_JMP | BPF_JLE | BPF_K:
+		cmp_eq = (bpf_op == BPF_JGE);
 		dst = ebpf_to_mips_reg(ctx, insn, dst_reg_fp_ok);
 		if (dst < 0)
 			return dst;
 		/*
 		 * only "LT" compare available, so we must use imm + 1
-		 * to generate "GT"
+		 * to generate "GT" and imm -1 to generate LE
 		 */
-		t64s = (u64)(u32)(insn->imm) + (cmp_eq ? 0 : 1);
-		if (t64s >= 0 && t64s <= S16_MAX) {
-			emit_instr(ctx, sltiu, MIPS_R_AT, dst, (int)t64s);
-			src = MIPS_R_AT;
-			dst = MIPS_R_ZERO;
-			cmp_eq = true;
-			goto jeq_common;
-		}
+		if (bpf_op == BPF_JGT)
+			t64s = (u64)(u32)(insn->imm) + 1;
+		else if (bpf_op == BPF_JLE)
+			t64s = (u64)(u32)(insn->imm) + 1;
+		else
+			t64s = (u64)(u32)(insn->imm);
+
+		cmp_eq = bpf_op == BPF_JGT || bpf_op == BPF_JGE;
+
 		emit_const_to_reg(ctx, MIPS_R_AT, (u64)t64s);
 		emit_instr(ctx, sltu, MIPS_R_AT, dst, MIPS_R_AT);
 		src = MIPS_R_AT;
 		dst = MIPS_R_ZERO;
-		cmp_eq = true;
 		goto jeq_common;
 
 	case BPF_JMP | BPF_JSET | BPF_K: /* JMP_IMM */
@@ -1198,7 +1226,7 @@ jeq_common:
 		if (dst < 0)
 			return dst;
 
-		if (use_bbit_insns() && hweight32((u32)insn->imm) == 1) {
+		if (ctx->use_bbit_insns && hweight32((u32)insn->imm) == 1) {
 			if ((insn + 1)->code == (BPF_JMP | BPF_EXIT) && insn->off == 1) {
 				b_off = b_imm(exit_idx, ctx);
 				if (is_bad_offset(b_off))
@@ -1724,10 +1752,14 @@ static int reg_val_propagate_range(struct jit_ctx *ctx, u64 initial_rvt,
 			case BPF_JEQ:
 			case BPF_JGT:
 			case BPF_JGE:
+			case BPF_JLT:
+			case BPF_JLE:
 			case BPF_JSET:
 			case BPF_JNE:
 			case BPF_JSGT:
 			case BPF_JSGE:
+			case BPF_JSLT:
+			case BPF_JSLE:
 				if (follow_taken) {
 					rvt[idx] |= RVT_BRANCH_TAKEN;
 					idx += insn->off;
@@ -1853,6 +1885,19 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 
 	memset(&ctx, 0, sizeof(ctx));
 
+	preempt_disable();
+	switch (current_cpu_type()) {
+	case CPU_CAVIUM_OCTEON:
+	case CPU_CAVIUM_OCTEON_PLUS:
+	case CPU_CAVIUM_OCTEON2:
+	case CPU_CAVIUM_OCTEON3:
+		ctx.use_bbit_insns = 1;
+		break;
+	default:
+		ctx.use_bbit_insns = 0;
+	}
+	preempt_enable();
+
 	ctx.offsets = kcalloc(prog->len + 1, sizeof(*ctx.offsets), GFP_KERNEL);
 	if (ctx.offsets == NULL)
 		goto out_err;
diff --git a/arch/mn10300/include/asm/spinlock.h b/arch/mn10300/include/asm/spinlock.h
index 9c7b8f7942d8..fe413b41df6c 100644
--- a/arch/mn10300/include/asm/spinlock.h
+++ b/arch/mn10300/include/asm/spinlock.h
@@ -26,11 +26,6 @@
 
 #define arch_spin_is_locked(x)	(*(volatile signed char *)(&(x)->slock) != 0)
 
-static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
-{
-	smp_cond_load_acquire(&lock->slock, !VAL);
-}
-
 static inline void arch_spin_unlock(arch_spinlock_t *lock)
 {
 	asm volatile(
diff --git a/arch/mn10300/include/uapi/asm/socket.h b/arch/mn10300/include/uapi/asm/socket.h
index c710db354ff2..ac82a3f26dbf 100644
--- a/arch/mn10300/include/uapi/asm/socket.h
+++ b/arch/mn10300/include/uapi/asm/socket.h
@@ -102,4 +102,6 @@
 
 #define SO_PEERGROUPS		59
 
+#define SO_ZEROCOPY		60
+
 #endif /* _ASM_SOCKET_H */
diff --git a/arch/openrisc/include/asm/futex.h b/arch/openrisc/include/asm/futex.h
index 778087341977..8fed278a24b8 100644
--- a/arch/openrisc/include/asm/futex.h
+++ b/arch/openrisc/include/asm/futex.h
@@ -30,20 +30,10 @@
 })
 
 static inline int
-futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
+arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
 {
-	int op = (encoded_op >> 28) & 7;
-	int cmp = (encoded_op >> 24) & 15;
-	int oparg = (encoded_op << 8) >> 20;
-	int cmparg = (encoded_op << 20) >> 20;
 	int oldval = 0, ret;
 
-	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
-		oparg = 1 << oparg;
-
-	if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
-		return -EFAULT;
-
 	pagefault_disable();
 
 	switch (op) {
@@ -68,30 +58,9 @@ futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
 
 	pagefault_enable();
 
-	if (!ret) {
-		switch (cmp) {
-		case FUTEX_OP_CMP_EQ:
-			ret = (oldval == cmparg);
-			break;
-		case FUTEX_OP_CMP_NE:
-			ret = (oldval != cmparg);
-			break;
-		case FUTEX_OP_CMP_LT:
-			ret = (oldval < cmparg);
-			break;
-		case FUTEX_OP_CMP_GE:
-			ret = (oldval >= cmparg);
-			break;
-		case FUTEX_OP_CMP_LE:
-			ret = (oldval <= cmparg);
-			break;
-		case FUTEX_OP_CMP_GT:
-			ret = (oldval > cmparg);
-			break;
-		default:
-			ret = -ENOSYS;
-		}
-	}
+	if (!ret)
+		*oval = oldval;
+
 	return ret;
 }
 
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index dda1f558ef35..13648519bd41 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -9,6 +9,9 @@ config PARISC
 	select ARCH_WANT_FRAME_POINTERS
 	select ARCH_HAS_ELF_RANDOMIZE
 	select ARCH_HAS_STRICT_KERNEL_RWX
+	select ARCH_HAS_UBSAN_SANITIZE_ALL
+	select ARCH_WANTS_UBSAN_NO_NULL
+	select ARCH_SUPPORTS_MEMORY_FAILURE
 	select RTC_CLASS
 	select RTC_DRV_GENERIC
 	select INIT_ALL_POSSIBLE
@@ -17,6 +20,12 @@ config PARISC
 	select BUG
 	select BUILDTIME_EXTABLE_SORT
 	select HAVE_PERF_EVENTS
+	select HAVE_KERNEL_BZIP2
+	select HAVE_KERNEL_GZIP
+	select HAVE_KERNEL_LZ4
+	select HAVE_KERNEL_LZMA
+	select HAVE_KERNEL_LZO
+	select HAVE_KERNEL_XZ
 	select GENERIC_ATOMIC64 if !64BIT
 	select GENERIC_IRQ_PROBE
 	select GENERIC_PCI_IOMAP
diff --git a/arch/parisc/Makefile b/arch/parisc/Makefile
index 75cb451b1f03..58fae5d2449d 100644
--- a/arch/parisc/Makefile
+++ b/arch/parisc/Makefile
@@ -24,15 +24,20 @@ KBUILD_DEFCONFIG := default_defconfig
 NM		= sh $(srctree)/arch/parisc/nm
 CHECKFLAGS	+= -D__hppa__=1
 LIBGCC		= $(shell $(CC) $(KBUILD_CFLAGS) -print-libgcc-file-name)
+export LIBGCC
 
 ifdef CONFIG_64BIT
 UTS_MACHINE	:= parisc64
 CHECKFLAGS	+= -D__LP64__=1 -m64
 CC_ARCHES	= hppa64
+LD_BFD		:= elf64-hppa-linux
 else # 32-bit
 CC_ARCHES	= hppa hppa2.0 hppa1.1
+LD_BFD		:= elf32-hppa-linux
 endif
 
+export LD_BFD
+
 ifneq ($(SUBARCH),$(UTS_MACHINE))
 	ifeq ($(CROSS_COMPILE),)
 		CC_SUFFIXES = linux linux-gnu unknown-linux-gnu
@@ -88,6 +93,8 @@ libs-y	+= arch/parisc/lib/ $(LIBGCC)
 
 drivers-$(CONFIG_OPROFILE)		+= arch/parisc/oprofile/
 
+boot	:= arch/parisc/boot
+
 PALO := $(shell if (which palo 2>&1); then : ; \
 	elif [ -x /sbin/palo ]; then echo /sbin/palo; \
 	fi)
@@ -116,11 +123,14 @@ INSTALL_TARGETS = zinstall install
 
 PHONY += bzImage $(BOOT_TARGETS) $(INSTALL_TARGETS)
 
-bzImage zImage: vmlinuz
+zImage: vmlinuz
 Image: vmlinux
 
-vmlinuz: vmlinux
-	@gzip -cf -9 $< > $@
+bzImage: vmlinux
+	$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
+
+vmlinuz: bzImage
+	$(OBJCOPY) $(boot)/bzImage $@
 
 install:
 	$(CONFIG_SHELL) $(src)/arch/parisc/install.sh \
diff --git a/arch/parisc/boot/.gitignore b/arch/parisc/boot/.gitignore
new file mode 100644
index 000000000000..017d5912ad2d
--- /dev/null
+++ b/arch/parisc/boot/.gitignore
@@ -0,0 +1,2 @@
+image
+bzImage
diff --git a/arch/parisc/boot/Makefile b/arch/parisc/boot/Makefile
new file mode 100644
index 000000000000..cad68a584884
--- /dev/null
+++ b/arch/parisc/boot/Makefile
@@ -0,0 +1,26 @@
+#
+# Makefile for the linux parisc-specific parts of the boot image creator.
+#
+
+COMPILE_VERSION := __linux_compile_version_id__`hostname |  \
+			tr -c '[0-9A-Za-z]' '_'`__`date | \
+			tr -c '[0-9A-Za-z]' '_'`_t
+
+ccflags-y  := -DCOMPILE_VERSION=$(COMPILE_VERSION) -gstabs -I.
+
+targets := image
+targets += bzImage
+subdir- := compressed
+
+$(obj)/image: vmlinux FORCE
+	$(call if_changed,objcopy)
+
+$(obj)/bzImage: $(obj)/compressed/vmlinux FORCE
+	$(call if_changed,objcopy)
+
+$(obj)/compressed/vmlinux: FORCE
+	$(Q)$(MAKE) $(build)=$(obj)/compressed $@
+
+install: $(CONFIGURE) $(obj)/bzImage
+	sh -x  $(srctree)/$(obj)/install.sh $(KERNELRELEASE) $(obj)/bzImage \
+	      System.map "$(INSTALL_PATH)"
diff --git a/arch/parisc/boot/compressed/.gitignore b/arch/parisc/boot/compressed/.gitignore
new file mode 100644
index 000000000000..ae06b9b4c02f
--- /dev/null
+++ b/arch/parisc/boot/compressed/.gitignore
@@ -0,0 +1,3 @@
+sizes.h
+vmlinux
+vmlinux.lds
diff --git a/arch/parisc/boot/compressed/Makefile b/arch/parisc/boot/compressed/Makefile
new file mode 100644
index 000000000000..5450a11c9d10
--- /dev/null
+++ b/arch/parisc/boot/compressed/Makefile
@@ -0,0 +1,86 @@
+#
+# linux/arch/parisc/boot/compressed/Makefile
+#
+# create a compressed self-extracting vmlinux image from the original vmlinux
+#
+
+KCOV_INSTRUMENT := n
+GCOV_PROFILE := n
+UBSAN_SANITIZE := n
+
+targets := vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2
+targets += vmlinux.bin.xz vmlinux.bin.lzma vmlinux.bin.lzo vmlinux.bin.lz4
+targets += misc.o piggy.o sizes.h head.o real2.o firmware.o
+
+KBUILD_CFLAGS := -D__KERNEL__ -O2 -DBOOTLOADER
+KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
+KBUILD_CFLAGS += $(cflags-y) -fno-delete-null-pointer-checks
+KBUILD_CFLAGS += -fno-PIE -mno-space-regs -mdisable-fpregs
+ifndef CONFIG_64BIT
+KBUILD_CFLAGS += -mfast-indirect-calls
+endif
+
+OBJECTS += $(obj)/head.o $(obj)/real2.o $(obj)/firmware.o $(obj)/misc.o $(obj)/piggy.o
+
+# LDFLAGS_vmlinux := -X --whole-archive -e startup -T
+LDFLAGS_vmlinux := -X -e startup --as-needed -T
+$(obj)/vmlinux: $(obj)/vmlinux.lds $(OBJECTS) $(LIBGCC)
+	$(call if_changed,ld)
+
+sed-sizes := -e 's/^\([0-9a-fA-F]*\) . \(__bss_start\|_end\|parisc_kernel_start\)$$/\#define SZ\2 0x\1/p'
+
+quiet_cmd_sizes = GEN $@
+      cmd_sizes = $(NM) $< | sed -n $(sed-sizes) > $@
+
+$(obj)/sizes.h: vmlinux
+	$(call if_changed,sizes)
+
+AFLAGS_head.o += -I$(objtree)/$(obj) -DBOOTLOADER
+$(obj)/head.o: $(obj)/sizes.h
+
+CFLAGS_misc.o += -I$(objtree)/$(obj)
+$(obj)/misc.o: $(obj)/sizes.h
+
+$(obj)/firmware.o: $(obj)/firmware.c
+$(obj)/firmware.c: $(srctree)/arch/$(SRCARCH)/kernel/firmware.c
+	$(call cmd,shipped)
+
+AFLAGS_real2.o += -DBOOTLOADER
+$(obj)/real2.o: $(obj)/real2.S
+$(obj)/real2.S: $(srctree)/arch/$(SRCARCH)/kernel/real2.S
+	$(call cmd,shipped)
+
+$(obj)/misc.o: $(obj)/sizes.h
+
+CPPFLAGS_vmlinux.lds += -I$(objtree)/$(obj) -DBOOTLOADER
+$(obj)/vmlinux.lds: $(obj)/sizes.h
+
+OBJCOPYFLAGS_vmlinux.bin := -O binary -R .comment -S
+$(obj)/vmlinux.bin: vmlinux
+	$(call if_changed,objcopy)
+
+vmlinux.bin.all-y := $(obj)/vmlinux.bin
+
+suffix-$(CONFIG_KERNEL_GZIP)  := gz
+suffix-$(CONFIG_KERNEL_BZIP2) := bz2
+suffix-$(CONFIG_KERNEL_LZ4)  := lz4
+suffix-$(CONFIG_KERNEL_LZMA)  := lzma
+suffix-$(CONFIG_KERNEL_LZO)  := lzo
+suffix-$(CONFIG_KERNEL_XZ)  := xz
+
+$(obj)/vmlinux.bin.gz: $(vmlinux.bin.all-y)
+	$(call if_changed,gzip)
+$(obj)/vmlinux.bin.bz2: $(vmlinux.bin.all-y)
+	$(call if_changed,bzip2)
+$(obj)/vmlinux.bin.lz4: $(vmlinux.bin.all-y)
+	$(call if_changed,lz4)
+$(obj)/vmlinux.bin.lzma: $(vmlinux.bin.all-y)
+	$(call if_changed,lzma)
+$(obj)/vmlinux.bin.lzo: $(vmlinux.bin.all-y)
+	$(call if_changed,lzo)
+$(obj)/vmlinux.bin.xz: $(vmlinux.bin.all-y)
+	$(call if_changed,xzkern)
+
+LDFLAGS_piggy.o := -r --format binary --oformat $(LD_BFD) -T
+$(obj)/piggy.o: $(obj)/vmlinux.scr $(obj)/vmlinux.bin.$(suffix-y)
+	$(call if_changed,ld)
diff --git a/arch/parisc/boot/compressed/head.S b/arch/parisc/boot/compressed/head.S
new file mode 100644
index 000000000000..5aba20fa48aa
--- /dev/null
+++ b/arch/parisc/boot/compressed/head.S
@@ -0,0 +1,85 @@
+/*
+ * Startup glue code to uncompress the kernel
+ *
+ *   (C) 2017 Helge Deller <deller@gmx.de>
+ */
+
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <asm/asm-offsets.h>
+#include <asm/page.h>
+#include <asm/psw.h>
+#include <asm/pdc.h>
+#include <asm/assembly.h>
+#include "sizes.h"
+
+#define BOOTADDR(x)	(x)
+
+#ifndef CONFIG_64BIT
+	.import	$global$		/* forward declaration */
+#endif /*!CONFIG_64BIT*/
+
+	__HEAD
+
+ENTRY(startup)
+	 .level LEVEL
+
+#define PSW_W_SM	0x200
+#define PSW_W_BIT       36
+
+	;! nuke the W bit, saving original value
+	.level 2.0
+	rsm	PSW_W_SM, %r1
+
+	.level 1.1
+	extrw,u	%r1, PSW_W_BIT-32, 1, %r1
+	copy	%r1, %arg0
+
+	/* Make sure sr4-sr7 are set to zero for the kernel address space */
+	mtsp    %r0,%sr4
+	mtsp    %r0,%sr5
+	mtsp    %r0,%sr6
+	mtsp    %r0,%sr7
+
+	/* Clear BSS */
+
+	.import _bss,data
+	.import _ebss,data
+
+	load32	BOOTADDR(_bss),%r3
+	load32	BOOTADDR(_ebss),%r4
+	ldo	FRAME_SIZE(%r4),%sp	/* stack at end of bss */
+$bss_loop:
+	cmpb,<<,n %r3,%r4,$bss_loop
+	stw,ma	%r0,4(%r3)
+
+	/* Initialize the global data pointer */
+	loadgp
+
+	/* arg0..arg4 were set by palo. */
+	copy	%arg1, %r6		/* command line */
+	copy	%arg2, %r7		/* rd-start */
+	copy	%arg3, %r8		/* rd-end */
+	load32	BOOTADDR(decompress_kernel),%r3
+
+#ifdef CONFIG_64BIT
+	.level LEVEL
+	ssm	PSW_W_SM, %r0		/* set W-bit */
+	depdi	0, 31, 32, %r3
+#endif
+	load32	BOOTADDR(startup_continue), %r2
+	bv,n	0(%r3)
+
+startup_continue:
+#ifdef CONFIG_64BIT
+	.level LEVEL
+	rsm	PSW_W_SM, %r0		/* clear W-bit */
+#endif
+
+	load32	KERNEL_BINARY_TEXT_START, %arg0 /* free mem */
+	copy	%r6, %arg1		/* command line */
+	copy	%r7, %arg2		/* rd-start */
+	copy	%r8, %arg3		/* rd-end */
+
+	bv,n	0(%ret0)
+END(startup)
diff --git a/arch/parisc/boot/compressed/misc.c b/arch/parisc/boot/compressed/misc.c
new file mode 100644
index 000000000000..13a4bf9ac4da
--- /dev/null
+++ b/arch/parisc/boot/compressed/misc.c
@@ -0,0 +1,301 @@
+/*
+ * Definitions and wrapper functions for kernel decompressor
+ *
+ *   (C) 2017 Helge Deller <deller@gmx.de>
+ */
+
+#include <linux/uaccess.h>
+#include <asm/unaligned.h>
+#include <asm/page.h>
+#include "sizes.h"
+
+/*
+ * gzip declarations
+ */
+#define STATIC static
+
+#undef memmove
+#define memmove memmove
+#define memzero(s, n) memset((s), 0, (n))
+
+#define malloc	malloc_gzip
+#define free	free_gzip
+
+/* Symbols defined by linker scripts */
+extern char input_data[];
+extern int input_len;
+extern __le32 output_len;	/* at unaligned address, little-endian */
+extern char _text, _end;
+extern char _bss, _ebss;
+extern char _startcode_end;
+extern void startup_continue(void *entry, unsigned long cmdline,
+	unsigned long rd_start, unsigned long rd_end) __noreturn;
+
+void error(char *m) __noreturn;
+
+static unsigned long free_mem_ptr;
+static unsigned long free_mem_end_ptr;
+
+#ifdef CONFIG_KERNEL_GZIP
+#include "../../../../lib/decompress_inflate.c"
+#endif
+
+#ifdef CONFIG_KERNEL_BZIP2
+#include "../../../../lib/decompress_bunzip2.c"
+#endif
+
+#ifdef CONFIG_KERNEL_LZ4
+#include "../../../../lib/decompress_unlz4.c"
+#endif
+
+#ifdef CONFIG_KERNEL_LZMA
+#include "../../../../lib/decompress_unlzma.c"
+#endif
+
+#ifdef CONFIG_KERNEL_LZO
+#include "../../../../lib/decompress_unlzo.c"
+#endif
+
+#ifdef CONFIG_KERNEL_XZ
+#include "../../../../lib/decompress_unxz.c"
+#endif
+
+void *memmove(void *dest, const void *src, size_t n)
+{
+	const char *s = src;
+	char *d = dest;
+
+	if (d <= s) {
+		while (n--)
+			*d++ = *s++;
+	} else {
+		d += n;
+		s += n;
+		while (n--)
+			*--d = *--s;
+	}
+	return dest;
+}
+
+void *memset(void *s, int c, size_t count)
+{
+	char *xs = (char *)s;
+
+	while (count--)
+		*xs++ = c;
+	return s;
+}
+
+void *memcpy(void *d, const void *s, size_t len)
+{
+	char *dest = (char *)d;
+	const char *source = (const char *)s;
+
+	while (len--)
+		*dest++ = *source++;
+	return d;
+}
+
+size_t strlen(const char *s)
+{
+	const char *sc;
+
+	for (sc = s; *sc != '\0'; ++sc)
+		;
+	return sc - s;
+}
+
+char *strchr(const char *s, int c)
+{
+	while (*s) {
+		if (*s == (char)c)
+			return (char *)s;
+		++s;
+	}
+	return NULL;
+}
+
+int puts(const char *s)
+{
+	const char *nuline = s;
+
+	while ((nuline = strchr(s, '\n')) != NULL) {
+		if (nuline != s)
+			pdc_iodc_print(s, nuline - s);
+			pdc_iodc_print("\r\n", 2);
+			s = nuline + 1;
+	}
+	if (*s != '\0')
+		pdc_iodc_print(s, strlen(s));
+
+	return 0;
+}
+
+static int putchar(int c)
+{
+	char buf[2];
+
+	buf[0] = c;
+	buf[1] = '\0';
+	puts(buf);
+	return c;
+}
+
+void __noreturn error(char *x)
+{
+	puts("\n\n");
+	puts(x);
+	puts("\n\n -- System halted");
+	while (1)	/* wait forever */
+		;
+}
+
+static int print_hex(unsigned long num)
+{
+	const char hex[] = "0123456789abcdef";
+	char str[40];
+	int i = sizeof(str)-1;
+
+	str[i--] = '\0';
+	do {
+		str[i--] = hex[num & 0x0f];
+		num >>= 4;
+	} while (num);
+
+	str[i--] = 'x';
+	str[i] = '0';
+	puts(&str[i]);
+
+	return 0;
+}
+
+int printf(const char *fmt, ...)
+{
+	va_list args;
+	int i = 0;
+
+	va_start(args, fmt);
+
+	while (fmt[i]) {
+		if (fmt[i] != '%') {
+put:
+			putchar(fmt[i++]);
+			continue;
+		}
+
+		if (fmt[++i] == '%')
+			goto put;
+		++i;
+		print_hex(va_arg(args, unsigned long));
+	}
+
+	va_end(args);
+	return 0;
+}
+
+/* helper functions for libgcc */
+void abort(void)
+{
+	error("aborted.");
+}
+
+#undef malloc
+void *malloc(size_t size)
+{
+	return malloc_gzip(size);
+}
+
+#undef free
+void free(void *ptr)
+{
+	return free_gzip(ptr);
+}
+
+
+static void flush_data_cache(char *start, unsigned long length)
+{
+	char *end = start + length;
+
+	do {
+		asm volatile("fdc 0(%0)" : : "r" (start));
+		asm volatile("fic 0(%%sr0,%0)" : : "r" (start));
+		start += 16;
+	} while (start < end);
+	asm volatile("fdc 0(%0)" : : "r" (end));
+
+	asm ("sync");
+}
+
+unsigned long decompress_kernel(unsigned int started_wide,
+		unsigned int command_line,
+		const unsigned int rd_start,
+		const unsigned int rd_end)
+{
+	char *output;
+	unsigned long len, len_all;
+
+#ifdef CONFIG_64BIT
+	parisc_narrow_firmware = 0;
+#endif
+
+	set_firmware_width_unlocked();
+
+	putchar('U');	/* if you get this p and no more, string storage */
+			/* in $GLOBAL$ is wrong or %dp is wrong */
+	puts("ncompressing ...\n");
+
+	output = (char *) KERNEL_BINARY_TEXT_START;
+	len_all = __pa(SZ_end) - __pa(SZparisc_kernel_start);
+
+	if ((unsigned long) &_startcode_end > (unsigned long) output)
+		error("Bootcode overlaps kernel code");
+
+	len = get_unaligned_le32(&output_len);
+	if (len > len_all)
+		error("Output len too big.");
+	else
+		memset(&output[len], 0, len_all - len);
+
+	/*
+	 * Initialize free_mem_ptr and free_mem_end_ptr.
+	 */
+	free_mem_ptr = (unsigned long) &_ebss;
+	free_mem_ptr += 2*1024*1024;	/* leave 2 MB for stack */
+
+	/* Limit memory for bootoader to 1GB */
+	#define ARTIFICIAL_LIMIT (1*1024*1024*1024)
+	free_mem_end_ptr = PAGE0->imm_max_mem;
+	if (free_mem_end_ptr > ARTIFICIAL_LIMIT)
+		free_mem_end_ptr = ARTIFICIAL_LIMIT;
+
+#ifdef CONFIG_BLK_DEV_INITRD
+	/* if we have ramdisk this is at end of memory */
+	if (rd_start && rd_start < free_mem_end_ptr)
+		free_mem_end_ptr = rd_start;
+#endif
+
+#ifdef DEBUG
+	printf("startcode_end = %x\n", &_startcode_end);
+	printf("commandline   = %x\n", command_line);
+	printf("rd_start      = %x\n", rd_start);
+	printf("rd_end        = %x\n", rd_end);
+
+	printf("free_ptr      = %x\n", free_mem_ptr);
+	printf("free_ptr_end  = %x\n", free_mem_end_ptr);
+
+	printf("input_data    = %x\n", input_data);
+	printf("input_len     = %x\n", input_len);
+	printf("output        = %x\n", output);
+	printf("output_len    = %x\n", len);
+	printf("output_max    = %x\n", len_all);
+#endif
+
+	__decompress(input_data, input_len, NULL, NULL,
+			output, 0, NULL, error);
+
+	flush_data_cache(output, len);
+
+	printf("Booting kernel ...\n\n");
+
+	return (unsigned long) output;
+}
diff --git a/arch/parisc/boot/compressed/vmlinux.lds.S b/arch/parisc/boot/compressed/vmlinux.lds.S
new file mode 100644
index 000000000000..a4ce3314e78e
--- /dev/null
+++ b/arch/parisc/boot/compressed/vmlinux.lds.S
@@ -0,0 +1,101 @@
+#include <asm-generic/vmlinux.lds.h>
+#include <asm/page.h>
+#include "sizes.h"
+
+#ifndef CONFIG_64BIT
+OUTPUT_FORMAT("elf32-hppa-linux")
+OUTPUT_ARCH(hppa)
+#else
+OUTPUT_FORMAT("elf64-hppa-linux")
+OUTPUT_ARCH(hppa:hppa2.0w)
+#endif
+
+ENTRY(startup)
+
+SECTIONS
+{
+	/* palo loads at 0x60000 */
+	/* loaded kernel will move to 0x10000 */
+	. = 0xe0000;    /* should not overwrite palo code */
+
+	.head.text : {
+		_head = . ;
+		HEAD_TEXT
+		_ehead = . ;
+	}
+
+	/* keep __gp below 0x1000000 */
+#ifdef CONFIG_64BIT
+	. = ALIGN(16);
+	/* Linkage tables */
+	.opd : {
+		*(.opd)
+	} PROVIDE (__gp = .);
+	.plt : {
+		*(.plt)
+	}
+	.dlt : {
+		*(.dlt)
+	}
+#endif
+	_startcode_end = .;
+
+	/* bootloader code and data starts behind area of extracted kernel */
+	. = (SZ_end - SZparisc_kernel_start + KERNEL_BINARY_TEXT_START);
+
+	/* align on next page boundary */
+	. = ALIGN(4096);
+	.text :	{
+		_text = .;	/* Text */
+		*(.text)
+		*(.text.*)
+		_etext = . ;
+	}
+	. = ALIGN(8);
+	.data :	{
+		_data = . ;
+		*(.data)
+		*(.data.*)
+		_edata = . ;
+	}
+	. = ALIGN(8);
+	.rodata : {
+		_rodata = . ;
+		*(.rodata)	 /* read-only data */
+		*(.rodata.*)
+		_erodata = . ;
+	}
+	. = ALIGN(8);
+	.rodata.compressed : {
+		*(.rodata.compressed)
+	}
+	. = ALIGN(8);
+	.bss : {
+		_bss = . ;
+		*(.bss)
+		*(.bss.*)
+		*(COMMON)
+		. = ALIGN(4096);
+		_ebss = .;
+	}
+
+	STABS_DEBUG
+	.note 0 : { *(.note) }
+
+	/* Sections to be discarded */
+	DISCARDS
+	/DISCARD/ : {
+#ifdef CONFIG_64BIT
+		/* temporary hack until binutils is fixed to not emit these
+		 * for static binaries
+		 */
+		*(.PARISC.unwind)	/* no unwind data */
+		*(.interp)
+		*(.dynsym)
+		*(.dynstr)
+		*(.dynamic)
+		*(.hash)
+		*(.gnu.hash)
+#endif
+	}
+}
diff --git a/arch/parisc/boot/compressed/vmlinux.scr b/arch/parisc/boot/compressed/vmlinux.scr
new file mode 100644
index 000000000000..dac2d142bcfa
--- /dev/null
+++ b/arch/parisc/boot/compressed/vmlinux.scr
@@ -0,0 +1,10 @@
+SECTIONS
+{
+  .rodata.compressed : {
+	input_len = .;
+	LONG(input_data_end - input_data) input_data = .;
+	*(.data)
+	output_len = . - 4; /* can be at unaligned address */
+	input_data_end = .;
+	}
+}
diff --git a/arch/parisc/boot/install.sh b/arch/parisc/boot/install.sh
new file mode 100644
index 000000000000..8f7c365fad83
--- /dev/null
+++ b/arch/parisc/boot/install.sh
@@ -0,0 +1,65 @@
+#!/bin/sh
+#
+# arch/parisc/install.sh, derived from arch/i386/boot/install.sh
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License.  See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (C) 1995 by Linus Torvalds
+#
+# Adapted from code in arch/i386/boot/Makefile by H. Peter Anvin
+#
+# "make install" script for i386 architecture
+#
+# Arguments:
+#   $1 - kernel version
+#   $2 - kernel image file
+#   $3 - kernel map file
+#   $4 - default install path (blank if root directory)
+#
+
+verify () {
+	if [ ! -f "$1" ]; then
+		echo ""                                                   1>&2
+		echo " *** Missing file: $1"                              1>&2
+		echo ' *** You need to run "make" before "make install".' 1>&2
+		echo ""                                                   1>&2
+		exit 1
+	fi
+}
+
+# Make sure the files actually exist
+
+verify "$2"
+verify "$3"
+
+# User may have a custom install script
+
+if [ -n "${INSTALLKERNEL}" ]; then
+  if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi
+  if [ -x /sbin/${INSTALLKERNEL} ]; then exec /sbin/${INSTALLKERNEL} "$@"; fi
+fi
+
+# Default install
+
+if [ "$(basename $2)" = "zImage" ]; then
+# Compressed install
+  echo "Installing compressed kernel"
+  base=vmlinuz
+else
+# Normal install
+  echo "Installing normal kernel"
+  base=vmlinux
+fi
+
+if [ -f $4/$base-$1 ]; then
+  mv $4/$base-$1 $4/$base-$1.old
+fi
+cat $2 > $4/$base-$1
+
+# Install system map file
+if [ -f $4/System.map-$1 ]; then
+  mv $4/System.map-$1 $4/System.map-$1.old
+fi
+cp $3 $4/System.map-$1
diff --git a/arch/parisc/configs/c3000_defconfig b/arch/parisc/configs/c3000_defconfig
index 0764d3971cf6..8d41a73bd71b 100644
--- a/arch/parisc/configs/c3000_defconfig
+++ b/arch/parisc/configs/c3000_defconfig
@@ -31,7 +31,6 @@ CONFIG_IP_PNP_BOOTP=y
 CONFIG_INET6_IPCOMP=m
 CONFIG_IPV6_TUNNEL=m
 CONFIG_NETFILTER=y
-CONFIG_NETFILTER_DEBUG=y
 CONFIG_NET_PKTGEN=m
 CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 CONFIG_DEVTMPFS=y
diff --git a/arch/parisc/include/asm/atomic.h b/arch/parisc/include/asm/atomic.h
index 5394b9c5f914..17b98a87e5e2 100644
--- a/arch/parisc/include/asm/atomic.h
+++ b/arch/parisc/include/asm/atomic.h
@@ -65,6 +65,8 @@ static __inline__ void atomic_set(atomic_t *v, int i)
 	_atomic_spin_unlock_irqrestore(v, flags);
 }
 
+#define atomic_set_release(v, i)	atomic_set((v), (i))
+
 static __inline__ int atomic_read(const atomic_t *v)
 {
 	return READ_ONCE((v)->counter);
diff --git a/arch/parisc/include/asm/futex.h b/arch/parisc/include/asm/futex.h
index 0ba14300cd8e..c601aab2fb36 100644
--- a/arch/parisc/include/asm/futex.h
+++ b/arch/parisc/include/asm/futex.h
@@ -32,22 +32,12 @@ _futex_spin_unlock_irqrestore(u32 __user *uaddr, unsigned long int *flags)
 }
 
 static inline int
-futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
+arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
 {
 	unsigned long int flags;
-	int op = (encoded_op >> 28) & 7;
-	int cmp = (encoded_op >> 24) & 15;
-	int oparg = (encoded_op << 8) >> 20;
-	int cmparg = (encoded_op << 20) >> 20;
 	int oldval, ret;
 	u32 tmp;
 
-	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
-		oparg = 1 << oparg;
-
-	if (!access_ok(VERIFY_WRITE, uaddr, sizeof(*uaddr)))
-		return -EFAULT;
-
 	_futex_spin_lock_irqsave(uaddr, &flags);
 	pagefault_disable();
 
@@ -85,17 +75,9 @@ out_pagefault_enable:
 	pagefault_enable();
 	_futex_spin_unlock_irqrestore(uaddr, &flags);
 
-	if (ret == 0) {
-		switch (cmp) {
-		case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
-		case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
-		case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
-		case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
-		case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
-		case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
-		default: ret = -ENOSYS;
-		}
-	}
+	if (!ret)
+		*oval = oldval;
+
 	return ret;
 }
 
diff --git a/arch/parisc/include/asm/mmu_context.h b/arch/parisc/include/asm/mmu_context.h
index a81226257878..e4a657094058 100644
--- a/arch/parisc/include/asm/mmu_context.h
+++ b/arch/parisc/include/asm/mmu_context.h
@@ -63,6 +63,9 @@ static inline void switch_mm(struct mm_struct *prev,
 {
 	unsigned long flags;
 
+	if (prev == next)
+		return;
+
 	local_irq_save(flags);
 	switch_mm_irqs_off(prev, next, tsk);
 	local_irq_restore(flags);
diff --git a/arch/parisc/include/asm/page.h b/arch/parisc/include/asm/page.h
index 80e742a1c162..bfed09d80bae 100644
--- a/arch/parisc/include/asm/page.h
+++ b/arch/parisc/include/asm/page.h
@@ -116,11 +116,15 @@ extern int npmem_ranges;
 /* This governs the relationship between virtual and physical addresses.
  * If you alter it, make sure to take care of our various fixed mapping
  * segments in fixmap.h */
+#if defined(BOOTLOADER)
+#define __PAGE_OFFSET	(0)		/* bootloader uses physical addresses */
+#else
 #ifdef CONFIG_64BIT
 #define __PAGE_OFFSET	(0x40000000)	/* 1GB */
 #else
 #define __PAGE_OFFSET	(0x10000000)	/* 256MB */
 #endif
+#endif /* BOOTLOADER */
 
 #define PAGE_OFFSET		((unsigned long)__PAGE_OFFSET)
 
diff --git a/arch/parisc/include/asm/pdc.h b/arch/parisc/include/asm/pdc.h
index 7569627a032b..26b4455baa83 100644
--- a/arch/parisc/include/asm/pdc.h
+++ b/arch/parisc/include/asm/pdc.h
@@ -5,6 +5,8 @@
 
 #if !defined(__ASSEMBLY__)
 
+extern int parisc_narrow_firmware;
+
 extern int pdc_type;
 extern unsigned long parisc_cell_num; /* cell number the CPU runs on (PAT) */
 extern unsigned long parisc_cell_loc; /* cell location of CPU (PAT)	   */
diff --git a/arch/parisc/include/asm/pdcpat.h b/arch/parisc/include/asm/pdcpat.h
index e3c0586260d8..a468a172ee33 100644
--- a/arch/parisc/include/asm/pdcpat.h
+++ b/arch/parisc/include/asm/pdcpat.h
@@ -223,6 +223,18 @@ struct pdc_pat_mem_retinfo { /* PDC_PAT_MEM/PDC_PAT_MEM_PD_INFO (return info) */
 	unsigned long clear_time; /* last PDT clear time (since Jan 1970) */
 };
 
+struct pdc_pat_mem_cell_pdt_retinfo { /* PDC_PAT_MEM/PDC_PAT_MEM_CELL_INFO */
+	u64 reserved:32;
+	u64 cs:1;		/* clear status: cleared since the last call? */
+	u64 current_pdt_entries:15;
+	u64 ic:1;		/* interleaving had to be changed ? */
+	u64 max_pdt_entries:15;
+	unsigned long good_mem;
+	unsigned long first_dbe_loc; /* first location of double bit error */
+	unsigned long clear_time; /* last PDT clear time (since Jan 1970) */
+};
+
+
 struct pdc_pat_mem_read_pd_retinfo { /* PDC_PAT_MEM/PDC_PAT_MEM_PD_READ */
 	unsigned long actual_count_bytes;
 	unsigned long pdt_entries;
@@ -325,6 +337,8 @@ extern int pdc_pat_io_pci_cfg_read(unsigned long pci_addr, int pci_size, u32 *va
 extern int pdc_pat_io_pci_cfg_write(unsigned long pci_addr, int pci_size, u32 val); 
 
 extern int pdc_pat_mem_pdt_info(struct pdc_pat_mem_retinfo *rinfo);
+extern int pdc_pat_mem_pdt_cell_info(struct pdc_pat_mem_cell_pdt_retinfo *rinfo,
+		unsigned long cell);
 extern int pdc_pat_mem_read_cell_pdt(struct pdc_pat_mem_read_pd_retinfo *pret,
 		unsigned long *pdt_entries_ptr, unsigned long max_entries);
 extern int pdc_pat_mem_read_pd_pdt(struct pdc_pat_mem_read_pd_retinfo *pret,
diff --git a/arch/parisc/include/asm/spinlock.h b/arch/parisc/include/asm/spinlock.h
index e32936cd7f10..55bfe4affca3 100644
--- a/arch/parisc/include/asm/spinlock.h
+++ b/arch/parisc/include/asm/spinlock.h
@@ -14,13 +14,6 @@ static inline int arch_spin_is_locked(arch_spinlock_t *x)
 
 #define arch_spin_lock(lock) arch_spin_lock_flags(lock, 0)
 
-static inline void arch_spin_unlock_wait(arch_spinlock_t *x)
-{
-	volatile unsigned int *a = __ldcw_align(x);
-
-	smp_cond_load_acquire(a, VAL);
-}
-
 static inline void arch_spin_lock_flags(arch_spinlock_t *x,
 					 unsigned long flags)
 {
diff --git a/arch/parisc/include/uapi/asm/mman.h b/arch/parisc/include/uapi/asm/mman.h
index 5979745815a5..775b5d5e41a1 100644
--- a/arch/parisc/include/uapi/asm/mman.h
+++ b/arch/parisc/include/uapi/asm/mman.h
@@ -40,9 +40,6 @@
 #define MADV_SEQUENTIAL 2               /* expect sequential page references */
 #define MADV_WILLNEED   3               /* will need these pages */
 #define MADV_DONTNEED   4               /* don't need these pages */
-#define MADV_SPACEAVAIL 5               /* insure that resources are reserved */
-#define MADV_VPS_PURGE  6               /* Purge pages from VM page cache */
-#define MADV_VPS_INHERIT 7              /* Inherit parents page size */
 
 /* common/generic parameters */
 #define MADV_FREE	8		/* free pages only if memory pressure */
@@ -60,21 +57,16 @@
 					   overrides the coredump filter bits */
 #define MADV_DODUMP	70		/* Clear the MADV_NODUMP flag */
 
+#define MADV_WIPEONFORK 71		/* Zero memory on fork, child only */
+#define MADV_KEEPONFORK 72		/* Undo MADV_WIPEONFORK */
+
+#define MADV_HWPOISON     100		/* poison a page for testing */
+#define MADV_SOFT_OFFLINE 101		/* soft offline page for testing */
+
 /* compatibility flags */
 #define MAP_FILE	0
 #define MAP_VARIABLE	0
 
-/*
- * When MAP_HUGETLB is set bits [26:31] encode the log2 of the huge page size.
- * This gives us 6 bits, which is enough until someone invents 128 bit address
- * spaces.
- *
- * Assume these are all power of twos.
- * When 0 use the default page size.
- */
-#define MAP_HUGE_SHIFT	26
-#define MAP_HUGE_MASK	0x3f
-
 #define PKEY_DISABLE_ACCESS	0x1
 #define PKEY_DISABLE_WRITE	0x2
 #define PKEY_ACCESS_MASK	(PKEY_DISABLE_ACCESS |\
diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h
index a0d4dc9f4eb2..3b2bf7ae703b 100644
--- a/arch/parisc/include/uapi/asm/socket.h
+++ b/arch/parisc/include/uapi/asm/socket.h
@@ -101,4 +101,6 @@
 
 #define SO_PEERGROUPS		0x4034
 
+#define SO_ZEROCOPY		0x4035
+
 #endif /* _UAPI_ASM_SOCKET_H */
diff --git a/arch/parisc/kernel/firmware.c b/arch/parisc/kernel/firmware.c
index f622a311d04a..ab80e5c6f651 100644
--- a/arch/parisc/kernel/firmware.c
+++ b/arch/parisc/kernel/firmware.c
@@ -69,7 +69,15 @@
 #include <asm/pdcpat.h>
 #include <asm/processor.h>	/* for boot_cpu_data */
 
+#if defined(BOOTLOADER)
+# undef  spin_lock_irqsave
+# define spin_lock_irqsave(a, b) { b = 1; }
+# undef  spin_unlock_irqrestore
+# define spin_unlock_irqrestore(a, b)
+#else
 static DEFINE_SPINLOCK(pdc_lock);
+#endif
+
 extern unsigned long pdc_result[NUM_PDC_RESULT];
 extern unsigned long pdc_result2[NUM_PDC_RESULT];
 
@@ -142,8 +150,8 @@ static void convert_to_wide(unsigned long *addr)
 	int i;
 	unsigned int *p = (unsigned int *)addr;
 
-	if(unlikely(parisc_narrow_firmware)) {
-		for(i = 31; i >= 0; --i)
+	if (unlikely(parisc_narrow_firmware)) {
+		for (i = (NUM_PDC_RESULT-1); i >= 0; --i)
 			addr[i] = p[i];
 	}
 #endif
@@ -186,6 +194,8 @@ void set_firmware_width(void)
 }
 #endif /*CONFIG_64BIT*/
 
+
+#if !defined(BOOTLOADER)
 /**
  * pdc_emergency_unlock - Unlock the linux pdc lock
  *
@@ -979,16 +989,22 @@ int pdc_mem_pdt_read_entries(struct pdc_mem_read_pdt *pret,
 
 	spin_lock_irqsave(&pdc_lock, flags);
 	retval = mem_pdc_call(PDC_MEM, PDC_MEM_READ_PDT, __pa(pdc_result),
-			__pa(pdc_result2));
+			__pa(pdt_entries_ptr));
 	if (retval == PDC_OK) {
 		convert_to_wide(pdc_result);
 		memcpy(pret, pdc_result, sizeof(*pret));
-		convert_to_wide(pdc_result2);
-		memcpy(pdt_entries_ptr, pdc_result2,
-			pret->pdt_entries * sizeof(*pdt_entries_ptr));
 	}
 	spin_unlock_irqrestore(&pdc_lock, flags);
 
+#ifdef CONFIG_64BIT
+	/*
+	 * 64-bit kernels should not call this PDT function in narrow mode.
+	 * The pdt_entries_ptr array above will now contain 32-bit values
+	 */
+	if (WARN_ON_ONCE((retval == PDC_OK) && parisc_narrow_firmware))
+		return PDC_ERROR;
+#endif
+
 	return retval;
 }
 
@@ -1143,6 +1159,8 @@ void pdc_io_reset_devices(void)
 	spin_unlock_irqrestore(&pdc_lock, flags);
 }
 
+#endif /* defined(BOOTLOADER) */
+
 /* locked by pdc_console_lock */
 static int __attribute__((aligned(8)))   iodc_retbuf[32];
 static char __attribute__((aligned(64))) iodc_dbuf[4096];
@@ -1187,6 +1205,7 @@ print:
 	return i;
 }
 
+#if !defined(BOOTLOADER)
 /**
  * pdc_iodc_getc - Read a character (non-blocking) from the PDC console.
  *
@@ -1440,6 +1459,29 @@ int pdc_pat_mem_pdt_info(struct pdc_pat_mem_retinfo *rinfo)
 }
 
 /**
+ * pdc_pat_mem_pdt_cell_info - Retrieve information about page deallocation
+ *				table of a cell
+ * @rinfo: memory pdt information
+ * @cell: cell number
+ *
+ */
+int pdc_pat_mem_pdt_cell_info(struct pdc_pat_mem_cell_pdt_retinfo *rinfo,
+		unsigned long cell)
+{
+	int retval;
+	unsigned long flags;
+
+	spin_lock_irqsave(&pdc_lock, flags);
+	retval = mem_pdc_call(PDC_PAT_MEM, PDC_PAT_MEM_CELL_INFO,
+			__pa(&pdc_result), cell);
+	if (retval == PDC_OK)
+		memcpy(rinfo, &pdc_result, sizeof(*rinfo));
+	spin_unlock_irqrestore(&pdc_lock, flags);
+
+	return retval;
+}
+
+/**
  * pdc_pat_mem_read_cell_pdt - Read PDT entries from (old) PAT firmware
  * @pret: array of PDT entries
  * @pdt_entries_ptr: ptr to hold number of PDT entries
@@ -1455,14 +1497,14 @@ int pdc_pat_mem_read_cell_pdt(struct pdc_pat_mem_read_pd_retinfo *pret,
 	spin_lock_irqsave(&pdc_lock, flags);
 	/* PDC_PAT_MEM_CELL_READ is available on early PAT machines only */
 	retval = mem_pdc_call(PDC_PAT_MEM, PDC_PAT_MEM_CELL_READ,
-			__pa(&pdc_result), parisc_cell_num, __pa(&pdc_result2));
+			__pa(&pdc_result), parisc_cell_num,
+			__pa(pdt_entries_ptr));
 
 	if (retval == PDC_OK) {
 		/* build up return value as for PDC_PAT_MEM_PD_READ */
 		entries = min(pdc_result[0], max_entries);
 		pret->pdt_entries = entries;
 		pret->actual_count_bytes = entries * sizeof(unsigned long);
-		memcpy(pdt_entries_ptr, &pdc_result2, pret->actual_count_bytes);
 	}
 
 	spin_unlock_irqrestore(&pdc_lock, flags);
@@ -1474,6 +1516,8 @@ int pdc_pat_mem_read_cell_pdt(struct pdc_pat_mem_read_pd_retinfo *pret,
  * pdc_pat_mem_read_pd_pdt - Read PDT entries from (newer) PAT firmware
  * @pret: array of PDT entries
  * @pdt_entries_ptr: ptr to hold number of PDT entries
+ * @count: number of bytes to read
+ * @offset: offset to start (in bytes)
  *
  */
 int pdc_pat_mem_read_pd_pdt(struct pdc_pat_mem_read_pd_retinfo *pret,
@@ -1524,6 +1568,7 @@ int pdc_pat_mem_get_dimm_phys_location(
 	return retval;
 }
 #endif /* CONFIG_64BIT */
+#endif /* defined(BOOTLOADER) */
 
 
 /***************** 32-bit real-mode calls ***********/
@@ -1633,4 +1678,3 @@ long real64_call(unsigned long fn, ...)
 }
 
 #endif /* CONFIG_64BIT */
-
diff --git a/arch/parisc/kernel/pci-dma.c b/arch/parisc/kernel/pci-dma.c
index 5f0067a62738..bd4c0a7471d3 100644
--- a/arch/parisc/kernel/pci-dma.c
+++ b/arch/parisc/kernel/pci-dma.c
@@ -41,7 +41,7 @@ static unsigned long pcxl_used_bytes __read_mostly = 0;
 static unsigned long pcxl_used_pages __read_mostly = 0;
 
 extern unsigned long pcxl_dma_start; /* Start of pcxl dma mapping area */
-static spinlock_t   pcxl_res_lock;
+static DEFINE_SPINLOCK(pcxl_res_lock);
 static char    *pcxl_res_map;
 static int     pcxl_res_hint;
 static int     pcxl_res_size;
@@ -390,7 +390,6 @@ pcxl_dma_init(void)
 	if (pcxl_dma_start == 0)
 		return 0;
 
-	spin_lock_init(&pcxl_res_lock);
 	pcxl_res_size = PCXL_DMA_MAP_SIZE >> (PAGE_SHIFT + 3);
 	pcxl_res_hint = 0;
 	pcxl_res_map = (char *)__get_free_pages(GFP_KERNEL,
diff --git a/arch/parisc/kernel/pdt.c b/arch/parisc/kernel/pdt.c
index d02874ecb94d..05730a83895c 100644
--- a/arch/parisc/kernel/pdt.c
+++ b/arch/parisc/kernel/pdt.c
@@ -1,19 +1,20 @@
 /*
  *    Page Deallocation Table (PDT) support
  *
- *    The Page Deallocation Table (PDT) holds a table with pointers to bad
- *    memory (broken RAM modules) which is maintained by firmware.
+ *    The Page Deallocation Table (PDT) is maintained by firmware and holds a
+ *    list of memory addresses in which memory errors were detected.
+ *    The list contains both single-bit (correctable) and double-bit
+ *    (uncorrectable) errors.
  *
  *    Copyright 2017 by Helge Deller <deller@gmx.de>
  *
- *    TODO:
- *    - check regularily for new bad memory
- *    - add userspace interface with procfs or sysfs
- *    - increase number of PDT entries dynamically
+ *    possible future enhancements:
+ *    - add userspace interface via procfs or sysfs to clear PDT
  */
 
 #include <linux/memblock.h>
 #include <linux/seq_file.h>
+#include <linux/kthread.h>
 
 #include <asm/pdc.h>
 #include <asm/pdcpat.h>
@@ -24,11 +25,16 @@ enum pdt_access_type {
 	PDT_NONE,
 	PDT_PDC,
 	PDT_PAT_NEW,
-	PDT_PAT_OLD
+	PDT_PAT_CELL
 };
 
 static enum pdt_access_type pdt_type;
 
+/* PDT poll interval: 1 minute if errors, 5 minutes if everything OK. */
+#define PDT_POLL_INTERVAL_DEFAULT	(5*60*HZ)
+#define PDT_POLL_INTERVAL_SHORT		(1*60*HZ)
+static unsigned long pdt_poll_interval = PDT_POLL_INTERVAL_DEFAULT;
+
 /* global PDT status information */
 static struct pdc_mem_retinfo pdt_status;
 
@@ -36,6 +42,21 @@ static struct pdc_mem_retinfo pdt_status;
 #define MAX_PDT_ENTRIES		(MAX_PDT_TABLE_SIZE / sizeof(unsigned long))
 static unsigned long pdt_entry[MAX_PDT_ENTRIES] __page_aligned_bss;
 
+/*
+ * Constants for the pdt_entry format:
+ * A pdt_entry holds the physical address in bits 0-57, bits 58-61 are
+ * reserved, bit 62 is the perm bit and bit 63 is the error_type bit.
+ * The perm bit indicates whether the error have been verified as a permanent
+ * error (value of 1) or has not been verified, and may be transient (value
+ * of 0). The error_type bit indicates whether the error is a single bit error
+ * (value of 1) or a multiple bit error.
+ * On non-PAT machines phys_addr is encoded in bits 0-59 and error_type in bit
+ * 63. Those machines don't provide the perm bit.
+ */
+
+#define PDT_ADDR_PHYS_MASK	(pdt_type != PDT_PDC ? ~0x3f : ~0x0f)
+#define PDT_ADDR_PERM_ERR	(pdt_type != PDT_PDC ? 2UL : 0UL)
+#define PDT_ADDR_SINGLE_ERR	1UL
 
 /* report PDT entries via /proc/meminfo */
 void arch_report_meminfo(struct seq_file *m)
@@ -49,6 +70,68 @@ void arch_report_meminfo(struct seq_file *m)
 			pdt_status.pdt_entries);
 }
 
+static int get_info_pat_new(void)
+{
+	struct pdc_pat_mem_retinfo pat_rinfo;
+	int ret;
+
+	/* newer PAT machines like C8000 report info for all cells */
+	if (is_pdc_pat())
+		ret = pdc_pat_mem_pdt_info(&pat_rinfo);
+	else
+		return PDC_BAD_PROC;
+
+	pdt_status.pdt_size = pat_rinfo.max_pdt_entries;
+	pdt_status.pdt_entries = pat_rinfo.current_pdt_entries;
+	pdt_status.pdt_status = 0;
+	pdt_status.first_dbe_loc = pat_rinfo.first_dbe_loc;
+	pdt_status.good_mem = pat_rinfo.good_mem;
+
+	return ret;
+}
+
+static int get_info_pat_cell(void)
+{
+	struct pdc_pat_mem_cell_pdt_retinfo cell_rinfo;
+	int ret;
+
+	/* older PAT machines like rp5470 report cell info only */
+	if (is_pdc_pat())
+		ret = pdc_pat_mem_pdt_cell_info(&cell_rinfo, parisc_cell_num);
+	else
+		return PDC_BAD_PROC;
+
+	pdt_status.pdt_size = cell_rinfo.max_pdt_entries;
+	pdt_status.pdt_entries = cell_rinfo.current_pdt_entries;
+	pdt_status.pdt_status = 0;
+	pdt_status.first_dbe_loc = cell_rinfo.first_dbe_loc;
+	pdt_status.good_mem = cell_rinfo.good_mem;
+
+	return ret;
+}
+
+static void report_mem_err(unsigned long pde)
+{
+	struct pdc_pat_mem_phys_mem_location loc;
+	unsigned long addr;
+	char dimm_txt[32];
+
+	addr = pde & PDT_ADDR_PHYS_MASK;
+
+	/* show DIMM slot description on PAT machines */
+	if (is_pdc_pat()) {
+		pdc_pat_mem_get_dimm_phys_location(&loc, addr);
+		sprintf(dimm_txt, "DIMM slot %02x, ", loc.dimm_slot);
+	} else
+		dimm_txt[0] = 0;
+
+	pr_warn("PDT: BAD MEMORY at 0x%08lx, %s%s%s-bit error.\n",
+		addr, dimm_txt,
+		pde & PDT_ADDR_PERM_ERR ? "permanent ":"",
+		pde & PDT_ADDR_SINGLE_ERR ? "single":"multi");
+}
+
+
 /*
  * pdc_pdt_init()
  *
@@ -63,18 +146,17 @@ void __init pdc_pdt_init(void)
 	unsigned long entries;
 	struct pdc_mem_read_pdt pdt_read_ret;
 
-	if (is_pdc_pat()) {
-		struct pdc_pat_mem_retinfo pat_rinfo;
+	pdt_type = PDT_PAT_NEW;
+	ret = get_info_pat_new();
 
-		pdt_type = PDT_PAT_NEW;
-		ret = pdc_pat_mem_pdt_info(&pat_rinfo);
-		pdt_status.pdt_size = pat_rinfo.max_pdt_entries;
-		pdt_status.pdt_entries = pat_rinfo.current_pdt_entries;
-		pdt_status.pdt_status = 0;
-		pdt_status.first_dbe_loc = pat_rinfo.first_dbe_loc;
-		pdt_status.good_mem = pat_rinfo.good_mem;
-	} else {
+	if (ret != PDC_OK) {
+		pdt_type = PDT_PAT_CELL;
+		ret = get_info_pat_cell();
+	}
+
+	if (ret != PDC_OK) {
 		pdt_type = PDT_PDC;
+		/* non-PAT machines provide the standard PDC call */
 		ret = pdc_mem_pdt_info(&pdt_status);
 	}
 
@@ -86,13 +168,17 @@ void __init pdc_pdt_init(void)
 	}
 
 	entries = pdt_status.pdt_entries;
-	WARN_ON(entries > MAX_PDT_ENTRIES);
+	if (WARN_ON(entries > MAX_PDT_ENTRIES))
+		entries = pdt_status.pdt_entries = MAX_PDT_ENTRIES;
 
-	pr_info("PDT: size %lu, entries %lu, status %lu, dbe_loc 0x%lx,"
-		" good_mem %lu\n",
+	pr_info("PDT: type %s, size %lu, entries %lu, status %lu, dbe_loc 0x%lx,"
+		" good_mem %lu MB\n",
+			pdt_type == PDT_PDC ? __stringify(PDT_PDC) :
+			pdt_type == PDT_PAT_CELL ? __stringify(PDT_PAT_CELL)
+						 : __stringify(PDT_PAT_NEW),
 			pdt_status.pdt_size, pdt_status.pdt_entries,
 			pdt_status.pdt_status, pdt_status.first_dbe_loc,
-			pdt_status.good_mem);
+			pdt_status.good_mem / 1024 / 1024);
 
 	if (entries == 0) {
 		pr_info("PDT: Firmware reports all memory OK.\n");
@@ -112,15 +198,12 @@ void __init pdc_pdt_init(void)
 #ifdef CONFIG_64BIT
 		struct pdc_pat_mem_read_pd_retinfo pat_pret;
 
-		/* try old obsolete PAT firmware function first */
-		pdt_type = PDT_PAT_OLD;
-		ret = pdc_pat_mem_read_cell_pdt(&pat_pret, pdt_entry,
-			MAX_PDT_ENTRIES);
-		if (ret != PDC_OK) {
-			pdt_type = PDT_PAT_NEW;
+		if (pdt_type == PDT_PAT_CELL)
+			ret = pdc_pat_mem_read_cell_pdt(&pat_pret, pdt_entry,
+				MAX_PDT_ENTRIES);
+		else
 			ret = pdc_pat_mem_read_pd_pdt(&pat_pret, pdt_entry,
 				MAX_PDT_TABLE_SIZE, 0);
-		}
 #else
 		ret = PDC_BAD_PROC;
 #endif
@@ -128,27 +211,142 @@ void __init pdc_pdt_init(void)
 
 	if (ret != PDC_OK) {
 		pdt_type = PDT_NONE;
-		pr_debug("PDT type %d, retval = %d\n", pdt_type, ret);
+		pr_warn("PDT: Get PDT entries failed with %d\n", ret);
 		return;
 	}
 
 	for (i = 0; i < pdt_status.pdt_entries; i++) {
-		struct pdc_pat_mem_phys_mem_location loc;
+		report_mem_err(pdt_entry[i]);
+
+		/* mark memory page bad */
+		memblock_reserve(pdt_entry[i] & PAGE_MASK, PAGE_SIZE);
+	}
+}
 
-		/* get DIMM slot number */
-		loc.dimm_slot = 0xff;
+
+/*
+ * This is the PDT kernel thread main loop.
+ */
+
+static int pdt_mainloop(void *unused)
+{
+	struct pdc_mem_read_pdt pdt_read_ret;
+	struct pdc_pat_mem_read_pd_retinfo pat_pret __maybe_unused;
+	unsigned long old_num_entries;
+	unsigned long *bad_mem_ptr;
+	int num, ret;
+
+	for (;;) {
+		set_current_state(TASK_INTERRUPTIBLE);
+
+		old_num_entries = pdt_status.pdt_entries;
+
+		schedule_timeout(pdt_poll_interval);
+		if (kthread_should_stop())
+			break;
+
+		/* Do we have new PDT entries? */
+		switch (pdt_type) {
+		case PDT_PAT_NEW:
+			ret = get_info_pat_new();
+			break;
+		case PDT_PAT_CELL:
+			ret = get_info_pat_cell();
+			break;
+		default:
+			ret = pdc_mem_pdt_info(&pdt_status);
+			break;
+		}
+
+		if (ret != PDC_OK) {
+			pr_warn("PDT: unexpected failure %d\n", ret);
+			return -EINVAL;
+		}
+
+		/* if no new PDT entries, just wait again */
+		num = pdt_status.pdt_entries - old_num_entries;
+		if (num <= 0)
+			continue;
+
+		/* decrease poll interval in case we found memory errors */
+		if (pdt_status.pdt_entries &&
+			pdt_poll_interval == PDT_POLL_INTERVAL_DEFAULT)
+			pdt_poll_interval = PDT_POLL_INTERVAL_SHORT;
+
+		/* limit entries to get */
+		if (num > MAX_PDT_ENTRIES) {
+			num = MAX_PDT_ENTRIES;
+			pdt_status.pdt_entries = old_num_entries + num;
+		}
+
+		/* get new entries */
+		switch (pdt_type) {
 #ifdef CONFIG_64BIT
-		pdc_pat_mem_get_dimm_phys_location(&loc, pdt_entry[i]);
+		case PDT_PAT_CELL:
+			if (pdt_status.pdt_entries > MAX_PDT_ENTRIES) {
+				pr_crit("PDT: too many entries.\n");
+				return -ENOMEM;
+			}
+			ret = pdc_pat_mem_read_cell_pdt(&pat_pret, pdt_entry,
+				MAX_PDT_ENTRIES);
+			bad_mem_ptr = &pdt_entry[old_num_entries];
+			break;
+		case PDT_PAT_NEW:
+			ret = pdc_pat_mem_read_pd_pdt(&pat_pret,
+				pdt_entry,
+				num * sizeof(unsigned long),
+				old_num_entries * sizeof(unsigned long));
+			bad_mem_ptr = &pdt_entry[0];
+			break;
 #endif
+		default:
+			ret = pdc_mem_pdt_read_entries(&pdt_read_ret,
+				pdt_entry);
+			bad_mem_ptr = &pdt_entry[old_num_entries];
+			break;
+		}
 
-		pr_warn("PDT: BAD PAGE #%d at 0x%08lx, "
-			"DIMM slot %02x (error_type = %lu)\n",
-			i,
-			pdt_entry[i] & PAGE_MASK,
-			loc.dimm_slot,
-			pdt_entry[i] & 1);
+		/* report and mark memory broken */
+		while (num--) {
+			unsigned long pde = *bad_mem_ptr++;
 
-		/* mark memory page bad */
-		memblock_reserve(pdt_entry[i] & PAGE_MASK, PAGE_SIZE);
+			report_mem_err(pde);
+
+#ifdef CONFIG_MEMORY_FAILURE
+			if ((pde & PDT_ADDR_PERM_ERR) ||
+			    ((pde & PDT_ADDR_SINGLE_ERR) == 0))
+				memory_failure(pde >> PAGE_SHIFT, 0, 0);
+			else
+				soft_offline_page(
+					pfn_to_page(pde >> PAGE_SHIFT), 0);
+#else
+			pr_crit("PDT: memory error at 0x%lx ignored.\n"
+				"Rebuild kernel with CONFIG_MEMORY_FAILURE=y "
+				"for real handling.\n",
+				pde & PDT_ADDR_PHYS_MASK);
+#endif
+
+		}
 	}
+
+	return 0;
 }
+
+
+static int __init pdt_initcall(void)
+{
+	struct task_struct *kpdtd_task;
+
+	if (pdt_type == PDT_NONE)
+		return -ENODEV;
+
+	kpdtd_task = kthread_create(pdt_mainloop, NULL, "kpdtd");
+	if (IS_ERR(kpdtd_task))
+		return PTR_ERR(kpdtd_task);
+
+	wake_up_process(kpdtd_task);
+
+	return 0;
+}
+
+late_initcall(pdt_initcall);
diff --git a/arch/parisc/kernel/perf.c b/arch/parisc/kernel/perf.c
index 6017a5af2e6e..0813359049ae 100644
--- a/arch/parisc/kernel/perf.c
+++ b/arch/parisc/kernel/perf.c
@@ -69,7 +69,7 @@ struct rdr_tbl_ent {
 
 static int perf_processor_interface __read_mostly = UNKNOWN_INTF;
 static int perf_enabled __read_mostly;
-static spinlock_t perf_lock;
+static DEFINE_SPINLOCK(perf_lock);
 struct parisc_device *cpu_device __read_mostly;
 
 /* RDRs to write for PCX-W */
@@ -533,8 +533,6 @@ static int __init perf_init(void)
 	/* Patch the images to match the system */
     	perf_patch_images();
 
-	spin_lock_init(&perf_lock);
-
 	/* TODO: this only lets us access the first cpu.. what to do for SMP? */
 	cpu_device = per_cpu(cpu_data, 0).dev;
 	printk("Performance monitoring counters enabled for %s\n",
diff --git a/arch/parisc/kernel/processor.c b/arch/parisc/kernel/processor.c
index 0ab32779dfa7..a778bd3c107c 100644
--- a/arch/parisc/kernel/processor.c
+++ b/arch/parisc/kernel/processor.c
@@ -30,6 +30,7 @@
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/seq_file.h>
+#include <linux/random.h>
 #include <linux/slab.h>
 #include <linux/cpu.h>
 #include <asm/param.h>
@@ -89,7 +90,7 @@ init_percpu_prof(unsigned long cpunum)
  * (return 1).  If so, initialize the chip and tell other partners in crime 
  * they have work to do.
  */
-static int processor_probe(struct parisc_device *dev)
+static int __init processor_probe(struct parisc_device *dev)
 {
 	unsigned long txn_addr;
 	unsigned long cpuid;
@@ -237,28 +238,45 @@ static int processor_probe(struct parisc_device *dev)
  */
 void __init collect_boot_cpu_data(void)
 {
+	unsigned long cr16_seed;
+
 	memset(&boot_cpu_data, 0, sizeof(boot_cpu_data));
 
+	cr16_seed = get_cycles();
+	add_device_randomness(&cr16_seed, sizeof(cr16_seed));
+
 	boot_cpu_data.cpu_hz = 100 * PAGE0->mem_10msec; /* Hz of this PARISC */
 
 	/* get CPU-Model Information... */
 #define p ((unsigned long *)&boot_cpu_data.pdc.model)
-	if (pdc_model_info(&boot_cpu_data.pdc.model) == PDC_OK)
+	if (pdc_model_info(&boot_cpu_data.pdc.model) == PDC_OK) {
 		printk(KERN_INFO 
 			"model %08lx %08lx %08lx %08lx %08lx %08lx %08lx %08lx %08lx\n",
 			p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7], p[8]);
+
+		add_device_randomness(&boot_cpu_data.pdc.model,
+			sizeof(boot_cpu_data.pdc.model));
+	}
 #undef p
 
-	if (pdc_model_versions(&boot_cpu_data.pdc.versions, 0) == PDC_OK)
+	if (pdc_model_versions(&boot_cpu_data.pdc.versions, 0) == PDC_OK) {
 		printk(KERN_INFO "vers  %08lx\n", 
 			boot_cpu_data.pdc.versions);
 
-	if (pdc_model_cpuid(&boot_cpu_data.pdc.cpuid) == PDC_OK)
+		add_device_randomness(&boot_cpu_data.pdc.versions,
+			sizeof(boot_cpu_data.pdc.versions));
+	}
+
+	if (pdc_model_cpuid(&boot_cpu_data.pdc.cpuid) == PDC_OK) {
 		printk(KERN_INFO "CPUID vers %ld rev %ld (0x%08lx)\n",
 			(boot_cpu_data.pdc.cpuid >> 5) & 127,
 			boot_cpu_data.pdc.cpuid & 31,
 			boot_cpu_data.pdc.cpuid);
 
+		add_device_randomness(&boot_cpu_data.pdc.cpuid,
+			sizeof(boot_cpu_data.pdc.cpuid));
+	}
+
 	if (pdc_model_capabilities(&boot_cpu_data.pdc.capabilities) == PDC_OK)
 		printk(KERN_INFO "capabilities 0x%lx\n",
 			boot_cpu_data.pdc.capabilities);
@@ -414,12 +432,12 @@ show_cpuinfo (struct seq_file *m, void *v)
 	return 0;
 }
 
-static const struct parisc_device_id processor_tbl[] = {
+static const struct parisc_device_id processor_tbl[] __initconst = {
 	{ HPHW_NPROC, HVERSION_REV_ANY_ID, HVERSION_ANY_ID, SVERSION_ANY_ID },
 	{ 0, }
 };
 
-static struct parisc_driver cpu_driver = {
+static struct parisc_driver cpu_driver __refdata = {
 	.name		= "CPU",
 	.id_table	= processor_tbl,
 	.probe		= processor_probe
diff --git a/arch/parisc/kernel/real2.S b/arch/parisc/kernel/real2.S
index 1db58e546230..cc9963421a19 100644
--- a/arch/parisc/kernel/real2.S
+++ b/arch/parisc/kernel/real2.S
@@ -162,6 +162,7 @@ ENDPROC_CFI(restore_control_regs)
 	.text
 	.align 128
 ENTRY_CFI(rfi_virt2real)
+#if !defined(BOOTLOADER)
 	/* switch to real mode... */
 	rsm		PSW_SM_I,%r0
 	load32		PA(rfi_v2r_1), %r1
@@ -191,6 +192,7 @@ ENTRY_CFI(rfi_virt2real)
 	nop
 rfi_v2r_1:
 	tophys_r1 %r2
+#endif /* defined(BOOTLOADER) */
 	bv	0(%r2)
 	nop
 ENDPROC_CFI(rfi_virt2real)
@@ -198,6 +200,7 @@ ENDPROC_CFI(rfi_virt2real)
 	.text
 	.align 128
 ENTRY_CFI(rfi_real2virt)
+#if !defined(BOOTLOADER)
 	rsm		PSW_SM_I,%r0
 	load32		(rfi_r2v_1), %r1
 	nop
@@ -226,6 +229,7 @@ ENTRY_CFI(rfi_real2virt)
 	nop
 rfi_r2v_1:
 	tovirt_r1 %r2
+#endif /* defined(BOOTLOADER) */
 	bv	0(%r2)
 	nop
 ENDPROC_CFI(rfi_real2virt)
diff --git a/arch/parisc/kernel/unwind.c b/arch/parisc/kernel/unwind.c
index 1b73690477c5..48dc7d4d20bb 100644
--- a/arch/parisc/kernel/unwind.c
+++ b/arch/parisc/kernel/unwind.c
@@ -34,7 +34,7 @@
 extern struct unwind_table_entry __start___unwind[];
 extern struct unwind_table_entry __stop___unwind[];
 
-static spinlock_t unwind_lock;
+static DEFINE_SPINLOCK(unwind_lock);
 /*
  * the kernel unwind block is not dynamically allocated so that
  * we can call unwind_init as early in the bootup process as 
@@ -181,8 +181,6 @@ int __init unwind_init(void)
 	start = (long)&__start___unwind[0];
 	stop = (long)&__stop___unwind[0];
 
-	spin_lock_init(&unwind_lock);
-
 	printk("unwind_init: start = 0x%lx, end = 0x%lx, entries = %lu\n", 
 	    start, stop,
 	    (stop - start) / sizeof(struct unwind_table_entry));
diff --git a/arch/parisc/lib/memcpy.c b/arch/parisc/lib/memcpy.c
index 99115cd9e790..865a7f796c7f 100644
--- a/arch/parisc/lib/memcpy.c
+++ b/arch/parisc/lib/memcpy.c
@@ -27,8 +27,6 @@
 #include <linux/compiler.h>
 #include <linux/uaccess.h>
 
-DECLARE_PER_CPU(struct exception_data, exception_data);
-
 #define get_user_space() (uaccess_kernel() ? 0 : mfsp(3))
 #define get_kernel_space() (0)
 
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 81b0031f909f..809c468edab1 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -85,6 +85,17 @@ config NMI_IPI
 	depends on SMP && (DEBUGGER || KEXEC_CORE || HARDLOCKUP_DETECTOR)
 	default y
 
+config PPC_WATCHDOG
+	bool
+	depends on HARDLOCKUP_DETECTOR
+	depends on HAVE_HARDLOCKUP_DETECTOR_ARCH
+	default y
+	help
+	  This is a placeholder when the powerpc hardlockup detector
+	  watchdog is selected (arch/powerpc/kernel/watchdog.c). It is
+	  seleted via the generic lockup detector menu which is why we
+	  have no standalone config option for it here.
+
 config STACKTRACE_SUPPORT
 	bool
 	default y
@@ -165,7 +176,7 @@ config PPC
 	select HAVE_ARCH_MMAP_RND_COMPAT_BITS	if COMPAT
 	select HAVE_ARCH_SECCOMP_FILTER
 	select HAVE_ARCH_TRACEHOOK
-	select ARCH_HAS_STRICT_KERNEL_RWX	if (PPC_BOOK3S_64 && !RELOCATABLE && !HIBERNATION)
+	select ARCH_HAS_STRICT_KERNEL_RWX	if ((PPC_BOOK3S_64 || PPC32) && !RELOCATABLE && !HIBERNATION)
 	select ARCH_OPTIONAL_KERNEL_RWX		if ARCH_HAS_STRICT_KERNEL_RWX
 	select HAVE_CBPF_JIT			if !PPC64
 	select HAVE_CONTEXT_TRACKING		if PPC64
@@ -356,10 +367,6 @@ config PPC_ADV_DEBUG_DAC_RANGE
 	depends on PPC_ADV_DEBUG_REGS && 44x
 	default y
 
-config PPC_EMULATE_SSTEP
-	bool
-	default y if KPROBES || UPROBES || XMON || HAVE_HW_BREAKPOINT
-
 config ZONE_DMA32
 	bool
 	default y if PPC64
@@ -394,7 +401,7 @@ config HUGETLB_PAGE_SIZE_VARIABLE
 
 config MATH_EMULATION
 	bool "Math emulation"
-	depends on 4xx || 8xx || PPC_MPC832x || BOOKE
+	depends on 4xx || PPC_8xx || PPC_MPC832x || BOOKE
 	---help---
 	  Some PowerPC chips designed for embedded applications do not have
 	  a floating-point unit and therefore do not implement the
@@ -956,9 +963,9 @@ config PPC_PCI_CHOICE
 
 config PCI
 	bool "PCI support" if PPC_PCI_CHOICE
-	default y if !40x && !CPM2 && !8xx && !PPC_83xx \
+	default y if !40x && !CPM2 && !PPC_8xx && !PPC_83xx \
 		&& !PPC_85xx && !PPC_86xx && !GAMECUBE_COMMON
-	default PCI_QSPAN if !4xx && !CPM2 && 8xx
+	default PCI_QSPAN if PPC_8xx
 	select GENERIC_PCI_IOMAP
 	help
 	  Find out whether your system includes a PCI bus. PCI is the name of
@@ -974,7 +981,7 @@ config PCI_SYSCALL
 
 config PCI_QSPAN
 	bool "QSpan PCI"
-	depends on !4xx && !CPM2 && 8xx
+	depends on PPC_8xx
 	select PPC_I8259
 	help
 	  Say Y here if you have a system based on a Motorola 8xx-series
@@ -1165,12 +1172,23 @@ config CONSISTENT_SIZE
 
 config PIN_TLB
 	bool "Pinned Kernel TLBs (860 ONLY)"
-	depends on ADVANCED_OPTIONS && 8xx
+	depends on ADVANCED_OPTIONS && PPC_8xx && \
+		   !DEBUG_PAGEALLOC && !STRICT_KERNEL_RWX
+
+config PIN_TLB_DATA
+	bool "Pinned TLB for DATA"
+	depends on PIN_TLB
+	default y
 
 config PIN_TLB_IMMR
 	bool "Pinned TLB for IMMR"
 	depends on PIN_TLB
 	default y
+
+config PIN_TLB_TEXT
+	bool "Pinned TLB for TEXT"
+	depends on PIN_TLB
+	default y
 endmenu
 
 if PPC64
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index e2b3e7a00c9e..1381693a4a51 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -250,7 +250,7 @@ KBUILD_AFLAGS += $(aflags-y)
 KBUILD_CFLAGS += $(cflags-y)
 
 head-y				:= arch/powerpc/kernel/head_$(BITS).o
-head-$(CONFIG_8xx)		:= arch/powerpc/kernel/head_8xx.o
+head-$(CONFIG_PPC_8xx)		:= arch/powerpc/kernel/head_8xx.o
 head-$(CONFIG_40x)		:= arch/powerpc/kernel/head_40x.o
 head-$(CONFIG_44x)		:= arch/powerpc/kernel/head_44x.o
 head-$(CONFIG_FSL_BOOKE)	:= arch/powerpc/kernel/head_fsl_booke.o
@@ -317,6 +317,10 @@ PHONY += ppc64le_defconfig
 ppc64le_defconfig:
 	$(call merge_into_defconfig,ppc64_defconfig,le)
 
+PHONY += powernv_be_defconfig
+powernv_be_defconfig:
+	$(call merge_into_defconfig,powernv_defconfig,be)
+
 PHONY += mpc85xx_defconfig
 mpc85xx_defconfig:
 	$(call merge_into_defconfig,mpc85xx_basic_defconfig,\
diff --git a/arch/powerpc/boot/4xx.c b/arch/powerpc/boot/4xx.c
index 9d3bd4c45a24..f7da65169124 100644
--- a/arch/powerpc/boot/4xx.c
+++ b/arch/powerpc/boot/4xx.c
@@ -564,7 +564,7 @@ void ibm405gp_fixup_clocks(unsigned int sys_clk, unsigned int ser_clk)
 		fbdv = 16;
 	cbdv = ((pllmr & 0x00060000) >> 17) + 1; /* CPU:PLB */
 	opdv = ((pllmr & 0x00018000) >> 15) + 1; /* PLB:OPB */
-	ppdv = ((pllmr & 0x00001800) >> 13) + 1; /* PLB:PCI */
+	ppdv = ((pllmr & 0x00006000) >> 13) + 1; /* PLB:PCI */
 	epdv = ((pllmr & 0x00001800) >> 11) + 2; /* PLB:EBC */
 	udiv = ((cpc0_cr0 & 0x3e) >> 1) + 1;
 
diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
index 6f952fe1f084..c4e6fe35c075 100644
--- a/arch/powerpc/boot/Makefile
+++ b/arch/powerpc/boot/Makefile
@@ -107,17 +107,18 @@ src-wlib-y := string.S crt0.S stdio.c decompress.c main.c \
 		$(libfdt) libfdt-wrapper.c \
 		ns16550.c serial.c simple_alloc.c div64.S util.S \
 		elf_util.c $(zlib-y) devtree.c stdlib.c \
-		oflib.c ofconsole.c cuboot.c mpsc.c cpm-serial.c \
-		uartlite.c mpc52xx-psc.c opal.c
+		oflib.c ofconsole.c cuboot.c cpm-serial.c \
+		uartlite.c opal.c
+src-wlib-$(CONFIG_PPC_MPC52XX) += mpc52xx-psc.c
 src-wlib-$(CONFIG_PPC64_BOOT_WRAPPER) +=  opal-calls.S
 ifndef CONFIG_PPC64_BOOT_WRAPPER
 src-wlib-y += crtsavres.S
 endif
 src-wlib-$(CONFIG_40x) += 4xx.c planetcore.c
 src-wlib-$(CONFIG_44x) += 4xx.c ebony.c bamboo.c
-src-wlib-$(CONFIG_8xx) += mpc8xx.c planetcore.c fsl-soc.c
+src-wlib-$(CONFIG_PPC_8xx) += mpc8xx.c planetcore.c fsl-soc.c
 src-wlib-$(CONFIG_PPC_82xx) += pq2.c fsl-soc.c planetcore.c
-src-wlib-$(CONFIG_EMBEDDED6xx) += mv64x60.c mv64x60_i2c.c ugecon.c fsl-soc.c
+src-wlib-$(CONFIG_EMBEDDED6xx) += mpsc.c mv64x60.c mv64x60_i2c.c ugecon.c fsl-soc.c
 
 src-plat-y := of.c epapr.c
 src-plat-$(CONFIG_40x) += fixed-head.S ep405.c cuboot-hotfoot.c \
@@ -132,7 +133,7 @@ src-plat-$(CONFIG_44x) += treeboot-ebony.c cuboot-ebony.c treeboot-bamboo.c \
 				treeboot-iss4xx.c treeboot-currituck.c \
 				treeboot-akebono.c \
 				simpleboot.c fixed-head.S virtex.c
-src-plat-$(CONFIG_8xx) += cuboot-8xx.c fixed-head.S ep88xc.c redboot-8xx.c
+src-plat-$(CONFIG_PPC_8xx) += cuboot-8xx.c fixed-head.S ep88xc.c redboot-8xx.c
 src-plat-$(CONFIG_PPC_MPC52xx) += cuboot-52xx.c
 src-plat-$(CONFIG_PPC_82xx) += cuboot-pq2.c fixed-head.S ep8248e.c cuboot-824x.c
 src-plat-$(CONFIG_PPC_83xx) += cuboot-83xx.c fixed-head.S redboot-83xx.c
diff --git a/arch/powerpc/boot/crt0.S b/arch/powerpc/boot/crt0.S
index 12866ccb5694..dcf2f15e6797 100644
--- a/arch/powerpc/boot/crt0.S
+++ b/arch/powerpc/boot/crt0.S
@@ -26,17 +26,17 @@ _zimage_start_opd:
 
 #ifdef __powerpc64__
 .balign 8
-p_start:	.llong	_start
-p_etext:	.llong	_etext
-p_bss_start:	.llong	__bss_start
-p_end:		.llong	_end
-
-p_toc:		.llong	__toc_start + 0x8000 - p_base
-p_dyn:		.llong	__dynamic_start - p_base
-p_rela:		.llong	__rela_dyn_start - p_base
-p_prom:		.llong	0
+p_start:	.8byte	_start
+p_etext:	.8byte	_etext
+p_bss_start:	.8byte	__bss_start
+p_end:		.8byte	_end
+
+p_toc:		.8byte	__toc_start + 0x8000 - p_base
+p_dyn:		.8byte	__dynamic_start - p_base
+p_rela:		.8byte	__rela_dyn_start - p_base
+p_prom:		.8byte	0
 	.weak	_platform_stack_top
-p_pstack:	.llong	_platform_stack_top
+p_pstack:	.8byte	_platform_stack_top
 #else
 p_start:	.long	_start
 p_etext:	.long	_etext
diff --git a/arch/powerpc/boot/dts/fsp2.dts b/arch/powerpc/boot/dts/fsp2.dts
index 475953ada707..f10a64aeb83b 100644
--- a/arch/powerpc/boot/dts/fsp2.dts
+++ b/arch/powerpc/boot/dts/fsp2.dts
@@ -52,6 +52,7 @@
 	clocks {
 		mmc_clk: mmc_clk {
 			compatible = "fixed-clock";
+			#clock-cells = <0>;
 			clock-frequency = <50000000>;
 			clock-output-names = "mmc_clk";
 		};
@@ -359,20 +360,6 @@
 		interrupts = <31 0x4 15 0x84>;
 	};
 
-	mmc0: sdhci@020c0000 {
-		compatible	= "st,sdhci-stih407", "st,sdhci";
-		status		= "disabled";
-		reg		= <0x020c0000 0x20000>;
-		reg-names	= "mmc";
-		interrupt-parent = <&UIC1_3>;
-		interrupts	= <21 0x4 22 0x4>;
-		interrupt-names	= "mmcirq";
-		pinctrl-names	= "default";
-		pinctrl-0	= <>;
-		clock-names	= "mmc";
-		clocks		= <&mmc_clk>;
-	};
-
 	plb6 {
 		compatible = "ibm,plb6";
 		#address-cells = <2>;
@@ -501,6 +488,24 @@
 					 /*RXDE*/  4 &UIC1_2 13 0x4>;
 		};
 
+		mmc0: mmc@20c0000 {
+			compatible	= "st,sdhci-stih407", "st,sdhci";
+			reg		= <0x020c0000 0x20000>;
+			reg-names	= "mmc";
+			interrupts	= <21 0x4>;
+			interrupt-parent = <&UIC1_3>;
+			interrupt-names	= "mmcirq";
+			pinctrl-names	= "default";
+			pinctrl-0	= <>;
+			clock-names	= "mmc";
+			clocks		= <&mmc_clk>;
+			bus-width	= <4>;
+			non-removable;
+			sd-uhs-sdr50;
+			sd-uhs-sdr104;
+			sd-uhs-ddr50;
+		};
+
 		opb {
 			compatible = "ibm,opb";
 			#address-cells = <1>;
diff --git a/arch/powerpc/boot/ppc_asm.h b/arch/powerpc/boot/ppc_asm.h
index 68e388ee94fe..c63299f9fdd9 100644
--- a/arch/powerpc/boot/ppc_asm.h
+++ b/arch/powerpc/boot/ppc_asm.h
@@ -80,4 +80,12 @@
 	.long 0xa6037b7d; /* mtsrr1 r11				*/ \
 	.long 0x2400004c  /* rfid				*/
 
+#ifdef CONFIG_PPC_8xx
+#define MFTBL(dest)			mftb dest
+#define MFTBU(dest)			mftbu dest
+#else
+#define MFTBL(dest)			mfspr dest, SPRN_TBRL
+#define MFTBU(dest)			mfspr dest, SPRN_TBRU
+#endif
+
 #endif /* _PPC64_PPC_ASM_H */
diff --git a/arch/powerpc/boot/serial.c b/arch/powerpc/boot/serial.c
index e04c1e4063ae..7b5c02b1afd0 100644
--- a/arch/powerpc/boot/serial.c
+++ b/arch/powerpc/boot/serial.c
@@ -120,15 +120,19 @@ int serial_console_init(void)
 	if (dt_is_compatible(devp, "ns16550") ||
 	    dt_is_compatible(devp, "pnpPNP,501"))
 		rc = ns16550_console_init(devp, &serial_cd);
+#ifdef CONFIG_EMBEDDED6xx
 	else if (dt_is_compatible(devp, "marvell,mv64360-mpsc"))
 		rc = mpsc_console_init(devp, &serial_cd);
+#endif
 	else if (dt_is_compatible(devp, "fsl,cpm1-scc-uart") ||
 	         dt_is_compatible(devp, "fsl,cpm1-smc-uart") ||
 	         dt_is_compatible(devp, "fsl,cpm2-scc-uart") ||
 	         dt_is_compatible(devp, "fsl,cpm2-smc-uart"))
 		rc = cpm_console_init(devp, &serial_cd);
+#ifdef CONFIG_PPC_MPC52XX
 	else if (dt_is_compatible(devp, "fsl,mpc5200-psc-uart"))
 		rc = mpc5200_psc_console_init(devp, &serial_cd);
+#endif
 	else if (dt_is_compatible(devp, "xlnx,opb-uartlite-1.00.b") ||
 		 dt_is_compatible(devp, "xlnx,xps-uartlite-1.00.a"))
 		rc = uartlite_console_init(devp, &serial_cd);
diff --git a/arch/powerpc/boot/util.S b/arch/powerpc/boot/util.S
index 243b8497d58b..ec069177d942 100644
--- a/arch/powerpc/boot/util.S
+++ b/arch/powerpc/boot/util.S
@@ -71,32 +71,18 @@ udelay:
 	add	r4,r4,r5
 	addi	r4,r4,-1
 	divw	r4,r4,r5	/* BUS ticks */
-#ifdef CONFIG_8xx
-1:	mftbu	r5
-	mftb	r6
-	mftbu	r7
-#else
-1:	mfspr	r5, SPRN_TBRU
-	mfspr	r6, SPRN_TBRL
-	mfspr	r7, SPRN_TBRU
-#endif
+1:	MFTBU(r5)
+	MFTBL(r6)
+	MFTBU(r7)
 	cmpw	0,r5,r7
 	bne	1b		/* Get [synced] base time */
 	addc	r9,r6,r4	/* Compute end time */
 	addze	r8,r5
-#ifdef CONFIG_8xx
-2:	mftbu	r5
-#else
-2:	mfspr	r5, SPRN_TBRU
-#endif
+2:	MFTBU(r5)
 	cmpw	0,r5,r8
 	blt	2b
 	bgt	3f
-#ifdef CONFIG_8xx
-	mftb	r6
-#else
-	mfspr	r6, SPRN_TBRL
-#endif
+	MFTBL(r6)
 	cmpw	0,r6,r9
 	blt	2b
 3:	blr
diff --git a/arch/powerpc/configs/40x/acadia_defconfig b/arch/powerpc/configs/40x/acadia_defconfig
index 3438ed99c088..e57344c3b0d7 100644
--- a/arch/powerpc/configs/40x/acadia_defconfig
+++ b/arch/powerpc/configs/40x/acadia_defconfig
@@ -64,4 +64,3 @@ CONFIG_CRYPTO_ECB=y
 CONFIG_CRYPTO_PCBC=y
 CONFIG_CRYPTO_MD5=y
 CONFIG_CRYPTO_DES=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/40x/ep405_defconfig b/arch/powerpc/configs/40x/ep405_defconfig
index 36c44c0b560c..0f66f8a87be8 100644
--- a/arch/powerpc/configs/40x/ep405_defconfig
+++ b/arch/powerpc/configs/40x/ep405_defconfig
@@ -64,4 +64,3 @@ CONFIG_CRYPTO_ECB=y
 CONFIG_CRYPTO_PCBC=y
 CONFIG_CRYPTO_MD5=y
 CONFIG_CRYPTO_DES=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/40x/kilauea_defconfig b/arch/powerpc/configs/40x/kilauea_defconfig
index ad2156c6e2fc..b5cc7426c21f 100644
--- a/arch/powerpc/configs/40x/kilauea_defconfig
+++ b/arch/powerpc/configs/40x/kilauea_defconfig
@@ -72,4 +72,3 @@ CONFIG_CRYPTO_ECB=y
 CONFIG_CRYPTO_PCBC=y
 CONFIG_CRYPTO_MD5=y
 CONFIG_CRYPTO_DES=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/40x/klondike_defconfig b/arch/powerpc/configs/40x/klondike_defconfig
index 28adb782ec51..caab658d1da1 100644
--- a/arch/powerpc/configs/40x/klondike_defconfig
+++ b/arch/powerpc/configs/40x/klondike_defconfig
@@ -26,7 +26,6 @@ CONFIG_SCSI_SAS_ATTRS=y
 # CONFIG_VT is not set
 # CONFIG_UNIX98_PTYS is not set
 # CONFIG_LEGACY_PTYS is not set
-# CONFIG_DEVKMEM is not set
 # CONFIG_HW_RANDOM is not set
 # CONFIG_HWMON is not set
 # CONFIG_USB_SUPPORT is not set
diff --git a/arch/powerpc/configs/40x/makalu_defconfig b/arch/powerpc/configs/40x/makalu_defconfig
index a00f434c4d47..e0b1489b7c7b 100644
--- a/arch/powerpc/configs/40x/makalu_defconfig
+++ b/arch/powerpc/configs/40x/makalu_defconfig
@@ -62,4 +62,3 @@ CONFIG_CRYPTO_ECB=y
 CONFIG_CRYPTO_PCBC=y
 CONFIG_CRYPTO_MD5=y
 CONFIG_CRYPTO_DES=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/40x/obs600_defconfig b/arch/powerpc/configs/40x/obs600_defconfig
index e500e6a12b3e..aac06d2ad01a 100644
--- a/arch/powerpc/configs/40x/obs600_defconfig
+++ b/arch/powerpc/configs/40x/obs600_defconfig
@@ -72,4 +72,3 @@ CONFIG_CRYPTO_ECB=y
 CONFIG_CRYPTO_PCBC=y
 CONFIG_CRYPTO_MD5=y
 CONFIG_CRYPTO_DES=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/40x/virtex_defconfig b/arch/powerpc/configs/40x/virtex_defconfig
index 65dc084a154c..a2b2770eee8f 100644
--- a/arch/powerpc/configs/40x/virtex_defconfig
+++ b/arch/powerpc/configs/40x/virtex_defconfig
@@ -41,9 +41,9 @@ CONFIG_NETDEVICES=y
 CONFIG_SERIO_XILINX_XPS_PS2=y
 CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_OF_PLATFORM=y
 CONFIG_SERIAL_UARTLITE=y
 CONFIG_SERIAL_UARTLITE_CONSOLE=y
-CONFIG_SERIAL_OF_PLATFORM=y
 CONFIG_XILINX_HWICAP=y
 CONFIG_GPIOLIB=y
 CONFIG_GPIO_SYSFS=y
@@ -74,4 +74,3 @@ CONFIG_FONT_8x16=y
 CONFIG_PRINTK_TIME=y
 CONFIG_DEBUG_INFO=y
 CONFIG_DEBUG_KERNEL=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/40x/walnut_defconfig b/arch/powerpc/configs/40x/walnut_defconfig
index 567f99bd64a3..6faa03cd661c 100644
--- a/arch/powerpc/configs/40x/walnut_defconfig
+++ b/arch/powerpc/configs/40x/walnut_defconfig
@@ -57,4 +57,3 @@ CONFIG_CRYPTO_ECB=y
 CONFIG_CRYPTO_PCBC=y
 CONFIG_CRYPTO_MD5=y
 CONFIG_CRYPTO_DES=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/44x/akebono_defconfig b/arch/powerpc/configs/44x/akebono_defconfig
index 143b2fbddb46..9fcd361607e2 100644
--- a/arch/powerpc/configs/44x/akebono_defconfig
+++ b/arch/powerpc/configs/44x/akebono_defconfig
@@ -123,7 +123,6 @@ CONFIG_NLS_DEFAULT="n"
 CONFIG_NLS_CODEPAGE_437=y
 CONFIG_NLS_ISO8859_1=y
 CONFIG_DEBUG_INFO=y
-CONFIG_DEBUG_FS=y
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_DETECT_HUNG_TASK=y
 CONFIG_XMON=y
@@ -135,5 +134,4 @@ CONFIG_CRYPTO_PCBC=y
 CONFIG_CRYPTO_MD5=y
 CONFIG_CRYPTO_SHA1_PPC=y
 CONFIG_CRYPTO_DES=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
 # CONFIG_CRYPTO_HW is not set
diff --git a/arch/powerpc/configs/44x/bamboo_defconfig b/arch/powerpc/configs/44x/bamboo_defconfig
index 477d99fefd9a..6f3a6ecc81e7 100644
--- a/arch/powerpc/configs/44x/bamboo_defconfig
+++ b/arch/powerpc/configs/44x/bamboo_defconfig
@@ -55,4 +55,3 @@ CONFIG_CRYPTO_ECB=y
 CONFIG_CRYPTO_PCBC=y
 CONFIG_CRYPTO_MD5=y
 CONFIG_CRYPTO_DES=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/44x/currituck_defconfig b/arch/powerpc/configs/44x/currituck_defconfig
index 3799a26de6f4..5f1df5fe4453 100644
--- a/arch/powerpc/configs/44x/currituck_defconfig
+++ b/arch/powerpc/configs/44x/currituck_defconfig
@@ -81,7 +81,6 @@ CONFIG_NFS_V3_ACL=y
 CONFIG_NFS_V4=y
 CONFIG_NLS_DEFAULT="n"
 CONFIG_DEBUG_INFO=y
-CONFIG_DEBUG_FS=y
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_DETECT_HUNG_TASK=y
 CONFIG_XMON=y
@@ -94,5 +93,4 @@ CONFIG_CRYPTO_ECB=y
 CONFIG_CRYPTO_PCBC=y
 CONFIG_CRYPTO_MD5=y
 CONFIG_CRYPTO_DES=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
 # CONFIG_CRYPTO_HW is not set
diff --git a/arch/powerpc/configs/44x/ebony_defconfig b/arch/powerpc/configs/44x/ebony_defconfig
index c265f54ab9e5..e2b6578993d5 100644
--- a/arch/powerpc/configs/44x/ebony_defconfig
+++ b/arch/powerpc/configs/44x/ebony_defconfig
@@ -59,5 +59,4 @@ CONFIG_CRYPTO_ECB=y
 CONFIG_CRYPTO_PCBC=y
 CONFIG_CRYPTO_MD5=y
 CONFIG_CRYPTO_DES=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
 # CONFIG_CRYPTO_HW is not set
diff --git a/arch/powerpc/configs/44x/eiger_defconfig b/arch/powerpc/configs/44x/eiger_defconfig
index bb6bd6d90821..f6dc23fef683 100644
--- a/arch/powerpc/configs/44x/eiger_defconfig
+++ b/arch/powerpc/configs/44x/eiger_defconfig
@@ -84,18 +84,14 @@ CONFIG_CRYPTO_CCM=y
 CONFIG_CRYPTO_GCM=y
 CONFIG_CRYPTO_CBC=y
 CONFIG_CRYPTO_CTS=y
-CONFIG_CRYPTO_ECB=y
 CONFIG_CRYPTO_LRW=y
 CONFIG_CRYPTO_PCBC=y
 CONFIG_CRYPTO_XTS=y
-CONFIG_CRYPTO_HMAC=y
 CONFIG_CRYPTO_XCBC=y
 CONFIG_CRYPTO_MD4=y
 CONFIG_CRYPTO_MD5=y
 CONFIG_CRYPTO_SHA1=y
-CONFIG_CRYPTO_SHA256=y
 CONFIG_CRYPTO_SHA512=y
 CONFIG_CRYPTO_ARC4=y
 CONFIG_CRYPTO_BLOWFISH=y
 CONFIG_CRYPTO_DES=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/44x/fsp2_defconfig b/arch/powerpc/configs/44x/fsp2_defconfig
index e8e6a6999852..bae6b26bcfba 100644
--- a/arch/powerpc/configs/44x/fsp2_defconfig
+++ b/arch/powerpc/configs/44x/fsp2_defconfig
@@ -92,8 +92,10 @@ CONFIG_MMC_DEBUG=y
 CONFIG_MMC_SDHCI=y
 CONFIG_MMC_SDHCI_PLTFM=y
 CONFIG_MMC_SDHCI_OF_ARASAN=y
+CONFIG_MMC_SDHCI_ST=y
 CONFIG_RTC_CLASS=y
 CONFIG_RTC_DRV_M41T80=y
+CONFIG_RESET_CONTROLLER=y
 CONFIG_EXT2_FS=y
 CONFIG_EXT4_FS=y
 CONFIG_EXT4_FS_POSIX_ACL=y
@@ -115,7 +117,6 @@ CONFIG_PRINTK_TIME=y
 CONFIG_MESSAGE_LOGLEVEL_DEFAULT=3
 CONFIG_DYNAMIC_DEBUG=y
 CONFIG_DEBUG_INFO=y
-CONFIG_DEBUG_FS=y
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_DETECT_HUNG_TASK=y
 CONFIG_CRYPTO_CBC=y
diff --git a/arch/powerpc/configs/44x/icon_defconfig b/arch/powerpc/configs/44x/icon_defconfig
index 060f2edddb71..4453a4590b1a 100644
--- a/arch/powerpc/configs/44x/icon_defconfig
+++ b/arch/powerpc/configs/44x/icon_defconfig
@@ -47,8 +47,6 @@ CONFIG_FUSION_LOGGING=y
 CONFIG_NETDEVICES=y
 CONFIG_IBM_EMAC=y
 # CONFIG_WLAN is not set
-CONFIG_INPUT_MOUSEDEV_SCREEN_X=640
-CONFIG_INPUT_MOUSEDEV_SCREEN_Y=480
 # CONFIG_MOUSE_PS2_ALPS is not set
 # CONFIG_MOUSE_PS2_LOGIPS2PP is not set
 # CONFIG_MOUSE_PS2_SYNAPTICS is not set
@@ -94,4 +92,3 @@ CONFIG_CRYPTO_ECB=y
 CONFIG_CRYPTO_PCBC=y
 CONFIG_CRYPTO_MD5=y
 CONFIG_CRYPTO_DES=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/44x/iss476-smp_defconfig b/arch/powerpc/configs/44x/iss476-smp_defconfig
index 115a6b2be18b..d24bfa6ecd62 100644
--- a/arch/powerpc/configs/44x/iss476-smp_defconfig
+++ b/arch/powerpc/configs/44x/iss476-smp_defconfig
@@ -63,7 +63,6 @@ CONFIG_TMPFS=y
 CONFIG_CRAMFS=y
 # CONFIG_NETWORK_FILESYSTEMS is not set
 CONFIG_DEBUG_INFO=y
-CONFIG_DEBUG_FS=y
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_DETECT_HUNG_TASK=y
 CONFIG_PPC_EARLY_DEBUG=y
@@ -72,5 +71,4 @@ CONFIG_CRYPTO_ECB=y
 CONFIG_CRYPTO_PCBC=y
 CONFIG_CRYPTO_MD5=y
 CONFIG_CRYPTO_DES=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
 # CONFIG_CRYPTO_HW is not set
diff --git a/arch/powerpc/configs/44x/katmai_defconfig b/arch/powerpc/configs/44x/katmai_defconfig
index b999048c4ae6..5d3f685a7af8 100644
--- a/arch/powerpc/configs/44x/katmai_defconfig
+++ b/arch/powerpc/configs/44x/katmai_defconfig
@@ -60,4 +60,3 @@ CONFIG_CRYPTO_ECB=y
 CONFIG_CRYPTO_PCBC=y
 CONFIG_CRYPTO_MD5=y
 CONFIG_CRYPTO_DES=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/44x/rainier_defconfig b/arch/powerpc/configs/44x/rainier_defconfig
index b8c9ee45d0a2..7b8355a5698d 100644
--- a/arch/powerpc/configs/44x/rainier_defconfig
+++ b/arch/powerpc/configs/44x/rainier_defconfig
@@ -66,4 +66,3 @@ CONFIG_CRYPTO_ECB=y
 CONFIG_CRYPTO_PCBC=y
 CONFIG_CRYPTO_MD5=y
 CONFIG_CRYPTO_DES=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/44x/redwood_defconfig b/arch/powerpc/configs/44x/redwood_defconfig
index a4bb048448da..918cfb63f0c8 100644
--- a/arch/powerpc/configs/44x/redwood_defconfig
+++ b/arch/powerpc/configs/44x/redwood_defconfig
@@ -83,18 +83,14 @@ CONFIG_CRYPTO_CCM=y
 CONFIG_CRYPTO_GCM=y
 CONFIG_CRYPTO_CBC=y
 CONFIG_CRYPTO_CTS=y
-CONFIG_CRYPTO_ECB=y
 CONFIG_CRYPTO_LRW=y
 CONFIG_CRYPTO_PCBC=y
 CONFIG_CRYPTO_XTS=y
-CONFIG_CRYPTO_HMAC=y
 CONFIG_CRYPTO_XCBC=y
 CONFIG_CRYPTO_MD4=y
 CONFIG_CRYPTO_MD5=y
 CONFIG_CRYPTO_SHA1=y
-CONFIG_CRYPTO_SHA256=y
 CONFIG_CRYPTO_SHA512=y
 CONFIG_CRYPTO_ARC4=y
 CONFIG_CRYPTO_BLOWFISH=y
 CONFIG_CRYPTO_DES=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/44x/sequoia_defconfig b/arch/powerpc/configs/44x/sequoia_defconfig
index b3792fd8111d..1e04122912f3 100644
--- a/arch/powerpc/configs/44x/sequoia_defconfig
+++ b/arch/powerpc/configs/44x/sequoia_defconfig
@@ -67,4 +67,3 @@ CONFIG_CRYPTO_ECB=y
 CONFIG_CRYPTO_PCBC=y
 CONFIG_CRYPTO_MD5=y
 CONFIG_CRYPTO_DES=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/44x/taishan_defconfig b/arch/powerpc/configs/44x/taishan_defconfig
index ff6f86241418..42cc7b4ed95f 100644
--- a/arch/powerpc/configs/44x/taishan_defconfig
+++ b/arch/powerpc/configs/44x/taishan_defconfig
@@ -61,4 +61,3 @@ CONFIG_CRYPTO_ECB=y
 CONFIG_CRYPTO_PCBC=y
 CONFIG_CRYPTO_MD5=y
 CONFIG_CRYPTO_DES=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/44x/virtex5_defconfig b/arch/powerpc/configs/44x/virtex5_defconfig
index ce052064bcbb..99cc3dc02df1 100644
--- a/arch/powerpc/configs/44x/virtex5_defconfig
+++ b/arch/powerpc/configs/44x/virtex5_defconfig
@@ -40,9 +40,9 @@ CONFIG_NETDEVICES=y
 CONFIG_SERIO_XILINX_XPS_PS2=y
 CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_OF_PLATFORM=y
 CONFIG_SERIAL_UARTLITE=y
 CONFIG_SERIAL_UARTLITE_CONSOLE=y
-CONFIG_SERIAL_OF_PLATFORM=y
 CONFIG_XILINX_HWICAP=y
 CONFIG_GPIOLIB=y
 CONFIG_GPIO_SYSFS=y
@@ -73,4 +73,3 @@ CONFIG_FONT_8x16=y
 CONFIG_PRINTK_TIME=y
 CONFIG_DEBUG_INFO=y
 CONFIG_DEBUG_KERNEL=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/44x/warp_defconfig b/arch/powerpc/configs/44x/warp_defconfig
index ab932488e68b..b5c866073efd 100644
--- a/arch/powerpc/configs/44x/warp_defconfig
+++ b/arch/powerpc/configs/44x/warp_defconfig
@@ -97,4 +97,3 @@ CONFIG_MAGIC_SYSRQ=y
 CONFIG_DETECT_HUNG_TASK=y
 # CONFIG_SCHED_DEBUG is not set
 # CONFIG_DEBUG_BUGVERBOSE is not set
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/52xx/cm5200_defconfig b/arch/powerpc/configs/52xx/cm5200_defconfig
index c1faac800806..73948e88ac82 100644
--- a/arch/powerpc/configs/52xx/cm5200_defconfig
+++ b/arch/powerpc/configs/52xx/cm5200_defconfig
@@ -77,4 +77,3 @@ CONFIG_DETECT_HUNG_TASK=y
 # CONFIG_DEBUG_BUGVERBOSE is not set
 CONFIG_CRYPTO_ECB=y
 CONFIG_CRYPTO_PCBC=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/52xx/lite5200b_defconfig b/arch/powerpc/configs/52xx/lite5200b_defconfig
index 9493b02ac660..6fc7f786c83c 100644
--- a/arch/powerpc/configs/52xx/lite5200b_defconfig
+++ b/arch/powerpc/configs/52xx/lite5200b_defconfig
@@ -14,6 +14,7 @@ CONFIG_PPC_MPC52xx=y
 CONFIG_PPC_MPC5200_SIMPLE=y
 CONFIG_PPC_LITE5200=y
 # CONFIG_PPC_PMAC is not set
+CONFIG_GEN_RTC=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
@@ -44,7 +45,6 @@ CONFIG_SERIAL_MPC52xx=y
 CONFIG_SERIAL_MPC52xx_CONSOLE=y
 CONFIG_SERIAL_MPC52xx_CONSOLE_BAUD=115200
 # CONFIG_HW_RANDOM is not set
-CONFIG_GEN_RTC=y
 CONFIG_I2C=y
 CONFIG_I2C_CHARDEV=y
 CONFIG_I2C_MPC=y
@@ -62,4 +62,3 @@ CONFIG_PRINTK_TIME=y
 CONFIG_DEBUG_INFO=y
 CONFIG_DETECT_HUNG_TASK=y
 # CONFIG_DEBUG_BUGVERBOSE is not set
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/52xx/motionpro_defconfig b/arch/powerpc/configs/52xx/motionpro_defconfig
index fe8126bc1655..ae2a1f74103b 100644
--- a/arch/powerpc/configs/52xx/motionpro_defconfig
+++ b/arch/powerpc/configs/52xx/motionpro_defconfig
@@ -41,16 +41,16 @@ CONFIG_ATA=y
 CONFIG_PATA_MPC52xx=y
 CONFIG_NETDEVICES=y
 CONFIG_FEC_MPC52xx=y
-CONFIG_MARVELL_PHY=y
+CONFIG_MDIO_BITBANG=y
+CONFIG_BROADCOM_PHY=y
+CONFIG_CICADA_PHY=y
 CONFIG_DAVICOM_PHY=y
-CONFIG_QSEMI_PHY=y
+CONFIG_ICPLUS_PHY=y
 CONFIG_LXT_PHY=y
-CONFIG_CICADA_PHY=y
-CONFIG_VITESSE_PHY=y
+CONFIG_MARVELL_PHY=y
+CONFIG_QSEMI_PHY=y
 CONFIG_SMSC_PHY=y
-CONFIG_BROADCOM_PHY=y
-CONFIG_ICPLUS_PHY=y
-CONFIG_MDIO_BITBANG=y
+CONFIG_VITESSE_PHY=y
 # CONFIG_INPUT is not set
 # CONFIG_SERIO is not set
 # CONFIG_VT is not set
@@ -90,4 +90,3 @@ CONFIG_DETECT_HUNG_TASK=y
 # CONFIG_DEBUG_BUGVERBOSE is not set
 CONFIG_CRYPTO_ECB=y
 CONFIG_CRYPTO_PCBC=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/52xx/tqm5200_defconfig b/arch/powerpc/configs/52xx/tqm5200_defconfig
index b8b316b884aa..0777e6efd22d 100644
--- a/arch/powerpc/configs/52xx/tqm5200_defconfig
+++ b/arch/powerpc/configs/52xx/tqm5200_defconfig
@@ -48,7 +48,6 @@ CONFIG_PATA_PLATFORM=y
 CONFIG_NETDEVICES=y
 CONFIG_FEC_MPC52xx=y
 CONFIG_LXT_PHY=y
-CONFIG_FIXED_PHY=y
 CONFIG_SERIAL_MPC52xx=y
 CONFIG_SERIAL_MPC52xx_CONSOLE=y
 CONFIG_SERIAL_MPC52xx_CONSOLE_BAUD=115200
@@ -92,4 +91,3 @@ CONFIG_DETECT_HUNG_TASK=y
 # CONFIG_DEBUG_BUGVERBOSE is not set
 CONFIG_CRYPTO_ECB=y
 CONFIG_CRYPTO_PCBC=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/83xx/asp8347_defconfig b/arch/powerpc/configs/83xx/asp8347_defconfig
index b60cac088a7b..dd884df32dfd 100644
--- a/arch/powerpc/configs/83xx/asp8347_defconfig
+++ b/arch/powerpc/configs/83xx/asp8347_defconfig
@@ -42,7 +42,6 @@ CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_SIZE=32768
 CONFIG_NETDEVICES=y
 CONFIG_GIANFAR=y
-# CONFIG_INPUT_MOUSEDEV is not set
 # CONFIG_INPUT_KEYBOARD is not set
 # CONFIG_INPUT_MOUSE is not set
 # CONFIG_SERIO is not set
@@ -71,4 +70,3 @@ CONFIG_NFS_V4=y
 CONFIG_ROOT_NFS=y
 CONFIG_CRYPTO_ECB=m
 CONFIG_CRYPTO_PCBC=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/83xx/kmeter1_defconfig b/arch/powerpc/configs/83xx/kmeter1_defconfig
index 9547dcdd6489..d21b5cb365f2 100644
--- a/arch/powerpc/configs/83xx/kmeter1_defconfig
+++ b/arch/powerpc/configs/83xx/kmeter1_defconfig
@@ -55,7 +55,6 @@ CONFIG_HDLC=y
 # CONFIG_INPUT is not set
 # CONFIG_SERIO is not set
 # CONFIG_VT is not set
-# CONFIG_DEVKMEM is not set
 CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
 CONFIG_HW_RANDOM=y
diff --git a/arch/powerpc/configs/83xx/mpc8313_rdb_defconfig b/arch/powerpc/configs/83xx/mpc8313_rdb_defconfig
index 80aa844c1428..1f69f4edf074 100644
--- a/arch/powerpc/configs/83xx/mpc8313_rdb_defconfig
+++ b/arch/powerpc/configs/83xx/mpc8313_rdb_defconfig
@@ -48,8 +48,6 @@ CONFIG_NETDEVICES=y
 CONFIG_GIANFAR=y
 CONFIG_E100=y
 CONFIG_CICADA_PHY=y
-CONFIG_FIXED_PHY=y
-# CONFIG_INPUT_MOUSEDEV is not set
 # CONFIG_INPUT_KEYBOARD is not set
 # CONFIG_INPUT_MOUSE is not set
 # CONFIG_SERIO is not set
@@ -87,4 +85,3 @@ CONFIG_NFS_V4=y
 CONFIG_ROOT_NFS=y
 CONFIG_DETECT_HUNG_TASK=y
 CONFIG_CRYPTO_PCBC=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/83xx/mpc8315_rdb_defconfig b/arch/powerpc/configs/83xx/mpc8315_rdb_defconfig
index d89d13bc6901..797fc3ffddee 100644
--- a/arch/powerpc/configs/83xx/mpc8315_rdb_defconfig
+++ b/arch/powerpc/configs/83xx/mpc8315_rdb_defconfig
@@ -47,7 +47,6 @@ CONFIG_MD_RAID1=y
 CONFIG_NETDEVICES=y
 CONFIG_GIANFAR=y
 CONFIG_E100=y
-# CONFIG_INPUT_MOUSEDEV is not set
 # CONFIG_INPUT_KEYBOARD is not set
 # CONFIG_INPUT_MOUSE is not set
 # CONFIG_SERIO is not set
@@ -85,4 +84,3 @@ CONFIG_NFS_V4=y
 CONFIG_ROOT_NFS=y
 CONFIG_DETECT_HUNG_TASK=y
 CONFIG_CRYPTO_PCBC=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/83xx/mpc832x_mds_defconfig b/arch/powerpc/configs/83xx/mpc832x_mds_defconfig
index e789518a2881..4f914906ee4b 100644
--- a/arch/powerpc/configs/83xx/mpc832x_mds_defconfig
+++ b/arch/powerpc/configs/83xx/mpc832x_mds_defconfig
@@ -14,7 +14,6 @@ CONFIG_PARTITION_ADVANCED=y
 # CONFIG_PPC_PMAC is not set
 CONFIG_PPC_83xx=y
 CONFIG_MPC832x_MDS=y
-CONFIG_QUICC_ENGINE=y
 CONFIG_MATH_EMULATION=y
 CONFIG_PCI=y
 CONFIG_NET=y
@@ -36,7 +35,6 @@ CONFIG_SCSI=y
 CONFIG_NETDEVICES=y
 CONFIG_UCC_GETH=y
 CONFIG_DAVICOM_PHY=y
-# CONFIG_INPUT_MOUSEDEV is not set
 # CONFIG_INPUT_KEYBOARD is not set
 # CONFIG_INPUT_MOUSE is not set
 # CONFIG_SERIO is not set
@@ -50,6 +48,7 @@ CONFIG_I2C_MPC=y
 CONFIG_WATCHDOG=y
 CONFIG_RTC_CLASS=y
 CONFIG_RTC_DRV_DS1374=y
+CONFIG_QUICC_ENGINE=y
 CONFIG_EXT2_FS=y
 CONFIG_EXT4_FS=y
 CONFIG_PROC_KCORE=y
@@ -59,4 +58,3 @@ CONFIG_NFS_V4=y
 CONFIG_ROOT_NFS=y
 CONFIG_CRYPTO_ECB=m
 CONFIG_CRYPTO_PCBC=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/83xx/mpc832x_rdb_defconfig b/arch/powerpc/configs/83xx/mpc832x_rdb_defconfig
index 917a49ca2bd1..a484eb8401e8 100644
--- a/arch/powerpc/configs/83xx/mpc832x_rdb_defconfig
+++ b/arch/powerpc/configs/83xx/mpc832x_rdb_defconfig
@@ -14,7 +14,7 @@ CONFIG_LDM_PARTITION=y
 # CONFIG_PPC_PMAC is not set
 CONFIG_PPC_83xx=y
 CONFIG_MPC832x_RDB=y
-CONFIG_QUICC_ENGINE=y
+CONFIG_GEN_RTC=y
 CONFIG_MATH_EMULATION=y
 CONFIG_PCI=y
 CONFIG_NET=y
@@ -38,7 +38,6 @@ CONFIG_NETDEVICES=y
 CONFIG_UCC_GETH=y
 CONFIG_E1000=y
 CONFIG_ICPLUS_PHY=y
-# CONFIG_INPUT_MOUSEDEV is not set
 # CONFIG_INPUT_KEYBOARD is not set
 # CONFIG_INPUT_MOUSE is not set
 # CONFIG_SERIO is not set
@@ -46,7 +45,6 @@ CONFIG_ICPLUS_PHY=y
 CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
 CONFIG_HW_RANDOM=y
-CONFIG_GEN_RTC=y
 CONFIG_I2C=y
 CONFIG_I2C_CHARDEV=y
 CONFIG_I2C_MPC=y
@@ -62,6 +60,7 @@ CONFIG_USB_OHCI_HCD_PPC_OF_BE=y
 CONFIG_USB_STORAGE=y
 CONFIG_MMC=y
 CONFIG_MMC_SPI=y
+CONFIG_QUICC_ENGINE=y
 CONFIG_EXT2_FS=y
 CONFIG_EXT4_FS=y
 CONFIG_MSDOS_FS=y
@@ -78,4 +77,3 @@ CONFIG_NLS_ISO8859_1=y
 CONFIG_CRC_T10DIF=y
 CONFIG_CRYPTO_ECB=m
 CONFIG_CRYPTO_PCBC=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/83xx/mpc834x_itx_defconfig b/arch/powerpc/configs/83xx/mpc834x_itx_defconfig
index 00f636e95cc8..37f4d93b3f81 100644
--- a/arch/powerpc/configs/83xx/mpc834x_itx_defconfig
+++ b/arch/powerpc/configs/83xx/mpc834x_itx_defconfig
@@ -49,7 +49,6 @@ CONFIG_MD_RAID1=y
 CONFIG_NETDEVICES=y
 CONFIG_GIANFAR=y
 CONFIG_CICADA_PHY=y
-CONFIG_FIXED_PHY=y
 # CONFIG_INPUT is not set
 # CONFIG_SERIO is not set
 # CONFIG_VT is not set
@@ -84,4 +83,3 @@ CONFIG_NFS_V4=y
 CONFIG_ROOT_NFS=y
 CONFIG_CRC_T10DIF=y
 CONFIG_CRYPTO_PCBC=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/83xx/mpc834x_itxgp_defconfig b/arch/powerpc/configs/83xx/mpc834x_itxgp_defconfig
index a539d44d1dba..7adb6708a761 100644
--- a/arch/powerpc/configs/83xx/mpc834x_itxgp_defconfig
+++ b/arch/powerpc/configs/83xx/mpc834x_itxgp_defconfig
@@ -75,4 +75,3 @@ CONFIG_NFS_V4=y
 CONFIG_ROOT_NFS=y
 CONFIG_CRC_T10DIF=y
 CONFIG_CRYPTO_PCBC=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/83xx/mpc834x_mds_defconfig b/arch/powerpc/configs/83xx/mpc834x_mds_defconfig
index 9f0ddc830c82..d7ce3551529d 100644
--- a/arch/powerpc/configs/83xx/mpc834x_mds_defconfig
+++ b/arch/powerpc/configs/83xx/mpc834x_mds_defconfig
@@ -35,7 +35,6 @@ CONFIG_NETDEVICES=y
 CONFIG_GIANFAR=y
 CONFIG_E100=y
 CONFIG_MARVELL_PHY=y
-# CONFIG_INPUT_MOUSEDEV is not set
 # CONFIG_INPUT_KEYBOARD is not set
 # CONFIG_INPUT_MOUSE is not set
 # CONFIG_SERIO is not set
@@ -58,4 +57,3 @@ CONFIG_NFS_V4=y
 CONFIG_ROOT_NFS=y
 CONFIG_CRYPTO_ECB=m
 CONFIG_CRYPTO_PCBC=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/83xx/mpc836x_mds_defconfig b/arch/powerpc/configs/83xx/mpc836x_mds_defconfig
index ceed4c1f0ab5..92134cee3f37 100644
--- a/arch/powerpc/configs/83xx/mpc836x_mds_defconfig
+++ b/arch/powerpc/configs/83xx/mpc836x_mds_defconfig
@@ -14,7 +14,6 @@ CONFIG_PARTITION_ADVANCED=y
 # CONFIG_PPC_PMAC is not set
 CONFIG_PPC_83xx=y
 CONFIG_MPC836x_MDS=y
-CONFIG_QUICC_ENGINE=y
 CONFIG_PCI=y
 CONFIG_NET=y
 CONFIG_PACKET=y
@@ -41,7 +40,6 @@ CONFIG_SCSI=y
 CONFIG_NETDEVICES=y
 CONFIG_UCC_GETH=y
 CONFIG_MARVELL_PHY=y
-# CONFIG_INPUT_MOUSEDEV is not set
 # CONFIG_INPUT_KEYBOARD is not set
 # CONFIG_INPUT_MOUSE is not set
 # CONFIG_SERIO is not set
@@ -55,6 +53,7 @@ CONFIG_I2C_MPC=y
 CONFIG_WATCHDOG=y
 CONFIG_RTC_CLASS=y
 CONFIG_RTC_DRV_DS1374=y
+CONFIG_QUICC_ENGINE=y
 CONFIG_EXT2_FS=y
 CONFIG_EXT4_FS=y
 CONFIG_PROC_KCORE=y
@@ -64,4 +63,3 @@ CONFIG_NFS_V4=y
 CONFIG_ROOT_NFS=y
 CONFIG_CRYPTO_ECB=m
 CONFIG_CRYPTO_PCBC=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/83xx/mpc836x_rdk_defconfig b/arch/powerpc/configs/83xx/mpc836x_rdk_defconfig
index a6819bf3ef5e..97f7ea5f205f 100644
--- a/arch/powerpc/configs/83xx/mpc836x_rdk_defconfig
+++ b/arch/powerpc/configs/83xx/mpc836x_rdk_defconfig
@@ -12,7 +12,6 @@ CONFIG_PARTITION_ADVANCED=y
 # CONFIG_PPC_PMAC is not set
 CONFIG_PPC_83xx=y
 CONFIG_MPC836x_RDK=y
-CONFIG_QUICC_ENGINE=y
 CONFIG_QE_GPIO=y
 CONFIG_PCI=y
 CONFIG_NET=y
@@ -39,11 +38,9 @@ CONFIG_BLK_DEV_RAM_SIZE=32768
 CONFIG_NETDEVICES=y
 CONFIG_UCC_GETH=y
 CONFIG_BROADCOM_PHY=y
-# CONFIG_INPUT_MOUSEDEV is not set
 # CONFIG_INPUT_KEYBOARD is not set
 # CONFIG_INPUT_MOUSE is not set
 # CONFIG_SERIO is not set
-# CONFIG_DEVKMEM is not set
 CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
 CONFIG_SERIAL_QE=y
@@ -63,6 +60,7 @@ CONFIG_FRAMEBUFFER_CONSOLE=y
 CONFIG_LOGO=y
 # CONFIG_LOGO_LINUX_MONO is not set
 # CONFIG_USB_SUPPORT is not set
+CONFIG_QUICC_ENGINE=y
 CONFIG_EXT2_FS=y
 CONFIG_EXT4_FS=y
 CONFIG_PROC_KCORE=y
@@ -72,4 +70,3 @@ CONFIG_NFS_FS=y
 CONFIG_NFS_V4=y
 CONFIG_ROOT_NFS=y
 CONFIG_PPC_EARLY_DEBUG=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/83xx/mpc837x_mds_defconfig b/arch/powerpc/configs/83xx/mpc837x_mds_defconfig
index 4bd1992e4d98..ee7510a33d06 100644
--- a/arch/powerpc/configs/83xx/mpc837x_mds_defconfig
+++ b/arch/powerpc/configs/83xx/mpc837x_mds_defconfig
@@ -11,6 +11,7 @@ CONFIG_PARTITION_ADVANCED=y
 # CONFIG_PPC_PMAC is not set
 CONFIG_PPC_83xx=y
 CONFIG_MPC837x_MDS=y
+CONFIG_GEN_RTC=y
 CONFIG_PCI=y
 CONFIG_NET=y
 CONFIG_PACKET=y
@@ -35,7 +36,6 @@ CONFIG_SATA_FSL=y
 CONFIG_NETDEVICES=y
 CONFIG_GIANFAR=y
 CONFIG_MARVELL_PHY=y
-# CONFIG_INPUT_MOUSEDEV is not set
 # CONFIG_INPUT_KEYBOARD is not set
 # CONFIG_INPUT_MOUSE is not set
 # CONFIG_SERIO is not set
@@ -43,7 +43,6 @@ CONFIG_MARVELL_PHY=y
 CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
 # CONFIG_HW_RANDOM is not set
-CONFIG_GEN_RTC=y
 CONFIG_I2C=y
 CONFIG_I2C_CHARDEV=y
 CONFIG_I2C_MPC=y
@@ -58,4 +57,3 @@ CONFIG_ROOT_NFS=y
 CONFIG_CRC_T10DIF=y
 CONFIG_CRYPTO_ECB=m
 CONFIG_CRYPTO_PCBC=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/83xx/mpc837x_rdb_defconfig b/arch/powerpc/configs/83xx/mpc837x_rdb_defconfig
index 2d4bb63882b8..8966a9af4230 100644
--- a/arch/powerpc/configs/83xx/mpc837x_rdb_defconfig
+++ b/arch/powerpc/configs/83xx/mpc837x_rdb_defconfig
@@ -11,6 +11,7 @@ CONFIG_PARTITION_ADVANCED=y
 # CONFIG_PPC_PMAC is not set
 CONFIG_PPC_83xx=y
 CONFIG_MPC837x_RDB=y
+CONFIG_GEN_RTC=y
 CONFIG_PCI=y
 CONFIG_NET=y
 CONFIG_PACKET=y
@@ -41,9 +42,7 @@ CONFIG_MD_RAID456=y
 CONFIG_NETDEVICES=y
 CONFIG_GIANFAR=y
 CONFIG_MARVELL_PHY=y
-CONFIG_FIXED_PHY=y
 CONFIG_INPUT_FF_MEMLESS=m
-# CONFIG_INPUT_MOUSEDEV is not set
 # CONFIG_INPUT_KEYBOARD is not set
 # CONFIG_INPUT_MOUSE is not set
 # CONFIG_SERIO is not set
@@ -51,7 +50,6 @@ CONFIG_INPUT_FF_MEMLESS=m
 CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
 # CONFIG_HW_RANDOM is not set
-CONFIG_GEN_RTC=y
 CONFIG_I2C=y
 CONFIG_I2C_CHARDEV=y
 CONFIG_I2C_MPC=y
@@ -86,4 +84,3 @@ CONFIG_CRC_T10DIF=y
 # CONFIG_ENABLE_MUST_CHECK is not set
 CONFIG_CRYPTO_ECB=m
 CONFIG_CRYPTO_PCBC=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/83xx/sbc834x_defconfig b/arch/powerpc/configs/83xx/sbc834x_defconfig
index b3380dbd1925..7d74699334da 100644
--- a/arch/powerpc/configs/83xx/sbc834x_defconfig
+++ b/arch/powerpc/configs/83xx/sbc834x_defconfig
@@ -11,6 +11,7 @@ CONFIG_MODULE_UNLOAD=y
 # CONFIG_PPC_PMAC is not set
 CONFIG_PPC_83xx=y
 CONFIG_SBC834x=y
+CONFIG_GEN_RTC=y
 CONFIG_PCI=y
 CONFIG_NET=y
 CONFIG_PACKET=y
@@ -41,7 +42,6 @@ CONFIG_BLK_DEV_SD=y
 CONFIG_NETDEVICES=y
 CONFIG_GIANFAR=y
 CONFIG_BROADCOM_PHY=y
-# CONFIG_INPUT_MOUSEDEV is not set
 # CONFIG_INPUT_KEYBOARD is not set
 # CONFIG_INPUT_MOUSE is not set
 # CONFIG_SERIO is not set
@@ -52,7 +52,6 @@ CONFIG_SERIAL_8250_CONSOLE=y
 CONFIG_SERIAL_8250_NR_UARTS=2
 CONFIG_SERIAL_8250_RUNTIME_UARTS=2
 # CONFIG_HW_RANDOM is not set
-CONFIG_GEN_RTC=y
 CONFIG_I2C=y
 CONFIG_I2C_CHARDEV=y
 CONFIG_I2C_MPC=y
@@ -72,5 +71,4 @@ CONFIG_NFS_V4=y
 CONFIG_ROOT_NFS=y
 CONFIG_CRYPTO_ECB=m
 CONFIG_CRYPTO_PCBC=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
 # CONFIG_CRYPTO_HW is not set
diff --git a/arch/powerpc/configs/85xx/ge_imp3a_defconfig b/arch/powerpc/configs/85xx/ge_imp3a_defconfig
index a917f7afb4f9..dd98f43b2fb8 100644
--- a/arch/powerpc/configs/85xx/ge_imp3a_defconfig
+++ b/arch/powerpc/configs/85xx/ge_imp3a_defconfig
@@ -22,7 +22,6 @@ CONFIG_MODULES=y
 CONFIG_MODULE_UNLOAD=y
 # CONFIG_BLK_DEV_BSG is not set
 CONFIG_GE_IMP3A=y
-CONFIG_QUICC_ENGINE=y
 CONFIG_QE_GPIO=y
 CONFIG_CPM2=y
 CONFIG_HIGHMEM=y
@@ -161,6 +160,7 @@ CONFIG_RTC_CLASS=y
 CONFIG_RTC_DRV_RX8581=y
 CONFIG_DMADEVICES=y
 CONFIG_FSL_DMA=y
+CONFIG_QUICC_ENGINE=y
 CONFIG_EXT2_FS=y
 CONFIG_EXT2_FS_XATTR=y
 CONFIG_EXT2_FS_POSIX_ACL=y
@@ -233,5 +233,4 @@ CONFIG_CRYPTO_CBC=y
 CONFIG_CRYPTO_MD5=y
 CONFIG_CRYPTO_SHA512=m
 CONFIG_CRYPTO_DES=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_CRYPTO_DEV_TALITOS=y
diff --git a/arch/powerpc/configs/85xx/ksi8560_defconfig b/arch/powerpc/configs/85xx/ksi8560_defconfig
index bd814dfb0bbd..9ce6f48cfb61 100644
--- a/arch/powerpc/configs/85xx/ksi8560_defconfig
+++ b/arch/powerpc/configs/85xx/ksi8560_defconfig
@@ -8,6 +8,7 @@ CONFIG_PARTITION_ADVANCED=y
 # CONFIG_MSDOS_PARTITION is not set
 CONFIG_KSI8560=y
 CONFIG_CPM2=y
+CONFIG_GEN_RTC=y
 CONFIG_HIGHMEM=y
 CONFIG_BINFMT_MISC=y
 CONFIG_MATH_EMULATION=y
@@ -39,14 +40,12 @@ CONFIG_FS_ENET=y
 CONFIG_FS_ENET_MDIO_FCC=y
 CONFIG_GIANFAR=y
 CONFIG_MARVELL_PHY=y
-# CONFIG_INPUT_MOUSEDEV is not set
 # CONFIG_INPUT_KEYBOARD is not set
 # CONFIG_INPUT_MOUSE is not set
 # CONFIG_SERIO is not set
 # CONFIG_VT is not set
 CONFIG_SERIAL_CPM=y
 CONFIG_SERIAL_CPM_CONSOLE=y
-CONFIG_GEN_RTC=y
 CONFIG_EXT2_FS=y
 CONFIG_EXT4_FS=y
 CONFIG_PROC_KCORE=y
@@ -57,4 +56,3 @@ CONFIG_DEBUG_FS=y
 CONFIG_DETECT_HUNG_TASK=y
 CONFIG_DEBUG_MUTEXES=y
 # CONFIG_DEBUG_BUGVERBOSE is not set
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/85xx/mpc8540_ads_defconfig b/arch/powerpc/configs/85xx/mpc8540_ads_defconfig
index 32af10def641..5fbc3f904046 100644
--- a/arch/powerpc/configs/85xx/mpc8540_ads_defconfig
+++ b/arch/powerpc/configs/85xx/mpc8540_ads_defconfig
@@ -9,6 +9,7 @@ CONFIG_EXPERT=y
 CONFIG_PARTITION_ADVANCED=y
 # CONFIG_MSDOS_PARTITION is not set
 CONFIG_MPC8540_ADS=y
+CONFIG_GEN_RTC=y
 CONFIG_BINFMT_MISC=y
 CONFIG_MATH_EMULATION=y
 # CONFIG_SECCOMP is not set
@@ -30,7 +31,6 @@ CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_SIZE=32768
 CONFIG_NETDEVICES=y
 CONFIG_GIANFAR=y
-# CONFIG_INPUT_MOUSEDEV is not set
 # CONFIG_INPUT_KEYBOARD is not set
 # CONFIG_INPUT_MOUSE is not set
 # CONFIG_SERIO is not set
@@ -38,7 +38,6 @@ CONFIG_GIANFAR=y
 CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
 # CONFIG_HW_RANDOM is not set
-CONFIG_GEN_RTC=y
 CONFIG_EXT2_FS=y
 CONFIG_EXT4_FS=y
 CONFIG_PROC_KCORE=y
@@ -47,4 +46,3 @@ CONFIG_NFS_FS=y
 CONFIG_ROOT_NFS=y
 CONFIG_DETECT_HUNG_TASK=y
 CONFIG_DEBUG_MUTEXES=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/85xx/mpc8560_ads_defconfig b/arch/powerpc/configs/85xx/mpc8560_ads_defconfig
index a52b2170ee33..ff981d7905c7 100644
--- a/arch/powerpc/configs/85xx/mpc8560_ads_defconfig
+++ b/arch/powerpc/configs/85xx/mpc8560_ads_defconfig
@@ -7,6 +7,7 @@ CONFIG_EXPERT=y
 CONFIG_PARTITION_ADVANCED=y
 # CONFIG_MSDOS_PARTITION is not set
 CONFIG_MPC8560_ADS=y
+CONFIG_GEN_RTC=y
 CONFIG_BINFMT_MISC=y
 CONFIG_MATH_EMULATION=y
 # CONFIG_SECCOMP is not set
@@ -32,16 +33,14 @@ CONFIG_FS_ENET=y
 # CONFIG_FS_ENET_HAS_SCC is not set
 CONFIG_GIANFAR=y
 CONFIG_E1000=y
-CONFIG_MARVELL_PHY=y
 CONFIG_DAVICOM_PHY=y
-# CONFIG_INPUT_MOUSEDEV is not set
+CONFIG_MARVELL_PHY=y
 # CONFIG_INPUT_KEYBOARD is not set
 # CONFIG_INPUT_MOUSE is not set
 # CONFIG_SERIO is not set
 # CONFIG_VT is not set
 CONFIG_SERIAL_CPM=y
 CONFIG_SERIAL_CPM_CONSOLE=y
-CONFIG_GEN_RTC=y
 CONFIG_EXT2_FS=y
 CONFIG_EXT4_FS=y
 CONFIG_PROC_KCORE=y
@@ -50,4 +49,3 @@ CONFIG_NFS_FS=y
 CONFIG_ROOT_NFS=y
 CONFIG_DETECT_HUNG_TASK=y
 CONFIG_DEBUG_MUTEXES=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/85xx/mpc85xx_cds_defconfig b/arch/powerpc/configs/85xx/mpc85xx_cds_defconfig
index 002bb48abaa3..974f0706d777 100644
--- a/arch/powerpc/configs/85xx/mpc85xx_cds_defconfig
+++ b/arch/powerpc/configs/85xx/mpc85xx_cds_defconfig
@@ -9,6 +9,7 @@ CONFIG_EXPERT=y
 CONFIG_PARTITION_ADVANCED=y
 # CONFIG_MSDOS_PARTITION is not set
 CONFIG_MPC85xx_CDS=y
+CONFIG_GEN_RTC=y
 CONFIG_BINFMT_MISC=y
 CONFIG_MATH_EMULATION=y
 # CONFIG_SECCOMP is not set
@@ -35,7 +36,6 @@ CONFIG_BLK_DEV_VIA82CXXX=y
 CONFIG_NETDEVICES=y
 CONFIG_GIANFAR=y
 CONFIG_E1000=y
-# CONFIG_INPUT_MOUSEDEV is not set
 # CONFIG_INPUT_KEYBOARD is not set
 # CONFIG_INPUT_MOUSE is not set
 # CONFIG_SERIO is not set
@@ -43,7 +43,6 @@ CONFIG_E1000=y
 CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
 # CONFIG_HW_RANDOM is not set
-CONFIG_GEN_RTC=y
 CONFIG_EXT2_FS=y
 CONFIG_EXT4_FS=y
 CONFIG_PROC_KCORE=y
@@ -52,4 +51,3 @@ CONFIG_NFS_FS=y
 CONFIG_ROOT_NFS=y
 CONFIG_DETECT_HUNG_TASK=y
 CONFIG_DEBUG_MUTEXES=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/85xx/sbc8548_defconfig b/arch/powerpc/configs/85xx/sbc8548_defconfig
index 97ae02377cf3..7e3e84a842e4 100644
--- a/arch/powerpc/configs/85xx/sbc8548_defconfig
+++ b/arch/powerpc/configs/85xx/sbc8548_defconfig
@@ -6,6 +6,7 @@ CONFIG_EXPERT=y
 CONFIG_SLAB=y
 # CONFIG_BLK_DEV_BSG is not set
 CONFIG_SBC8548=y
+CONFIG_GEN_RTC=y
 CONFIG_BINFMT_MISC=y
 CONFIG_MATH_EMULATION=y
 # CONFIG_SECCOMP is not set
@@ -36,7 +37,6 @@ CONFIG_BLK_DEV_RAM=y
 CONFIG_NETDEVICES=y
 CONFIG_GIANFAR=y
 CONFIG_BROADCOM_PHY=y
-# CONFIG_INPUT_MOUSEDEV is not set
 # CONFIG_INPUT_KEYBOARD is not set
 # CONFIG_INPUT_MOUSE is not set
 # CONFIG_SERIO is not set
@@ -44,10 +44,8 @@ CONFIG_BROADCOM_PHY=y
 CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
 # CONFIG_HW_RANDOM is not set
-CONFIG_GEN_RTC=y
 # CONFIG_USB_SUPPORT is not set
 CONFIG_PROC_KCORE=y
 CONFIG_TMPFS=y
 CONFIG_NFS_FS=y
 CONFIG_ROOT_NFS=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/85xx/socrates_defconfig b/arch/powerpc/configs/85xx/socrates_defconfig
index 13579cb30539..6106fadbbd8b 100644
--- a/arch/powerpc/configs/85xx/socrates_defconfig
+++ b/arch/powerpc/configs/85xx/socrates_defconfig
@@ -42,8 +42,6 @@ CONFIG_BLK_DEV_SD=y
 CONFIG_NETDEVICES=y
 CONFIG_GIANFAR=y
 CONFIG_MARVELL_PHY=y
-CONFIG_INPUT_MOUSEDEV_SCREEN_X=800
-CONFIG_INPUT_MOUSEDEV_SCREEN_Y=480
 CONFIG_INPUT_EVDEV=y
 # CONFIG_INPUT_KEYBOARD is not set
 # CONFIG_INPUT_MOUSE is not set
@@ -86,4 +84,3 @@ CONFIG_CRAMFS=y
 CONFIG_NFS_FS=y
 CONFIG_ROOT_NFS=y
 CONFIG_FONTS=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/85xx/stx_gp3_defconfig b/arch/powerpc/configs/85xx/stx_gp3_defconfig
index 384926f3ce1d..5b9cc01b9098 100644
--- a/arch/powerpc/configs/85xx/stx_gp3_defconfig
+++ b/arch/powerpc/configs/85xx/stx_gp3_defconfig
@@ -39,8 +39,6 @@ CONFIG_SCSI_CONSTANTS=y
 CONFIG_NETDEVICES=y
 CONFIG_GIANFAR=y
 CONFIG_MARVELL_PHY=y
-CONFIG_INPUT_MOUSEDEV_SCREEN_X=1280
-CONFIG_INPUT_MOUSEDEV_SCREEN_Y=1024
 CONFIG_INPUT_JOYDEV=m
 CONFIG_INPUT_EVDEV=m
 # CONFIG_VT is not set
@@ -68,4 +66,3 @@ CONFIG_CRC_T10DIF=m
 CONFIG_DETECT_HUNG_TASK=y
 # CONFIG_DEBUG_BUGVERBOSE is not set
 CONFIG_BDI_SWITCH=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/85xx/tqm8540_defconfig b/arch/powerpc/configs/85xx/tqm8540_defconfig
index 908f3885f4a5..98982a0e82d8 100644
--- a/arch/powerpc/configs/85xx/tqm8540_defconfig
+++ b/arch/powerpc/configs/85xx/tqm8540_defconfig
@@ -9,6 +9,7 @@ CONFIG_EXPERT=y
 CONFIG_PARTITION_ADVANCED=y
 # CONFIG_MSDOS_PARTITION is not set
 CONFIG_TQM8540=y
+CONFIG_GEN_RTC=y
 CONFIG_MATH_EMULATION=y
 CONFIG_PCI=y
 CONFIG_NET=y
@@ -35,14 +36,12 @@ CONFIG_BLK_DEV_VIA82CXXX=y
 CONFIG_NETDEVICES=y
 CONFIG_GIANFAR=y
 CONFIG_E100=y
-# CONFIG_INPUT_MOUSEDEV is not set
 # CONFIG_INPUT_KEYBOARD is not set
 # CONFIG_INPUT_MOUSE is not set
 # CONFIG_SERIO is not set
 # CONFIG_VT is not set
 CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
-CONFIG_GEN_RTC=y
 CONFIG_I2C=y
 CONFIG_I2C_CHARDEV=y
 CONFIG_I2C_MPC=y
@@ -56,4 +55,3 @@ CONFIG_JFFS2_FS=y
 CONFIG_CRAMFS=y
 CONFIG_NFS_FS=y
 CONFIG_ROOT_NFS=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/85xx/tqm8541_defconfig b/arch/powerpc/configs/85xx/tqm8541_defconfig
index f47e57610b7c..a6e21db1dafe 100644
--- a/arch/powerpc/configs/85xx/tqm8541_defconfig
+++ b/arch/powerpc/configs/85xx/tqm8541_defconfig
@@ -9,6 +9,7 @@ CONFIG_EXPERT=y
 CONFIG_PARTITION_ADVANCED=y
 # CONFIG_MSDOS_PARTITION is not set
 CONFIG_TQM8541=y
+CONFIG_GEN_RTC=y
 CONFIG_MATH_EMULATION=y
 CONFIG_PCI=y
 CONFIG_NET=y
@@ -35,7 +36,6 @@ CONFIG_BLK_DEV_VIA82CXXX=y
 CONFIG_NETDEVICES=y
 CONFIG_GIANFAR=y
 CONFIG_E100=y
-# CONFIG_INPUT_MOUSEDEV is not set
 # CONFIG_INPUT_KEYBOARD is not set
 # CONFIG_INPUT_MOUSE is not set
 # CONFIG_SERIO is not set
@@ -44,7 +44,6 @@ CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
 CONFIG_SERIAL_CPM=y
 CONFIG_SERIAL_CPM_CONSOLE=y
-CONFIG_GEN_RTC=y
 CONFIG_I2C=y
 CONFIG_I2C_CHARDEV=y
 CONFIG_I2C_MPC=y
@@ -58,4 +57,3 @@ CONFIG_JFFS2_FS=y
 CONFIG_CRAMFS=y
 CONFIG_NFS_FS=y
 CONFIG_ROOT_NFS=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/85xx/tqm8548_defconfig b/arch/powerpc/configs/85xx/tqm8548_defconfig
index 42f5d0a7698e..2697e4e8a761 100644
--- a/arch/powerpc/configs/85xx/tqm8548_defconfig
+++ b/arch/powerpc/configs/85xx/tqm8548_defconfig
@@ -43,7 +43,6 @@ CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_SIZE=32768
 CONFIG_NETDEVICES=y
 CONFIG_GIANFAR=y
-# CONFIG_INPUT_MOUSEDEV is not set
 # CONFIG_INPUT_KEYBOARD is not set
 # CONFIG_INPUT_MOUSE is not set
 # CONFIG_SERIO is not set
@@ -66,4 +65,3 @@ CONFIG_ROOT_NFS=y
 CONFIG_DETECT_HUNG_TASK=y
 CONFIG_DEBUG_MUTEXES=y
 # CONFIG_DEBUG_BUGVERBOSE is not set
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/85xx/tqm8555_defconfig b/arch/powerpc/configs/85xx/tqm8555_defconfig
index 71552b7929cd..ca1de3979474 100644
--- a/arch/powerpc/configs/85xx/tqm8555_defconfig
+++ b/arch/powerpc/configs/85xx/tqm8555_defconfig
@@ -9,6 +9,7 @@ CONFIG_EXPERT=y
 CONFIG_PARTITION_ADVANCED=y
 # CONFIG_MSDOS_PARTITION is not set
 CONFIG_TQM8555=y
+CONFIG_GEN_RTC=y
 CONFIG_MATH_EMULATION=y
 CONFIG_PCI=y
 CONFIG_NET=y
@@ -35,7 +36,6 @@ CONFIG_BLK_DEV_VIA82CXXX=y
 CONFIG_NETDEVICES=y
 CONFIG_GIANFAR=y
 CONFIG_E100=y
-# CONFIG_INPUT_MOUSEDEV is not set
 # CONFIG_INPUT_KEYBOARD is not set
 # CONFIG_INPUT_MOUSE is not set
 # CONFIG_SERIO is not set
@@ -44,7 +44,6 @@ CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
 CONFIG_SERIAL_CPM=y
 CONFIG_SERIAL_CPM_CONSOLE=y
-CONFIG_GEN_RTC=y
 CONFIG_I2C=y
 CONFIG_I2C_CHARDEV=y
 CONFIG_I2C_MPC=y
@@ -58,4 +57,3 @@ CONFIG_JFFS2_FS=y
 CONFIG_CRAMFS=y
 CONFIG_NFS_FS=y
 CONFIG_ROOT_NFS=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/85xx/tqm8560_defconfig b/arch/powerpc/configs/85xx/tqm8560_defconfig
index 25aac973d6d7..ca3b8c8ef30f 100644
--- a/arch/powerpc/configs/85xx/tqm8560_defconfig
+++ b/arch/powerpc/configs/85xx/tqm8560_defconfig
@@ -9,6 +9,7 @@ CONFIG_EXPERT=y
 CONFIG_PARTITION_ADVANCED=y
 # CONFIG_MSDOS_PARTITION is not set
 CONFIG_TQM8560=y
+CONFIG_GEN_RTC=y
 CONFIG_MATH_EMULATION=y
 CONFIG_PCI=y
 CONFIG_NET=y
@@ -35,7 +36,6 @@ CONFIG_BLK_DEV_VIA82CXXX=y
 CONFIG_NETDEVICES=y
 CONFIG_GIANFAR=y
 CONFIG_E100=y
-# CONFIG_INPUT_MOUSEDEV is not set
 # CONFIG_INPUT_KEYBOARD is not set
 # CONFIG_INPUT_MOUSE is not set
 # CONFIG_SERIO is not set
@@ -44,7 +44,6 @@ CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
 CONFIG_SERIAL_CPM=y
 CONFIG_SERIAL_CPM_CONSOLE=y
-CONFIG_GEN_RTC=y
 CONFIG_I2C=y
 CONFIG_I2C_CHARDEV=y
 CONFIG_I2C_MPC=y
@@ -58,4 +57,3 @@ CONFIG_JFFS2_FS=y
 CONFIG_CRAMFS=y
 CONFIG_NFS_FS=y
 CONFIG_ROOT_NFS=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig b/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig
index 72900b84d3e0..6531139a8a8d 100644
--- a/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig
+++ b/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig
@@ -54,7 +54,6 @@ CONFIG_IP_PIMSM_V2=y
 # CONFIG_INET_XFRM_MODE_TRANSPORT is not set
 # CONFIG_INET_XFRM_MODE_TUNNEL is not set
 # CONFIG_INET_XFRM_MODE_BEET is not set
-CONFIG_IPV6=y
 CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 CONFIG_MTD=y
 CONFIG_MTD_REDBOOT_PARTS=y
@@ -86,7 +85,6 @@ CONFIG_DUMMY=y
 CONFIG_GIANFAR=y
 CONFIG_E1000=y
 CONFIG_BROADCOM_PHY=y
-# CONFIG_INPUT_MOUSEDEV is not set
 # CONFIG_INPUT_KEYBOARD is not set
 # CONFIG_INPUT_MOUSE is not set
 CONFIG_SERIO_LIBPS2=y
@@ -107,8 +105,8 @@ CONFIG_SENSORS_LM90=y
 CONFIG_WATCHDOG=y
 CONFIG_USB=y
 CONFIG_USB_MON=y
-CONFIG_USB_ISP1760=y
 CONFIG_USB_STORAGE=y
+CONFIG_USB_ISP1760=y
 CONFIG_NEW_LEDS=y
 CONFIG_LEDS_CLASS=y
 CONFIG_LEDS_PCA955X=y
@@ -143,4 +141,3 @@ CONFIG_DETECT_HUNG_TASK=y
 # CONFIG_DEBUG_BUGVERBOSE is not set
 CONFIG_CRYPTO_HMAC=y
 CONFIG_CRYPTO_MD5=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/adder875_defconfig b/arch/powerpc/configs/adder875_defconfig
index 6a3f825452e9..935ea3ade7de 100644
--- a/arch/powerpc/configs/adder875_defconfig
+++ b/arch/powerpc/configs/adder875_defconfig
@@ -12,6 +12,7 @@ CONFIG_PARTITION_ADVANCED=y
 # CONFIG_IOSCHED_CFQ is not set
 CONFIG_PPC_ADDER875=y
 CONFIG_8xx_COPYBACK=y
+CONFIG_GEN_RTC=y
 CONFIG_HZ_1000=y
 # CONFIG_SECCOMP is not set
 CONFIG_NET=y
@@ -41,7 +42,6 @@ CONFIG_DAVICOM_PHY=y
 # CONFIG_LEGACY_PTYS is not set
 CONFIG_SERIAL_CPM=y
 CONFIG_SERIAL_CPM_CONSOLE=y
-CONFIG_GEN_RTC=y
 # CONFIG_HWMON is not set
 CONFIG_THERMAL=y
 # CONFIG_USB_SUPPORT is not set
diff --git a/arch/powerpc/configs/amigaone_defconfig b/arch/powerpc/configs/amigaone_defconfig
index 8d3e3c41258d..12f397d403c6 100644
--- a/arch/powerpc/configs/amigaone_defconfig
+++ b/arch/powerpc/configs/amigaone_defconfig
@@ -45,7 +45,6 @@ CONFIG_PARPORT_PC_FIFO=y
 CONFIG_BLK_DEV_FD=y
 CONFIG_BLK_DEV_LOOP=y
 CONFIG_BLK_DEV_RAM=y
-CONFIG_SCSI=y
 CONFIG_BLK_DEV_SD=y
 CONFIG_CHR_DEV_ST=y
 CONFIG_BLK_DEV_SR=y
@@ -120,5 +119,4 @@ CONFIG_XMON=y
 CONFIG_XMON_DEFAULT=y
 CONFIG_CRYPTO_CBC=m
 CONFIG_CRYPTO_PCBC=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
 # CONFIG_CRYPTO_HW is not set
diff --git a/arch/powerpc/configs/be.config b/arch/powerpc/configs/be.config
new file mode 100644
index 000000000000..c5cdc99a6530
--- /dev/null
+++ b/arch/powerpc/configs/be.config
@@ -0,0 +1 @@
+CONFIG_CPU_BIG_ENDIAN=y
diff --git a/arch/powerpc/configs/c2k_defconfig b/arch/powerpc/configs/c2k_defconfig
index 7c9d95370150..f1552af9eecc 100644
--- a/arch/powerpc/configs/c2k_defconfig
+++ b/arch/powerpc/configs/c2k_defconfig
@@ -27,6 +27,7 @@ CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE=y
 CONFIG_CPU_FREQ_GOV_PERFORMANCE=y
 CONFIG_CPU_FREQ_GOV_POWERSAVE=m
 CONFIG_CPU_FREQ_GOV_ONDEMAND=m
+CONFIG_GEN_RTC=y
 CONFIG_HIGHMEM=y
 CONFIG_PREEMPT_VOLUNTARY=y
 CONFIG_BINFMT_MISC=y
@@ -197,7 +198,6 @@ CONFIG_TUN=m
 # CONFIG_ATM_DRIVERS is not set
 CONFIG_MV643XX_ETH=y
 CONFIG_VITESSE_PHY=y
-# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
 CONFIG_INPUT_EVDEV=y
 # CONFIG_INPUT_KEYBOARD is not set
 # CONFIG_INPUT_MOUSE is not set
@@ -209,7 +209,6 @@ CONFIG_SERIAL_NONSTANDARD=y
 CONFIG_SERIAL_MPSC=y
 CONFIG_SERIAL_MPSC_CONSOLE=y
 CONFIG_NVRAM=m
-CONFIG_GEN_RTC=m
 CONFIG_RAW_DRIVER=y
 CONFIG_MAX_RAW_DEVS=8192
 CONFIG_I2C=m
@@ -390,7 +389,6 @@ CONFIG_SECURITY_NETWORK=y
 CONFIG_SECURITY_SELINUX=y
 CONFIG_SECURITY_SELINUX_BOOTPARAM=y
 CONFIG_SECURITY_SELINUX_DISABLE=y
-CONFIG_CRYPTO_NULL=m
 CONFIG_CRYPTO_HMAC=y
 CONFIG_CRYPTO_MICHAEL_MIC=m
 CONFIG_CRYPTO_SHA1=y
@@ -402,4 +400,3 @@ CONFIG_CRYPTO_KHAZAD=m
 CONFIG_CRYPTO_SERPENT=m
 CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/cell_defconfig b/arch/powerpc/configs/cell_defconfig
index aa564599e368..560a93a84efe 100644
--- a/arch/powerpc/configs/cell_defconfig
+++ b/arch/powerpc/configs/cell_defconfig
@@ -4,7 +4,6 @@ CONFIG_ALTIVEC=y
 CONFIG_SMP=y
 CONFIG_NR_CPUS=4
 CONFIG_SYSVIPC=y
-CONFIG_FHANDLE=y
 CONFIG_NO_HZ=y
 CONFIG_HIGH_RES_TIMERS=y
 CONFIG_IKCONFIG=y
@@ -34,10 +33,10 @@ CONFIG_CPU_FREQ_GOV_POWERSAVE=y
 CONFIG_CPU_FREQ_GOV_USERSPACE=y
 CONFIG_CPU_FREQ_GOV_ONDEMAND=y
 CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y
+CONFIG_GEN_RTC=y
 CONFIG_BINFMT_MISC=m
 CONFIG_IRQ_ALL_CPUS=y
 CONFIG_NUMA=y
-CONFIG_MEMORY_HOTREMOVE=y
 CONFIG_PPC_64K_PAGES=y
 CONFIG_SCHED_SMT=y
 CONFIG_PCIEPORTBUS=y
@@ -53,7 +52,6 @@ CONFIG_IP_PNP_RARP=y
 CONFIG_NET_IPIP=y
 CONFIG_SYN_COOKIES=y
 # CONFIG_INET_XFRM_MODE_BEET is not set
-CONFIG_IPV6=y
 CONFIG_INET6_AH=m
 CONFIG_INET6_ESP=m
 CONFIG_INET6_IPCOMP=m
@@ -141,7 +139,6 @@ CONFIG_SKY2=m
 CONFIG_GELIC_NET=m
 CONFIG_GELIC_WIRELESS=y
 CONFIG_SPIDER_NET=y
-# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
 # CONFIG_INPUT_KEYBOARD is not set
 # CONFIG_INPUT_MOUSE is not set
 # CONFIG_SERIO_I8042 is not set
@@ -149,8 +146,6 @@ CONFIG_SPIDER_NET=y
 CONFIG_SERIAL_NONSTANDARD=y
 CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
-CONFIG_SERIAL_TXX9_NR_UARTS=2
-CONFIG_SERIAL_TXX9_CONSOLE=y
 CONFIG_SERIAL_OF_PLATFORM=y
 CONFIG_HVC_RTAS=y
 CONFIG_IPMI_HANDLER=m
@@ -159,7 +154,6 @@ CONFIG_IPMI_SI=m
 CONFIG_IPMI_WATCHDOG=m
 CONFIG_IPMI_POWEROFF=m
 # CONFIG_HW_RANDOM is not set
-CONFIG_GEN_RTC=y
 CONFIG_I2C=y
 CONFIG_WATCHDOG=y
 # CONFIG_VGA_CONSOLE is not set
@@ -207,7 +201,6 @@ CONFIG_NLS_ISO8859_13=m
 CONFIG_NLS_ISO8859_14=m
 CONFIG_NLS_ISO8859_15=m
 # CONFIG_ENABLE_MUST_CHECK is not set
-CONFIG_DEBUG_FS=y
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_DEBUG_KERNEL=y
 CONFIG_DEBUG_MUTEXES=y
diff --git a/arch/powerpc/configs/chrp32_defconfig b/arch/powerpc/configs/chrp32_defconfig
index 1f6f90cd8aff..a203b1cf67d3 100644
--- a/arch/powerpc/configs/chrp32_defconfig
+++ b/arch/powerpc/configs/chrp32_defconfig
@@ -16,6 +16,7 @@ CONFIG_MODULE_FORCE_UNLOAD=y
 CONFIG_PARTITION_ADVANCED=y
 CONFIG_MAC_PARTITION=y
 # CONFIG_PPC_PMAC is not set
+CONFIG_GEN_RTC=y
 CONFIG_HIGHMEM=y
 CONFIG_BINFMT_MISC=y
 CONFIG_IRQ_ALL_CPUS=y
@@ -79,7 +80,6 @@ CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
 # CONFIG_HW_RANDOM is not set
 CONFIG_NVRAM=y
-CONFIG_GEN_RTC=y
 # CONFIG_HWMON is not set
 CONFIG_FB=y
 CONFIG_FIRMWARE_EDID=y
@@ -124,5 +124,4 @@ CONFIG_XMON=y
 CONFIG_XMON_DEFAULT=y
 CONFIG_CRYPTO_CBC=m
 CONFIG_CRYPTO_PCBC=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
 # CONFIG_CRYPTO_HW is not set
diff --git a/arch/powerpc/configs/ep8248e_defconfig b/arch/powerpc/configs/ep8248e_defconfig
index 3403b85f9d81..2e6c8a45ae88 100644
--- a/arch/powerpc/configs/ep8248e_defconfig
+++ b/arch/powerpc/configs/ep8248e_defconfig
@@ -70,5 +70,4 @@ CONFIG_CRYPTO_ECB=y
 CONFIG_CRYPTO_PCBC=y
 CONFIG_CRYPTO_MD5=y
 CONFIG_CRYPTO_DES=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
 # CONFIG_CRYPTO_HW is not set
diff --git a/arch/powerpc/configs/ep88xc_defconfig b/arch/powerpc/configs/ep88xc_defconfig
index 95411aeeeb8d..7cb590e8f8fd 100644
--- a/arch/powerpc/configs/ep88xc_defconfig
+++ b/arch/powerpc/configs/ep88xc_defconfig
@@ -14,6 +14,7 @@ CONFIG_PARTITION_ADVANCED=y
 # CONFIG_IOSCHED_CFQ is not set
 CONFIG_PPC_EP88XC=y
 CONFIG_8xx_COPYBACK=y
+CONFIG_GEN_RTC=y
 CONFIG_HZ_100=y
 # CONFIG_SECCOMP is not set
 CONFIG_NET=y
@@ -45,7 +46,6 @@ CONFIG_LXT_PHY=y
 # CONFIG_LEGACY_PTYS is not set
 CONFIG_SERIAL_CPM=y
 CONFIG_SERIAL_CPM_CONSOLE=y
-CONFIG_GEN_RTC=y
 # CONFIG_HWMON is not set
 # CONFIG_USB_SUPPORT is not set
 # CONFIG_DNOTIFY is not set
diff --git a/arch/powerpc/configs/g5_defconfig b/arch/powerpc/configs/g5_defconfig
index e18f2e06553f..e084fa548d73 100644
--- a/arch/powerpc/configs/g5_defconfig
+++ b/arch/powerpc/configs/g5_defconfig
@@ -4,7 +4,6 @@ CONFIG_SMP=y
 CONFIG_NR_CPUS=4
 CONFIG_SYSVIPC=y
 CONFIG_POSIX_MQUEUE=y
-CONFIG_FHANDLE=y
 CONFIG_NO_HZ=y
 CONFIG_HIGH_RES_TIMERS=y
 CONFIG_IKCONFIG=y
@@ -25,6 +24,7 @@ CONFIG_CPU_FREQ=y
 CONFIG_CPU_FREQ_GOV_POWERSAVE=y
 CONFIG_CPU_FREQ_GOV_USERSPACE=y
 CONFIG_CPU_FREQ_PMAC64=y
+CONFIG_GEN_RTC=y
 CONFIG_KEXEC=y
 CONFIG_IRQ_ALL_CPUS=y
 CONFIG_PCI_MSI=y
@@ -115,7 +115,6 @@ CONFIG_USB_USBNET=m
 # CONFIG_USB_NET_NET1080 is not set
 # CONFIG_USB_NET_CDC_SUBSET is not set
 # CONFIG_USB_NET_ZAURUS is not set
-# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
 CONFIG_INPUT_JOYDEV=m
 CONFIG_INPUT_EVDEV=y
 # CONFIG_KEYBOARD_ATKBD is not set
@@ -123,7 +122,6 @@ CONFIG_INPUT_EVDEV=y
 # CONFIG_SERIO_I8042 is not set
 # CONFIG_SERIO_SERPORT is not set
 # CONFIG_HW_RANDOM is not set
-CONFIG_GEN_RTC=y
 CONFIG_RAW_DRIVER=y
 CONFIG_I2C_CHARDEV=y
 CONFIG_AGP=m
@@ -213,20 +211,20 @@ CONFIG_USB_SERIAL_CYBERJACK=m
 CONFIG_USB_SERIAL_XIRCOM=m
 CONFIG_USB_SERIAL_OMNINET=m
 CONFIG_USB_APPLEDISPLAY=m
-CONFIG_FS_DAX=y
 CONFIG_EXT2_FS=y
 CONFIG_EXT2_FS_XATTR=y
 CONFIG_EXT2_FS_POSIX_ACL=y
 CONFIG_EXT2_FS_SECURITY=y
+CONFIG_EXT4_FS=y
 CONFIG_EXT4_FS_POSIX_ACL=y
 CONFIG_EXT4_FS_SECURITY=y
-CONFIG_EXT4_FS=y
 CONFIG_REISERFS_FS=y
 CONFIG_REISERFS_FS_XATTR=y
 CONFIG_REISERFS_FS_POSIX_ACL=y
 CONFIG_REISERFS_FS_SECURITY=y
 CONFIG_XFS_FS=m
 CONFIG_XFS_POSIX_ACL=y
+CONFIG_FS_DAX=y
 CONFIG_ISO9660_FS=y
 CONFIG_JOLIET=y
 CONFIG_ZISOFS=y
@@ -254,14 +252,12 @@ CONFIG_NLS_ISO8859_1=y
 CONFIG_NLS_ISO8859_15=y
 CONFIG_NLS_UTF8=y
 CONFIG_CRC_T10DIF=y
-CONFIG_DEBUG_FS=y
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_DEBUG_KERNEL=y
 CONFIG_DEBUG_MUTEXES=y
 CONFIG_LATENCYTOP=y
 CONFIG_BOOTX_TEXT=y
 CONFIG_PPC_EARLY_DEBUG=y
-CONFIG_CRYPTO_NULL=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_PCBC=m
 CONFIG_CRYPTO_HMAC=y
@@ -276,5 +272,4 @@ CONFIG_CRYPTO_KHAZAD=m
 CONFIG_CRYPTO_SERPENT=m
 CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
 # CONFIG_CRYPTO_HW is not set
diff --git a/arch/powerpc/configs/gamecube_defconfig b/arch/powerpc/configs/gamecube_defconfig
index c0eec4a5df4e..79bbc8238b32 100644
--- a/arch/powerpc/configs/gamecube_defconfig
+++ b/arch/powerpc/configs/gamecube_defconfig
@@ -45,7 +45,6 @@ CONFIG_BLK_DEV_RAM_COUNT=2
 CONFIG_NETDEVICES=y
 # CONFIG_WLAN is not set
 CONFIG_INPUT_FF_MEMLESS=m
-# CONFIG_INPUT_MOUSEDEV is not set
 CONFIG_INPUT_JOYDEV=y
 CONFIG_INPUT_EVDEV=y
 # CONFIG_KEYBOARD_ATKBD is not set
@@ -54,7 +53,6 @@ CONFIG_INPUT_JOYSTICK=y
 # CONFIG_SERIO_I8042 is not set
 # CONFIG_SERIO_SERPORT is not set
 CONFIG_LEGACY_PTY_COUNT=64
-# CONFIG_DEVKMEM is not set
 # CONFIG_HW_RANDOM is not set
 # CONFIG_HWMON is not set
 CONFIG_FB=y
diff --git a/arch/powerpc/configs/holly_defconfig b/arch/powerpc/configs/holly_defconfig
index e56e80090529..71d8d2430b6c 100644
--- a/arch/powerpc/configs/holly_defconfig
+++ b/arch/powerpc/configs/holly_defconfig
@@ -11,6 +11,7 @@ CONFIG_PARTITION_ADVANCED=y
 # CONFIG_PPC_PMAC is not set
 CONFIG_EMBEDDED6xx=y
 CONFIG_PPC_HOLLY=y
+CONFIG_GEN_RTC=y
 CONFIG_BINFMT_MISC=y
 CONFIG_CMDLINE_BOOL=y
 CONFIG_CMDLINE="console=ttyS0,115200"
@@ -37,7 +38,6 @@ CONFIG_NETDEVICES=y
 CONFIG_VORTEX=y
 CONFIG_TSI108_ETH=y
 CONFIG_PHYLIB=y
-# CONFIG_INPUT_MOUSEDEV is not set
 # CONFIG_INPUT_KEYBOARD is not set
 # CONFIG_INPUT_MOUSE is not set
 # CONFIG_SERIO is not set
@@ -49,7 +49,6 @@ CONFIG_SERIAL_8250_EXTENDED=y
 CONFIG_SERIAL_8250_SHARE_IRQ=y
 CONFIG_SERIAL_OF_PLATFORM=y
 # CONFIG_HW_RANDOM is not set
-CONFIG_GEN_RTC=y
 CONFIG_EXT2_FS=y
 CONFIG_EXT4_FS=y
 CONFIG_PROC_KCORE=y
diff --git a/arch/powerpc/configs/linkstation_defconfig b/arch/powerpc/configs/linkstation_defconfig
index b413c19d7031..477794c41d50 100644
--- a/arch/powerpc/configs/linkstation_defconfig
+++ b/arch/powerpc/configs/linkstation_defconfig
@@ -26,7 +26,6 @@ CONFIG_IP_PNP_BOOTP=y
 # CONFIG_IPV6 is not set
 CONFIG_NETFILTER=y
 CONFIG_NF_CONNTRACK=m
-CONFIG_NF_CT_PROTO_SCTP=m
 CONFIG_NF_CONNTRACK_AMANDA=m
 CONFIG_NF_CONNTRACK_FTP=m
 CONFIG_NF_CONNTRACK_H323=m
@@ -79,7 +78,6 @@ CONFIG_NET_TULIP=y
 CONFIG_TULIP=y
 CONFIG_TULIP_MMIO=y
 CONFIG_R8169=y
-# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
 CONFIG_INPUT_EVDEV=m
 # CONFIG_INPUT_KEYBOARD is not set
 # CONFIG_INPUT_MOUSE is not set
@@ -142,4 +140,3 @@ CONFIG_CRYPTO_BLOWFISH=m
 CONFIG_CRYPTO_SERPENT=m
 CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_DEFLATE=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/maple_defconfig b/arch/powerpc/configs/maple_defconfig
index c4018179e219..078cdb427fc9 100644
--- a/arch/powerpc/configs/maple_defconfig
+++ b/arch/powerpc/configs/maple_defconfig
@@ -3,7 +3,6 @@ CONFIG_SMP=y
 CONFIG_NR_CPUS=4
 CONFIG_SYSVIPC=y
 CONFIG_POSIX_MQUEUE=y
-CONFIG_FHANDLE=y
 CONFIG_NO_HZ=y
 CONFIG_HIGH_RES_TIMERS=y
 CONFIG_IKCONFIG=y
@@ -24,6 +23,7 @@ CONFIG_MAC_PARTITION=y
 # CONFIG_PPC_PMAC is not set
 CONFIG_PPC_MAPLE=y
 CONFIG_UDBG_RTAS_CONSOLE=y
+CONFIG_GEN_RTC=y
 CONFIG_KEXEC=y
 CONFIG_IRQ_ALL_CPUS=y
 CONFIG_PCI_MSI=y
@@ -53,9 +53,6 @@ CONFIG_AMD8111_ETH=y
 CONFIG_TIGON3=y
 CONFIG_E1000=y
 CONFIG_USB_PEGASUS=y
-# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
-CONFIG_INPUT_MOUSEDEV_SCREEN_X=1600
-CONFIG_INPUT_MOUSEDEV_SCREEN_Y=1200
 # CONFIG_INPUT_KEYBOARD is not set
 # CONFIG_INPUT_MOUSE is not set
 # CONFIG_SERIO is not set
@@ -63,7 +60,6 @@ CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
 CONFIG_HVC_RTAS=y
 # CONFIG_HW_RANDOM is not set
-CONFIG_GEN_RTC=y
 CONFIG_I2C=y
 CONFIG_I2C_CHARDEV=y
 CONFIG_I2C_AMD8111=y
@@ -100,8 +96,8 @@ CONFIG_USB_SERIAL_KEYSPAN_USA49W=y
 CONFIG_USB_SERIAL_KEYSPAN_USA49WLC=y
 CONFIG_USB_SERIAL_TI=m
 CONFIG_EXT2_FS=y
-CONFIG_FS_DAX=y
 CONFIG_EXT4_FS=y
+CONFIG_FS_DAX=y
 CONFIG_MSDOS_FS=y
 CONFIG_VFAT_FS=y
 CONFIG_PROC_KCORE=y
@@ -127,5 +123,4 @@ CONFIG_BOOTX_TEXT=y
 CONFIG_PPC_EARLY_DEBUG=y
 CONFIG_CRYPTO_ECB=m
 CONFIG_CRYPTO_PCBC=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
 # CONFIG_CRYPTO_HW is not set
diff --git a/arch/powerpc/configs/mgcoge_defconfig b/arch/powerpc/configs/mgcoge_defconfig
index 197acaa026eb..5d5f08e5b8d9 100644
--- a/arch/powerpc/configs/mgcoge_defconfig
+++ b/arch/powerpc/configs/mgcoge_defconfig
@@ -46,7 +46,6 @@ CONFIG_BLK_DEV_RAM=y
 CONFIG_NETDEVICES=y
 CONFIG_FS_ENET=y
 CONFIG_FS_ENET_MDIO_FCC=y
-CONFIG_FIXED_PHY=y
 # CONFIG_WLAN is not set
 # CONFIG_INPUT is not set
 # CONFIG_SERIO is not set
@@ -83,5 +82,4 @@ CONFIG_MAGIC_SYSRQ=y
 CONFIG_BDI_SWITCH=y
 CONFIG_CRYPTO_ECB=y
 CONFIG_CRYPTO_PCBC=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
 # CONFIG_CRYPTO_HW is not set
diff --git a/arch/powerpc/configs/mpc512x_defconfig b/arch/powerpc/configs/mpc512x_defconfig
index 0b4854cf26cb..10be5773ad5d 100644
--- a/arch/powerpc/configs/mpc512x_defconfig
+++ b/arch/powerpc/configs/mpc512x_defconfig
@@ -12,6 +12,7 @@ CONFIG_PARTITION_ADVANCED=y
 # CONFIG_IOSCHED_CFQ is not set
 # CONFIG_PPC_CHRP is not set
 CONFIG_PPC_MPC512x=y
+CONFIG_MPC512x_LPBFIFO=y
 CONFIG_MPC5121_ADS=y
 CONFIG_MPC512x_GENERIC=y
 CONFIG_PDM360NG=y
@@ -61,25 +62,22 @@ CONFIG_BLK_DEV_SD=y
 CONFIG_CHR_DEV_SG=y
 CONFIG_NETDEVICES=y
 CONFIG_FS_ENET=y
-CONFIG_MARVELL_PHY=y
-CONFIG_DAVICOM_PHY=y
-CONFIG_QSEMI_PHY=y
-CONFIG_LXT_PHY=y
-CONFIG_CICADA_PHY=y
-CONFIG_VITESSE_PHY=y
-CONFIG_SMSC_PHY=y
+CONFIG_MDIO_BITBANG=y
 CONFIG_BROADCOM_PHY=y
+CONFIG_CICADA_PHY=y
+CONFIG_DAVICOM_PHY=y
 CONFIG_ICPLUS_PHY=y
-CONFIG_REALTEK_PHY=y
+CONFIG_LSI_ET1011C_PHY=y
+CONFIG_LXT_PHY=y
+CONFIG_MARVELL_PHY=y
 CONFIG_NATIONAL_PHY=y
+CONFIG_QSEMI_PHY=y
+CONFIG_REALTEK_PHY=y
+CONFIG_SMSC_PHY=y
 CONFIG_STE10XP=y
-CONFIG_LSI_ET1011C_PHY=y
-CONFIG_FIXED_PHY=y
-CONFIG_MDIO_BITBANG=y
+CONFIG_VITESSE_PHY=y
 # CONFIG_WLAN is not set
-# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
 CONFIG_INPUT_EVDEV=y
-# CONFIG_DEVKMEM is not set
 CONFIG_SERIAL_MPC52xx=y
 CONFIG_SERIAL_MPC52xx_CONSOLE=y
 CONFIG_SERIAL_MPC52xx_CONSOLE_BAUD=115200
@@ -111,10 +109,9 @@ CONFIG_RTC_DRV_M41T80=y
 CONFIG_RTC_DRV_MPC5121=y
 CONFIG_DMADEVICES=y
 CONFIG_MPC512X_DMA=y
-CONFIG_MPC512x_LPBFIFO=y
-CONFIG_FS_DAX=y
 CONFIG_EXT2_FS=y
 CONFIG_EXT4_FS=y
+CONFIG_FS_DAX=y
 # CONFIG_DNOTIFY is not set
 CONFIG_VFAT_FS=y
 CONFIG_TMPFS=y
@@ -126,5 +123,4 @@ CONFIG_NLS_CODEPAGE_437=y
 CONFIG_NLS_ISO8859_1=y
 # CONFIG_ENABLE_WARN_DEPRECATED is not set
 # CONFIG_ENABLE_MUST_CHECK is not set
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
 # CONFIG_CRYPTO_HW is not set
diff --git a/arch/powerpc/configs/mpc5200_defconfig b/arch/powerpc/configs/mpc5200_defconfig
index 88336d0df0d6..7a2b2aa37def 100644
--- a/arch/powerpc/configs/mpc5200_defconfig
+++ b/arch/powerpc/configs/mpc5200_defconfig
@@ -50,7 +50,6 @@ CONFIG_NETDEVICES=y
 CONFIG_FEC_MPC52xx=y
 CONFIG_AMD_PHY=y
 CONFIG_LXT_PHY=y
-CONFIG_FIXED_PHY=y
 # CONFIG_INPUT_KEYBOARD is not set
 # CONFIG_INPUT_MOUSE is not set
 # CONFIG_SERIO is not set
@@ -71,12 +70,10 @@ CONFIG_SENSORS_LM87=m
 CONFIG_WATCHDOG=y
 CONFIG_MFD_SM501=m
 CONFIG_DRM=y
-CONFIG_FB=y
 CONFIG_FB_FOREIGN_ENDIAN=y
 CONFIG_FB_RADEON=y
 CONFIG_FB_SM501=m
 # CONFIG_VGA_CONSOLE is not set
-CONFIG_FRAMEBUFFER_CONSOLE=y
 CONFIG_LOGO=y
 CONFIG_SOUND=y
 CONFIG_SND=y
@@ -130,4 +127,3 @@ CONFIG_PRINTK_TIME=y
 CONFIG_DEBUG_INFO=y
 CONFIG_DEBUG_KERNEL=y
 CONFIG_DETECT_HUNG_TASK=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/mpc7448_hpc2_defconfig b/arch/powerpc/configs/mpc7448_hpc2_defconfig
index d933326b4cf9..4b14c02b437c 100644
--- a/arch/powerpc/configs/mpc7448_hpc2_defconfig
+++ b/arch/powerpc/configs/mpc7448_hpc2_defconfig
@@ -11,6 +11,7 @@ CONFIG_PARTITION_ADVANCED=y
 # CONFIG_PPC_PMAC is not set
 CONFIG_EMBEDDED6xx=y
 CONFIG_MPC7448HPC2=y
+CONFIG_GEN_RTC=y
 CONFIG_BINFMT_MISC=y
 # CONFIG_SECCOMP is not set
 CONFIG_NET=y
@@ -38,7 +39,6 @@ CONFIG_8139TOO=y
 # CONFIG_8139TOO_PIO is not set
 CONFIG_TSI108_ETH=y
 CONFIG_PHYLIB=y
-# CONFIG_INPUT_MOUSEDEV is not set
 # CONFIG_INPUT_KEYBOARD is not set
 # CONFIG_INPUT_MOUSE is not set
 # CONFIG_SERIO is not set
@@ -46,7 +46,6 @@ CONFIG_PHYLIB=y
 CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
 # CONFIG_HW_RANDOM is not set
-CONFIG_GEN_RTC=y
 CONFIG_EXT2_FS=y
 CONFIG_EXT4_FS=y
 CONFIG_PROC_KCORE=y
@@ -54,4 +53,3 @@ CONFIG_TMPFS=y
 CONFIG_NFS_FS=y
 CONFIG_ROOT_NFS=y
 CONFIG_CRC_T10DIF=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/mpc8272_ads_defconfig b/arch/powerpc/configs/mpc8272_ads_defconfig
index 4cb0f617c0d6..b1e88b64536b 100644
--- a/arch/powerpc/configs/mpc8272_ads_defconfig
+++ b/arch/powerpc/configs/mpc8272_ads_defconfig
@@ -77,5 +77,4 @@ CONFIG_CRYPTO_ECB=y
 CONFIG_CRYPTO_PCBC=y
 CONFIG_CRYPTO_MD5=y
 CONFIG_CRYPTO_DES=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
 # CONFIG_CRYPTO_HW is not set
diff --git a/arch/powerpc/configs/mpc83xx_defconfig b/arch/powerpc/configs/mpc83xx_defconfig
index 6574477fd726..d1b82035d35f 100644
--- a/arch/powerpc/configs/mpc83xx_defconfig
+++ b/arch/powerpc/configs/mpc83xx_defconfig
@@ -21,7 +21,6 @@ CONFIG_MPC837x_MDS=y
 CONFIG_MPC837x_RDB=y
 CONFIG_SBC834x=y
 CONFIG_ASP834x=y
-CONFIG_QUICC_ENGINE=y
 CONFIG_QE_GPIO=y
 CONFIG_MATH_EMULATION=y
 CONFIG_PCI=y
@@ -60,13 +59,11 @@ CONFIG_SATA_SIL=y
 CONFIG_NETDEVICES=y
 CONFIG_UCC_GETH=y
 CONFIG_GIANFAR=y
-CONFIG_MARVELL_PHY=y
 CONFIG_DAVICOM_PHY=y
-CONFIG_VITESSE_PHY=y
 CONFIG_ICPLUS_PHY=y
-CONFIG_FIXED_PHY=y
+CONFIG_MARVELL_PHY=y
+CONFIG_VITESSE_PHY=y
 CONFIG_INPUT_FF_MEMLESS=m
-# CONFIG_INPUT_MOUSEDEV is not set
 # CONFIG_INPUT_KEYBOARD is not set
 # CONFIG_INPUT_MOUSE is not set
 # CONFIG_SERIO is not set
@@ -99,6 +96,7 @@ CONFIG_USB_EHCI_FSL=y
 CONFIG_RTC_CLASS=y
 CONFIG_RTC_DRV_DS1307=y
 CONFIG_RTC_DRV_DS1374=y
+CONFIG_QUICC_ENGINE=y
 CONFIG_EXT2_FS=y
 CONFIG_EXT4_FS=y
 CONFIG_PROC_KCORE=y
@@ -109,7 +107,5 @@ CONFIG_ROOT_NFS=y
 CONFIG_CRC_T10DIF=y
 CONFIG_CRYPTO_ECB=m
 CONFIG_CRYPTO_PCBC=m
-CONFIG_CRYPTO_SHA256=y
 CONFIG_CRYPTO_SHA512=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_CRYPTO_DEV_TALITOS=y
diff --git a/arch/powerpc/configs/mpc866_ads_defconfig b/arch/powerpc/configs/mpc866_ads_defconfig
index 998454471a48..f1f176c29fa3 100644
--- a/arch/powerpc/configs/mpc866_ads_defconfig
+++ b/arch/powerpc/configs/mpc866_ads_defconfig
@@ -14,6 +14,7 @@ CONFIG_PARTITION_ADVANCED=y
 CONFIG_MPC86XADS=y
 CONFIG_8xx_COPYBACK=y
 CONFIG_8xx_CPU6=y
+CONFIG_GEN_RTC=y
 CONFIG_HZ_1000=y
 CONFIG_MATH_EMULATION=y
 # CONFIG_SECCOMP is not set
@@ -28,12 +29,10 @@ CONFIG_SYN_COOKIES=y
 CONFIG_BLK_DEV_LOOP=y
 CONFIG_NETDEVICES=y
 CONFIG_FS_ENET=y
-CONFIG_FIXED_PHY=y
 # CONFIG_VT is not set
 # CONFIG_LEGACY_PTYS is not set
 CONFIG_SERIAL_CPM=y
 CONFIG_SERIAL_CPM_CONSOLE=y
-CONFIG_GEN_RTC=y
 CONFIG_EXT2_FS=y
 CONFIG_EXT2_FS_XATTR=y
 CONFIG_EXT4_FS=y
@@ -43,4 +42,3 @@ CONFIG_NFS_FS=y
 CONFIG_ROOT_NFS=y
 CONFIG_CRC_CCITT=y
 CONFIG_CRC32_SLICEBY4=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/mpc86xx_basic_defconfig b/arch/powerpc/configs/mpc86xx_basic_defconfig
index 3283f0586e11..67bd1fa036ee 100644
--- a/arch/powerpc/configs/mpc86xx_basic_defconfig
+++ b/arch/powerpc/configs/mpc86xx_basic_defconfig
@@ -1,11 +1,11 @@
-CONFIG_HIGHMEM=y
-CONFIG_KEXEC=y
 CONFIG_PPC_86xx=y
-CONFIG_PROC_KCORE=y
+CONFIG_MPC8641_HPCN=y
+CONFIG_SBC8641D=y
+CONFIG_MPC8610_HPCD=y
 CONFIG_GEF_PPC9A=y
 CONFIG_GEF_SBC310=y
 CONFIG_GEF_SBC610=y
-CONFIG_MPC8610_HPCD=y
-CONFIG_MPC8641_HPCN=y
-CONFIG_SBC8641D=y
 CONFIG_MVME7100=y
+CONFIG_HIGHMEM=y
+CONFIG_KEXEC=y
+CONFIG_PROC_KCORE=y
diff --git a/arch/powerpc/configs/mpc885_ads_defconfig b/arch/powerpc/configs/mpc885_ads_defconfig
index 91f53f1bec5d..ec3fcc2bf737 100644
--- a/arch/powerpc/configs/mpc885_ads_defconfig
+++ b/arch/powerpc/configs/mpc885_ads_defconfig
@@ -13,6 +13,7 @@ CONFIG_EXPERT=y
 CONFIG_PARTITION_ADVANCED=y
 # CONFIG_IOSCHED_CFQ is not set
 CONFIG_8xx_COPYBACK=y
+CONFIG_GEN_RTC=y
 CONFIG_HZ_100=y
 # CONFIG_SECCOMP is not set
 CONFIG_NET=y
@@ -51,7 +52,6 @@ CONFIG_DAVICOM_PHY=y
 # CONFIG_LEGACY_PTYS is not set
 CONFIG_SERIAL_CPM=y
 CONFIG_SERIAL_CPM_CONSOLE=y
-CONFIG_GEN_RTC=y
 # CONFIG_HWMON is not set
 # CONFIG_USB_SUPPORT is not set
 # CONFIG_DNOTIFY is not set
diff --git a/arch/powerpc/configs/mvme5100_defconfig b/arch/powerpc/configs/mvme5100_defconfig
index 139add95a16a..63e38c7220f1 100644
--- a/arch/powerpc/configs/mvme5100_defconfig
+++ b/arch/powerpc/configs/mvme5100_defconfig
@@ -35,7 +35,6 @@ CONFIG_IP_PNP_BOOTP=y
 # CONFIG_IPV6 is not set
 CONFIG_NETFILTER=y
 CONFIG_NF_CONNTRACK=m
-CONFIG_NF_CT_PROTO_SCTP=m
 CONFIG_NF_CONNTRACK_AMANDA=m
 CONFIG_NF_CONNTRACK_FTP=m
 CONFIG_NF_CONNTRACK_H323=m
@@ -70,7 +69,6 @@ CONFIG_TUN=m
 # CONFIG_NET_VENDOR_3COM is not set
 CONFIG_E100=y
 # CONFIG_WLAN is not set
-# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
 # CONFIG_INPUT_KEYBOARD is not set
 # CONFIG_INPUT_MOUSE is not set
 # CONFIG_SERIO is not set
@@ -130,4 +128,3 @@ CONFIG_CRYPTO_DES=y
 CONFIG_CRYPTO_SERPENT=m
 CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_DEFLATE=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/pasemi_defconfig b/arch/powerpc/configs/pasemi_defconfig
index fe43ff47bd2f..8cf4a46bef86 100644
--- a/arch/powerpc/configs/pasemi_defconfig
+++ b/arch/powerpc/configs/pasemi_defconfig
@@ -3,7 +3,6 @@ CONFIG_ALTIVEC=y
 CONFIG_SMP=y
 CONFIG_NR_CPUS=2
 CONFIG_SYSVIPC=y
-CONFIG_FHANDLE=y
 CONFIG_NO_HZ=y
 CONFIG_HIGH_RES_TIMERS=y
 CONFIG_BLK_DEV_INITRD=y
@@ -145,6 +144,7 @@ CONFIG_EDAC=y
 CONFIG_EDAC_PASEMI=y
 CONFIG_RTC_CLASS=y
 CONFIG_RTC_DRV_DS1307=y
+CONFIG_RAS=y
 CONFIG_EXT2_FS=y
 CONFIG_EXT2_FS_XATTR=y
 CONFIG_EXT2_FS_POSIX_ACL=y
@@ -167,7 +167,6 @@ CONFIG_NLS_CODEPAGE_437=y
 CONFIG_NLS_ISO8859_1=y
 CONFIG_CRC_CCITT=y
 CONFIG_PRINTK_TIME=y
-CONFIG_DEBUG_FS=y
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_DEBUG_KERNEL=y
 CONFIG_DETECT_HUNG_TASK=y
@@ -175,7 +174,5 @@ CONFIG_DETECT_HUNG_TASK=y
 CONFIG_XMON=y
 CONFIG_XMON_DEFAULT=y
 CONFIG_CRYPTO_MD4=y
-CONFIG_CRYPTO_SHA256=y
 CONFIG_CRYPTO_SHA512=y
 CONFIG_CRYPTO_BLOWFISH=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/pmac32_defconfig b/arch/powerpc/configs/pmac32_defconfig
index fc1e7a7388b8..8e798b1fbc99 100644
--- a/arch/powerpc/configs/pmac32_defconfig
+++ b/arch/powerpc/configs/pmac32_defconfig
@@ -21,6 +21,7 @@ CONFIG_CPU_FREQ_GOV_POWERSAVE=y
 CONFIG_CPU_FREQ_GOV_USERSPACE=y
 CONFIG_CPU_FREQ_PMAC=y
 CONFIG_PPC601_SYNC_FIX=y
+CONFIG_GEN_RTC=y
 CONFIG_HIGHMEM=y
 CONFIG_BINFMT_MISC=m
 CONFIG_HIBERNATION=y
@@ -179,10 +180,10 @@ CONFIG_PPP_ASYNC=y
 CONFIG_PPP_SYNC_TTY=m
 CONFIG_USB_USBNET=m
 # CONFIG_USB_NET_CDC_SUBSET is not set
-CONFIG_PRISM54=m
 CONFIG_B43=m
 CONFIG_B43LEGACY=m
 CONFIG_P54_COMMON=m
+CONFIG_PRISM54=m
 CONFIG_INPUT_EVDEV=y
 # CONFIG_KEYBOARD_ATKBD is not set
 # CONFIG_MOUSE_PS2 is not set
@@ -193,7 +194,6 @@ CONFIG_SERIAL_8250=m
 CONFIG_SERIAL_PMACZILOG=m
 CONFIG_SERIAL_PMACZILOG_TTYS=y
 CONFIG_NVRAM=y
-CONFIG_GEN_RTC=y
 CONFIG_I2C_CHARDEV=m
 CONFIG_APM_POWER=y
 CONFIG_BATTERY_PMU=y
@@ -201,8 +201,9 @@ CONFIG_HWMON=m
 CONFIG_AGP=m
 CONFIG_AGP_UNINORTH=m
 CONFIG_DRM=m
-CONFIG_DRM_R128=m
 CONFIG_DRM_RADEON=m
+CONFIG_DRM_LEGACY=y
+CONFIG_DRM_R128=m
 CONFIG_FB=y
 CONFIG_FB_OF=y
 CONFIG_FB_CONTROL=y
@@ -300,8 +301,6 @@ CONFIG_NFSD_V4=y
 CONFIG_NLS_CODEPAGE_437=m
 CONFIG_NLS_ISO8859_1=m
 CONFIG_CRC_T10DIF=y
-CONFIG_LIBCRC32C=m
-CONFIG_DEBUG_FS=y
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_DEBUG_KERNEL=y
 CONFIG_DETECT_HUNG_TASK=y
@@ -310,7 +309,6 @@ CONFIG_XMON=y
 CONFIG_XMON_DEFAULT=y
 CONFIG_BOOTX_TEXT=y
 CONFIG_PPC_EARLY_DEBUG=y
-CONFIG_CRYPTO_NULL=m
 CONFIG_CRYPTO_PCBC=m
 CONFIG_CRYPTO_MD4=m
 CONFIG_CRYPTO_SHA512=m
@@ -325,4 +323,3 @@ CONFIG_CRYPTO_SERPENT=m
 CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_DEFLATE=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/powernv_defconfig b/arch/powerpc/configs/powernv_defconfig
index 34fc9bbfca9e..caee834760d2 100644
--- a/arch/powerpc/configs/powernv_defconfig
+++ b/arch/powerpc/configs/powernv_defconfig
@@ -1,10 +1,8 @@
 CONFIG_PPC64=y
-CONFIG_SMP=y
 CONFIG_NR_CPUS=2048
 CONFIG_CPU_LITTLE_ENDIAN=y
 CONFIG_SYSVIPC=y
 CONFIG_POSIX_MQUEUE=y
-CONFIG_FHANDLE=y
 CONFIG_AUDIT=y
 CONFIG_IRQ_DOMAIN_DEBUG=y
 CONFIG_NO_HZ=y
@@ -26,8 +24,8 @@ CONFIG_CGROUP_FREEZER=y
 CONFIG_CPUSETS=y
 CONFIG_CGROUP_DEVICE=y
 CONFIG_CGROUP_CPUACCT=y
-CONFIG_CGROUP_BPF=y
 CONFIG_CGROUP_PERF=y
+CONFIG_CGROUP_BPF=y
 CONFIG_USER_NS=y
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_BPF_SYSCALL=y
@@ -62,7 +60,6 @@ CONFIG_PPC_64K_PAGES=y
 CONFIG_PPC_SUBPAGE_PROT=y
 CONFIG_SCHED_SMT=y
 CONFIG_PM=y
-CONFIG_PCI_MSI=y
 CONFIG_HOTPLUG_PCI=y
 CONFIG_NET=y
 CONFIG_PACKET=y
@@ -158,7 +155,6 @@ CONFIG_NETCONSOLE=y
 CONFIG_TUN=m
 CONFIG_VETH=m
 CONFIG_VIRTIO_NET=m
-CONFIG_VHOST_NET=m
 CONFIG_VORTEX=m
 CONFIG_ACENIC=m
 CONFIG_ACENIC_OMIT_TIGON_I=y
@@ -184,16 +180,13 @@ CONFIG_PPP_DEFLATE=m
 CONFIG_PPPOE=m
 CONFIG_PPP_ASYNC=m
 CONFIG_PPP_SYNC_TTY=m
-# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
 CONFIG_INPUT_EVDEV=m
 CONFIG_INPUT_MISC=y
 # CONFIG_SERIO_SERPORT is not set
-CONFIG_DEVPTS_MULTIPLE_INSTANCES=y
 CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
 CONFIG_SERIAL_JSM=m
 CONFIG_VIRTIO_CONSOLE=m
-CONFIG_POWERNV_OP_PANEL=m
 CONFIG_IPMI_HANDLER=y
 CONFIG_IPMI_DEVICE_INTERFACE=y
 CONFIG_IPMI_POWERNV=y
@@ -296,9 +289,12 @@ CONFIG_DEBUG_STACKOVERFLOW=y
 CONFIG_SOFTLOCKUP_DETECTOR=y
 CONFIG_HARDLOCKUP_DETECTOR=y
 CONFIG_LATENCYTOP=y
+CONFIG_FTRACE=y
+CONFIG_FUNCTION_TRACER=y
+CONFIG_FUNCTION_GRAPH_TRACER=y
 CONFIG_SCHED_TRACER=y
+CONFIG_FTRACE_SYSCALLS=y
 CONFIG_BLK_DEV_IO_TRACE=y
-CONFIG_UPROBE_EVENT=y
 CONFIG_CODE_PATCHING_SELFTEST=y
 CONFIG_FTR_FIXUP_SELFTEST=y
 CONFIG_MSI_BITMAP_SELFTEST=y
@@ -310,6 +306,7 @@ CONFIG_CRYPTO_HMAC=y
 CONFIG_CRYPTO_CRC32C_VPMSUM=m
 CONFIG_CRYPTO_MD5_PPC=m
 CONFIG_CRYPTO_MICHAEL_MIC=m
+CONFIG_CRYPTO_SHA1_PPC=m
 CONFIG_CRYPTO_SHA256=y
 CONFIG_CRYPTO_TGR192=m
 CONFIG_CRYPTO_WP512=m
@@ -318,14 +315,13 @@ CONFIG_CRYPTO_BLOWFISH=m
 CONFIG_CRYPTO_CAST6=m
 CONFIG_CRYPTO_KHAZAD=m
 CONFIG_CRYPTO_SALSA20=m
-CONFIG_CRYPTO_SHA1_PPC=m
 CONFIG_CRYPTO_SERPENT=m
 CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_LZO=m
 CONFIG_CRYPTO_DEV_NX=y
 CONFIG_CRYPTO_DEV_VMX=y
-CONFIG_CRYPTO_DEV_VMX_ENCRYPT=m
 CONFIG_VIRTUALIZATION=y
 CONFIG_KVM_BOOK3S_64=m
 CONFIG_KVM_BOOK3S_64_HV=m
+CONFIG_VHOST_NET=m
diff --git a/arch/powerpc/configs/ppc40x_defconfig b/arch/powerpc/configs/ppc40x_defconfig
index 370c0bbcff71..10fb1df63b46 100644
--- a/arch/powerpc/configs/ppc40x_defconfig
+++ b/arch/powerpc/configs/ppc40x_defconfig
@@ -51,9 +51,9 @@ CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
 CONFIG_SERIAL_8250_EXTENDED=y
 CONFIG_SERIAL_8250_SHARE_IRQ=y
+CONFIG_SERIAL_OF_PLATFORM=y
 CONFIG_SERIAL_UARTLITE=y
 CONFIG_SERIAL_UARTLITE_CONSOLE=y
-CONFIG_SERIAL_OF_PLATFORM=y
 # CONFIG_HW_RANDOM is not set
 CONFIG_XILINX_HWICAP=m
 CONFIG_I2C=m
@@ -85,4 +85,3 @@ CONFIG_CRYPTO_ECB=y
 CONFIG_CRYPTO_PCBC=y
 CONFIG_CRYPTO_MD5=y
 CONFIG_CRYPTO_DES=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/ppc44x_defconfig b/arch/powerpc/configs/ppc44x_defconfig
index 2766e8f590bc..66dd6bf45cde 100644
--- a/arch/powerpc/configs/ppc44x_defconfig
+++ b/arch/powerpc/configs/ppc44x_defconfig
@@ -68,9 +68,9 @@ CONFIG_SERIAL_8250_CONSOLE=y
 # CONFIG_SERIAL_8250_PCI is not set
 CONFIG_SERIAL_8250_EXTENDED=y
 CONFIG_SERIAL_8250_SHARE_IRQ=y
+CONFIG_SERIAL_OF_PLATFORM=y
 CONFIG_SERIAL_UARTLITE=y
 CONFIG_SERIAL_UARTLITE_CONSOLE=y
-CONFIG_SERIAL_OF_PLATFORM=y
 # CONFIG_HW_RANDOM is not set
 CONFIG_XILINX_HWICAP=m
 CONFIG_I2C=m
@@ -94,7 +94,6 @@ CONFIG_PROC_KCORE=y
 CONFIG_TMPFS=y
 CONFIG_JFFS2_FS=y
 CONFIG_UBIFS_FS=m
-CONFIG_LOGFS=m
 CONFIG_CRAMFS=y
 CONFIG_SQUASHFS=m
 CONFIG_SQUASHFS_XATTR=y
@@ -108,6 +107,5 @@ CONFIG_MAGIC_SYSRQ=y
 CONFIG_DETECT_HUNG_TASK=y
 CONFIG_CRYPTO_ECB=y
 CONFIG_CRYPTO_PCBC=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
 # CONFIG_CRYPTO_HW is not set
 CONFIG_VIRTUALIZATION=y
diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig
index c5246d29f385..791db775a09c 100644
--- a/arch/powerpc/configs/ppc64_defconfig
+++ b/arch/powerpc/configs/ppc64_defconfig
@@ -1,8 +1,6 @@
 CONFIG_PPC64=y
-CONFIG_SMP=y
 CONFIG_SYSVIPC=y
 CONFIG_POSIX_MQUEUE=y
-CONFIG_FHANDLE=y
 CONFIG_IRQ_DOMAIN_DEBUG=y
 CONFIG_NO_HZ=y
 CONFIG_HIGH_RES_TIMERS=y
@@ -28,9 +26,10 @@ CONFIG_MODVERSIONS=y
 CONFIG_MODULE_SRCVERSION_ALL=y
 CONFIG_PARTITION_ADVANCED=y
 CONFIG_PPC_SPLPAR=y
+CONFIG_DTL=y
 CONFIG_SCANLOG=m
 CONFIG_PPC_SMLPAR=y
-CONFIG_DTL=y
+CONFIG_IBMEBUS=y
 CONFIG_PPC_MAPLE=y
 CONFIG_PPC_PASEMI=y
 CONFIG_PPC_PASEMI_IOMMU=y
@@ -41,9 +40,8 @@ CONFIG_PS3_FLASH=m
 CONFIG_PS3_LPM=m
 CONFIG_PPC_IBM_CELL_BLADE=y
 CONFIG_RTAS_FLASH=m
-CONFIG_IBMEBUS=y
-CONFIG_CPU_FREQ_PMAC64=y
 CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y
+CONFIG_CPU_FREQ_PMAC64=y
 CONFIG_HZ_100=y
 CONFIG_BINFMT_MISC=m
 CONFIG_PPC_TRANSACTIONAL_MEM=y
@@ -51,14 +49,15 @@ CONFIG_KEXEC=y
 CONFIG_KEXEC_FILE=y
 CONFIG_CRASH_DUMP=y
 CONFIG_IRQ_ALL_CPUS=y
-CONFIG_MEMORY_HOTREMOVE=y
 CONFIG_KSM=y
+CONFIG_TRANSPARENT_HUGEPAGE=y
+CONFIG_PPC_64K_PAGES=y
 CONFIG_SCHED_SMT=y
-CONFIG_PCCARD=y
-CONFIG_ELECTRA_CF=y
 CONFIG_HOTPLUG_PCI=y
 CONFIG_HOTPLUG_PCI_RPA=m
 CONFIG_HOTPLUG_PCI_RPA_DLPAR=m
+CONFIG_PCCARD=y
+CONFIG_ELECTRA_CF=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
@@ -154,7 +153,6 @@ CONFIG_DUMMY=m
 CONFIG_NETCONSOLE=y
 CONFIG_TUN=m
 CONFIG_VIRTIO_NET=m
-CONFIG_VHOST_NET=m
 CONFIG_VORTEX=m
 CONFIG_ACENIC=m
 CONFIG_ACENIC_OMIT_TIGON_I=y
@@ -181,15 +179,14 @@ CONFIG_SUNGEM=y
 CONFIG_GELIC_NET=m
 CONFIG_GELIC_WIRELESS=y
 CONFIG_SPIDER_NET=m
-CONFIG_MARVELL_PHY=y
 CONFIG_BROADCOM_PHY=m
+CONFIG_MARVELL_PHY=y
 CONFIG_PPP=m
 CONFIG_PPP_BSDCOMP=m
 CONFIG_PPP_DEFLATE=m
 CONFIG_PPPOE=m
 CONFIG_PPP_ASYNC=m
 CONFIG_PPP_SYNC_TTY=m
-# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
 CONFIG_INPUT_EVDEV=m
 CONFIG_INPUT_MISC=y
 CONFIG_INPUT_PCSPKR=m
@@ -197,7 +194,6 @@ CONFIG_INPUT_PCSPKR=m
 CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
 CONFIG_SERIAL_ICOM=m
-CONFIG_SERIAL_TXX9_CONSOLE=y
 CONFIG_SERIAL_JSM=m
 CONFIG_HVC_CONSOLE=y
 CONFIG_HVC_RTAS=y
@@ -250,6 +246,9 @@ CONFIG_USB_EHCI_HCD=y
 CONFIG_USB_OHCI_HCD=y
 CONFIG_USB_STORAGE=m
 CONFIG_USB_APPLEDISPLAY=m
+CONFIG_NEW_LEDS=y
+CONFIG_LEDS_CLASS=m
+CONFIG_LEDS_POWERNV=m
 CONFIG_INFINIBAND=m
 CONFIG_INFINIBAND_USER_MAD=m
 CONFIG_INFINIBAND_USER_ACCESS=m
@@ -267,7 +266,7 @@ CONFIG_RTC_CLASS=y
 CONFIG_RTC_DRV_DS1307=y
 CONFIG_VIRTIO_PCI=m
 CONFIG_VIRTIO_BALLOON=m
-CONFIG_FS_DAX=y
+CONFIG_RAS=y
 CONFIG_EXT2_FS=y
 CONFIG_EXT2_FS_XATTR=y
 CONFIG_EXT2_FS_POSIX_ACL=y
@@ -287,6 +286,7 @@ CONFIG_XFS_POSIX_ACL=y
 CONFIG_BTRFS_FS=m
 CONFIG_BTRFS_FS_POSIX_ACL=y
 CONFIG_NILFS2_FS=m
+CONFIG_FS_DAX=y
 CONFIG_AUTOFS4_FS=m
 CONFIG_FUSE_FS=m
 CONFIG_OVERLAY_FS=m
@@ -328,9 +328,11 @@ CONFIG_SOFTLOCKUP_DETECTOR=y
 CONFIG_HARDLOCKUP_DETECTOR=y
 CONFIG_DEBUG_MUTEXES=y
 CONFIG_LATENCYTOP=y
+CONFIG_FTRACE=y
+CONFIG_FUNCTION_TRACER=y
+CONFIG_FUNCTION_GRAPH_TRACER=y
 CONFIG_SCHED_TRACER=y
 CONFIG_BLK_DEV_IO_TRACE=y
-CONFIG_UPROBE_EVENT=y
 CONFIG_CODE_PATCHING_SELFTEST=y
 CONFIG_FTR_FIXUP_SELFTEST=y
 CONFIG_MSI_BITMAP_SELFTEST=y
@@ -343,6 +345,7 @@ CONFIG_CRYPTO_HMAC=y
 CONFIG_CRYPTO_CRC32C_VPMSUM=m
 CONFIG_CRYPTO_MD5_PPC=m
 CONFIG_CRYPTO_MICHAEL_MIC=m
+CONFIG_CRYPTO_SHA1_PPC=m
 CONFIG_CRYPTO_SHA256=y
 CONFIG_CRYPTO_TGR192=m
 CONFIG_CRYPTO_WP512=m
@@ -351,19 +354,14 @@ CONFIG_CRYPTO_BLOWFISH=m
 CONFIG_CRYPTO_CAST6=m
 CONFIG_CRYPTO_KHAZAD=m
 CONFIG_CRYPTO_SALSA20=m
-CONFIG_CRYPTO_SHA1_PPC=m
 CONFIG_CRYPTO_SERPENT=m
 CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_LZO=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_CRYPTO_DEV_NX=y
 CONFIG_CRYPTO_DEV_NX_ENCRYPT=m
 CONFIG_CRYPTO_DEV_VMX=y
-CONFIG_CRYPTO_DEV_VMX_ENCRYPT=m
 CONFIG_VIRTUALIZATION=y
 CONFIG_KVM_BOOK3S_64=m
 CONFIG_KVM_BOOK3S_64_HV=m
-CONFIG_NEW_LEDS=y
-CONFIG_LEDS_CLASS=m
-CONFIG_LEDS_POWERNV=m
+CONFIG_VHOST_NET=m
diff --git a/arch/powerpc/configs/ppc64e_defconfig b/arch/powerpc/configs/ppc64e_defconfig
index 6340e6c53c54..d0fe0f8f77c2 100644
--- a/arch/powerpc/configs/ppc64e_defconfig
+++ b/arch/powerpc/configs/ppc64e_defconfig
@@ -3,7 +3,6 @@ CONFIG_PPC_BOOK3E_64=y
 CONFIG_SMP=y
 CONFIG_SYSVIPC=y
 CONFIG_POSIX_MQUEUE=y
-CONFIG_FHANDLE=y
 CONFIG_NO_HZ=y
 CONFIG_HIGH_RES_TIMERS=y
 CONFIG_TASKSTATS=y
@@ -30,8 +29,8 @@ CONFIG_BINFMT_MISC=m
 CONFIG_IRQ_ALL_CPUS=y
 CONFIG_SPARSEMEM_MANUAL=y
 CONFIG_PCI_MSI=y
-CONFIG_PCCARD=y
 CONFIG_HOTPLUG_PCI=y
+CONFIG_PCCARD=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
@@ -108,15 +107,14 @@ CONFIG_E100=y
 CONFIG_E1000=y
 CONFIG_IXGB=m
 CONFIG_SUNGEM=y
-CONFIG_MARVELL_PHY=y
 CONFIG_BROADCOM_PHY=m
+CONFIG_MARVELL_PHY=y
 CONFIG_PPP=m
 CONFIG_PPP_BSDCOMP=m
 CONFIG_PPP_DEFLATE=m
 CONFIG_PPPOE=m
 CONFIG_PPP_ASYNC=m
 CONFIG_PPP_SYNC_TTY=m
-# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
 CONFIG_INPUT_EVDEV=m
 CONFIG_INPUT_MISC=y
 # CONFIG_SERIO_SERPORT is not set
@@ -172,10 +170,8 @@ CONFIG_INFINIBAND=m
 CONFIG_INFINIBAND_MTHCA=m
 CONFIG_INFINIBAND_IPOIB=m
 CONFIG_INFINIBAND_ISER=m
-CONFIG_EDAC=y
 CONFIG_RTC_CLASS=y
 CONFIG_RTC_DRV_DS1307=y
-CONFIG_FS_DAX=y
 CONFIG_EXT2_FS=y
 CONFIG_EXT2_FS_XATTR=y
 CONFIG_EXT2_FS_POSIX_ACL=y
@@ -192,6 +188,7 @@ CONFIG_JFS_POSIX_ACL=y
 CONFIG_JFS_SECURITY=y
 CONFIG_XFS_FS=m
 CONFIG_XFS_POSIX_ACL=y
+CONFIG_FS_DAX=y
 CONFIG_AUTOFS4_FS=m
 CONFIG_ISO9660_FS=y
 CONFIG_UDF_FS=m
@@ -251,5 +248,4 @@ CONFIG_CRYPTO_SERPENT=m
 CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_LZO=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
 # CONFIG_CRYPTO_HW is not set
diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig
index 18d0d60dadbf..ae6eba482d75 100644
--- a/arch/powerpc/configs/ppc6xx_defconfig
+++ b/arch/powerpc/configs/ppc6xx_defconfig
@@ -11,10 +11,10 @@ CONFIG_TASK_DELAY_ACCT=y
 CONFIG_TASK_XACCT=y
 CONFIG_TASK_IO_ACCOUNTING=y
 CONFIG_CGROUPS=y
-CONFIG_CGROUP_DEVICE=y
-CONFIG_CGROUP_CPUACCT=y
 CONFIG_CGROUP_SCHED=y
 CONFIG_RT_GROUP_SCHED=y
+CONFIG_CGROUP_DEVICE=y
+CONFIG_CGROUP_CPUACCT=y
 CONFIG_USER_NS=y
 CONFIG_BLK_DEV_INITRD=y
 # CONFIG_COMPAT_BRK is not set
@@ -61,7 +61,7 @@ CONFIG_SBC8641D=y
 CONFIG_MPC8610_HPCD=y
 CONFIG_GEF_SBC610=y
 CONFIG_CPU_FREQ=y
-CONFIG_CPU_FREQ_STAT=m
+CONFIG_CPU_FREQ_STAT=y
 CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE=y
 CONFIG_CPU_FREQ_GOV_PERFORMANCE=y
 CONFIG_CPU_FREQ_GOV_POWERSAVE=m
@@ -70,7 +70,6 @@ CONFIG_CPU_FREQ_GOV_CONSERVATIVE=m
 CONFIG_CPU_FREQ_PMAC=y
 CONFIG_TAU=y
 CONFIG_TAU_AVERAGE=y
-CONFIG_QUICC_ENGINE=y
 CONFIG_QE_GPIO=y
 CONFIG_MCU_MPC8349EMITX=y
 CONFIG_HIGHMEM=y
@@ -141,7 +140,6 @@ CONFIG_NETFILTER=y
 CONFIG_NF_CONNTRACK=m
 CONFIG_NF_CONNTRACK_SECMARK=y
 CONFIG_NF_CONNTRACK_EVENTS=y
-CONFIG_NF_CT_PROTO_UDPLITE=m
 CONFIG_NF_CONNTRACK_AMANDA=m
 CONFIG_NF_CONNTRACK_FTP=m
 CONFIG_NF_CONNTRACK_H323=m
@@ -187,7 +185,6 @@ CONFIG_NETFILTER_XT_MATCH_QUOTA=m
 CONFIG_NETFILTER_XT_MATCH_RATEEST=m
 CONFIG_NETFILTER_XT_MATCH_REALM=m
 CONFIG_NETFILTER_XT_MATCH_RECENT=m
-CONFIG_NETFILTER_XT_MATCH_SOCKET=m
 CONFIG_NETFILTER_XT_MATCH_STATE=m
 CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
 CONFIG_NETFILTER_XT_MATCH_STRING=m
@@ -195,7 +192,6 @@ CONFIG_NETFILTER_XT_MATCH_TCPMSS=m
 CONFIG_NETFILTER_XT_MATCH_TIME=m
 CONFIG_NETFILTER_XT_MATCH_U32=m
 CONFIG_NF_CONNTRACK_IPV4=m
-# CONFIG_NF_CONNTRACK_PROC_COMPAT is not set
 CONFIG_IP_NF_IPTABLES=m
 CONFIG_IP_NF_MATCH_AH=m
 CONFIG_IP_NF_MATCH_ECN=m
@@ -334,9 +330,7 @@ CONFIG_BT_BNEP_MC_FILTER=y
 CONFIG_BT_BNEP_PROTO_FILTER=y
 CONFIG_BT_HIDP=m
 CONFIG_BT_HCIUART=m
-CONFIG_BT_HCIUART_H4=y
 CONFIG_BT_HCIUART_BCSP=y
-CONFIG_BT_HCIUART_LL=y
 CONFIG_BT_HCIBCM203X=m
 CONFIG_BT_HCIBPA10X=m
 CONFIG_BT_HCIBFUSB=m
@@ -370,7 +364,6 @@ CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_SIZE=16384
 CONFIG_CDROM_PKTCDVD=m
 CONFIG_VIRTIO_BLK=m
-CONFIG_BLK_DEV_HD=y
 CONFIG_ENCLOSURE_SERVICES=m
 CONFIG_SENSORS_TSL2550=m
 CONFIG_EEPROM_AT24=m
@@ -548,16 +541,16 @@ CONFIG_PCMCIA_XIRC2PS=m
 CONFIG_FDDI=y
 CONFIG_SKFP=m
 CONFIG_NET_SB1000=m
-CONFIG_MARVELL_PHY=m
-CONFIG_DAVICOM_PHY=m
-CONFIG_QSEMI_PHY=m
-CONFIG_LXT_PHY=m
-CONFIG_CICADA_PHY=m
-CONFIG_VITESSE_PHY=m
-CONFIG_SMSC_PHY=m
 CONFIG_BROADCOM_PHY=m
+CONFIG_CICADA_PHY=m
+CONFIG_DAVICOM_PHY=m
 CONFIG_ICPLUS_PHY=m
+CONFIG_LXT_PHY=m
+CONFIG_MARVELL_PHY=m
+CONFIG_QSEMI_PHY=m
 CONFIG_REALTEK_PHY=m
+CONFIG_SMSC_PHY=m
+CONFIG_VITESSE_PHY=m
 CONFIG_PLIP=m
 CONFIG_PPP_DEFLATE=m
 CONFIG_PPP_FILTER=y
@@ -585,7 +578,6 @@ CONFIG_USB_ALI_M5632=y
 CONFIG_USB_AN2720=y
 CONFIG_USB_EPSON2888=y
 CONFIG_USB_KC2190=y
-# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
 CONFIG_INPUT_JOYDEV=m
 CONFIG_INPUT_EVDEV=y
 CONFIG_MOUSE_SERIAL=m
@@ -647,7 +639,6 @@ CONFIG_SYNCLINKMP=m
 CONFIG_SYNCLINK_GT=m
 CONFIG_NOZOMI=m
 CONFIG_N_HDLC=m
-# CONFIG_DEVKMEM is not set
 CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
 CONFIG_SERIAL_8250_CS=m
@@ -657,13 +648,13 @@ CONFIG_SERIAL_8250_MANY_PORTS=y
 CONFIG_SERIAL_8250_SHARE_IRQ=y
 CONFIG_SERIAL_8250_DETECT_IRQ=y
 CONFIG_SERIAL_8250_RSA=y
+CONFIG_SERIAL_OF_PLATFORM=y
 CONFIG_SERIAL_UARTLITE=m
 CONFIG_SERIAL_PMACZILOG=m
 CONFIG_SERIAL_MPC52xx=y
 CONFIG_SERIAL_MPC52xx_CONSOLE=y
 CONFIG_SERIAL_MPC52xx_CONSOLE_BAUD=115200
 CONFIG_SERIAL_JSM=m
-CONFIG_SERIAL_OF_PLATFORM=y
 CONFIG_PRINTER=m
 CONFIG_LP_CONSOLE=y
 CONFIG_PPDEV=m
@@ -748,9 +739,10 @@ CONFIG_MFD_SM501_GPIO=y
 CONFIG_AGP=y
 CONFIG_AGP_UNINORTH=y
 CONFIG_DRM=m
+CONFIG_DRM_RADEON=m
+CONFIG_DRM_LEGACY=y
 CONFIG_DRM_TDFX=m
 CONFIG_DRM_R128=m
-CONFIG_DRM_RADEON=m
 CONFIG_DRM_MGA=m
 CONFIG_DRM_SIS=m
 CONFIG_DRM_VIA=m
@@ -899,7 +891,7 @@ CONFIG_USB=y
 CONFIG_USB_ANNOUNCE_NEW_DEVICES=y
 CONFIG_USB_MON=y
 CONFIG_USB_EHCI_HCD=m
-CONFIG_USB_EHCI_FSL=y
+CONFIG_USB_EHCI_FSL=m
 CONFIG_USB_OHCI_HCD=m
 CONFIG_USB_OHCI_HCD_PPC_OF_BE=y
 CONFIG_USB_OHCI_HCD_PPC_OF_LE=y
@@ -967,7 +959,6 @@ CONFIG_USB_ADUTUX=m
 CONFIG_USB_SEVSEG=m
 CONFIG_USB_LEGOTOWER=m
 CONFIG_USB_LCD=m
-CONFIG_USB_LED=m
 CONFIG_USB_IDMOUSE=m
 CONFIG_USB_FTDI_ELAN=m
 CONFIG_USB_APPLEDISPLAY=m
@@ -1020,15 +1011,14 @@ CONFIG_UIO_CIF=m
 CONFIG_UIO_PDRV_GENIRQ=m
 CONFIG_VIRTIO_PCI=m
 CONFIG_VIRTIO_BALLOON=m
-CONFIG_FS_DAX=y
+CONFIG_QUICC_ENGINE=y
 CONFIG_EXT2_FS=m
 CONFIG_EXT2_FS_XATTR=y
 CONFIG_EXT2_FS_POSIX_ACL=y
 CONFIG_EXT2_FS_SECURITY=y
-CONFIG_EXT4_FS=m
+CONFIG_EXT4_FS=y
 CONFIG_EXT4_FS_POSIX_ACL=y
 CONFIG_EXT4_FS_SECURITY=y
-CONFIG_EXT4_FS=y
 CONFIG_JBD2_DEBUG=y
 CONFIG_REISERFS_FS=m
 CONFIG_REISERFS_PROC_INFO=y
@@ -1042,6 +1032,7 @@ CONFIG_XFS_FS=m
 CONFIG_XFS_QUOTA=y
 CONFIG_XFS_POSIX_ACL=y
 CONFIG_GFS2_FS=m
+CONFIG_FS_DAX=y
 CONFIG_QUOTA_NETLINK_INTERFACE=y
 CONFIG_AUTOFS4_FS=m
 CONFIG_FUSE_FS=m
@@ -1146,7 +1137,6 @@ CONFIG_DEBUG_VM=y
 CONFIG_DEBUG_HIGHMEM=y
 CONFIG_DEBUG_STACKOVERFLOW=y
 CONFIG_DEBUG_SHIRQ=y
-CONFIG_TIMER_STATS=y
 CONFIG_DEBUG_RT_MUTEXES=y
 CONFIG_DEBUG_SPINLOCK=y
 CONFIG_DEBUG_MUTEXES=y
@@ -1173,7 +1163,6 @@ CONFIG_SECURITY_SELINUX=y
 CONFIG_SECURITY_SELINUX_BOOTPARAM=y
 CONFIG_SECURITY_SELINUX_DISABLE=y
 CONFIG_CRYPTO_TEST=m
-CONFIG_CRYPTO_GCM=m
 CONFIG_CRYPTO_CTS=m
 CONFIG_CRYPTO_LRW=m
 CONFIG_CRYPTO_PCBC=m
@@ -1201,7 +1190,6 @@ CONFIG_CRYPTO_SERPENT=m
 CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_LZO=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_CRYPTO_DEV_HIFN_795X=m
 CONFIG_CRYPTO_DEV_HIFN_795X_RNG=y
 CONFIG_CRYPTO_DEV_TALITOS=m
diff --git a/arch/powerpc/configs/pq2fads_defconfig b/arch/powerpc/configs/pq2fads_defconfig
index 50b2bad51d0a..0ededa8c837d 100644
--- a/arch/powerpc/configs/pq2fads_defconfig
+++ b/arch/powerpc/configs/pq2fads_defconfig
@@ -79,4 +79,3 @@ CONFIG_CRYPTO_ECB=y
 CONFIG_CRYPTO_PCBC=y
 CONFIG_CRYPTO_MD5=y
 CONFIG_CRYPTO_DES=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/ps3_defconfig b/arch/powerpc/configs/ps3_defconfig
index ee0ec5a682fc..2efa025bf483 100644
--- a/arch/powerpc/configs/ps3_defconfig
+++ b/arch/powerpc/configs/ps3_defconfig
@@ -5,7 +5,6 @@ CONFIG_SMP=y
 CONFIG_NR_CPUS=2
 CONFIG_SYSVIPC=y
 CONFIG_POSIX_MQUEUE=y
-CONFIG_FHANDLE=y
 CONFIG_HIGH_RES_TIMERS=y
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_CC_OPTIMIZE_FOR_SIZE=y
@@ -94,7 +93,6 @@ CONFIG_USB_USBNET=m
 # CONFIG_USB_NET_CDC_SUBSET is not set
 # CONFIG_USB_NET_ZAURUS is not set
 CONFIG_INPUT_FF_MEMLESS=m
-# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
 CONFIG_INPUT_JOYDEV=m
 CONFIG_INPUT_EVDEV=m
 # CONFIG_INPUT_KEYBOARD is not set
@@ -161,7 +159,6 @@ CONFIG_NLS_ISO8859_1=y
 CONFIG_CRC_CCITT=m
 CONFIG_CRC_T10DIF=y
 CONFIG_DEBUG_INFO=y
-CONFIG_DEBUG_FS=y
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_DEBUG_MEMORY_INIT=y
 CONFIG_DEBUG_STACKOVERFLOW=y
diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig
index fd5d98a0b95c..3d935969e5a2 100644
--- a/arch/powerpc/configs/pseries_defconfig
+++ b/arch/powerpc/configs/pseries_defconfig
@@ -1,11 +1,8 @@
 CONFIG_PPC64=y
-CONFIG_SMP=y
 CONFIG_NR_CPUS=2048
 CONFIG_SYSVIPC=y
 CONFIG_POSIX_MQUEUE=y
-CONFIG_FHANDLE=y
 CONFIG_AUDIT=y
-CONFIG_AUDITSYSCALL=y
 CONFIG_IRQ_DOMAIN_DEBUG=y
 CONFIG_NO_HZ=y
 CONFIG_HIGH_RES_TIMERS=y
@@ -18,17 +15,16 @@ CONFIG_IKCONFIG_PROC=y
 CONFIG_LOG_BUF_SHIFT=18
 CONFIG_LOG_CPU_MAX_BUF_SHIFT=13
 CONFIG_NUMA_BALANCING=y
-CONFIG_NUMA_BALANCING_DEFAULT_ENABLED=y
 CONFIG_CGROUPS=y
+CONFIG_MEMCG=y
+CONFIG_MEMCG_SWAP=y
+CONFIG_CGROUP_SCHED=y
 CONFIG_CGROUP_FREEZER=y
-CONFIG_CGROUP_DEVICE=y
 CONFIG_CPUSETS=y
+CONFIG_CGROUP_DEVICE=y
 CONFIG_CGROUP_CPUACCT=y
-CONFIG_CGROUP_BPF=y
-CONFIG_MEMCG=y
-CONFIG_MEMCG_SWAP=y
 CONFIG_CGROUP_PERF=y
-CONFIG_CGROUP_SCHED=y
+CONFIG_CGROUP_BPF=y
 CONFIG_USER_NS=y
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_BPF_SYSCALL=y
@@ -43,12 +39,12 @@ CONFIG_MODVERSIONS=y
 CONFIG_MODULE_SRCVERSION_ALL=y
 CONFIG_PARTITION_ADVANCED=y
 CONFIG_PPC_SPLPAR=y
+CONFIG_DTL=y
 CONFIG_SCANLOG=m
 CONFIG_PPC_SMLPAR=y
-CONFIG_DTL=y
+CONFIG_IBMEBUS=y
 # CONFIG_PPC_PMAC is not set
 CONFIG_RTAS_FLASH=m
-CONFIG_IBMEBUS=y
 CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y
 CONFIG_HZ_100=y
 CONFIG_BINFMT_MISC=m
@@ -155,7 +151,6 @@ CONFIG_NETCONSOLE=y
 CONFIG_TUN=m
 CONFIG_VETH=m
 CONFIG_VIRTIO_NET=m
-CONFIG_VHOST_NET=m
 CONFIG_VORTEX=m
 CONFIG_ACENIC=m
 CONFIG_ACENIC_OMIT_TIGON_I=y
@@ -183,12 +178,10 @@ CONFIG_PPP_DEFLATE=m
 CONFIG_PPPOE=m
 CONFIG_PPP_ASYNC=m
 CONFIG_PPP_SYNC_TTY=m
-# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
 CONFIG_INPUT_EVDEV=m
 CONFIG_INPUT_MISC=y
 CONFIG_INPUT_PCSPKR=m
 # CONFIG_SERIO_SERPORT is not set
-CONFIG_DEVPTS_MULTIPLE_INSTANCES=y
 CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
 CONFIG_SERIAL_ICOM=m
@@ -198,8 +191,6 @@ CONFIG_HVC_RTAS=y
 CONFIG_HVCS=m
 CONFIG_VIRTIO_CONSOLE=m
 CONFIG_IBM_BSR=m
-CONFIG_RTC_CLASS=y
-CONFIG_RTC_DRV_GENERIC=y
 CONFIG_RAW_DRIVER=y
 CONFIG_MAX_RAW_DEVS=1024
 CONFIG_FB=y
@@ -227,6 +218,9 @@ CONFIG_USB_EHCI_HCD=y
 # CONFIG_USB_EHCI_HCD_PPC_OF is not set
 CONFIG_USB_OHCI_HCD=y
 CONFIG_USB_STORAGE=m
+CONFIG_NEW_LEDS=y
+CONFIG_LEDS_CLASS=m
+CONFIG_LEDS_POWERNV=m
 CONFIG_INFINIBAND=m
 CONFIG_INFINIBAND_USER_MAD=m
 CONFIG_INFINIBAND_USER_ACCESS=m
@@ -238,9 +232,10 @@ CONFIG_INFINIBAND_IPOIB=m
 CONFIG_INFINIBAND_IPOIB_CM=y
 CONFIG_INFINIBAND_SRP=m
 CONFIG_INFINIBAND_ISER=m
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_DRV_GENERIC=y
 CONFIG_VIRTIO_PCI=m
 CONFIG_VIRTIO_BALLOON=m
-CONFIG_FS_DAX=y
 CONFIG_EXT2_FS=y
 CONFIG_EXT2_FS_XATTR=y
 CONFIG_EXT2_FS_POSIX_ACL=y
@@ -256,6 +251,7 @@ CONFIG_XFS_POSIX_ACL=y
 CONFIG_BTRFS_FS=m
 CONFIG_BTRFS_FS_POSIX_ACL=y
 CONFIG_NILFS2_FS=m
+CONFIG_FS_DAX=y
 CONFIG_AUTOFS4_FS=m
 CONFIG_FUSE_FS=m
 CONFIG_OVERLAY_FS=m
@@ -294,9 +290,11 @@ CONFIG_DEBUG_STACKOVERFLOW=y
 CONFIG_SOFTLOCKUP_DETECTOR=y
 CONFIG_HARDLOCKUP_DETECTOR=y
 CONFIG_LATENCYTOP=y
+CONFIG_FTRACE=y
+CONFIG_FUNCTION_TRACER=y
+CONFIG_FUNCTION_GRAPH_TRACER=y
 CONFIG_SCHED_TRACER=y
 CONFIG_BLK_DEV_IO_TRACE=y
-CONFIG_UPROBE_EVENT=y
 CONFIG_CODE_PATCHING_SELFTEST=y
 CONFIG_FTR_FIXUP_SELFTEST=y
 CONFIG_MSI_BITMAP_SELFTEST=y
@@ -307,6 +305,7 @@ CONFIG_CRYPTO_HMAC=y
 CONFIG_CRYPTO_CRC32C_VPMSUM=m
 CONFIG_CRYPTO_MD5_PPC=m
 CONFIG_CRYPTO_MICHAEL_MIC=m
+CONFIG_CRYPTO_SHA1_PPC=m
 CONFIG_CRYPTO_SHA256=y
 CONFIG_CRYPTO_TGR192=m
 CONFIG_CRYPTO_WP512=m
@@ -315,19 +314,14 @@ CONFIG_CRYPTO_BLOWFISH=m
 CONFIG_CRYPTO_CAST6=m
 CONFIG_CRYPTO_KHAZAD=m
 CONFIG_CRYPTO_SALSA20=m
-CONFIG_CRYPTO_SHA1_PPC=m
 CONFIG_CRYPTO_SERPENT=m
 CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_LZO=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_CRYPTO_DEV_NX=y
 CONFIG_CRYPTO_DEV_NX_ENCRYPT=m
 CONFIG_CRYPTO_DEV_VMX=y
-CONFIG_CRYPTO_DEV_VMX_ENCRYPT=m
 CONFIG_VIRTUALIZATION=y
 CONFIG_KVM_BOOK3S_64=m
 CONFIG_KVM_BOOK3S_64_HV=m
-CONFIG_NEW_LEDS=y
-CONFIG_LEDS_CLASS=m
-CONFIG_LEDS_POWERNV=m
+CONFIG_VHOST_NET=m
diff --git a/arch/powerpc/configs/tqm8xx_defconfig b/arch/powerpc/configs/tqm8xx_defconfig
index 78fddf24b5d3..cd72193fac0a 100644
--- a/arch/powerpc/configs/tqm8xx_defconfig
+++ b/arch/powerpc/configs/tqm8xx_defconfig
@@ -18,6 +18,7 @@ CONFIG_PARTITION_ADVANCED=y
 CONFIG_TQM8XX=y
 CONFIG_8xx_COPYBACK=y
 # CONFIG_8xx_CPU15 is not set
+CONFIG_GEN_RTC=y
 CONFIG_HZ_100=y
 # CONFIG_SECCOMP is not set
 CONFIG_NET=y
@@ -44,7 +45,6 @@ CONFIG_MTD_PHYSMAP_OF=y
 CONFIG_NETDEVICES=y
 CONFIG_FS_ENET=y
 CONFIG_DAVICOM_PHY=y
-CONFIG_FIXED_PHY=y
 # CONFIG_WLAN is not set
 # CONFIG_INPUT is not set
 # CONFIG_SERIO is not set
@@ -53,7 +53,6 @@ CONFIG_FIXED_PHY=y
 CONFIG_SERIAL_CPM=y
 CONFIG_SERIAL_CPM_CONSOLE=y
 CONFIG_HW_RANDOM=y
-CONFIG_GEN_RTC=y
 # CONFIG_HWMON is not set
 # CONFIG_USB_SUPPORT is not set
 # CONFIG_DNOTIFY is not set
diff --git a/arch/powerpc/configs/wii_defconfig b/arch/powerpc/configs/wii_defconfig
index dcdd51b57783..aef41b17a8bc 100644
--- a/arch/powerpc/configs/wii_defconfig
+++ b/arch/powerpc/configs/wii_defconfig
@@ -55,9 +55,6 @@ CONFIG_B43_SDIO=y
 # CONFIG_B43_PHY_LP is not set
 CONFIG_B43_DEBUG=y
 CONFIG_INPUT_FF_MEMLESS=m
-# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
-CONFIG_INPUT_MOUSEDEV_SCREEN_X=640
-CONFIG_INPUT_MOUSEDEV_SCREEN_Y=480
 CONFIG_INPUT_JOYDEV=y
 CONFIG_INPUT_EVDEV=y
 # CONFIG_KEYBOARD_ATKBD is not set
@@ -68,7 +65,6 @@ CONFIG_INPUT_UINPUT=y
 # CONFIG_SERIO_I8042 is not set
 # CONFIG_SERIO_SERPORT is not set
 CONFIG_LEGACY_PTY_COUNT=64
-# CONFIG_DEVKMEM is not set
 # CONFIG_HW_RANDOM is not set
 CONFIG_NVRAM=y
 CONFIG_I2C=y
@@ -119,5 +115,4 @@ CONFIG_SCHED_TRACER=y
 CONFIG_BLK_DEV_IO_TRACE=y
 CONFIG_DMA_API_DEBUG=y
 CONFIG_PPC_EARLY_DEBUG=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
 # CONFIG_CRYPTO_HW is not set
diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild
index 5c4fbc80dc6c..2542ea15d338 100644
--- a/arch/powerpc/include/asm/Kbuild
+++ b/arch/powerpc/include/asm/Kbuild
@@ -8,3 +8,4 @@ generic-y += mcs_spinlock.h
 generic-y += preempt.h
 generic-y += rwsem.h
 generic-y += vtime.h
+generic-y += msi.h
diff --git a/arch/powerpc/include/asm/asm-compat.h b/arch/powerpc/include/asm/asm-compat.h
index cee3aa087653..7f2a7702596c 100644
--- a/arch/powerpc/include/asm/asm-compat.h
+++ b/arch/powerpc/include/asm/asm-compat.h
@@ -25,7 +25,7 @@
 #define PPC_LCMPI	stringify_in_c(cmpdi)
 #define PPC_LCMPLI	stringify_in_c(cmpldi)
 #define PPC_LCMP	stringify_in_c(cmpd)
-#define PPC_LONG	stringify_in_c(.llong)
+#define PPC_LONG	stringify_in_c(.8byte)
 #define PPC_LONG_ALIGN	stringify_in_c(.balign 8)
 #define PPC_TLNEI	stringify_in_c(tdnei)
 #define PPC_LLARX(t, a, b, eh)	PPC_LDARX(t, a, b, eh)
diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h
index 25d42bd3f114..9c601adfc500 100644
--- a/arch/powerpc/include/asm/barrier.h
+++ b/arch/powerpc/include/asm/barrier.h
@@ -74,13 +74,6 @@ do {									\
 	___p1;								\
 })
 
-/*
- * This must resolve to hwsync on SMP for the context switch path.
- * See _switch, and core scheduler context switch memory ordering
- * comments.
- */
-#define smp_mb__before_spinlock()   smp_mb()
-
 #include <asm-generic/barrier.h>
 
 #endif /* _ASM_POWERPC_BARRIER_H */
diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h
index 7fb755880409..4d453f979553 100644
--- a/arch/powerpc/include/asm/book3s/32/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
@@ -294,13 +294,11 @@ static inline void __ptep_set_access_flags(struct mm_struct *mm,
 #define __pte_to_swp_entry(pte)		((swp_entry_t) { pte_val(pte) >> 3 })
 #define __swp_entry_to_pte(x)		((pte_t) { (x).val << 3 })
 
-extern int get_pteptr(struct mm_struct *mm, unsigned long addr, pte_t **ptep,
-		      pmd_t **pmdp);
-
 int map_kernel_page(unsigned long va, phys_addr_t pa, int flags);
 
 /* Generic accessors to PTE bits */
 static inline int pte_write(pte_t pte)		{ return !!(pte_val(pte) & _PAGE_RW);}
+static inline int pte_read(pte_t pte)		{ return 1; }
 static inline int pte_dirty(pte_t pte)		{ return !!(pte_val(pte) & _PAGE_DIRTY); }
 static inline int pte_young(pte_t pte)		{ return !!(pte_val(pte) & _PAGE_ACCESSED); }
 static inline int pte_special(pte_t pte)	{ return !!(pte_val(pte) & _PAGE_SPECIAL); }
diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
index 36fc7bfe9e11..f88452019114 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -40,7 +40,7 @@
  * Define the address range of the kernel non-linear virtual area
  */
 #define H_KERN_VIRT_START ASM_CONST(0xD000000000000000)
-#define H_KERN_VIRT_SIZE	ASM_CONST(0x0000100000000000)
+#define H_KERN_VIRT_SIZE  ASM_CONST(0x0000400000000000) /* 64T */
 
 /*
  * The vmalloc space starts at the beginning of that region, and
@@ -48,9 +48,11 @@
  * (we keep a quarter for the virtual memmap)
  */
 #define H_VMALLOC_START	H_KERN_VIRT_START
-#define H_VMALLOC_SIZE	(H_KERN_VIRT_SIZE >> 1)
+#define H_VMALLOC_SIZE	ASM_CONST(0x380000000000) /* 56T */
 #define H_VMALLOC_END	(H_VMALLOC_START + H_VMALLOC_SIZE)
 
+#define H_KERN_IO_START	H_VMALLOC_END
+
 /*
  * Region IDs
  */
diff --git a/arch/powerpc/include/asm/book3s/64/hugetlb.h b/arch/powerpc/include/asm/book3s/64/hugetlb.h
index 5c28bd6f2ae1..2d1ca488ca44 100644
--- a/arch/powerpc/include/asm/book3s/64/hugetlb.h
+++ b/arch/powerpc/include/asm/book3s/64/hugetlb.h
@@ -54,9 +54,7 @@ static inline pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
 #ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE
 static inline bool gigantic_page_supported(void)
 {
-	if (radix_enabled())
-		return true;
-	return false;
+	return true;
 }
 #endif
 
diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
index 6981a52b3887..f28d21c69f79 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -468,7 +468,7 @@ extern int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
 			     int psize, int ssize);
 int htab_remove_mapping(unsigned long vstart, unsigned long vend,
 			int psize, int ssize);
-extern void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages);
+extern void pseries_add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages);
 extern void demote_segment_4k(struct mm_struct *mm, unsigned long addr);
 
 #ifdef CONFIG_PPC_PSERIES
diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
index 5b4023c616f7..c3b00e8ff791 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu.h
@@ -83,6 +83,9 @@ typedef struct {
 	mm_context_id_t id;
 	u16 user_psize;		/* page size index */
 
+	/* Number of bits in the mm_cpumask */
+	atomic_t active_cpus;
+
 	/* NPU NMMU context */
 	struct npu_context *npu_context;
 
@@ -97,11 +100,6 @@ typedef struct {
 #ifdef CONFIG_PPC_SUBPAGE_PROT
 	struct subpage_prot_table spt;
 #endif /* CONFIG_PPC_SUBPAGE_PROT */
-#ifdef CONFIG_PPC_ICSWX
-	struct spinlock *cop_lockp; /* guard acop and cop_pid */
-	unsigned long acop;	/* mask of enabled coprocessor types */
-	unsigned int cop_pid;	/* pid value used with coprocessors */
-#endif /* CONFIG_PPC_ICSWX */
 #ifdef CONFIG_PPC_64K_PAGES
 	/* for 4K PTE fragment support */
 	void *pte_frag;
diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h
index e2329db9d6f4..1fcfa425cefa 100644
--- a/arch/powerpc/include/asm/book3s/64/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h
@@ -41,8 +41,6 @@ extern struct kmem_cache *pgtable_cache[];
 			pgtable_cache[(shift) - 1];	\
 		})
 
-#define PGALLOC_GFP GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO
-
 extern pte_t *pte_fragment_alloc(struct mm_struct *, unsigned long, int);
 extern void pte_fragment_free(unsigned long *, int);
 extern void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift);
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 818a58fc3f4f..b9aff515b4de 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -272,8 +272,10 @@ extern unsigned long __vmalloc_end;
 
 extern unsigned long __kernel_virt_start;
 extern unsigned long __kernel_virt_size;
+extern unsigned long __kernel_io_start;
 #define KERN_VIRT_START __kernel_virt_start
 #define KERN_VIRT_SIZE  __kernel_virt_size
+#define KERN_IO_START  __kernel_io_start
 extern struct page *vmemmap;
 extern unsigned long ioremap_bot;
 extern unsigned long pci_io_base;
@@ -298,7 +300,6 @@ extern unsigned long pci_io_base;
  *  PHB_IO_BASE = ISA_IO_BASE + 64K to ISA_IO_BASE + 2G, PHB IO spaces
  * IOREMAP_BASE = ISA_IO_BASE + 2G to VMALLOC_START + PGTABLE_RANGE
  */
-#define KERN_IO_START	(KERN_VIRT_START + (KERN_VIRT_SIZE >> 1))
 #define FULL_IO_SIZE	0x80000000ul
 #define  ISA_IO_BASE	(KERN_IO_START)
 #define  ISA_IO_END	(KERN_IO_START + 0x10000ul)
@@ -409,6 +410,11 @@ static inline int pte_write(pte_t pte)
 	return __pte_write(pte) || pte_savedwrite(pte);
 }
 
+static inline int pte_read(pte_t pte)
+{
+	return !!(pte_raw(pte) & cpu_to_be64(_PAGE_READ));
+}
+
 #define __HAVE_ARCH_PTEP_SET_WRPROTECT
 static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
 				      pte_t *ptep)
@@ -1167,6 +1173,7 @@ static inline bool arch_needs_pgtable_deposit(void)
 		return false;
 	return true;
 }
+extern void serialize_against_pte_lookup(struct mm_struct *mm);
 
 
 static inline pmd_t pmd_mkdevmap(pmd_t pmd)
diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h
index 544440b5aff3..1e5ba94e62ef 100644
--- a/arch/powerpc/include/asm/book3s/64/radix.h
+++ b/arch/powerpc/include/asm/book3s/64/radix.h
@@ -110,6 +110,8 @@
  */
 #define RADIX_VMEMMAP_BASE		(RADIX_VMALLOC_END)
 
+#define RADIX_KERN_IO_START	(RADIX_KERN_VIRT_START + (RADIX_KERN_VIRT_SIZE >> 1))
+
 #ifndef __ASSEMBLY__
 #define RADIX_PTE_TABLE_SIZE	(sizeof(pte_t) << RADIX_PTE_INDEX_SIZE)
 #define RADIX_PMD_TABLE_SIZE	(sizeof(pmd_t) << RADIX_PMD_INDEX_SIZE)
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
index cc7fbde4f53c..9b433a624bf3 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
@@ -22,22 +22,21 @@ extern void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end
 
 extern void radix__local_flush_tlb_mm(struct mm_struct *mm);
 extern void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
-extern void radix__local_flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr);
 extern void radix__local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
 					      int psize);
 extern void radix__tlb_flush(struct mmu_gather *tlb);
 #ifdef CONFIG_SMP
 extern void radix__flush_tlb_mm(struct mm_struct *mm);
 extern void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
-extern void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr);
 extern void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
 					int psize);
 #else
 #define radix__flush_tlb_mm(mm)		radix__local_flush_tlb_mm(mm)
 #define radix__flush_tlb_page(vma,addr)	radix__local_flush_tlb_page(vma,addr)
 #define radix__flush_tlb_page_psize(mm,addr,p) radix__local_flush_tlb_page_psize(mm,addr,p)
-#define radix__flush_tlb_pwc(tlb, addr)	radix__local_flush_tlb_pwc(tlb, addr)
 #endif
+extern void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr);
+extern void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr);
 extern void radix__flush_tlb_lpid_va(unsigned long lpid, unsigned long gpa,
 				     unsigned long page_size);
 extern void radix__flush_tlb_lpid(unsigned long lpid);
diff --git a/arch/powerpc/include/asm/bug.h b/arch/powerpc/include/asm/bug.h
index 87fcc1948817..7ee763d3bea9 100644
--- a/arch/powerpc/include/asm/bug.h
+++ b/arch/powerpc/include/asm/bug.h
@@ -133,6 +133,7 @@ extern int do_page_fault(struct pt_regs *, unsigned long, unsigned long);
 extern void bad_page_fault(struct pt_regs *, unsigned long, int);
 extern void _exception(int, struct pt_regs *, int, unsigned long);
 extern void die(const char *, struct pt_regs *, long);
+extern bool die_will_crash(void);
 
 #endif /* !__ASSEMBLY__ */
 
diff --git a/arch/powerpc/include/asm/cache.h b/arch/powerpc/include/asm/cache.h
index 5a90292afbad..d122f7f957ce 100644
--- a/arch/powerpc/include/asm/cache.h
+++ b/arch/powerpc/include/asm/cache.h
@@ -5,7 +5,7 @@
 
 
 /* bytes per L1 cache line */
-#if defined(CONFIG_8xx) || defined(CONFIG_403GCX)
+#if defined(CONFIG_PPC_8xx) || defined(CONFIG_403GCX)
 #define L1_CACHE_SHIFT		4
 #define MAX_COPY_PREFETCH	1
 #elif defined(CONFIG_PPC_E500MC)
diff --git a/arch/powerpc/include/asm/cpuidle.h b/arch/powerpc/include/asm/cpuidle.h
index 52586f9956bb..eb43b5c3a7b5 100644
--- a/arch/powerpc/include/asm/cpuidle.h
+++ b/arch/powerpc/include/asm/cpuidle.h
@@ -67,6 +67,17 @@
 #define ERR_DEEP_STATE_ESL_MISMATCH	-2
 
 #ifndef __ASSEMBLY__
+/* Additional SPRs that need to be saved/restored during stop */
+struct stop_sprs {
+	u64 pid;
+	u64 ldbar;
+	u64 fscr;
+	u64 hfscr;
+	u64 mmcr1;
+	u64 mmcr2;
+	u64 mmcra;
+};
+
 extern u32 pnv_fastsleep_workaround_at_entry[];
 extern u32 pnv_fastsleep_workaround_at_exit[];
 
@@ -90,20 +101,4 @@ static inline void report_invalid_psscr_val(u64 psscr_val, int err)
 
 #endif
 
-/* Idle state entry routines */
-#ifdef	CONFIG_PPC_P7_NAP
-#define IDLE_STATE_ENTER_SEQ(IDLE_INST)                         \
-	/* Magic NAP/SLEEP/WINKLE mode enter sequence */	\
-	std	r0,0(r1);					\
-	ptesync;						\
-	ld	r0,0(r1);					\
-236:	cmpd	cr0,r0,r0;					\
-	bne	236b;						\
-	IDLE_INST;						\
-
-#define	IDLE_STATE_ENTER_SEQ_NORET(IDLE_INST)			\
-	IDLE_STATE_ENTER_SEQ(IDLE_INST)                         \
-	b	.
-#endif /* CONFIG_PPC_P7_NAP */
-
 #endif
diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index d02ad93bf708..a9bf921f4efc 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -513,7 +513,7 @@ enum {
 #else
 	    CPU_FTRS_GENERIC_32 |
 #endif
-#ifdef CONFIG_8xx
+#ifdef CONFIG_PPC_8xx
 	    CPU_FTRS_8XX |
 #endif
 #ifdef CONFIG_40x
@@ -565,7 +565,7 @@ enum {
 #else
 	    CPU_FTRS_GENERIC_32 &
 #endif
-#ifdef CONFIG_8xx
+#ifdef CONFIG_PPC_8xx
 	    CPU_FTRS_8XX &
 #endif
 #ifdef CONFIG_40x
diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index 8e37b71674f4..9847ae3a12d1 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -131,7 +131,6 @@ static inline bool eeh_pe_passed(struct eeh_pe *pe)
 struct eeh_dev {
 	int mode;			/* EEH mode			*/
 	int class_code;			/* Class code of the device	*/
-	int config_addr;		/* Config address		*/
 	int pe_config_addr;		/* PE config address		*/
 	u32 config_space[16];		/* Saved PCI config space	*/
 	int pcix_cap;			/* Saved PCIx capability	*/
@@ -141,7 +140,6 @@ struct eeh_dev {
 	struct eeh_pe *pe;		/* Associated PE		*/
 	struct list_head list;		/* Form link list in the PE	*/
 	struct list_head rmv_list;	/* Record the removed edevs	*/
-	struct pci_controller *phb;	/* Associated PHB		*/
 	struct pci_dn *pdn;		/* Associated PCI device node	*/
 	struct pci_dev *pdev;		/* Associated PCI device	*/
 	bool in_error;			/* Error flag for edev		*/
@@ -262,7 +260,8 @@ typedef void *(*eeh_traverse_func)(void *data, void *flag);
 void eeh_set_pe_aux_size(int size);
 int eeh_phb_pe_create(struct pci_controller *phb);
 struct eeh_pe *eeh_phb_pe_get(struct pci_controller *phb);
-struct eeh_pe *eeh_pe_get(struct eeh_dev *edev);
+struct eeh_pe *eeh_pe_get(struct pci_controller *phb,
+			  int pe_no, int config_addr);
 int eeh_add_to_parent_pe(struct eeh_dev *edev);
 int eeh_rmv_from_parent_pe(struct eeh_dev *edev);
 void eeh_pe_update_time_stamp(struct eeh_pe *pe);
diff --git a/arch/powerpc/include/asm/fadump.h b/arch/powerpc/include/asm/fadump.h
index ce88bbe1d809..5a23010af600 100644
--- a/arch/powerpc/include/asm/fadump.h
+++ b/arch/powerpc/include/asm/fadump.h
@@ -209,11 +209,13 @@ extern int early_init_dt_scan_fw_dump(unsigned long node,
 extern int fadump_reserve_mem(void);
 extern int setup_fadump(void);
 extern int is_fadump_active(void);
+extern int should_fadump_crash(void);
 extern void crash_fadump(struct pt_regs *, const char *);
 extern void fadump_cleanup(void);
 
 #else	/* CONFIG_FA_DUMP */
 static inline int is_fadump_active(void) { return 0; }
+static inline int should_fadump_crash(void) { return 0; }
 static inline void crash_fadump(struct pt_regs *regs, const char *str) { }
 #endif
 #endif
diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h
index 2de2319b99e2..8f88f771cc55 100644
--- a/arch/powerpc/include/asm/feature-fixups.h
+++ b/arch/powerpc/include/asm/feature-fixups.h
@@ -19,11 +19,11 @@
  */
 #if defined(CONFIG_PPC64) && !defined(__powerpc64__)
 /* 64 bits kernel, 32 bits code (ie. vdso32) */
-#define FTR_ENTRY_LONG		.llong
+#define FTR_ENTRY_LONG		.8byte
 #define FTR_ENTRY_OFFSET	.long 0xffffffff; .long
 #elif defined(CONFIG_PPC64)
-#define FTR_ENTRY_LONG		.llong
-#define FTR_ENTRY_OFFSET	.llong
+#define FTR_ENTRY_LONG		.8byte
+#define FTR_ENTRY_OFFSET	.8byte
 #else
 #define FTR_ENTRY_LONG		.long
 #define FTR_ENTRY_OFFSET	.long
diff --git a/arch/powerpc/include/asm/fixmap.h b/arch/powerpc/include/asm/fixmap.h
index 4508b322f2cd..6c40dfda5912 100644
--- a/arch/powerpc/include/asm/fixmap.h
+++ b/arch/powerpc/include/asm/fixmap.h
@@ -17,6 +17,7 @@
 #ifndef __ASSEMBLY__
 #include <linux/kernel.h>
 #include <asm/page.h>
+#include <asm/pgtable.h>
 #ifdef CONFIG_HIGHMEM
 #include <linux/threads.h>
 #include <asm/kmap_types.h>
@@ -62,9 +63,6 @@ enum fixed_addresses {
 	__end_of_fixed_addresses
 };
 
-extern void __set_fixmap (enum fixed_addresses idx,
-					phys_addr_t phys, pgprot_t flags);
-
 #define __FIXADDR_SIZE	(__end_of_fixed_addresses << PAGE_SHIFT)
 #define FIXADDR_START		(FIXADDR_TOP - __FIXADDR_SIZE)
 
@@ -72,5 +70,11 @@ extern void __set_fixmap (enum fixed_addresses idx,
 
 #include <asm-generic/fixmap.h>
 
+static inline void __set_fixmap(enum fixed_addresses idx,
+				phys_addr_t phys, pgprot_t flags)
+{
+	map_kernel_page(fix_to_virt(idx), phys, pgprot_val(flags));
+}
+
 #endif /* !__ASSEMBLY__ */
 #endif
diff --git a/arch/powerpc/include/asm/fs_pd.h b/arch/powerpc/include/asm/fs_pd.h
index f79d6c74eb2a..8def56ec05c6 100644
--- a/arch/powerpc/include/asm/fs_pd.h
+++ b/arch/powerpc/include/asm/fs_pd.h
@@ -26,7 +26,7 @@
 #define cpm2_unmap(addr) do {} while(0)
 #endif
 
-#ifdef CONFIG_8xx
+#ifdef CONFIG_PPC_8xx
 #include <asm/8xx_immap.h>
 
 extern immap_t __iomem *mpc8xx_immr;
diff --git a/arch/powerpc/include/asm/futex.h b/arch/powerpc/include/asm/futex.h
index eaada6c92344..719ed9b61ea7 100644
--- a/arch/powerpc/include/asm/futex.h
+++ b/arch/powerpc/include/asm/futex.h
@@ -29,18 +29,10 @@
 	: "b" (uaddr), "i" (-EFAULT), "r" (oparg) \
 	: "cr0", "memory")
 
-static inline int futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
+static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
+		u32 __user *uaddr)
 {
-	int op = (encoded_op >> 28) & 7;
-	int cmp = (encoded_op >> 24) & 15;
-	int oparg = (encoded_op << 8) >> 20;
-	int cmparg = (encoded_op << 20) >> 20;
 	int oldval = 0, ret;
-	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
-		oparg = 1 << oparg;
-
-	if (! access_ok (VERIFY_WRITE, uaddr, sizeof(u32)))
-		return -EFAULT;
 
 	pagefault_disable();
 
@@ -66,17 +58,9 @@ static inline int futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
 
 	pagefault_enable();
 
-	if (!ret) {
-		switch (cmp) {
-		case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
-		case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
-		case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
-		case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
-		case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
-		case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
-		default: ret = -ENOSYS;
-		}
-	}
+	if (!ret)
+		*oval = oldval;
+
 	return ret;
 }
 
diff --git a/arch/powerpc/include/asm/hardirq.h b/arch/powerpc/include/asm/hardirq.h
index 8add8b861e8d..c97603d617e3 100644
--- a/arch/powerpc/include/asm/hardirq.h
+++ b/arch/powerpc/include/asm/hardirq.h
@@ -12,6 +12,10 @@ typedef struct {
 	unsigned int mce_exceptions;
 	unsigned int spurious_irqs;
 	unsigned int hmi_exceptions;
+	unsigned int sreset_irqs;
+#ifdef CONFIG_PPC_WATCHDOG
+	unsigned int soft_nmi_irqs;
+#endif
 #ifdef CONFIG_PPC_DOORBELL
 	unsigned int doorbell_irqs;
 #endif
diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h
index 7f4025a6c69e..b8a0fb442c64 100644
--- a/arch/powerpc/include/asm/hugetlb.h
+++ b/arch/powerpc/include/asm/hugetlb.h
@@ -218,18 +218,4 @@ static inline pte_t *hugepte_offset(hugepd_t hpd, unsigned long addr,
 }
 #endif /* CONFIG_HUGETLB_PAGE */
 
-/*
- * FSL Book3E platforms require special gpage handling - the gpages
- * are reserved early in the boot process by memblock instead of via
- * the .dts as on IBM platforms.
- */
-#if defined(CONFIG_HUGETLB_PAGE) && (defined(CONFIG_PPC_FSL_BOOK3E) || \
-    defined(CONFIG_PPC_8xx))
-extern void __init reserve_hugetlb_gpages(void);
-#else
-static inline void reserve_hugetlb_gpages(void)
-{
-}
-#endif
-
 #endif /* _ASM_POWERPC_HUGETLB_H */
diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index 57d38b504ff7..3d34dc0869f6 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -280,7 +280,18 @@
 #define H_RESIZE_HPT_COMMIT	0x370
 #define H_REGISTER_PROC_TBL	0x37C
 #define H_SIGNAL_SYS_RESET	0x380
-#define MAX_HCALL_OPCODE	H_SIGNAL_SYS_RESET
+#define H_INT_GET_SOURCE_INFO   0x3A8
+#define H_INT_SET_SOURCE_CONFIG 0x3AC
+#define H_INT_GET_SOURCE_CONFIG 0x3B0
+#define H_INT_GET_QUEUE_INFO    0x3B4
+#define H_INT_SET_QUEUE_CONFIG  0x3B8
+#define H_INT_GET_QUEUE_CONFIG  0x3BC
+#define H_INT_SET_OS_REPORTING_LINE 0x3C0
+#define H_INT_GET_OS_REPORTING_LINE 0x3C4
+#define H_INT_ESB               0x3C8
+#define H_INT_SYNC              0x3CC
+#define H_INT_RESET             0x3D0
+#define MAX_HCALL_OPCODE	H_INT_RESET
 
 /* H_VIOCTL functions */
 #define H_GET_VIOA_DUMP_SIZE	0x01
diff --git a/arch/powerpc/include/asm/icswx.h b/arch/powerpc/include/asm/icswx.h
index 27e588f6c72e..6a2c87577541 100644
--- a/arch/powerpc/include/asm/icswx.h
+++ b/arch/powerpc/include/asm/icswx.h
@@ -69,7 +69,10 @@ struct coprocessor_completion_block {
 #define CSB_CC_WR_PROTECTION	(16)
 #define CSB_CC_UNKNOWN_CODE	(17)
 #define CSB_CC_ABORT		(18)
+#define CSB_CC_EXCEED_BYTE_COUNT	(19)	/* P9 or later */
 #define CSB_CC_TRANSPORT	(20)
+#define CSB_CC_INVALID_CRB	(21)	/* P9 or later */
+#define CSB_CC_INVALID_DDE	(30)	/* P9 or later */
 #define CSB_CC_SEGMENTED_DDL	(31)
 #define CSB_CC_PROGRESS_POINT	(32)
 #define CSB_CC_DDE_OVERFLOW	(33)
diff --git a/arch/powerpc/include/asm/imc-pmu.h b/arch/powerpc/include/asm/imc-pmu.h
new file mode 100644
index 000000000000..7f74c282710f
--- /dev/null
+++ b/arch/powerpc/include/asm/imc-pmu.h
@@ -0,0 +1,128 @@
+#ifndef __ASM_POWERPC_IMC_PMU_H
+#define __ASM_POWERPC_IMC_PMU_H
+
+/*
+ * IMC Nest Performance Monitor counter support.
+ *
+ * Copyright (C) 2017 Madhavan Srinivasan, IBM Corporation.
+ *           (C) 2017 Anju T Sudhakar, IBM Corporation.
+ *           (C) 2017 Hemant K Shaw, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or later version.
+ */
+
+#include <linux/perf_event.h>
+#include <linux/slab.h>
+#include <linux/of.h>
+#include <linux/io.h>
+#include <asm/opal.h>
+
+/*
+ * For static allocation of some of the structures.
+ */
+#define IMC_MAX_PMUS			32
+
+/*
+ * Compatibility macros for IMC devices
+ */
+#define IMC_DTB_COMPAT			"ibm,opal-in-memory-counters"
+#define IMC_DTB_UNIT_COMPAT		"ibm,imc-counters"
+
+
+/*
+ * LDBAR: Counter address and Enable/Disable macro.
+ * perf/imc-pmu.c has the LDBAR layout information.
+ */
+#define THREAD_IMC_LDBAR_MASK           0x0003ffffffffe000ULL
+#define THREAD_IMC_ENABLE               0x8000000000000000ULL
+
+/*
+ * Structure to hold memory address information for imc units.
+ */
+struct imc_mem_info {
+	u64 *vbase;
+	u32 id;
+};
+
+/*
+ * Place holder for nest pmu events and values.
+ */
+struct imc_events {
+	u32 value;
+	char *name;
+	char *unit;
+	char *scale;
+};
+
+/* Event attribute array index */
+#define IMC_FORMAT_ATTR		0
+#define IMC_EVENT_ATTR		1
+#define IMC_CPUMASK_ATTR	2
+#define IMC_NULL_ATTR		3
+
+/* PMU Format attribute macros */
+#define IMC_EVENT_OFFSET_MASK	0xffffffffULL
+
+/*
+ * Device tree parser code detects IMC pmu support and
+ * registers new IMC pmus. This structure will hold the
+ * pmu functions, events, counter memory information
+ * and attrs for each imc pmu and will be referenced at
+ * the time of pmu registration.
+ */
+struct imc_pmu {
+	struct pmu pmu;
+	struct imc_mem_info *mem_info;
+	struct imc_events **events;
+	/*
+	 * Attribute groups for the PMU. Slot 0 used for
+	 * format attribute, slot 1 used for cpusmask attribute,
+	 * slot 2 used for event attribute. Slot 3 keep as
+	 * NULL.
+	 */
+	const struct attribute_group *attr_groups[4];
+	u32 counter_mem_size;
+	int domain;
+	/*
+	 * flag to notify whether the memory is mmaped
+	 * or allocated by kernel.
+	 */
+	bool imc_counter_mmaped;
+};
+
+/*
+ * Structure to hold id, lock and reference count for the imc events which
+ * are inited.
+ */
+struct imc_pmu_ref {
+	struct mutex lock;
+	unsigned int id;
+	int refc;
+};
+
+/*
+ * In-Memory Collection Counters type.
+ * Data comes from Device tree.
+ * Three device type are supported.
+ */
+
+enum {
+	IMC_TYPE_THREAD		= 0x1,
+	IMC_TYPE_CORE		= 0x4,
+	IMC_TYPE_CHIP           = 0x10,
+};
+
+/*
+ * Domains for IMC PMUs
+ */
+#define IMC_DOMAIN_NEST		1
+#define IMC_DOMAIN_CORE		2
+#define IMC_DOMAIN_THREAD	3
+
+extern int init_imc_pmu(struct device_node *parent,
+				struct imc_pmu *pmu_ptr, int pmu_id);
+extern void thread_imc_disable(void);
+#endif /* __ASM_POWERPC_IMC_PMU_H */
diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h
index 7cea76f11c26..83596f32f50b 100644
--- a/arch/powerpc/include/asm/kvm_book3s_asm.h
+++ b/arch/powerpc/include/asm/kvm_book3s_asm.h
@@ -104,6 +104,10 @@ struct kvmppc_host_state {
 	u8 napping;
 
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+	/*
+	 * hwthread_req/hwthread_state pair is used to pull sibling threads
+	 * out of guest on pre-ISAv3.0B CPUs where threads share MMU.
+	 */
 	u8 hwthread_req;
 	u8 hwthread_state;
 	u8 host_ipi;
diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
index cd2fc1cc1cc7..73b92017b6d7 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -76,7 +76,6 @@ struct machdep_calls {
 
 	void __noreturn	(*restart)(char *cmd);
 	void __noreturn (*halt)(void);
-	void		(*panic)(char *str);
 	void		(*cpu_die)(void);
 
 	long		(*time_init)(void); /* Optional, may be NULL */
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index 35bec1c5bd5a..309592589e30 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -77,76 +77,8 @@ extern void switch_cop(struct mm_struct *next);
 extern int use_cop(unsigned long acop, struct mm_struct *mm);
 extern void drop_cop(unsigned long acop, struct mm_struct *mm);
 
-/*
- * switch_mm is the entry point called from the architecture independent
- * code in kernel/sched/core.c
- */
-static inline void switch_mm_irqs_off(struct mm_struct *prev,
-				      struct mm_struct *next,
-				      struct task_struct *tsk)
-{
-	bool new_on_cpu = false;
-
-	/* Mark this context has been used on the new CPU */
-	if (!cpumask_test_cpu(smp_processor_id(), mm_cpumask(next))) {
-		cpumask_set_cpu(smp_processor_id(), mm_cpumask(next));
-
-		/*
-		 * This full barrier orders the store to the cpumask above vs
-		 * a subsequent operation which allows this CPU to begin loading
-		 * translations for next.
-		 *
-		 * When using the radix MMU that operation is the load of the
-		 * MMU context id, which is then moved to SPRN_PID.
-		 *
-		 * For the hash MMU it is either the first load from slb_cache
-		 * in switch_slb(), and/or the store of paca->mm_ctx_id in
-		 * copy_mm_to_paca().
-		 *
-		 * On the read side the barrier is in pte_xchg(), which orders
-		 * the store to the PTE vs the load of mm_cpumask.
-		 */
-		smp_mb();
-
-		new_on_cpu = true;
-	}
-
-	/* 32-bit keeps track of the current PGDIR in the thread struct */
-#ifdef CONFIG_PPC32
-	tsk->thread.pgdir = next->pgd;
-#endif /* CONFIG_PPC32 */
-
-	/* 64-bit Book3E keeps track of current PGD in the PACA */
-#ifdef CONFIG_PPC_BOOK3E_64
-	get_paca()->pgd = next->pgd;
-#endif
-	/* Nothing else to do if we aren't actually switching */
-	if (prev == next)
-		return;
-
-#ifdef CONFIG_PPC_ICSWX
-	/* Switch coprocessor context only if prev or next uses a coprocessor */
-	if (prev->context.acop || next->context.acop)
-		switch_cop(next);
-#endif /* CONFIG_PPC_ICSWX */
-
-	/* We must stop all altivec streams before changing the HW
-	 * context
-	 */
-#ifdef CONFIG_ALTIVEC
-	if (cpu_has_feature(CPU_FTR_ALTIVEC))
-		asm volatile ("dssall");
-#endif /* CONFIG_ALTIVEC */
-
-	if (new_on_cpu)
-		radix_kvm_prefetch_workaround(next);
-
-	/*
-	 * The actual HW switching method differs between the various
-	 * sub architectures. Out of line for now
-	 */
-	switch_mmu_context(prev, next, tsk);
-}
+extern void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
+			       struct task_struct *tsk);
 
 static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 			     struct task_struct *tsk)
@@ -168,11 +100,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
  */
 static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next)
 {
-	unsigned long flags;
-
-	local_irq_save(flags);
 	switch_mm(prev, next, current);
-	local_irq_restore(flags);
 }
 
 /* We don't currently use enter_lazy_tlb() for anything */
diff --git a/arch/powerpc/include/asm/nmi.h b/arch/powerpc/include/asm/nmi.h
index 6f8e79cd35d8..3760150a0ff0 100644
--- a/arch/powerpc/include/asm/nmi.h
+++ b/arch/powerpc/include/asm/nmi.h
@@ -1,9 +1,8 @@
 #ifndef _ASM_NMI_H
 #define _ASM_NMI_H
 
-#ifdef CONFIG_HARDLOCKUP_DETECTOR
+#ifdef CONFIG_PPC_WATCHDOG
 extern void arch_touch_nmi_watchdog(void);
-
 extern void arch_trigger_cpumask_backtrace(const cpumask_t *mask,
 					   bool exclude_self);
 #define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace
diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h
index 91314268f04f..185c6a47f9ba 100644
--- a/arch/powerpc/include/asm/nohash/32/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/32/pgtable.h
@@ -121,7 +121,7 @@ extern int icache_44x_need_flush;
 #include <asm/nohash/pte-book3e.h>
 #elif defined(CONFIG_FSL_BOOKE)
 #include <asm/nohash/32/pte-fsl-booke.h>
-#elif defined(CONFIG_8xx)
+#elif defined(CONFIG_PPC_8xx)
 #include <asm/nohash/32/pte-8xx.h>
 #endif
 
@@ -337,9 +337,6 @@ static inline void __ptep_set_access_flags(struct mm_struct *mm,
 #define __pte_to_swp_entry(pte)		((swp_entry_t) { pte_val(pte) >> 3 })
 #define __swp_entry_to_pte(x)		((pte_t) { (x).val << 3 })
 
-extern int get_pteptr(struct mm_struct *mm, unsigned long addr, pte_t **ptep,
-		      pmd_t **pmdp);
-
 int map_kernel_page(unsigned long va, phys_addr_t pa, int flags);
 
 #endif /* !__ASSEMBLY__ */
diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h
index e5805ad78e12..17989c3d9a24 100644
--- a/arch/powerpc/include/asm/nohash/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/pgtable.h
@@ -14,6 +14,7 @@ static inline int pte_write(pte_t pte)
 {
 	return (pte_val(pte) & (_PAGE_RW | _PAGE_RO)) != _PAGE_RO;
 }
+static inline int pte_read(pte_t pte)		{ return 1; }
 static inline int pte_dirty(pte_t pte)		{ return pte_val(pte) & _PAGE_DIRTY; }
 static inline int pte_young(pte_t pte)		{ return pte_val(pte) & _PAGE_ACCESSED; }
 static inline int pte_special(pte_t pte)	{ return pte_val(pte) & _PAGE_SPECIAL; }
diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h
index 3130a73652c7..450a60b81d2a 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -42,6 +42,7 @@
 #define OPAL_I2C_STOP_ERR	-24
 #define OPAL_XIVE_PROVISIONING	-31
 #define OPAL_XIVE_FREE_ACTIVE	-32
+#define OPAL_TIMEOUT		-33
 
 /* API Tokens (in r0) */
 #define OPAL_INVALID_CALL		       -1
@@ -190,7 +191,16 @@
 #define OPAL_NPU_INIT_CONTEXT			146
 #define OPAL_NPU_DESTROY_CONTEXT		147
 #define OPAL_NPU_MAP_LPAR			148
-#define OPAL_LAST				148
+#define OPAL_IMC_COUNTERS_INIT			149
+#define OPAL_IMC_COUNTERS_START			150
+#define OPAL_IMC_COUNTERS_STOP			151
+#define OPAL_GET_POWERCAP			152
+#define OPAL_SET_POWERCAP			153
+#define OPAL_GET_POWER_SHIFT_RATIO		154
+#define OPAL_SET_POWER_SHIFT_RATIO		155
+#define OPAL_SENSOR_GROUP_CLEAR			156
+#define OPAL_PCI_SET_P2P			157
+#define OPAL_LAST				157
 
 /* Device tree flags */
 
@@ -1084,6 +1094,18 @@ enum {
 	XIVE_DUMP_EMU_STATE	= 5,
 };
 
+/* "type" argument options for OPAL_IMC_COUNTERS_* calls */
+enum {
+	OPAL_IMC_COUNTERS_NEST = 1,
+	OPAL_IMC_COUNTERS_CORE = 2,
+};
+
+
+/* PCI p2p descriptor */
+#define OPAL_PCI_P2P_ENABLE		0x1
+#define OPAL_PCI_P2P_LOAD		0x2
+#define OPAL_PCI_P2P_STORE		0x4
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* __OPAL_API_H */
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 588fb1c23af9..726c23304a57 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -50,7 +50,7 @@ int64_t opal_tpo_write(uint64_t token, uint32_t year_mon_day,
 		       uint32_t hour_min);
 int64_t opal_cec_power_down(uint64_t request);
 int64_t opal_cec_reboot(void);
-int64_t opal_cec_reboot2(uint32_t reboot_type, char *diag);
+int64_t opal_cec_reboot2(uint32_t reboot_type, const char *diag);
 int64_t opal_read_nvram(uint64_t buffer, uint64_t size, uint64_t offset);
 int64_t opal_write_nvram(uint64_t buffer, uint64_t size, uint64_t offset);
 int64_t opal_handle_interrupt(uint64_t isn, __be64 *outstanding_event_mask);
@@ -267,6 +267,19 @@ int64_t opal_xive_allocate_irq(uint32_t chip_id);
 int64_t opal_xive_free_irq(uint32_t girq);
 int64_t opal_xive_sync(uint32_t type, uint32_t id);
 int64_t opal_xive_dump(uint32_t type, uint32_t id);
+int64_t opal_pci_set_p2p(uint64_t phb_init, uint64_t phb_target,
+			uint64_t desc, uint16_t pe_number);
+
+int64_t opal_imc_counters_init(uint32_t type, uint64_t address,
+							uint64_t cpu_pir);
+int64_t opal_imc_counters_start(uint32_t type, uint64_t cpu_pir);
+int64_t opal_imc_counters_stop(uint32_t type, uint64_t cpu_pir);
+
+int opal_get_powercap(u32 handle, int token, u32 *pcap);
+int opal_set_powercap(u32 handle, int token, u32 pcap);
+int opal_get_power_shift_ratio(u32 handle, int token, u32 *psr);
+int opal_set_power_shift_ratio(u32 handle, int token, u32 psr);
+int opal_sensor_group_clear(u32 group_hndl, int token);
 
 /* Internal functions */
 extern int early_init_dt_scan_opal(unsigned long node, const char *uname,
@@ -345,6 +358,10 @@ static inline int opal_get_async_rc(struct opal_msg msg)
 
 void opal_wake_poller(void);
 
+void opal_powercap_init(void);
+void opal_psr_init(void);
+void opal_sensor_groups_init(void);
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* _ASM_POWERPC_OPAL_H */
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index dc88a31cc79a..04b60af027ae 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -31,6 +31,7 @@
 #endif
 #include <asm/accounting.h>
 #include <asm/hmi.h>
+#include <asm/cpuidle.h>
 
 register struct paca_struct *local_paca asm("r13");
 
@@ -183,6 +184,12 @@ struct paca_struct {
 	struct paca_struct **thread_sibling_pacas;
 	/* The PSSCR value that the kernel requested before going to stop */
 	u64 requested_psscr;
+
+	/*
+	 * Save area for additional SPRs that need to be
+	 * saved/restored during cpuidle stop.
+	 */
+	struct stop_sprs stop_sprs;
 #endif
 
 #ifdef CONFIG_PPC_STD_MMU_64
diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h
index 56c67d3f0108..0b8aa1fe2d5f 100644
--- a/arch/powerpc/include/asm/pci-bridge.h
+++ b/arch/powerpc/include/asm/pci-bridge.h
@@ -195,7 +195,6 @@ struct pci_dn {
 	struct  pci_dn *parent;
 	struct  pci_controller *phb;	/* for pci devices */
 	struct	iommu_table_group *table_group;	/* for phb's or bridges */
-	struct	device_node *node;	/* back-pointer to the device_node */
 
 	int	pci_ext_config_space;	/* for pci devices */
 
diff --git a/arch/powerpc/include/asm/pgalloc.h b/arch/powerpc/include/asm/pgalloc.h
index d795c5d5789c..45ae1212ab8a 100644
--- a/arch/powerpc/include/asm/pgalloc.h
+++ b/arch/powerpc/include/asm/pgalloc.h
@@ -17,6 +17,8 @@ static inline gfp_t pgtable_gfp_flags(struct mm_struct *mm, gfp_t gfp)
 }
 #endif /* MODULE */
 
+#define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO)
+
 #ifdef CONFIG_PPC_BOOK3S
 #include <asm/book3s/pgalloc.h>
 #else
diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
index afae9a336136..7d0d38f58243 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -66,22 +66,14 @@ extern int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
 #ifndef CONFIG_TRANSPARENT_HUGEPAGE
 #define pmd_large(pmd)		0
 #endif
-pte_t *__find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea,
-				   bool *is_thp, unsigned *shift);
-static inline pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea,
-					       bool *is_thp, unsigned *shift)
-{
-	VM_WARN(!arch_irqs_disabled(),
-		"%s called with irq enabled\n", __func__);
-	return __find_linux_pte_or_hugepte(pgdir, ea, is_thp, shift);
-}
 
+/* can we use this in kvm */
 unsigned long vmalloc_to_phys(void *vmalloc_addr);
 
 void pgtable_cache_add(unsigned shift, void (*ctor)(void *));
 void pgtable_cache_init(void);
 
-#ifdef CONFIG_STRICT_KERNEL_RWX
+#if defined(CONFIG_STRICT_KERNEL_RWX) || defined(CONFIG_PPC32)
 void mark_initmem_nx(void);
 #else
 static inline void mark_initmem_nx(void) { }
diff --git a/arch/powerpc/include/asm/pnv-pci.h b/arch/powerpc/include/asm/pnv-pci.h
index de9681034353..3e5cf251ad9a 100644
--- a/arch/powerpc/include/asm/pnv-pci.h
+++ b/arch/powerpc/include/asm/pnv-pci.h
@@ -26,6 +26,8 @@ extern int pnv_pci_get_presence_state(uint64_t id, uint8_t *state);
 extern int pnv_pci_get_power_state(uint64_t id, uint8_t *state);
 extern int pnv_pci_set_power_state(uint64_t id, uint8_t state,
 				   struct opal_msg *msg);
+extern int pnv_pci_set_p2p(struct pci_dev *initiator, struct pci_dev *target,
+			   u64 desc);
 
 int pnv_phb_to_cxl_mode(struct pci_dev *dev, uint64_t mode);
 int pnv_cxl_ioda_msi_setup(struct pci_dev *dev, unsigned int hwirq,
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index fa9ebaead91e..ce0930d68857 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -193,6 +193,7 @@
 #define PPC_INST_CLRBHRB		0x7c00035c
 #define PPC_INST_COPY			0x7c20060c
 #define PPC_INST_CP_ABORT		0x7c00068c
+#define PPC_INST_DARN			0x7c0005e6
 #define PPC_INST_DCBA			0x7c0005ec
 #define PPC_INST_DCBA_MASK		0xfc0007fe
 #define PPC_INST_DCBAL			0x7c2005ec
@@ -204,6 +205,8 @@
 #define PPC_INST_ISEL_MASK		0xfc00003e
 #define PPC_INST_LDARX			0x7c0000a8
 #define PPC_INST_STDCX			0x7c0001ad
+#define PPC_INST_LQARX			0x7c000228
+#define PPC_INST_STQCX			0x7c00016d
 #define PPC_INST_LSWI			0x7c0004aa
 #define PPC_INST_LSWX			0x7c00042a
 #define PPC_INST_LWARX			0x7c000028
@@ -261,7 +264,7 @@
 #define PPC_INST_TLBSRX_DOT		0x7c0006a5
 #define PPC_INST_VPMSUMW		0x10000488
 #define PPC_INST_VPMSUMD		0x100004c8
-#define PPC_INST_XXLOR			0xf0000510
+#define PPC_INST_XXLOR			0xf0000490
 #define PPC_INST_XXSWAPD		0xf0000250
 #define PPC_INST_XVCPSGNDP		0xf0000780
 #define PPC_INST_TRECHKPT		0x7c0007dd
@@ -395,16 +398,25 @@
 #define	PPC_CP_ABORT		stringify_in_c(.long PPC_INST_CP_ABORT)
 #define	PPC_COPY(a, b)		stringify_in_c(.long PPC_INST_COPY | \
 					___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_DARN(t, l)		stringify_in_c(.long PPC_INST_DARN |  \
+						___PPC_RT(t)	   |  \
+						(((l) & 0x3) << 16))
 #define	PPC_DCBAL(a, b)		stringify_in_c(.long PPC_INST_DCBAL | \
 					__PPC_RA(a) | __PPC_RB(b))
 #define	PPC_DCBZL(a, b)		stringify_in_c(.long PPC_INST_DCBZL | \
 					__PPC_RA(a) | __PPC_RB(b))
+#define PPC_LQARX(t, a, b, eh)	stringify_in_c(.long PPC_INST_LQARX | \
+					___PPC_RT(t) | ___PPC_RA(a) | \
+					___PPC_RB(b) | __PPC_EH(eh))
 #define PPC_LDARX(t, a, b, eh)	stringify_in_c(.long PPC_INST_LDARX | \
 					___PPC_RT(t) | ___PPC_RA(a) | \
 					___PPC_RB(b) | __PPC_EH(eh))
 #define PPC_LWARX(t, a, b, eh)	stringify_in_c(.long PPC_INST_LWARX | \
 					___PPC_RT(t) | ___PPC_RA(a) | \
 					___PPC_RB(b) | __PPC_EH(eh))
+#define PPC_STQCX(t, a, b)	stringify_in_c(.long PPC_INST_STQCX | \
+					___PPC_RT(t) | ___PPC_RA(a) | \
+					___PPC_RB(b))
 #define PPC_MSGSND(b)		stringify_in_c(.long PPC_INST_MSGSND | \
 					___PPC_RB(b))
 #define PPC_MSGSYNC		stringify_in_c(.long PPC_INST_MSGSYNC)
@@ -414,6 +426,8 @@
 					___PPC_RB(b))
 #define PPC_MSGCLRP(b)		stringify_in_c(.long PPC_INST_MSGCLRP | \
 					___PPC_RB(b))
+#define PPC_PASTE(a, b)		stringify_in_c(.long PPC_INST_PASTE | \
+					___PPC_RA(a) | ___PPC_RB(b))
 #define PPC_POPCNTB(a, s)	stringify_in_c(.long PPC_INST_POPCNTB | \
 					__PPC_RA(a) | __PPC_RS(s))
 #define PPC_POPCNTD(a, s)	stringify_in_c(.long PPC_INST_POPCNTD | \
diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h
index 6baeeb9acd0d..36f3e41c9fbe 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -378,10 +378,16 @@ BEGIN_FTR_SECTION_NESTED(96);		\
 	cmpwi dest,0;			\
 	beq-  90b;			\
 END_FTR_SECTION_NESTED(CPU_FTR_CELL_TB_BUG, CPU_FTR_CELL_TB_BUG, 96)
-#elif defined(CONFIG_8xx)
-#define MFTB(dest)			mftb dest
 #else
-#define MFTB(dest)			mfspr dest, SPRN_TBRL
+#define MFTB(dest)			MFTBL(dest)
+#endif
+
+#ifdef CONFIG_PPC_8xx
+#define MFTBL(dest)			mftb dest
+#define MFTBU(dest)			mftbu dest
+#else
+#define MFTBL(dest)			mfspr dest, SPRN_TBRL
+#define MFTBU(dest)			mfspr dest, SPRN_TBRU
 #endif
 
 #ifndef CONFIG_SMP
@@ -411,7 +417,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_601)
  * and they must be used.
  */
 
-#if !defined(CONFIG_4xx) && !defined(CONFIG_8xx)
+#if !defined(CONFIG_4xx) && !defined(CONFIG_PPC_8xx)
 #define tlbia					\
 	li	r4,1024;			\
 	mtctr	r4;				\
@@ -439,7 +445,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_601)
 .machine push ;					\
 .machine "power4" ;				\
        lis     scratch,0x60000000@h;		\
-       dcbt    r0,scratch,0b01010;		\
+       dcbt    0,scratch,0b01010;		\
 .machine pop
 
 /*
diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h
index 35c00d7a0cf8..825bd5998701 100644
--- a/arch/powerpc/include/asm/prom.h
+++ b/arch/powerpc/include/asm/prom.h
@@ -159,7 +159,10 @@ struct of_drconf_cell {
 #define OV5_PFO_HW_842		0x1140	/* PFO Compression Accelerator */
 #define OV5_PFO_HW_ENCR		0x1120	/* PFO Encryption Accelerator */
 #define OV5_SUB_PROCESSORS	0x1501	/* 1,2,or 4 Sub-Processors supported */
-#define OV5_XIVE_EXPLOIT	0x1701	/* XIVE exploitation supported */
+#define OV5_XIVE_SUPPORT	0x17C0	/* XIVE Exploitation Support Mask */
+#define OV5_XIVE_LEGACY		0x1700	/* XIVE legacy mode Only */
+#define OV5_XIVE_EXPLOIT	0x1740	/* XIVE exploitation mode Only */
+#define OV5_XIVE_EITHER		0x1780	/* XIVE legacy or exploitation mode */
 /* MMU Base Architecture */
 #define OV5_MMU_SUPPORT		0x18C0	/* MMU Mode Support Mask */
 #define OV5_MMU_HASH		0x1800	/* Hash MMU Only */
diff --git a/arch/powerpc/include/asm/pte-walk.h b/arch/powerpc/include/asm/pte-walk.h
new file mode 100644
index 000000000000..2d633e9d686c
--- /dev/null
+++ b/arch/powerpc/include/asm/pte-walk.h
@@ -0,0 +1,35 @@
+#ifndef _ASM_POWERPC_PTE_WALK_H
+#define _ASM_POWERPC_PTE_WALK_H
+
+#include <linux/sched.h>
+
+/* Don't use this directly */
+extern pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea,
+			       bool *is_thp, unsigned *hshift);
+
+static inline pte_t *find_linux_pte(pgd_t *pgdir, unsigned long ea,
+				    bool *is_thp, unsigned *hshift)
+{
+	VM_WARN(!arch_irqs_disabled(), "%s called with irq enabled\n", __func__);
+	return __find_linux_pte(pgdir, ea, is_thp, hshift);
+}
+
+static inline pte_t *find_init_mm_pte(unsigned long ea, unsigned *hshift)
+{
+	pgd_t *pgdir = init_mm.pgd;
+	return __find_linux_pte(pgdir, ea, NULL, hshift);
+}
+/*
+ * This is what we should always use. Any other lockless page table lookup needs
+ * careful audit against THP split.
+ */
+static inline pte_t *find_current_mm_pte(pgd_t *pgdir, unsigned long ea,
+					 bool *is_thp, unsigned *hshift)
+{
+	VM_WARN(!arch_irqs_disabled(), "%s called with irq enabled\n", __func__);
+	VM_WARN(pgdir != current->mm->pgd,
+		"%s lock less page table lookup called on wrong mm\n", __func__);
+	return __find_linux_pte(pgdir, ea, is_thp, hshift);
+}
+
+#endif /* _ASM_POWERPC_PTE_WALK_H */
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index a3b6575c7842..f92eaf7a4c0d 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -22,9 +22,9 @@
 #include <asm/reg_fsl_emb.h>
 #endif
 
-#ifdef CONFIG_8xx
+#ifdef CONFIG_PPC_8xx
 #include <asm/reg_8xx.h>
-#endif /* CONFIG_8xx */
+#endif /* CONFIG_PPC_8xx */
 
 #define MSR_SF_LG	63              /* Enable 64 bit mode */
 #define MSR_ISF_LG	61              /* Interrupt 64b mode valid on 630 */
@@ -135,7 +135,7 @@
 #define MSR_KERNEL	(MSR_ | MSR_64BIT)
 #define MSR_USER32	(MSR_ | MSR_PR | MSR_EE)
 #define MSR_USER64	(MSR_USER32 | MSR_64BIT)
-#elif defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_8xx)
+#elif defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_PPC_8xx)
 /* Default MSR for kernel mode. */
 #define MSR_KERNEL	(MSR_ME|MSR_RI|MSR_IR|MSR_DR)
 #define MSR_USER	(MSR_KERNEL|MSR_PR|MSR_EE)
@@ -272,16 +272,65 @@
 #define SPRN_DAR	0x013	/* Data Address Register */
 #define SPRN_DBCR	0x136	/* e300 Data Breakpoint Control Reg */
 #define SPRN_DSISR	0x012	/* Data Storage Interrupt Status Register */
-#define   DSISR_NOHPTE		0x40000000	/* no translation found */
-#define   DSISR_PROTFAULT	0x08000000	/* protection fault */
-#define   DSISR_BADACCESS	0x04000000	/* bad access to CI or G */
-#define   DSISR_ISSTORE		0x02000000	/* access was a store */
-#define   DSISR_DABRMATCH	0x00400000	/* hit data breakpoint */
-#define   DSISR_NOSEGMENT	0x00200000	/* SLB miss */
-#define   DSISR_KEYFAULT	0x00200000	/* Key fault */
-#define   DSISR_UNSUPP_MMU	0x00080000	/* Unsupported MMU config */
-#define   DSISR_SET_RC		0x00040000	/* Failed setting of R/C bits */
-#define   DSISR_PGDIRFAULT      0x00020000      /* Fault on page directory */
+#define   DSISR_BAD_DIRECT_ST	0x80000000 /* Obsolete: Direct store error */
+#define   DSISR_NOHPTE		0x40000000 /* no translation found */
+#define   DSISR_ATTR_CONFLICT	0x20000000 /* P9: Process vs. Partition attr */
+#define   DSISR_NOEXEC_OR_G	0x10000000 /* Alias of SRR1 bit, see below */
+#define   DSISR_PROTFAULT	0x08000000 /* protection fault */
+#define   DSISR_BADACCESS	0x04000000 /* bad access to CI or G */
+#define   DSISR_ISSTORE		0x02000000 /* access was a store */
+#define   DSISR_DABRMATCH	0x00400000 /* hit data breakpoint */
+#define   DSISR_NOSEGMENT	0x00200000 /* STAB miss (unsupported) */
+#define   DSISR_KEYFAULT	0x00200000 /* Storage Key fault */
+#define   DSISR_BAD_EXT_CTRL	0x00100000 /* Obsolete: External ctrl error */
+#define   DSISR_UNSUPP_MMU	0x00080000 /* P9: Unsupported MMU config */
+#define   DSISR_SET_RC		0x00040000 /* P9: Failed setting of R/C bits */
+#define   DSISR_PRTABLE_FAULT   0x00020000 /* P9: Fault on process table */
+#define   DSISR_ICSWX_NO_CT     0x00004000 /* P7: icswx unavailable cp type */
+#define   DSISR_BAD_COPYPASTE   0x00000008 /* P9: Copy/Paste on wrong memtype */
+#define   DSISR_BAD_AMO		0x00000004 /* P9: Incorrect AMO opcode */
+#define   DSISR_BAD_CI_LDST	0x00000002 /* P8: Bad HV CI load/store */
+
+/*
+ * DSISR_NOEXEC_OR_G doesn't actually exist. This bit is always
+ * 0 on DSIs. However, on ISIs, the corresponding bit in SRR1
+ * indicates an attempt at executing from a no-execute PTE
+ * or segment or from a guarded page.
+ *
+ * We add a definition here for completeness as we alias
+ * DSISR and SRR1 in do_page_fault.
+ */
+
+/*
+ * DSISR bits that are treated as a fault. Any bit set
+ * here will skip hash_page, and cause do_page_fault to
+ * trigger a SIGBUS or SIGSEGV:
+ */
+#define   DSISR_BAD_FAULT_32S	(DSISR_BAD_DIRECT_ST	| \
+				 DSISR_BADACCESS	| \
+				 DSISR_BAD_EXT_CTRL)
+#define	  DSISR_BAD_FAULT_64S	(DSISR_BAD_FAULT_32S	| \
+				 DSISR_ATTR_CONFLICT	| \
+				 DSISR_KEYFAULT		| \
+				 DSISR_UNSUPP_MMU	| \
+				 DSISR_PRTABLE_FAULT	| \
+				 DSISR_ICSWX_NO_CT	| \
+				 DSISR_BAD_COPYPASTE	| \
+				 DSISR_BAD_AMO		| \
+				 DSISR_BAD_CI_LDST)
+/*
+ * These bits are equivalent in SRR1 and DSISR for 0x400
+ * instruction access interrupts on Book3S
+ */
+#define   DSISR_SRR1_MATCH_32S	(DSISR_NOHPTE		| \
+				 DSISR_NOEXEC_OR_G	| \
+				 DSISR_PROTFAULT)
+#define   DSISR_SRR1_MATCH_64S	(DSISR_SRR1_MATCH_32S	| \
+				 DSISR_KEYFAULT		| \
+				 DSISR_UNSUPP_MMU	| \
+				 DSISR_SET_RC		| \
+				 DSISR_PRTABLE_FAULT)
+
 #define SPRN_TBRL	0x10C	/* Time Base Read Lower Register (user, R/O) */
 #define SPRN_TBRU	0x10D	/* Time Base Read Upper Register (user, R/O) */
 #define SPRN_CIR	0x11B	/* Chip Information Register (hyper, R/0) */
@@ -307,6 +356,7 @@
 #define SPRN_PMSR	0x355   /* Power Management Status Reg */
 #define SPRN_PMMAR	0x356	/* Power Management Memory Activity Register */
 #define SPRN_PSSCR	0x357	/* Processor Stop Status and Control Register (ISA 3.0) */
+#define SPRN_PSSCR_PR	0x337	/* PSSCR ISA 3.0, privileged mode access */
 #define SPRN_PMCR	0x374	/* Power Management Control Register */
 
 /* HFSCR and FSCR bit numbers are the same */
@@ -675,6 +725,7 @@
 					  * may not be recoverable */
 #define	  SRR1_WS_DEEPER	0x00020000 /* Some resources not maintained */
 #define	  SRR1_WS_DEEP		0x00010000 /* All resources maintained */
+#define   SRR1_PROGTM		0x00200000 /* TM Bad Thing */
 #define   SRR1_PROGFPE		0x00100000 /* Floating Point Enabled */
 #define   SRR1_PROGILL		0x00080000 /* Illegal instruction */
 #define   SRR1_PROGPRIV		0x00040000 /* Privileged instruction */
@@ -1114,7 +1165,7 @@
 #endif
 #endif
 
-#ifdef CONFIG_8xx
+#ifdef CONFIG_PPC_8xx
 #define SPRN_SPRG_SCRATCH0	SPRN_SPRG0
 #define SPRN_SPRG_SCRATCH1	SPRN_SPRG1
 #define SPRN_SPRG_SCRATCH2	SPRN_SPRG2
@@ -1197,10 +1248,8 @@
  * differentiated by the version number in the Communication Processor
  * Module (CPM).
  */
-#define PVR_821		0x00500000
-#define PVR_823		PVR_821
-#define PVR_850		PVR_821
-#define PVR_860		PVR_821
+#define PVR_8xx		0x00500000
+
 #define PVR_8240	0x00810100
 #define PVR_8245	0x80811014
 #define PVR_8260	PVR_8240
@@ -1295,12 +1344,12 @@ static inline void msr_check_and_clear(unsigned long bits)
 				".section __ftr_fixup,\"a\"\n"		\
 				".align 3\n"				\
 				"98:\n"					\
-				"	.llong %1\n"			\
-				"	.llong %1\n"			\
-				"	.llong 97b-98b\n"		\
-				"	.llong 99b-98b\n"		\
-				"	.llong 0\n"			\
-				"	.llong 0\n"			\
+				"	.8byte %1\n"			\
+				"	.8byte %1\n"			\
+				"	.8byte 97b-98b\n"		\
+				"	.8byte 99b-98b\n"		\
+				"	.8byte 0\n"			\
+				"	.8byte 0\n"			\
 				".previous"				\
 			: "=r" (rval) \
 			: "i" (CPU_FTR_CELL_TB_BUG), "i" (SPRN_TBRL) : "cr0"); \
@@ -1313,7 +1362,7 @@ static inline void msr_check_and_clear(unsigned long bits)
 
 #else /* __powerpc64__ */
 
-#if defined(CONFIG_8xx)
+#if defined(CONFIG_PPC_8xx)
 #define mftbl()		({unsigned long rval;	\
 			asm volatile("mftbl %0" : "=r" (rval)); rval;})
 #define mftbu()		({unsigned long rval;	\
diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h
index 737e012ef56e..eb2a33d5df26 100644
--- a/arch/powerpc/include/asm/reg_booke.h
+++ b/arch/powerpc/include/asm/reg_booke.h
@@ -221,10 +221,7 @@
 #define SPRN_CSRR0	SPRN_SRR2 /* Critical Save and Restore Register 0 */
 #define SPRN_CSRR1	SPRN_SRR3 /* Critical Save and Restore Register 1 */
 #endif
-
-#ifdef CONFIG_PPC_ICSWX
 #define SPRN_HACOP	0x15F	/* Hypervisor Available Coprocessor Register */
-#endif
 
 /* Bit definitions for CCR1. */
 #define	CCR1_DPC	0x00000100 /* Disable L1 I-Cache/D-Cache parity checking */
diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h
index 654d64c9f3ac..3a3fb0ca68f5 100644
--- a/arch/powerpc/include/asm/setup.h
+++ b/arch/powerpc/include/asm/setup.h
@@ -23,7 +23,6 @@ extern void reloc_got2(unsigned long);
 
 void check_for_initrd(void);
 void initmem_init(void);
-void setup_panic(void);
 #define ARCH_PANIC_TIMEOUT 180
 
 #ifdef CONFIG_PPC_PSERIES
diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h
index 8ea98504f900..fac963e10d39 100644
--- a/arch/powerpc/include/asm/smp.h
+++ b/arch/powerpc/include/asm/smp.h
@@ -97,6 +97,7 @@ static inline void set_hard_smp_processor_id(int cpu, int phys)
 #endif
 
 DECLARE_PER_CPU(cpumask_var_t, cpu_sibling_map);
+DECLARE_PER_CPU(cpumask_var_t, cpu_l2_cache_map);
 DECLARE_PER_CPU(cpumask_var_t, cpu_core_map);
 
 static inline struct cpumask *cpu_sibling_mask(int cpu)
@@ -109,6 +110,11 @@ static inline struct cpumask *cpu_core_mask(int cpu)
 	return per_cpu(cpu_core_map, cpu);
 }
 
+static inline struct cpumask *cpu_l2_cache_mask(int cpu)
+{
+	return per_cpu(cpu_l2_cache_map, cpu);
+}
+
 extern int cpu_to_core_id(int cpu);
 
 /* Since OpenPIC has only 4 IPIs, we use slightly different message numbers.
diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h
index 8c1b913de6d7..edbe571bcc54 100644
--- a/arch/powerpc/include/asm/spinlock.h
+++ b/arch/powerpc/include/asm/spinlock.h
@@ -170,39 +170,6 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock)
 	lock->slock = 0;
 }
 
-static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
-{
-	arch_spinlock_t lock_val;
-
-	smp_mb();
-
-	/*
-	 * Atomically load and store back the lock value (unchanged). This
-	 * ensures that our observation of the lock value is ordered with
-	 * respect to other lock operations.
-	 */
-	__asm__ __volatile__(
-"1:	" PPC_LWARX(%0, 0, %2, 0) "\n"
-"	stwcx. %0, 0, %2\n"
-"	bne- 1b\n"
-	: "=&r" (lock_val), "+m" (*lock)
-	: "r" (lock)
-	: "cr0", "xer");
-
-	if (arch_spin_value_unlocked(lock_val))
-		goto out;
-
-	while (lock->slock) {
-		HMT_low();
-		if (SHARED_PROCESSOR)
-			__spin_yield(lock);
-	}
-	HMT_medium();
-
-out:
-	smp_mb();
-}
-
 /*
  * Read-write spinlocks, allowing multiple readers
  * but only one writer.
@@ -342,5 +309,8 @@ static inline void arch_write_unlock(arch_rwlock_t *rw)
 #define arch_read_relax(lock)	__rw_yield(lock)
 #define arch_write_relax(lock)	__rw_yield(lock)
 
+/* See include/linux/spinlock.h */
+#define smp_mb__after_spinlock()   smp_mb()
+
 #endif /* __KERNEL__ */
 #endif /* __ASM_SPINLOCK_H */
diff --git a/arch/powerpc/include/asm/sstep.h b/arch/powerpc/include/asm/sstep.h
index d3a42cc45a82..ab9d849644d0 100644
--- a/arch/powerpc/include/asm/sstep.h
+++ b/arch/powerpc/include/asm/sstep.h
@@ -23,12 +23,9 @@ struct pt_regs;
 #define IS_RFID(instr)		(((instr) & 0xfc0007fe) == 0x4c000024)
 #define IS_RFI(instr)		(((instr) & 0xfc0007fe) == 0x4c000064)
 
-/* Emulate instructions that cause a transfer of control. */
-extern int emulate_step(struct pt_regs *regs, unsigned int instr);
-
 enum instruction_type {
 	COMPUTE,		/* arith/logical/CR op, etc. */
-	LOAD,
+	LOAD,			/* load and store types need to be contiguous */
 	LOAD_MULTI,
 	LOAD_FP,
 	LOAD_VMX,
@@ -55,10 +52,31 @@ enum instruction_type {
 
 #define INSTR_TYPE_MASK	0x1f
 
+#define OP_IS_LOAD_STORE(type)	(LOAD <= (type) && (type) <= STCX)
+
+/* Compute flags, ORed in with type */
+#define SETREG		0x20
+#define SETCC		0x40
+#define SETXER		0x80
+
+/* Branch flags, ORed in with type */
+#define SETLK		0x20
+#define BRTAKEN		0x40
+#define DECCTR		0x80
+
 /* Load/store flags, ORed in with type */
 #define SIGNEXT		0x20
 #define UPDATE		0x40	/* matches bit in opcode 31 instructions */
 #define BYTEREV		0x80
+#define FPCONV		0x100
+
+/* Barrier type field, ORed in with type */
+#define BARRIER_MASK	0xe0
+#define BARRIER_SYNC	0x00
+#define BARRIER_ISYNC	0x20
+#define BARRIER_EIEIO	0x40
+#define BARRIER_LWSYNC	0x60
+#define BARRIER_PTESYNC	0x80
 
 /* Cacheop values, ORed in with type */
 #define CACHEOP_MASK	0x700
@@ -67,10 +85,17 @@ enum instruction_type {
 #define DCBTST		0x200
 #define DCBT		0x300
 #define ICBI		0x400
+#define DCBZ		0x500
+
+/* VSX flags values */
+#define VSX_FPCONV	1	/* do floating point SP/DP conversion */
+#define VSX_SPLAT	2	/* store loaded value into all elements */
+#define VSX_LDLEFT	4	/* load VSX register from left */
+#define VSX_CHECK_VEC	8	/* check MSR_VEC not MSR_VSX for reg >= 32 */
 
 /* Size field in type word */
-#define SIZE(n)		((n) << 8)
-#define GETSIZE(w)	((w) >> 8)
+#define SIZE(n)		((n) << 12)
+#define GETSIZE(w)	((w) >> 12)
 
 #define MKOP(t, f, s)	((t) | (f) | SIZE(s))
 
@@ -83,7 +108,63 @@ struct instruction_op {
 	int update_reg;
 	/* For MFSPR */
 	int spr;
+	u32 ccval;
+	u32 xerval;
+	u8 element_size;	/* for VSX/VMX loads/stores */
+	u8 vsx_flags;
+};
+
+union vsx_reg {
+	u8	b[16];
+	u16	h[8];
+	u32	w[4];
+	unsigned long d[2];
+	float	fp[4];
+	double	dp[2];
+	__vector128 v;
 };
 
-extern int analyse_instr(struct instruction_op *op, struct pt_regs *regs,
+/*
+ * Decode an instruction, and return information about it in *op
+ * without changing *regs.
+ *
+ * Return value is 1 if the instruction can be emulated just by
+ * updating *regs with the information in *op, -1 if we need the
+ * GPRs but *regs doesn't contain the full register set, or 0
+ * otherwise.
+ */
+extern int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
 			 unsigned int instr);
+
+/*
+ * Emulate an instruction that can be executed just by updating
+ * fields in *regs.
+ */
+void emulate_update_regs(struct pt_regs *reg, struct instruction_op *op);
+
+/*
+ * Emulate instructions that cause a transfer of control,
+ * arithmetic/logical instructions, loads and stores,
+ * cache operations and barriers.
+ *
+ * Returns 1 if the instruction was emulated successfully,
+ * 0 if it could not be emulated, or -1 for an instruction that
+ * should not be emulated (rfid, mtmsrd clearing MSR_RI, etc.).
+ */
+extern int emulate_step(struct pt_regs *regs, unsigned int instr);
+
+/*
+ * Emulate a load or store instruction by reading/writing the
+ * memory of the current process.  FP/VMX/VSX registers are assumed
+ * to hold live values if the appropriate enable bit in regs->msr is
+ * set; otherwise this will use the saved values in the thread struct
+ * for user-mode accesses.
+ */
+extern int emulate_loadstore(struct pt_regs *regs, struct instruction_op *op);
+
+extern void emulate_vsx_load(struct instruction_op *op, union vsx_reg *reg,
+			     const void *mem, bool cross_endian);
+extern void emulate_vsx_store(struct instruction_op *op,
+			      const union vsx_reg *reg, void *mem,
+			      bool cross_endian);
+extern int emulate_dcbz(unsigned long ea, struct pt_regs *regs);
diff --git a/arch/powerpc/include/asm/string.h b/arch/powerpc/include/asm/string.h
index da3cdffca440..cc9addefb51c 100644
--- a/arch/powerpc/include/asm/string.h
+++ b/arch/powerpc/include/asm/string.h
@@ -10,6 +10,7 @@
 #define __HAVE_ARCH_MEMMOVE
 #define __HAVE_ARCH_MEMCMP
 #define __HAVE_ARCH_MEMCHR
+#define __HAVE_ARCH_MEMSET16
 
 extern char * strcpy(char *,const char *);
 extern char * strncpy(char *,const char *, __kernel_size_t);
@@ -23,6 +24,31 @@ extern void * memmove(void *,const void *,__kernel_size_t);
 extern int memcmp(const void *,const void *,__kernel_size_t);
 extern void * memchr(const void *,int,__kernel_size_t);
 
+#ifdef CONFIG_PPC64
+#define __HAVE_ARCH_MEMSET32
+#define __HAVE_ARCH_MEMSET64
+
+extern void *__memset16(uint16_t *, uint16_t v, __kernel_size_t);
+extern void *__memset32(uint32_t *, uint32_t v, __kernel_size_t);
+extern void *__memset64(uint64_t *, uint64_t v, __kernel_size_t);
+
+static inline void *memset16(uint16_t *p, uint16_t v, __kernel_size_t n)
+{
+	return __memset16(p, v, n * 2);
+}
+
+static inline void *memset32(uint32_t *p, uint32_t v, __kernel_size_t n)
+{
+	return __memset32(p, v, n * 4);
+}
+
+static inline void *memset64(uint64_t *p, uint64_t v, __kernel_size_t n)
+{
+	return __memset64(p, v, n * 8);
+}
+#else
+extern void *memset16(uint16_t *, uint16_t, __kernel_size_t);
+#endif
 #endif /* __KERNEL__ */
 
 #endif	/* _ASM_POWERPC_STRING_H */
diff --git a/arch/powerpc/include/asm/timex.h b/arch/powerpc/include/asm/timex.h
index 2cf846edb3fc..cb61eae5b7ed 100644
--- a/arch/powerpc/include/asm/timex.h
+++ b/arch/powerpc/include/asm/timex.h
@@ -29,7 +29,7 @@ static inline cycles_t get_cycles(void)
 	ret = 0;
 
 	__asm__ __volatile__(
-#ifdef CONFIG_8xx
+#ifdef CONFIG_PPC_8xx
 		"97:	mftb %0\n"
 #else
 		"97:	mfspr %0, %2\n"
@@ -45,11 +45,7 @@ static inline cycles_t get_cycles(void)
 		"	.long 0\n"
 		"	.long 0\n"
 		".previous"
-#ifdef CONFIG_8xx
-		: "=r" (ret) : "i" (CPU_FTR_601));
-#else
 		: "=r" (ret) : "i" (CPU_FTR_601), "i" (SPRN_TBRL));
-#endif
 	return ret;
 #endif
 }
diff --git a/arch/powerpc/include/asm/tlb.h b/arch/powerpc/include/asm/tlb.h
index 609557569f65..a7eabff27a0f 100644
--- a/arch/powerpc/include/asm/tlb.h
+++ b/arch/powerpc/include/asm/tlb.h
@@ -69,13 +69,22 @@ static inline int mm_is_core_local(struct mm_struct *mm)
 			      topology_sibling_cpumask(smp_processor_id()));
 }
 
+#ifdef CONFIG_PPC_BOOK3S_64
+static inline int mm_is_thread_local(struct mm_struct *mm)
+{
+	if (atomic_read(&mm->context.active_cpus) > 1)
+		return false;
+	return cpumask_test_cpu(smp_processor_id(), mm_cpumask(mm));
+}
+#else /* CONFIG_PPC_BOOK3S_64 */
 static inline int mm_is_thread_local(struct mm_struct *mm)
 {
 	return cpumask_equal(mm_cpumask(mm),
 			      cpumask_of(smp_processor_id()));
 }
+#endif /* !CONFIG_PPC_BOOK3S_64 */
 
-#else
+#else /* CONFIG_SMP */
 static inline int mm_is_core_local(struct mm_struct *mm)
 {
 	return 1;
diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h
index dc4e15937ccf..2d84bca8d053 100644
--- a/arch/powerpc/include/asm/topology.h
+++ b/arch/powerpc/include/asm/topology.h
@@ -16,8 +16,6 @@ struct device_node;
 
 #include <asm/mmzone.h>
 
-#define parent_node(node)	(node)
-
 #define cpumask_of_node(node) ((node) == -1 ?				\
 			       cpu_all_mask :				\
 			       node_to_cpumask_map[node])
diff --git a/arch/powerpc/include/asm/vas.h b/arch/powerpc/include/asm/vas.h
new file mode 100644
index 000000000000..fd5963acd658
--- /dev/null
+++ b/arch/powerpc/include/asm/vas.h
@@ -0,0 +1,159 @@
+/*
+ * Copyright 2016-17 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _ASM_POWERPC_VAS_H
+#define _ASM_POWERPC_VAS_H
+
+/*
+ * Min and max FIFO sizes are based on Version 1.05 Section 3.1.4.25
+ * (Local FIFO Size Register) of the VAS workbook.
+ */
+#define VAS_RX_FIFO_SIZE_MIN	(1 << 10)	/* 1KB */
+#define VAS_RX_FIFO_SIZE_MAX	(8 << 20)	/* 8MB */
+
+/*
+ * Threshold Control Mode: Have paste operation fail if the number of
+ * requests in receive FIFO exceeds a threshold.
+ *
+ * NOTE: No special error code yet if paste is rejected because of these
+ *	 limits. So users can't distinguish between this and other errors.
+ */
+#define VAS_THRESH_DISABLED		0
+#define VAS_THRESH_FIFO_GT_HALF_FULL	1
+#define VAS_THRESH_FIFO_GT_QTR_FULL	2
+#define VAS_THRESH_FIFO_GT_EIGHTH_FULL	3
+
+/*
+ * Get/Set bit fields
+ */
+#define GET_FIELD(m, v)                (((v) & (m)) >> MASK_LSH(m))
+#define MASK_LSH(m)            (__builtin_ffsl(m) - 1)
+#define SET_FIELD(m, v, val)   \
+		(((v) & ~(m)) | ((((typeof(v))(val)) << MASK_LSH(m)) & (m)))
+
+/*
+ * Co-processor Engine type.
+ */
+enum vas_cop_type {
+	VAS_COP_TYPE_FAULT,
+	VAS_COP_TYPE_842,
+	VAS_COP_TYPE_842_HIPRI,
+	VAS_COP_TYPE_GZIP,
+	VAS_COP_TYPE_GZIP_HIPRI,
+	VAS_COP_TYPE_FTW,
+	VAS_COP_TYPE_MAX,
+};
+
+/*
+ * Receive window attributes specified by the (in-kernel) owner of window.
+ */
+struct vas_rx_win_attr {
+	void *rx_fifo;
+	int rx_fifo_size;
+	int wcreds_max;
+
+	bool pin_win;
+	bool rej_no_credit;
+	bool tx_wcred_mode;
+	bool rx_wcred_mode;
+	bool tx_win_ord_mode;
+	bool rx_win_ord_mode;
+	bool data_stamp;
+	bool nx_win;
+	bool fault_win;
+	bool user_win;
+	bool notify_disable;
+	bool intr_disable;
+	bool notify_early;
+
+	int lnotify_lpid;
+	int lnotify_pid;
+	int lnotify_tid;
+	u32 pswid;
+
+	int tc_mode;
+};
+
+/*
+ * Window attributes specified by the in-kernel owner of a send window.
+ */
+struct vas_tx_win_attr {
+	enum vas_cop_type cop;
+	int wcreds_max;
+	int lpid;
+	int pidr;		/* hardware PID (from SPRN_PID) */
+	int pid;		/* linux process id */
+	int pswid;
+	int rsvd_txbuf_count;
+	int tc_mode;
+
+	bool user_win;
+	bool pin_win;
+	bool rej_no_credit;
+	bool rsvd_txbuf_enable;
+	bool tx_wcred_mode;
+	bool rx_wcred_mode;
+	bool tx_win_ord_mode;
+	bool rx_win_ord_mode;
+};
+
+/*
+ * Helper to initialize receive window attributes to defaults for an
+ * NX window.
+ */
+void vas_init_rx_win_attr(struct vas_rx_win_attr *rxattr, enum vas_cop_type cop);
+
+/*
+ * Open a VAS receive window for the instance of VAS identified by @vasid
+ * Use @attr to initialize the attributes of the window.
+ *
+ * Return a handle to the window or ERR_PTR() on error.
+ */
+struct vas_window *vas_rx_win_open(int vasid, enum vas_cop_type cop,
+				   struct vas_rx_win_attr *attr);
+
+/*
+ * Helper to initialize send window attributes to defaults for an NX window.
+ */
+extern void vas_init_tx_win_attr(struct vas_tx_win_attr *txattr,
+			enum vas_cop_type cop);
+
+/*
+ * Open a VAS send window for the instance of VAS identified by @vasid
+ * and the co-processor type @cop. Use @attr to initialize attributes
+ * of the window.
+ *
+ * Note: The instance of VAS must already have an open receive window for
+ * the coprocessor type @cop.
+ *
+ * Return a handle to the send window or ERR_PTR() on error.
+ */
+struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop,
+			struct vas_tx_win_attr *attr);
+
+/*
+ * Close the send or receive window identified by @win. For receive windows
+ * return -EAGAIN if there are active send windows attached to this receive
+ * window.
+ */
+int vas_win_close(struct vas_window *win);
+
+/*
+ * Copy the co-processor request block (CRB) @crb into the local L2 cache.
+ */
+int vas_copy_crb(void *crb, int offset);
+
+/*
+ * Paste a previously copied CRB (see vas_copy_crb()) from the L2 cache to
+ * the hardware address associated with the window @win. @re is expected/
+ * assumed to be true for NX windows.
+ */
+int vas_paste_crb(struct vas_window *win, int offset, bool re);
+
+#endif /* __ASM_POWERPC_VAS_H */
diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h
index c23ff4389ca2..371fbebf1ec9 100644
--- a/arch/powerpc/include/asm/xive.h
+++ b/arch/powerpc/include/asm/xive.h
@@ -45,6 +45,7 @@ struct xive_irq_data {
 	void __iomem *trig_mmio;
 	u32 esb_shift;
 	int src_chip;
+	u32 hw_irq;
 
 	/* Setup/used by frontend */
 	int target;
@@ -55,6 +56,7 @@ struct xive_irq_data {
 #define XIVE_IRQ_FLAG_SHIFT_BUG	0x04
 #define XIVE_IRQ_FLAG_MASK_FW	0x08
 #define XIVE_IRQ_FLAG_EOI_FW	0x10
+#define XIVE_IRQ_FLAG_H_INT_ESB	0x20
 
 #define XIVE_INVALID_CHIP_ID	-1
 
@@ -110,11 +112,13 @@ extern bool __xive_enabled;
 
 static inline bool xive_enabled(void) { return __xive_enabled; }
 
+extern bool xive_spapr_init(void);
 extern bool xive_native_init(void);
 extern void xive_smp_probe(void);
 extern int  xive_smp_prepare_cpu(unsigned int cpu);
 extern void xive_smp_setup_cpu(void);
 extern void xive_smp_disable_cpu(void);
+extern void xive_teardown_cpu(void);
 extern void xive_kexec_teardown_cpu(int secondary);
 extern void xive_shutdown(void);
 extern void xive_flush_interrupt(void);
@@ -147,6 +151,7 @@ extern int xive_native_get_vp_info(u32 vp_id, u32 *out_cam_id, u32 *out_chip_id)
 
 static inline bool xive_enabled(void) { return false; }
 
+static inline bool xive_spapr_init(void) { return false; }
 static inline bool xive_native_init(void) { return false; }
 static inline void xive_smp_probe(void) { }
 extern inline int  xive_smp_prepare_cpu(unsigned int cpu) { return -EINVAL; }
diff --git a/arch/powerpc/include/uapi/asm/mman.h b/arch/powerpc/include/uapi/asm/mman.h
index ab45cc2f3101..03c06ba7464f 100644
--- a/arch/powerpc/include/uapi/asm/mman.h
+++ b/arch/powerpc/include/uapi/asm/mman.h
@@ -29,20 +29,4 @@
 #define MAP_STACK	0x20000		/* give out an address that is best suited for process/thread stacks */
 #define MAP_HUGETLB	0x40000		/* create a huge page mapping */
 
-/*
- * When MAP_HUGETLB is set, bits [26:31] of the flags argument to mmap(2),
- * encode the log2 of the huge page size. A value of zero indicates that the
- * default huge page size should be used. To use a non-default huge page size,
- * one of these defines can be used, or the size can be encoded by hand. Note
- * that on most systems only a subset, or possibly none, of these sizes will be
- * available.
- */
-#define MAP_HUGE_512KB	(19 << MAP_HUGE_SHIFT)	/* 512KB HugeTLB Page */
-#define MAP_HUGE_1MB	(20 << MAP_HUGE_SHIFT)	/* 1MB   HugeTLB Page */
-#define MAP_HUGE_2MB	(21 << MAP_HUGE_SHIFT)	/* 2MB   HugeTLB Page */
-#define MAP_HUGE_8MB	(23 << MAP_HUGE_SHIFT)	/* 8MB   HugeTLB Page */
-#define MAP_HUGE_16MB	(24 << MAP_HUGE_SHIFT)	/* 16MB  HugeTLB Page */
-#define MAP_HUGE_1GB	(30 << MAP_HUGE_SHIFT)	/* 1GB   HugeTLB Page */
-#define MAP_HUGE_16GB	(34 << MAP_HUGE_SHIFT)	/* 16GB  HugeTLB Page */
-
 #endif /* _UAPI_ASM_POWERPC_MMAN_H */
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 4aa7c147e447..91960f83039c 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -38,7 +38,7 @@ obj-$(CONFIG_PPC64)		+= setup_64.o sys_ppc32.o \
 				   signal_64.o ptrace32.o \
 				   paca.o nvram_64.o firmware.o
 obj-$(CONFIG_VDSO32)		+= vdso32/
-obj-$(CONFIG_HARDLOCKUP_DETECTOR)	+= watchdog.o
+obj-$(CONFIG_PPC_WATCHDOG)	+= watchdog.o
 obj-$(CONFIG_HAVE_HW_BREAKPOINT)	+= hw_breakpoint.o
 obj-$(CONFIG_PPC_BOOK3S_64)	+= cpu_setup_ppc970.o cpu_setup_pa6t.o
 obj-$(CONFIG_PPC_BOOK3S_64)	+= cpu_setup_power.o
@@ -83,7 +83,7 @@ extra-y				:= head_$(BITS).o
 extra-$(CONFIG_40x)		:= head_40x.o
 extra-$(CONFIG_44x)		:= head_44x.o
 extra-$(CONFIG_FSL_BOOKE)	:= head_fsl_booke.o
-extra-$(CONFIG_8xx)		:= head_8xx.o
+extra-$(CONFIG_PPC_8xx)		:= head_8xx.o
 extra-y				+= vmlinux.lds
 
 obj-$(CONFIG_RELOCATABLE)	+= reloc_$(BITS).o
diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c
index ec7a8b099dd9..26b9994d27ee 100644
--- a/arch/powerpc/kernel/align.c
+++ b/arch/powerpc/kernel/align.c
@@ -27,6 +27,7 @@
 #include <asm/switch_to.h>
 #include <asm/disassemble.h>
 #include <asm/cpu_has_feature.h>
+#include <asm/sstep.h>
 
 struct aligninfo {
 	unsigned char len;
@@ -40,364 +41,9 @@ struct aligninfo {
 #define LD	0	/* load */
 #define ST	1	/* store */
 #define SE	2	/* sign-extend value, or FP ld/st as word */
-#define F	4	/* to/from fp regs */
-#define U	8	/* update index register */
-#define M	0x10	/* multiple load/store */
 #define SW	0x20	/* byte swap */
-#define S	0x40	/* single-precision fp or... */
-#define SX	0x40	/* ... byte count in XER */
-#define HARD	0x80	/* string, stwcx. */
 #define E4	0x40	/* SPE endianness is word */
 #define E8	0x80	/* SPE endianness is double word */
-#define SPLT	0x80	/* VSX SPLAT load */
-
-/* DSISR bits reported for a DCBZ instruction: */
-#define DCBZ	0x5f	/* 8xx/82xx dcbz faults when cache not enabled */
-
-/*
- * The PowerPC stores certain bits of the instruction that caused the
- * alignment exception in the DSISR register.  This array maps those
- * bits to information about the operand length and what the
- * instruction would do.
- */
-static struct aligninfo aligninfo[128] = {
-	{ 4, LD },		/* 00 0 0000: lwz / lwarx */
-	INVALID,		/* 00 0 0001 */
-	{ 4, ST },		/* 00 0 0010: stw */
-	INVALID,		/* 00 0 0011 */
-	{ 2, LD },		/* 00 0 0100: lhz */
-	{ 2, LD+SE },		/* 00 0 0101: lha */
-	{ 2, ST },		/* 00 0 0110: sth */
-	{ 4, LD+M },		/* 00 0 0111: lmw */
-	{ 4, LD+F+S },		/* 00 0 1000: lfs */
-	{ 8, LD+F },		/* 00 0 1001: lfd */
-	{ 4, ST+F+S },		/* 00 0 1010: stfs */
-	{ 8, ST+F },		/* 00 0 1011: stfd */
-	{ 16, LD },		/* 00 0 1100: lq */
-	{ 8, LD },		/* 00 0 1101: ld/ldu/lwa */
-	INVALID,		/* 00 0 1110 */
-	{ 8, ST },		/* 00 0 1111: std/stdu */
-	{ 4, LD+U },		/* 00 1 0000: lwzu */
-	INVALID,		/* 00 1 0001 */
-	{ 4, ST+U },		/* 00 1 0010: stwu */
-	INVALID,		/* 00 1 0011 */
-	{ 2, LD+U },		/* 00 1 0100: lhzu */
-	{ 2, LD+SE+U },		/* 00 1 0101: lhau */
-	{ 2, ST+U },		/* 00 1 0110: sthu */
-	{ 4, ST+M },		/* 00 1 0111: stmw */
-	{ 4, LD+F+S+U },	/* 00 1 1000: lfsu */
-	{ 8, LD+F+U },		/* 00 1 1001: lfdu */
-	{ 4, ST+F+S+U },	/* 00 1 1010: stfsu */
-	{ 8, ST+F+U },		/* 00 1 1011: stfdu */
-	{ 16, LD+F },		/* 00 1 1100: lfdp */
-	INVALID,		/* 00 1 1101 */
-	{ 16, ST+F },		/* 00 1 1110: stfdp */
-	INVALID,		/* 00 1 1111 */
-	{ 8, LD },		/* 01 0 0000: ldx */
-	INVALID,		/* 01 0 0001 */
-	{ 8, ST },		/* 01 0 0010: stdx */
-	INVALID,		/* 01 0 0011 */
-	INVALID,		/* 01 0 0100 */
-	{ 4, LD+SE },		/* 01 0 0101: lwax */
-	INVALID,		/* 01 0 0110 */
-	INVALID,		/* 01 0 0111 */
-	{ 4, LD+M+HARD+SX },	/* 01 0 1000: lswx */
-	{ 4, LD+M+HARD },	/* 01 0 1001: lswi */
-	{ 4, ST+M+HARD+SX },	/* 01 0 1010: stswx */
-	{ 4, ST+M+HARD },	/* 01 0 1011: stswi */
-	INVALID,		/* 01 0 1100 */
-	{ 8, LD+U },		/* 01 0 1101: ldu */
-	INVALID,		/* 01 0 1110 */
-	{ 8, ST+U },		/* 01 0 1111: stdu */
-	{ 8, LD+U },		/* 01 1 0000: ldux */
-	INVALID,		/* 01 1 0001 */
-	{ 8, ST+U },		/* 01 1 0010: stdux */
-	INVALID,		/* 01 1 0011 */
-	INVALID,		/* 01 1 0100 */
-	{ 4, LD+SE+U },		/* 01 1 0101: lwaux */
-	INVALID,		/* 01 1 0110 */
-	INVALID,		/* 01 1 0111 */
-	INVALID,		/* 01 1 1000 */
-	INVALID,		/* 01 1 1001 */
-	INVALID,		/* 01 1 1010 */
-	INVALID,		/* 01 1 1011 */
-	INVALID,		/* 01 1 1100 */
-	INVALID,		/* 01 1 1101 */
-	INVALID,		/* 01 1 1110 */
-	INVALID,		/* 01 1 1111 */
-	INVALID,		/* 10 0 0000 */
-	INVALID,		/* 10 0 0001 */
-	INVALID,		/* 10 0 0010: stwcx. */
-	INVALID,		/* 10 0 0011 */
-	INVALID,		/* 10 0 0100 */
-	INVALID,		/* 10 0 0101 */
-	INVALID,		/* 10 0 0110 */
-	INVALID,		/* 10 0 0111 */
-	{ 4, LD+SW },		/* 10 0 1000: lwbrx */
-	INVALID,		/* 10 0 1001 */
-	{ 4, ST+SW },		/* 10 0 1010: stwbrx */
-	INVALID,		/* 10 0 1011 */
-	{ 2, LD+SW },		/* 10 0 1100: lhbrx */
-	{ 4, LD+SE },		/* 10 0 1101  lwa */
-	{ 2, ST+SW },		/* 10 0 1110: sthbrx */
-	{ 16, ST },		/* 10 0 1111: stq */
-	INVALID,		/* 10 1 0000 */
-	INVALID,		/* 10 1 0001 */
-	INVALID,		/* 10 1 0010 */
-	INVALID,		/* 10 1 0011 */
-	INVALID,		/* 10 1 0100 */
-	INVALID,		/* 10 1 0101 */
-	INVALID,		/* 10 1 0110 */
-	INVALID,		/* 10 1 0111 */
-	INVALID,		/* 10 1 1000 */
-	INVALID,		/* 10 1 1001 */
-	INVALID,		/* 10 1 1010 */
-	INVALID,		/* 10 1 1011 */
-	INVALID,		/* 10 1 1100 */
-	INVALID,		/* 10 1 1101 */
-	INVALID,		/* 10 1 1110 */
-	{ 0, ST+HARD },		/* 10 1 1111: dcbz */
-	{ 4, LD },		/* 11 0 0000: lwzx */
-	INVALID,		/* 11 0 0001 */
-	{ 4, ST },		/* 11 0 0010: stwx */
-	INVALID,		/* 11 0 0011 */
-	{ 2, LD },		/* 11 0 0100: lhzx */
-	{ 2, LD+SE },		/* 11 0 0101: lhax */
-	{ 2, ST },		/* 11 0 0110: sthx */
-	INVALID,		/* 11 0 0111 */
-	{ 4, LD+F+S },		/* 11 0 1000: lfsx */
-	{ 8, LD+F },		/* 11 0 1001: lfdx */
-	{ 4, ST+F+S },		/* 11 0 1010: stfsx */
-	{ 8, ST+F },		/* 11 0 1011: stfdx */
-	{ 16, LD+F },		/* 11 0 1100: lfdpx */
-	{ 4, LD+F+SE },		/* 11 0 1101: lfiwax */
-	{ 16, ST+F },		/* 11 0 1110: stfdpx */
-	{ 4, ST+F },		/* 11 0 1111: stfiwx */
-	{ 4, LD+U },		/* 11 1 0000: lwzux */
-	INVALID,		/* 11 1 0001 */
-	{ 4, ST+U },		/* 11 1 0010: stwux */
-	INVALID,		/* 11 1 0011 */
-	{ 2, LD+U },		/* 11 1 0100: lhzux */
-	{ 2, LD+SE+U },		/* 11 1 0101: lhaux */
-	{ 2, ST+U },		/* 11 1 0110: sthux */
-	INVALID,		/* 11 1 0111 */
-	{ 4, LD+F+S+U },	/* 11 1 1000: lfsux */
-	{ 8, LD+F+U },		/* 11 1 1001: lfdux */
-	{ 4, ST+F+S+U },	/* 11 1 1010: stfsux */
-	{ 8, ST+F+U },		/* 11 1 1011: stfdux */
-	INVALID,		/* 11 1 1100 */
-	{ 4, LD+F },		/* 11 1 1101: lfiwzx */
-	INVALID,		/* 11 1 1110 */
-	INVALID,		/* 11 1 1111 */
-};
-
-/*
- * The dcbz (data cache block zero) instruction
- * gives an alignment fault if used on non-cacheable
- * memory.  We handle the fault mainly for the
- * case when we are running with the cache disabled
- * for debugging.
- */
-static int emulate_dcbz(struct pt_regs *regs, unsigned char __user *addr)
-{
-	long __user *p;
-	int i, size;
-
-#ifdef __powerpc64__
-	size = ppc64_caches.l1d.block_size;
-#else
-	size = L1_CACHE_BYTES;
-#endif
-	p = (long __user *) (regs->dar & -size);
-	if (user_mode(regs) && !access_ok(VERIFY_WRITE, p, size))
-		return -EFAULT;
-	for (i = 0; i < size / sizeof(long); ++i)
-		if (__put_user_inatomic(0, p+i))
-			return -EFAULT;
-	return 1;
-}
-
-/*
- * Emulate load & store multiple instructions
- * On 64-bit machines, these instructions only affect/use the
- * bottom 4 bytes of each register, and the loads clear the
- * top 4 bytes of the affected register.
- */
-#ifdef __BIG_ENDIAN__
-#ifdef CONFIG_PPC64
-#define REG_BYTE(rp, i)		*((u8 *)((rp) + ((i) >> 2)) + ((i) & 3) + 4)
-#else
-#define REG_BYTE(rp, i)		*((u8 *)(rp) + (i))
-#endif
-#else
-#define REG_BYTE(rp, i)		(*(((u8 *)((rp) + ((i)>>2)) + ((i)&3))))
-#endif
-
-#define SWIZ_PTR(p)		((unsigned char __user *)((p) ^ swiz))
-
-static int emulate_multiple(struct pt_regs *regs, unsigned char __user *addr,
-			    unsigned int reg, unsigned int nb,
-			    unsigned int flags, unsigned int instr,
-			    unsigned long swiz)
-{
-	unsigned long *rptr;
-	unsigned int nb0, i, bswiz;
-	unsigned long p;
-
-	/*
-	 * We do not try to emulate 8 bytes multiple as they aren't really
-	 * available in our operating environments and we don't try to
-	 * emulate multiples operations in kernel land as they should never
-	 * be used/generated there at least not on unaligned boundaries
-	 */
-	if (unlikely((nb > 4) || !user_mode(regs)))
-		return 0;
-
-	/* lmw, stmw, lswi/x, stswi/x */
-	nb0 = 0;
-	if (flags & HARD) {
-		if (flags & SX) {
-			nb = regs->xer & 127;
-			if (nb == 0)
-				return 1;
-		} else {
-			unsigned long pc = regs->nip ^ (swiz & 4);
-
-			if (__get_user_inatomic(instr,
-						(unsigned int __user *)pc))
-				return -EFAULT;
-			if (swiz == 0 && (flags & SW))
-				instr = cpu_to_le32(instr);
-			nb = (instr >> 11) & 0x1f;
-			if (nb == 0)
-				nb = 32;
-		}
-		if (nb + reg * 4 > 128) {
-			nb0 = nb + reg * 4 - 128;
-			nb = 128 - reg * 4;
-		}
-#ifdef __LITTLE_ENDIAN__
-		/*
-		 *  String instructions are endian neutral but the code
-		 *  below is not.  Force byte swapping on so that the
-		 *  effects of swizzling are undone in the load/store
-		 *  loops below.
-		 */
-		flags ^= SW;
-#endif
-	} else {
-		/* lwm, stmw */
-		nb = (32 - reg) * 4;
-	}
-
-	if (!access_ok((flags & ST ? VERIFY_WRITE: VERIFY_READ), addr, nb+nb0))
-		return -EFAULT;	/* bad address */
-
-	rptr = &regs->gpr[reg];
-	p = (unsigned long) addr;
-	bswiz = (flags & SW)? 3: 0;
-
-	if (!(flags & ST)) {
-		/*
-		 * This zeroes the top 4 bytes of the affected registers
-		 * in 64-bit mode, and also zeroes out any remaining
-		 * bytes of the last register for lsw*.
-		 */
-		memset(rptr, 0, ((nb + 3) / 4) * sizeof(unsigned long));
-		if (nb0 > 0)
-			memset(&regs->gpr[0], 0,
-			       ((nb0 + 3) / 4) * sizeof(unsigned long));
-
-		for (i = 0; i < nb; ++i, ++p)
-			if (__get_user_inatomic(REG_BYTE(rptr, i ^ bswiz),
-						SWIZ_PTR(p)))
-				return -EFAULT;
-		if (nb0 > 0) {
-			rptr = &regs->gpr[0];
-			addr += nb;
-			for (i = 0; i < nb0; ++i, ++p)
-				if (__get_user_inatomic(REG_BYTE(rptr,
-								 i ^ bswiz),
-							SWIZ_PTR(p)))
-					return -EFAULT;
-		}
-
-	} else {
-		for (i = 0; i < nb; ++i, ++p)
-			if (__put_user_inatomic(REG_BYTE(rptr, i ^ bswiz),
-						SWIZ_PTR(p)))
-				return -EFAULT;
-		if (nb0 > 0) {
-			rptr = &regs->gpr[0];
-			addr += nb;
-			for (i = 0; i < nb0; ++i, ++p)
-				if (__put_user_inatomic(REG_BYTE(rptr,
-								 i ^ bswiz),
-							SWIZ_PTR(p)))
-					return -EFAULT;
-		}
-	}
-	return 1;
-}
-
-/*
- * Emulate floating-point pair loads and stores.
- * Only POWER6 has these instructions, and it does true little-endian,
- * so we don't need the address swizzling.
- */
-static int emulate_fp_pair(unsigned char __user *addr, unsigned int reg,
-			   unsigned int flags)
-{
-	char *ptr0 = (char *) &current->thread.TS_FPR(reg);
-	char *ptr1 = (char *) &current->thread.TS_FPR(reg+1);
-	int i, ret, sw = 0;
-
-	if (reg & 1)
-		return 0;	/* invalid form: FRS/FRT must be even */
-	if (flags & SW)
-		sw = 7;
-	ret = 0;
-	for (i = 0; i < 8; ++i) {
-		if (!(flags & ST)) {
-			ret |= __get_user(ptr0[i^sw], addr + i);
-			ret |= __get_user(ptr1[i^sw], addr + i + 8);
-		} else {
-			ret |= __put_user(ptr0[i^sw], addr + i);
-			ret |= __put_user(ptr1[i^sw], addr + i + 8);
-		}
-	}
-	if (ret)
-		return -EFAULT;
-	return 1;	/* exception handled and fixed up */
-}
-
-#ifdef CONFIG_PPC64
-static int emulate_lq_stq(struct pt_regs *regs, unsigned char __user *addr,
-			  unsigned int reg, unsigned int flags)
-{
-	char *ptr0 = (char *)&regs->gpr[reg];
-	char *ptr1 = (char *)&regs->gpr[reg+1];
-	int i, ret, sw = 0;
-
-	if (reg & 1)
-		return 0;	/* invalid form: GPR must be even */
-	if (flags & SW)
-		sw = 7;
-	ret = 0;
-	for (i = 0; i < 8; ++i) {
-		if (!(flags & ST)) {
-			ret |= __get_user(ptr0[i^sw], addr + i);
-			ret |= __get_user(ptr1[i^sw], addr + i + 8);
-		} else {
-			ret |= __put_user(ptr0[i^sw], addr + i);
-			ret |= __put_user(ptr1[i^sw], addr + i + 8);
-		}
-	}
-	if (ret)
-		return -EFAULT;
-	return 1;	/* exception handled and fixed up */
-}
-#endif /* CONFIG_PPC64 */
 
 #ifdef CONFIG_SPE
 
@@ -636,133 +282,21 @@ static int emulate_spe(struct pt_regs *regs, unsigned int reg,
 }
 #endif /* CONFIG_SPE */
 
-#ifdef CONFIG_VSX
-/*
- * Emulate VSX instructions...
- */
-static int emulate_vsx(unsigned char __user *addr, unsigned int reg,
-		       unsigned int areg, struct pt_regs *regs,
-		       unsigned int flags, unsigned int length,
-		       unsigned int elsize)
-{
-	char *ptr;
-	unsigned long *lptr;
-	int ret = 0;
-	int sw = 0;
-	int i, j;
-
-	/* userland only */
-	if (unlikely(!user_mode(regs)))
-		return 0;
-
-	flush_vsx_to_thread(current);
-
-	if (reg < 32)
-		ptr = (char *) &current->thread.fp_state.fpr[reg][0];
-	else
-		ptr = (char *) &current->thread.vr_state.vr[reg - 32];
-
-	lptr = (unsigned long *) ptr;
-
-#ifdef __LITTLE_ENDIAN__
-	if (flags & SW) {
-		elsize = length;
-		sw = length-1;
-	} else {
-		/*
-		 * The elements are BE ordered, even in LE mode, so process
-		 * them in reverse order.
-		 */
-		addr += length - elsize;
-
-		/* 8 byte memory accesses go in the top 8 bytes of the VR */
-		if (length == 8)
-			ptr += 8;
-	}
-#else
-	if (flags & SW)
-		sw = elsize-1;
-#endif
-
-	for (j = 0; j < length; j += elsize) {
-		for (i = 0; i < elsize; ++i) {
-			if (flags & ST)
-				ret |= __put_user(ptr[i^sw], addr + i);
-			else
-				ret |= __get_user(ptr[i^sw], addr + i);
-		}
-		ptr  += elsize;
-#ifdef __LITTLE_ENDIAN__
-		addr -= elsize;
-#else
-		addr += elsize;
-#endif
-	}
-
-#ifdef __BIG_ENDIAN__
-#define VSX_HI 0
-#define VSX_LO 1
-#else
-#define VSX_HI 1
-#define VSX_LO 0
-#endif
-
-	if (!ret) {
-		if (flags & U)
-			regs->gpr[areg] = regs->dar;
-
-		/* Splat load copies the same data to top and bottom 8 bytes */
-		if (flags & SPLT)
-			lptr[VSX_LO] = lptr[VSX_HI];
-		/* For 8 byte loads, zero the low 8 bytes */
-		else if (!(flags & ST) && (8 == length))
-			lptr[VSX_LO] = 0;
-	} else
-		return -EFAULT;
-
-	return 1;
-}
-#endif
-
 /*
  * Called on alignment exception. Attempts to fixup
  *
  * Return 1 on success
  * Return 0 if unable to handle the interrupt
  * Return -EFAULT if data address is bad
+ * Other negative return values indicate that the instruction can't
+ * be emulated, and the process should be given a SIGBUS.
  */
 
 int fix_alignment(struct pt_regs *regs)
 {
-	unsigned int instr, nb, flags, instruction = 0;
-	unsigned int reg, areg;
-	unsigned int dsisr;
-	unsigned char __user *addr;
-	unsigned long p, swiz;
-	int ret, i;
-	union data {
-		u64 ll;
-		double dd;
-		unsigned char v[8];
-		struct {
-#ifdef __LITTLE_ENDIAN__
-			int	 low32;
-			unsigned hi32;
-#else
-			unsigned hi32;
-			int	 low32;
-#endif
-		} x32;
-		struct {
-#ifdef __LITTLE_ENDIAN__
-			short	      low16;
-			unsigned char hi48[6];
-#else
-			unsigned char hi48[6];
-			short	      low16;
-#endif
-		} x16;
-	} data;
+	unsigned int instr;
+	struct instruction_op op;
+	int r, type;
 
 	/*
 	 * We require a complete register set, if not, then our assembly
@@ -770,121 +304,23 @@ int fix_alignment(struct pt_regs *regs)
 	 */
 	CHECK_FULL_REGS(regs);
 
-	dsisr = regs->dsisr;
-
-	/* Some processors don't provide us with a DSISR we can use here,
-	 * let's make one up from the instruction
-	 */
-	if (cpu_has_feature(CPU_FTR_NODSISRALIGN)) {
-		unsigned long pc = regs->nip;
-
-		if (cpu_has_feature(CPU_FTR_PPC_LE) && (regs->msr & MSR_LE))
-			pc ^= 4;
-		if (unlikely(__get_user_inatomic(instr,
-						 (unsigned int __user *)pc)))
-			return -EFAULT;
-		if (cpu_has_feature(CPU_FTR_REAL_LE) && (regs->msr & MSR_LE))
-			instr = cpu_to_le32(instr);
-		dsisr = make_dsisr(instr);
-		instruction = instr;
+	if (unlikely(__get_user(instr, (unsigned int __user *)regs->nip)))
+		return -EFAULT;
+	if ((regs->msr & MSR_LE) != (MSR_KERNEL & MSR_LE)) {
+		/* We don't handle PPC little-endian any more... */
+		if (cpu_has_feature(CPU_FTR_PPC_LE))
+			return -EIO;
+		instr = swab32(instr);
 	}
 
-	/* extract the operation and registers from the dsisr */
-	reg = (dsisr >> 5) & 0x1f;	/* source/dest register */
-	areg = dsisr & 0x1f;		/* register to update */
-
 #ifdef CONFIG_SPE
 	if ((instr >> 26) == 0x4) {
+		int reg = (instr >> 21) & 0x1f;
 		PPC_WARN_ALIGNMENT(spe, regs);
 		return emulate_spe(regs, reg, instr);
 	}
 #endif
 
-	instr = (dsisr >> 10) & 0x7f;
-	instr |= (dsisr >> 13) & 0x60;
-
-	/* Lookup the operation in our table */
-	nb = aligninfo[instr].len;
-	flags = aligninfo[instr].flags;
-
-	/*
-	 * Handle some cases which give overlaps in the DSISR values.
-	 */
-	if (IS_XFORM(instruction)) {
-		switch (get_xop(instruction)) {
-		case 532:	/* ldbrx */
-			nb = 8;
-			flags = LD+SW;
-			break;
-		case 660:	/* stdbrx */
-			nb = 8;
-			flags = ST+SW;
-			break;
-		case 20:	/* lwarx */
-		case 84:	/* ldarx */
-		case 116:	/* lharx */
-		case 276:	/* lqarx */
-			return 0;	/* not emulated ever */
-		}
-	}
-
-	/* Byteswap little endian loads and stores */
-	swiz = 0;
-	if ((regs->msr & MSR_LE) != (MSR_KERNEL & MSR_LE)) {
-		flags ^= SW;
-#ifdef __BIG_ENDIAN__
-		/*
-		 * So-called "PowerPC little endian" mode works by
-		 * swizzling addresses rather than by actually doing
-		 * any byte-swapping.  To emulate this, we XOR each
-		 * byte address with 7.  We also byte-swap, because
-		 * the processor's address swizzling depends on the
-		 * operand size (it xors the address with 7 for bytes,
-		 * 6 for halfwords, 4 for words, 0 for doublewords) but
-		 * we will xor with 7 and load/store each byte separately.
-		 */
-		if (cpu_has_feature(CPU_FTR_PPC_LE))
-			swiz = 7;
-#endif
-	}
-
-	/* DAR has the operand effective address */
-	addr = (unsigned char __user *)regs->dar;
-
-#ifdef CONFIG_VSX
-	if ((instruction & 0xfc00003e) == 0x7c000018) {
-		unsigned int elsize;
-
-		/* Additional register addressing bit (64 VSX vs 32 FPR/GPR) */
-		reg |= (instruction & 0x1) << 5;
-		/* Simple inline decoder instead of a table */
-		/* VSX has only 8 and 16 byte memory accesses */
-		nb = 8;
-		if (instruction & 0x200)
-			nb = 16;
-
-		/* Vector stores in little-endian mode swap individual
-		   elements, so process them separately */
-		elsize = 4;
-		if (instruction & 0x80)
-			elsize = 8;
-
-		flags = 0;
-		if ((regs->msr & MSR_LE) != (MSR_KERNEL & MSR_LE))
-			flags |= SW;
-		if (instruction & 0x100)
-			flags |= ST;
-		if (instruction & 0x040)
-			flags |= U;
-		/* splat load needs a special decoder */
-		if ((instruction & 0x400) == 0){
-			flags |= SPLT;
-			nb = 8;
-		}
-		PPC_WARN_ALIGNMENT(vsx, regs);
-		return emulate_vsx(addr, reg, areg, regs, flags, nb, elsize);
-	}
-#endif
 
 	/*
 	 * ISA 3.0 (such as P9) copy, copy_first, paste and paste_last alignment
@@ -896,173 +332,27 @@ int fix_alignment(struct pt_regs *regs)
 	 * when pasting to a co-processor. Furthermore, paste_last is the
 	 * synchronisation point for preceding copy/paste sequences.
 	 */
-	if ((instruction & 0xfc0006fe) == PPC_INST_COPY)
+	if ((instr & 0xfc0006fe) == PPC_INST_COPY)
 		return -EIO;
 
-	/* A size of 0 indicates an instruction we don't support, with
-	 * the exception of DCBZ which is handled as a special case here
-	 */
-	if (instr == DCBZ) {
-		PPC_WARN_ALIGNMENT(dcbz, regs);
-		return emulate_dcbz(regs, addr);
-	}
-	if (unlikely(nb == 0))
-		return 0;
-
-	/* Load/Store Multiple instructions are handled in their own
-	 * function
-	 */
-	if (flags & M) {
-		PPC_WARN_ALIGNMENT(multiple, regs);
-		return emulate_multiple(regs, addr, reg, nb,
-					flags, instr, swiz);
-	}
-
-	/* Verify the address of the operand */
-	if (unlikely(user_mode(regs) &&
-		     !access_ok((flags & ST ? VERIFY_WRITE : VERIFY_READ),
-				addr, nb)))
-		return -EFAULT;
-
-	/* Force the fprs into the save area so we can reference them */
-	if (flags & F) {
-		/* userland only */
-		if (unlikely(!user_mode(regs)))
-			return 0;
-		flush_fp_to_thread(current);
-	}
+	r = analyse_instr(&op, regs, instr);
+	if (r < 0)
+		return -EINVAL;
 
-	if (nb == 16) {
-		if (flags & F) {
-			/* Special case for 16-byte FP loads and stores */
-			PPC_WARN_ALIGNMENT(fp_pair, regs);
-			return emulate_fp_pair(addr, reg, flags);
-		} else {
-#ifdef CONFIG_PPC64
-			/* Special case for 16-byte loads and stores */
-			PPC_WARN_ALIGNMENT(lq_stq, regs);
-			return emulate_lq_stq(regs, addr, reg, flags);
-#else
-			return 0;
-#endif
-		}
-	}
-
-	PPC_WARN_ALIGNMENT(unaligned, regs);
-
-	/* If we are loading, get the data from user space, else
-	 * get it from register values
-	 */
-	if (!(flags & ST)) {
-		unsigned int start = 0;
-
-		switch (nb) {
-		case 4:
-			start = offsetof(union data, x32.low32);
-			break;
-		case 2:
-			start = offsetof(union data, x16.low16);
-			break;
-		}
-
-		data.ll = 0;
-		ret = 0;
-		p = (unsigned long)addr;
-
-		for (i = 0; i < nb; i++)
-			ret |= __get_user_inatomic(data.v[start + i],
-						   SWIZ_PTR(p++));
-
-		if (unlikely(ret))
-			return -EFAULT;
-
-	} else if (flags & F) {
-		data.ll = current->thread.TS_FPR(reg);
-		if (flags & S) {
-			/* Single-precision FP store requires conversion... */
-#ifdef CONFIG_PPC_FPU
-			preempt_disable();
-			enable_kernel_fp();
-			cvt_df(&data.dd, (float *)&data.x32.low32);
-			disable_kernel_fp();
-			preempt_enable();
-#else
-			return 0;
-#endif
-		}
-	} else
-		data.ll = regs->gpr[reg];
-
-	if (flags & SW) {
-		switch (nb) {
-		case 8:
-			data.ll = swab64(data.ll);
-			break;
-		case 4:
-			data.x32.low32 = swab32(data.x32.low32);
-			break;
-		case 2:
-			data.x16.low16 = swab16(data.x16.low16);
-			break;
-		}
-	}
-
-	/* Perform other misc operations like sign extension
-	 * or floating point single precision conversion
-	 */
-	switch (flags & ~(U|SW)) {
-	case LD+SE:	/* sign extending integer loads */
-	case LD+F+SE:	/* sign extend for lfiwax */
-		if ( nb == 2 )
-			data.ll = data.x16.low16;
-		else	/* nb must be 4 */
-			data.ll = data.x32.low32;
-		break;
-
-	/* Single-precision FP load requires conversion... */
-	case LD+F+S:
-#ifdef CONFIG_PPC_FPU
-		preempt_disable();
-		enable_kernel_fp();
-		cvt_fd((float *)&data.x32.low32, &data.dd);
-		disable_kernel_fp();
-		preempt_enable();
-#else
-		return 0;
-#endif
-		break;
+	type = op.type & INSTR_TYPE_MASK;
+	if (!OP_IS_LOAD_STORE(type)) {
+		if (type != CACHEOP + DCBZ)
+			return -EINVAL;
+		PPC_WARN_ALIGNMENT(dcbz, regs);
+		r = emulate_dcbz(op.ea, regs);
+	} else {
+		if (type == LARX || type == STCX)
+			return -EIO;
+		PPC_WARN_ALIGNMENT(unaligned, regs);
+		r = emulate_loadstore(regs, &op);
 	}
 
-	/* Store result to memory or update registers */
-	if (flags & ST) {
-		unsigned int start = 0;
-
-		switch (nb) {
-		case 4:
-			start = offsetof(union data, x32.low32);
-			break;
-		case 2:
-			start = offsetof(union data, x16.low16);
-			break;
-		}
-
-		ret = 0;
-		p = (unsigned long)addr;
-
-		for (i = 0; i < nb; i++)
-			ret |= __put_user_inatomic(data.v[start + i],
-						   SWIZ_PTR(p++));
-
-		if (unlikely(ret))
-			return -EFAULT;
-	} else if (flags & F)
-		current->thread.TS_FPR(reg) = data.ll;
-	else
-		regs->gpr[reg] = data.ll;
-
-	/* Update RA as needed */
-	if (flags & U)
-		regs->gpr[areg] = regs->dar;
-
-	return 1;
+	if (!r)
+		return 1;
+	return r;
 }
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 6e95c2c19a7e..8cfb20e38cfe 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -746,6 +746,14 @@ int main(void)
 	OFFSET(PACA_SUBCORE_SIBLING_MASK, paca_struct, subcore_sibling_mask);
 	OFFSET(PACA_SIBLING_PACA_PTRS, paca_struct, thread_sibling_pacas);
 	OFFSET(PACA_REQ_PSSCR, paca_struct, requested_psscr);
+#define STOP_SPR(x, f)	OFFSET(x, paca_struct, stop_sprs.f)
+	STOP_SPR(STOP_PID, pid);
+	STOP_SPR(STOP_LDBAR, ldbar);
+	STOP_SPR(STOP_FSCR, fscr);
+	STOP_SPR(STOP_HFSCR, hfscr);
+	STOP_SPR(STOP_MMCR1, mmcr1);
+	STOP_SPR(STOP_MMCR2, mmcr2);
+	STOP_SPR(STOP_MMCRA, mmcra);
 #endif
 
 	DEFINE(PPC_DBELL_SERVER, PPC_DBELL_SERVER);
diff --git a/arch/powerpc/kernel/btext.c b/arch/powerpc/kernel/btext.c
index 8275858a434d..3f46ca1c59f9 100644
--- a/arch/powerpc/kernel/btext.c
+++ b/arch/powerpc/kernel/btext.c
@@ -253,7 +253,7 @@ int __init btext_find_display(int allow_nonstdout)
 
 	for_each_node_by_type(np, "display") {
 		if (of_get_property(np, "linux,opened", NULL)) {
-			printk("trying %s ...\n", np->full_name);
+			printk("trying %pOF ...\n", np);
 			rc = btext_initialize(np);
 			printk("result: %d\n", rc);
 		}
diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c
index c641983bbdd6..a8f20e5928e1 100644
--- a/arch/powerpc/kernel/cacheinfo.c
+++ b/arch/powerpc/kernel/cacheinfo.c
@@ -167,10 +167,10 @@ static void release_cache_debugcheck(struct cache *cache)
 
 	list_for_each_entry(iter, &cache_list, list)
 		WARN_ONCE(iter->next_local == cache,
-			  "cache for %s(%s) refers to cache for %s(%s)\n",
-			  iter->ofnode->full_name,
+			  "cache for %pOF(%s) refers to cache for %pOF(%s)\n",
+			  iter->ofnode,
 			  cache_type_string(iter),
-			  cache->ofnode->full_name,
+			  cache->ofnode,
 			  cache_type_string(cache));
 }
 
@@ -179,8 +179,8 @@ static void release_cache(struct cache *cache)
 	if (!cache)
 		return;
 
-	pr_debug("freeing L%d %s cache for %s\n", cache->level,
-		 cache_type_string(cache), cache->ofnode->full_name);
+	pr_debug("freeing L%d %s cache for %pOF\n", cache->level,
+		 cache_type_string(cache), cache->ofnode);
 
 	release_cache_debugcheck(cache);
 	list_del(&cache->list);
@@ -194,8 +194,8 @@ static void cache_cpu_set(struct cache *cache, int cpu)
 
 	while (next) {
 		WARN_ONCE(cpumask_test_cpu(cpu, &next->shared_cpu_map),
-			  "CPU %i already accounted in %s(%s)\n",
-			  cpu, next->ofnode->full_name,
+			  "CPU %i already accounted in %pOF(%s)\n",
+			  cpu, next->ofnode,
 			  cache_type_string(next));
 		cpumask_set_cpu(cpu, &next->shared_cpu_map);
 		next = next->next_local;
@@ -355,7 +355,7 @@ static int cache_is_unified_d(const struct device_node *np)
  */
 static struct cache *cache_do_one_devnode_unified(struct device_node *node, int level)
 {
-	pr_debug("creating L%d ucache for %s\n", level, node->full_name);
+	pr_debug("creating L%d ucache for %pOF\n", level, node);
 
 	return new_cache(cache_is_unified_d(node), level, node);
 }
@@ -365,8 +365,8 @@ static struct cache *cache_do_one_devnode_split(struct device_node *node,
 {
 	struct cache *dcache, *icache;
 
-	pr_debug("creating L%d dcache and icache for %s\n", level,
-		 node->full_name);
+	pr_debug("creating L%d dcache and icache for %pOF\n", level,
+		 node);
 
 	dcache = new_cache(CACHE_TYPE_DATA, level, node);
 	icache = new_cache(CACHE_TYPE_INSTRUCTION, level, node);
@@ -679,7 +679,6 @@ static struct kobj_type cache_index_type = {
 
 static void cacheinfo_create_index_opt_attrs(struct cache_index_dir *dir)
 {
-	const char *cache_name;
 	const char *cache_type;
 	struct cache *cache;
 	char *buf;
@@ -690,7 +689,6 @@ static void cacheinfo_create_index_opt_attrs(struct cache_index_dir *dir)
 		return;
 
 	cache = dir->cache;
-	cache_name = cache->ofnode->full_name;
 	cache_type = cache_type_string(cache);
 
 	/* We don't want to create an attribute that can't provide a
@@ -707,14 +705,14 @@ static void cacheinfo_create_index_opt_attrs(struct cache_index_dir *dir)
 		rc = attr->show(&dir->kobj, attr, buf);
 		if (rc <= 0) {
 			pr_debug("not creating %s attribute for "
-				 "%s(%s) (rc = %zd)\n",
-				 attr->attr.name, cache_name,
+				 "%pOF(%s) (rc = %zd)\n",
+				 attr->attr.name, cache->ofnode,
 				 cache_type, rc);
 			continue;
 		}
 		if (sysfs_create_file(&dir->kobj, &attr->attr))
-			pr_debug("could not create %s attribute for %s(%s)\n",
-				 attr->attr.name, cache_name, cache_type);
+			pr_debug("could not create %s attribute for %pOF(%s)\n",
+				 attr->attr.name, cache->ofnode, cache_type);
 	}
 
 	kfree(buf);
@@ -831,8 +829,8 @@ static void cache_cpu_clear(struct cache *cache, int cpu)
 		struct cache *next = cache->next_local;
 
 		WARN_ONCE(!cpumask_test_cpu(cpu, &cache->shared_cpu_map),
-			  "CPU %i not accounted in %s(%s)\n",
-			  cpu, cache->ofnode->full_name,
+			  "CPU %i not accounted in %pOF(%s)\n",
+			  cpu, cache->ofnode,
 			  cache_type_string(cache));
 
 		cpumask_clear_cpu(cpu, &cache->shared_cpu_map);
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index 6f849832a669..760872916013 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -1259,10 +1259,10 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.platform		= "ppc603",
 	},
 #endif /* CONFIG_PPC_BOOK3S_32 */
-#ifdef CONFIG_8xx
+#ifdef CONFIG_PPC_8xx
 	{	/* 8xx */
 		.pvr_mask		= 0xffff0000,
-		.pvr_value		= 0x00500000,
+		.pvr_value		= PVR_8xx,
 		.cpu_name		= "8xx",
 		/* CPU_FTR_MAYBE_CAN_DOZE is possible,
 		 * if the 8xx code is there.... */
@@ -1274,7 +1274,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.machine_check		= machine_check_8xx,
 		.platform		= "ppc823",
 	},
-#endif /* CONFIG_8xx */
+#endif /* CONFIG_PPC_8xx */
 #ifdef CONFIG_40x
 	{	/* 403GC */
 		.pvr_mask		= 0xffffff00,
@@ -1936,6 +1936,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.machine_check		= machine_check_440A,
 		.platform		= "ppc440",
 	},
+#ifdef CONFIG_PPC_47x
 	{ /* 476 DD2 core */
 		.pvr_mask		= 0xffffffff,
 		.pvr_value		= 0x11a52080,
@@ -1992,6 +1993,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.machine_check		= machine_check_47x,
 		.platform		= "ppc470",
 	},
+#endif /* CONFIG_PPC_47x */
 	{	/* default match */
 		.pvr_mask		= 0x00000000,
 		.pvr_value		= 0x00000000,
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 63992b2d8e15..9e816787c0d4 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -44,6 +44,7 @@
 #include <asm/machdep.h>
 #include <asm/ppc-pci.h>
 #include <asm/rtas.h>
+#include <asm/pte-walk.h>
 
 
 /** Overview:
@@ -169,10 +170,10 @@ static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len)
 	char buffer[128];
 
 	n += scnprintf(buf+n, len-n, "%04x:%02x:%02x.%01x\n",
-		       edev->phb->global_number, pdn->busno,
+		       pdn->phb->global_number, pdn->busno,
 		       PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
 	pr_warn("EEH: of node=%04x:%02x:%02x.%01x\n",
-		edev->phb->global_number, pdn->busno,
+		pdn->phb->global_number, pdn->busno,
 		PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
 
 	eeh_ops->read_config(pdn, PCI_VENDOR_ID, 4, &cfg);
@@ -352,8 +353,7 @@ static inline unsigned long eeh_token_to_phys(unsigned long token)
 	 * worried about _PAGE_SPLITTING/collapse. Also we will not hit
 	 * page table free, because of init_mm.
 	 */
-	ptep = __find_linux_pte_or_hugepte(init_mm.pgd, token,
-					   NULL, &hugepage_shift);
+	ptep = find_init_mm_pte(token, &hugepage_shift);
 	if (!ptep)
 		return token;
 	WARN_ON(hugepage_shift);
@@ -435,7 +435,7 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
 	int ret;
 	int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
 	unsigned long flags;
-	struct pci_dn *pdn;
+	struct device_node *dn;
 	struct pci_dev *dev;
 	struct eeh_pe *pe, *parent_pe, *phb_pe;
 	int rc = 0;
@@ -493,9 +493,10 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
 	if (pe->state & EEH_PE_ISOLATED) {
 		pe->check_count++;
 		if (pe->check_count % EEH_MAX_FAILS == 0) {
-			pdn = eeh_dev_to_pdn(edev);
-			if (pdn->node)
-				location = of_get_property(pdn->node, "ibm,loc-code", NULL);
+			dn = pci_device_to_OF_node(dev);
+			if (dn)
+				location = of_get_property(dn, "ibm,loc-code",
+						NULL);
 			printk(KERN_ERR "EEH: %d reads ignored for recovering device at "
 				"location=%s driver=%s pci addr=%s\n",
 				pe->check_count,
@@ -1064,7 +1065,7 @@ core_initcall_sync(eeh_init);
  */
 void eeh_add_device_early(struct pci_dn *pdn)
 {
-	struct pci_controller *phb;
+	struct pci_controller *phb = pdn ? pdn->phb : NULL;
 	struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
 
 	if (!edev)
@@ -1074,7 +1075,6 @@ void eeh_add_device_early(struct pci_dn *pdn)
 		return;
 
 	/* USB Bus children of PCI devices will not have BUID's */
-	phb = edev->phb;
 	if (NULL == phb ||
 	    (eeh_has_flag(EEH_PROBE_MODE_DEVTREE) && 0 == phb->buid))
 		return;
diff --git a/arch/powerpc/kernel/eeh_dev.c b/arch/powerpc/kernel/eeh_dev.c
index d6b2ca70d14d..ad04ecd63c20 100644
--- a/arch/powerpc/kernel/eeh_dev.c
+++ b/arch/powerpc/kernel/eeh_dev.c
@@ -50,21 +50,16 @@
  */
 struct eeh_dev *eeh_dev_init(struct pci_dn *pdn)
 {
-	struct pci_controller *phb = pdn->phb;
 	struct eeh_dev *edev;
 
 	/* Allocate EEH device */
 	edev = kzalloc(sizeof(*edev), GFP_KERNEL);
-	if (!edev) {
-		pr_warn("%s: out of memory\n",
-			__func__);
+	if (!edev)
 		return NULL;
-	}
 
 	/* Associate EEH device with OF node */
 	pdn->edev = edev;
 	edev->pdn = pdn;
-	edev->phb = phb;
 	INIT_LIST_HEAD(&edev->list);
 	INIT_LIST_HEAD(&edev->rmv_list);
 
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index c405c79e50cd..8b840191df59 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -428,7 +428,7 @@ static void *eeh_add_virt_device(void *data, void *userdata)
 
 	if (!(edev->physfn)) {
 		pr_warn("%s: EEH dev %04x:%02x:%02x.%01x not for VF\n",
-			__func__, edev->phb->global_number, pdn->busno,
+			__func__, pdn->phb->global_number, pdn->busno,
 			PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
 		return NULL;
 	}
diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
index cc4b206f77e4..2e8d1b2b5af4 100644
--- a/arch/powerpc/kernel/eeh_pe.c
+++ b/arch/powerpc/kernel/eeh_pe.c
@@ -230,10 +230,15 @@ void *eeh_pe_dev_traverse(struct eeh_pe *root,
  * Bus/Device/Function number. The extra data referred by flag
  * indicates which type of address should be used.
  */
+struct eeh_pe_get_flag {
+	int pe_no;
+	int config_addr;
+};
+
 static void *__eeh_pe_get(void *data, void *flag)
 {
 	struct eeh_pe *pe = (struct eeh_pe *)data;
-	struct eeh_dev *edev = (struct eeh_dev *)flag;
+	struct eeh_pe_get_flag *tmp = (struct eeh_pe_get_flag *) flag;
 
 	/* Unexpected PHB PE */
 	if (pe->type & EEH_PE_PHB)
@@ -244,17 +249,17 @@ static void *__eeh_pe_get(void *data, void *flag)
 	 * have non-zero PE address
 	 */
 	if (eeh_has_flag(EEH_VALID_PE_ZERO)) {
-		if (edev->pe_config_addr == pe->addr)
+		if (tmp->pe_no == pe->addr)
 			return pe;
 	} else {
-		if (edev->pe_config_addr &&
-		    (edev->pe_config_addr == pe->addr))
+		if (tmp->pe_no &&
+		    (tmp->pe_no == pe->addr))
 			return pe;
 	}
 
 	/* Try BDF address */
-	if (edev->config_addr &&
-	   (edev->config_addr == pe->config_addr))
+	if (tmp->config_addr &&
+	   (tmp->config_addr == pe->config_addr))
 		return pe;
 
 	return NULL;
@@ -262,7 +267,9 @@ static void *__eeh_pe_get(void *data, void *flag)
 
 /**
  * eeh_pe_get - Search PE based on the given address
- * @edev: EEH device
+ * @phb: PCI controller
+ * @pe_no: PE number
+ * @config_addr: Config address
  *
  * Search the corresponding PE based on the specified address which
  * is included in the eeh device. The function is used to check if
@@ -271,12 +278,14 @@ static void *__eeh_pe_get(void *data, void *flag)
  * which is composed of PCI bus/device/function number, or unified
  * PE address.
  */
-struct eeh_pe *eeh_pe_get(struct eeh_dev *edev)
+struct eeh_pe *eeh_pe_get(struct pci_controller *phb,
+		int pe_no, int config_addr)
 {
-	struct eeh_pe *root = eeh_phb_pe_get(edev->phb);
+	struct eeh_pe *root = eeh_phb_pe_get(phb);
+	struct eeh_pe_get_flag tmp = { pe_no, config_addr };
 	struct eeh_pe *pe;
 
-	pe = eeh_pe_traverse(root, __eeh_pe_get, edev);
+	pe = eeh_pe_traverse(root, __eeh_pe_get, &tmp);
 
 	return pe;
 }
@@ -330,11 +339,13 @@ static struct eeh_pe *eeh_pe_get_parent(struct eeh_dev *edev)
 int eeh_add_to_parent_pe(struct eeh_dev *edev)
 {
 	struct eeh_pe *pe, *parent;
+	struct pci_dn *pdn = eeh_dev_to_pdn(edev);
+	int config_addr = (pdn->busno << 8) | (pdn->devfn);
 
 	/* Check if the PE number is valid */
 	if (!eeh_has_flag(EEH_VALID_PE_ZERO) && !edev->pe_config_addr) {
 		pr_err("%s: Invalid PE#0 for edev 0x%x on PHB#%x\n",
-		       __func__, edev->config_addr, edev->phb->global_number);
+		       __func__, config_addr, pdn->phb->global_number);
 		return -EINVAL;
 	}
 
@@ -344,7 +355,7 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
 	 * PE should be composed of PCI bus and its subordinate
 	 * components.
 	 */
-	pe = eeh_pe_get(edev);
+	pe = eeh_pe_get(pdn->phb, edev->pe_config_addr, config_addr);
 	if (pe && !(pe->type & EEH_PE_INVALID)) {
 		/* Mark the PE as type of PCI bus */
 		pe->type = EEH_PE_BUS;
@@ -353,11 +364,11 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
 		/* Put the edev to PE */
 		list_add_tail(&edev->list, &pe->edevs);
 		pr_debug("EEH: Add %04x:%02x:%02x.%01x to Bus PE#%x\n",
-			edev->phb->global_number,
-			edev->config_addr >> 8,
-			PCI_SLOT(edev->config_addr & 0xFF),
-			PCI_FUNC(edev->config_addr & 0xFF),
-			pe->addr);
+			 pdn->phb->global_number,
+			 pdn->busno,
+			 PCI_SLOT(pdn->devfn),
+			 PCI_FUNC(pdn->devfn),
+			 pe->addr);
 		return 0;
 	} else if (pe && (pe->type & EEH_PE_INVALID)) {
 		list_add_tail(&edev->list, &pe->edevs);
@@ -376,25 +387,25 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
 
 		pr_debug("EEH: Add %04x:%02x:%02x.%01x to Device "
 			 "PE#%x, Parent PE#%x\n",
-			edev->phb->global_number,
-			edev->config_addr >> 8,
-                        PCI_SLOT(edev->config_addr & 0xFF),
-                        PCI_FUNC(edev->config_addr & 0xFF),
-			pe->addr, pe->parent->addr);
+			 pdn->phb->global_number,
+			 pdn->busno,
+			 PCI_SLOT(pdn->devfn),
+			 PCI_FUNC(pdn->devfn),
+			 pe->addr, pe->parent->addr);
 		return 0;
 	}
 
 	/* Create a new EEH PE */
 	if (edev->physfn)
-		pe = eeh_pe_alloc(edev->phb, EEH_PE_VF);
+		pe = eeh_pe_alloc(pdn->phb, EEH_PE_VF);
 	else
-		pe = eeh_pe_alloc(edev->phb, EEH_PE_DEVICE);
+		pe = eeh_pe_alloc(pdn->phb, EEH_PE_DEVICE);
 	if (!pe) {
 		pr_err("%s: out of memory!\n", __func__);
 		return -ENOMEM;
 	}
 	pe->addr	= edev->pe_config_addr;
-	pe->config_addr	= edev->config_addr;
+	pe->config_addr	= config_addr;
 
 	/*
 	 * Put the new EEH PE into hierarchy tree. If the parent
@@ -404,10 +415,10 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
 	 */
 	parent = eeh_pe_get_parent(edev);
 	if (!parent) {
-		parent = eeh_phb_pe_get(edev->phb);
+		parent = eeh_phb_pe_get(pdn->phb);
 		if (!parent) {
 			pr_err("%s: No PHB PE is found (PHB Domain=%d)\n",
-				__func__, edev->phb->global_number);
+				__func__, pdn->phb->global_number);
 			edev->pe = NULL;
 			kfree(pe);
 			return -EEXIST;
@@ -424,10 +435,10 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
 	edev->pe = pe;
 	pr_debug("EEH: Add %04x:%02x:%02x.%01x to "
 		 "Device PE#%x, Parent PE#%x\n",
-		 edev->phb->global_number,
-		 edev->config_addr >> 8,
-		 PCI_SLOT(edev->config_addr & 0xFF),
-		 PCI_FUNC(edev->config_addr & 0xFF),
+		 pdn->phb->global_number,
+		 pdn->busno,
+		 PCI_SLOT(pdn->devfn),
+		 PCI_FUNC(pdn->devfn),
 		 pe->addr, pe->parent->addr);
 
 	return 0;
@@ -446,13 +457,14 @@ int eeh_rmv_from_parent_pe(struct eeh_dev *edev)
 {
 	struct eeh_pe *pe, *parent, *child;
 	int cnt;
+	struct pci_dn *pdn = eeh_dev_to_pdn(edev);
 
 	if (!edev->pe) {
 		pr_debug("%s: No PE found for device %04x:%02x:%02x.%01x\n",
-			 __func__,  edev->phb->global_number,
-			 edev->config_addr >> 8,
-			 PCI_SLOT(edev->config_addr & 0xFF),
-			 PCI_FUNC(edev->config_addr & 0xFF));
+			 __func__,  pdn->phb->global_number,
+			 pdn->busno,
+			 PCI_SLOT(pdn->devfn),
+			 PCI_FUNC(pdn->devfn));
 		return -EEXIST;
 	}
 
@@ -712,10 +724,10 @@ static void eeh_bridge_check_link(struct eeh_dev *edev)
 		return;
 
 	pr_debug("%s: Check PCIe link for %04x:%02x:%02x.%01x ...\n",
-		 __func__, edev->phb->global_number,
-		 edev->config_addr >> 8,
-		 PCI_SLOT(edev->config_addr & 0xFF),
-		 PCI_FUNC(edev->config_addr & 0xFF));
+		 __func__, pdn->phb->global_number,
+		 pdn->busno,
+		 PCI_SLOT(pdn->devfn),
+		 PCI_FUNC(pdn->devfn));
 
 	/* Check slot status */
 	cap = edev->pcie_cap;
diff --git a/arch/powerpc/kernel/eeh_sysfs.c b/arch/powerpc/kernel/eeh_sysfs.c
index 1ceecdda810b..797549289798 100644
--- a/arch/powerpc/kernel/eeh_sysfs.c
+++ b/arch/powerpc/kernel/eeh_sysfs.c
@@ -51,7 +51,6 @@ static ssize_t eeh_show_##_name(struct device *dev,      \
 static DEVICE_ATTR(_name, S_IRUGO, eeh_show_##_name, NULL);
 
 EEH_SHOW_ATTR(eeh_mode,            mode,            "0x%x");
-EEH_SHOW_ATTR(eeh_config_addr,     config_addr,     "0x%x");
 EEH_SHOW_ATTR(eeh_pe_config_addr,  pe_config_addr,  "0x%x");
 
 static ssize_t eeh_pe_state_show(struct device *dev,
@@ -103,7 +102,6 @@ void eeh_sysfs_add_device(struct pci_dev *pdev)
 		return;
 
 	rc += device_create_file(&pdev->dev, &dev_attr_eeh_mode);
-	rc += device_create_file(&pdev->dev, &dev_attr_eeh_config_addr);
 	rc += device_create_file(&pdev->dev, &dev_attr_eeh_pe_config_addr);
 	rc += device_create_file(&pdev->dev, &dev_attr_eeh_pe_state);
 
@@ -128,7 +126,6 @@ void eeh_sysfs_remove_device(struct pci_dev *pdev)
 	}
 
 	device_remove_file(&pdev->dev, &dev_attr_eeh_mode);
-	device_remove_file(&pdev->dev, &dev_attr_eeh_config_addr);
 	device_remove_file(&pdev->dev, &dev_attr_eeh_pe_config_addr);
 	device_remove_file(&pdev->dev, &dev_attr_eeh_pe_state);
 
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 8587059ad848..e780e1fbf6c2 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -43,6 +43,13 @@
 #define LOAD_MSR_KERNEL(r, x)	li r,(x)
 #endif
 
+/*
+ * Align to 4k in order to ensure that all functions modyfing srr0/srr1
+ * fit into one page in order to not encounter a TLB miss between the
+ * modification of srr0/srr1 and the associated rfi.
+ */
+	.align	12
+
 #ifdef CONFIG_BOOKE
 	.globl	mcheck_transfer_to_handler
 mcheck_transfer_to_handler:
@@ -586,6 +593,10 @@ ppc_swapcontext:
 handle_page_fault:
 	stw	r4,_DAR(r1)
 	addi	r3,r1,STACK_FRAME_OVERHEAD
+#ifdef CONFIG_6xx
+	andis.  r0,r5,DSISR_DABRMATCH@h
+	bne-    handle_dabr_fault
+#endif
 	bl	do_page_fault
 	cmpwi	r3,0
 	beq+	ret_from_except
@@ -599,6 +610,17 @@ handle_page_fault:
 	bl	bad_page_fault
 	b	ret_from_except_full
 
+#ifdef CONFIG_6xx
+	/* We have a data breakpoint exception - handle it */
+handle_dabr_fault:
+	SAVE_NVGPRS(r1)
+	lwz	r0,_TRAP(r1)
+	clrrwi	r0,r0,1
+	stw	r0,_TRAP(r1)
+	bl      do_break
+	b	ret_from_except_full
+#endif
+
 /*
  * This routine switches between two different tasks.  The process
  * state of one is saved on its kernel stack.  Then the state
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index e925c1c99c71..4a0fd4f40245 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -966,16 +966,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 #ifdef CONFIG_PPC_BOOK3E
 	cmpwi	cr0,r3,0x280
 #else
-	BEGIN_FTR_SECTION
-		cmpwi	cr0,r3,0xe80
-	FTR_SECTION_ELSE
-		cmpwi	cr0,r3,0xa00
-	ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
+	cmpwi	cr0,r3,0xa00
 #endif /* CONFIG_PPC_BOOK3E */
 	bne	1f
 	addi	r3,r1,STACK_FRAME_OVERHEAD;
 	bl	doorbell_exception
-	b	ret_from_except
 #endif /* CONFIG_PPC_DOORBELL */
 1:	b	ret_from_except /* What else to do here ? */
  
@@ -1109,7 +1104,7 @@ _ASM_NOKPROBE_SYMBOL(__enter_rtas)
 _ASM_NOKPROBE_SYMBOL(rtas_return_loc)
 
 	.align	3
-1:	.llong	rtas_restore_regs
+1:	.8byte	rtas_restore_regs
 
 rtas_restore_regs:
 	/* relocation is on at this point */
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index f14f3c04ec7e..48da0f5d2f7f 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -541,7 +541,7 @@ EXC_COMMON_BEGIN(instruction_access_common)
 	RECONCILE_IRQ_STATE(r10, r11)
 	ld	r12,_MSR(r1)
 	ld	r3,_NIP(r1)
-	andis.	r4,r12,0x5820
+	andis.	r4,r12,DSISR_BAD_FAULT_64S@h
 	li	r5,0x400
 	std	r3,_DAR(r1)
 	std	r4,_DSISR(r1)
@@ -1314,7 +1314,7 @@ EXC_REAL_NONE(0x1800, 0x100)
 EXC_VIRT_NONE(0x5800, 0x100)
 #endif
 
-#if defined(CONFIG_HARDLOCKUP_DETECTOR) && defined(CONFIG_HAVE_HARDLOCKUP_DETECTOR_ARCH)
+#ifdef CONFIG_PPC_WATCHDOG
 
 #define MASKED_DEC_HANDLER_LABEL 3f
 
@@ -1343,10 +1343,10 @@ EXC_COMMON_BEGIN(soft_nmi_common)
 			ADD_NVGPRS;ADD_RECONCILE)
 	b	ret_from_except
 
-#else
+#else /* CONFIG_PPC_WATCHDOG */
 #define MASKED_DEC_HANDLER_LABEL 2f /* normal return */
 #define MASKED_DEC_HANDLER(_H)
-#endif
+#endif /* CONFIG_PPC_WATCHDOG */
 
 /*
  * An interrupt came in while soft-disabled. We set paca->irq_happened, then:
@@ -1370,19 +1370,16 @@ masked_##_H##interrupt:					\
 	ori	r10,r10,0xffff;				\
 	mtspr	SPRN_DEC,r10;				\
 	b	MASKED_DEC_HANDLER_LABEL;		\
-1:	cmpwi	r10,PACA_IRQ_DBELL;			\
-	beq	2f;					\
-	cmpwi	r10,PACA_IRQ_HMI;			\
-	beq	2f;					\
+1:	andi.	r10,r10,(PACA_IRQ_DBELL|PACA_IRQ_HMI);	\
+	bne	2f;					\
 	mfspr	r10,SPRN_##_H##SRR1;			\
-	rldicl	r10,r10,48,1; /* clear MSR_EE */	\
-	rotldi	r10,r10,16;				\
+	xori	r10,r10,MSR_EE; /* clear MSR_EE */	\
 	mtspr	SPRN_##_H##SRR1,r10;			\
 2:	mtcrf	0x80,r9;				\
 	ld	r9,PACA_EXGEN+EX_R9(r13);		\
 	ld	r10,PACA_EXGEN+EX_R10(r13);		\
 	ld	r11,PACA_EXGEN+EX_R11(r13);		\
-	GET_SCRATCH0(r13);				\
+	/* returns to kernel where r13 must be set up, so don't restore it */ \
 	##_H##rfid;					\
 	b	.;					\
 	MASKED_DEC_HANDLER(_H)
@@ -1485,8 +1482,10 @@ USE_TEXT_SECTION()
  */
 	.balign	IFETCH_ALIGN_BYTES
 do_hash_page:
-#ifdef CONFIG_PPC_STD_MMU_64
-	andis.	r0,r4,0xa450		/* weird error? */
+	#ifdef CONFIG_PPC_STD_MMU_64
+	lis	r0,DSISR_BAD_FAULT_64S@h
+	ori	r0,r0,DSISR_BAD_FAULT_64S@l
+	and.	r0,r4,r0		/* weird error? */
 	bne-	handle_page_fault	/* if not, try to insert a HPTE */
 	CURRENT_THREAD_INFO(r11, r1)
 	lwz	r0,TI_PREEMPT(r11)	/* If we're in an "NMI" */
@@ -1669,25 +1668,27 @@ _GLOBAL(__replay_interrupt)
 	 * we don't give a damn about, so we don't bother storing them.
 	 */
 	mfmsr	r12
-	LOAD_REG_ADDR(r11, 1f)
+	LOAD_REG_ADDR(r11, replay_interrupt_return)
 	mfcr	r9
 	ori	r12,r12,MSR_EE
 	cmpwi	r3,0x900
 	beq	decrementer_common
 	cmpwi	r3,0x500
+BEGIN_FTR_SECTION
+	beq	h_virt_irq_common
+FTR_SECTION_ELSE
 	beq	hardware_interrupt_common
+ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_300)
 BEGIN_FTR_SECTION
-	cmpwi	r3,0xe80
+	cmpwi	r3,0xa00
 	beq	h_doorbell_common_msgclr
-	cmpwi	r3,0xea0
-	beq	h_virt_irq_common
 	cmpwi	r3,0xe60
 	beq	hmi_exception_common
 FTR_SECTION_ELSE
 	cmpwi	r3,0xa00
 	beq	doorbell_super_common_msgclr
 ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
-1:
+replay_interrupt_return:
 	blr
 
 _ASM_NOKPROBE_SYMBOL(__replay_interrupt)
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index dc0c49cfd90a..e1431800bfb9 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -125,6 +125,13 @@ int is_fadump_boot_memory_area(u64 addr, ulong size)
 	return (addr + size) > RMA_START && addr <= fw_dump.boot_memory_size;
 }
 
+int should_fadump_crash(void)
+{
+	if (!fw_dump.dump_registered || !fw_dump.fadumphdr_addr)
+		return 0;
+	return 1;
+}
+
 int is_fadump_active(void)
 {
 	return fw_dump.dump_active;
@@ -518,7 +525,7 @@ void crash_fadump(struct pt_regs *regs, const char *str)
 	struct fadump_crash_info_header *fdh = NULL;
 	int old_cpu, this_cpu;
 
-	if (!fw_dump.dump_registered || !fw_dump.fadumphdr_addr)
+	if (!should_fadump_crash())
 		return;
 
 	/*
@@ -1446,6 +1453,25 @@ static void fadump_init_files(void)
 	return;
 }
 
+static int fadump_panic_event(struct notifier_block *this,
+			      unsigned long event, void *ptr)
+{
+	/*
+	 * If firmware-assisted dump has been registered then trigger
+	 * firmware-assisted dump and let firmware handle everything
+	 * else. If this returns, then fadump was not registered, so
+	 * go through the rest of the panic path.
+	 */
+	crash_fadump(NULL, ptr);
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block fadump_panic_block = {
+	.notifier_call = fadump_panic_event,
+	.priority = INT_MIN /* may not return; must be done last */
+};
+
 /*
  * Prepare for firmware-assisted dump.
  */
@@ -1478,6 +1504,9 @@ int __init setup_fadump(void)
 		init_fadump_mem_struct(&fdm, fw_dump.reserve_dump_area_start);
 	fadump_init_files();
 
+	atomic_notifier_chain_register(&panic_notifier_list,
+					&fadump_panic_block);
+
 	return 1;
 }
 subsys_initcall(setup_fadump);
diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S
index e22734278458..8c54166491e7 100644
--- a/arch/powerpc/kernel/head_32.S
+++ b/arch/powerpc/kernel/head_32.S
@@ -388,7 +388,7 @@ DataAccess:
 	EXCEPTION_PROLOG
 	mfspr	r10,SPRN_DSISR
 	stw	r10,_DSISR(r11)
-	andis.	r0,r10,0xa470		/* weird error? */
+	andis.	r0,r10,DSISR_BAD_FAULT_32S@h
 	bne	1f			/* if not, try to put a PTE */
 	mfspr	r4,SPRN_DAR		/* into the hash table */
 	rlwinm	r3,r10,32-15,21,21	/* DSISR_STORE -> _PAGE_RW */
@@ -403,13 +403,13 @@ DataAccess:
 	DO_KVM  0x400
 InstructionAccess:
 	EXCEPTION_PROLOG
-	andis.	r0,r9,0x4000		/* no pte found? */
+	andis.	r0,r9,SRR1_ISI_NOPT@h	/* no pte found? */
 	beq	1f			/* if so, try to put a PTE */
 	li	r3,0			/* into the hash table */
 	mr	r4,r12			/* SRR0 is fault address */
 	bl	hash_page
 1:	mr	r4,r12
-	mr	r5,r9
+	andis.	r5,r9,DSISR_SRR1_MATCH_32S@h /* Filter relevant SRR1 bits */
 	EXC_XFER_LITE(0x400, handle_page_fault)
 
 /* External interrupt */
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index 0ddc602b33a4..ff8511d6d8ea 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -92,13 +92,13 @@ END_FTR_SECTION(0, 1)
 	.balign 8
 	.globl  __secondary_hold_spinloop
 __secondary_hold_spinloop:
-	.llong	0x0
+	.8byte	0x0
 
 	/* Secondary processors write this value with their cpu # */
 	/* after they enter the spin loop immediately below.	  */
 	.globl	__secondary_hold_acknowledge
 __secondary_hold_acknowledge:
-	.llong	0x0
+	.8byte	0x0
 
 #ifdef CONFIG_RELOCATABLE
 	/* This flag is set to 1 by a loader if the kernel should run
@@ -650,7 +650,7 @@ __after_prom_start:
 	bctr
 
 .balign 8
-p_end: .llong _end - copy_to_here
+p_end: .8byte _end - copy_to_here
 
 4:
 	/*
@@ -892,7 +892,7 @@ _GLOBAL(relative_toc)
 	blr
 
 .balign 8
-p_toc:	.llong	__toc_start + 0x8000 - 0b
+p_toc:	.8byte	__toc_start + 0x8000 - 0b
 
 /*
  * This is where the main kernel code starts.
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index c032fe8c2d26..4fee00d414e8 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -50,18 +50,20 @@
 	mtspr	spr, reg
 #endif
 
-/* Macro to test if an address is a kernel address */
 #if CONFIG_TASK_SIZE <= 0x80000000 && CONFIG_PAGE_OFFSET >= 0x80000000
-#define IS_KERNEL(tmp, addr)		\
-	andis.	tmp, addr, 0x8000	/* Address >= 0x80000000 */
-#define BRANCH_UNLESS_KERNEL(label)	beq	label
-#else
-#define IS_KERNEL(tmp, addr)		\
-	rlwinm	tmp, addr, 16, 16, 31;	\
-	cmpli	cr0, tmp, PAGE_OFFSET >> 16
-#define BRANCH_UNLESS_KERNEL(label)	blt	label
+/* By simply checking Address >= 0x80000000, we know if its a kernel address */
+#define SIMPLE_KERNEL_ADDRESS		1
 #endif
 
+/*
+ * We need an ITLB miss handler for kernel addresses if:
+ * - Either we have modules
+ * - Or we have not pinned the first 8M
+ */
+#if defined(CONFIG_MODULES) || !defined(CONFIG_PIN_TLB_TEXT) || \
+    defined(CONFIG_DEBUG_PAGEALLOC)
+#define ITLB_MISS_KERNEL	1
+#endif
 
 /*
  * Value for the bits that have fixed value in RPN entries.
@@ -123,7 +125,6 @@ turn_on_mmu:
 	lis	r0,start_here@h
 	ori	r0,r0,start_here@l
 	mtspr	SPRN_SRR0,r0
-	SYNC
 	rfi				/* enables MMU */
 
 /*
@@ -170,7 +171,7 @@ turn_on_mmu:
 	stw	r1,0(r11);	\
 	tovirt(r1,r11);			/* set new kernel sp */	\
 	li	r10,MSR_KERNEL & ~(MSR_IR|MSR_DR); /* can take exceptions */ \
-	MTMSRD(r10);			/* (except for mach check in rtas) */ \
+	mtmsr	r10;		\
 	stw	r0,GPR0(r11);	\
 	SAVE_4GPRS(3, r11);	\
 	SAVE_2GPRS(7, r11)
@@ -300,7 +301,7 @@ SystemCall:
 /* On the MPC8xx, this is a software emulation interrupt.  It occurs
  * for all unimplemented and illegal instructions.
  */
-	EXCEPTION(0x1000, SoftEmu, SoftwareEmulation, EXC_XFER_STD)
+	EXCEPTION(0x1000, SoftEmu, program_check_exception, EXC_XFER_STD)
 
 	. = 0x1100
 /*
@@ -325,7 +326,7 @@ SystemCall:
 #endif
 
 InstructionTLBMiss:
-#if defined(CONFIG_8xx_CPU6) || defined(CONFIG_MODULES) || defined (CONFIG_DEBUG_PAGEALLOC) || defined (CONFIG_HUGETLB_PAGE)
+#if defined(CONFIG_8xx_CPU6) || defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE)
 	mtspr	SPRN_SPRG_SCRATCH2, r3
 #endif
 	EXCEPTION_PROLOG_0
@@ -343,15 +344,32 @@ InstructionTLBMiss:
 	INVALIDATE_ADJACENT_PAGES_CPU15(r11, r10)
 	/* Only modules will cause ITLB Misses as we always
 	 * pin the first 8MB of kernel memory */
-#if defined(CONFIG_MODULES) || defined (CONFIG_DEBUG_PAGEALLOC) || defined (CONFIG_HUGETLB_PAGE)
+#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE)
 	mfcr	r3
 #endif
-#if defined(CONFIG_MODULES) || defined (CONFIG_DEBUG_PAGEALLOC)
-	IS_KERNEL(r11, r10)
+#ifdef ITLB_MISS_KERNEL
+#if defined(SIMPLE_KERNEL_ADDRESS) && defined(CONFIG_PIN_TLB_TEXT)
+	andis.	r11, r10, 0x8000	/* Address >= 0x80000000 */
+#else
+	rlwinm	r11, r10, 16, 0xfff8
+	cmpli	cr0, r11, PAGE_OFFSET@h
+#ifndef CONFIG_PIN_TLB_TEXT
+	/* It is assumed that kernel code fits into the first 8M page */
+_ENTRY(ITLBMiss_cmp)
+	cmpli	cr7, r11, (PAGE_OFFSET + 0x0800000)@h
+#endif
+#endif
 #endif
 	mfspr	r11, SPRN_M_TW	/* Get level 1 table */
-#if defined(CONFIG_MODULES) || defined (CONFIG_DEBUG_PAGEALLOC)
-	BRANCH_UNLESS_KERNEL(3f)
+#ifdef ITLB_MISS_KERNEL
+#if defined(SIMPLE_KERNEL_ADDRESS) && defined(CONFIG_PIN_TLB_TEXT)
+	beq+	3f
+#else
+	blt+	3f
+#endif
+#ifndef CONFIG_PIN_TLB_TEXT
+	blt	cr7, ITLBMissLinear
+#endif
 	lis	r11, (swapper_pg_dir-PAGE_OFFSET)@ha
 3:
 #endif
@@ -369,7 +387,7 @@ InstructionTLBMiss:
 	rlwimi	r10, r11, 0, 0, 32 - PAGE_SHIFT - 1	/* Add level 2 base */
 	lwz	r10, 0(r10)	/* Get the pte */
 4:
-#if defined(CONFIG_MODULES) || defined (CONFIG_DEBUG_PAGEALLOC) || defined (CONFIG_HUGETLB_PAGE)
+#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE)
 	mtcr	r3
 #endif
 	/* Insert the APG into the TWC from the Linux PTE. */
@@ -400,7 +418,7 @@ InstructionTLBMiss:
 	MTSPR_CPU6(SPRN_MI_RPN, r10, r3)	/* Update TLB entry */
 
 	/* Restore registers */
-#if defined(CONFIG_8xx_CPU6) || defined(CONFIG_MODULES) || defined (CONFIG_DEBUG_PAGEALLOC) || defined (CONFIG_HUGETLB_PAGE)
+#if defined(CONFIG_8xx_CPU6) || defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE)
 	mfspr	r3, SPRN_SPRG_SCRATCH2
 #endif
 	EXCEPTION_EPILOG_0
@@ -447,23 +465,23 @@ DataStoreTLBMiss:
 	 * kernel page tables.
 	 */
 	mfspr	r10, SPRN_MD_EPN
-	rlwinm	r10, r10, 16, 0xfff8
-	cmpli	cr0, r10, PAGE_OFFSET@h
+	rlwinm	r11, r10, 16, 0xfff8
+	cmpli	cr0, r11, PAGE_OFFSET@h
 	mfspr	r11, SPRN_M_TW	/* Get level 1 table */
 	blt+	3f
+	rlwinm	r11, r10, 16, 0xfff8
 #ifndef CONFIG_PIN_TLB_IMMR
-	cmpli	cr0, r10, VIRT_IMMR_BASE@h
+	cmpli	cr0, r11, VIRT_IMMR_BASE@h
 #endif
 _ENTRY(DTLBMiss_cmp)
-	cmpli	cr7, r10, (PAGE_OFFSET + 0x1800000)@h
-	lis	r11, (swapper_pg_dir-PAGE_OFFSET)@ha
+	cmpli	cr7, r11, (PAGE_OFFSET + 0x1800000)@h
 #ifndef CONFIG_PIN_TLB_IMMR
 _ENTRY(DTLBMiss_jmp)
 	beq-	DTLBMissIMMR
 #endif
 	blt	cr7, DTLBMissLinear
+	lis	r11, (swapper_pg_dir-PAGE_OFFSET)@ha
 3:
-	mfspr	r10, SPRN_MD_EPN
 
 	/* Insert level 1 index */
 	rlwimi	r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29
@@ -569,8 +587,8 @@ _ENTRY(DTLBMiss_jmp)
 InstructionTLBError:
 	EXCEPTION_PROLOG
 	mr	r4,r12
-	mr	r5,r9
-	andis.	r10,r5,0x4000
+	andis.	r5,r9,DSISR_SRR1_MATCH_32S@h /* Filter relevant SRR1 bits */
+	andis.	r10,r9,SRR1_ISI_NOPT@h
 	beq+	1f
 	tlbie	r4
 itlbie:
@@ -595,7 +613,7 @@ DARFixed:/* Return from dcbx instruction bug workaround */
 	mfspr	r5,SPRN_DSISR
 	stw	r5,_DSISR(r11)
 	mfspr	r4,SPRN_DAR
-	andis.	r10,r5,0x4000
+	andis.	r10,r5,DSISR_NOHPTE@h
 	beq+	1f
 	tlbie	r4
 dtlbie:
@@ -684,7 +702,7 @@ DTLBMissLinear:
 	/* Set 8M byte page and mark it valid */
 	li	r11, MD_PS8MEG | MD_SVALID
 	MTSPR_CPU6(SPRN_MD_TWC, r11, r3)
-	rlwinm	r10, r10, 16, 0x0f800000	/* 8xx supports max 256Mb RAM */
+	rlwinm	r10, r10, 0, 0x0f800000	/* 8xx supports max 256Mb RAM */
 	ori	r10, r10, 0xf0 | MD_SPS16K | _PAGE_SHARED | _PAGE_DIRTY	| \
 			  _PAGE_PRESENT
 	MTSPR_CPU6(SPRN_MD_RPN, r10, r11)	/* Update TLB entry */
@@ -695,6 +713,22 @@ DTLBMissLinear:
 	EXCEPTION_EPILOG_0
 	rfi
 
+#ifndef CONFIG_PIN_TLB_TEXT
+ITLBMissLinear:
+	mtcr	r3
+	/* Set 8M byte page and mark it valid */
+	li	r11, MI_PS8MEG | MI_SVALID | _PAGE_EXEC
+	MTSPR_CPU6(SPRN_MI_TWC, r11, r3)
+	rlwinm	r10, r10, 0, 0x0f800000	/* 8xx supports max 256Mb RAM */
+	ori	r10, r10, 0xf0 | MI_SPS16K | _PAGE_SHARED | _PAGE_DIRTY	| \
+			  _PAGE_PRESENT
+	MTSPR_CPU6(SPRN_MI_RPN, r10, r11)	/* Update TLB entry */
+
+	mfspr	r3, SPRN_SPRG_SCRATCH2
+	EXCEPTION_EPILOG_0
+	rfi
+#endif
+
 /* This is the procedure to calculate the data EA for buggy dcbx,dcbi instructions
  * by decoding the registers used by the dcbx instruction and adding them.
  * DAR is set to the calculated address.
@@ -705,9 +739,10 @@ FixupDAR:/* Entry point for dcbx workaround. */
 	mtspr	SPRN_SPRG_SCRATCH2, r10
 	/* fetch instruction from memory. */
 	mfspr	r10, SPRN_SRR0
-	IS_KERNEL(r11, r10)
+	rlwinm	r11, r10, 16, 0xfff8
+	cmpli	cr0, r11, PAGE_OFFSET@h
 	mfspr	r11, SPRN_M_TW	/* Get level 1 table */
-	BRANCH_UNLESS_KERNEL(3f)
+	blt+	3f
 	rlwinm	r11, r10, 16, 0xfff8
 _ENTRY(FixupDAR_cmp)
 	cmpli	cr7, r11, (PAGE_OFFSET + 0x1800000)@h
@@ -915,10 +950,8 @@ start_here:
 	rfi
 /* Load up the kernel context */
 2:
-	SYNC			/* Force all PTE updates to finish */
 	tlbia			/* Clear all TLB entries */
 	sync			/* wait for tlbia/tlbie to finish */
-	TLBSYNC			/* ... on all CPUs */
 
 	/* set up the PTE pointers for the Abatron bdiGDB.
 	*/
@@ -955,15 +988,14 @@ initial_mmu:
 	mtspr	SPRN_MD_CTR, r10	/* remove PINNED DTLB entries */
 
 	tlbia			/* Invalidate all TLB entries */
-/* Always pin the first 8 MB ITLB to prevent ITLB
-   misses while mucking around with SRR0/SRR1 in asm
-*/
+#ifdef CONFIG_PIN_TLB_TEXT
 	lis	r8, MI_RSV4I@h
 	ori	r8, r8, 0x1c00
 
 	mtspr	SPRN_MI_CTR, r8	/* Set instruction MMU control */
+#endif
 
-#ifdef CONFIG_PIN_TLB
+#ifdef CONFIG_PIN_TLB_DATA
 	oris	r10, r10, MD_RSV4I@h
 	mtspr	SPRN_MD_CTR, r10	/* Set data TLB control */
 #endif
@@ -989,6 +1021,7 @@ initial_mmu:
 	 * internal registers (among other things).
 	 */
 #ifdef CONFIG_PIN_TLB_IMMR
+	oris	r10, r10, MD_RSV4I@h
 	ori	r10, r10, 0x1c00
 	mtspr	SPRN_MD_CTR, r10
 
diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
index e6252c5a57a4..1125c9be9e06 100644
--- a/arch/powerpc/kernel/idle_book3s.S
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -85,7 +85,61 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
 	std	r3,_WORT(r1)
 	mfspr	r3,SPRN_WORC
 	std	r3,_WORC(r1)
+/*
+ * On POWER9, there are idle states such as stop4, invoked via cpuidle,
+ * that lose hypervisor resources. In such cases, we need to save
+ * additional SPRs before entering those idle states so that they can
+ * be restored to their older values on wakeup from the idle state.
+ *
+ * On POWER8, the only such deep idle state is winkle which is used
+ * only in the context of CPU-Hotplug, where these additional SPRs are
+ * reinitiazed to a sane value. Hence there is no need to save/restore
+ * these SPRs.
+ */
+BEGIN_FTR_SECTION
+	blr
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
+
+power9_save_additional_sprs:
+	mfspr	r3, SPRN_PID
+	mfspr	r4, SPRN_LDBAR
+	std	r3, STOP_PID(r13)
+	std	r4, STOP_LDBAR(r13)
 
+	mfspr	r3, SPRN_FSCR
+	mfspr	r4, SPRN_HFSCR
+	std	r3, STOP_FSCR(r13)
+	std	r4, STOP_HFSCR(r13)
+
+	mfspr	r3, SPRN_MMCRA
+	mfspr	r4, SPRN_MMCR1
+	std	r3, STOP_MMCRA(r13)
+	std	r4, STOP_MMCR1(r13)
+
+	mfspr	r3, SPRN_MMCR2
+	std	r3, STOP_MMCR2(r13)
+	blr
+
+power9_restore_additional_sprs:
+	ld	r3,_LPCR(r1)
+	ld	r4, STOP_PID(r13)
+	mtspr	SPRN_LPCR,r3
+	mtspr	SPRN_PID, r4
+
+	ld	r3, STOP_LDBAR(r13)
+	ld	r4, STOP_FSCR(r13)
+	mtspr	SPRN_LDBAR, r3
+	mtspr	SPRN_FSCR, r4
+
+	ld	r3, STOP_HFSCR(r13)
+	ld	r4, STOP_MMCRA(r13)
+	mtspr	SPRN_HFSCR, r3
+	mtspr	SPRN_MMCRA, r4
+	/* We have already restored PACA_MMCR0 */
+	ld	r3, STOP_MMCR1(r13)
+	ld	r4, STOP_MMCR2(r13)
+	mtspr	SPRN_MMCR1, r3
+	mtspr	SPRN_MMCR2, r4
 	blr
 
 /*
@@ -141,7 +195,16 @@ pnv_powersave_common:
 	std	r5,_CCR(r1)
 	std	r1,PACAR1(r13)
 
+BEGIN_FTR_SECTION
+	/*
+	 * POWER9 does not require real mode to stop, and presently does not
+	 * set hwthread_state for KVM (threads don't share MMU context), so
+	 * we can remain in virtual mode for this.
+	 */
+	bctr
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 	/*
+	 * POWER8
 	 * Go to real mode to do the nap, as required by the architecture.
 	 * Also, we need to be in real mode before setting hwthread_state,
 	 * because as soon as we do that, another thread can switch
@@ -151,6 +214,20 @@ pnv_powersave_common:
 	mtmsrd	r7,0
 	bctr
 
+/*
+ * This is the sequence required to execute idle instructions, as
+ * specified in ISA v2.07 (and earlier). MSR[IR] and MSR[DR] must be 0.
+ */
+#define IDLE_STATE_ENTER_SEQ_NORET(IDLE_INST)			\
+	/* Magic NAP/SLEEP/WINKLE mode enter sequence */	\
+	std	r0,0(r1);					\
+	ptesync;						\
+	ld	r0,0(r1);					\
+236:	cmpd	cr0,r0,r0;					\
+	bne	236b;						\
+	IDLE_INST;
+
+
 	.globl pnv_enter_arch207_idle_mode
 pnv_enter_arch207_idle_mode:
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
@@ -242,20 +319,27 @@ enter_winkle:
 /*
  * r3 - PSSCR value corresponding to the requested stop state.
  */
-power_enter_stop:
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
-	/* Tell KVM we're entering idle */
+power_enter_stop_kvm_rm:
+	/*
+	 * This is currently unused because POWER9 KVM does not have to
+	 * gather secondary threads into sibling mode, but the code is
+	 * here in case that function is required.
+	 *
+	 * Tell KVM we're entering idle.
+	 */
 	li	r4,KVM_HWTHREAD_IN_IDLE
 	/* DO THIS IN REAL MODE!  See comment above. */
 	stb	r4,HSTATE_HWTHREAD_STATE(r13)
 #endif
+power_enter_stop:
 /*
  * Check if we are executing the lite variant with ESL=EC=0
  */
 	andis.   r4,r3,PSSCR_EC_ESL_MASK_SHIFTED
 	clrldi   r3,r3,60 /* r3 = Bits[60:63] = Requested Level (RL) */
 	bne	 .Lhandle_esl_ec_set
-	IDLE_STATE_ENTER_SEQ(PPC_STOP)
+	PPC_STOP
 	li	r3,0  /* Since we didn't lose state, return 0 */
 
 	/*
@@ -288,7 +372,8 @@ power_enter_stop:
 	ld	r4,ADDROFF(pnv_first_deep_stop_state)(r5)
 	cmpd	r3,r4
 	bge	.Lhandle_deep_stop
-	IDLE_STATE_ENTER_SEQ_NORET(PPC_STOP)
+	PPC_STOP	/* Does not return (system reset interrupt) */
+
 .Lhandle_deep_stop:
 /*
  * Entering deep idle state.
@@ -310,7 +395,7 @@ lwarx_loop_stop:
 
 	bl	save_sprs_to_stack
 
-	IDLE_STATE_ENTER_SEQ_NORET(PPC_STOP)
+	PPC_STOP	/* Does not return (system reset interrupt) */
 
 /*
  * Entered with MSR[EE]=0 and no soft-masked interrupts pending.
@@ -411,6 +496,18 @@ pnv_powersave_wakeup_mce:
 
 	b	pnv_powersave_wakeup
 
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+kvm_start_guest_check:
+	li	r0,KVM_HWTHREAD_IN_KERNEL
+	stb	r0,HSTATE_HWTHREAD_STATE(r13)
+	/* Order setting hwthread_state vs. testing hwthread_req */
+	sync
+	lbz	r0,HSTATE_HWTHREAD_REQ(r13)
+	cmpwi	r0,0
+	beqlr
+	b	kvm_start_guest
+#endif
+
 /*
  * Called from reset vector for powersave wakeups.
  * cr3 - set to gt if waking up with partial/complete hypervisor state loss
@@ -435,15 +532,9 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
 	mr	r3,r12
 
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
-	li	r0,KVM_HWTHREAD_IN_KERNEL
-	stb	r0,HSTATE_HWTHREAD_STATE(r13)
-	/* Order setting hwthread_state vs. testing hwthread_req */
-	sync
-	lbz	r0,HSTATE_HWTHREAD_REQ(r13)
-	cmpwi	r0,0
-	beq	1f
-	b	kvm_start_guest
-1:
+BEGIN_FTR_SECTION
+	bl	kvm_start_guest_check
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
 #endif
 
 	/* Return SRR1 from power7_nap() */
@@ -809,9 +900,16 @@ no_segments:
 	mtctr	r12
 	bctrl
 
+/*
+ * On POWER9, we can come here on wakeup from a cpuidle stop state.
+ * Hence restore the additional SPRs to the saved value.
+ *
+ * On POWER8, we come here only on winkle. Since winkle is used
+ * only in the case of CPU-Hotplug, we don't need to restore
+ * the additional SPRs.
+ */
 BEGIN_FTR_SECTION
-	ld	r4,_LPCR(r1)
-	mtspr	SPRN_LPCR,r4
+	bl 	power9_restore_additional_sprs
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 hypervisor_state_restored:
 
diff --git a/arch/powerpc/kernel/io-workarounds.c b/arch/powerpc/kernel/io-workarounds.c
index a582e0d42525..aa9f1b8261db 100644
--- a/arch/powerpc/kernel/io-workarounds.c
+++ b/arch/powerpc/kernel/io-workarounds.c
@@ -19,6 +19,8 @@
 #include <asm/pgtable.h>
 #include <asm/ppc-pci.h>
 #include <asm/io-workarounds.h>
+#include <asm/pte-walk.h>
+
 
 #define IOWA_MAX_BUS	8
 
@@ -75,8 +77,7 @@ struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR addr)
 		 * We won't find huge pages here (iomem). Also can't hit
 		 * a page table free due to init_mm
 		 */
-		ptep = __find_linux_pte_or_hugepte(init_mm.pgd, vaddr,
-						   NULL, &hugepage_shift);
+		ptep = find_init_mm_pte(vaddr, &hugepage_shift);
 		if (ptep == NULL)
 			paddr = 0;
 		else {
@@ -192,7 +193,7 @@ void iowa_register_bus(struct pci_controller *phb, struct ppc_pci_io *ops,
 
 	if (iowa_bus_count >= IOWA_MAX_BUS) {
 		pr_err("IOWA:Too many pci bridges, "
-		       "workarounds disabled for %s\n", np->full_name);
+		       "workarounds disabled for %pOF\n", np);
 		return;
 	}
 
@@ -207,6 +208,6 @@ void iowa_register_bus(struct pci_controller *phb, struct ppc_pci_io *ops,
 
 	iowa_bus_count++;
 
-	pr_debug("IOWA:[%d]Add bus, %s.\n", iowa_bus_count-1, np->full_name);
+	pr_debug("IOWA:[%d]Add bus, %pOF.\n", iowa_bus_count-1, np);
 }
 
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index 233ca3fe4754..af7a20dc6e09 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -127,8 +127,7 @@ static ssize_t fail_iommu_store(struct device *dev,
 	return count;
 }
 
-static DEVICE_ATTR(fail_iommu, S_IRUGO|S_IWUSR, fail_iommu_show,
-		   fail_iommu_store);
+static DEVICE_ATTR_RW(fail_iommu);
 
 static int fail_iommu_bus_notify(struct notifier_block *nb,
 				 unsigned long action, void *data)
@@ -190,7 +189,7 @@ static unsigned long iommu_range_alloc(struct device *dev,
 	unsigned int pool_nr;
 	struct iommu_pool *pool;
 
-	align_mask = 0xffffffffffffffffl >> (64 - align_order);
+	align_mask = (1ull << align_order) - 1;
 
 	/* This allocator was derived from x86_64's bit string search */
 
@@ -208,7 +207,7 @@ static unsigned long iommu_range_alloc(struct device *dev,
 	 * We don't need to disable preemption here because any CPU can
 	 * safely use any IOMMU pool.
 	 */
-	pool_nr = __this_cpu_read(iommu_pool_hash) & (tbl->nr_pools - 1);
+	pool_nr = raw_cpu_read(iommu_pool_hash) & (tbl->nr_pools - 1);
 
 	if (largealloc)
 		pool = &(tbl->large_pool);
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index f291f7826abc..4e65bf82f5e0 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -24,7 +24,7 @@
  * mask register (of which only 16 are defined), hence the weird shifting
  * and complement of the cached_irq_mask.  I want to be able to stuff
  * this right into the SIU SMASK register.
- * Many of the prep/chrp functions are conditional compiled on CONFIG_8xx
+ * Many of the prep/chrp functions are conditional compiled on CONFIG_PPC_8xx
  * to reduce code space and undefined function references.
  */
 
@@ -143,9 +143,10 @@ notrace unsigned int __check_irq_replay(void)
 	 */
 	unsigned char happened = local_paca->irq_happened;
 
-	/* Clear bit 0 which we wouldn't clear otherwise */
-	local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS;
 	if (happened & PACA_IRQ_HARD_DIS) {
+		/* Clear bit 0 which we wouldn't clear otherwise */
+		local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS;
+
 		/*
 		 * We may have missed a decrementer interrupt if hard disabled.
 		 * Check the decrementer register in case we had a rollover
@@ -173,41 +174,39 @@ notrace unsigned int __check_irq_replay(void)
 	 * This is a higher priority interrupt than the others, so
 	 * replay it first.
 	 */
-	local_paca->irq_happened &= ~PACA_IRQ_HMI;
-	if (happened & PACA_IRQ_HMI)
+	if (happened & PACA_IRQ_HMI) {
+		local_paca->irq_happened &= ~PACA_IRQ_HMI;
 		return 0xe60;
+	}
 
-	/*
-	 * We may have missed a decrementer interrupt. We check the
-	 * decrementer itself rather than the paca irq_happened field
-	 * in case we also had a rollover while hard disabled
-	 */
-	local_paca->irq_happened &= ~PACA_IRQ_DEC;
-	if (happened & PACA_IRQ_DEC)
+	if (happened & PACA_IRQ_DEC) {
+		local_paca->irq_happened &= ~PACA_IRQ_DEC;
 		return 0x900;
+	}
 
-	/* Finally check if an external interrupt happened */
-	local_paca->irq_happened &= ~PACA_IRQ_EE;
-	if (happened & PACA_IRQ_EE)
+	if (happened & PACA_IRQ_EE) {
+		local_paca->irq_happened &= ~PACA_IRQ_EE;
 		return 0x500;
+	}
 
 #ifdef CONFIG_PPC_BOOK3E
-	/* Finally check if an EPR external interrupt happened
-	 * this bit is typically set if we need to handle another
-	 * "edge" interrupt from within the MPIC "EPR" handler
+	/*
+	 * Check if an EPR external interrupt happened this bit is typically
+	 * set if we need to handle another "edge" interrupt from within the
+	 * MPIC "EPR" handler.
 	 */
-	local_paca->irq_happened &= ~PACA_IRQ_EE_EDGE;
-	if (happened & PACA_IRQ_EE_EDGE)
+	if (happened & PACA_IRQ_EE_EDGE) {
+		local_paca->irq_happened &= ~PACA_IRQ_EE_EDGE;
 		return 0x500;
+	}
 
-	local_paca->irq_happened &= ~PACA_IRQ_DBELL;
-	if (happened & PACA_IRQ_DBELL)
+	if (happened & PACA_IRQ_DBELL) {
+		local_paca->irq_happened &= ~PACA_IRQ_DBELL;
 		return 0x280;
+	}
 #else
-	local_paca->irq_happened &= ~PACA_IRQ_DBELL;
 	if (happened & PACA_IRQ_DBELL) {
-		if (cpu_has_feature(CPU_FTR_HVMODE))
-			return 0xe80;
+		local_paca->irq_happened &= ~PACA_IRQ_DBELL;
 		return 0xa00;
 	}
 #endif /* CONFIG_PPC_BOOK3E */
@@ -483,6 +482,18 @@ int arch_show_interrupts(struct seq_file *p, int prec)
 		seq_printf(p, "  Hypervisor Maintenance Interrupts\n");
 	}
 
+	seq_printf(p, "%*s: ", prec, "NMI");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", per_cpu(irq_stat, j).sreset_irqs);
+	seq_printf(p, "  System Reset interrupts\n");
+
+#ifdef CONFIG_PPC_WATCHDOG
+	seq_printf(p, "%*s: ", prec, "WDG");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", per_cpu(irq_stat, j).soft_nmi_irqs);
+	seq_printf(p, "  Watchdog soft-NMI interrupts\n");
+#endif
+
 #ifdef CONFIG_PPC_DOORBELL
 	if (cpu_has_feature(CPU_FTR_DBELL)) {
 		seq_printf(p, "%*s: ", prec, "DBL");
@@ -507,6 +518,10 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
 	sum += per_cpu(irq_stat, cpu).spurious_irqs;
 	sum += per_cpu(irq_stat, cpu).timer_irqs_others;
 	sum += per_cpu(irq_stat, cpu).hmi_exceptions;
+	sum += per_cpu(irq_stat, cpu).sreset_irqs;
+#ifdef CONFIG_PPC_WATCHDOG
+	sum += per_cpu(irq_stat, cpu).soft_nmi_irqs;
+#endif
 #ifdef CONFIG_PPC_DOORBELL
 	sum += per_cpu(irq_stat, cpu).doorbell_irqs;
 #endif
diff --git a/arch/powerpc/kernel/isa-bridge.c b/arch/powerpc/kernel/isa-bridge.c
index bb6f8993412e..1df6c74aa731 100644
--- a/arch/powerpc/kernel/isa-bridge.c
+++ b/arch/powerpc/kernel/isa-bridge.c
@@ -164,7 +164,7 @@ void __init isa_bridge_find_early(struct pci_controller *hose)
 	/* Set the global ISA io base to indicate we have an ISA bridge */
 	isa_io_base = ISA_IO_BASE;
 
-	pr_debug("ISA bridge (early) is %s\n", np->full_name);
+	pr_debug("ISA bridge (early) is %pOF\n", np);
 }
 
 /**
@@ -187,15 +187,15 @@ void __init isa_bridge_init_non_pci(struct device_node *np)
 	pna = of_n_addr_cells(np);
 	if (of_property_read_u32(np, "#address-cells", &na) ||
 	    of_property_read_u32(np, "#size-cells", &ns)) {
-		pr_warn("ISA: Non-PCI bridge %s is missing address format\n",
-			np->full_name);
+		pr_warn("ISA: Non-PCI bridge %pOF is missing address format\n",
+			np);
 		return;
 	}
 
 	/* Check it's a supported address format */
 	if (na != 2 || ns != 1) {
-		pr_warn("ISA: Non-PCI bridge %s has unsupported address format\n",
-			np->full_name);
+		pr_warn("ISA: Non-PCI bridge %pOF has unsupported address format\n",
+			np);
 		return;
 	}
 	rs = na + ns + pna;
@@ -203,8 +203,8 @@ void __init isa_bridge_init_non_pci(struct device_node *np)
 	/* Grab the ranges property */
 	ranges = of_get_property(np, "ranges", &rlen);
 	if (ranges == NULL || rlen < rs) {
-		pr_warn("ISA: Non-PCI bridge %s has absent or invalid ranges\n",
-			np->full_name);
+		pr_warn("ISA: Non-PCI bridge %pOF has absent or invalid ranges\n",
+			np);
 		return;
 	}
 
@@ -220,8 +220,8 @@ void __init isa_bridge_init_non_pci(struct device_node *np)
 
 	/* Got something ? */
 	if (!size || !pbasep) {
-		pr_warn("ISA: Non-PCI bridge %s has no usable IO range\n",
-			np->full_name);
+		pr_warn("ISA: Non-PCI bridge %pOF has no usable IO range\n",
+			np);
 		return;
 	}
 
@@ -233,15 +233,15 @@ void __init isa_bridge_init_non_pci(struct device_node *np)
 	/* Map pbase */
 	pbase = of_translate_address(np, pbasep);
 	if (pbase == OF_BAD_ADDR) {
-		pr_warn("ISA: Non-PCI bridge %s failed to translate IO base\n",
-			np->full_name);
+		pr_warn("ISA: Non-PCI bridge %pOF failed to translate IO base\n",
+			np);
 		return;
 	}
 
 	/* We need page alignment */
 	if ((cbase & ~PAGE_MASK) || (pbase & ~PAGE_MASK)) {
-		pr_warn("ISA: Non-PCI bridge %s has non aligned IO range\n",
-			np->full_name);
+		pr_warn("ISA: Non-PCI bridge %pOF has non aligned IO range\n",
+			np);
 		return;
 	}
 
@@ -255,7 +255,7 @@ void __init isa_bridge_init_non_pci(struct device_node *np)
 	__ioremap_at(pbase, (void *)ISA_IO_BASE,
 		     size, pgprot_val(pgprot_noncached(__pgprot(0))));
 
-	pr_debug("ISA: Non-PCI bridge is %s\n", np->full_name);
+	pr_debug("ISA: Non-PCI bridge is %pOF\n", np);
 }
 
 /**
@@ -277,8 +277,8 @@ static void isa_bridge_find_late(struct pci_dev *pdev,
 	/* Set the global ISA io base to indicate we have an ISA bridge */
 	isa_io_base = ISA_IO_BASE;
 
-	pr_debug("ISA bridge (late) is %s on %s\n",
-		 devnode->full_name, pci_name(pdev));
+	pr_debug("ISA bridge (late) is %pOF on %s\n",
+		 devnode, pci_name(pdev));
 }
 
 /**
diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c
index dbf098121ce6..35e240a0a408 100644
--- a/arch/powerpc/kernel/kgdb.c
+++ b/arch/powerpc/kernel/kgdb.c
@@ -67,9 +67,9 @@ static struct hard_trap_info
 #endif
 #else /* ! (defined(CONFIG_40x) || defined(CONFIG_BOOKE)) */
 	{ 0x0d00, 0x05 /* SIGTRAP */ },		/* single-step */
-#if defined(CONFIG_8xx)
+#if defined(CONFIG_PPC_8xx)
 	{ 0x1000, 0x04 /* SIGILL */  },		/* software emulation */
-#else /* ! CONFIG_8xx */
+#else /* ! CONFIG_PPC_8xx */
 	{ 0x0f00, 0x04 /* SIGILL */  },		/* performance monitor */
 	{ 0x0f20, 0x08 /* SIGFPE */  },		/* altivec unavailable */
 	{ 0x1300, 0x05 /* SIGTRAP */ }, 	/* instruction address break */
diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c
index 1086ea37c832..9ad37f827a97 100644
--- a/arch/powerpc/kernel/kvm.c
+++ b/arch/powerpc/kernel/kvm.c
@@ -25,7 +25,6 @@
 #include <linux/kvm_para.h>
 #include <linux/slab.h>
 #include <linux/of.h>
-#include <linux/nmi.h> /* hardlockup_detector_disable() */
 
 #include <asm/reg.h>
 #include <asm/sections.h>
@@ -719,12 +718,6 @@ static __init void kvm_free_tmp(void)
 
 static int __init kvm_guest_init(void)
 {
-	/*
-	 * The hardlockup detector is likely to get false positives in
-	 * KVM guests, so disable it by default.
-	 */
-	hardlockup_detector_disable();
-
 	if (!kvm_para_available())
 		goto free_tmp;
 
diff --git a/arch/powerpc/kernel/l2cr_6xx.S b/arch/powerpc/kernel/l2cr_6xx.S
index 97ec8557f974..6408f09dbbd9 100644
--- a/arch/powerpc/kernel/l2cr_6xx.S
+++ b/arch/powerpc/kernel/l2cr_6xx.S
@@ -181,7 +181,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_SPEC7450)
 	mtctr	r4
 	li	r4,0
 1:
-	lwzx	r0,r0,r4
+	lwzx	r0,0,r4
 	addi	r4,r4,32		/* Go to start of next cache line */
 	bdnz	1b
 	isync
@@ -328,7 +328,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_L3CR)
 	mtctr	r4
 	li	r4,0
 1:
-	lwzx	r0,r0,r4
+	lwzx	r0,0,r4
 	dcbf	0,r4
 	addi	r4,r4,32		/* Go to start of next cache line */
 	bdnz	1b
diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c
index 0694d20f85b6..5e5a64a8b4e4 100644
--- a/arch/powerpc/kernel/legacy_serial.c
+++ b/arch/powerpc/kernel/legacy_serial.c
@@ -147,8 +147,8 @@ static int __init add_legacy_port(struct device_node *np, int want_index,
 		legacy_serial_ports[index].serial_out = tsi_serial_out;
 	}
 
-	printk(KERN_DEBUG "Found legacy serial port %d for %s\n",
-	       index, np->full_name);
+	printk(KERN_DEBUG "Found legacy serial port %d for %pOF\n",
+	       index, np);
 	printk(KERN_DEBUG "  %s=%llx, taddr=%llx, irq=%lx, clk=%d, speed=%d\n",
 	       (iotype == UPIO_PORT) ? "port" : "mem",
 	       (unsigned long long)base, (unsigned long long)taddr, irq,
@@ -207,7 +207,7 @@ static int __init add_legacy_isa_port(struct device_node *np,
 	int index = -1;
 	u64 taddr;
 
-	DBG(" -> add_legacy_isa_port(%s)\n", np->full_name);
+	DBG(" -> add_legacy_isa_port(%pOF)\n", np);
 
 	/* Get the ISA port number */
 	reg = of_get_property(np, "reg", NULL);
@@ -256,7 +256,7 @@ static int __init add_legacy_pci_port(struct device_node *np,
 	unsigned int flags;
 	int iotype, index = -1, lindex = 0;
 
-	DBG(" -> add_legacy_pci_port(%s)\n", np->full_name);
+	DBG(" -> add_legacy_pci_port(%pOF)\n", np);
 
 	/* We only support ports that have a clock frequency properly
 	 * encoded in the device-tree (that is have an fcode). Anything
@@ -374,7 +374,7 @@ void __init find_legacy_serial_ports(void)
 	if (path != NULL) {
 		stdout = of_find_node_by_path(path);
 		if (stdout)
-			DBG("stdout is %s\n", stdout->full_name);
+			DBG("stdout is %pOF\n", stdout);
 	} else {
 		DBG(" no linux,stdout-path !\n");
 	}
@@ -603,7 +603,7 @@ static int __init check_legacy_serial_console(void)
 		DBG(" can't find stdout package %s !\n", name);
 		return -ENODEV;
 	}
-	DBG("stdout is %s\n", prom_stdout->full_name);
+	DBG("stdout is %pOF\n", prom_stdout);
 
 	name = of_get_property(prom_stdout, "name", NULL);
 	if (!name) {
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index e0e131e662ed..9b2ea7e71c06 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -22,11 +22,14 @@
 #undef DEBUG
 #define pr_fmt(fmt) "mce: " fmt
 
+#include <linux/hardirq.h>
 #include <linux/types.h>
 #include <linux/ptrace.h>
 #include <linux/percpu.h>
 #include <linux/export.h>
 #include <linux/irq_work.h>
+
+#include <asm/machdep.h>
 #include <asm/mce.h>
 
 static DEFINE_PER_CPU(int, mce_nest_count);
@@ -446,3 +449,33 @@ uint64_t get_mce_fault_addr(struct machine_check_event *evt)
 	return 0;
 }
 EXPORT_SYMBOL(get_mce_fault_addr);
+
+/*
+ * This function is called in real mode. Strictly no printk's please.
+ *
+ * regs->nip and regs->msr contains srr0 and ssr1.
+ */
+long machine_check_early(struct pt_regs *regs)
+{
+	long handled = 0;
+
+	__this_cpu_inc(irq_stat.mce_exceptions);
+
+	if (cur_cpu_spec && cur_cpu_spec->machine_check_early)
+		handled = cur_cpu_spec->machine_check_early(regs);
+	return handled;
+}
+
+long hmi_exception_realmode(struct pt_regs *regs)
+{
+	__this_cpu_inc(irq_stat.hmi_exceptions);
+
+	wait_for_subcore_guest_exit();
+
+	if (ppc_md.hmi_exception_early)
+		ppc_md.hmi_exception_early(regs);
+
+	wait_for_tb_resync();
+
+	return 0;
+}
diff --git a/arch/powerpc/kernel/of_platform.c b/arch/powerpc/kernel/of_platform.c
index 34aeac54f120..becaec990140 100644
--- a/arch/powerpc/kernel/of_platform.c
+++ b/arch/powerpc/kernel/of_platform.c
@@ -45,7 +45,7 @@ static int of_pci_phb_probe(struct platform_device *dev)
 	if (ppc_md.pci_setup_phb == NULL)
 		return -ENODEV;
 
-	pr_info("Setting up PCI bus %s\n", dev->dev.of_node->full_name);
+	pr_info("Setting up PCI bus %pOF\n", dev->dev.of_node);
 
 	/* Alloc and setup PHB data structure */
 	phb = pcibios_alloc_controller(dev->dev.of_node);
diff --git a/arch/powerpc/kernel/optprobes_head.S b/arch/powerpc/kernel/optprobes_head.S
index 4937bef7652f..52fc864cdec4 100644
--- a/arch/powerpc/kernel/optprobes_head.S
+++ b/arch/powerpc/kernel/optprobes_head.S
@@ -60,10 +60,6 @@ optprobe_template_entry:
 	std	r5,_CCR(r1)
 	lbz     r5,PACASOFTIRQEN(r13)
 	std     r5,SOFTE(r1)
-	mfdar	r5
-	std	r5,_DAR(r1)
-	mfdsisr	r5
-	std	r5,_DSISR(r1)
 
 	/*
 	 * We may get here from a module, so load the kernel TOC in r2.
@@ -122,10 +118,6 @@ optprobe_template_call_emulate:
 	mtxer	r5
 	ld	r5,_CCR(r1)
 	mtcr	r5
-	ld	r5,_DAR(r1)
-	mtdar	r5
-	ld	r5,_DSISR(r1)
-	mtdsisr	r5
 	REST_GPR(0,r1)
 	REST_10GPRS(2,r1)
 	REST_10GPRS(12,r1)
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index 8d63627e067f..70f073d6c3b2 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -99,18 +99,27 @@ static inline void free_lppacas(void) { }
  * If you make the number of persistent SLB entries dynamic, please also
  * update PR KVM to flush and restore them accordingly.
  */
-static struct slb_shadow *slb_shadow;
+static struct slb_shadow * __initdata slb_shadow;
 
 static void __init allocate_slb_shadows(int nr_cpus, int limit)
 {
 	int size = PAGE_ALIGN(sizeof(struct slb_shadow) * nr_cpus);
+
+	if (early_radix_enabled())
+		return;
+
 	slb_shadow = __va(memblock_alloc_base(size, PAGE_SIZE, limit));
 	memset(slb_shadow, 0, size);
 }
 
 static struct slb_shadow * __init init_slb_shadow(int cpu)
 {
-	struct slb_shadow *s = &slb_shadow[cpu];
+	struct slb_shadow *s;
+
+	if (early_radix_enabled())
+		return NULL;
+
+	s = &slb_shadow[cpu];
 
 	/*
 	 * When we come through here to initialise boot_paca, the slb_shadow
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index 341a7469cab8..02831a396419 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -373,9 +373,8 @@ static int pci_read_irq_line(struct pci_dev *pci_dev)
 		if (virq)
 			irq_set_irq_type(virq, IRQ_TYPE_LEVEL_LOW);
 	} else {
-		pr_debug(" Got one, spec %d cells (0x%08x 0x%08x...) on %s\n",
-			 oirq.args_count, oirq.args[0], oirq.args[1],
-			 of_node_full_name(oirq.np));
+		pr_debug(" Got one, spec %d cells (0x%08x 0x%08x...) on %pOF\n",
+			 oirq.args_count, oirq.args[0], oirq.args[1], oirq.np);
 
 		virq = irq_create_of_mapping(&oirq);
 	}
@@ -741,8 +740,8 @@ void pci_process_bridge_OF_ranges(struct pci_controller *hose,
 	struct of_pci_range range;
 	struct of_pci_range_parser parser;
 
-	printk(KERN_INFO "PCI host bridge %s %s ranges:\n",
-	       dev->full_name, primary ? "(primary)" : "");
+	printk(KERN_INFO "PCI host bridge %pOF %s ranges:\n",
+	       dev, primary ? "(primary)" : "");
 
 	/* Check for ranges property */
 	if (of_pci_range_parser_init(&parser, dev))
@@ -1556,8 +1555,8 @@ static void pcibios_setup_phb_resources(struct pci_controller *hose,
 
 	if (!res->flags) {
 		pr_debug("PCI: I/O resource not set for host"
-			 " bridge %s (domain %d)\n",
-			 hose->dn->full_name, hose->global_number);
+			 " bridge %pOF (domain %d)\n",
+			 hose->dn, hose->global_number);
 	} else {
 		offset = pcibios_io_space_offset(hose);
 
@@ -1668,7 +1667,7 @@ void pcibios_scan_phb(struct pci_controller *hose)
 	struct device_node *node = hose->dn;
 	int mode;
 
-	pr_debug("PCI: Scanning PHB %s\n", of_node_full_name(node));
+	pr_debug("PCI: Scanning PHB %pOF\n", node);
 
 	/* Get some IO space for the new PHB */
 	pcibios_setup_phb_io_space(hose);
diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c
index 41c86c6b6e4d..1d817f4d97d9 100644
--- a/arch/powerpc/kernel/pci_32.c
+++ b/arch/powerpc/kernel/pci_32.c
@@ -79,8 +79,8 @@ make_one_node_map(struct device_node* node, u8 pci_bus)
 		return;
 	bus_range = of_get_property(node, "bus-range", &len);
 	if (bus_range == NULL || len < 2 * sizeof(int)) {
-		printk(KERN_WARNING "Can't get bus-range for %s, "
-		       "assuming it starts at 0\n", node->full_name);
+		printk(KERN_WARNING "Can't get bus-range for %pOF, "
+		       "assuming it starts at 0\n", node);
 		pci_to_OF_bus_map[pci_bus] = 0;
 	} else
 		pci_to_OF_bus_map[pci_bus] = bus_range[0];
diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c
index ed5e9ff61a68..932b9741aa8f 100644
--- a/arch/powerpc/kernel/pci_64.c
+++ b/arch/powerpc/kernel/pci_64.c
@@ -111,7 +111,7 @@ int pcibios_unmap_io_space(struct pci_bus *bus)
 	if (hose->io_base_alloc == NULL)
 		return 0;
 
-	pr_debug("IO unmapping for PHB %s\n", hose->dn->full_name);
+	pr_debug("IO unmapping for PHB %pOF\n", hose->dn);
 	pr_debug("  alloc=0x%p\n", hose->io_base_alloc);
 
 	/* This is a PHB, we fully unmap the IO area */
@@ -151,7 +151,7 @@ static int pcibios_map_phb_io_space(struct pci_controller *hose)
 	hose->io_base_virt = (void __iomem *)(area->addr +
 					      hose->io_base_phys - phys_page);
 
-	pr_debug("IO mapping for PHB %s\n", hose->dn->full_name);
+	pr_debug("IO mapping for PHB %pOF\n", hose->dn);
 	pr_debug("  phys=0x%016llx, virt=0x%p (alloc=0x%p)\n",
 		 hose->io_base_phys, hose->io_base_virt, hose->io_base_alloc);
 	pr_debug("  size=0x%016llx (alloc=0x%016lx)\n",
diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c
index 592693437070..0e395afbf0f4 100644
--- a/arch/powerpc/kernel/pci_dn.c
+++ b/arch/powerpc/kernel/pci_dn.c
@@ -139,7 +139,6 @@ struct pci_dn *pci_get_pdn(struct pci_dev *pdev)
 
 #ifdef CONFIG_PCI_IOV
 static struct pci_dn *add_one_dev_pci_data(struct pci_dn *parent,
-					   struct pci_dev *pdev,
 					   int vf_index,
 					   int busno, int devfn)
 {
@@ -150,10 +149,8 @@ static struct pci_dn *add_one_dev_pci_data(struct pci_dn *parent,
 		return NULL;
 
 	pdn = kzalloc(sizeof(*pdn), GFP_KERNEL);
-	if (!pdn) {
-		dev_warn(&pdev->dev, "%s: Out of memory!\n", __func__);
+	if (!pdn)
 		return NULL;
-	}
 
 	pdn->phb = parent->phb;
 	pdn->parent = parent;
@@ -167,13 +164,6 @@ static struct pci_dn *add_one_dev_pci_data(struct pci_dn *parent,
 	INIT_LIST_HEAD(&pdn->list);
 	list_add_tail(&pdn->list, &parent->child_list);
 
-	/*
-	 * If we already have PCI device instance, lets
-	 * bind them.
-	 */
-	if (pdev)
-		pdev->dev.archdata.pci_data = pdn;
-
 	return pdn;
 }
 #endif
@@ -201,7 +191,7 @@ struct pci_dn *add_dev_pci_data(struct pci_dev *pdev)
 	for (i = 0; i < pci_sriov_get_totalvfs(pdev); i++) {
 		struct eeh_dev *edev __maybe_unused;
 
-		pdn = add_one_dev_pci_data(parent, NULL, i,
+		pdn = add_one_dev_pci_data(parent, i,
 					   pci_iov_virtfn_bus(pdev, i),
 					   pci_iov_virtfn_devfn(pdev, i));
 		if (!pdn) {
@@ -303,7 +293,6 @@ struct pci_dn *pci_add_device_node_info(struct pci_controller *hose,
 	if (pdn == NULL)
 		return NULL;
 	dn->data = pdn;
-	pdn->node = dn;
 	pdn->phb = hose;
 #ifdef CONFIG_PPC_POWERNV
 	pdn->pe_number = IODA_INVALID_PE;
@@ -352,6 +341,7 @@ EXPORT_SYMBOL_GPL(pci_add_device_node_info);
 void pci_remove_device_node_info(struct device_node *dn)
 {
 	struct pci_dn *pdn = dn ? PCI_DN(dn) : NULL;
+	struct device_node *parent;
 #ifdef CONFIG_EEH
 	struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
 
@@ -364,8 +354,10 @@ void pci_remove_device_node_info(struct device_node *dn)
 
 	WARN_ON(!list_empty(&pdn->child_list));
 	list_del(&pdn->list);
-	if (pdn->parent)
-		of_node_put(pdn->parent->node);
+
+	parent = of_get_parent(dn);
+	if (parent)
+		of_node_put(parent);
 
 	dn->data = NULL;
 	kfree(pdn);
diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c
index ea3d98115b88..0d790f8432d2 100644
--- a/arch/powerpc/kernel/pci_of_scan.c
+++ b/arch/powerpc/kernel/pci_of_scan.c
@@ -211,19 +211,19 @@ void of_scan_pci_bridge(struct pci_dev *dev)
 	unsigned int flags;
 	u64 size;
 
-	pr_debug("of_scan_pci_bridge(%s)\n", node->full_name);
+	pr_debug("of_scan_pci_bridge(%pOF)\n", node);
 
 	/* parse bus-range property */
 	busrange = of_get_property(node, "bus-range", &len);
 	if (busrange == NULL || len != 8) {
-		printk(KERN_DEBUG "Can't get bus-range for PCI-PCI bridge %s\n",
-		       node->full_name);
+		printk(KERN_DEBUG "Can't get bus-range for PCI-PCI bridge %pOF\n",
+		       node);
 		return;
 	}
 	ranges = of_get_property(node, "ranges", &len);
 	if (ranges == NULL) {
-		printk(KERN_DEBUG "Can't get ranges for PCI-PCI bridge %s\n",
-		       node->full_name);
+		printk(KERN_DEBUG "Can't get ranges for PCI-PCI bridge %pOF\n",
+		       node);
 		return;
 	}
 
@@ -233,8 +233,8 @@ void of_scan_pci_bridge(struct pci_dev *dev)
 		bus = pci_add_new_bus(dev->bus, dev,
 				      of_read_number(busrange, 1));
 		if (!bus) {
-			printk(KERN_ERR "Failed to create pci bus for %s\n",
-			       node->full_name);
+			printk(KERN_ERR "Failed to create pci bus for %pOF\n",
+			       node);
 			return;
 		}
 	}
@@ -262,13 +262,13 @@ void of_scan_pci_bridge(struct pci_dev *dev)
 			res = bus->resource[0];
 			if (res->flags) {
 				printk(KERN_ERR "PCI: ignoring extra I/O range"
-				       " for bridge %s\n", node->full_name);
+				       " for bridge %pOF\n", node);
 				continue;
 			}
 		} else {
 			if (i >= PCI_NUM_RESOURCES - PCI_BRIDGE_RESOURCES) {
 				printk(KERN_ERR "PCI: too many memory ranges"
-				       " for bridge %s\n", node->full_name);
+				       " for bridge %pOF\n", node);
 				continue;
 			}
 			res = bus->resource[i];
@@ -307,7 +307,7 @@ static struct pci_dev *of_scan_pci_dev(struct pci_bus *bus,
 	struct eeh_dev *edev = pdn_to_eeh_dev(PCI_DN(dn));
 #endif
 
-	pr_debug("  * %s\n", dn->full_name);
+	pr_debug("  * %pOF\n", dn);
 	if (!of_device_is_available(dn))
 		return NULL;
 
@@ -350,8 +350,8 @@ static void __of_scan_bus(struct device_node *node, struct pci_bus *bus,
 	struct device_node *child;
 	struct pci_dev *dev;
 
-	pr_debug("of_scan_bus(%s) bus no %d...\n",
-		 node->full_name, bus->number);
+	pr_debug("of_scan_bus(%pOF) bus no %d...\n",
+		 node, bus->number);
 
 	/* Scan direct children */
 	for_each_child_of_node(node, child) {
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 1f0fd361e09b..a0c74bbf3454 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -230,7 +230,8 @@ void enable_kernel_fp(void)
 }
 EXPORT_SYMBOL(enable_kernel_fp);
 
-static int restore_fp(struct task_struct *tsk) {
+static int restore_fp(struct task_struct *tsk)
+{
 	if (tsk->thread.load_fp || msr_tm_active(tsk->thread.regs->msr)) {
 		load_fp_state(&current->thread.fp_state);
 		current->thread.load_fp++;
@@ -330,11 +331,19 @@ static inline int restore_altivec(struct task_struct *tsk) { return 0; }
 #ifdef CONFIG_VSX
 static void __giveup_vsx(struct task_struct *tsk)
 {
-	if (tsk->thread.regs->msr & MSR_FP)
+	unsigned long msr = tsk->thread.regs->msr;
+
+	/*
+	 * We should never be ssetting MSR_VSX without also setting
+	 * MSR_FP and MSR_VEC
+	 */
+	WARN_ON((msr & MSR_VSX) && !((msr & MSR_FP) && (msr & MSR_VEC)));
+
+	/* __giveup_fpu will clear MSR_VSX */
+	if (msr & MSR_FP)
 		__giveup_fpu(tsk);
-	if (tsk->thread.regs->msr & MSR_VEC)
+	if (msr & MSR_VEC)
 		__giveup_altivec(tsk);
-	tsk->thread.regs->msr &= ~MSR_VSX;
 }
 
 static void giveup_vsx(struct task_struct *tsk)
@@ -346,14 +355,6 @@ static void giveup_vsx(struct task_struct *tsk)
 	msr_check_and_clear(MSR_FP|MSR_VEC|MSR_VSX);
 }
 
-static void save_vsx(struct task_struct *tsk)
-{
-	if (tsk->thread.regs->msr & MSR_FP)
-		save_fpu(tsk);
-	if (tsk->thread.regs->msr & MSR_VEC)
-		save_altivec(tsk);
-}
-
 void enable_kernel_vsx(void)
 {
 	unsigned long cpumsr;
@@ -374,10 +375,6 @@ void enable_kernel_vsx(void)
 		 */
 		if(!msr_tm_active(cpumsr) && msr_tm_active(current->thread.regs->msr))
 			return;
-		if (current->thread.regs->msr & MSR_FP)
-			__giveup_fpu(current);
-		if (current->thread.regs->msr & MSR_VEC)
-			__giveup_altivec(current);
 		__giveup_vsx(current);
 	}
 }
@@ -407,7 +404,6 @@ static int restore_vsx(struct task_struct *tsk)
 }
 #else
 static inline int restore_vsx(struct task_struct *tsk) { return 0; }
-static inline void save_vsx(struct task_struct *tsk) { }
 #endif /* CONFIG_VSX */
 
 #ifdef CONFIG_SPE
@@ -487,6 +483,8 @@ void giveup_all(struct task_struct *tsk)
 	msr_check_and_set(msr_all_available);
 	check_if_tm_restore_required(tsk);
 
+	WARN_ON((usermsr & MSR_VSX) && !((usermsr & MSR_FP) && (usermsr & MSR_VEC)));
+
 #ifdef CONFIG_PPC_FPU
 	if (usermsr & MSR_FP)
 		__giveup_fpu(tsk);
@@ -495,10 +493,6 @@ void giveup_all(struct task_struct *tsk)
 	if (usermsr & MSR_VEC)
 		__giveup_altivec(tsk);
 #endif
-#ifdef CONFIG_VSX
-	if (usermsr & MSR_VSX)
-		__giveup_vsx(tsk);
-#endif
 #ifdef CONFIG_SPE
 	if (usermsr & MSR_SPE)
 		__giveup_spe(tsk);
@@ -553,19 +547,13 @@ void save_all(struct task_struct *tsk)
 
 	msr_check_and_set(msr_all_available);
 
-	/*
-	 * Saving the way the register space is in hardware, save_vsx boils
-	 * down to a save_fpu() and save_altivec()
-	 */
-	if (usermsr & MSR_VSX) {
-		save_vsx(tsk);
-	} else {
-		if (usermsr & MSR_FP)
-			save_fpu(tsk);
+	WARN_ON((usermsr & MSR_VSX) && !((usermsr & MSR_FP) && (usermsr & MSR_VEC)));
 
-		if (usermsr & MSR_VEC)
-			save_altivec(tsk);
-	}
+	if (usermsr & MSR_FP)
+		save_fpu(tsk);
+
+	if (usermsr & MSR_VEC)
+		save_altivec(tsk);
 
 	if (usermsr & MSR_SPE)
 		__giveup_spe(tsk);
@@ -1392,13 +1380,13 @@ void show_regs(struct pt_regs * regs)
 
 	show_regs_print_info(KERN_DEFAULT);
 
-	printk("NIP: "REG" LR: "REG" CTR: "REG"\n",
+	printk("NIP:  "REG" LR: "REG" CTR: "REG"\n",
 	       regs->nip, regs->link, regs->ctr);
 	printk("REGS: %p TRAP: %04lx   %s  (%s)\n",
 	       regs, regs->trap, print_tainted(), init_utsname()->release);
-	printk("MSR: "REG" ", regs->msr);
+	printk("MSR:  "REG" ", regs->msr);
 	print_msr_bits(regs->msr);
-	printk("  CR: %08lx  XER: %08lx\n", regs->ccr, regs->xer);
+	pr_cont("  CR: %08lx  XER: %08lx\n", regs->ccr, regs->xer);
 	trap = TRAP(regs);
 	if ((regs->trap != 0xc00) && cpu_has_feature(CPU_FTR_CFAR))
 		pr_cont("CFAR: "REG" ", regs->orig_gpr3);
@@ -1991,11 +1979,25 @@ void show_stack(struct task_struct *tsk, unsigned long *stack)
 void notrace __ppc64_runlatch_on(void)
 {
 	struct thread_info *ti = current_thread_info();
-	unsigned long ctrl;
 
-	ctrl = mfspr(SPRN_CTRLF);
-	ctrl |= CTRL_RUNLATCH;
-	mtspr(SPRN_CTRLT, ctrl);
+	if (cpu_has_feature(CPU_FTR_ARCH_206)) {
+		/*
+		 * Least significant bit (RUN) is the only writable bit of
+		 * the CTRL register, so we can avoid mfspr. 2.06 is not the
+		 * earliest ISA where this is the case, but it's convenient.
+		 */
+		mtspr(SPRN_CTRLT, CTRL_RUNLATCH);
+	} else {
+		unsigned long ctrl;
+
+		/*
+		 * Some architectures (e.g., Cell) have writable fields other
+		 * than RUN, so do the read-modify-write.
+		 */
+		ctrl = mfspr(SPRN_CTRLF);
+		ctrl |= CTRL_RUNLATCH;
+		mtspr(SPRN_CTRLT, ctrl);
+	}
 
 	ti->local_flags |= _TLF_RUNLATCH;
 }
@@ -2004,13 +2006,18 @@ void notrace __ppc64_runlatch_on(void)
 void notrace __ppc64_runlatch_off(void)
 {
 	struct thread_info *ti = current_thread_info();
-	unsigned long ctrl;
 
 	ti->local_flags &= ~_TLF_RUNLATCH;
 
-	ctrl = mfspr(SPRN_CTRLF);
-	ctrl &= ~CTRL_RUNLATCH;
-	mtspr(SPRN_CTRLT, ctrl);
+	if (cpu_has_feature(CPU_FTR_ARCH_206)) {
+		mtspr(SPRN_CTRLT, 0);
+	} else {
+		unsigned long ctrl;
+
+		ctrl = mfspr(SPRN_CTRLF);
+		ctrl &= ~CTRL_RUNLATCH;
+		mtspr(SPRN_CTRLT, ctrl);
+	}
 }
 #endif /* CONFIG_PPC64 */
 
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 613f79f03877..02190e90c7ae 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -177,6 +177,7 @@ struct platform_support {
 	bool hash_mmu;
 	bool radix_mmu;
 	bool radix_gtse;
+	bool xive;
 };
 
 /* Platforms codes are now obsolete in the kernel. Now only used within this
@@ -1041,6 +1042,27 @@ static void __init prom_parse_mmu_model(u8 val,
 	}
 }
 
+static void __init prom_parse_xive_model(u8 val,
+					 struct platform_support *support)
+{
+	switch (val) {
+	case OV5_FEAT(OV5_XIVE_EITHER): /* Either Available */
+		prom_debug("XIVE - either mode supported\n");
+		support->xive = true;
+		break;
+	case OV5_FEAT(OV5_XIVE_EXPLOIT): /* Only Exploitation mode */
+		prom_debug("XIVE - exploitation mode supported\n");
+		support->xive = true;
+		break;
+	case OV5_FEAT(OV5_XIVE_LEGACY): /* Only Legacy mode */
+		prom_debug("XIVE - legacy mode supported\n");
+		break;
+	default:
+		prom_debug("Unknown xive support option: 0x%x\n", val);
+		break;
+	}
+}
+
 static void __init prom_parse_platform_support(u8 index, u8 val,
 					       struct platform_support *support)
 {
@@ -1054,6 +1076,10 @@ static void __init prom_parse_platform_support(u8 index, u8 val,
 			support->radix_gtse = true;
 		}
 		break;
+	case OV5_INDX(OV5_XIVE_SUPPORT): /* Interrupt mode */
+		prom_parse_xive_model(val & OV5_FEAT(OV5_XIVE_SUPPORT),
+				      support);
+		break;
 	}
 }
 
@@ -1062,7 +1088,8 @@ static void __init prom_check_platform_support(void)
 	struct platform_support supported = {
 		.hash_mmu = false,
 		.radix_mmu = false,
-		.radix_gtse = false
+		.radix_gtse = false,
+		.xive = false
 	};
 	int prop_len = prom_getproplen(prom.chosen,
 				       "ibm,arch-vec-5-platform-support");
@@ -1095,6 +1122,11 @@ static void __init prom_check_platform_support(void)
 		/* We're probably on a legacy hypervisor */
 		prom_debug("Assuming legacy hash support\n");
 	}
+
+	if (supported.xive) {
+		prom_debug("Asking for XIVE\n");
+		ibm_architecture_vec.vec5.intarch = OV5_FEAT(OV5_XIVE_EXPLOIT);
+	}
 }
 
 static void __init prom_send_capabilities(void)
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index 660ed39e9c9a..07cd22e35405 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -1594,11 +1594,8 @@ static int ppr_get(struct task_struct *target,
 		      unsigned int pos, unsigned int count,
 		      void *kbuf, void __user *ubuf)
 {
-	int ret;
-
-	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
-				&target->thread.ppr, 0, sizeof(u64));
-	return ret;
+	return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				   &target->thread.ppr, 0, sizeof(u64));
 }
 
 static int ppr_set(struct task_struct *target,
@@ -1606,11 +1603,8 @@ static int ppr_set(struct task_struct *target,
 		      unsigned int pos, unsigned int count,
 		      const void *kbuf, const void __user *ubuf)
 {
-	int ret;
-
-	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
-				&target->thread.ppr, 0, sizeof(u64));
-	return ret;
+	return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				  &target->thread.ppr, 0, sizeof(u64));
 }
 
 static int dscr_get(struct task_struct *target,
@@ -1618,22 +1612,16 @@ static int dscr_get(struct task_struct *target,
 		      unsigned int pos, unsigned int count,
 		      void *kbuf, void __user *ubuf)
 {
-	int ret;
-
-	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
-				&target->thread.dscr, 0, sizeof(u64));
-	return ret;
+	return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				   &target->thread.dscr, 0, sizeof(u64));
 }
 static int dscr_set(struct task_struct *target,
 		      const struct user_regset *regset,
 		      unsigned int pos, unsigned int count,
 		      const void *kbuf, const void __user *ubuf)
 {
-	int ret;
-
-	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
-				&target->thread.dscr, 0, sizeof(u64));
-	return ret;
+	return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				  &target->thread.dscr, 0, sizeof(u64));
 }
 #endif
 #ifdef CONFIG_PPC_BOOK3S_64
@@ -1642,22 +1630,16 @@ static int tar_get(struct task_struct *target,
 		      unsigned int pos, unsigned int count,
 		      void *kbuf, void __user *ubuf)
 {
-	int ret;
-
-	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
-				&target->thread.tar, 0, sizeof(u64));
-	return ret;
+	return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				   &target->thread.tar, 0, sizeof(u64));
 }
 static int tar_set(struct task_struct *target,
 		      const struct user_regset *regset,
 		      unsigned int pos, unsigned int count,
 		      const void *kbuf, const void __user *ubuf)
 {
-	int ret;
-
-	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
-				&target->thread.tar, 0, sizeof(u64));
-	return ret;
+	return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				  &target->thread.tar, 0, sizeof(u64));
 }
 
 static int ebb_active(struct task_struct *target,
diff --git a/arch/powerpc/kernel/reloc_64.S b/arch/powerpc/kernel/reloc_64.S
index d88736fbece6..e8cfc69f59ae 100644
--- a/arch/powerpc/kernel/reloc_64.S
+++ b/arch/powerpc/kernel/reloc_64.S
@@ -82,7 +82,7 @@ _GLOBAL(relocate)
 6:	blr
 
 .balign 8
-p_dyn:	.llong	__dynamic_start - 0b
-p_rela:	.llong	__rela_dyn_start - 0b
-p_st:	.llong	_stext - 0b
+p_dyn:	.8byte	__dynamic_start - 0b
+p_rela:	.8byte	__rela_dyn_start - 0b
+p_st:	.8byte	_stext - 0b
 
diff --git a/arch/powerpc/kernel/rtas_pci.c b/arch/powerpc/kernel/rtas_pci.c
index 73f1934582c2..c2b148b1634a 100644
--- a/arch/powerpc/kernel/rtas_pci.c
+++ b/arch/powerpc/kernel/rtas_pci.c
@@ -91,26 +91,14 @@ static int rtas_pci_read_config(struct pci_bus *bus,
 				unsigned int devfn,
 				int where, int size, u32 *val)
 {
-	struct device_node *busdn, *dn;
 	struct pci_dn *pdn;
-	bool found = false;
 	int ret;
 
-	/* Search only direct children of the bus */
 	*val = 0xFFFFFFFF;
-	busdn = pci_bus_to_OF_node(bus);
-	for (dn = busdn->child; dn; dn = dn->sibling) {
-		pdn = PCI_DN(dn);
-		if (pdn && pdn->devfn == devfn
-		    && of_device_is_available(dn)) {
-			found = true;
-			break;
-		}
-	}
 
-	if (!found)
-		return PCIBIOS_DEVICE_NOT_FOUND;
+	pdn = pci_get_pdn_by_devfn(bus, devfn);
 
+	/* Validity of pdn is checked in here */
 	ret = rtas_read_config(pdn, where, size, val);
 	if (*val == EEH_IO_ERROR_VALUE(size) &&
 	    eeh_dev_check_failure(pdn_to_eeh_dev(pdn)))
@@ -153,24 +141,11 @@ static int rtas_pci_write_config(struct pci_bus *bus,
 				 unsigned int devfn,
 				 int where, int size, u32 val)
 {
-	struct device_node *busdn, *dn;
 	struct pci_dn *pdn;
-	bool found = false;
-
-	/* Search only direct children of the bus */
-	busdn = pci_bus_to_OF_node(bus);
-	for (dn = busdn->child; dn; dn = dn->sibling) {
-		pdn = PCI_DN(dn);
-		if (pdn && pdn->devfn == devfn
-		    && of_device_is_available(dn)) {
-			found = true;
-			break;
-		}
-	}
 
-	if (!found)
-		return PCIBIOS_DEVICE_NOT_FOUND;
+	pdn = pci_get_pdn_by_devfn(bus, devfn);
 
+	/* Validity of pdn is checked in here. */
 	return rtas_write_config(pdn, where, size, val);
 }
 
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 94a948207cd2..7de73589d8e2 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -481,7 +481,7 @@ void __init smp_setup_cpu_maps(void)
 		__be32 cpu_be;
 		int j, len;
 
-		DBG("  * %s...\n", dn->full_name);
+		DBG("  * %pOF...\n", dn);
 
 		intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s",
 				&len);
@@ -704,30 +704,6 @@ int check_legacy_ioport(unsigned long base_port)
 }
 EXPORT_SYMBOL(check_legacy_ioport);
 
-static int ppc_panic_event(struct notifier_block *this,
-                             unsigned long event, void *ptr)
-{
-	/*
-	 * If firmware-assisted dump has been registered then trigger
-	 * firmware-assisted dump and let firmware handle everything else.
-	 */
-	crash_fadump(NULL, ptr);
-	ppc_md.panic(ptr);  /* May not return */
-	return NOTIFY_DONE;
-}
-
-static struct notifier_block ppc_panic_block = {
-	.notifier_call = ppc_panic_event,
-	.priority = INT_MIN /* may not return; must be done last */
-};
-
-void __init setup_panic(void)
-{
-	if (!ppc_md.panic)
-		return;
-	atomic_notifier_chain_register(&panic_notifier_list, &ppc_panic_block);
-}
-
 #ifdef CONFIG_CHECK_CACHE_COHERENCY
 /*
  * For platforms that have configurable cache-coherency.  This function
@@ -872,9 +848,6 @@ void __init setup_arch(char **cmdline_p)
 	/* Probe the machine type, establish ppc_md. */
 	probe_machine();
 
-	/* Setup panic notifier if requested by the platform. */
-	setup_panic();
-
 	/*
 	 * Configure ppc_md.power_save (ppc32 only, 64-bit machines do
 	 * it from their respective probe() function.
@@ -916,13 +889,6 @@ void __init setup_arch(char **cmdline_p)
 	/* Reserve large chunks of memory for use by CMA for KVM. */
 	kvm_cma_reserve();
 
-	/*
-	 * Reserve any gigantic pages requested on the command line.
-	 * memblock needs to have been initialized by the time this is
-	 * called since this will reserve memory.
-	 */
-	reserve_hugetlb_gpages();
-
 	klp_init_thread_info(&init_thread_info);
 
 	init_mm.start_code = (unsigned long)_stext;
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index 2f88f6cf1a42..51ebc01fff52 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -98,6 +98,9 @@ extern unsigned int memset_nocache_branch; /* Insn to be replaced by NOP */
 
 notrace void __init machine_init(u64 dt_ptr)
 {
+	unsigned int *addr = &memset_nocache_branch;
+	unsigned long insn;
+
 	/* Configure static keys first, now that we're relocated. */
 	setup_feature_keys();
 
@@ -105,7 +108,9 @@ notrace void __init machine_init(u64 dt_ptr)
 	udbg_early_init();
 
 	patch_instruction((unsigned int *)&memcpy, PPC_INST_NOP);
-	patch_instruction(&memset_nocache_branch, PPC_INST_NOP);
+
+	insn = create_cond_branch(addr, branch_target(addr), 0x820000);
+	patch_instruction(addr, insn);	/* replace b by bne cr0 */
 
 	/* Do some early initialization based on the flat device tree */
 	early_init_devtree(__va(dt_ptr));
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index af23d4b576ec..b89c6aac48c9 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -564,6 +564,9 @@ static __init u64 safe_stack_limit(void)
 	/* Other BookE, we assume the first GB is bolted */
 	return 1ul << 30;
 #else
+	if (early_radix_enabled())
+		return ULONG_MAX;
+
 	/* BookS, the first segment is bolted */
 	if (mmu_has_feature(MMU_FTR_1T_SEGMENT))
 		return 1UL << SID_SHIFT_1T;
@@ -578,7 +581,8 @@ void __init irqstack_early_init(void)
 
 	/*
 	 * Interrupt stacks must be in the first segment since we
-	 * cannot afford to take SLB misses on them.
+	 * cannot afford to take SLB misses on them. They are not
+	 * accessed in realmode.
 	 */
 	for_each_possible_cpu(i) {
 		softirq_ctx[i] = (struct thread_info *)
@@ -649,8 +653,9 @@ void __init emergency_stack_init(void)
 	 * aligned.
 	 *
 	 * Since we use these as temporary stacks during secondary CPU
-	 * bringup, we need to get at them in real mode. This means they
-	 * must also be within the RMO region.
+	 * bringup, machine check, system reset, and HMI, we need to get
+	 * at them in real mode. This means they must also be within the RMO
+	 * region.
 	 *
 	 * The IRQ stacks allocated elsewhere in this file are zeroed and
 	 * initialized in kernel/irq.c. These are initialized here in order
@@ -751,3 +756,31 @@ unsigned long memory_block_size_bytes(void)
 struct ppc_pci_io ppc_pci_io;
 EXPORT_SYMBOL(ppc_pci_io);
 #endif
+
+#ifdef CONFIG_HARDLOCKUP_DETECTOR_PERF
+u64 hw_nmi_get_sample_period(int watchdog_thresh)
+{
+	return ppc_proc_freq * watchdog_thresh;
+}
+#endif
+
+/*
+ * The perf based hardlockup detector breaks PMU event based branches, so
+ * disable it by default. Book3S has a soft-nmi hardlockup detector based
+ * on the decrementer interrupt, so it does not suffer from this problem.
+ *
+ * It is likely to get false positives in VM guests, so disable it there
+ * by default too.
+ */
+static int __init disable_hardlockup_detector(void)
+{
+#ifdef CONFIG_HARDLOCKUP_DETECTOR_PERF
+	hardlockup_detector_disable();
+#else
+	if (firmware_has_feature(FW_FEATURE_LPAR))
+		hardlockup_detector_disable();
+#endif
+
+	return 0;
+}
+early_initcall(disable_hardlockup_detector);
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 8d3320562c70..e0a4c1f82e25 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -75,9 +75,11 @@ static DEFINE_PER_CPU(int, cpu_state) = { 0 };
 struct thread_info *secondary_ti;
 
 DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map);
+DEFINE_PER_CPU(cpumask_var_t, cpu_l2_cache_map);
 DEFINE_PER_CPU(cpumask_var_t, cpu_core_map);
 
 EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
+EXPORT_PER_CPU_SYMBOL(cpu_l2_cache_map);
 EXPORT_PER_CPU_SYMBOL(cpu_core_map);
 
 /* SMP operations for this machine */
@@ -571,6 +573,26 @@ static void smp_store_cpu_info(int id)
 #endif
 }
 
+/*
+ * Relationships between CPUs are maintained in a set of per-cpu cpumasks so
+ * rather than just passing around the cpumask we pass around a function that
+ * returns the that cpumask for the given CPU.
+ */
+static void set_cpus_related(int i, int j, struct cpumask *(*get_cpumask)(int))
+{
+	cpumask_set_cpu(i, get_cpumask(j));
+	cpumask_set_cpu(j, get_cpumask(i));
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static void set_cpus_unrelated(int i, int j,
+		struct cpumask *(*get_cpumask)(int))
+{
+	cpumask_clear_cpu(i, get_cpumask(j));
+	cpumask_clear_cpu(j, get_cpumask(i));
+}
+#endif
+
 void __init smp_prepare_cpus(unsigned int max_cpus)
 {
 	unsigned int cpu;
@@ -590,6 +612,8 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 	for_each_possible_cpu(cpu) {
 		zalloc_cpumask_var_node(&per_cpu(cpu_sibling_map, cpu),
 					GFP_KERNEL, cpu_to_node(cpu));
+		zalloc_cpumask_var_node(&per_cpu(cpu_l2_cache_map, cpu),
+					GFP_KERNEL, cpu_to_node(cpu));
 		zalloc_cpumask_var_node(&per_cpu(cpu_core_map, cpu),
 					GFP_KERNEL, cpu_to_node(cpu));
 		/*
@@ -602,7 +626,9 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 		}
 	}
 
+	/* Init the cpumasks so the boot CPU is related to itself */
 	cpumask_set_cpu(boot_cpuid, cpu_sibling_mask(boot_cpuid));
+	cpumask_set_cpu(boot_cpuid, cpu_l2_cache_mask(boot_cpuid));
 	cpumask_set_cpu(boot_cpuid, cpu_core_mask(boot_cpuid));
 
 	if (smp_ops && smp_ops->probe)
@@ -828,33 +854,6 @@ int cpu_first_thread_of_core(int core)
 }
 EXPORT_SYMBOL_GPL(cpu_first_thread_of_core);
 
-static void traverse_siblings_chip_id(int cpu, bool add, int chipid)
-{
-	const struct cpumask *mask;
-	struct device_node *np;
-	int i, plen;
-	const __be32 *prop;
-
-	mask = add ? cpu_online_mask : cpu_present_mask;
-	for_each_cpu(i, mask) {
-		np = of_get_cpu_node(i, NULL);
-		if (!np)
-			continue;
-		prop = of_get_property(np, "ibm,chip-id", &plen);
-		if (prop && plen == sizeof(int) &&
-		    of_read_number(prop, 1) == chipid) {
-			if (add) {
-				cpumask_set_cpu(cpu, cpu_core_mask(i));
-				cpumask_set_cpu(i, cpu_core_mask(cpu));
-			} else {
-				cpumask_clear_cpu(cpu, cpu_core_mask(i));
-				cpumask_clear_cpu(i, cpu_core_mask(cpu));
-			}
-		}
-		of_node_put(np);
-	}
-}
-
 /* Must be called when no change can occur to cpu_present_mask,
  * i.e. during cpu online or offline.
  */
@@ -877,52 +876,93 @@ static struct device_node *cpu_to_l2cache(int cpu)
 	return cache;
 }
 
-static void traverse_core_siblings(int cpu, bool add)
+static bool update_mask_by_l2(int cpu, struct cpumask *(*mask_fn)(int))
 {
 	struct device_node *l2_cache, *np;
-	const struct cpumask *mask;
-	int i, chip, plen;
-	const __be32 *prop;
-
-	/* First see if we have ibm,chip-id properties in cpu nodes */
-	np = of_get_cpu_node(cpu, NULL);
-	if (np) {
-		chip = -1;
-		prop = of_get_property(np, "ibm,chip-id", &plen);
-		if (prop && plen == sizeof(int))
-			chip = of_read_number(prop, 1);
-		of_node_put(np);
-		if (chip >= 0) {
-			traverse_siblings_chip_id(cpu, add, chip);
-			return;
-		}
-	}
+	int i;
 
 	l2_cache = cpu_to_l2cache(cpu);
-	mask = add ? cpu_online_mask : cpu_present_mask;
-	for_each_cpu(i, mask) {
+	if (!l2_cache)
+		return false;
+
+	for_each_cpu(i, cpu_online_mask) {
+		/*
+		 * when updating the marks the current CPU has not been marked
+		 * online, but we need to update the cache masks
+		 */
 		np = cpu_to_l2cache(i);
 		if (!np)
 			continue;
-		if (np == l2_cache) {
-			if (add) {
-				cpumask_set_cpu(cpu, cpu_core_mask(i));
-				cpumask_set_cpu(i, cpu_core_mask(cpu));
-			} else {
-				cpumask_clear_cpu(cpu, cpu_core_mask(i));
-				cpumask_clear_cpu(i, cpu_core_mask(cpu));
-			}
-		}
+
+		if (np == l2_cache)
+			set_cpus_related(cpu, i, mask_fn);
+
 		of_node_put(np);
 	}
 	of_node_put(l2_cache);
+
+	return true;
 }
 
+#ifdef CONFIG_HOTPLUG_CPU
+static void remove_cpu_from_masks(int cpu)
+{
+	int i;
+
+	/* NB: cpu_core_mask is a superset of the others */
+	for_each_cpu(i, cpu_core_mask(cpu)) {
+		set_cpus_unrelated(cpu, i, cpu_core_mask);
+		set_cpus_unrelated(cpu, i, cpu_l2_cache_mask);
+		set_cpus_unrelated(cpu, i, cpu_sibling_mask);
+	}
+}
+#endif
+
+static void add_cpu_to_masks(int cpu)
+{
+	int first_thread = cpu_first_thread_sibling(cpu);
+	int chipid = cpu_to_chip_id(cpu);
+	int i;
+
+	/*
+	 * This CPU will not be in the online mask yet so we need to manually
+	 * add it to it's own thread sibling mask.
+	 */
+	cpumask_set_cpu(cpu, cpu_sibling_mask(cpu));
+
+	for (i = first_thread; i < first_thread + threads_per_core; i++)
+		if (cpu_online(i))
+			set_cpus_related(i, cpu, cpu_sibling_mask);
+
+	/*
+	 * Copy the thread sibling mask into the cache sibling mask
+	 * and mark any CPUs that share an L2 with this CPU.
+	 */
+	for_each_cpu(i, cpu_sibling_mask(cpu))
+		set_cpus_related(cpu, i, cpu_l2_cache_mask);
+	update_mask_by_l2(cpu, cpu_l2_cache_mask);
+
+	/*
+	 * Copy the cache sibling mask into core sibling mask and mark
+	 * any CPUs on the same chip as this CPU.
+	 */
+	for_each_cpu(i, cpu_l2_cache_mask(cpu))
+		set_cpus_related(cpu, i, cpu_core_mask);
+
+	if (chipid == -1)
+		return;
+
+	for_each_cpu(i, cpu_online_mask)
+		if (cpu_to_chip_id(i) == chipid)
+			set_cpus_related(cpu, i, cpu_core_mask);
+}
+
+static bool shared_caches;
+
 /* Activate a secondary processor. */
 void start_secondary(void *unused)
 {
 	unsigned int cpu = smp_processor_id();
-	int i, base;
 
 	mmgrab(&init_mm);
 	current->active_mm = &init_mm;
@@ -945,22 +985,15 @@ void start_secondary(void *unused)
 
 	vdso_getcpu_init();
 #endif
-	/* Update sibling maps */
-	base = cpu_first_thread_sibling(cpu);
-	for (i = 0; i < threads_per_core; i++) {
-		if (cpu_is_offline(base + i) && (cpu != base + i))
-			continue;
-		cpumask_set_cpu(cpu, cpu_sibling_mask(base + i));
-		cpumask_set_cpu(base + i, cpu_sibling_mask(cpu));
+	/* Update topology CPU masks */
+	add_cpu_to_masks(cpu);
 
-		/* cpu_core_map should be a superset of
-		 * cpu_sibling_map even if we don't have cache
-		 * information, so update the former here, too.
-		 */
-		cpumask_set_cpu(cpu, cpu_core_mask(base + i));
-		cpumask_set_cpu(base + i, cpu_core_mask(cpu));
-	}
-	traverse_core_siblings(cpu, true);
+	/*
+	 * Check for any shared caches. Note that this must be done on a
+	 * per-core basis because one core in the pair might be disabled.
+	 */
+	if (!cpumask_equal(cpu_l2_cache_mask(cpu), cpu_sibling_mask(cpu)))
+		shared_caches = true;
 
 	set_numa_node(numa_cpu_lookup_table[cpu]);
 	set_numa_mem(local_memory_node(numa_cpu_lookup_table[cpu]));
@@ -1003,6 +1036,35 @@ static struct sched_domain_topology_level powerpc_topology[] = {
 	{ NULL, },
 };
 
+/*
+ * P9 has a slightly odd architecture where pairs of cores share an L2 cache.
+ * This topology makes it *much* cheaper to migrate tasks between adjacent cores
+ * since the migrated task remains cache hot. We want to take advantage of this
+ * at the scheduler level so an extra topology level is required.
+ */
+static int powerpc_shared_cache_flags(void)
+{
+	return SD_SHARE_PKG_RESOURCES;
+}
+
+/*
+ * We can't just pass cpu_l2_cache_mask() directly because
+ * returns a non-const pointer and the compiler barfs on that.
+ */
+static const struct cpumask *shared_cache_mask(int cpu)
+{
+	return cpu_l2_cache_mask(cpu);
+}
+
+static struct sched_domain_topology_level power9_topology[] = {
+#ifdef CONFIG_SCHED_SMT
+	{ cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) },
+#endif
+	{ shared_cache_mask, powerpc_shared_cache_flags, SD_INIT_NAME(CACHE) },
+	{ cpu_cpu_mask, SD_INIT_NAME(DIE) },
+	{ NULL, },
+};
+
 void __init smp_cpus_done(unsigned int max_cpus)
 {
 	/*
@@ -1016,14 +1078,23 @@ void __init smp_cpus_done(unsigned int max_cpus)
 
 	dump_numa_cpu_topology();
 
-	set_sched_topology(powerpc_topology);
+	/*
+	 * If any CPU detects that it's sharing a cache with another CPU then
+	 * use the deeper topology that is aware of this sharing.
+	 */
+	if (shared_caches) {
+		pr_info("Using shared cache scheduler topology\n");
+		set_sched_topology(power9_topology);
+	} else {
+		pr_info("Using standard scheduler topology\n");
+		set_sched_topology(powerpc_topology);
+	}
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
 int __cpu_disable(void)
 {
 	int cpu = smp_processor_id();
-	int base, i;
 	int err;
 
 	if (!smp_ops->cpu_disable)
@@ -1034,14 +1105,7 @@ int __cpu_disable(void)
 		return err;
 
 	/* Update sibling maps */
-	base = cpu_first_thread_sibling(cpu);
-	for (i = 0; i < threads_per_core && base + i < nr_cpu_ids; i++) {
-		cpumask_clear_cpu(cpu, cpu_sibling_mask(base + i));
-		cpumask_clear_cpu(base + i, cpu_sibling_mask(cpu));
-		cpumask_clear_cpu(cpu, cpu_core_mask(base + i));
-		cpumask_clear_cpu(base + i, cpu_core_mask(cpu));
-	}
-	traverse_core_siblings(cpu, false);
+	remove_cpu_from_masks(cpu);
 
 	return 0;
 }
diff --git a/arch/powerpc/kernel/swsusp_asm64.S b/arch/powerpc/kernel/swsusp_asm64.S
index 988f38dced0f..82d8aae81c6a 100644
--- a/arch/powerpc/kernel/swsusp_asm64.S
+++ b/arch/powerpc/kernel/swsusp_asm64.S
@@ -179,7 +179,7 @@ nothing_to_copy:
 	sld	r3, r3, r0
 	li	r0, 0
 1:
-	dcbf	r0,r3
+	dcbf	0,r3
 	addi	r3,r3,0x20
 	bdnz	1b
 
diff --git a/arch/powerpc/kernel/systbl.S b/arch/powerpc/kernel/systbl.S
index 4d6b1d3a747f..7ccb7f81f8db 100644
--- a/arch/powerpc/kernel/systbl.S
+++ b/arch/powerpc/kernel/systbl.S
@@ -17,13 +17,13 @@
 #include <asm/ppc_asm.h>
 
 #ifdef CONFIG_PPC64
-#define SYSCALL(func)		.llong	DOTSYM(sys_##func),DOTSYM(sys_##func)
-#define COMPAT_SYS(func)	.llong	DOTSYM(sys_##func),DOTSYM(compat_sys_##func)
-#define PPC_SYS(func)		.llong	DOTSYM(ppc_##func),DOTSYM(ppc_##func)
-#define OLDSYS(func)		.llong	DOTSYM(sys_ni_syscall),DOTSYM(sys_ni_syscall)
-#define SYS32ONLY(func)		.llong	DOTSYM(sys_ni_syscall),DOTSYM(compat_sys_##func)
-#define PPC64ONLY(func)		.llong	DOTSYM(ppc_##func),DOTSYM(sys_ni_syscall)
-#define SYSX(f, f3264, f32)	.llong	DOTSYM(f),DOTSYM(f3264)
+#define SYSCALL(func)		.8byte	DOTSYM(sys_##func),DOTSYM(sys_##func)
+#define COMPAT_SYS(func)	.8byte	DOTSYM(sys_##func),DOTSYM(compat_sys_##func)
+#define PPC_SYS(func)		.8byte	DOTSYM(ppc_##func),DOTSYM(ppc_##func)
+#define OLDSYS(func)		.8byte	DOTSYM(sys_ni_syscall),DOTSYM(sys_ni_syscall)
+#define SYS32ONLY(func)		.8byte	DOTSYM(sys_ni_syscall),DOTSYM(compat_sys_##func)
+#define PPC64ONLY(func)		.8byte	DOTSYM(ppc_##func),DOTSYM(sys_ni_syscall)
+#define SYSX(f, f3264, f32)	.8byte	DOTSYM(f),DOTSYM(f3264)
 #else
 #define SYSCALL(func)		.long	sys_##func
 #define COMPAT_SYS(func)	.long	sys_##func
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index bfcfd9ef09f2..ec74e203ee04 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -114,6 +114,28 @@ static void pmac_backlight_unblank(void)
 static inline void pmac_backlight_unblank(void) { }
 #endif
 
+/*
+ * If oops/die is expected to crash the machine, return true here.
+ *
+ * This should not be expected to be 100% accurate, there may be
+ * notifiers registered or other unexpected conditions that may bring
+ * down the kernel. Or if the current process in the kernel is holding
+ * locks or has other critical state, the kernel may become effectively
+ * unusable anyway.
+ */
+bool die_will_crash(void)
+{
+	if (should_fadump_crash())
+		return true;
+	if (kexec_should_crash(current))
+		return true;
+	if (in_interrupt() || panic_on_oops ||
+			!current->pid || is_global_init(current))
+		return true;
+
+	return false;
+}
+
 static arch_spinlock_t die_lock = __ARCH_SPIN_LOCK_UNLOCKED;
 static int die_owner = -1;
 static unsigned int die_nest_count;
@@ -162,21 +184,9 @@ static void oops_end(unsigned long flags, struct pt_regs *regs,
 
 	crash_fadump(regs, "die oops");
 
-	/*
-	 * A system reset (0x100) is a request to dump, so we always send
-	 * it through the crashdump code.
-	 */
-	if (kexec_should_crash(current) || (TRAP(regs) == 0x100)) {
+	if (kexec_should_crash(current))
 		crash_kexec(regs);
 
-		/*
-		 * We aren't the primary crash CPU. We need to send it
-		 * to a holding pattern to avoid it ending up in the panic
-		 * code.
-		 */
-		crash_kexec_secondary(regs);
-	}
-
 	if (!signr)
 		return;
 
@@ -202,18 +212,25 @@ NOKPROBE_SYMBOL(oops_end);
 static int __die(const char *str, struct pt_regs *regs, long err)
 {
 	printk("Oops: %s, sig: %ld [#%d]\n", str, err, ++die_counter);
-#ifdef CONFIG_PREEMPT
-	printk("PREEMPT ");
-#endif
-#ifdef CONFIG_SMP
-	printk("SMP NR_CPUS=%d ", NR_CPUS);
-#endif
+
+	if (IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN))
+		printk("LE ");
+	else
+		printk("BE ");
+
+	if (IS_ENABLED(CONFIG_PREEMPT))
+		pr_cont("PREEMPT ");
+
+	if (IS_ENABLED(CONFIG_SMP))
+		pr_cont("SMP NR_CPUS=%d ", NR_CPUS);
+
 	if (debug_pagealloc_enabled())
-		printk("DEBUG_PAGEALLOC ");
-#ifdef CONFIG_NUMA
-	printk("NUMA ");
-#endif
-	printk("%s\n", ppc_md.name ? ppc_md.name : "");
+		pr_cont("DEBUG_PAGEALLOC ");
+
+	if (IS_ENABLED(CONFIG_NUMA))
+		pr_cont("NUMA ");
+
+	pr_cont("%s\n", ppc_md.name ? ppc_md.name : "");
 
 	if (notify_die(DIE_OOPS, str, regs, err, 255, SIGSEGV) == NOTIFY_STOP)
 		return 1;
@@ -288,23 +305,52 @@ void system_reset_exception(struct pt_regs *regs)
 	if (!nested)
 		nmi_enter();
 
+	__this_cpu_inc(irq_stat.sreset_irqs);
+
 	/* See if any machine dependent calls */
 	if (ppc_md.system_reset_exception) {
 		if (ppc_md.system_reset_exception(regs))
 			goto out;
 	}
 
-	die("System Reset", regs, SIGABRT);
+	if (debugger(regs))
+		goto out;
+
+	/*
+	 * A system reset is a request to dump, so we always send
+	 * it through the crashdump code (if fadump or kdump are
+	 * registered).
+	 */
+	crash_fadump(regs, "System Reset");
+
+	crash_kexec(regs);
+
+	/*
+	 * We aren't the primary crash CPU. We need to send it
+	 * to a holding pattern to avoid it ending up in the panic
+	 * code.
+	 */
+	crash_kexec_secondary(regs);
+
+	/*
+	 * No debugger or crash dump registered, print logs then
+	 * panic.
+	 */
+	__die("System Reset", regs, SIGABRT);
+
+	mdelay(2*MSEC_PER_SEC); /* Wait a little while for others to print */
+	add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
+	nmi_panic(regs, "System Reset");
 
 out:
 #ifdef CONFIG_PPC_BOOK3S_64
 	BUG_ON(get_paca()->in_nmi == 0);
 	if (get_paca()->in_nmi > 1)
-		panic("Unrecoverable nested System Reset");
+		nmi_panic(regs, "Unrecoverable nested System Reset");
 #endif
 	/* Must die if the interrupt is not recoverable */
 	if (!(regs->msr & MSR_RI))
-		panic("Unrecoverable System Reset");
+		nmi_panic(regs, "Unrecoverable System Reset");
 
 	if (!nested)
 		nmi_exit();
@@ -312,39 +358,6 @@ out:
 	/* What should we do here? We could issue a shutdown or hard reset. */
 }
 
-#ifdef CONFIG_PPC64
-/*
- * This function is called in real mode. Strictly no printk's please.
- *
- * regs->nip and regs->msr contains srr0 and ssr1.
- */
-long machine_check_early(struct pt_regs *regs)
-{
-	long handled = 0;
-
-	__this_cpu_inc(irq_stat.mce_exceptions);
-
-	if (cur_cpu_spec && cur_cpu_spec->machine_check_early)
-		handled = cur_cpu_spec->machine_check_early(regs);
-	return handled;
-}
-
-long hmi_exception_realmode(struct pt_regs *regs)
-{
-	__this_cpu_inc(irq_stat.hmi_exceptions);
-
-	wait_for_subcore_guest_exit();
-
-	if (ppc_md.hmi_exception_early)
-		ppc_md.hmi_exception_early(regs);
-
-	wait_for_tb_resync();
-
-	return 0;
-}
-
-#endif
-
 /*
  * I/O accesses can cause machine checks on powermacs.
  * Check if the NIP corresponds to the address of a sync
@@ -397,11 +410,6 @@ static inline int check_io_access(struct pt_regs *regs)
 /* On 4xx, the reason for the machine check or program exception
    is in the ESR. */
 #define get_reason(regs)	((regs)->dsisr)
-#ifndef CONFIG_FSL_BOOKE
-#define get_mc_reason(regs)	((regs)->dsisr)
-#else
-#define get_mc_reason(regs)	(mfspr(SPRN_MCSR))
-#endif
 #define REASON_FP		ESR_FP
 #define REASON_ILLEGAL		(ESR_PIL | ESR_PUO)
 #define REASON_PRIVILEGED	ESR_PPR
@@ -415,108 +423,17 @@ static inline int check_io_access(struct pt_regs *regs)
 /* On non-4xx, the reason for the machine check or program
    exception is in the MSR. */
 #define get_reason(regs)	((regs)->msr)
-#define get_mc_reason(regs)	((regs)->msr)
-#define REASON_TM		0x200000
-#define REASON_FP		0x100000
-#define REASON_ILLEGAL		0x80000
-#define REASON_PRIVILEGED	0x40000
-#define REASON_TRAP		0x20000
+#define REASON_TM		SRR1_PROGTM
+#define REASON_FP		SRR1_PROGFPE
+#define REASON_ILLEGAL		SRR1_PROGILL
+#define REASON_PRIVILEGED	SRR1_PROGPRIV
+#define REASON_TRAP		SRR1_PROGTRAP
 
 #define single_stepping(regs)	((regs)->msr & MSR_SE)
 #define clear_single_step(regs)	((regs)->msr &= ~MSR_SE)
 #endif
 
-#if defined(CONFIG_4xx)
-int machine_check_4xx(struct pt_regs *regs)
-{
-	unsigned long reason = get_mc_reason(regs);
-
-	if (reason & ESR_IMCP) {
-		printk("Instruction");
-		mtspr(SPRN_ESR, reason & ~ESR_IMCP);
-	} else
-		printk("Data");
-	printk(" machine check in kernel mode.\n");
-
-	return 0;
-}
-
-int machine_check_440A(struct pt_regs *regs)
-{
-	unsigned long reason = get_mc_reason(regs);
-
-	printk("Machine check in kernel mode.\n");
-	if (reason & ESR_IMCP){
-		printk("Instruction Synchronous Machine Check exception\n");
-		mtspr(SPRN_ESR, reason & ~ESR_IMCP);
-	}
-	else {
-		u32 mcsr = mfspr(SPRN_MCSR);
-		if (mcsr & MCSR_IB)
-			printk("Instruction Read PLB Error\n");
-		if (mcsr & MCSR_DRB)
-			printk("Data Read PLB Error\n");
-		if (mcsr & MCSR_DWB)
-			printk("Data Write PLB Error\n");
-		if (mcsr & MCSR_TLBP)
-			printk("TLB Parity Error\n");
-		if (mcsr & MCSR_ICP){
-			flush_instruction_cache();
-			printk("I-Cache Parity Error\n");
-		}
-		if (mcsr & MCSR_DCSP)
-			printk("D-Cache Search Parity Error\n");
-		if (mcsr & MCSR_DCFP)
-			printk("D-Cache Flush Parity Error\n");
-		if (mcsr & MCSR_IMPE)
-			printk("Machine Check exception is imprecise\n");
-
-		/* Clear MCSR */
-		mtspr(SPRN_MCSR, mcsr);
-	}
-	return 0;
-}
-
-int machine_check_47x(struct pt_regs *regs)
-{
-	unsigned long reason = get_mc_reason(regs);
-	u32 mcsr;
-
-	printk(KERN_ERR "Machine check in kernel mode.\n");
-	if (reason & ESR_IMCP) {
-		printk(KERN_ERR
-		       "Instruction Synchronous Machine Check exception\n");
-		mtspr(SPRN_ESR, reason & ~ESR_IMCP);
-		return 0;
-	}
-	mcsr = mfspr(SPRN_MCSR);
-	if (mcsr & MCSR_IB)
-		printk(KERN_ERR "Instruction Read PLB Error\n");
-	if (mcsr & MCSR_DRB)
-		printk(KERN_ERR "Data Read PLB Error\n");
-	if (mcsr & MCSR_DWB)
-		printk(KERN_ERR "Data Write PLB Error\n");
-	if (mcsr & MCSR_TLBP)
-		printk(KERN_ERR "TLB Parity Error\n");
-	if (mcsr & MCSR_ICP) {
-		flush_instruction_cache();
-		printk(KERN_ERR "I-Cache Parity Error\n");
-	}
-	if (mcsr & MCSR_DCSP)
-		printk(KERN_ERR "D-Cache Search Parity Error\n");
-	if (mcsr & PPC47x_MCSR_GPR)
-		printk(KERN_ERR "GPR Parity Error\n");
-	if (mcsr & PPC47x_MCSR_FPR)
-		printk(KERN_ERR "FPR Parity Error\n");
-	if (mcsr & PPC47x_MCSR_IPR)
-		printk(KERN_ERR "Machine Check exception is imprecise\n");
-
-	/* Clear MCSR */
-	mtspr(SPRN_MCSR, mcsr);
-
-	return 0;
-}
-#elif defined(CONFIG_E500)
+#if defined(CONFIG_E500)
 int machine_check_e500mc(struct pt_regs *regs)
 {
 	unsigned long mcsr = mfspr(SPRN_MCSR);
@@ -618,7 +535,7 @@ silent_out:
 
 int machine_check_e500(struct pt_regs *regs)
 {
-	unsigned long reason = get_mc_reason(regs);
+	unsigned long reason = mfspr(SPRN_MCSR);
 
 	if (reason & MCSR_BUS_RBERR) {
 		if (fsl_rio_mcheck_exception(regs))
@@ -665,7 +582,7 @@ int machine_check_generic(struct pt_regs *regs)
 #elif defined(CONFIG_E200)
 int machine_check_e200(struct pt_regs *regs)
 {
-	unsigned long reason = get_mc_reason(regs);
+	unsigned long reason = mfspr(SPRN_MCSR);
 
 	printk("Machine check in kernel mode.\n");
 	printk("Caused by (from MCSR=%lx): ", reason);
@@ -687,35 +604,10 @@ int machine_check_e200(struct pt_regs *regs)
 
 	return 0;
 }
-#elif defined(CONFIG_PPC_8xx)
-int machine_check_8xx(struct pt_regs *regs)
-{
-	unsigned long reason = get_mc_reason(regs);
-
-	pr_err("Machine check in kernel mode.\n");
-	pr_err("Caused by (from SRR1=%lx): ", reason);
-	if (reason & 0x40000000)
-		pr_err("Fetch error at address %lx\n", regs->nip);
-	else
-		pr_err("Data access error at address %lx\n", regs->dar);
-
-#ifdef CONFIG_PCI
-	/* the qspan pci read routines can cause machine checks -- Cort
-	 *
-	 * yuck !!! that totally needs to go away ! There are better ways
-	 * to deal with that than having a wart in the mcheck handler.
-	 * -- BenH
-	 */
-	bad_page_fault(regs, regs->dar, SIGBUS);
-	return 1;
-#else
-	return 0;
-#endif
-}
-#else
+#elif defined(CONFIG_PPC32)
 int machine_check_generic(struct pt_regs *regs)
 {
-	unsigned long reason = get_mc_reason(regs);
+	unsigned long reason = regs->msr;
 
 	printk("Machine check in kernel mode.\n");
 	printk("Caused by (from SRR1=%lx): ", reason);
@@ -752,10 +644,14 @@ int machine_check_generic(struct pt_regs *regs)
 
 void machine_check_exception(struct pt_regs *regs)
 {
-	enum ctx_state prev_state = exception_enter();
 	int recover = 0;
+	bool nested = in_nmi();
+	if (!nested)
+		nmi_enter();
 
-	__this_cpu_inc(irq_stat.mce_exceptions);
+	/* 64s accounts the mce in machine_check_early when in HVMODE */
+	if (!IS_ENABLED(CONFIG_PPC_BOOK3S_64) || !cpu_has_feature(CPU_FTR_HVMODE))
+		__this_cpu_inc(irq_stat.mce_exceptions);
 
 	add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
 
@@ -783,10 +679,11 @@ void machine_check_exception(struct pt_regs *regs)
 
 	/* Must die if the interrupt is not recoverable */
 	if (!(regs->msr & MSR_RI))
-		panic("Unrecoverable Machine check");
+		nmi_panic(regs, "Unrecoverable Machine check");
 
 bail:
-	exception_exit(prev_state);
+	if (!nested)
+		nmi_exit();
 }
 
 void SMIException(struct pt_regs *regs)
@@ -1672,24 +1569,6 @@ void performance_monitor_exception(struct pt_regs *regs)
 	perf_irq(regs);
 }
 
-#ifdef CONFIG_8xx
-void SoftwareEmulation(struct pt_regs *regs)
-{
-	CHECK_FULL_REGS(regs);
-
-	if (!user_mode(regs)) {
-		debugger(regs);
-		die("Kernel Mode Unimplemented Instruction or SW FPU Emulation",
-			regs, SIGFPE);
-	}
-
-	if (!emulate_math(regs))
-		return;
-
-	_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
-}
-#endif /* CONFIG_8xx */
-
 #ifdef CONFIG_PPC_ADV_DEBUG_REGS
 static void handle_debug(struct pt_regs *regs, unsigned long debug_status)
 {
diff --git a/arch/powerpc/kernel/uprobes.c b/arch/powerpc/kernel/uprobes.c
index 003b20964ea0..5d105b8eeece 100644
--- a/arch/powerpc/kernel/uprobes.c
+++ b/arch/powerpc/kernel/uprobes.c
@@ -205,3 +205,12 @@ arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs
 
 	return orig_ret_vaddr;
 }
+
+bool arch_uretprobe_is_alive(struct return_instance *ret, enum rp_check ctx,
+				struct pt_regs *regs)
+{
+	if (ctx == RP_CHECK_CHAIN_CALL)
+		return regs->gpr[1] <= ret->stack;
+	else
+		return regs->gpr[1] < ret->stack;
+}
diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S
index 6b2b69616e77..769c2624e0a6 100644
--- a/arch/powerpc/kernel/vdso32/gettimeofday.S
+++ b/arch/powerpc/kernel/vdso32/gettimeofday.S
@@ -232,15 +232,9 @@ __do_get_tspec:
 	lwz	r6,(CFG_TB_ORIG_STAMP+4)(r9)
 
 	/* Get a stable TB value */
-#ifdef CONFIG_8xx
-2:	mftbu	r3
-	mftbl	r4
-	mftbu	r0
-#else
-2:	mfspr	r3, SPRN_TBRU
-	mfspr	r4, SPRN_TBRL
-	mfspr	r0, SPRN_TBRU
-#endif
+2:	MFTBU(r3)
+	MFTBL(r4)
+	MFTBU(r0)
 	cmplw	cr0,r3,r0
 	bne-	2b
 
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index b1a250560198..882628fa6987 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -8,7 +8,7 @@
 #include <asm/cache.h>
 #include <asm/thread_info.h>
 
-#ifdef CONFIG_STRICT_KERNEL_RWX
+#if defined(CONFIG_STRICT_KERNEL_RWX) && !defined(CONFIG_PPC32)
 #define STRICT_ALIGN_SIZE	(1 << 24)
 #else
 #define STRICT_ALIGN_SIZE	PAGE_SIZE
diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c
index 34721a257a77..2f6eadd9408d 100644
--- a/arch/powerpc/kernel/watchdog.c
+++ b/arch/powerpc/kernel/watchdog.c
@@ -216,6 +216,9 @@ void soft_nmi_interrupt(struct pt_regs *regs)
 		return;
 
 	nmi_enter();
+
+	__this_cpu_inc(irq_stat.soft_nmi_irqs);
+
 	tb = get_tb();
 	if (tb - per_cpu(wd_timer_tb, cpu) >= wd_panic_timeout_tb) {
 		per_cpu(wd_timer_tb, cpu) = tb;
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index b42812e014c0..67075e065ef2 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -37,6 +37,7 @@
 #include <asm/synch.h>
 #include <asm/ppc-opcode.h>
 #include <asm/cputable.h>
+#include <asm/pte-walk.h>
 
 #include "trace_hv.h"
 
@@ -599,8 +600,8 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 			 * hugepage split and collapse.
 			 */
 			local_irq_save(flags);
-			ptep = find_linux_pte_or_hugepte(current->mm->pgd,
-							 hva, NULL, NULL);
+			ptep = find_current_mm_pte(current->mm->pgd,
+						   hva, NULL, NULL);
 			if (ptep) {
 				pte = kvmppc_read_update_linux_pte(ptep, 1);
 				if (__pte_write(pte))
diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index f6b3e67c5762..c5d7435455f1 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -17,6 +17,7 @@
 #include <asm/mmu.h>
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
+#include <asm/pte-walk.h>
 
 /*
  * Supported radix tree geometry.
@@ -322,13 +323,13 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	gpa = vcpu->arch.fault_gpa & ~0xfffUL;
 	gpa &= ~0xF000000000000000ul;
 	gfn = gpa >> PAGE_SHIFT;
-	if (!(dsisr & DSISR_PGDIRFAULT))
+	if (!(dsisr & DSISR_PRTABLE_FAULT))
 		gpa |= ea & 0xfff;
 	memslot = gfn_to_memslot(kvm, gfn);
 
 	/* No memslot means it's an emulated MMIO region */
 	if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) {
-		if (dsisr & (DSISR_PGDIRFAULT | DSISR_BADACCESS |
+		if (dsisr & (DSISR_PRTABLE_FAULT | DSISR_BADACCESS |
 			     DSISR_SET_RC)) {
 			/*
 			 * Bad address in guest page table tree, or other
@@ -359,8 +360,7 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		if (writing)
 			pgflags |= _PAGE_DIRTY;
 		local_irq_save(flags);
-		ptep = __find_linux_pte_or_hugepte(current->mm->pgd, hva,
-						   NULL, NULL);
+		ptep = find_current_mm_pte(current->mm->pgd, hva, NULL, NULL);
 		if (ptep) {
 			pte = READ_ONCE(*ptep);
 			if (pte_present(pte) &&
@@ -374,8 +374,12 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 				spin_unlock(&kvm->mmu_lock);
 				return RESUME_GUEST;
 			}
-			ptep = __find_linux_pte_or_hugepte(kvm->arch.pgtable,
-							gpa, NULL, &shift);
+			/*
+			 * We are walking the secondary page table here. We can do this
+			 * without disabling irq.
+			 */
+			ptep = __find_linux_pte(kvm->arch.pgtable,
+						gpa, NULL, &shift);
 			if (ptep && pte_present(*ptep)) {
 				kvmppc_radix_update_pte(kvm, ptep, 0, pgflags,
 							gpa, shift);
@@ -427,8 +431,8 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 			pgflags |= _PAGE_WRITE;
 		} else {
 			local_irq_save(flags);
-			ptep = __find_linux_pte_or_hugepte(current->mm->pgd,
-							hva, NULL, NULL);
+			ptep = find_current_mm_pte(current->mm->pgd,
+						   hva, NULL, NULL);
 			if (ptep && pte_write(*ptep) && pte_dirty(*ptep))
 				pgflags |= _PAGE_WRITE;
 			local_irq_restore(flags);
@@ -499,8 +503,7 @@ int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
 	unsigned int shift;
 	unsigned long old;
 
-	ptep = __find_linux_pte_or_hugepte(kvm->arch.pgtable, gpa,
-					   NULL, &shift);
+	ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
 	if (ptep && pte_present(*ptep)) {
 		old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_PRESENT, 0,
 					      gpa, shift);
@@ -525,8 +528,7 @@ int kvm_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
 	unsigned int shift;
 	int ref = 0;
 
-	ptep = __find_linux_pte_or_hugepte(kvm->arch.pgtable, gpa,
-					   NULL, &shift);
+	ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
 	if (ptep && pte_present(*ptep) && pte_young(*ptep)) {
 		kvmppc_radix_update_pte(kvm, ptep, _PAGE_ACCESSED, 0,
 					gpa, shift);
@@ -545,8 +547,7 @@ int kvm_test_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
 	unsigned int shift;
 	int ref = 0;
 
-	ptep = __find_linux_pte_or_hugepte(kvm->arch.pgtable, gpa,
-					   NULL, &shift);
+	ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
 	if (ptep && pte_present(*ptep) && pte_young(*ptep))
 		ref = 1;
 	return ref;
@@ -562,8 +563,7 @@ static int kvm_radix_test_clear_dirty(struct kvm *kvm,
 	unsigned int shift;
 	int ret = 0;
 
-	ptep = __find_linux_pte_or_hugepte(kvm->arch.pgtable, gpa,
-					   NULL, &shift);
+	ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
 	if (ptep && pte_present(*ptep) && pte_dirty(*ptep)) {
 		ret = 1;
 		if (shift)
diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c
index 3adfd2f5301c..c32e9bfe75b1 100644
--- a/arch/powerpc/kvm/book3s_64_vio_hv.c
+++ b/arch/powerpc/kvm/book3s_64_vio_hv.c
@@ -39,6 +39,7 @@
 #include <asm/udbg.h>
 #include <asm/iommu.h>
 #include <asm/tce.h>
+#include <asm/pte-walk.h>
 
 #ifdef CONFIG_BUG
 
@@ -353,7 +354,16 @@ static long kvmppc_rm_ua_to_hpa(struct kvm_vcpu *vcpu,
 	pte_t *ptep, pte;
 	unsigned shift = 0;
 
-	ptep = __find_linux_pte_or_hugepte(vcpu->arch.pgdir, ua, NULL, &shift);
+	/*
+	 * Called in real mode with MSR_EE = 0. We are safe here.
+	 * It is ok to do the lookup with arch.pgdir here, because
+	 * we are doing this on secondary cpus and current task there
+	 * is not the hypervisor. Also this is safe against THP in the
+	 * host, because an IPI to primary thread will wait for the secondary
+	 * to exit which will agains result in the below page table walk
+	 * to finish.
+	 */
+	ptep = __find_linux_pte(vcpu->arch.pgdir, ua, NULL, &shift);
 	if (!ptep || !pte_present(*ptep))
 		return -ENXIO;
 	pte = *ptep;
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 359c79cdf0cc..ebcf97cb5c98 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -2111,6 +2111,15 @@ static int kvmppc_grab_hwthread(int cpu)
 	struct paca_struct *tpaca;
 	long timeout = 10000;
 
+	/*
+	 * ISA v3.0 idle routines do not set hwthread_state or test
+	 * hwthread_req, so they can not grab idle threads.
+	 */
+	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+		WARN(1, "KVM: can not control sibling threads\n");
+		return -EBUSY;
+	}
+
 	tpaca = &paca[cpu];
 
 	/* Ensure the thread won't go into the kernel if it wakes */
@@ -2145,10 +2154,12 @@ static void kvmppc_release_hwthread(int cpu)
 	struct paca_struct *tpaca;
 
 	tpaca = &paca[cpu];
-	tpaca->kvm_hstate.hwthread_req = 0;
 	tpaca->kvm_hstate.kvm_vcpu = NULL;
 	tpaca->kvm_hstate.kvm_vcore = NULL;
 	tpaca->kvm_hstate.kvm_split_mode = NULL;
+	if (!cpu_has_feature(CPU_FTR_ARCH_300))
+		tpaca->kvm_hstate.hwthread_req = 0;
+
 }
 
 static void radix_flush_cpu(struct kvm *kvm, int cpu, struct kvm_vcpu *vcpu)
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 584c74c8119f..fedb0139524c 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -22,6 +22,7 @@
 #include <asm/hvcall.h>
 #include <asm/synch.h>
 #include <asm/ppc-opcode.h>
+#include <asm/pte-walk.h>
 
 /* Translate address of a vmalloc'd thing to a linear map address */
 static void *real_vmalloc_addr(void *x)
@@ -31,9 +32,9 @@ static void *real_vmalloc_addr(void *x)
 	/*
 	 * assume we don't have huge pages in vmalloc space...
 	 * So don't worry about THP collapse/split. Called
-	 * Only in realmode, hence won't need irq_save/restore.
+	 * Only in realmode with MSR_EE = 0, hence won't need irq_save/restore.
 	 */
-	p = __find_linux_pte_or_hugepte(swapper_pg_dir, addr, NULL, NULL);
+	p = find_init_mm_pte(addr, NULL);
 	if (!p || !pte_present(*p))
 		return NULL;
 	addr = (pte_pfn(*p) << PAGE_SHIFT) | (addr & ~PAGE_MASK);
@@ -230,14 +231,13 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
 	 * If we had a page table table change after lookup, we would
 	 * retry via mmu_notifier_retry.
 	 */
-	if (realmode)
-		ptep = __find_linux_pte_or_hugepte(pgdir, hva, NULL,
-						   &hpage_shift);
-	else {
+	if (!realmode)
 		local_irq_save(irq_flags);
-		ptep = find_linux_pte_or_hugepte(pgdir, hva, NULL,
-						 &hpage_shift);
-	}
+	/*
+	 * If called in real mode we have MSR_EE = 0. Otherwise
+	 * we disable irq above.
+	 */
+	ptep = __find_linux_pte(pgdir, hva, NULL, &hpage_shift);
 	if (ptep) {
 		pte_t pte;
 		unsigned int host_pte_size;
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 9c9c983b864f..2259b6cde119 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -149,9 +149,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 	subf	r4, r4, r3
 	mtspr	SPRN_DEC, r4
 
+BEGIN_FTR_SECTION
 	/* hwthread_req may have got set by cede or no vcpu, so clear it */
 	li	r0, 0
 	stb	r0, HSTATE_HWTHREAD_REQ(r13)
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
 
 	/*
 	 * For external interrupts we need to call the Linux
@@ -314,6 +316,7 @@ kvm_novcpu_exit:
  * Relocation is off and most register values are lost.
  * r13 points to the PACA.
  * r3 contains the SRR1 wakeup value, SRR1 is trashed.
+ * This is not used by ISAv3.0B processors.
  */
 	.globl	kvm_start_guest
 kvm_start_guest:
@@ -432,6 +435,9 @@ kvm_secondary_got_guest:
  * While waiting we also need to check if we get given a vcpu to run.
  */
 kvm_no_guest:
+BEGIN_FTR_SECTION
+	twi	31,0,0
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 	lbz	r3, HSTATE_HWTHREAD_REQ(r13)
 	cmpwi	r3, 0
 	bne	53f
@@ -2512,8 +2518,10 @@ kvm_do_nap:
 	clrrdi	r0, r0, 1
 	mtspr	SPRN_CTRLT, r0
 
+BEGIN_FTR_SECTION
 	li	r0,1
 	stb	r0,HSTATE_HWTHREAD_REQ(r13)
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
 	mfspr	r5,SPRN_LPCR
 	ori	r5,r5,LPCR_PECE0 | LPCR_PECE1
 BEGIN_FTR_SECTION
diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c
index 77fd043b3ecc..c6c734424c70 100644
--- a/arch/powerpc/kvm/e500_mmu_host.c
+++ b/arch/powerpc/kvm/e500_mmu_host.c
@@ -30,6 +30,7 @@
 #include <linux/vmalloc.h>
 #include <linux/hugetlb.h>
 #include <asm/kvm_ppc.h>
+#include <asm/pte-walk.h>
 
 #include "e500.h"
 #include "timing.h"
@@ -476,7 +477,7 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
 	 * can't run hence pfn won't change.
 	 */
 	local_irq_save(flags);
-	ptep = find_linux_pte_or_hugepte(pgdir, hva, NULL, NULL);
+	ptep = find_linux_pte(pgdir, hva, NULL, NULL);
 	if (ptep) {
 		pte_t pte = READ_ONCE(*ptep);
 
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 3c3146ba62da..50d5bf954cff 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -31,7 +31,8 @@ obj64-$(CONFIG_KPROBES_SANITY_TEST) += test_emulate_step.o
 
 obj-y			+= checksum_$(BITS).o checksum_wrappers.o
 
-obj-$(CONFIG_PPC_EMULATE_SSTEP)	+= sstep.o ldstfp.o
+obj-y			+= sstep.o ldstfp.o quad.o
+obj64-y			+= quad.o
 
 obj-$(CONFIG_PPC_LIB_RHEAP) += rheap.o
 
diff --git a/arch/powerpc/lib/copy_32.S b/arch/powerpc/lib/copy_32.S
index 8aedbb5f4b86..da425bb6b369 100644
--- a/arch/powerpc/lib/copy_32.S
+++ b/arch/powerpc/lib/copy_32.S
@@ -67,6 +67,20 @@ CACHELINE_BYTES = L1_CACHE_BYTES
 LG_CACHELINE_BYTES = L1_CACHE_SHIFT
 CACHELINE_MASK = (L1_CACHE_BYTES-1)
 
+_GLOBAL(memset16)
+	rlwinm.	r0 ,r5, 31, 1, 31
+	addi	r6, r3, -4
+	beq-	2f
+	rlwimi	r4 ,r4 ,16 ,0 ,15
+	mtctr	r0
+1:	stwu	r4, 4(r6)
+	bdnz	1b
+2:	andi.	r0, r5, 1
+	beqlr
+	sth	r4, 4(r6)
+	blr
+EXPORT_SYMBOL(memset16)
+
 /*
  * Use dcbz on the complete cache lines in the destination
  * to set them to zero.  This requires that the destination
@@ -77,22 +91,24 @@ CACHELINE_MASK = (L1_CACHE_BYTES-1)
  * replaced by a nop once cache is active. This is done in machine_init()
  */
 _GLOBAL(memset)
+	cmplwi	0,r5,4
+	blt	7f
+
 	rlwimi	r4,r4,8,16,23
 	rlwimi	r4,r4,16,0,15
 
-	addi	r6,r3,-4
-	cmplwi	0,r5,4
-	blt	7f
-	stwu	r4,4(r6)
+	stw	r4,0(r3)
 	beqlr
-	andi.	r0,r6,3
+	andi.	r0,r3,3
 	add	r5,r0,r5
-	subf	r6,r0,r6
+	subf	r6,r0,r3
 	cmplwi	0,r4,0
-	bne	2f	/* Use normal procedure if r4 is not zero */
-EXPORT_SYMBOL(memset)
+	/*
+	 * Skip optimised bloc until cache is enabled. Will be replaced
+	 * by 'bne' during boot to use normal procedure if r4 is not zero
+	 */
 _GLOBAL(memset_nocache_branch)
-	b	2f	/* Skip optimised bloc until cache is enabled */
+	b	2f
 
 	clrlwi	r7,r6,32-LG_CACHELINE_BYTES
 	add	r8,r7,r5
@@ -119,7 +135,6 @@ _GLOBAL(memset_nocache_branch)
 1:	stwu	r4,4(r6)
 	bdnz	1b
 6:	andi.	r5,r5,3
-7:	cmpwi	0,r5,0
 	beqlr
 	mtctr	r5
 	addi	r6,r6,3
@@ -127,6 +142,15 @@ _GLOBAL(memset_nocache_branch)
 	bdnz	8b
 	blr
 
+7:	cmpwi	0,r5,0
+	beqlr
+	mtctr	r5
+	addi	r6,r3,-1
+9:	stbu	r4,1(r6)
+	bdnz	9b
+	blr
+EXPORT_SYMBOL(memset)
+
 /*
  * This version uses dcbz on the complete cache lines in the
  * destination area to reduce memory traffic.  This requires that
diff --git a/arch/powerpc/lib/copypage_power7.S b/arch/powerpc/lib/copypage_power7.S
index a84d333ecb09..ca5fc8fa7efc 100644
--- a/arch/powerpc/lib/copypage_power7.S
+++ b/arch/powerpc/lib/copypage_power7.S
@@ -45,13 +45,13 @@ _GLOBAL(copypage_power7)
 .machine push
 .machine "power4"
 	/* setup read stream 0  */
-	dcbt	r0,r4,0b01000  	/* addr from */
-	dcbt	r0,r7,0b01010   /* length and depth from */
+	dcbt	0,r4,0b01000  	/* addr from */
+	dcbt	0,r7,0b01010   /* length and depth from */
 	/* setup write stream 1 */
-	dcbtst	r0,r9,0b01000   /* addr to */
-	dcbtst	r0,r10,0b01010  /* length and depth to */
+	dcbtst	0,r9,0b01000   /* addr to */
+	dcbtst	0,r10,0b01010  /* length and depth to */
 	eieio
-	dcbt	r0,r8,0b01010	/* all streams GO */
+	dcbt	0,r8,0b01010	/* all streams GO */
 .machine pop
 
 #ifdef CONFIG_ALTIVEC
@@ -83,7 +83,7 @@ _GLOBAL(copypage_power7)
 	li	r12,112
 
 	.align	5
-1:	lvx	v7,r0,r4
+1:	lvx	v7,0,r4
 	lvx	v6,r4,r6
 	lvx	v5,r4,r7
 	lvx	v4,r4,r8
@@ -92,7 +92,7 @@ _GLOBAL(copypage_power7)
 	lvx	v1,r4,r11
 	lvx	v0,r4,r12
 	addi	r4,r4,128
-	stvx	v7,r0,r3
+	stvx	v7,0,r3
 	stvx	v6,r3,r6
 	stvx	v5,r3,r7
 	stvx	v4,r3,r8
diff --git a/arch/powerpc/lib/copyuser_power7.S b/arch/powerpc/lib/copyuser_power7.S
index 706b7cc19846..d416a4a66578 100644
--- a/arch/powerpc/lib/copyuser_power7.S
+++ b/arch/powerpc/lib/copyuser_power7.S
@@ -315,13 +315,13 @@ err1;	stb	r0,0(r3)
 .machine push
 .machine "power4"
 	/* setup read stream 0 */
-	dcbt	r0,r6,0b01000   /* addr from */
-	dcbt	r0,r7,0b01010   /* length and depth from */
+	dcbt	0,r6,0b01000   /* addr from */
+	dcbt	0,r7,0b01010   /* length and depth from */
 	/* setup write stream 1 */
-	dcbtst	r0,r9,0b01000   /* addr to */
-	dcbtst	r0,r10,0b01010  /* length and depth to */
+	dcbtst	0,r9,0b01000   /* addr to */
+	dcbtst	0,r10,0b01010  /* length and depth to */
 	eieio
-	dcbt	r0,r8,0b01010	/* all streams GO */
+	dcbt	0,r8,0b01010	/* all streams GO */
 .machine pop
 
 	beq	cr1,.Lunwind_stack_nonvmx_copy
@@ -376,26 +376,26 @@ err3;	std	r0,0(r3)
 	li	r11,48
 
 	bf	cr7*4+3,5f
-err3;	lvx	v1,r0,r4
+err3;	lvx	v1,0,r4
 	addi	r4,r4,16
-err3;	stvx	v1,r0,r3
+err3;	stvx	v1,0,r3
 	addi	r3,r3,16
 
 5:	bf	cr7*4+2,6f
-err3;	lvx	v1,r0,r4
+err3;	lvx	v1,0,r4
 err3;	lvx	v0,r4,r9
 	addi	r4,r4,32
-err3;	stvx	v1,r0,r3
+err3;	stvx	v1,0,r3
 err3;	stvx	v0,r3,r9
 	addi	r3,r3,32
 
 6:	bf	cr7*4+1,7f
-err3;	lvx	v3,r0,r4
+err3;	lvx	v3,0,r4
 err3;	lvx	v2,r4,r9
 err3;	lvx	v1,r4,r10
 err3;	lvx	v0,r4,r11
 	addi	r4,r4,64
-err3;	stvx	v3,r0,r3
+err3;	stvx	v3,0,r3
 err3;	stvx	v2,r3,r9
 err3;	stvx	v1,r3,r10
 err3;	stvx	v0,r3,r11
@@ -421,7 +421,7 @@ err3;	stvx	v0,r3,r11
 	 */
 	.align	5
 8:
-err4;	lvx	v7,r0,r4
+err4;	lvx	v7,0,r4
 err4;	lvx	v6,r4,r9
 err4;	lvx	v5,r4,r10
 err4;	lvx	v4,r4,r11
@@ -430,7 +430,7 @@ err4;	lvx	v2,r4,r14
 err4;	lvx	v1,r4,r15
 err4;	lvx	v0,r4,r16
 	addi	r4,r4,128
-err4;	stvx	v7,r0,r3
+err4;	stvx	v7,0,r3
 err4;	stvx	v6,r3,r9
 err4;	stvx	v5,r3,r10
 err4;	stvx	v4,r3,r11
@@ -451,29 +451,29 @@ err4;	stvx	v0,r3,r16
 	mtocrf	0x01,r6
 
 	bf	cr7*4+1,9f
-err3;	lvx	v3,r0,r4
+err3;	lvx	v3,0,r4
 err3;	lvx	v2,r4,r9
 err3;	lvx	v1,r4,r10
 err3;	lvx	v0,r4,r11
 	addi	r4,r4,64
-err3;	stvx	v3,r0,r3
+err3;	stvx	v3,0,r3
 err3;	stvx	v2,r3,r9
 err3;	stvx	v1,r3,r10
 err3;	stvx	v0,r3,r11
 	addi	r3,r3,64
 
 9:	bf	cr7*4+2,10f
-err3;	lvx	v1,r0,r4
+err3;	lvx	v1,0,r4
 err3;	lvx	v0,r4,r9
 	addi	r4,r4,32
-err3;	stvx	v1,r0,r3
+err3;	stvx	v1,0,r3
 err3;	stvx	v0,r3,r9
 	addi	r3,r3,32
 
 10:	bf	cr7*4+3,11f
-err3;	lvx	v1,r0,r4
+err3;	lvx	v1,0,r4
 	addi	r4,r4,16
-err3;	stvx	v1,r0,r3
+err3;	stvx	v1,0,r3
 	addi	r3,r3,16
 
 	/* Up to 15B to go */
@@ -553,25 +553,25 @@ err3;	lvx	v0,0,r4
 	addi	r4,r4,16
 
 	bf	cr7*4+3,5f
-err3;	lvx	v1,r0,r4
+err3;	lvx	v1,0,r4
 	VPERM(v8,v0,v1,v16)
 	addi	r4,r4,16
-err3;	stvx	v8,r0,r3
+err3;	stvx	v8,0,r3
 	addi	r3,r3,16
 	vor	v0,v1,v1
 
 5:	bf	cr7*4+2,6f
-err3;	lvx	v1,r0,r4
+err3;	lvx	v1,0,r4
 	VPERM(v8,v0,v1,v16)
 err3;	lvx	v0,r4,r9
 	VPERM(v9,v1,v0,v16)
 	addi	r4,r4,32
-err3;	stvx	v8,r0,r3
+err3;	stvx	v8,0,r3
 err3;	stvx	v9,r3,r9
 	addi	r3,r3,32
 
 6:	bf	cr7*4+1,7f
-err3;	lvx	v3,r0,r4
+err3;	lvx	v3,0,r4
 	VPERM(v8,v0,v3,v16)
 err3;	lvx	v2,r4,r9
 	VPERM(v9,v3,v2,v16)
@@ -580,7 +580,7 @@ err3;	lvx	v1,r4,r10
 err3;	lvx	v0,r4,r11
 	VPERM(v11,v1,v0,v16)
 	addi	r4,r4,64
-err3;	stvx	v8,r0,r3
+err3;	stvx	v8,0,r3
 err3;	stvx	v9,r3,r9
 err3;	stvx	v10,r3,r10
 err3;	stvx	v11,r3,r11
@@ -606,7 +606,7 @@ err3;	stvx	v11,r3,r11
 	 */
 	.align	5
 8:
-err4;	lvx	v7,r0,r4
+err4;	lvx	v7,0,r4
 	VPERM(v8,v0,v7,v16)
 err4;	lvx	v6,r4,r9
 	VPERM(v9,v7,v6,v16)
@@ -623,7 +623,7 @@ err4;	lvx	v1,r4,r15
 err4;	lvx	v0,r4,r16
 	VPERM(v15,v1,v0,v16)
 	addi	r4,r4,128
-err4;	stvx	v8,r0,r3
+err4;	stvx	v8,0,r3
 err4;	stvx	v9,r3,r9
 err4;	stvx	v10,r3,r10
 err4;	stvx	v11,r3,r11
@@ -644,7 +644,7 @@ err4;	stvx	v15,r3,r16
 	mtocrf	0x01,r6
 
 	bf	cr7*4+1,9f
-err3;	lvx	v3,r0,r4
+err3;	lvx	v3,0,r4
 	VPERM(v8,v0,v3,v16)
 err3;	lvx	v2,r4,r9
 	VPERM(v9,v3,v2,v16)
@@ -653,27 +653,27 @@ err3;	lvx	v1,r4,r10
 err3;	lvx	v0,r4,r11
 	VPERM(v11,v1,v0,v16)
 	addi	r4,r4,64
-err3;	stvx	v8,r0,r3
+err3;	stvx	v8,0,r3
 err3;	stvx	v9,r3,r9
 err3;	stvx	v10,r3,r10
 err3;	stvx	v11,r3,r11
 	addi	r3,r3,64
 
 9:	bf	cr7*4+2,10f
-err3;	lvx	v1,r0,r4
+err3;	lvx	v1,0,r4
 	VPERM(v8,v0,v1,v16)
 err3;	lvx	v0,r4,r9
 	VPERM(v9,v1,v0,v16)
 	addi	r4,r4,32
-err3;	stvx	v8,r0,r3
+err3;	stvx	v8,0,r3
 err3;	stvx	v9,r3,r9
 	addi	r3,r3,32
 
 10:	bf	cr7*4+3,11f
-err3;	lvx	v1,r0,r4
+err3;	lvx	v1,0,r4
 	VPERM(v8,v0,v1,v16)
 	addi	r4,r4,16
-err3;	stvx	v8,r0,r3
+err3;	stvx	v8,0,r3
 	addi	r3,r3,16
 
 	/* Up to 15B to go */
diff --git a/arch/powerpc/lib/ldstfp.S b/arch/powerpc/lib/ldstfp.S
index a58777c1b2cb..ae15eba49c1f 100644
--- a/arch/powerpc/lib/ldstfp.S
+++ b/arch/powerpc/lib/ldstfp.S
@@ -21,27 +21,19 @@
 
 #define STKFRM	(PPC_MIN_STKFRM + 16)
 
-	.macro	inst32	op
-reg = 0
-	.rept	32
-20:	\op	reg,0,r4
-	b	3f
-	EX_TABLE(20b,99f)
-reg = reg + 1
-	.endr
-	.endm
-
-/* Get the contents of frN into fr0; N is in r3. */
+/* Get the contents of frN into *p; N is in r3 and p is in r4. */
 _GLOBAL(get_fpr)
 	mflr	r0
+	mfmsr	r6
+	ori	r7, r6, MSR_FP
+	MTMSRD(r7)
+	isync
 	rlwinm	r3,r3,3,0xf8
 	bcl	20,31,1f
-	blr			/* fr0 is already in fr0 */
-	nop
-reg = 1
-	.rept	31
-	fmr	fr0,reg
-	blr
+reg = 0
+	.rept	32
+	stfd	reg, 0(r4)
+	b	2f
 reg = reg + 1
 	.endr
 1:	mflr	r5
@@ -49,18 +41,23 @@ reg = reg + 1
 	mtctr	r5
 	mtlr	r0
 	bctr
+2:	MTMSRD(r6)
+	isync
+	blr
 
-/* Put the contents of fr0 into frN; N is in r3. */
+/* Put the contents of *p into frN; N is in r3 and p is in r4. */
 _GLOBAL(put_fpr)
 	mflr	r0
+	mfmsr	r6
+	ori	r7, r6, MSR_FP
+	MTMSRD(r7)
+	isync
 	rlwinm	r3,r3,3,0xf8
 	bcl	20,31,1f
-	blr			/* fr0 is already in fr0 */
-	nop
-reg = 1
-	.rept	31
-	fmr	reg,fr0
-	blr
+reg = 0
+	.rept	32
+	lfd	reg, 0(r4)
+	b	2f
 reg = reg + 1
 	.endr
 1:	mflr	r5
@@ -68,127 +65,24 @@ reg = reg + 1
 	mtctr	r5
 	mtlr	r0
 	bctr
-
-/* Load FP reg N from float at *p.  N is in r3, p in r4. */
-_GLOBAL(do_lfs)
-	PPC_STLU r1,-STKFRM(r1)
-	mflr	r0
-	PPC_STL	r0,STKFRM+PPC_LR_STKOFF(r1)
-	mfmsr	r6
-	ori	r7,r6,MSR_FP
-	cmpwi	cr7,r3,0
-	MTMSRD(r7)
-	isync
-	beq	cr7,1f
-	stfd	fr0,STKFRM-16(r1)
-1:	li	r9,-EFAULT
-2:	lfs	fr0,0(r4)
-	li	r9,0
-3:	bl	put_fpr
-	beq	cr7,4f
-	lfd	fr0,STKFRM-16(r1)
-4:	PPC_LL	r0,STKFRM+PPC_LR_STKOFF(r1)
-	mtlr	r0
-	MTMSRD(r6)
-	isync
-	mr	r3,r9
-	addi	r1,r1,STKFRM
-	blr
-	EX_TABLE(2b,3b)
-
-/* Load FP reg N from double at *p.  N is in r3, p in r4. */
-_GLOBAL(do_lfd)
-	PPC_STLU r1,-STKFRM(r1)
-	mflr	r0
-	PPC_STL	r0,STKFRM+PPC_LR_STKOFF(r1)
-	mfmsr	r6
-	ori	r7,r6,MSR_FP
-	cmpwi	cr7,r3,0
-	MTMSRD(r7)
-	isync
-	beq	cr7,1f
-	stfd	fr0,STKFRM-16(r1)
-1:	li	r9,-EFAULT
-2:	lfd	fr0,0(r4)
-	li	r9,0
-3:	beq	cr7,4f
-	bl	put_fpr
-	lfd	fr0,STKFRM-16(r1)
-4:	PPC_LL	r0,STKFRM+PPC_LR_STKOFF(r1)
-	mtlr	r0
-	MTMSRD(r6)
+2:	MTMSRD(r6)
 	isync
-	mr	r3,r9
-	addi	r1,r1,STKFRM
 	blr
-	EX_TABLE(2b,3b)
 
-/* Store FP reg N to float at *p.  N is in r3, p in r4. */
-_GLOBAL(do_stfs)
-	PPC_STLU r1,-STKFRM(r1)
-	mflr	r0
-	PPC_STL	r0,STKFRM+PPC_LR_STKOFF(r1)
-	mfmsr	r6
-	ori	r7,r6,MSR_FP
-	cmpwi	cr7,r3,0
-	MTMSRD(r7)
-	isync
-	beq	cr7,1f
-	stfd	fr0,STKFRM-16(r1)
-	bl	get_fpr
-1:	li	r9,-EFAULT
-2:	stfs	fr0,0(r4)
-	li	r9,0
-3:	beq	cr7,4f
-	lfd	fr0,STKFRM-16(r1)
-4:	PPC_LL	r0,STKFRM+PPC_LR_STKOFF(r1)
-	mtlr	r0
-	MTMSRD(r6)
-	isync
-	mr	r3,r9
-	addi	r1,r1,STKFRM
-	blr
-	EX_TABLE(2b,3b)
-
-/* Store FP reg N to double at *p.  N is in r3, p in r4. */
-_GLOBAL(do_stfd)
-	PPC_STLU r1,-STKFRM(r1)
+#ifdef CONFIG_ALTIVEC
+/* Get the contents of vrN into *p; N is in r3 and p is in r4. */
+_GLOBAL(get_vr)
 	mflr	r0
-	PPC_STL	r0,STKFRM+PPC_LR_STKOFF(r1)
 	mfmsr	r6
-	ori	r7,r6,MSR_FP
-	cmpwi	cr7,r3,0
+	oris	r7, r6, MSR_VEC@h
 	MTMSRD(r7)
 	isync
-	beq	cr7,1f
-	stfd	fr0,STKFRM-16(r1)
-	bl	get_fpr
-1:	li	r9,-EFAULT
-2:	stfd	fr0,0(r4)
-	li	r9,0
-3:	beq	cr7,4f
-	lfd	fr0,STKFRM-16(r1)
-4:	PPC_LL	r0,STKFRM+PPC_LR_STKOFF(r1)
-	mtlr	r0
-	MTMSRD(r6)
-	isync
-	mr	r3,r9
-	addi	r1,r1,STKFRM
-	blr
-	EX_TABLE(2b,3b)
-
-#ifdef CONFIG_ALTIVEC
-/* Get the contents of vrN into v0; N is in r3. */
-_GLOBAL(get_vr)
-	mflr	r0
 	rlwinm	r3,r3,3,0xf8
 	bcl	20,31,1f
-	blr			/* v0 is already in v0 */
-	nop
-reg = 1
-	.rept	31
-	vor	v0,reg,reg	/* assembler doesn't know vmr? */
-	blr
+reg = 0
+	.rept	32
+	stvx	reg, 0, r4
+	b	2f
 reg = reg + 1
 	.endr
 1:	mflr	r5
@@ -196,18 +90,23 @@ reg = reg + 1
 	mtctr	r5
 	mtlr	r0
 	bctr
+2:	MTMSRD(r6)
+	isync
+	blr
 
-/* Put the contents of v0 into vrN; N is in r3. */
+/* Put the contents of *p into vrN; N is in r3 and p is in r4. */
 _GLOBAL(put_vr)
 	mflr	r0
+	mfmsr	r6
+	oris	r7, r6, MSR_VEC@h
+	MTMSRD(r7)
+	isync
 	rlwinm	r3,r3,3,0xf8
 	bcl	20,31,1f
-	blr			/* v0 is already in v0 */
-	nop
-reg = 1
-	.rept	31
-	vor	reg,v0,v0
-	blr
+reg = 0
+	.rept	32
+	lvx	reg, 0, r4
+	b	2f
 reg = reg + 1
 	.endr
 1:	mflr	r5
@@ -215,62 +114,9 @@ reg = reg + 1
 	mtctr	r5
 	mtlr	r0
 	bctr
-
-/* Load vector reg N from *p.  N is in r3, p in r4. */
-_GLOBAL(do_lvx)
-	PPC_STLU r1,-STKFRM(r1)
-	mflr	r0
-	PPC_STL	r0,STKFRM+PPC_LR_STKOFF(r1)
-	mfmsr	r6
-	oris	r7,r6,MSR_VEC@h
-	cmpwi	cr7,r3,0
-	li	r8,STKFRM-16
-	MTMSRD(r7)
-	isync
-	beq	cr7,1f
-	stvx	v0,r1,r8
-1:	li	r9,-EFAULT
-2:	lvx	v0,0,r4
-	li	r9,0
-3:	beq	cr7,4f
-	bl	put_vr
-	lvx	v0,r1,r8
-4:	PPC_LL	r0,STKFRM+PPC_LR_STKOFF(r1)
-	mtlr	r0
-	MTMSRD(r6)
+2:	MTMSRD(r6)
 	isync
-	mr	r3,r9
-	addi	r1,r1,STKFRM
-	blr
-	EX_TABLE(2b,3b)
-
-/* Store vector reg N to *p.  N is in r3, p in r4. */
-_GLOBAL(do_stvx)
-	PPC_STLU r1,-STKFRM(r1)
-	mflr	r0
-	PPC_STL	r0,STKFRM+PPC_LR_STKOFF(r1)
-	mfmsr	r6
-	oris	r7,r6,MSR_VEC@h
-	cmpwi	cr7,r3,0
-	li	r8,STKFRM-16
-	MTMSRD(r7)
-	isync
-	beq	cr7,1f
-	stvx	v0,r1,r8
-	bl	get_vr
-1:	li	r9,-EFAULT
-2:	stvx	v0,0,r4
-	li	r9,0
-3:	beq	cr7,4f
-	lvx	v0,r1,r8
-4:	PPC_LL	r0,STKFRM+PPC_LR_STKOFF(r1)
-	mtlr	r0
-	MTMSRD(r6)
-	isync
-	mr	r3,r9
-	addi	r1,r1,STKFRM
 	blr
-	EX_TABLE(2b,3b)
 #endif /* CONFIG_ALTIVEC */
 
 #ifdef CONFIG_VSX
@@ -313,7 +159,7 @@ reg = reg + 1
 	bctr
 
 /* Load VSX reg N from vector doubleword *p.  N is in r3, p in r4. */
-_GLOBAL(do_lxvd2x)
+_GLOBAL(load_vsrn)
 	PPC_STLU r1,-STKFRM(r1)
 	mflr	r0
 	PPC_STL	r0,STKFRM+PPC_LR_STKOFF(r1)
@@ -325,49 +171,74 @@ _GLOBAL(do_lxvd2x)
 	isync
 	beq	cr7,1f
 	STXVD2X(0,R1,R8)
-1:	li	r9,-EFAULT
-2:	LXVD2X(0,R0,R4)
-	li	r9,0
-3:	beq	cr7,4f
+1:	LXVD2X(0,R0,R4)
+#ifdef __LITTLE_ENDIAN__
+	XXSWAPD(0,0)
+#endif
+	beq	cr7,4f
 	bl	put_vsr
 	LXVD2X(0,R1,R8)
 4:	PPC_LL	r0,STKFRM+PPC_LR_STKOFF(r1)
 	mtlr	r0
 	MTMSRD(r6)
 	isync
-	mr	r3,r9
 	addi	r1,r1,STKFRM
 	blr
-	EX_TABLE(2b,3b)
 
 /* Store VSX reg N to vector doubleword *p.  N is in r3, p in r4. */
-_GLOBAL(do_stxvd2x)
+_GLOBAL(store_vsrn)
 	PPC_STLU r1,-STKFRM(r1)
 	mflr	r0
 	PPC_STL	r0,STKFRM+PPC_LR_STKOFF(r1)
 	mfmsr	r6
 	oris	r7,r6,MSR_VSX@h
-	cmpwi	cr7,r3,0
 	li	r8,STKFRM-16
 	MTMSRD(r7)
 	isync
-	beq	cr7,1f
 	STXVD2X(0,R1,R8)
 	bl	get_vsr
-1:	li	r9,-EFAULT
-2:	STXVD2X(0,R0,R4)
-	li	r9,0
-3:	beq	cr7,4f
+#ifdef __LITTLE_ENDIAN__
+	XXSWAPD(0,0)
+#endif
+	STXVD2X(0,R0,R4)
 	LXVD2X(0,R1,R8)
-4:	PPC_LL	r0,STKFRM+PPC_LR_STKOFF(r1)
+	PPC_LL	r0,STKFRM+PPC_LR_STKOFF(r1)
 	mtlr	r0
 	MTMSRD(r6)
 	isync
 	mr	r3,r9
 	addi	r1,r1,STKFRM
 	blr
-	EX_TABLE(2b,3b)
-
 #endif /* CONFIG_VSX */
 
+/* Convert single-precision to double, without disturbing FPRs. */
+/* conv_sp_to_dp(float *sp, double *dp) */
+_GLOBAL(conv_sp_to_dp)
+	mfmsr	r6
+	ori	r7, r6, MSR_FP
+	MTMSRD(r7)
+	isync
+	stfd	fr0, -16(r1)
+	lfs	fr0, 0(r3)
+	stfd	fr0, 0(r4)
+	lfd	fr0, -16(r1)
+	MTMSRD(r6)
+	isync
+	blr
+
+/* Convert single-precision to double, without disturbing FPRs. */
+/* conv_sp_to_dp(double *dp, float *sp) */
+_GLOBAL(conv_dp_to_sp)
+	mfmsr	r6
+	ori	r7, r6, MSR_FP
+	MTMSRD(r7)
+	isync
+	stfd	fr0, -16(r1)
+	lfd	fr0, 0(r3)
+	stfs	fr0, 0(r4)
+	lfd	fr0, -16(r1)
+	MTMSRD(r6)
+	isync
+	blr
+
 #endif	/* CONFIG_PPC_FPU */
diff --git a/arch/powerpc/lib/mem_64.S b/arch/powerpc/lib/mem_64.S
index 85fa9869aec5..ec531de99996 100644
--- a/arch/powerpc/lib/mem_64.S
+++ b/arch/powerpc/lib/mem_64.S
@@ -13,6 +13,23 @@
 #include <asm/ppc_asm.h>
 #include <asm/export.h>
 
+_GLOBAL(__memset16)
+	rlwimi	r4,r4,16,0,15
+	/* fall through */
+
+_GLOBAL(__memset32)
+	rldimi	r4,r4,32,0
+	/* fall through */
+
+_GLOBAL(__memset64)
+	neg	r0,r3
+	andi.	r0,r0,7
+	cmplw	cr1,r5,r0
+	b	.Lms
+EXPORT_SYMBOL(__memset16)
+EXPORT_SYMBOL(__memset32)
+EXPORT_SYMBOL(__memset64)
+
 _GLOBAL(memset)
 	neg	r0,r3
 	rlwimi	r4,r4,8,16,23
@@ -20,7 +37,7 @@ _GLOBAL(memset)
 	rlwimi	r4,r4,16,0,15
 	cmplw	cr1,r5,r0		/* do we get that far? */
 	rldimi	r4,r4,32,0
-	PPC_MTOCRF(1,r0)
+.Lms:	PPC_MTOCRF(1,r0)
 	mr	r6,r3
 	blt	cr1,8f
 	beq+	3f			/* if already 8-byte aligned */
diff --git a/arch/powerpc/lib/memcpy_power7.S b/arch/powerpc/lib/memcpy_power7.S
index 786234fd4e91..193909abd18b 100644
--- a/arch/powerpc/lib/memcpy_power7.S
+++ b/arch/powerpc/lib/memcpy_power7.S
@@ -261,12 +261,12 @@ _GLOBAL(memcpy_power7)
 
 .machine push
 .machine "power4"
-	dcbt	r0,r6,0b01000
-	dcbt	r0,r7,0b01010
-	dcbtst	r0,r9,0b01000
-	dcbtst	r0,r10,0b01010
+	dcbt	0,r6,0b01000
+	dcbt	0,r7,0b01010
+	dcbtst	0,r9,0b01000
+	dcbtst	0,r10,0b01010
 	eieio
-	dcbt	r0,r8,0b01010	/* GO */
+	dcbt	0,r8,0b01010	/* GO */
 .machine pop
 
 	beq	cr1,.Lunwind_stack_nonvmx_copy
@@ -321,26 +321,26 @@ _GLOBAL(memcpy_power7)
 	li	r11,48
 
 	bf	cr7*4+3,5f
-	lvx	v1,r0,r4
+	lvx	v1,0,r4
 	addi	r4,r4,16
-	stvx	v1,r0,r3
+	stvx	v1,0,r3
 	addi	r3,r3,16
 
 5:	bf	cr7*4+2,6f
-	lvx	v1,r0,r4
+	lvx	v1,0,r4
 	lvx	v0,r4,r9
 	addi	r4,r4,32
-	stvx	v1,r0,r3
+	stvx	v1,0,r3
 	stvx	v0,r3,r9
 	addi	r3,r3,32
 
 6:	bf	cr7*4+1,7f
-	lvx	v3,r0,r4
+	lvx	v3,0,r4
 	lvx	v2,r4,r9
 	lvx	v1,r4,r10
 	lvx	v0,r4,r11
 	addi	r4,r4,64
-	stvx	v3,r0,r3
+	stvx	v3,0,r3
 	stvx	v2,r3,r9
 	stvx	v1,r3,r10
 	stvx	v0,r3,r11
@@ -366,7 +366,7 @@ _GLOBAL(memcpy_power7)
 	 */
 	.align	5
 8:
-	lvx	v7,r0,r4
+	lvx	v7,0,r4
 	lvx	v6,r4,r9
 	lvx	v5,r4,r10
 	lvx	v4,r4,r11
@@ -375,7 +375,7 @@ _GLOBAL(memcpy_power7)
 	lvx	v1,r4,r15
 	lvx	v0,r4,r16
 	addi	r4,r4,128
-	stvx	v7,r0,r3
+	stvx	v7,0,r3
 	stvx	v6,r3,r9
 	stvx	v5,r3,r10
 	stvx	v4,r3,r11
@@ -396,29 +396,29 @@ _GLOBAL(memcpy_power7)
 	mtocrf	0x01,r6
 
 	bf	cr7*4+1,9f
-	lvx	v3,r0,r4
+	lvx	v3,0,r4
 	lvx	v2,r4,r9
 	lvx	v1,r4,r10
 	lvx	v0,r4,r11
 	addi	r4,r4,64
-	stvx	v3,r0,r3
+	stvx	v3,0,r3
 	stvx	v2,r3,r9
 	stvx	v1,r3,r10
 	stvx	v0,r3,r11
 	addi	r3,r3,64
 
 9:	bf	cr7*4+2,10f
-	lvx	v1,r0,r4
+	lvx	v1,0,r4
 	lvx	v0,r4,r9
 	addi	r4,r4,32
-	stvx	v1,r0,r3
+	stvx	v1,0,r3
 	stvx	v0,r3,r9
 	addi	r3,r3,32
 
 10:	bf	cr7*4+3,11f
-	lvx	v1,r0,r4
+	lvx	v1,0,r4
 	addi	r4,r4,16
-	stvx	v1,r0,r3
+	stvx	v1,0,r3
 	addi	r3,r3,16
 
 	/* Up to 15B to go */
@@ -499,25 +499,25 @@ _GLOBAL(memcpy_power7)
 	addi	r4,r4,16
 
 	bf	cr7*4+3,5f
-	lvx	v1,r0,r4
+	lvx	v1,0,r4
 	VPERM(v8,v0,v1,v16)
 	addi	r4,r4,16
-	stvx	v8,r0,r3
+	stvx	v8,0,r3
 	addi	r3,r3,16
 	vor	v0,v1,v1
 
 5:	bf	cr7*4+2,6f
-	lvx	v1,r0,r4
+	lvx	v1,0,r4
 	VPERM(v8,v0,v1,v16)
 	lvx	v0,r4,r9
 	VPERM(v9,v1,v0,v16)
 	addi	r4,r4,32
-	stvx	v8,r0,r3
+	stvx	v8,0,r3
 	stvx	v9,r3,r9
 	addi	r3,r3,32
 
 6:	bf	cr7*4+1,7f
-	lvx	v3,r0,r4
+	lvx	v3,0,r4
 	VPERM(v8,v0,v3,v16)
 	lvx	v2,r4,r9
 	VPERM(v9,v3,v2,v16)
@@ -526,7 +526,7 @@ _GLOBAL(memcpy_power7)
 	lvx	v0,r4,r11
 	VPERM(v11,v1,v0,v16)
 	addi	r4,r4,64
-	stvx	v8,r0,r3
+	stvx	v8,0,r3
 	stvx	v9,r3,r9
 	stvx	v10,r3,r10
 	stvx	v11,r3,r11
@@ -552,7 +552,7 @@ _GLOBAL(memcpy_power7)
 	 */
 	.align	5
 8:
-	lvx	v7,r0,r4
+	lvx	v7,0,r4
 	VPERM(v8,v0,v7,v16)
 	lvx	v6,r4,r9
 	VPERM(v9,v7,v6,v16)
@@ -569,7 +569,7 @@ _GLOBAL(memcpy_power7)
 	lvx	v0,r4,r16
 	VPERM(v15,v1,v0,v16)
 	addi	r4,r4,128
-	stvx	v8,r0,r3
+	stvx	v8,0,r3
 	stvx	v9,r3,r9
 	stvx	v10,r3,r10
 	stvx	v11,r3,r11
@@ -590,7 +590,7 @@ _GLOBAL(memcpy_power7)
 	mtocrf	0x01,r6
 
 	bf	cr7*4+1,9f
-	lvx	v3,r0,r4
+	lvx	v3,0,r4
 	VPERM(v8,v0,v3,v16)
 	lvx	v2,r4,r9
 	VPERM(v9,v3,v2,v16)
@@ -599,27 +599,27 @@ _GLOBAL(memcpy_power7)
 	lvx	v0,r4,r11
 	VPERM(v11,v1,v0,v16)
 	addi	r4,r4,64
-	stvx	v8,r0,r3
+	stvx	v8,0,r3
 	stvx	v9,r3,r9
 	stvx	v10,r3,r10
 	stvx	v11,r3,r11
 	addi	r3,r3,64
 
 9:	bf	cr7*4+2,10f
-	lvx	v1,r0,r4
+	lvx	v1,0,r4
 	VPERM(v8,v0,v1,v16)
 	lvx	v0,r4,r9
 	VPERM(v9,v1,v0,v16)
 	addi	r4,r4,32
-	stvx	v8,r0,r3
+	stvx	v8,0,r3
 	stvx	v9,r3,r9
 	addi	r3,r3,32
 
 10:	bf	cr7*4+3,11f
-	lvx	v1,r0,r4
+	lvx	v1,0,r4
 	VPERM(v8,v0,v1,v16)
 	addi	r4,r4,16
-	stvx	v8,r0,r3
+	stvx	v8,0,r3
 	addi	r3,r3,16
 
 	/* Up to 15B to go */
diff --git a/arch/powerpc/lib/quad.S b/arch/powerpc/lib/quad.S
new file mode 100644
index 000000000000..c4d12fae8724
--- /dev/null
+++ b/arch/powerpc/lib/quad.S
@@ -0,0 +1,62 @@
+/*
+ * Quadword loads and stores
+ * for use in instruction emulation.
+ *
+ * Copyright 2017 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ */
+
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+#include <asm/ppc-opcode.h>
+#include <asm/reg.h>
+#include <asm/asm-offsets.h>
+#include <linux/errno.h>
+
+/* do_lq(unsigned long ea, unsigned long *regs) */
+_GLOBAL(do_lq)
+1:	lq	r6, 0(r3)
+	std	r6, 0(r4)
+	std	r7, 8(r4)
+	li	r3, 0
+	blr
+2:	li	r3, -EFAULT
+	blr
+	EX_TABLE(1b, 2b)
+
+/* do_stq(unsigned long ea, unsigned long val0, unsigned long val1) */
+_GLOBAL(do_stq)
+1:	stq	r4, 0(r3)
+	li	r3, 0
+	blr
+2:	li	r3, -EFAULT
+	blr
+	EX_TABLE(1b, 2b)
+
+/* do_lqarx(unsigned long ea, unsigned long *regs) */
+_GLOBAL(do_lqarx)
+1:	PPC_LQARX(6, 0, 3, 0)
+	std	r6, 0(r4)
+	std	r7, 8(r4)
+	li	r3, 0
+	blr
+2:	li	r3, -EFAULT
+	blr
+	EX_TABLE(1b, 2b)
+
+/* do_stqcx(unsigned long ea, unsigned long val0, unsigned long val1,
+	    unsigned int *crp) */
+
+_GLOBAL(do_stqcx)
+1:	PPC_STQCX(4, 0, 3)
+	mfcr	r5
+	stw	r5, 0(r6)
+	li	r3, 0
+	blr
+2:	li	r3, -EFAULT
+	blr
+	EX_TABLE(1b, 2b)
diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index ee33327686ae..fb9f58b868e7 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -36,14 +36,33 @@ extern char system_call_common[];
 /*
  * Functions in ldstfp.S
  */
-extern int do_lfs(int rn, unsigned long ea);
-extern int do_lfd(int rn, unsigned long ea);
-extern int do_stfs(int rn, unsigned long ea);
-extern int do_stfd(int rn, unsigned long ea);
-extern int do_lvx(int rn, unsigned long ea);
-extern int do_stvx(int rn, unsigned long ea);
-extern int do_lxvd2x(int rn, unsigned long ea);
-extern int do_stxvd2x(int rn, unsigned long ea);
+extern void get_fpr(int rn, double *p);
+extern void put_fpr(int rn, const double *p);
+extern void get_vr(int rn, __vector128 *p);
+extern void put_vr(int rn, __vector128 *p);
+extern void load_vsrn(int vsr, const void *p);
+extern void store_vsrn(int vsr, void *p);
+extern void conv_sp_to_dp(const float *sp, double *dp);
+extern void conv_dp_to_sp(const double *dp, float *sp);
+#endif
+
+#ifdef __powerpc64__
+/*
+ * Functions in quad.S
+ */
+extern int do_lq(unsigned long ea, unsigned long *regs);
+extern int do_stq(unsigned long ea, unsigned long val0, unsigned long val1);
+extern int do_lqarx(unsigned long ea, unsigned long *regs);
+extern int do_stqcx(unsigned long ea, unsigned long val0, unsigned long val1,
+		    unsigned int *crp);
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+#define IS_LE	1
+#define IS_BE	0
+#else
+#define IS_LE	0
+#define IS_BE	1
 #endif
 
 /*
@@ -62,15 +81,17 @@ static nokprobe_inline unsigned long truncate_if_32bit(unsigned long msr,
 /*
  * Determine whether a conditional branch instruction would branch.
  */
-static nokprobe_inline int branch_taken(unsigned int instr, struct pt_regs *regs)
+static nokprobe_inline int branch_taken(unsigned int instr,
+					const struct pt_regs *regs,
+					struct instruction_op *op)
 {
 	unsigned int bo = (instr >> 21) & 0x1f;
 	unsigned int bi;
 
 	if ((bo & 4) == 0) {
 		/* decrement counter */
-		--regs->ctr;
-		if (((bo >> 1) & 1) ^ (regs->ctr == 0))
+		op->type |= DECCTR;
+		if (((bo >> 1) & 1) ^ (regs->ctr == 1))
 			return 0;
 	}
 	if ((bo & 0x10) == 0) {
@@ -82,17 +103,26 @@ static nokprobe_inline int branch_taken(unsigned int instr, struct pt_regs *regs
 	return 1;
 }
 
-static nokprobe_inline long address_ok(struct pt_regs *regs, unsigned long ea, int nb)
+static nokprobe_inline long address_ok(struct pt_regs *regs,
+				       unsigned long ea, int nb)
 {
 	if (!user_mode(regs))
 		return 1;
-	return __access_ok(ea, nb, USER_DS);
+	if (__access_ok(ea, nb, USER_DS))
+		return 1;
+	if (__access_ok(ea, 1, USER_DS))
+		/* Access overlaps the end of the user region */
+		regs->dar = USER_DS.seg;
+	else
+		regs->dar = ea;
+	return 0;
 }
 
 /*
  * Calculate effective address for a D-form instruction
  */
-static nokprobe_inline unsigned long dform_ea(unsigned int instr, struct pt_regs *regs)
+static nokprobe_inline unsigned long dform_ea(unsigned int instr,
+					      const struct pt_regs *regs)
 {
 	int ra;
 	unsigned long ea;
@@ -102,14 +132,15 @@ static nokprobe_inline unsigned long dform_ea(unsigned int instr, struct pt_regs
 	if (ra)
 		ea += regs->gpr[ra];
 
-	return truncate_if_32bit(regs->msr, ea);
+	return ea;
 }
 
 #ifdef __powerpc64__
 /*
  * Calculate effective address for a DS-form instruction
  */
-static nokprobe_inline unsigned long dsform_ea(unsigned int instr, struct pt_regs *regs)
+static nokprobe_inline unsigned long dsform_ea(unsigned int instr,
+					       const struct pt_regs *regs)
 {
 	int ra;
 	unsigned long ea;
@@ -119,7 +150,24 @@ static nokprobe_inline unsigned long dsform_ea(unsigned int instr, struct pt_reg
 	if (ra)
 		ea += regs->gpr[ra];
 
-	return truncate_if_32bit(regs->msr, ea);
+	return ea;
+}
+
+/*
+ * Calculate effective address for a DQ-form instruction
+ */
+static nokprobe_inline unsigned long dqform_ea(unsigned int instr,
+					       const struct pt_regs *regs)
+{
+	int ra;
+	unsigned long ea;
+
+	ra = (instr >> 16) & 0x1f;
+	ea = (signed short) (instr & ~0xf);	/* sign-extend */
+	if (ra)
+		ea += regs->gpr[ra];
+
+	return ea;
 }
 #endif /* __powerpc64 */
 
@@ -127,7 +175,7 @@ static nokprobe_inline unsigned long dsform_ea(unsigned int instr, struct pt_reg
  * Calculate effective address for an X-form instruction
  */
 static nokprobe_inline unsigned long xform_ea(unsigned int instr,
-						struct pt_regs *regs)
+					      const struct pt_regs *regs)
 {
 	int ra, rb;
 	unsigned long ea;
@@ -138,7 +186,7 @@ static nokprobe_inline unsigned long xform_ea(unsigned int instr,
 	if (ra)
 		ea += regs->gpr[ra];
 
-	return truncate_if_32bit(regs->msr, ea);
+	return ea;
 }
 
 /*
@@ -151,7 +199,6 @@ static nokprobe_inline unsigned long max_align(unsigned long x)
 	return x & -x;		/* isolates rightmost bit */
 }
 
-
 static nokprobe_inline unsigned long byterev_2(unsigned long x)
 {
 	return ((x >> 8) & 0xff) | ((x & 0xff) << 8);
@@ -170,8 +217,36 @@ static nokprobe_inline unsigned long byterev_8(unsigned long x)
 }
 #endif
 
+static nokprobe_inline void do_byte_reverse(void *ptr, int nb)
+{
+	switch (nb) {
+	case 2:
+		*(u16 *)ptr = byterev_2(*(u16 *)ptr);
+		break;
+	case 4:
+		*(u32 *)ptr = byterev_4(*(u32 *)ptr);
+		break;
+#ifdef __powerpc64__
+	case 8:
+		*(unsigned long *)ptr = byterev_8(*(unsigned long *)ptr);
+		break;
+	case 16: {
+		unsigned long *up = (unsigned long *)ptr;
+		unsigned long tmp;
+		tmp = byterev_8(up[0]);
+		up[0] = byterev_8(up[1]);
+		up[1] = tmp;
+		break;
+	}
+#endif
+	default:
+		WARN_ON_ONCE(1);
+	}
+}
+
 static nokprobe_inline int read_mem_aligned(unsigned long *dest,
-					unsigned long ea, int nb)
+					    unsigned long ea, int nb,
+					    struct pt_regs *regs)
 {
 	int err = 0;
 	unsigned long x = 0;
@@ -194,59 +269,77 @@ static nokprobe_inline int read_mem_aligned(unsigned long *dest,
 	}
 	if (!err)
 		*dest = x;
+	else
+		regs->dar = ea;
 	return err;
 }
 
-static nokprobe_inline int read_mem_unaligned(unsigned long *dest,
-				unsigned long ea, int nb, struct pt_regs *regs)
+/*
+ * Copy from userspace to a buffer, using the largest possible
+ * aligned accesses, up to sizeof(long).
+ */
+static int nokprobe_inline copy_mem_in(u8 *dest, unsigned long ea, int nb,
+				       struct pt_regs *regs)
 {
-	int err;
-	unsigned long x, b, c;
-#ifdef __LITTLE_ENDIAN__
-	int len = nb; /* save a copy of the length for byte reversal */
-#endif
+	int err = 0;
+	int c;
 
-	/* unaligned, do this in pieces */
-	x = 0;
 	for (; nb > 0; nb -= c) {
-#ifdef __LITTLE_ENDIAN__
-		c = 1;
-#endif
-#ifdef __BIG_ENDIAN__
 		c = max_align(ea);
-#endif
 		if (c > nb)
 			c = max_align(nb);
-		err = read_mem_aligned(&b, ea, c);
-		if (err)
-			return err;
-		x = (x << (8 * c)) + b;
-		ea += c;
-	}
-#ifdef __LITTLE_ENDIAN__
-	switch (len) {
-	case 2:
-		*dest = byterev_2(x);
-		break;
-	case 4:
-		*dest = byterev_4(x);
-		break;
+		switch (c) {
+		case 1:
+			err = __get_user(*dest, (unsigned char __user *) ea);
+			break;
+		case 2:
+			err = __get_user(*(u16 *)dest,
+					 (unsigned short __user *) ea);
+			break;
+		case 4:
+			err = __get_user(*(u32 *)dest,
+					 (unsigned int __user *) ea);
+			break;
 #ifdef __powerpc64__
-	case 8:
-		*dest = byterev_8(x);
-		break;
+		case 8:
+			err = __get_user(*(unsigned long *)dest,
+					 (unsigned long __user *) ea);
+			break;
 #endif
+		}
+		if (err) {
+			regs->dar = ea;
+			return err;
+		}
+		dest += c;
+		ea += c;
 	}
-#endif
-#ifdef __BIG_ENDIAN__
-	*dest = x;
-#endif
 	return 0;
 }
 
+static nokprobe_inline int read_mem_unaligned(unsigned long *dest,
+					      unsigned long ea, int nb,
+					      struct pt_regs *regs)
+{
+	union {
+		unsigned long ul;
+		u8 b[sizeof(unsigned long)];
+	} u;
+	int i;
+	int err;
+
+	u.ul = 0;
+	i = IS_BE ? sizeof(unsigned long) - nb : 0;
+	err = copy_mem_in(&u.b[i], ea, nb, regs);
+	if (!err)
+		*dest = u.ul;
+	return err;
+}
+
 /*
  * Read memory at address ea for nb bytes, return 0 for success
- * or -EFAULT if an error occurred.
+ * or -EFAULT if an error occurred.  N.B. nb must be 1, 2, 4 or 8.
+ * If nb < sizeof(long), the result is right-justified on BE systems.
  */
 static int read_mem(unsigned long *dest, unsigned long ea, int nb,
 			      struct pt_regs *regs)
@@ -254,13 +347,14 @@ static int read_mem(unsigned long *dest, unsigned long ea, int nb,
 	if (!address_ok(regs, ea, nb))
 		return -EFAULT;
 	if ((ea & (nb - 1)) == 0)
-		return read_mem_aligned(dest, ea, nb);
+		return read_mem_aligned(dest, ea, nb, regs);
 	return read_mem_unaligned(dest, ea, nb, regs);
 }
 NOKPROBE_SYMBOL(read_mem);
 
 static nokprobe_inline int write_mem_aligned(unsigned long val,
-					unsigned long ea, int nb)
+					     unsigned long ea, int nb,
+					     struct pt_regs *regs)
 {
 	int err = 0;
 
@@ -280,51 +374,72 @@ static nokprobe_inline int write_mem_aligned(unsigned long val,
 		break;
 #endif
 	}
+	if (err)
+		regs->dar = ea;
 	return err;
 }
 
-static nokprobe_inline int write_mem_unaligned(unsigned long val,
-				unsigned long ea, int nb, struct pt_regs *regs)
+/*
+ * Copy from a buffer to userspace, using the largest possible
+ * aligned accesses, up to sizeof(long).
+ */
+static int nokprobe_inline copy_mem_out(u8 *dest, unsigned long ea, int nb,
+					struct pt_regs *regs)
 {
-	int err;
-	unsigned long c;
+	int err = 0;
+	int c;
 
-#ifdef __LITTLE_ENDIAN__
-	switch (nb) {
-	case 2:
-		val = byterev_2(val);
-		break;
-	case 4:
-		val = byterev_4(val);
-		break;
-#ifdef __powerpc64__
-	case 8:
-		val = byterev_8(val);
-		break;
-#endif
-	}
-#endif
-	/* unaligned or little-endian, do this in pieces */
 	for (; nb > 0; nb -= c) {
-#ifdef __LITTLE_ENDIAN__
-		c = 1;
-#endif
-#ifdef __BIG_ENDIAN__
 		c = max_align(ea);
-#endif
 		if (c > nb)
 			c = max_align(nb);
-		err = write_mem_aligned(val >> (nb - c) * 8, ea, c);
-		if (err)
+		switch (c) {
+		case 1:
+			err = __put_user(*dest, (unsigned char __user *) ea);
+			break;
+		case 2:
+			err = __put_user(*(u16 *)dest,
+					 (unsigned short __user *) ea);
+			break;
+		case 4:
+			err = __put_user(*(u32 *)dest,
+					 (unsigned int __user *) ea);
+			break;
+#ifdef __powerpc64__
+		case 8:
+			err = __put_user(*(unsigned long *)dest,
+					 (unsigned long __user *) ea);
+			break;
+#endif
+		}
+		if (err) {
+			regs->dar = ea;
 			return err;
+		}
+		dest += c;
 		ea += c;
 	}
 	return 0;
 }
 
+static nokprobe_inline int write_mem_unaligned(unsigned long val,
+					       unsigned long ea, int nb,
+					       struct pt_regs *regs)
+{
+	union {
+		unsigned long ul;
+		u8 b[sizeof(unsigned long)];
+	} u;
+	int i;
+
+	u.ul = val;
+	i = IS_BE ? sizeof(unsigned long) - nb : 0;
+	return copy_mem_out(&u.b[i], ea, nb, regs);
+}
+
 /*
  * Write memory at address ea for nb bytes, return 0 for success
- * or -EFAULT if an error occurred.
+ * or -EFAULT if an error occurred.  N.B. nb must be 1, 2, 4 or 8.
  */
 static int write_mem(unsigned long val, unsigned long ea, int nb,
 			       struct pt_regs *regs)
@@ -332,163 +447,465 @@ static int write_mem(unsigned long val, unsigned long ea, int nb,
 	if (!address_ok(regs, ea, nb))
 		return -EFAULT;
 	if ((ea & (nb - 1)) == 0)
-		return write_mem_aligned(val, ea, nb);
+		return write_mem_aligned(val, ea, nb, regs);
 	return write_mem_unaligned(val, ea, nb, regs);
 }
 NOKPROBE_SYMBOL(write_mem);
 
 #ifdef CONFIG_PPC_FPU
 /*
- * Check the address and alignment, and call func to do the actual
- * load or store.
+ * These access either the real FP register or the image in the
+ * thread_struct, depending on regs->msr & MSR_FP.
  */
-static int do_fp_load(int rn, int (*func)(int, unsigned long),
-				unsigned long ea, int nb,
-				struct pt_regs *regs)
+static int do_fp_load(struct instruction_op *op, unsigned long ea,
+		      struct pt_regs *regs, bool cross_endian)
 {
-	int err;
+	int err, rn, nb;
 	union {
-		double dbl;
-		unsigned long ul[2];
-		struct {
-#ifdef __BIG_ENDIAN__
-			unsigned _pad_;
-			unsigned word;
-#endif
-#ifdef __LITTLE_ENDIAN__
-			unsigned word;
-			unsigned _pad_;
-#endif
-		} single;
-	} data;
-	unsigned long ptr;
-
+		int i;
+		unsigned int u;
+		float f;
+		double d[2];
+		unsigned long l[2];
+		u8 b[2 * sizeof(double)];
+	} u;
+
+	nb = GETSIZE(op->type);
 	if (!address_ok(regs, ea, nb))
 		return -EFAULT;
-	if ((ea & 3) == 0)
-		return (*func)(rn, ea);
-	ptr = (unsigned long) &data.ul;
-	if (sizeof(unsigned long) == 8 || nb == 4) {
-		err = read_mem_unaligned(&data.ul[0], ea, nb, regs);
-		if (nb == 4)
-			ptr = (unsigned long)&(data.single.word);
-	} else {
-		/* reading a double on 32-bit */
-		err = read_mem_unaligned(&data.ul[0], ea, 4, regs);
-		if (!err)
-			err = read_mem_unaligned(&data.ul[1], ea + 4, 4, regs);
-	}
+	rn = op->reg;
+	err = copy_mem_in(u.b, ea, nb, regs);
 	if (err)
 		return err;
-	return (*func)(rn, ptr);
+	if (unlikely(cross_endian)) {
+		do_byte_reverse(u.b, min(nb, 8));
+		if (nb == 16)
+			do_byte_reverse(&u.b[8], 8);
+	}
+	preempt_disable();
+	if (nb == 4) {
+		if (op->type & FPCONV)
+			conv_sp_to_dp(&u.f, &u.d[0]);
+		else if (op->type & SIGNEXT)
+			u.l[0] = u.i;
+		else
+			u.l[0] = u.u;
+	}
+	if (regs->msr & MSR_FP)
+		put_fpr(rn, &u.d[0]);
+	else
+		current->thread.TS_FPR(rn) = u.l[0];
+	if (nb == 16) {
+		/* lfdp */
+		rn |= 1;
+		if (regs->msr & MSR_FP)
+			put_fpr(rn, &u.d[1]);
+		else
+			current->thread.TS_FPR(rn) = u.l[1];
+	}
+	preempt_enable();
+	return 0;
 }
 NOKPROBE_SYMBOL(do_fp_load);
 
-static int do_fp_store(int rn, int (*func)(int, unsigned long),
-				 unsigned long ea, int nb,
-				 struct pt_regs *regs)
+static int do_fp_store(struct instruction_op *op, unsigned long ea,
+		       struct pt_regs *regs, bool cross_endian)
 {
-	int err;
+	int rn, nb;
 	union {
-		double dbl;
-		unsigned long ul[2];
-		struct {
-#ifdef __BIG_ENDIAN__
-			unsigned _pad_;
-			unsigned word;
-#endif
-#ifdef __LITTLE_ENDIAN__
-			unsigned word;
-			unsigned _pad_;
-#endif
-		} single;
-	} data;
-	unsigned long ptr;
-
+		unsigned int u;
+		float f;
+		double d[2];
+		unsigned long l[2];
+		u8 b[2 * sizeof(double)];
+	} u;
+
+	nb = GETSIZE(op->type);
 	if (!address_ok(regs, ea, nb))
 		return -EFAULT;
-	if ((ea & 3) == 0)
-		return (*func)(rn, ea);
-	ptr = (unsigned long) &data.ul[0];
-	if (sizeof(unsigned long) == 8 || nb == 4) {
-		if (nb == 4)
-			ptr = (unsigned long)&(data.single.word);
-		err = (*func)(rn, ptr);
-		if (err)
-			return err;
-		err = write_mem_unaligned(data.ul[0], ea, nb, regs);
-	} else {
-		/* writing a double on 32-bit */
-		err = (*func)(rn, ptr);
-		if (err)
-			return err;
-		err = write_mem_unaligned(data.ul[0], ea, 4, regs);
-		if (!err)
-			err = write_mem_unaligned(data.ul[1], ea + 4, 4, regs);
+	rn = op->reg;
+	preempt_disable();
+	if (regs->msr & MSR_FP)
+		get_fpr(rn, &u.d[0]);
+	else
+		u.l[0] = current->thread.TS_FPR(rn);
+	if (nb == 4) {
+		if (op->type & FPCONV)
+			conv_dp_to_sp(&u.d[0], &u.f);
+		else
+			u.u = u.l[0];
 	}
-	return err;
+	if (nb == 16) {
+		rn |= 1;
+		if (regs->msr & MSR_FP)
+			get_fpr(rn, &u.d[1]);
+		else
+			u.l[1] = current->thread.TS_FPR(rn);
+	}
+	preempt_enable();
+	if (unlikely(cross_endian)) {
+		do_byte_reverse(u.b, min(nb, 8));
+		if (nb == 16)
+			do_byte_reverse(&u.b[8], 8);
+	}
+	return copy_mem_out(u.b, ea, nb, regs);
 }
 NOKPROBE_SYMBOL(do_fp_store);
 #endif
 
 #ifdef CONFIG_ALTIVEC
 /* For Altivec/VMX, no need to worry about alignment */
-static nokprobe_inline int do_vec_load(int rn, int (*func)(int, unsigned long),
-				 unsigned long ea, struct pt_regs *regs)
+static nokprobe_inline int do_vec_load(int rn, unsigned long ea,
+				       int size, struct pt_regs *regs,
+				       bool cross_endian)
 {
+	int err;
+	union {
+		__vector128 v;
+		u8 b[sizeof(__vector128)];
+	} u = {};
+
 	if (!address_ok(regs, ea & ~0xfUL, 16))
 		return -EFAULT;
-	return (*func)(rn, ea);
+	/* align to multiple of size */
+	ea &= ~(size - 1);
+	err = copy_mem_in(&u.b[ea & 0xf], ea, size, regs);
+	if (err)
+		return err;
+	if (unlikely(cross_endian))
+		do_byte_reverse(&u.b[ea & 0xf], size);
+	preempt_disable();
+	if (regs->msr & MSR_VEC)
+		put_vr(rn, &u.v);
+	else
+		current->thread.vr_state.vr[rn] = u.v;
+	preempt_enable();
+	return 0;
 }
 
-static nokprobe_inline int do_vec_store(int rn, int (*func)(int, unsigned long),
-				  unsigned long ea, struct pt_regs *regs)
+static nokprobe_inline int do_vec_store(int rn, unsigned long ea,
+					int size, struct pt_regs *regs,
+					bool cross_endian)
 {
+	union {
+		__vector128 v;
+		u8 b[sizeof(__vector128)];
+	} u;
+
 	if (!address_ok(regs, ea & ~0xfUL, 16))
 		return -EFAULT;
-	return (*func)(rn, ea);
+	/* align to multiple of size */
+	ea &= ~(size - 1);
+
+	preempt_disable();
+	if (regs->msr & MSR_VEC)
+		get_vr(rn, &u.v);
+	else
+		u.v = current->thread.vr_state.vr[rn];
+	preempt_enable();
+	if (unlikely(cross_endian))
+		do_byte_reverse(&u.b[ea & 0xf], size);
+	return copy_mem_out(&u.b[ea & 0xf], ea, size, regs);
 }
 #endif /* CONFIG_ALTIVEC */
 
-#ifdef CONFIG_VSX
-static nokprobe_inline int do_vsx_load(int rn, int (*func)(int, unsigned long),
-				 unsigned long ea, struct pt_regs *regs)
+#ifdef __powerpc64__
+static nokprobe_inline int emulate_lq(struct pt_regs *regs, unsigned long ea,
+				      int reg, bool cross_endian)
 {
 	int err;
-	unsigned long val[2];
 
 	if (!address_ok(regs, ea, 16))
 		return -EFAULT;
-	if ((ea & 3) == 0)
-		return (*func)(rn, ea);
-	err = read_mem_unaligned(&val[0], ea, 8, regs);
-	if (!err)
-		err = read_mem_unaligned(&val[1], ea + 8, 8, regs);
-	if (!err)
-		err = (*func)(rn, (unsigned long) &val[0]);
+	/* if aligned, should be atomic */
+	if ((ea & 0xf) == 0) {
+		err = do_lq(ea, &regs->gpr[reg]);
+	} else {
+		err = read_mem(&regs->gpr[reg + IS_LE], ea, 8, regs);
+		if (!err)
+			err = read_mem(&regs->gpr[reg + IS_BE], ea + 8, 8, regs);
+	}
+	if (!err && unlikely(cross_endian))
+		do_byte_reverse(&regs->gpr[reg], 16);
 	return err;
 }
 
-static nokprobe_inline int do_vsx_store(int rn, int (*func)(int, unsigned long),
-				 unsigned long ea, struct pt_regs *regs)
+static nokprobe_inline int emulate_stq(struct pt_regs *regs, unsigned long ea,
+				       int reg, bool cross_endian)
 {
 	int err;
-	unsigned long val[2];
+	unsigned long vals[2];
 
 	if (!address_ok(regs, ea, 16))
 		return -EFAULT;
-	if ((ea & 3) == 0)
-		return (*func)(rn, ea);
-	err = (*func)(rn, (unsigned long) &val[0]);
-	if (err)
-		return err;
-	err = write_mem_unaligned(val[0], ea, 8, regs);
+	vals[0] = regs->gpr[reg];
+	vals[1] = regs->gpr[reg + 1];
+	if (unlikely(cross_endian))
+		do_byte_reverse(vals, 16);
+
+	/* if aligned, should be atomic */
+	if ((ea & 0xf) == 0)
+		return do_stq(ea, vals[0], vals[1]);
+
+	err = write_mem(vals[IS_LE], ea, 8, regs);
 	if (!err)
-		err = write_mem_unaligned(val[1], ea + 8, 8, regs);
+		err = write_mem(vals[IS_BE], ea + 8, 8, regs);
 	return err;
 }
+#endif /* __powerpc64 */
+
+#ifdef CONFIG_VSX
+void emulate_vsx_load(struct instruction_op *op, union vsx_reg *reg,
+		      const void *mem, bool rev)
+{
+	int size, read_size;
+	int i, j;
+	const unsigned int *wp;
+	const unsigned short *hp;
+	const unsigned char *bp;
+
+	size = GETSIZE(op->type);
+	reg->d[0] = reg->d[1] = 0;
+
+	switch (op->element_size) {
+	case 16:
+		/* whole vector; lxv[x] or lxvl[l] */
+		if (size == 0)
+			break;
+		memcpy(reg, mem, size);
+		if (IS_LE && (op->vsx_flags & VSX_LDLEFT))
+			rev = !rev;
+		if (rev)
+			do_byte_reverse(reg, 16);
+		break;
+	case 8:
+		/* scalar loads, lxvd2x, lxvdsx */
+		read_size = (size >= 8) ? 8 : size;
+		i = IS_LE ? 8 : 8 - read_size;
+		memcpy(&reg->b[i], mem, read_size);
+		if (rev)
+			do_byte_reverse(&reg->b[i], 8);
+		if (size < 8) {
+			if (op->type & SIGNEXT) {
+				/* size == 4 is the only case here */
+				reg->d[IS_LE] = (signed int) reg->d[IS_LE];
+			} else if (op->vsx_flags & VSX_FPCONV) {
+				preempt_disable();
+				conv_sp_to_dp(&reg->fp[1 + IS_LE],
+					      &reg->dp[IS_LE]);
+				preempt_enable();
+			}
+		} else {
+			if (size == 16) {
+				unsigned long v = *(unsigned long *)(mem + 8);
+				reg->d[IS_BE] = !rev ? v : byterev_8(v);
+			} else if (op->vsx_flags & VSX_SPLAT)
+				reg->d[IS_BE] = reg->d[IS_LE];
+		}
+		break;
+	case 4:
+		/* lxvw4x, lxvwsx */
+		wp = mem;
+		for (j = 0; j < size / 4; ++j) {
+			i = IS_LE ? 3 - j : j;
+			reg->w[i] = !rev ? *wp++ : byterev_4(*wp++);
+		}
+		if (op->vsx_flags & VSX_SPLAT) {
+			u32 val = reg->w[IS_LE ? 3 : 0];
+			for (; j < 4; ++j) {
+				i = IS_LE ? 3 - j : j;
+				reg->w[i] = val;
+			}
+		}
+		break;
+	case 2:
+		/* lxvh8x */
+		hp = mem;
+		for (j = 0; j < size / 2; ++j) {
+			i = IS_LE ? 7 - j : j;
+			reg->h[i] = !rev ? *hp++ : byterev_2(*hp++);
+		}
+		break;
+	case 1:
+		/* lxvb16x */
+		bp = mem;
+		for (j = 0; j < size; ++j) {
+			i = IS_LE ? 15 - j : j;
+			reg->b[i] = *bp++;
+		}
+		break;
+	}
+}
+EXPORT_SYMBOL_GPL(emulate_vsx_load);
+NOKPROBE_SYMBOL(emulate_vsx_load);
+
+void emulate_vsx_store(struct instruction_op *op, const union vsx_reg *reg,
+		       void *mem, bool rev)
+{
+	int size, write_size;
+	int i, j;
+	union vsx_reg buf;
+	unsigned int *wp;
+	unsigned short *hp;
+	unsigned char *bp;
+
+	size = GETSIZE(op->type);
+
+	switch (op->element_size) {
+	case 16:
+		/* stxv, stxvx, stxvl, stxvll */
+		if (size == 0)
+			break;
+		if (IS_LE && (op->vsx_flags & VSX_LDLEFT))
+			rev = !rev;
+		if (rev) {
+			/* reverse 16 bytes */
+			buf.d[0] = byterev_8(reg->d[1]);
+			buf.d[1] = byterev_8(reg->d[0]);
+			reg = &buf;
+		}
+		memcpy(mem, reg, size);
+		break;
+	case 8:
+		/* scalar stores, stxvd2x */
+		write_size = (size >= 8) ? 8 : size;
+		i = IS_LE ? 8 : 8 - write_size;
+		if (size < 8 && op->vsx_flags & VSX_FPCONV) {
+			buf.d[0] = buf.d[1] = 0;
+			preempt_disable();
+			conv_dp_to_sp(&reg->dp[IS_LE], &buf.fp[1 + IS_LE]);
+			preempt_enable();
+			reg = &buf;
+		}
+		memcpy(mem, &reg->b[i], write_size);
+		if (size == 16)
+			memcpy(mem + 8, &reg->d[IS_BE], 8);
+		if (unlikely(rev)) {
+			do_byte_reverse(mem, write_size);
+			if (size == 16)
+				do_byte_reverse(mem + 8, 8);
+		}
+		break;
+	case 4:
+		/* stxvw4x */
+		wp = mem;
+		for (j = 0; j < size / 4; ++j) {
+			i = IS_LE ? 3 - j : j;
+			*wp++ = !rev ? reg->w[i] : byterev_4(reg->w[i]);
+		}
+		break;
+	case 2:
+		/* stxvh8x */
+		hp = mem;
+		for (j = 0; j < size / 2; ++j) {
+			i = IS_LE ? 7 - j : j;
+			*hp++ = !rev ? reg->h[i] : byterev_2(reg->h[i]);
+		}
+		break;
+	case 1:
+		/* stvxb16x */
+		bp = mem;
+		for (j = 0; j < size; ++j) {
+			i = IS_LE ? 15 - j : j;
+			*bp++ = reg->b[i];
+		}
+		break;
+	}
+}
+EXPORT_SYMBOL_GPL(emulate_vsx_store);
+NOKPROBE_SYMBOL(emulate_vsx_store);
+
+static nokprobe_inline int do_vsx_load(struct instruction_op *op,
+				       unsigned long ea, struct pt_regs *regs,
+				       bool cross_endian)
+{
+	int reg = op->reg;
+	u8 mem[16];
+	union vsx_reg buf;
+	int size = GETSIZE(op->type);
+
+	if (!address_ok(regs, ea, size) || copy_mem_in(mem, ea, size, regs))
+		return -EFAULT;
+
+	emulate_vsx_load(op, &buf, mem, cross_endian);
+	preempt_disable();
+	if (reg < 32) {
+		/* FP regs + extensions */
+		if (regs->msr & MSR_FP) {
+			load_vsrn(reg, &buf);
+		} else {
+			current->thread.fp_state.fpr[reg][0] = buf.d[0];
+			current->thread.fp_state.fpr[reg][1] = buf.d[1];
+		}
+	} else {
+		if (regs->msr & MSR_VEC)
+			load_vsrn(reg, &buf);
+		else
+			current->thread.vr_state.vr[reg - 32] = buf.v;
+	}
+	preempt_enable();
+	return 0;
+}
+
+static nokprobe_inline int do_vsx_store(struct instruction_op *op,
+					unsigned long ea, struct pt_regs *regs,
+					bool cross_endian)
+{
+	int reg = op->reg;
+	u8 mem[16];
+	union vsx_reg buf;
+	int size = GETSIZE(op->type);
+
+	if (!address_ok(regs, ea, size))
+		return -EFAULT;
+
+	preempt_disable();
+	if (reg < 32) {
+		/* FP regs + extensions */
+		if (regs->msr & MSR_FP) {
+			store_vsrn(reg, &buf);
+		} else {
+			buf.d[0] = current->thread.fp_state.fpr[reg][0];
+			buf.d[1] = current->thread.fp_state.fpr[reg][1];
+		}
+	} else {
+		if (regs->msr & MSR_VEC)
+			store_vsrn(reg, &buf);
+		else
+			buf.v = current->thread.vr_state.vr[reg - 32];
+	}
+	preempt_enable();
+	emulate_vsx_store(op, &buf, mem, cross_endian);
+	return  copy_mem_out(mem, ea, size, regs);
+}
 #endif /* CONFIG_VSX */
 
+int emulate_dcbz(unsigned long ea, struct pt_regs *regs)
+{
+	int err;
+	unsigned long i, size;
+
+#ifdef __powerpc64__
+	size = ppc64_caches.l1d.block_size;
+	if (!(regs->msr & MSR_64BIT))
+		ea &= 0xffffffffUL;
+#else
+	size = L1_CACHE_BYTES;
+#endif
+	ea &= ~(size - 1);
+	if (!address_ok(regs, ea, size))
+		return -EFAULT;
+	for (i = 0; i < size; i += sizeof(long)) {
+		err = __put_user(0, (unsigned long __user *) (ea + i));
+		if (err) {
+			regs->dar = ea;
+			return err;
+		}
+	}
+	return 0;
+}
+NOKPROBE_SYMBOL(emulate_dcbz);
+
 #define __put_user_asmx(x, addr, err, op, cr)		\
 	__asm__ __volatile__(				\
 		"1:	" op " %2,0,%3\n"		\
@@ -526,24 +943,27 @@ static nokprobe_inline int do_vsx_store(int rn, int (*func)(int, unsigned long),
 		: "=r" (err)				\
 		: "r" (addr), "i" (-EFAULT), "0" (err))
 
-static nokprobe_inline void set_cr0(struct pt_regs *regs, int rd)
+static nokprobe_inline void set_cr0(const struct pt_regs *regs,
+				    struct instruction_op *op, int rd)
 {
 	long val = regs->gpr[rd];
 
-	regs->ccr = (regs->ccr & 0x0fffffff) | ((regs->xer >> 3) & 0x10000000);
+	op->type |= SETCC;
+	op->ccval = (regs->ccr & 0x0fffffff) | ((regs->xer >> 3) & 0x10000000);
 #ifdef __powerpc64__
 	if (!(regs->msr & MSR_64BIT))
 		val = (int) val;
 #endif
 	if (val < 0)
-		regs->ccr |= 0x80000000;
+		op->ccval |= 0x80000000;
 	else if (val > 0)
-		regs->ccr |= 0x40000000;
+		op->ccval |= 0x40000000;
 	else
-		regs->ccr |= 0x20000000;
+		op->ccval |= 0x20000000;
 }
 
-static nokprobe_inline void add_with_carry(struct pt_regs *regs, int rd,
+static nokprobe_inline void add_with_carry(const struct pt_regs *regs,
+				     struct instruction_op *op, int rd,
 				     unsigned long val1, unsigned long val2,
 				     unsigned long carry_in)
 {
@@ -551,24 +971,29 @@ static nokprobe_inline void add_with_carry(struct pt_regs *regs, int rd,
 
 	if (carry_in)
 		++val;
-	regs->gpr[rd] = val;
+	op->type = COMPUTE + SETREG + SETXER;
+	op->reg = rd;
+	op->val = val;
 #ifdef __powerpc64__
 	if (!(regs->msr & MSR_64BIT)) {
 		val = (unsigned int) val;
 		val1 = (unsigned int) val1;
 	}
 #endif
+	op->xerval = regs->xer;
 	if (val < val1 || (carry_in && val == val1))
-		regs->xer |= XER_CA;
+		op->xerval |= XER_CA;
 	else
-		regs->xer &= ~XER_CA;
+		op->xerval &= ~XER_CA;
 }
 
-static nokprobe_inline void do_cmp_signed(struct pt_regs *regs, long v1, long v2,
-				    int crfld)
+static nokprobe_inline void do_cmp_signed(const struct pt_regs *regs,
+					  struct instruction_op *op,
+					  long v1, long v2, int crfld)
 {
 	unsigned int crval, shift;
 
+	op->type = COMPUTE + SETCC;
 	crval = (regs->xer >> 31) & 1;		/* get SO bit */
 	if (v1 < v2)
 		crval |= 8;
@@ -577,14 +1002,17 @@ static nokprobe_inline void do_cmp_signed(struct pt_regs *regs, long v1, long v2
 	else
 		crval |= 2;
 	shift = (7 - crfld) * 4;
-	regs->ccr = (regs->ccr & ~(0xf << shift)) | (crval << shift);
+	op->ccval = (regs->ccr & ~(0xf << shift)) | (crval << shift);
 }
 
-static nokprobe_inline void do_cmp_unsigned(struct pt_regs *regs, unsigned long v1,
-				      unsigned long v2, int crfld)
+static nokprobe_inline void do_cmp_unsigned(const struct pt_regs *regs,
+					    struct instruction_op *op,
+					    unsigned long v1,
+					    unsigned long v2, int crfld)
 {
 	unsigned int crval, shift;
 
+	op->type = COMPUTE + SETCC;
 	crval = (regs->xer >> 31) & 1;		/* get SO bit */
 	if (v1 < v2)
 		crval |= 8;
@@ -593,7 +1021,90 @@ static nokprobe_inline void do_cmp_unsigned(struct pt_regs *regs, unsigned long
 	else
 		crval |= 2;
 	shift = (7 - crfld) * 4;
-	regs->ccr = (regs->ccr & ~(0xf << shift)) | (crval << shift);
+	op->ccval = (regs->ccr & ~(0xf << shift)) | (crval << shift);
+}
+
+static nokprobe_inline void do_cmpb(const struct pt_regs *regs,
+				    struct instruction_op *op,
+				    unsigned long v1, unsigned long v2)
+{
+	unsigned long long out_val, mask;
+	int i;
+
+	out_val = 0;
+	for (i = 0; i < 8; i++) {
+		mask = 0xffUL << (i * 8);
+		if ((v1 & mask) == (v2 & mask))
+			out_val |= mask;
+	}
+	op->val = out_val;
+}
+
+/*
+ * The size parameter is used to adjust the equivalent popcnt instruction.
+ * popcntb = 8, popcntw = 32, popcntd = 64
+ */
+static nokprobe_inline void do_popcnt(const struct pt_regs *regs,
+				      struct instruction_op *op,
+				      unsigned long v1, int size)
+{
+	unsigned long long out = v1;
+
+	out -= (out >> 1) & 0x5555555555555555;
+	out = (0x3333333333333333 & out) + (0x3333333333333333 & (out >> 2));
+	out = (out + (out >> 4)) & 0x0f0f0f0f0f0f0f0f;
+
+	if (size == 8) {	/* popcntb */
+		op->val = out;
+		return;
+	}
+	out += out >> 8;
+	out += out >> 16;
+	if (size == 32) {	/* popcntw */
+		op->val = out & 0x0000003f0000003f;
+		return;
+	}
+
+	out = (out + (out >> 32)) & 0x7f;
+	op->val = out;	/* popcntd */
+}
+
+#ifdef CONFIG_PPC64
+static nokprobe_inline void do_bpermd(const struct pt_regs *regs,
+				      struct instruction_op *op,
+				      unsigned long v1, unsigned long v2)
+{
+	unsigned char perm, idx;
+	unsigned int i;
+
+	perm = 0;
+	for (i = 0; i < 8; i++) {
+		idx = (v1 >> (i * 8)) & 0xff;
+		if (idx < 64)
+			if (v2 & PPC_BIT(idx))
+				perm |= 1 << i;
+	}
+	op->val = perm;
+}
+#endif /* CONFIG_PPC64 */
+/*
+ * The size parameter adjusts the equivalent prty instruction.
+ * prtyw = 32, prtyd = 64
+ */
+static nokprobe_inline void do_prty(const struct pt_regs *regs,
+				    struct instruction_op *op,
+				    unsigned long v, int size)
+{
+	unsigned long long res = v ^ (v >> 8);
+
+	res ^= res >> 16;
+	if (size == 32) {		/* prtyw */
+		op->val = res & 0x0000000100000001;
+		return;
+	}
+
+	res ^= res >> 32;
+	op->val = res & 1;	/*prtyd */
 }
 
 static nokprobe_inline int trap_compare(long v1, long v2)
@@ -629,14 +1140,18 @@ static nokprobe_inline int trap_compare(long v1, long v2)
 #define ROTATE(x, n)	((n) ? (((x) << (n)) | ((x) >> (8 * sizeof(long) - (n)))) : (x))
 
 /*
- * Decode an instruction, and execute it if that can be done just by
- * modifying *regs (i.e. integer arithmetic and logical instructions,
- * branches, and barrier instructions).
- * Returns 1 if the instruction has been executed, or 0 if not.
- * Sets *op to indicate what the instruction does.
+ * Decode an instruction, and return information about it in *op
+ * without changing *regs.
+ * Integer arithmetic and logical instructions, branches, and barrier
+ * instructions can be emulated just using the information in *op.
+ *
+ * Return value is 1 if the instruction can be emulated just by
+ * updating *regs with the information in *op, -1 if we need the
+ * GPRs but *regs doesn't contain the full register set, or 0
+ * otherwise.
  */
-int analyse_instr(struct instruction_op *op, struct pt_regs *regs,
-			    unsigned int instr)
+int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
+		  unsigned int instr)
 {
 	unsigned int opcode, ra, rb, rd, spr, u;
 	unsigned long int imm;
@@ -653,12 +1168,11 @@ int analyse_instr(struct instruction_op *op, struct pt_regs *regs,
 		imm = (signed short)(instr & 0xfffc);
 		if ((instr & 2) == 0)
 			imm += regs->nip;
-		regs->nip += 4;
-		regs->nip = truncate_if_32bit(regs->msr, regs->nip);
+		op->val = truncate_if_32bit(regs->msr, imm);
 		if (instr & 1)
-			regs->link = regs->nip;
-		if (branch_taken(instr, regs))
-			regs->nip = truncate_if_32bit(regs->msr, imm);
+			op->type |= SETLK;
+		if (branch_taken(instr, regs, op))
+			op->type |= BRTAKEN;
 		return 1;
 #ifdef CONFIG_PPC64
 	case 17:	/* sc */
@@ -669,38 +1183,37 @@ int analyse_instr(struct instruction_op *op, struct pt_regs *regs,
 		return 0;
 #endif
 	case 18:	/* b */
-		op->type = BRANCH;
+		op->type = BRANCH | BRTAKEN;
 		imm = instr & 0x03fffffc;
 		if (imm & 0x02000000)
 			imm -= 0x04000000;
 		if ((instr & 2) == 0)
 			imm += regs->nip;
+		op->val = truncate_if_32bit(regs->msr, imm);
 		if (instr & 1)
-			regs->link = truncate_if_32bit(regs->msr, regs->nip + 4);
-		imm = truncate_if_32bit(regs->msr, imm);
-		regs->nip = imm;
+			op->type |= SETLK;
 		return 1;
 	case 19:
 		switch ((instr >> 1) & 0x3ff) {
 		case 0:		/* mcrf */
+			op->type = COMPUTE + SETCC;
 			rd = 7 - ((instr >> 23) & 0x7);
 			ra = 7 - ((instr >> 18) & 0x7);
 			rd *= 4;
 			ra *= 4;
 			val = (regs->ccr >> ra) & 0xf;
-			regs->ccr = (regs->ccr & ~(0xfUL << rd)) | (val << rd);
-			goto instr_done;
+			op->ccval = (regs->ccr & ~(0xfUL << rd)) | (val << rd);
+			return 1;
 
 		case 16:	/* bclr */
 		case 528:	/* bcctr */
 			op->type = BRANCH;
 			imm = (instr & 0x400)? regs->ctr: regs->link;
-			regs->nip = truncate_if_32bit(regs->msr, regs->nip + 4);
-			imm = truncate_if_32bit(regs->msr, imm);
+			op->val = truncate_if_32bit(regs->msr, imm);
 			if (instr & 1)
-				regs->link = regs->nip;
-			if (branch_taken(instr, regs))
-				regs->nip = imm;
+				op->type |= SETLK;
+			if (branch_taken(instr, regs, op))
+				op->type |= BRTAKEN;
 			return 1;
 
 		case 18:	/* rfid, scary */
@@ -710,9 +1223,8 @@ int analyse_instr(struct instruction_op *op, struct pt_regs *regs,
 			return 0;
 
 		case 150:	/* isync */
-			op->type = BARRIER;
-			isync();
-			goto instr_done;
+			op->type = BARRIER | BARRIER_ISYNC;
+			return 1;
 
 		case 33:	/* crnor */
 		case 129:	/* crandc */
@@ -722,45 +1234,44 @@ int analyse_instr(struct instruction_op *op, struct pt_regs *regs,
 		case 289:	/* creqv */
 		case 417:	/* crorc */
 		case 449:	/* cror */
+			op->type = COMPUTE + SETCC;
 			ra = (instr >> 16) & 0x1f;
 			rb = (instr >> 11) & 0x1f;
 			rd = (instr >> 21) & 0x1f;
 			ra = (regs->ccr >> (31 - ra)) & 1;
 			rb = (regs->ccr >> (31 - rb)) & 1;
 			val = (instr >> (6 + ra * 2 + rb)) & 1;
-			regs->ccr = (regs->ccr & ~(1UL << (31 - rd))) |
+			op->ccval = (regs->ccr & ~(1UL << (31 - rd))) |
 				(val << (31 - rd));
-			goto instr_done;
+			return 1;
 		}
 		break;
 	case 31:
 		switch ((instr >> 1) & 0x3ff) {
 		case 598:	/* sync */
-			op->type = BARRIER;
+			op->type = BARRIER + BARRIER_SYNC;
 #ifdef __powerpc64__
 			switch ((instr >> 21) & 3) {
 			case 1:		/* lwsync */
-				asm volatile("lwsync" : : : "memory");
-				goto instr_done;
+				op->type = BARRIER + BARRIER_LWSYNC;
+				break;
 			case 2:		/* ptesync */
-				asm volatile("ptesync" : : : "memory");
-				goto instr_done;
+				op->type = BARRIER + BARRIER_PTESYNC;
+				break;
 			}
 #endif
-			mb();
-			goto instr_done;
+			return 1;
 
 		case 854:	/* eieio */
-			op->type = BARRIER;
-			eieio();
-			goto instr_done;
+			op->type = BARRIER + BARRIER_EIEIO;
+			return 1;
 		}
 		break;
 	}
 
 	/* Following cases refer to regs->gpr[], so we need all regs */
 	if (!FULL_REGS(regs))
-		return 0;
+		return -1;
 
 	rd = (instr >> 21) & 0x1f;
 	ra = (instr >> 16) & 0x1f;
@@ -771,21 +1282,21 @@ int analyse_instr(struct instruction_op *op, struct pt_regs *regs,
 	case 2:		/* tdi */
 		if (rd & trap_compare(regs->gpr[ra], (short) instr))
 			goto trap;
-		goto instr_done;
+		return 1;
 #endif
 	case 3:		/* twi */
 		if (rd & trap_compare((int)regs->gpr[ra], (short) instr))
 			goto trap;
-		goto instr_done;
+		return 1;
 
 	case 7:		/* mulli */
-		regs->gpr[rd] = regs->gpr[ra] * (short) instr;
-		goto instr_done;
+		op->val = regs->gpr[ra] * (short) instr;
+		goto compute_done;
 
 	case 8:		/* subfic */
 		imm = (short) instr;
-		add_with_carry(regs, rd, ~regs->gpr[ra], imm, 1);
-		goto instr_done;
+		add_with_carry(regs, op, rd, ~regs->gpr[ra], imm, 1);
+		return 1;
 
 	case 10:	/* cmpli */
 		imm = (unsigned short) instr;
@@ -794,8 +1305,8 @@ int analyse_instr(struct instruction_op *op, struct pt_regs *regs,
 		if ((rd & 1) == 0)
 			val = (unsigned int) val;
 #endif
-		do_cmp_unsigned(regs, val, imm, rd >> 2);
-		goto instr_done;
+		do_cmp_unsigned(regs, op, val, imm, rd >> 2);
+		return 1;
 
 	case 11:	/* cmpi */
 		imm = (short) instr;
@@ -804,47 +1315,58 @@ int analyse_instr(struct instruction_op *op, struct pt_regs *regs,
 		if ((rd & 1) == 0)
 			val = (int) val;
 #endif
-		do_cmp_signed(regs, val, imm, rd >> 2);
-		goto instr_done;
+		do_cmp_signed(regs, op, val, imm, rd >> 2);
+		return 1;
 
 	case 12:	/* addic */
 		imm = (short) instr;
-		add_with_carry(regs, rd, regs->gpr[ra], imm, 0);
-		goto instr_done;
+		add_with_carry(regs, op, rd, regs->gpr[ra], imm, 0);
+		return 1;
 
 	case 13:	/* addic. */
 		imm = (short) instr;
-		add_with_carry(regs, rd, regs->gpr[ra], imm, 0);
-		set_cr0(regs, rd);
-		goto instr_done;
+		add_with_carry(regs, op, rd, regs->gpr[ra], imm, 0);
+		set_cr0(regs, op, rd);
+		return 1;
 
 	case 14:	/* addi */
 		imm = (short) instr;
 		if (ra)
 			imm += regs->gpr[ra];
-		regs->gpr[rd] = imm;
-		goto instr_done;
+		op->val = imm;
+		goto compute_done;
 
 	case 15:	/* addis */
 		imm = ((short) instr) << 16;
 		if (ra)
 			imm += regs->gpr[ra];
-		regs->gpr[rd] = imm;
-		goto instr_done;
+		op->val = imm;
+		goto compute_done;
+
+	case 19:
+		if (((instr >> 1) & 0x1f) == 2) {
+			/* addpcis */
+			imm = (short) (instr & 0xffc1);	/* d0 + d2 fields */
+			imm |= (instr >> 15) & 0x3e;	/* d1 field */
+			op->val = regs->nip + (imm << 16) + 4;
+			goto compute_done;
+		}
+		op->type = UNKNOWN;
+		return 0;
 
 	case 20:	/* rlwimi */
 		mb = (instr >> 6) & 0x1f;
 		me = (instr >> 1) & 0x1f;
 		val = DATA32(regs->gpr[rd]);
 		imm = MASK32(mb, me);
-		regs->gpr[ra] = (regs->gpr[ra] & ~imm) | (ROTATE(val, rb) & imm);
+		op->val = (regs->gpr[ra] & ~imm) | (ROTATE(val, rb) & imm);
 		goto logical_done;
 
 	case 21:	/* rlwinm */
 		mb = (instr >> 6) & 0x1f;
 		me = (instr >> 1) & 0x1f;
 		val = DATA32(regs->gpr[rd]);
-		regs->gpr[ra] = ROTATE(val, rb) & MASK32(mb, me);
+		op->val = ROTATE(val, rb) & MASK32(mb, me);
 		goto logical_done;
 
 	case 23:	/* rlwnm */
@@ -852,40 +1374,37 @@ int analyse_instr(struct instruction_op *op, struct pt_regs *regs,
 		me = (instr >> 1) & 0x1f;
 		rb = regs->gpr[rb] & 0x1f;
 		val = DATA32(regs->gpr[rd]);
-		regs->gpr[ra] = ROTATE(val, rb) & MASK32(mb, me);
+		op->val = ROTATE(val, rb) & MASK32(mb, me);
 		goto logical_done;
 
 	case 24:	/* ori */
-		imm = (unsigned short) instr;
-		regs->gpr[ra] = regs->gpr[rd] | imm;
-		goto instr_done;
+		op->val = regs->gpr[rd] | (unsigned short) instr;
+		goto logical_done_nocc;
 
 	case 25:	/* oris */
 		imm = (unsigned short) instr;
-		regs->gpr[ra] = regs->gpr[rd] | (imm << 16);
-		goto instr_done;
+		op->val = regs->gpr[rd] | (imm << 16);
+		goto logical_done_nocc;
 
 	case 26:	/* xori */
-		imm = (unsigned short) instr;
-		regs->gpr[ra] = regs->gpr[rd] ^ imm;
-		goto instr_done;
+		op->val = regs->gpr[rd] ^ (unsigned short) instr;
+		goto logical_done_nocc;
 
 	case 27:	/* xoris */
 		imm = (unsigned short) instr;
-		regs->gpr[ra] = regs->gpr[rd] ^ (imm << 16);
-		goto instr_done;
+		op->val = regs->gpr[rd] ^ (imm << 16);
+		goto logical_done_nocc;
 
 	case 28:	/* andi. */
-		imm = (unsigned short) instr;
-		regs->gpr[ra] = regs->gpr[rd] & imm;
-		set_cr0(regs, ra);
-		goto instr_done;
+		op->val = regs->gpr[rd] & (unsigned short) instr;
+		set_cr0(regs, op, ra);
+		goto logical_done_nocc;
 
 	case 29:	/* andis. */
 		imm = (unsigned short) instr;
-		regs->gpr[ra] = regs->gpr[rd] & (imm << 16);
-		set_cr0(regs, ra);
-		goto instr_done;
+		op->val = regs->gpr[rd] & (imm << 16);
+		set_cr0(regs, op, ra);
+		goto logical_done_nocc;
 
 #ifdef __powerpc64__
 	case 30:	/* rld* */
@@ -896,48 +1415,60 @@ int analyse_instr(struct instruction_op *op, struct pt_regs *regs,
 			val = ROTATE(val, sh);
 			switch ((instr >> 2) & 3) {
 			case 0:		/* rldicl */
-				regs->gpr[ra] = val & MASK64_L(mb);
-				goto logical_done;
+				val &= MASK64_L(mb);
+				break;
 			case 1:		/* rldicr */
-				regs->gpr[ra] = val & MASK64_R(mb);
-				goto logical_done;
+				val &= MASK64_R(mb);
+				break;
 			case 2:		/* rldic */
-				regs->gpr[ra] = val & MASK64(mb, 63 - sh);
-				goto logical_done;
+				val &= MASK64(mb, 63 - sh);
+				break;
 			case 3:		/* rldimi */
 				imm = MASK64(mb, 63 - sh);
-				regs->gpr[ra] = (regs->gpr[ra] & ~imm) |
+				val = (regs->gpr[ra] & ~imm) |
 					(val & imm);
-				goto logical_done;
 			}
+			op->val = val;
+			goto logical_done;
 		} else {
 			sh = regs->gpr[rb] & 0x3f;
 			val = ROTATE(val, sh);
 			switch ((instr >> 1) & 7) {
 			case 0:		/* rldcl */
-				regs->gpr[ra] = val & MASK64_L(mb);
+				op->val = val & MASK64_L(mb);
 				goto logical_done;
 			case 1:		/* rldcr */
-				regs->gpr[ra] = val & MASK64_R(mb);
+				op->val = val & MASK64_R(mb);
 				goto logical_done;
 			}
 		}
 #endif
-	break; /* illegal instruction */
+		op->type = UNKNOWN;	/* illegal instruction */
+		return 0;
 
 	case 31:
+		/* isel occupies 32 minor opcodes */
+		if (((instr >> 1) & 0x1f) == 15) {
+			mb = (instr >> 6) & 0x1f; /* bc field */
+			val = (regs->ccr >> (31 - mb)) & 1;
+			val2 = (ra) ? regs->gpr[ra] : 0;
+
+			op->val = (val) ? val2 : regs->gpr[rb];
+			goto compute_done;
+		}
+
 		switch ((instr >> 1) & 0x3ff) {
 		case 4:		/* tw */
 			if (rd == 0x1f ||
 			    (rd & trap_compare((int)regs->gpr[ra],
 					       (int)regs->gpr[rb])))
 				goto trap;
-			goto instr_done;
+			return 1;
 #ifdef __powerpc64__
 		case 68:	/* td */
 			if (rd & trap_compare(regs->gpr[ra], regs->gpr[rb]))
 				goto trap;
-			goto instr_done;
+			return 1;
 #endif
 		case 83:	/* mfmsr */
 			if (regs->msr & MSR_PR)
@@ -966,74 +1497,50 @@ int analyse_instr(struct instruction_op *op, struct pt_regs *regs,
 #endif
 
 		case 19:	/* mfcr */
+			imm = 0xffffffffUL;
 			if ((instr >> 20) & 1) {
 				imm = 0xf0000000UL;
 				for (sh = 0; sh < 8; ++sh) {
-					if (instr & (0x80000 >> sh)) {
-						regs->gpr[rd] = regs->ccr & imm;
+					if (instr & (0x80000 >> sh))
 						break;
-					}
 					imm >>= 4;
 				}
-
-				goto instr_done;
 			}
-
-			regs->gpr[rd] = regs->ccr;
-			regs->gpr[rd] &= 0xffffffffUL;
-			goto instr_done;
+			op->val = regs->ccr & imm;
+			goto compute_done;
 
 		case 144:	/* mtcrf */
+			op->type = COMPUTE + SETCC;
 			imm = 0xf0000000UL;
 			val = regs->gpr[rd];
+			op->val = regs->ccr;
 			for (sh = 0; sh < 8; ++sh) {
 				if (instr & (0x80000 >> sh))
-					regs->ccr = (regs->ccr & ~imm) |
+					op->val = (op->val & ~imm) |
 						(val & imm);
 				imm >>= 4;
 			}
-			goto instr_done;
+			return 1;
 
 		case 339:	/* mfspr */
 			spr = ((instr >> 16) & 0x1f) | ((instr >> 6) & 0x3e0);
-			switch (spr) {
-			case SPRN_XER:	/* mfxer */
-				regs->gpr[rd] = regs->xer;
-				regs->gpr[rd] &= 0xffffffffUL;
-				goto instr_done;
-			case SPRN_LR:	/* mflr */
-				regs->gpr[rd] = regs->link;
-				goto instr_done;
-			case SPRN_CTR:	/* mfctr */
-				regs->gpr[rd] = regs->ctr;
-				goto instr_done;
-			default:
-				op->type = MFSPR;
-				op->reg = rd;
-				op->spr = spr;
-				return 0;
-			}
-			break;
+			op->type = MFSPR;
+			op->reg = rd;
+			op->spr = spr;
+			if (spr == SPRN_XER || spr == SPRN_LR ||
+			    spr == SPRN_CTR)
+				return 1;
+			return 0;
 
 		case 467:	/* mtspr */
 			spr = ((instr >> 16) & 0x1f) | ((instr >> 6) & 0x3e0);
-			switch (spr) {
-			case SPRN_XER:	/* mtxer */
-				regs->xer = (regs->gpr[rd] & 0xffffffffUL);
-				goto instr_done;
-			case SPRN_LR:	/* mtlr */
-				regs->link = regs->gpr[rd];
-				goto instr_done;
-			case SPRN_CTR:	/* mtctr */
-				regs->ctr = regs->gpr[rd];
-				goto instr_done;
-			default:
-				op->type = MTSPR;
-				op->val = regs->gpr[rd];
-				op->spr = spr;
-				return 0;
-			}
-			break;
+			op->type = MTSPR;
+			op->val = regs->gpr[rd];
+			op->spr = spr;
+			if (spr == SPRN_XER || spr == SPRN_LR ||
+			    spr == SPRN_CTR)
+				return 1;
+			return 0;
 
 /*
  * Compare instructions
@@ -1048,8 +1555,8 @@ int analyse_instr(struct instruction_op *op, struct pt_regs *regs,
 				val2 = (int) val2;
 			}
 #endif
-			do_cmp_signed(regs, val, val2, rd >> 2);
-			goto instr_done;
+			do_cmp_signed(regs, op, val, val2, rd >> 2);
+			return 1;
 
 		case 32:	/* cmpl */
 			val = regs->gpr[ra];
@@ -1061,109 +1568,113 @@ int analyse_instr(struct instruction_op *op, struct pt_regs *regs,
 				val2 = (unsigned int) val2;
 			}
 #endif
-			do_cmp_unsigned(regs, val, val2, rd >> 2);
-			goto instr_done;
+			do_cmp_unsigned(regs, op, val, val2, rd >> 2);
+			return 1;
+
+		case 508: /* cmpb */
+			do_cmpb(regs, op, regs->gpr[rd], regs->gpr[rb]);
+			goto logical_done_nocc;
 
 /*
  * Arithmetic instructions
  */
 		case 8:	/* subfc */
-			add_with_carry(regs, rd, ~regs->gpr[ra],
+			add_with_carry(regs, op, rd, ~regs->gpr[ra],
 				       regs->gpr[rb], 1);
 			goto arith_done;
 #ifdef __powerpc64__
 		case 9:	/* mulhdu */
-			asm("mulhdu %0,%1,%2" : "=r" (regs->gpr[rd]) :
+			asm("mulhdu %0,%1,%2" : "=r" (op->val) :
 			    "r" (regs->gpr[ra]), "r" (regs->gpr[rb]));
 			goto arith_done;
 #endif
 		case 10:	/* addc */
-			add_with_carry(regs, rd, regs->gpr[ra],
+			add_with_carry(regs, op, rd, regs->gpr[ra],
 				       regs->gpr[rb], 0);
 			goto arith_done;
 
 		case 11:	/* mulhwu */
-			asm("mulhwu %0,%1,%2" : "=r" (regs->gpr[rd]) :
+			asm("mulhwu %0,%1,%2" : "=r" (op->val) :
 			    "r" (regs->gpr[ra]), "r" (regs->gpr[rb]));
 			goto arith_done;
 
 		case 40:	/* subf */
-			regs->gpr[rd] = regs->gpr[rb] - regs->gpr[ra];
+			op->val = regs->gpr[rb] - regs->gpr[ra];
 			goto arith_done;
 #ifdef __powerpc64__
 		case 73:	/* mulhd */
-			asm("mulhd %0,%1,%2" : "=r" (regs->gpr[rd]) :
+			asm("mulhd %0,%1,%2" : "=r" (op->val) :
 			    "r" (regs->gpr[ra]), "r" (regs->gpr[rb]));
 			goto arith_done;
 #endif
 		case 75:	/* mulhw */
-			asm("mulhw %0,%1,%2" : "=r" (regs->gpr[rd]) :
+			asm("mulhw %0,%1,%2" : "=r" (op->val) :
 			    "r" (regs->gpr[ra]), "r" (regs->gpr[rb]));
 			goto arith_done;
 
 		case 104:	/* neg */
-			regs->gpr[rd] = -regs->gpr[ra];
+			op->val = -regs->gpr[ra];
 			goto arith_done;
 
 		case 136:	/* subfe */
-			add_with_carry(regs, rd, ~regs->gpr[ra], regs->gpr[rb],
-				       regs->xer & XER_CA);
+			add_with_carry(regs, op, rd, ~regs->gpr[ra],
+				       regs->gpr[rb], regs->xer & XER_CA);
 			goto arith_done;
 
 		case 138:	/* adde */
-			add_with_carry(regs, rd, regs->gpr[ra], regs->gpr[rb],
-				       regs->xer & XER_CA);
+			add_with_carry(regs, op, rd, regs->gpr[ra],
+				       regs->gpr[rb], regs->xer & XER_CA);
 			goto arith_done;
 
 		case 200:	/* subfze */
-			add_with_carry(regs, rd, ~regs->gpr[ra], 0L,
+			add_with_carry(regs, op, rd, ~regs->gpr[ra], 0L,
 				       regs->xer & XER_CA);
 			goto arith_done;
 
 		case 202:	/* addze */
-			add_with_carry(regs, rd, regs->gpr[ra], 0L,
+			add_with_carry(regs, op, rd, regs->gpr[ra], 0L,
 				       regs->xer & XER_CA);
 			goto arith_done;
 
 		case 232:	/* subfme */
-			add_with_carry(regs, rd, ~regs->gpr[ra], -1L,
+			add_with_carry(regs, op, rd, ~regs->gpr[ra], -1L,
 				       regs->xer & XER_CA);
 			goto arith_done;
 #ifdef __powerpc64__
 		case 233:	/* mulld */
-			regs->gpr[rd] = regs->gpr[ra] * regs->gpr[rb];
+			op->val = regs->gpr[ra] * regs->gpr[rb];
 			goto arith_done;
 #endif
 		case 234:	/* addme */
-			add_with_carry(regs, rd, regs->gpr[ra], -1L,
+			add_with_carry(regs, op, rd, regs->gpr[ra], -1L,
 				       regs->xer & XER_CA);
 			goto arith_done;
 
 		case 235:	/* mullw */
-			regs->gpr[rd] = (unsigned int) regs->gpr[ra] *
+			op->val = (unsigned int) regs->gpr[ra] *
 				(unsigned int) regs->gpr[rb];
 			goto arith_done;
 
 		case 266:	/* add */
-			regs->gpr[rd] = regs->gpr[ra] + regs->gpr[rb];
+			op->val = regs->gpr[ra] + regs->gpr[rb];
 			goto arith_done;
 #ifdef __powerpc64__
 		case 457:	/* divdu */
-			regs->gpr[rd] = regs->gpr[ra] / regs->gpr[rb];
+			op->val = regs->gpr[ra] / regs->gpr[rb];
 			goto arith_done;
 #endif
 		case 459:	/* divwu */
-			regs->gpr[rd] = (unsigned int) regs->gpr[ra] /
+			op->val = (unsigned int) regs->gpr[ra] /
 				(unsigned int) regs->gpr[rb];
 			goto arith_done;
 #ifdef __powerpc64__
 		case 489:	/* divd */
-			regs->gpr[rd] = (long int) regs->gpr[ra] /
+			op->val = (long int) regs->gpr[ra] /
 				(long int) regs->gpr[rb];
 			goto arith_done;
 #endif
 		case 491:	/* divw */
-			regs->gpr[rd] = (int) regs->gpr[ra] /
+			op->val = (int) regs->gpr[ra] /
 				(int) regs->gpr[rb];
 			goto arith_done;
 
@@ -1172,57 +1683,79 @@ int analyse_instr(struct instruction_op *op, struct pt_regs *regs,
  * Logical instructions
  */
 		case 26:	/* cntlzw */
-			asm("cntlzw %0,%1" : "=r" (regs->gpr[ra]) :
-			    "r" (regs->gpr[rd]));
+			op->val = __builtin_clz((unsigned int) regs->gpr[rd]);
 			goto logical_done;
 #ifdef __powerpc64__
 		case 58:	/* cntlzd */
-			asm("cntlzd %0,%1" : "=r" (regs->gpr[ra]) :
-			    "r" (regs->gpr[rd]));
+			op->val = __builtin_clzl(regs->gpr[rd]);
 			goto logical_done;
 #endif
 		case 28:	/* and */
-			regs->gpr[ra] = regs->gpr[rd] & regs->gpr[rb];
+			op->val = regs->gpr[rd] & regs->gpr[rb];
 			goto logical_done;
 
 		case 60:	/* andc */
-			regs->gpr[ra] = regs->gpr[rd] & ~regs->gpr[rb];
+			op->val = regs->gpr[rd] & ~regs->gpr[rb];
 			goto logical_done;
 
+		case 122:	/* popcntb */
+			do_popcnt(regs, op, regs->gpr[rd], 8);
+			goto logical_done_nocc;
+
 		case 124:	/* nor */
-			regs->gpr[ra] = ~(regs->gpr[rd] | regs->gpr[rb]);
+			op->val = ~(regs->gpr[rd] | regs->gpr[rb]);
 			goto logical_done;
 
+		case 154:	/* prtyw */
+			do_prty(regs, op, regs->gpr[rd], 32);
+			goto logical_done_nocc;
+
+		case 186:	/* prtyd */
+			do_prty(regs, op, regs->gpr[rd], 64);
+			goto logical_done_nocc;
+#ifdef CONFIG_PPC64
+		case 252:	/* bpermd */
+			do_bpermd(regs, op, regs->gpr[rd], regs->gpr[rb]);
+			goto logical_done_nocc;
+#endif
 		case 284:	/* xor */
-			regs->gpr[ra] = ~(regs->gpr[rd] ^ regs->gpr[rb]);
+			op->val = ~(regs->gpr[rd] ^ regs->gpr[rb]);
 			goto logical_done;
 
 		case 316:	/* xor */
-			regs->gpr[ra] = regs->gpr[rd] ^ regs->gpr[rb];
+			op->val = regs->gpr[rd] ^ regs->gpr[rb];
 			goto logical_done;
 
+		case 378:	/* popcntw */
+			do_popcnt(regs, op, regs->gpr[rd], 32);
+			goto logical_done_nocc;
+
 		case 412:	/* orc */
-			regs->gpr[ra] = regs->gpr[rd] | ~regs->gpr[rb];
+			op->val = regs->gpr[rd] | ~regs->gpr[rb];
 			goto logical_done;
 
 		case 444:	/* or */
-			regs->gpr[ra] = regs->gpr[rd] | regs->gpr[rb];
+			op->val = regs->gpr[rd] | regs->gpr[rb];
 			goto logical_done;
 
 		case 476:	/* nand */
-			regs->gpr[ra] = ~(regs->gpr[rd] & regs->gpr[rb]);
+			op->val = ~(regs->gpr[rd] & regs->gpr[rb]);
 			goto logical_done;
-
+#ifdef CONFIG_PPC64
+		case 506:	/* popcntd */
+			do_popcnt(regs, op, regs->gpr[rd], 64);
+			goto logical_done_nocc;
+#endif
 		case 922:	/* extsh */
-			regs->gpr[ra] = (signed short) regs->gpr[rd];
+			op->val = (signed short) regs->gpr[rd];
 			goto logical_done;
 
 		case 954:	/* extsb */
-			regs->gpr[ra] = (signed char) regs->gpr[rd];
+			op->val = (signed char) regs->gpr[rd];
 			goto logical_done;
 #ifdef __powerpc64__
 		case 986:	/* extsw */
-			regs->gpr[ra] = (signed int) regs->gpr[rd];
+			op->val = (signed int) regs->gpr[rd];
 			goto logical_done;
 #endif
 
@@ -1232,75 +1765,83 @@ int analyse_instr(struct instruction_op *op, struct pt_regs *regs,
 		case 24:	/* slw */
 			sh = regs->gpr[rb] & 0x3f;
 			if (sh < 32)
-				regs->gpr[ra] = (regs->gpr[rd] << sh) & 0xffffffffUL;
+				op->val = (regs->gpr[rd] << sh) & 0xffffffffUL;
 			else
-				regs->gpr[ra] = 0;
+				op->val = 0;
 			goto logical_done;
 
 		case 536:	/* srw */
 			sh = regs->gpr[rb] & 0x3f;
 			if (sh < 32)
-				regs->gpr[ra] = (regs->gpr[rd] & 0xffffffffUL) >> sh;
+				op->val = (regs->gpr[rd] & 0xffffffffUL) >> sh;
 			else
-				regs->gpr[ra] = 0;
+				op->val = 0;
 			goto logical_done;
 
 		case 792:	/* sraw */
+			op->type = COMPUTE + SETREG + SETXER;
 			sh = regs->gpr[rb] & 0x3f;
 			ival = (signed int) regs->gpr[rd];
-			regs->gpr[ra] = ival >> (sh < 32 ? sh : 31);
+			op->val = ival >> (sh < 32 ? sh : 31);
+			op->xerval = regs->xer;
 			if (ival < 0 && (sh >= 32 || (ival & ((1ul << sh) - 1)) != 0))
-				regs->xer |= XER_CA;
+				op->xerval |= XER_CA;
 			else
-				regs->xer &= ~XER_CA;
+				op->xerval &= ~XER_CA;
 			goto logical_done;
 
 		case 824:	/* srawi */
+			op->type = COMPUTE + SETREG + SETXER;
 			sh = rb;
 			ival = (signed int) regs->gpr[rd];
-			regs->gpr[ra] = ival >> sh;
+			op->val = ival >> sh;
+			op->xerval = regs->xer;
 			if (ival < 0 && (ival & ((1ul << sh) - 1)) != 0)
-				regs->xer |= XER_CA;
+				op->xerval |= XER_CA;
 			else
-				regs->xer &= ~XER_CA;
+				op->xerval &= ~XER_CA;
 			goto logical_done;
 
 #ifdef __powerpc64__
 		case 27:	/* sld */
 			sh = regs->gpr[rb] & 0x7f;
 			if (sh < 64)
-				regs->gpr[ra] = regs->gpr[rd] << sh;
+				op->val = regs->gpr[rd] << sh;
 			else
-				regs->gpr[ra] = 0;
+				op->val = 0;
 			goto logical_done;
 
 		case 539:	/* srd */
 			sh = regs->gpr[rb] & 0x7f;
 			if (sh < 64)
-				regs->gpr[ra] = regs->gpr[rd] >> sh;
+				op->val = regs->gpr[rd] >> sh;
 			else
-				regs->gpr[ra] = 0;
+				op->val = 0;
 			goto logical_done;
 
 		case 794:	/* srad */
+			op->type = COMPUTE + SETREG + SETXER;
 			sh = regs->gpr[rb] & 0x7f;
 			ival = (signed long int) regs->gpr[rd];
-			regs->gpr[ra] = ival >> (sh < 64 ? sh : 63);
+			op->val = ival >> (sh < 64 ? sh : 63);
+			op->xerval = regs->xer;
 			if (ival < 0 && (sh >= 64 || (ival & ((1ul << sh) - 1)) != 0))
-				regs->xer |= XER_CA;
+				op->xerval |= XER_CA;
 			else
-				regs->xer &= ~XER_CA;
+				op->xerval &= ~XER_CA;
 			goto logical_done;
 
 		case 826:	/* sradi with sh_5 = 0 */
 		case 827:	/* sradi with sh_5 = 1 */
+			op->type = COMPUTE + SETREG + SETXER;
 			sh = rb | ((instr & 2) << 4);
 			ival = (signed long int) regs->gpr[rd];
-			regs->gpr[ra] = ival >> sh;
+			op->val = ival >> sh;
+			op->xerval = regs->xer;
 			if (ival < 0 && (ival & ((1ul << sh) - 1)) != 0)
-				regs->xer |= XER_CA;
+				op->xerval |= XER_CA;
 			else
-				regs->xer &= ~XER_CA;
+				op->xerval &= ~XER_CA;
 			goto logical_done;
 #endif /* __powerpc64__ */
 
@@ -1333,18 +1874,24 @@ int analyse_instr(struct instruction_op *op, struct pt_regs *regs,
 			op->type = MKOP(CACHEOP, ICBI, 0);
 			op->ea = xform_ea(instr, regs);
 			return 0;
+
+		case 1014:	/* dcbz */
+			op->type = MKOP(CACHEOP, DCBZ, 0);
+			op->ea = xform_ea(instr, regs);
+			return 0;
 		}
 		break;
 	}
 
-	/*
-	 * Loads and stores.
-	 */
+/*
+ * Loads and stores.
+ */
 	op->type = UNKNOWN;
 	op->update_reg = ra;
 	op->reg = rd;
 	op->val = regs->gpr[rd];
 	u = (instr >> 20) & UPDATE;
+	op->vsx_flags = 0;
 
 	switch (opcode) {
 	case 31:
@@ -1368,9 +1915,30 @@ int analyse_instr(struct instruction_op *op, struct pt_regs *regs,
 			op->type = MKOP(STCX, 0, 8);
 			break;
 
-		case 21:	/* ldx */
-		case 53:	/* ldux */
-			op->type = MKOP(LOAD, u, 8);
+		case 52:	/* lbarx */
+			op->type = MKOP(LARX, 0, 1);
+			break;
+
+		case 694:	/* stbcx. */
+			op->type = MKOP(STCX, 0, 1);
+			break;
+
+		case 116:	/* lharx */
+			op->type = MKOP(LARX, 0, 2);
+			break;
+
+		case 726:	/* sthcx. */
+			op->type = MKOP(STCX, 0, 2);
+			break;
+
+		case 276:	/* lqarx */
+			if (!((rd & 1) || rd == ra || rd == rb))
+				op->type = MKOP(LARX, 0, 16);
+			break;
+
+		case 182:	/* stqcx. */
+			if (!(rd & 1))
+				op->type = MKOP(STCX, 0, 16);
 			break;
 #endif
 
@@ -1385,22 +1953,58 @@ int analyse_instr(struct instruction_op *op, struct pt_regs *regs,
 			break;
 
 #ifdef CONFIG_ALTIVEC
+		/*
+		 * Note: for the load/store vector element instructions,
+		 * bits of the EA say which field of the VMX register to use.
+		 */
+		case 7:		/* lvebx */
+			op->type = MKOP(LOAD_VMX, 0, 1);
+			op->element_size = 1;
+			break;
+
+		case 39:	/* lvehx */
+			op->type = MKOP(LOAD_VMX, 0, 2);
+			op->element_size = 2;
+			break;
+
+		case 71:	/* lvewx */
+			op->type = MKOP(LOAD_VMX, 0, 4);
+			op->element_size = 4;
+			break;
+
 		case 103:	/* lvx */
 		case 359:	/* lvxl */
-			if (!(regs->msr & MSR_VEC))
-				goto vecunavail;
 			op->type = MKOP(LOAD_VMX, 0, 16);
+			op->element_size = 16;
+			break;
+
+		case 135:	/* stvebx */
+			op->type = MKOP(STORE_VMX, 0, 1);
+			op->element_size = 1;
+			break;
+
+		case 167:	/* stvehx */
+			op->type = MKOP(STORE_VMX, 0, 2);
+			op->element_size = 2;
+			break;
+
+		case 199:	/* stvewx */
+			op->type = MKOP(STORE_VMX, 0, 4);
+			op->element_size = 4;
 			break;
 
 		case 231:	/* stvx */
 		case 487:	/* stvxl */
-			if (!(regs->msr & MSR_VEC))
-				goto vecunavail;
 			op->type = MKOP(STORE_VMX, 0, 16);
 			break;
 #endif /* CONFIG_ALTIVEC */
 
 #ifdef __powerpc64__
+		case 21:	/* ldx */
+		case 53:	/* ldux */
+			op->type = MKOP(LOAD, u, 8);
+			break;
+
 		case 149:	/* stdx */
 		case 181:	/* stdux */
 			op->type = MKOP(STORE, u, 8);
@@ -1457,41 +2061,52 @@ int analyse_instr(struct instruction_op *op, struct pt_regs *regs,
 			if (rb == 0)
 				rb = 32;	/* # bytes to load */
 			op->type = MKOP(LOAD_MULTI, 0, rb);
-			op->ea = 0;
-			if (ra)
-				op->ea = truncate_if_32bit(regs->msr,
-							   regs->gpr[ra]);
+			op->ea = ra ? regs->gpr[ra] : 0;
 			break;
 
 #ifdef CONFIG_PPC_FPU
 		case 535:	/* lfsx */
 		case 567:	/* lfsux */
-			if (!(regs->msr & MSR_FP))
-				goto fpunavail;
-			op->type = MKOP(LOAD_FP, u, 4);
+			op->type = MKOP(LOAD_FP, u | FPCONV, 4);
 			break;
 
 		case 599:	/* lfdx */
 		case 631:	/* lfdux */
-			if (!(regs->msr & MSR_FP))
-				goto fpunavail;
 			op->type = MKOP(LOAD_FP, u, 8);
 			break;
 
 		case 663:	/* stfsx */
 		case 695:	/* stfsux */
-			if (!(regs->msr & MSR_FP))
-				goto fpunavail;
-			op->type = MKOP(STORE_FP, u, 4);
+			op->type = MKOP(STORE_FP, u | FPCONV, 4);
 			break;
 
 		case 727:	/* stfdx */
 		case 759:	/* stfdux */
-			if (!(regs->msr & MSR_FP))
-				goto fpunavail;
 			op->type = MKOP(STORE_FP, u, 8);
 			break;
-#endif
+
+#ifdef __powerpc64__
+		case 791:	/* lfdpx */
+			op->type = MKOP(LOAD_FP, 0, 16);
+			break;
+
+		case 855:	/* lfiwax */
+			op->type = MKOP(LOAD_FP, SIGNEXT, 4);
+			break;
+
+		case 887:	/* lfiwzx */
+			op->type = MKOP(LOAD_FP, 0, 4);
+			break;
+
+		case 919:	/* stfdpx */
+			op->type = MKOP(STORE_FP, 0, 16);
+			break;
+
+		case 983:	/* stfiwx */
+			op->type = MKOP(STORE_FP, 0, 4);
+			break;
+#endif /* __powerpc64 */
+#endif /* CONFIG_PPC_FPU */
 
 #ifdef __powerpc64__
 		case 660:	/* stdbrx */
@@ -1509,14 +2124,11 @@ int analyse_instr(struct instruction_op *op, struct pt_regs *regs,
 			op->val = byterev_4(regs->gpr[rd]);
 			break;
 
-		case 725:
+		case 725:	/* stswi */
 			if (rb == 0)
 				rb = 32;	/* # bytes to store */
 			op->type = MKOP(STORE_MULTI, 0, rb);
-			op->ea = 0;
-			if (ra)
-				op->ea = truncate_if_32bit(regs->msr,
-							   regs->gpr[ra]);
+			op->ea = ra ? regs->gpr[ra] : 0;
 			break;
 
 		case 790:	/* lhbrx */
@@ -1529,20 +2141,184 @@ int analyse_instr(struct instruction_op *op, struct pt_regs *regs,
 			break;
 
 #ifdef CONFIG_VSX
+		case 12:	/* lxsiwzx */
+			op->reg = rd | ((instr & 1) << 5);
+			op->type = MKOP(LOAD_VSX, 0, 4);
+			op->element_size = 8;
+			break;
+
+		case 76:	/* lxsiwax */
+			op->reg = rd | ((instr & 1) << 5);
+			op->type = MKOP(LOAD_VSX, SIGNEXT, 4);
+			op->element_size = 8;
+			break;
+
+		case 140:	/* stxsiwx */
+			op->reg = rd | ((instr & 1) << 5);
+			op->type = MKOP(STORE_VSX, 0, 4);
+			op->element_size = 8;
+			break;
+
+		case 268:	/* lxvx */
+			op->reg = rd | ((instr & 1) << 5);
+			op->type = MKOP(LOAD_VSX, 0, 16);
+			op->element_size = 16;
+			op->vsx_flags = VSX_CHECK_VEC;
+			break;
+
+		case 269:	/* lxvl */
+		case 301: {	/* lxvll */
+			int nb;
+			op->reg = rd | ((instr & 1) << 5);
+			op->ea = ra ? regs->gpr[ra] : 0;
+			nb = regs->gpr[rb] & 0xff;
+			if (nb > 16)
+				nb = 16;
+			op->type = MKOP(LOAD_VSX, 0, nb);
+			op->element_size = 16;
+			op->vsx_flags = ((instr & 0x20) ? VSX_LDLEFT : 0) |
+				VSX_CHECK_VEC;
+			break;
+		}
+		case 332:	/* lxvdsx */
+			op->reg = rd | ((instr & 1) << 5);
+			op->type = MKOP(LOAD_VSX, 0, 8);
+			op->element_size = 8;
+			op->vsx_flags = VSX_SPLAT;
+			break;
+
+		case 364:	/* lxvwsx */
+			op->reg = rd | ((instr & 1) << 5);
+			op->type = MKOP(LOAD_VSX, 0, 4);
+			op->element_size = 4;
+			op->vsx_flags = VSX_SPLAT | VSX_CHECK_VEC;
+			break;
+
+		case 396:	/* stxvx */
+			op->reg = rd | ((instr & 1) << 5);
+			op->type = MKOP(STORE_VSX, 0, 16);
+			op->element_size = 16;
+			op->vsx_flags = VSX_CHECK_VEC;
+			break;
+
+		case 397:	/* stxvl */
+		case 429: {	/* stxvll */
+			int nb;
+			op->reg = rd | ((instr & 1) << 5);
+			op->ea = ra ? regs->gpr[ra] : 0;
+			nb = regs->gpr[rb] & 0xff;
+			if (nb > 16)
+				nb = 16;
+			op->type = MKOP(STORE_VSX, 0, nb);
+			op->element_size = 16;
+			op->vsx_flags = ((instr & 0x20) ? VSX_LDLEFT : 0) |
+				VSX_CHECK_VEC;
+			break;
+		}
+		case 524:	/* lxsspx */
+			op->reg = rd | ((instr & 1) << 5);
+			op->type = MKOP(LOAD_VSX, 0, 4);
+			op->element_size = 8;
+			op->vsx_flags = VSX_FPCONV;
+			break;
+
+		case 588:	/* lxsdx */
+			op->reg = rd | ((instr & 1) << 5);
+			op->type = MKOP(LOAD_VSX, 0, 8);
+			op->element_size = 8;
+			break;
+
+		case 652:	/* stxsspx */
+			op->reg = rd | ((instr & 1) << 5);
+			op->type = MKOP(STORE_VSX, 0, 4);
+			op->element_size = 8;
+			op->vsx_flags = VSX_FPCONV;
+			break;
+
+		case 716:	/* stxsdx */
+			op->reg = rd | ((instr & 1) << 5);
+			op->type = MKOP(STORE_VSX, 0, 8);
+			op->element_size = 8;
+			break;
+
+		case 780:	/* lxvw4x */
+			op->reg = rd | ((instr & 1) << 5);
+			op->type = MKOP(LOAD_VSX, 0, 16);
+			op->element_size = 4;
+			break;
+
+		case 781:	/* lxsibzx */
+			op->reg = rd | ((instr & 1) << 5);
+			op->type = MKOP(LOAD_VSX, 0, 1);
+			op->element_size = 8;
+			op->vsx_flags = VSX_CHECK_VEC;
+			break;
+
+		case 812:	/* lxvh8x */
+			op->reg = rd | ((instr & 1) << 5);
+			op->type = MKOP(LOAD_VSX, 0, 16);
+			op->element_size = 2;
+			op->vsx_flags = VSX_CHECK_VEC;
+			break;
+
+		case 813:	/* lxsihzx */
+			op->reg = rd | ((instr & 1) << 5);
+			op->type = MKOP(LOAD_VSX, 0, 2);
+			op->element_size = 8;
+			op->vsx_flags = VSX_CHECK_VEC;
+			break;
+
 		case 844:	/* lxvd2x */
-		case 876:	/* lxvd2ux */
-			if (!(regs->msr & MSR_VSX))
-				goto vsxunavail;
 			op->reg = rd | ((instr & 1) << 5);
-			op->type = MKOP(LOAD_VSX, u, 16);
+			op->type = MKOP(LOAD_VSX, 0, 16);
+			op->element_size = 8;
+			break;
+
+		case 876:	/* lxvb16x */
+			op->reg = rd | ((instr & 1) << 5);
+			op->type = MKOP(LOAD_VSX, 0, 16);
+			op->element_size = 1;
+			op->vsx_flags = VSX_CHECK_VEC;
+			break;
+
+		case 908:	/* stxvw4x */
+			op->reg = rd | ((instr & 1) << 5);
+			op->type = MKOP(STORE_VSX, 0, 16);
+			op->element_size = 4;
+			break;
+
+		case 909:	/* stxsibx */
+			op->reg = rd | ((instr & 1) << 5);
+			op->type = MKOP(STORE_VSX, 0, 1);
+			op->element_size = 8;
+			op->vsx_flags = VSX_CHECK_VEC;
+			break;
+
+		case 940:	/* stxvh8x */
+			op->reg = rd | ((instr & 1) << 5);
+			op->type = MKOP(STORE_VSX, 0, 16);
+			op->element_size = 2;
+			op->vsx_flags = VSX_CHECK_VEC;
+			break;
+
+		case 941:	/* stxsihx */
+			op->reg = rd | ((instr & 1) << 5);
+			op->type = MKOP(STORE_VSX, 0, 2);
+			op->element_size = 8;
+			op->vsx_flags = VSX_CHECK_VEC;
 			break;
 
 		case 972:	/* stxvd2x */
-		case 1004:	/* stxvd2ux */
-			if (!(regs->msr & MSR_VSX))
-				goto vsxunavail;
 			op->reg = rd | ((instr & 1) << 5);
-			op->type = MKOP(STORE_VSX, u, 16);
+			op->type = MKOP(STORE_VSX, 0, 16);
+			op->element_size = 8;
+			break;
+
+		case 1004:	/* stxvb16x */
+			op->reg = rd | ((instr & 1) << 5);
+			op->type = MKOP(STORE_VSX, 0, 16);
+			op->element_size = 1;
+			op->vsx_flags = VSX_CHECK_VEC;
 			break;
 
 #endif /* CONFIG_VSX */
@@ -1606,38 +2382,63 @@ int analyse_instr(struct instruction_op *op, struct pt_regs *regs,
 #ifdef CONFIG_PPC_FPU
 	case 48:	/* lfs */
 	case 49:	/* lfsu */
-		if (!(regs->msr & MSR_FP))
-			goto fpunavail;
-		op->type = MKOP(LOAD_FP, u, 4);
+		op->type = MKOP(LOAD_FP, u | FPCONV, 4);
 		op->ea = dform_ea(instr, regs);
 		break;
 
 	case 50:	/* lfd */
 	case 51:	/* lfdu */
-		if (!(regs->msr & MSR_FP))
-			goto fpunavail;
 		op->type = MKOP(LOAD_FP, u, 8);
 		op->ea = dform_ea(instr, regs);
 		break;
 
 	case 52:	/* stfs */
 	case 53:	/* stfsu */
-		if (!(regs->msr & MSR_FP))
-			goto fpunavail;
-		op->type = MKOP(STORE_FP, u, 4);
+		op->type = MKOP(STORE_FP, u | FPCONV, 4);
 		op->ea = dform_ea(instr, regs);
 		break;
 
 	case 54:	/* stfd */
 	case 55:	/* stfdu */
-		if (!(regs->msr & MSR_FP))
-			goto fpunavail;
 		op->type = MKOP(STORE_FP, u, 8);
 		op->ea = dform_ea(instr, regs);
 		break;
 #endif
 
 #ifdef __powerpc64__
+	case 56:	/* lq */
+		if (!((rd & 1) || (rd == ra)))
+			op->type = MKOP(LOAD, 0, 16);
+		op->ea = dqform_ea(instr, regs);
+		break;
+#endif
+
+#ifdef CONFIG_VSX
+	case 57:	/* lfdp, lxsd, lxssp */
+		op->ea = dsform_ea(instr, regs);
+		switch (instr & 3) {
+		case 0:		/* lfdp */
+			if (rd & 1)
+				break;		/* reg must be even */
+			op->type = MKOP(LOAD_FP, 0, 16);
+			break;
+		case 2:		/* lxsd */
+			op->reg = rd + 32;
+			op->type = MKOP(LOAD_VSX, 0, 8);
+			op->element_size = 8;
+			op->vsx_flags = VSX_CHECK_VEC;
+			break;
+		case 3:		/* lxssp */
+			op->reg = rd + 32;
+			op->type = MKOP(LOAD_VSX, 0, 4);
+			op->element_size = 8;
+			op->vsx_flags = VSX_FPCONV | VSX_CHECK_VEC;
+			break;
+		}
+		break;
+#endif /* CONFIG_VSX */
+
+#ifdef __powerpc64__
 	case 58:	/* ld[u], lwa */
 		op->ea = dsform_ea(instr, regs);
 		switch (instr & 3) {
@@ -1652,7 +2453,57 @@ int analyse_instr(struct instruction_op *op, struct pt_regs *regs,
 			break;
 		}
 		break;
+#endif
 
+#ifdef CONFIG_VSX
+	case 61:	/* stfdp, lxv, stxsd, stxssp, stxv */
+		switch (instr & 7) {
+		case 0:		/* stfdp with LSB of DS field = 0 */
+		case 4:		/* stfdp with LSB of DS field = 1 */
+			op->ea = dsform_ea(instr, regs);
+			op->type = MKOP(STORE_FP, 0, 16);
+			break;
+
+		case 1:		/* lxv */
+			op->ea = dqform_ea(instr, regs);
+			if (instr & 8)
+				op->reg = rd + 32;
+			op->type = MKOP(LOAD_VSX, 0, 16);
+			op->element_size = 16;
+			op->vsx_flags = VSX_CHECK_VEC;
+			break;
+
+		case 2:		/* stxsd with LSB of DS field = 0 */
+		case 6:		/* stxsd with LSB of DS field = 1 */
+			op->ea = dsform_ea(instr, regs);
+			op->reg = rd + 32;
+			op->type = MKOP(STORE_VSX, 0, 8);
+			op->element_size = 8;
+			op->vsx_flags = VSX_CHECK_VEC;
+			break;
+
+		case 3:		/* stxssp with LSB of DS field = 0 */
+		case 7:		/* stxssp with LSB of DS field = 1 */
+			op->ea = dsform_ea(instr, regs);
+			op->reg = rd + 32;
+			op->type = MKOP(STORE_VSX, 0, 4);
+			op->element_size = 8;
+			op->vsx_flags = VSX_FPCONV | VSX_CHECK_VEC;
+			break;
+
+		case 5:		/* stxv */
+			op->ea = dqform_ea(instr, regs);
+			if (instr & 8)
+				op->reg = rd + 32;
+			op->type = MKOP(STORE_VSX, 0, 16);
+			op->element_size = 16;
+			op->vsx_flags = VSX_CHECK_VEC;
+			break;
+		}
+		break;
+#endif /* CONFIG_VSX */
+
+#ifdef __powerpc64__
 	case 62:	/* std[u] */
 		op->ea = dsform_ea(instr, regs);
 		switch (instr & 3) {
@@ -1662,6 +2513,10 @@ int analyse_instr(struct instruction_op *op, struct pt_regs *regs,
 		case 1:		/* stdu */
 			op->type = MKOP(STORE, UPDATE, 8);
 			break;
+		case 2:		/* stq */
+			if (!(rd & 1))
+				op->type = MKOP(STORE, 0, 16);
+			break;
 		}
 		break;
 #endif /* __powerpc64__ */
@@ -1671,15 +2526,18 @@ int analyse_instr(struct instruction_op *op, struct pt_regs *regs,
 
  logical_done:
 	if (instr & 1)
-		set_cr0(regs, ra);
-	goto instr_done;
+		set_cr0(regs, op, ra);
+ logical_done_nocc:
+	op->reg = ra;
+	op->type |= SETREG;
+	return 1;
 
  arith_done:
 	if (instr & 1)
-		set_cr0(regs, rd);
-
- instr_done:
-	regs->nip = truncate_if_32bit(regs->msr, regs->nip + 4);
+		set_cr0(regs, op, rd);
+ compute_done:
+	op->reg = rd;
+	op->type |= SETREG;
 	return 1;
 
  priv:
@@ -1691,24 +2549,6 @@ int analyse_instr(struct instruction_op *op, struct pt_regs *regs,
 	op->type = INTERRUPT | 0x700;
 	op->val = SRR1_PROGTRAP;
 	return 0;
-
-#ifdef CONFIG_PPC_FPU
- fpunavail:
-	op->type = INTERRUPT | 0x800;
-	return 0;
-#endif
-
-#ifdef CONFIG_ALTIVEC
- vecunavail:
-	op->type = INTERRUPT | 0xf20;
-	return 0;
-#endif
-
-#ifdef CONFIG_VSX
- vsxunavail:
-	op->type = INTERRUPT | 0xf40;
-	return 0;
-#endif
 }
 EXPORT_SYMBOL_GPL(analyse_instr);
 NOKPROBE_SYMBOL(analyse_instr);
@@ -1771,190 +2611,412 @@ static nokprobe_inline void do_byterev(unsigned long *valp, int size)
 }
 
 /*
- * Emulate instructions that cause a transfer of control,
- * loads and stores, and a few other instructions.
- * Returns 1 if the step was emulated, 0 if not,
- * or -1 if the instruction is one that should not be stepped,
- * such as an rfid, or a mtmsrd that would clear MSR_RI.
+ * Emulate an instruction that can be executed just by updating
+ * fields in *regs.
  */
-int emulate_step(struct pt_regs *regs, unsigned int instr)
+void emulate_update_regs(struct pt_regs *regs, struct instruction_op *op)
 {
-	struct instruction_op op;
-	int r, err, size;
-	unsigned long val;
-	unsigned int cr;
-	int i, rd, nb;
+	unsigned long next_pc;
+
+	next_pc = truncate_if_32bit(regs->msr, regs->nip + 4);
+	switch (op->type & INSTR_TYPE_MASK) {
+	case COMPUTE:
+		if (op->type & SETREG)
+			regs->gpr[op->reg] = op->val;
+		if (op->type & SETCC)
+			regs->ccr = op->ccval;
+		if (op->type & SETXER)
+			regs->xer = op->xerval;
+		break;
 
-	r = analyse_instr(&op, regs, instr);
-	if (r != 0)
-		return r;
+	case BRANCH:
+		if (op->type & SETLK)
+			regs->link = next_pc;
+		if (op->type & BRTAKEN)
+			next_pc = op->val;
+		if (op->type & DECCTR)
+			--regs->ctr;
+		break;
 
-	err = 0;
-	size = GETSIZE(op.type);
-	switch (op.type & INSTR_TYPE_MASK) {
-	case CACHEOP:
-		if (!address_ok(regs, op.ea, 8))
-			return 0;
-		switch (op.type & CACHEOP_MASK) {
-		case DCBST:
-			__cacheop_user_asmx(op.ea, err, "dcbst");
+	case BARRIER:
+		switch (op->type & BARRIER_MASK) {
+		case BARRIER_SYNC:
+			mb();
 			break;
-		case DCBF:
-			__cacheop_user_asmx(op.ea, err, "dcbf");
+		case BARRIER_ISYNC:
+			isync();
 			break;
-		case DCBTST:
-			if (op.reg == 0)
-				prefetchw((void *) op.ea);
+		case BARRIER_EIEIO:
+			eieio();
 			break;
-		case DCBT:
-			if (op.reg == 0)
-				prefetch((void *) op.ea);
+		case BARRIER_LWSYNC:
+			asm volatile("lwsync" : : : "memory");
 			break;
-		case ICBI:
-			__cacheop_user_asmx(op.ea, err, "icbi");
+		case BARRIER_PTESYNC:
+			asm volatile("ptesync" : : : "memory");
 			break;
 		}
-		if (err)
-			return 0;
-		goto instr_done;
+		break;
+
+	case MFSPR:
+		switch (op->spr) {
+		case SPRN_XER:
+			regs->gpr[op->reg] = regs->xer & 0xffffffffUL;
+			break;
+		case SPRN_LR:
+			regs->gpr[op->reg] = regs->link;
+			break;
+		case SPRN_CTR:
+			regs->gpr[op->reg] = regs->ctr;
+			break;
+		default:
+			WARN_ON_ONCE(1);
+		}
+		break;
+
+	case MTSPR:
+		switch (op->spr) {
+		case SPRN_XER:
+			regs->xer = op->val & 0xffffffffUL;
+			break;
+		case SPRN_LR:
+			regs->link = op->val;
+			break;
+		case SPRN_CTR:
+			regs->ctr = op->val;
+			break;
+		default:
+			WARN_ON_ONCE(1);
+		}
+		break;
+
+	default:
+		WARN_ON_ONCE(1);
+	}
+	regs->nip = next_pc;
+}
+
+/*
+ * Emulate a previously-analysed load or store instruction.
+ * Return values are:
+ * 0 = instruction emulated successfully
+ * -EFAULT = address out of range or access faulted (regs->dar
+ *	     contains the faulting address)
+ * -EACCES = misaligned access, instruction requires alignment
+ * -EINVAL = unknown operation in *op
+ */
+int emulate_loadstore(struct pt_regs *regs, struct instruction_op *op)
+{
+	int err, size, type;
+	int i, rd, nb;
+	unsigned int cr;
+	unsigned long val;
+	unsigned long ea;
+	bool cross_endian;
+
+	err = 0;
+	size = GETSIZE(op->type);
+	type = op->type & INSTR_TYPE_MASK;
+	cross_endian = (regs->msr & MSR_LE) != (MSR_KERNEL & MSR_LE);
+	ea = truncate_if_32bit(regs->msr, op->ea);
 
+	switch (type) {
 	case LARX:
-		if (op.ea & (size - 1))
-			break;		/* can't handle misaligned */
-		if (!address_ok(regs, op.ea, size))
-			return 0;
+		if (ea & (size - 1))
+			return -EACCES;		/* can't handle misaligned */
+		if (!address_ok(regs, ea, size))
+			return -EFAULT;
 		err = 0;
+		val = 0;
 		switch (size) {
+#ifdef __powerpc64__
+		case 1:
+			__get_user_asmx(val, ea, err, "lbarx");
+			break;
+		case 2:
+			__get_user_asmx(val, ea, err, "lharx");
+			break;
+#endif
 		case 4:
-			__get_user_asmx(val, op.ea, err, "lwarx");
+			__get_user_asmx(val, ea, err, "lwarx");
 			break;
 #ifdef __powerpc64__
 		case 8:
-			__get_user_asmx(val, op.ea, err, "ldarx");
+			__get_user_asmx(val, ea, err, "ldarx");
+			break;
+		case 16:
+			err = do_lqarx(ea, &regs->gpr[op->reg]);
 			break;
 #endif
 		default:
-			return 0;
+			return -EINVAL;
 		}
-		if (!err)
-			regs->gpr[op.reg] = val;
-		goto ldst_done;
+		if (err) {
+			regs->dar = ea;
+			break;
+		}
+		if (size < 16)
+			regs->gpr[op->reg] = val;
+		break;
 
 	case STCX:
-		if (op.ea & (size - 1))
-			break;		/* can't handle misaligned */
-		if (!address_ok(regs, op.ea, size))
-			return 0;
+		if (ea & (size - 1))
+			return -EACCES;		/* can't handle misaligned */
+		if (!address_ok(regs, ea, size))
+			return -EFAULT;
 		err = 0;
 		switch (size) {
+#ifdef __powerpc64__
+		case 1:
+			__put_user_asmx(op->val, ea, err, "stbcx.", cr);
+			break;
+		case 2:
+			__put_user_asmx(op->val, ea, err, "stbcx.", cr);
+			break;
+#endif
 		case 4:
-			__put_user_asmx(op.val, op.ea, err, "stwcx.", cr);
+			__put_user_asmx(op->val, ea, err, "stwcx.", cr);
 			break;
 #ifdef __powerpc64__
 		case 8:
-			__put_user_asmx(op.val, op.ea, err, "stdcx.", cr);
+			__put_user_asmx(op->val, ea, err, "stdcx.", cr);
+			break;
+		case 16:
+			err = do_stqcx(ea, regs->gpr[op->reg],
+				       regs->gpr[op->reg + 1], &cr);
 			break;
 #endif
 		default:
-			return 0;
+			return -EINVAL;
 		}
 		if (!err)
 			regs->ccr = (regs->ccr & 0x0fffffff) |
 				(cr & 0xe0000000) |
 				((regs->xer >> 3) & 0x10000000);
-		goto ldst_done;
+		else
+			regs->dar = ea;
+		break;
 
 	case LOAD:
-		err = read_mem(&regs->gpr[op.reg], op.ea, size, regs);
+#ifdef __powerpc64__
+		if (size == 16) {
+			err = emulate_lq(regs, ea, op->reg, cross_endian);
+			break;
+		}
+#endif
+		err = read_mem(&regs->gpr[op->reg], ea, size, regs);
 		if (!err) {
-			if (op.type & SIGNEXT)
-				do_signext(&regs->gpr[op.reg], size);
-			if (op.type & BYTEREV)
-				do_byterev(&regs->gpr[op.reg], size);
+			if (op->type & SIGNEXT)
+				do_signext(&regs->gpr[op->reg], size);
+			if ((op->type & BYTEREV) == (cross_endian ? 0 : BYTEREV))
+				do_byterev(&regs->gpr[op->reg], size);
 		}
-		goto ldst_done;
+		break;
 
 #ifdef CONFIG_PPC_FPU
 	case LOAD_FP:
-		if (size == 4)
-			err = do_fp_load(op.reg, do_lfs, op.ea, size, regs);
-		else
-			err = do_fp_load(op.reg, do_lfd, op.ea, size, regs);
-		goto ldst_done;
+		/*
+		 * If the instruction is in userspace, we can emulate it even
+		 * if the VMX state is not live, because we have the state
+		 * stored in the thread_struct.  If the instruction is in
+		 * the kernel, we must not touch the state in the thread_struct.
+		 */
+		if (!(regs->msr & MSR_PR) && !(regs->msr & MSR_FP))
+			return 0;
+		err = do_fp_load(op, ea, regs, cross_endian);
+		break;
 #endif
 #ifdef CONFIG_ALTIVEC
 	case LOAD_VMX:
-		err = do_vec_load(op.reg, do_lvx, op.ea & ~0xfUL, regs);
-		goto ldst_done;
+		if (!(regs->msr & MSR_PR) && !(regs->msr & MSR_VEC))
+			return 0;
+		err = do_vec_load(op->reg, ea, size, regs, cross_endian);
+		break;
 #endif
 #ifdef CONFIG_VSX
-	case LOAD_VSX:
-		err = do_vsx_load(op.reg, do_lxvd2x, op.ea, regs);
-		goto ldst_done;
+	case LOAD_VSX: {
+		unsigned long msrbit = MSR_VSX;
+
+		/*
+		 * Some VSX instructions check the MSR_VEC bit rather than MSR_VSX
+		 * when the target of the instruction is a vector register.
+		 */
+		if (op->reg >= 32 && (op->vsx_flags & VSX_CHECK_VEC))
+			msrbit = MSR_VEC;
+		if (!(regs->msr & MSR_PR) && !(regs->msr & msrbit))
+			return 0;
+		err = do_vsx_load(op, ea, regs, cross_endian);
+		break;
+	}
 #endif
 	case LOAD_MULTI:
-		if (regs->msr & MSR_LE)
-			return 0;
-		rd = op.reg;
+		if (!address_ok(regs, ea, size))
+			return -EFAULT;
+		rd = op->reg;
 		for (i = 0; i < size; i += 4) {
+			unsigned int v32 = 0;
+
 			nb = size - i;
 			if (nb > 4)
 				nb = 4;
-			err = read_mem(&regs->gpr[rd], op.ea, nb, regs);
+			err = copy_mem_in((u8 *) &v32, ea, nb, regs);
 			if (err)
-				return 0;
-			if (nb < 4)	/* left-justify last bytes */
-				regs->gpr[rd] <<= 32 - 8 * nb;
-			op.ea += 4;
-			++rd;
+				break;
+			if (unlikely(cross_endian))
+				v32 = byterev_4(v32);
+			regs->gpr[rd] = v32;
+			ea += 4;
+			/* reg number wraps from 31 to 0 for lsw[ix] */
+			rd = (rd + 1) & 0x1f;
 		}
-		goto instr_done;
+		break;
 
 	case STORE:
-		if ((op.type & UPDATE) && size == sizeof(long) &&
-		    op.reg == 1 && op.update_reg == 1 &&
+#ifdef __powerpc64__
+		if (size == 16) {
+			err = emulate_stq(regs, ea, op->reg, cross_endian);
+			break;
+		}
+#endif
+		if ((op->type & UPDATE) && size == sizeof(long) &&
+		    op->reg == 1 && op->update_reg == 1 &&
 		    !(regs->msr & MSR_PR) &&
-		    op.ea >= regs->gpr[1] - STACK_INT_FRAME_SIZE) {
-			err = handle_stack_update(op.ea, regs);
-			goto ldst_done;
+		    ea >= regs->gpr[1] - STACK_INT_FRAME_SIZE) {
+			err = handle_stack_update(ea, regs);
+			break;
 		}
-		err = write_mem(op.val, op.ea, size, regs);
-		goto ldst_done;
+		if (unlikely(cross_endian))
+			do_byterev(&op->val, size);
+		err = write_mem(op->val, ea, size, regs);
+		break;
 
 #ifdef CONFIG_PPC_FPU
 	case STORE_FP:
-		if (size == 4)
-			err = do_fp_store(op.reg, do_stfs, op.ea, size, regs);
-		else
-			err = do_fp_store(op.reg, do_stfd, op.ea, size, regs);
-		goto ldst_done;
+		if (!(regs->msr & MSR_PR) && !(regs->msr & MSR_FP))
+			return 0;
+		err = do_fp_store(op, ea, regs, cross_endian);
+		break;
 #endif
 #ifdef CONFIG_ALTIVEC
 	case STORE_VMX:
-		err = do_vec_store(op.reg, do_stvx, op.ea & ~0xfUL, regs);
-		goto ldst_done;
+		if (!(regs->msr & MSR_PR) && !(regs->msr & MSR_VEC))
+			return 0;
+		err = do_vec_store(op->reg, ea, size, regs, cross_endian);
+		break;
 #endif
 #ifdef CONFIG_VSX
-	case STORE_VSX:
-		err = do_vsx_store(op.reg, do_stxvd2x, op.ea, regs);
-		goto ldst_done;
+	case STORE_VSX: {
+		unsigned long msrbit = MSR_VSX;
+
+		/*
+		 * Some VSX instructions check the MSR_VEC bit rather than MSR_VSX
+		 * when the target of the instruction is a vector register.
+		 */
+		if (op->reg >= 32 && (op->vsx_flags & VSX_CHECK_VEC))
+			msrbit = MSR_VEC;
+		if (!(regs->msr & MSR_PR) && !(regs->msr & msrbit))
+			return 0;
+		err = do_vsx_store(op, ea, regs, cross_endian);
+		break;
+	}
 #endif
 	case STORE_MULTI:
-		if (regs->msr & MSR_LE)
-			return 0;
-		rd = op.reg;
+		if (!address_ok(regs, ea, size))
+			return -EFAULT;
+		rd = op->reg;
 		for (i = 0; i < size; i += 4) {
-			val = regs->gpr[rd];
+			unsigned int v32 = regs->gpr[rd];
+
 			nb = size - i;
 			if (nb > 4)
 				nb = 4;
-			else
-				val >>= 32 - 8 * nb;
-			err = write_mem(val, op.ea, nb, regs);
+			if (unlikely(cross_endian))
+				v32 = byterev_4(v32);
+			err = copy_mem_out((u8 *) &v32, ea, nb, regs);
 			if (err)
-				return 0;
-			op.ea += 4;
-			++rd;
+				break;
+			ea += 4;
+			/* reg number wraps from 31 to 0 for stsw[ix] */
+			rd = (rd + 1) & 0x1f;
+		}
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	if (err)
+		return err;
+
+	if (op->type & UPDATE)
+		regs->gpr[op->update_reg] = op->ea;
+
+	return 0;
+}
+NOKPROBE_SYMBOL(emulate_loadstore);
+
+/*
+ * Emulate instructions that cause a transfer of control,
+ * loads and stores, and a few other instructions.
+ * Returns 1 if the step was emulated, 0 if not,
+ * or -1 if the instruction is one that should not be stepped,
+ * such as an rfid, or a mtmsrd that would clear MSR_RI.
+ */
+int emulate_step(struct pt_regs *regs, unsigned int instr)
+{
+	struct instruction_op op;
+	int r, err, type;
+	unsigned long val;
+	unsigned long ea;
+
+	r = analyse_instr(&op, regs, instr);
+	if (r < 0)
+		return r;
+	if (r > 0) {
+		emulate_update_regs(regs, &op);
+		return 1;
+	}
+
+	err = 0;
+	type = op.type & INSTR_TYPE_MASK;
+
+	if (OP_IS_LOAD_STORE(type)) {
+		err = emulate_loadstore(regs, &op);
+		if (err)
+			return 0;
+		goto instr_done;
+	}
+
+	switch (type) {
+	case CACHEOP:
+		ea = truncate_if_32bit(regs->msr, op.ea);
+		if (!address_ok(regs, ea, 8))
+			return 0;
+		switch (op.type & CACHEOP_MASK) {
+		case DCBST:
+			__cacheop_user_asmx(ea, err, "dcbst");
+			break;
+		case DCBF:
+			__cacheop_user_asmx(ea, err, "dcbf");
+			break;
+		case DCBTST:
+			if (op.reg == 0)
+				prefetchw((void *) ea);
+			break;
+		case DCBT:
+			if (op.reg == 0)
+				prefetch((void *) ea);
+			break;
+		case ICBI:
+			__cacheop_user_asmx(ea, err, "icbi");
+			break;
+		case DCBZ:
+			err = emulate_dcbz(ea, regs);
+			break;
+		}
+		if (err) {
+			regs->dar = ea;
+			return 0;
 		}
 		goto instr_done;
 
@@ -1998,12 +3060,6 @@ int emulate_step(struct pt_regs *regs, unsigned int instr)
 	}
 	return 0;
 
- ldst_done:
-	if (err)
-		return 0;
-	if (op.type & UPDATE)
-		regs->gpr[op.update_reg] = op.ea;
-
  instr_done:
 	regs->nip = truncate_if_32bit(regs->msr, regs->nip + 4);
 	return 1;
diff --git a/arch/powerpc/lib/string_64.S b/arch/powerpc/lib/string_64.S
index d5b4d9498c54..56aac4c22025 100644
--- a/arch/powerpc/lib/string_64.S
+++ b/arch/powerpc/lib/string_64.S
@@ -184,7 +184,7 @@ err1;	std	r0,8(r3)
 	mtctr	r6
 	mr	r8,r3
 14:
-err1;	dcbz	r0,r3
+err1;	dcbz	0,r3
 	add	r3,r3,r9
 	bdnz	14b
 
diff --git a/arch/powerpc/mm/8xx_mmu.c b/arch/powerpc/mm/8xx_mmu.c
index f4c6472f2fc4..f29212e40f40 100644
--- a/arch/powerpc/mm/8xx_mmu.c
+++ b/arch/powerpc/mm/8xx_mmu.c
@@ -22,8 +22,11 @@
 
 extern int __map_without_ltlbs;
 
+static unsigned long block_mapped_ram;
+
 /*
- * Return PA for this VA if it is in IMMR area, or 0
+ * Return PA for this VA if it is in an area mapped with LTLBs.
+ * Otherwise, returns 0
  */
 phys_addr_t v_block_mapped(unsigned long va)
 {
@@ -33,11 +36,13 @@ phys_addr_t v_block_mapped(unsigned long va)
 		return 0;
 	if (va >= VIRT_IMMR_BASE && va < VIRT_IMMR_BASE + IMMR_SIZE)
 		return p + va - VIRT_IMMR_BASE;
+	if (va >= PAGE_OFFSET && va < PAGE_OFFSET + block_mapped_ram)
+		return __pa(va);
 	return 0;
 }
 
 /*
- * Return VA for a given PA or 0 if not mapped
+ * Return VA for a given PA mapped with LTLBs or 0 if not mapped
  */
 unsigned long p_block_mapped(phys_addr_t pa)
 {
@@ -47,6 +52,8 @@ unsigned long p_block_mapped(phys_addr_t pa)
 		return 0;
 	if (pa >= p && pa < p + IMMR_SIZE)
 		return VIRT_IMMR_BASE + pa - p;
+	if (pa < block_mapped_ram)
+		return (unsigned long)__va(pa);
 	return 0;
 }
 
@@ -58,7 +65,7 @@ unsigned long p_block_mapped(phys_addr_t pa)
 void __init MMU_init_hw(void)
 {
 	/* PIN up to the 3 first 8Mb after IMMR in DTLB table */
-#ifdef CONFIG_PIN_TLB
+#ifdef CONFIG_PIN_TLB_DATA
 	unsigned long ctr = mfspr(SPRN_MD_CTR) & 0xfe000000;
 	unsigned long flags = 0xf0 | MD_SPS16K | _PAGE_SHARED | _PAGE_DIRTY;
 #ifdef CONFIG_PIN_TLB_IMMR
@@ -80,7 +87,7 @@ void __init MMU_init_hw(void)
 #endif
 }
 
-static void mmu_mapin_immr(void)
+static void __init mmu_mapin_immr(void)
 {
 	unsigned long p = PHYS_IMMR_BASE;
 	unsigned long v = VIRT_IMMR_BASE;
@@ -96,8 +103,11 @@ static void mmu_mapin_immr(void)
 extern unsigned int DTLBMiss_jmp;
 #endif
 extern unsigned int DTLBMiss_cmp, FixupDAR_cmp;
+#ifndef CONFIG_PIN_TLB_TEXT
+extern unsigned int ITLBMiss_cmp;
+#endif
 
-void mmu_patch_cmp_limit(unsigned int *addr, unsigned long mapped)
+static void __init mmu_patch_cmp_limit(unsigned int *addr, unsigned long mapped)
 {
 	unsigned int instr = *addr;
 
@@ -116,6 +126,9 @@ unsigned long __init mmu_mapin_ram(unsigned long top)
 #ifndef CONFIG_PIN_TLB_IMMR
 		patch_instruction(&DTLBMiss_jmp, PPC_INST_NOP);
 #endif
+#ifndef CONFIG_PIN_TLB_TEXT
+		mmu_patch_cmp_limit(&ITLBMiss_cmp, 0);
+#endif
 	} else {
 		mapped = top & ~(LARGE_PAGE_SIZE_8M - 1);
 	}
@@ -133,11 +146,13 @@ unsigned long __init mmu_mapin_ram(unsigned long top)
 	if (mapped)
 		memblock_set_current_limit(mapped);
 
+	block_mapped_ram = mapped;
+
 	return mapped;
 }
 
-void setup_initial_memory_limit(phys_addr_t first_memblock_base,
-				phys_addr_t first_memblock_size)
+void __init setup_initial_memory_limit(phys_addr_t first_memblock_base,
+				       phys_addr_t first_memblock_size)
 {
 	/* We don't currently support the first MEMBLOCK not mapping 0
 	 * physical on those processors
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index 7414034df1c3..fb844d2f266e 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -8,7 +8,7 @@ ccflags-$(CONFIG_PPC64)	:= $(NO_MINIMAL_TOC)
 
 obj-y				:= fault.o mem.o pgtable.o mmap.o \
 				   init_$(BITS).o pgtable_$(BITS).o \
-				   init-common.o
+				   init-common.o mmu_context.o
 obj-$(CONFIG_PPC_MMU_NOHASH)	+= mmu_context_nohash.o tlb_nohash.o \
 				   tlb_nohash_low.o
 obj-$(CONFIG_PPC_BOOK3E)	+= tlb_low_$(BITS)e.o
@@ -22,8 +22,6 @@ ifeq ($(CONFIG_PPC_STD_MMU_64),y)
 obj-$(CONFIG_PPC_4K_PAGES)	+= hash64_4k.o
 obj-$(CONFIG_PPC_64K_PAGES)	+= hash64_64k.o
 endif
-obj-$(CONFIG_PPC_ICSWX)		+= icswx.o
-obj-$(CONFIG_PPC_ICSWX_PID)	+= icswx_pid.o
 obj-$(CONFIG_40x)		+= 40x_mmu.o
 obj-$(CONFIG_44x)		+= 44x_mmu.o
 obj-$(CONFIG_PPC_8xx)		+= 8xx_mmu.o
diff --git a/arch/powerpc/mm/dump_hashpagetable.c b/arch/powerpc/mm/dump_hashpagetable.c
index b1c144b03fcf..5c4c93dcff19 100644
--- a/arch/powerpc/mm/dump_hashpagetable.c
+++ b/arch/powerpc/mm/dump_hashpagetable.c
@@ -205,7 +205,7 @@ static void dump_hpte_info(struct pg_state *st, unsigned long ea, u64 v, u64 r,
 	aps_index = calculate_pagesize(st, aps, "actual");
 	if (aps_index != 2)
 		seq_printf(st->seq, "LP enc: %lx", lp);
-	seq_puts(st->seq, "\n");
+	seq_putc(st->seq, '\n');
 }
 
 
diff --git a/arch/powerpc/mm/dump_linuxpagetables.c b/arch/powerpc/mm/dump_linuxpagetables.c
index 44fe4833910f..c9282d27b203 100644
--- a/arch/powerpc/mm/dump_linuxpagetables.c
+++ b/arch/powerpc/mm/dump_linuxpagetables.c
@@ -350,7 +350,7 @@ static void note_page(struct pg_state *st, unsigned long addr,
 					  st->current_flags,
 					  pg_level[st->level].num);
 
-			seq_puts(st->seq, "\n");
+			seq_putc(st->seq, '\n');
 		}
 
 		/*
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 4c422632047b..4797d08581ce 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -45,43 +45,39 @@
 #include <asm/siginfo.h>
 #include <asm/debug.h>
 
-#include "icswx.h"
-
-#ifdef CONFIG_KPROBES
-static inline int notify_page_fault(struct pt_regs *regs)
+static inline bool notify_page_fault(struct pt_regs *regs)
 {
-	int ret = 0;
+	bool ret = false;
 
+#ifdef CONFIG_KPROBES
 	/* kprobe_running() needs smp_processor_id() */
 	if (!user_mode(regs)) {
 		preempt_disable();
 		if (kprobe_running() && kprobe_fault_handler(regs, 11))
-			ret = 1;
+			ret = true;
 		preempt_enable();
 	}
+#endif /* CONFIG_KPROBES */
+
+	if (unlikely(debugger_fault_handler(regs)))
+		ret = true;
 
 	return ret;
 }
-#else
-static inline int notify_page_fault(struct pt_regs *regs)
-{
-	return 0;
-}
-#endif
 
 /*
  * Check whether the instruction at regs->nip is a store using
  * an update addressing form which will update r1.
  */
-static int store_updates_sp(struct pt_regs *regs)
+static bool store_updates_sp(struct pt_regs *regs)
 {
 	unsigned int inst;
 
 	if (get_user(inst, (unsigned int __user *)regs->nip))
-		return 0;
+		return false;
 	/* check for 1 in the rA field */
 	if (((inst >> 16) & 0x1f) != 1)
-		return 0;
+		return false;
 	/* check major opcode */
 	switch (inst >> 26) {
 	case 37:	/* stwu */
@@ -89,7 +85,7 @@ static int store_updates_sp(struct pt_regs *regs)
 	case 45:	/* sthu */
 	case 53:	/* stfsu */
 	case 55:	/* stfdu */
-		return 1;
+		return true;
 	case 62:	/* std or stdu */
 		return (inst & 3) == 1;
 	case 31:
@@ -101,18 +97,53 @@ static int store_updates_sp(struct pt_regs *regs)
 		case 439:	/* sthux */
 		case 695:	/* stfsux */
 		case 759:	/* stfdux */
-			return 1;
+			return true;
 		}
 	}
-	return 0;
+	return false;
 }
 /*
  * do_page_fault error handling helpers
  */
 
-#define MM_FAULT_RETURN		0
-#define MM_FAULT_CONTINUE	-1
-#define MM_FAULT_ERR(sig)	(sig)
+static int
+__bad_area_nosemaphore(struct pt_regs *regs, unsigned long address, int si_code)
+{
+	/*
+	 * If we are in kernel mode, bail out with a SEGV, this will
+	 * be caught by the assembly which will restore the non-volatile
+	 * registers before calling bad_page_fault()
+	 */
+	if (!user_mode(regs))
+		return SIGSEGV;
+
+	_exception(SIGSEGV, regs, si_code, address);
+
+	return 0;
+}
+
+static noinline int bad_area_nosemaphore(struct pt_regs *regs, unsigned long address)
+{
+	return __bad_area_nosemaphore(regs, address, SEGV_MAPERR);
+}
+
+static int __bad_area(struct pt_regs *regs, unsigned long address, int si_code)
+{
+	struct mm_struct *mm = current->mm;
+
+	/*
+	 * Something tried to access memory that isn't in our memory map..
+	 * Fix it, but check if it's kernel or user first..
+	 */
+	up_read(&mm->mmap_sem);
+
+	return __bad_area_nosemaphore(regs, address, si_code);
+}
+
+static noinline int bad_area(struct pt_regs *regs, unsigned long address)
+{
+	return __bad_area(regs, address, SEGV_MAPERR);
+}
 
 static int do_sigbus(struct pt_regs *regs, unsigned long address,
 		     unsigned int fault)
@@ -121,7 +152,7 @@ static int do_sigbus(struct pt_regs *regs, unsigned long address,
 	unsigned int lsb = 0;
 
 	if (!user_mode(regs))
-		return MM_FAULT_ERR(SIGBUS);
+		return SIGBUS;
 
 	current->thread.trap_nr = BUS_ADRERR;
 	info.si_signo = SIGBUS;
@@ -142,25 +173,17 @@ static int do_sigbus(struct pt_regs *regs, unsigned long address,
 #endif
 	info.si_addr_lsb = lsb;
 	force_sig_info(SIGBUS, &info, current);
-	return MM_FAULT_RETURN;
+	return 0;
 }
 
 static int mm_fault_error(struct pt_regs *regs, unsigned long addr, int fault)
 {
 	/*
-	 * Pagefault was interrupted by SIGKILL. We have no reason to
-	 * continue the pagefault.
+	 * Kernel page fault interrupted by SIGKILL. We have no reason to
+	 * continue processing.
 	 */
-	if (fatal_signal_pending(current)) {
-		/* Coming from kernel, we need to deal with uaccess fixups */
-		if (user_mode(regs))
-			return MM_FAULT_RETURN;
-		return MM_FAULT_ERR(SIGKILL);
-	}
-
-	/* No fault: be happy */
-	if (!(fault & VM_FAULT_ERROR))
-		return MM_FAULT_CONTINUE;
+	if (fatal_signal_pending(current) && !user_mode(regs))
+		return SIGKILL;
 
 	/* Out of memory */
 	if (fault & VM_FAULT_OOM) {
@@ -169,19 +192,176 @@ static int mm_fault_error(struct pt_regs *regs, unsigned long addr, int fault)
 		 * made us unable to handle the page fault gracefully.
 		 */
 		if (!user_mode(regs))
-			return MM_FAULT_ERR(SIGKILL);
+			return SIGSEGV;
 		pagefault_out_of_memory();
-		return MM_FAULT_RETURN;
+	} else {
+		if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON|
+			     VM_FAULT_HWPOISON_LARGE))
+			return do_sigbus(regs, addr, fault);
+		else if (fault & VM_FAULT_SIGSEGV)
+			return bad_area_nosemaphore(regs, addr);
+		else
+			BUG();
+	}
+	return 0;
+}
+
+/* Is this a bad kernel fault ? */
+static bool bad_kernel_fault(bool is_exec, unsigned long error_code,
+			     unsigned long address)
+{
+	if (is_exec && (error_code & (DSISR_NOEXEC_OR_G | DSISR_KEYFAULT))) {
+		printk_ratelimited(KERN_CRIT "kernel tried to execute"
+				   " exec-protected page (%lx) -"
+				   "exploit attempt? (uid: %d)\n",
+				   address, from_kuid(&init_user_ns,
+						      current_uid()));
 	}
+	return is_exec || (address >= TASK_SIZE);
+}
 
-	if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON|VM_FAULT_HWPOISON_LARGE))
-		return do_sigbus(regs, addr, fault);
+static bool bad_stack_expansion(struct pt_regs *regs, unsigned long address,
+				struct vm_area_struct *vma,
+				bool store_update_sp)
+{
+	/*
+	 * N.B. The POWER/Open ABI allows programs to access up to
+	 * 288 bytes below the stack pointer.
+	 * The kernel signal delivery code writes up to about 1.5kB
+	 * below the stack pointer (r1) before decrementing it.
+	 * The exec code can write slightly over 640kB to the stack
+	 * before setting the user r1.  Thus we allow the stack to
+	 * expand to 1MB without further checks.
+	 */
+	if (address + 0x100000 < vma->vm_end) {
+		/* get user regs even if this fault is in kernel mode */
+		struct pt_regs *uregs = current->thread.regs;
+		if (uregs == NULL)
+			return true;
 
-	/* We don't understand the fault code, this is fatal */
-	BUG();
-	return MM_FAULT_CONTINUE;
+		/*
+		 * A user-mode access to an address a long way below
+		 * the stack pointer is only valid if the instruction
+		 * is one which would update the stack pointer to the
+		 * address accessed if the instruction completed,
+		 * i.e. either stwu rs,n(r1) or stwux rs,r1,rb
+		 * (or the byte, halfword, float or double forms).
+		 *
+		 * If we don't check this then any write to the area
+		 * between the last mapped region and the stack will
+		 * expand the stack rather than segfaulting.
+		 */
+		if (address + 2048 < uregs->gpr[1] && !store_update_sp)
+			return true;
+	}
+	return false;
+}
+
+static bool access_error(bool is_write, bool is_exec,
+			 struct vm_area_struct *vma)
+{
+	/*
+	 * Allow execution from readable areas if the MMU does not
+	 * provide separate controls over reading and executing.
+	 *
+	 * Note: That code used to not be enabled for 4xx/BookE.
+	 * It is now as I/D cache coherency for these is done at
+	 * set_pte_at() time and I see no reason why the test
+	 * below wouldn't be valid on those processors. This -may-
+	 * break programs compiled with a really old ABI though.
+	 */
+	if (is_exec) {
+		return !(vma->vm_flags & VM_EXEC) &&
+			(cpu_has_feature(CPU_FTR_NOEXECUTE) ||
+			 !(vma->vm_flags & (VM_READ | VM_WRITE)));
+	}
+
+	if (is_write) {
+		if (unlikely(!(vma->vm_flags & VM_WRITE)))
+			return true;
+		return false;
+	}
+
+	if (unlikely(!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))))
+		return true;
+
+	return false;
 }
 
+#ifdef CONFIG_PPC_SMLPAR
+static inline void cmo_account_page_fault(void)
+{
+	if (firmware_has_feature(FW_FEATURE_CMO)) {
+		u32 page_ins;
+
+		preempt_disable();
+		page_ins = be32_to_cpu(get_lppaca()->page_ins);
+		page_ins += 1 << PAGE_FACTOR;
+		get_lppaca()->page_ins = cpu_to_be32(page_ins);
+		preempt_enable();
+	}
+}
+#else
+static inline void cmo_account_page_fault(void) { }
+#endif /* CONFIG_PPC_SMLPAR */
+
+#ifdef CONFIG_PPC_STD_MMU
+static void sanity_check_fault(bool is_write, unsigned long error_code)
+{
+	/*
+	 * For hash translation mode, we should never get a
+	 * PROTFAULT. Any update to pte to reduce access will result in us
+	 * removing the hash page table entry, thus resulting in a DSISR_NOHPTE
+	 * fault instead of DSISR_PROTFAULT.
+	 *
+	 * A pte update to relax the access will not result in a hash page table
+	 * entry invalidate and hence can result in DSISR_PROTFAULT.
+	 * ptep_set_access_flags() doesn't do a hpte flush. This is why we have
+	 * the special !is_write in the below conditional.
+	 *
+	 * For platforms that doesn't supports coherent icache and do support
+	 * per page noexec bit, we do setup things such that we do the
+	 * sync between D/I cache via fault. But that is handled via low level
+	 * hash fault code (hash_page_do_lazy_icache()) and we should not reach
+	 * here in such case.
+	 *
+	 * For wrong access that can result in PROTFAULT, the above vma->vm_flags
+	 * check should handle those and hence we should fall to the bad_area
+	 * handling correctly.
+	 *
+	 * For embedded with per page exec support that doesn't support coherent
+	 * icache we do get PROTFAULT and we handle that D/I cache sync in
+	 * set_pte_at while taking the noexec/prot fault. Hence this is WARN_ON
+	 * is conditional for server MMU.
+	 *
+	 * For radix, we can get prot fault for autonuma case, because radix
+	 * page table will have them marked noaccess for user.
+	 */
+	if (!radix_enabled() && !is_write)
+		WARN_ON_ONCE(error_code & DSISR_PROTFAULT);
+}
+#else
+static void sanity_check_fault(bool is_write, unsigned long error_code) { }
+#endif /* CONFIG_PPC_STD_MMU */
+
+/*
+ * Define the correct "is_write" bit in error_code based
+ * on the processor family
+ */
+#if (defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
+#define page_fault_is_write(__err)	((__err) & ESR_DST)
+#define page_fault_is_bad(__err)	(0)
+#else
+#define page_fault_is_write(__err)	((__err) & DSISR_ISSTORE)
+#if defined(CONFIG_PPC_8xx)
+#define page_fault_is_bad(__err)	((__err) & DSISR_NOEXEC_OR_G)
+#elif defined(CONFIG_PPC64)
+#define page_fault_is_bad(__err)	((__err) & DSISR_BAD_FAULT_64S)
+#else
+#define page_fault_is_bad(__err)	((__err) & DSISR_BAD_FAULT_32S)
+#endif
+#endif
+
 /*
  * For 600- and 800-family processors, the error_code parameter is DSISR
  * for a data fault, SRR1 for an instruction fault. For 400-family processors
@@ -195,92 +375,56 @@ static int mm_fault_error(struct pt_regs *regs, unsigned long addr, int fault)
  * The return value is 0 if the fault was handled, or the signal
  * number if this is a kernel fault that can't be handled here.
  */
-int do_page_fault(struct pt_regs *regs, unsigned long address,
-			    unsigned long error_code)
+static int __do_page_fault(struct pt_regs *regs, unsigned long address,
+			   unsigned long error_code)
 {
-	enum ctx_state prev_state = exception_enter();
 	struct vm_area_struct * vma;
 	struct mm_struct *mm = current->mm;
 	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
-	int code = SEGV_MAPERR;
-	int is_write = 0;
-	int trap = TRAP(regs);
- 	int is_exec = trap == 0x400;
+ 	int is_exec = TRAP(regs) == 0x400;
 	int is_user = user_mode(regs);
-	int fault;
-	int rc = 0, store_update_sp = 0;
+	int is_write = page_fault_is_write(error_code);
+	int fault, major = 0;
+	bool store_update_sp = false;
 
-#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
-	/*
-	 * Fortunately the bit assignments in SRR1 for an instruction
-	 * fault and DSISR for a data fault are mostly the same for the
-	 * bits we are interested in.  But there are some bits which
-	 * indicate errors in DSISR but can validly be set in SRR1.
-	 */
-	if (is_exec)
-		error_code &= 0x48200000;
-	else
-		is_write = error_code & DSISR_ISSTORE;
-#else
-	is_write = error_code & ESR_DST;
-#endif /* CONFIG_4xx || CONFIG_BOOKE */
+	if (notify_page_fault(regs))
+		return 0;
 
-#ifdef CONFIG_PPC_ICSWX
-	/*
-	 * we need to do this early because this "data storage
-	 * interrupt" does not update the DAR/DEAR so we don't want to
-	 * look at it
-	 */
-	if (error_code & ICSWX_DSI_UCT) {
-		rc = acop_handle_fault(regs, address, error_code);
-		if (rc)
-			goto bail;
+	if (unlikely(page_fault_is_bad(error_code))) {
+		if (is_user) {
+			_exception(SIGBUS, regs, BUS_OBJERR, address);
+			return 0;
+		}
+		return SIGBUS;
 	}
-#endif /* CONFIG_PPC_ICSWX */
-
-	if (notify_page_fault(regs))
-		goto bail;
 
-	if (unlikely(debugger_fault_handler(regs)))
-		goto bail;
+	/* Additional sanity check(s) */
+	sanity_check_fault(is_write, error_code);
 
 	/*
 	 * The kernel should never take an execute fault nor should it
 	 * take a page fault to a kernel address.
 	 */
-	if (!is_user && (is_exec || (address >= TASK_SIZE))) {
-		rc = SIGSEGV;
-		goto bail;
-	}
+	if (unlikely(!is_user && bad_kernel_fault(is_exec, error_code, address)))
+		return SIGSEGV;
 
-#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE) || \
-      defined(CONFIG_PPC_BOOK3S_64) || defined(CONFIG_PPC_8xx))
-  	if (error_code & DSISR_DABRMATCH) {
-		/* breakpoint match */
-		do_break(regs, address, error_code);
-		goto bail;
+	/*
+	 * If we're in an interrupt, have no user context or are running
+	 * in a region with pagefaults disabled then we must not take the fault
+	 */
+	if (unlikely(faulthandler_disabled() || !mm)) {
+		if (is_user)
+			printk_ratelimited(KERN_ERR "Page fault in user mode"
+					   " with faulthandler_disabled()=%d"
+					   " mm=%p\n",
+					   faulthandler_disabled(), mm);
+		return bad_area_nosemaphore(regs, address);
 	}
-#endif
 
 	/* We restore the interrupt state now */
 	if (!arch_irq_disabled_regs(regs))
 		local_irq_enable();
 
-	if (faulthandler_disabled() || mm == NULL) {
-		if (!is_user) {
-			rc = SIGSEGV;
-			goto bail;
-		}
-		/* faulthandler_disabled() in user mode is really bad,
-		   as is current->mm == NULL. */
-		printk(KERN_EMERG "Page fault in user mode with "
-		       "faulthandler_disabled() = %d mm = %p\n",
-		       faulthandler_disabled(), mm);
-		printk(KERN_EMERG "NIP = %lx  MSR = %lx\n",
-		       regs->nip, regs->msr);
-		die("Weird page fault", regs, SIGSEGV);
-	}
-
 	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
 
 	/*
@@ -293,6 +437,10 @@ int do_page_fault(struct pt_regs *regs, unsigned long address,
 
 	if (is_user)
 		flags |= FAULT_FLAG_USER;
+	if (is_write)
+		flags |= FAULT_FLAG_WRITE;
+	if (is_exec)
+		flags |= FAULT_FLAG_INSTRUCTION;
 
 	/* When running in the kernel we expect faults to occur only to
 	 * addresses in user space.  All other faults represent errors in the
@@ -309,9 +457,9 @@ int do_page_fault(struct pt_regs *regs, unsigned long address,
 	 * source.  If this is invalid we can skip the address space check,
 	 * thus avoiding the deadlock.
 	 */
-	if (!down_read_trylock(&mm->mmap_sem)) {
+	if (unlikely(!down_read_trylock(&mm->mmap_sem))) {
 		if (!is_user && !search_exception_tables(regs->nip))
-			goto bad_area_nosemaphore;
+			return bad_area_nosemaphore(regs, address);
 
 retry:
 		down_read(&mm->mmap_sem);
@@ -325,122 +473,24 @@ retry:
 	}
 
 	vma = find_vma(mm, address);
-	if (!vma)
-		goto bad_area;
-	if (vma->vm_start <= address)
+	if (unlikely(!vma))
+		return bad_area(regs, address);
+	if (likely(vma->vm_start <= address))
 		goto good_area;
-	if (!(vma->vm_flags & VM_GROWSDOWN))
-		goto bad_area;
+	if (unlikely(!(vma->vm_flags & VM_GROWSDOWN)))
+		return bad_area(regs, address);
 
-	/*
-	 * N.B. The POWER/Open ABI allows programs to access up to
-	 * 288 bytes below the stack pointer.
-	 * The kernel signal delivery code writes up to about 1.5kB
-	 * below the stack pointer (r1) before decrementing it.
-	 * The exec code can write slightly over 640kB to the stack
-	 * before setting the user r1.  Thus we allow the stack to
-	 * expand to 1MB without further checks.
-	 */
-	if (address + 0x100000 < vma->vm_end) {
-		/* get user regs even if this fault is in kernel mode */
-		struct pt_regs *uregs = current->thread.regs;
-		if (uregs == NULL)
-			goto bad_area;
+	/* The stack is being expanded, check if it's valid */
+	if (unlikely(bad_stack_expansion(regs, address, vma, store_update_sp)))
+		return bad_area(regs, address);
 
-		/*
-		 * A user-mode access to an address a long way below
-		 * the stack pointer is only valid if the instruction
-		 * is one which would update the stack pointer to the
-		 * address accessed if the instruction completed,
-		 * i.e. either stwu rs,n(r1) or stwux rs,r1,rb
-		 * (or the byte, halfword, float or double forms).
-		 *
-		 * If we don't check this then any write to the area
-		 * between the last mapped region and the stack will
-		 * expand the stack rather than segfaulting.
-		 */
-		if (address + 2048 < uregs->gpr[1] && !store_update_sp)
-			goto bad_area;
-	}
-	if (expand_stack(vma, address))
-		goto bad_area;
+	/* Try to expand it */
+	if (unlikely(expand_stack(vma, address)))
+		return bad_area(regs, address);
 
 good_area:
-	code = SEGV_ACCERR;
-#if defined(CONFIG_6xx)
-	if (error_code & 0x95700000)
-		/* an error such as lwarx to I/O controller space,
-		   address matching DABR, eciwx, etc. */
-		goto bad_area;
-#endif /* CONFIG_6xx */
-#if defined(CONFIG_8xx)
-        /* The MPC8xx seems to always set 0x80000000, which is
-         * "undefined".  Of those that can be set, this is the only
-         * one which seems bad.
-         */
-	if (error_code & 0x10000000)
-                /* Guarded storage error. */
-		goto bad_area;
-#endif /* CONFIG_8xx */
-
-	if (is_exec) {
-		/*
-		 * Allow execution from readable areas if the MMU does not
-		 * provide separate controls over reading and executing.
-		 *
-		 * Note: That code used to not be enabled for 4xx/BookE.
-		 * It is now as I/D cache coherency for these is done at
-		 * set_pte_at() time and I see no reason why the test
-		 * below wouldn't be valid on those processors. This -may-
-		 * break programs compiled with a really old ABI though.
-		 */
-		if (!(vma->vm_flags & VM_EXEC) &&
-		    (cpu_has_feature(CPU_FTR_NOEXECUTE) ||
-		     !(vma->vm_flags & (VM_READ | VM_WRITE))))
-			goto bad_area;
-	/* a write */
-	} else if (is_write) {
-		if (!(vma->vm_flags & VM_WRITE))
-			goto bad_area;
-		flags |= FAULT_FLAG_WRITE;
-	/* a read */
-	} else {
-		if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
-			goto bad_area;
-	}
-#ifdef CONFIG_PPC_STD_MMU
-	/*
-	 * For hash translation mode, we should never get a
-	 * PROTFAULT. Any update to pte to reduce access will result in us
-	 * removing the hash page table entry, thus resulting in a DSISR_NOHPTE
-	 * fault instead of DSISR_PROTFAULT.
-	 *
-	 * A pte update to relax the access will not result in a hash page table
-	 * entry invalidate and hence can result in DSISR_PROTFAULT.
-	 * ptep_set_access_flags() doesn't do a hpte flush. This is why we have
-	 * the special !is_write in the below conditional.
-	 *
-	 * For platforms that doesn't supports coherent icache and do support
-	 * per page noexec bit, we do setup things such that we do the
-	 * sync between D/I cache via fault. But that is handled via low level
-	 * hash fault code (hash_page_do_lazy_icache()) and we should not reach
-	 * here in such case.
-	 *
-	 * For wrong access that can result in PROTFAULT, the above vma->vm_flags
-	 * check should handle those and hence we should fall to the bad_area
-	 * handling correctly.
-	 *
-	 * For embedded with per page exec support that doesn't support coherent
-	 * icache we do get PROTFAULT and we handle that D/I cache sync in
-	 * set_pte_at while taking the noexec/prot fault. Hence this is WARN_ON
-	 * is conditional for server MMU.
-	 *
-	 * For radix, we can get prot fault for autonuma case, because radix
-	 * page table will have them marked noaccess for user.
-	 */
-	if (!radix_enabled() && !is_write)
-		WARN_ON_ONCE(error_code & DSISR_PROTFAULT);
-#endif /* CONFIG_PPC_STD_MMU */
+	if (unlikely(access_error(is_write, is_exec, vma)))
+		return bad_area(regs, address);
 
 	/*
 	 * If for any reason at all we couldn't handle the fault,
@@ -448,6 +498,7 @@ good_area:
 	 * the fault.
 	 */
 	fault = handle_mm_fault(vma, address, flags);
+	major |= fault & VM_FAULT_MAJOR;
 
 	/*
 	 * Handle the retry right now, the mmap_sem has been released in that
@@ -465,64 +516,39 @@ good_area:
 			if (!fatal_signal_pending(current))
 				goto retry;
 		}
-		/* We will enter mm_fault_error() below */
-	} else
-		up_read(&current->mm->mmap_sem);
-
-	if (unlikely(fault & (VM_FAULT_RETRY|VM_FAULT_ERROR))) {
-		if (fault & VM_FAULT_SIGSEGV)
-			goto bad_area_nosemaphore;
-		rc = mm_fault_error(regs, address, fault);
-		if (rc >= MM_FAULT_RETURN)
-			goto bail;
-		else
-			rc = 0;
+
+		/*
+		 * User mode? Just return to handle the fatal exception otherwise
+		 * return to bad_page_fault
+		 */
+		return is_user ? 0 : SIGBUS;
 	}
 
+	up_read(&current->mm->mmap_sem);
+
+	if (unlikely(fault & VM_FAULT_ERROR))
+		return mm_fault_error(regs, address, fault);
+
 	/*
 	 * Major/minor page fault accounting.
 	 */
-	if (fault & VM_FAULT_MAJOR) {
+	if (major) {
 		current->maj_flt++;
-		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
-			      regs, address);
-#ifdef CONFIG_PPC_SMLPAR
-		if (firmware_has_feature(FW_FEATURE_CMO)) {
-			u32 page_ins;
-
-			preempt_disable();
-			page_ins = be32_to_cpu(get_lppaca()->page_ins);
-			page_ins += 1 << PAGE_FACTOR;
-			get_lppaca()->page_ins = cpu_to_be32(page_ins);
-			preempt_enable();
-		}
-#endif /* CONFIG_PPC_SMLPAR */
+		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address);
+		cmo_account_page_fault();
 	} else {
 		current->min_flt++;
-		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
-			      regs, address);
-	}
-
-	goto bail;
-
-bad_area:
-	up_read(&mm->mmap_sem);
-
-bad_area_nosemaphore:
-	/* User mode accesses cause a SIGSEGV */
-	if (is_user) {
-		_exception(SIGSEGV, regs, code, address);
-		goto bail;
+		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address);
 	}
+	return 0;
+}
+NOKPROBE_SYMBOL(__do_page_fault);
 
-	if (is_exec && (error_code & DSISR_PROTFAULT))
-		printk_ratelimited(KERN_CRIT "kernel tried to execute NX-protected"
-				   " page (%lx) - exploit attempt? (uid: %d)\n",
-				   address, from_kuid(&init_user_ns, current_uid()));
-
-	rc = SIGSEGV;
-
-bail:
+int do_page_fault(struct pt_regs *regs, unsigned long address,
+		  unsigned long error_code)
+{
+	enum ctx_state prev_state = exception_enter();
+	int rc = __do_page_fault(regs, address, error_code);
 	exception_exit(prev_state);
 	return rc;
 }
diff --git a/arch/powerpc/mm/hash_low_32.S b/arch/powerpc/mm/hash_low_32.S
index 6f962e5cb5e1..ffbd7c0bda96 100644
--- a/arch/powerpc/mm/hash_low_32.S
+++ b/arch/powerpc/mm/hash_low_32.S
@@ -575,7 +575,6 @@ _GLOBAL(flush_hash_pages)
 	rlwinm	r8,r8,0,31,29		/* clear HASHPTE bit */
 	stwcx.	r8,0,r5			/* update the pte */
 	bne-	33b
-EXPORT_SYMBOL(flush_hash_pages)
 
 	/* Get the address of the primary PTE group in the hash table (r3) */
 _GLOBAL(flush_hash_patch_A)
@@ -634,6 +633,7 @@ _GLOBAL(flush_hash_patch_B)
 	SYNC_601
 	isync
 	blr
+EXPORT_SYMBOL(flush_hash_pages)
 
 /*
  * Flush an entry from the TLB
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 7a20669c19e7..67ec2e927253 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -61,6 +61,7 @@
 #include <asm/tm.h>
 #include <asm/trace.h>
 #include <asm/ps3.h>
+#include <asm/pte-walk.h>
 
 #ifdef DEBUG
 #define DBG(fmt...) udbg_printf(fmt)
@@ -507,9 +508,9 @@ static int __init htab_dt_scan_hugepage_blocks(unsigned long node,
 	printk(KERN_INFO "Huge page(16GB) memory: "
 			"addr = 0x%lX size = 0x%lX pages = %d\n",
 			phys_addr, block_size, expected_pages);
-	if (phys_addr + (16 * GB) <= memblock_end_of_DRAM()) {
+	if (phys_addr + block_size * expected_pages <= memblock_end_of_DRAM()) {
 		memblock_reserve(phys_addr, block_size * expected_pages);
-		add_gpage(phys_addr, block_size, expected_pages);
+		pseries_add_gpage(phys_addr, block_size, expected_pages);
 	}
 	return 0;
 }
@@ -1019,6 +1020,7 @@ void __init hash__early_init_mmu(void)
 	__kernel_virt_size = H_KERN_VIRT_SIZE;
 	__vmalloc_start = H_VMALLOC_START;
 	__vmalloc_end = H_VMALLOC_END;
+	__kernel_io_start = H_KERN_IO_START;
 	vmemmap = (struct page *)H_VMEMMAP_BASE;
 	ioremap_bot = IOREMAP_BASE;
 
@@ -1228,7 +1230,6 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea,
 	unsigned long vsid;
 	pte_t *ptep;
 	unsigned hugeshift;
-	const struct cpumask *tmp;
 	int rc, user_region = 0;
 	int psize, ssize;
 
@@ -1280,8 +1281,7 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea,
 	}
 
 	/* Check CPU locality */
-	tmp = cpumask_of(smp_processor_id());
-	if (user_region && cpumask_equal(mm_cpumask(mm), tmp))
+	if (user_region && mm_is_thread_local(mm))
 		flags |= HPTE_LOCAL_UPDATE;
 
 #ifndef CONFIG_PPC_64K_PAGES
@@ -1297,7 +1297,7 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea,
 #endif /* CONFIG_PPC_64K_PAGES */
 
 	/* Get PTE and page size from page tables */
-	ptep = __find_linux_pte_or_hugepte(pgdir, ea, &is_thp, &hugeshift);
+	ptep = find_linux_pte(pgdir, ea, &is_thp, &hugeshift);
 	if (ptep == NULL || !pte_present(*ptep)) {
 		DBG_LOW(" no PTE !\n");
 		rc = 1;
@@ -1526,7 +1526,7 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
 	 * THP pages use update_mmu_cache_pmd. We don't do
 	 * hash preload there. Hence can ignore THP here
 	 */
-	ptep = find_linux_pte_or_hugepte(pgdir, ea, NULL, &hugepage_shift);
+	ptep = find_current_mm_pte(pgdir, ea, NULL, &hugepage_shift);
 	if (!ptep)
 		goto out_exit;
 
@@ -1543,7 +1543,7 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
 #endif /* CONFIG_PPC_64K_PAGES */
 
 	/* Is that local to this CPU ? */
-	if (cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
+	if (mm_is_thread_local(mm))
 		update_flags |= HPTE_LOCAL_UPDATE;
 
 	/* Hash it in */
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index e1bf5ca397fe..1571a498a33f 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -24,6 +24,8 @@
 #include <asm/tlb.h>
 #include <asm/setup.h>
 #include <asm/hugetlb.h>
+#include <asm/pte-walk.h>
+
 
 #ifdef CONFIG_HUGETLB_PAGE
 
@@ -36,32 +38,15 @@
 unsigned int HPAGE_SHIFT;
 EXPORT_SYMBOL(HPAGE_SHIFT);
 
-/*
- * Tracks gpages after the device tree is scanned and before the
- * huge_boot_pages list is ready.  On non-Freescale implementations, this is
- * just used to track 16G pages and so is a single array.  FSL-based
- * implementations may have more than one gpage size, so we need multiple
- * arrays
- */
-#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx)
-#define MAX_NUMBER_GPAGES	128
-struct psize_gpages {
-	u64 gpage_list[MAX_NUMBER_GPAGES];
-	unsigned int nr_gpages;
-};
-static struct psize_gpages gpage_freearray[MMU_PAGE_COUNT];
-#else
-#define MAX_NUMBER_GPAGES	1024
-static u64 gpage_freearray[MAX_NUMBER_GPAGES];
-static unsigned nr_gpages;
-#endif
-
 #define hugepd_none(hpd)	(hpd_val(hpd) == 0)
 
 pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, unsigned long sz)
 {
-	/* Only called for hugetlbfs pages, hence can ignore THP */
-	return __find_linux_pte_or_hugepte(mm->pgd, addr, NULL, NULL);
+	/*
+	 * Only called for hugetlbfs pages, hence can ignore THP and the
+	 * irq disabled walk.
+	 */
+	return __find_linux_pte(mm->pgd, addr, NULL, NULL);
 }
 
 static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
@@ -210,145 +195,20 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz
 	return hugepte_offset(*hpdp, addr, pdshift);
 }
 
-#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx)
-/* Build list of addresses of gigantic pages.  This function is used in early
- * boot before the buddy allocator is setup.
- */
-void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages)
-{
-	unsigned int idx = shift_to_mmu_psize(__ffs(page_size));
-	int i;
-
-	if (addr == 0)
-		return;
-
-	gpage_freearray[idx].nr_gpages = number_of_pages;
-
-	for (i = 0; i < number_of_pages; i++) {
-		gpage_freearray[idx].gpage_list[i] = addr;
-		addr += page_size;
-	}
-}
-
-/*
- * Moves the gigantic page addresses from the temporary list to the
- * huge_boot_pages list.
- */
-int alloc_bootmem_huge_page(struct hstate *hstate)
-{
-	struct huge_bootmem_page *m;
-	int idx = shift_to_mmu_psize(huge_page_shift(hstate));
-	int nr_gpages = gpage_freearray[idx].nr_gpages;
-
-	if (nr_gpages == 0)
-		return 0;
-
-#ifdef CONFIG_HIGHMEM
-	/*
-	 * If gpages can be in highmem we can't use the trick of storing the
-	 * data structure in the page; allocate space for this
-	 */
-	m = memblock_virt_alloc(sizeof(struct huge_bootmem_page), 0);
-	m->phys = gpage_freearray[idx].gpage_list[--nr_gpages];
-#else
-	m = phys_to_virt(gpage_freearray[idx].gpage_list[--nr_gpages]);
-#endif
-
-	list_add(&m->list, &huge_boot_pages);
-	gpage_freearray[idx].nr_gpages = nr_gpages;
-	gpage_freearray[idx].gpage_list[nr_gpages] = 0;
-	m->hstate = hstate;
-
-	return 1;
-}
+#ifdef CONFIG_PPC_BOOK3S_64
 /*
- * Scan the command line hugepagesz= options for gigantic pages; store those in
- * a list that we use to allocate the memory once all options are parsed.
+ * Tracks gpages after the device tree is scanned and before the
+ * huge_boot_pages list is ready on pseries.
  */
-
-unsigned long gpage_npages[MMU_PAGE_COUNT];
-
-static int __init do_gpage_early_setup(char *param, char *val,
-				       const char *unused, void *arg)
-{
-	static phys_addr_t size;
-	unsigned long npages;
-
-	/*
-	 * The hugepagesz and hugepages cmdline options are interleaved.  We
-	 * use the size variable to keep track of whether or not this was done
-	 * properly and skip over instances where it is incorrect.  Other
-	 * command-line parsing code will issue warnings, so we don't need to.
-	 *
-	 */
-	if ((strcmp(param, "default_hugepagesz") == 0) ||
-	    (strcmp(param, "hugepagesz") == 0)) {
-		size = memparse(val, NULL);
-	} else if (strcmp(param, "hugepages") == 0) {
-		if (size != 0) {
-			if (sscanf(val, "%lu", &npages) <= 0)
-				npages = 0;
-			if (npages > MAX_NUMBER_GPAGES) {
-				pr_warn("MMU: %lu pages requested for page "
-#ifdef CONFIG_PHYS_ADDR_T_64BIT
-					"size %llu KB, limiting to "
-#else
-					"size %u KB, limiting to "
-#endif
-					__stringify(MAX_NUMBER_GPAGES) "\n",
-					npages, size / 1024);
-				npages = MAX_NUMBER_GPAGES;
-			}
-			gpage_npages[shift_to_mmu_psize(__ffs(size))] = npages;
-			size = 0;
-		}
-	}
-	return 0;
-}
-
+#define MAX_NUMBER_GPAGES	1024
+__initdata static u64 gpage_freearray[MAX_NUMBER_GPAGES];
+__initdata static unsigned nr_gpages;
 
 /*
- * This function allocates physical space for pages that are larger than the
- * buddy allocator can handle.  We want to allocate these in highmem because
- * the amount of lowmem is limited.  This means that this function MUST be
- * called before lowmem_end_addr is set up in MMU_init() in order for the lmb
- * allocate to grab highmem.
- */
-void __init reserve_hugetlb_gpages(void)
-{
-	static __initdata char cmdline[COMMAND_LINE_SIZE];
-	phys_addr_t size, base;
-	int i;
-
-	strlcpy(cmdline, boot_command_line, COMMAND_LINE_SIZE);
-	parse_args("hugetlb gpages", cmdline, NULL, 0, 0, 0,
-			NULL, &do_gpage_early_setup);
-
-	/*
-	 * Walk gpage list in reverse, allocating larger page sizes first.
-	 * Skip over unsupported sizes, or sizes that have 0 gpages allocated.
-	 * When we reach the point in the list where pages are no longer
-	 * considered gpages, we're done.
-	 */
-	for (i = MMU_PAGE_COUNT-1; i >= 0; i--) {
-		if (mmu_psize_defs[i].shift == 0 || gpage_npages[i] == 0)
-			continue;
-		else if (mmu_psize_to_shift(i) < (MAX_ORDER + PAGE_SHIFT))
-			break;
-
-		size = (phys_addr_t)(1ULL << mmu_psize_to_shift(i));
-		base = memblock_alloc_base(size * gpage_npages[i], size,
-					   MEMBLOCK_ALLOC_ANYWHERE);
-		add_gpage(base, size, gpage_npages[i]);
-	}
-}
-
-#else /* !PPC_FSL_BOOK3E */
-
-/* Build list of addresses of gigantic pages.  This function is used in early
+ * Build list of addresses of gigantic pages.  This function is used in early
  * boot before the buddy allocator is setup.
  */
-void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages)
+void __init pseries_add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages)
 {
 	if (!addr)
 		return;
@@ -360,10 +220,7 @@ void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages)
 	}
 }
 
-/* Moves the gigantic page addresses from the temporary list to the
- * huge_boot_pages list.
- */
-int alloc_bootmem_huge_page(struct hstate *hstate)
+int __init pseries_alloc_bootmem_huge_page(struct hstate *hstate)
 {
 	struct huge_bootmem_page *m;
 	if (nr_gpages == 0)
@@ -376,6 +233,17 @@ int alloc_bootmem_huge_page(struct hstate *hstate)
 }
 #endif
 
+
+int __init alloc_bootmem_huge_page(struct hstate *h)
+{
+
+#ifdef CONFIG_PPC_BOOK3S_64
+	if (firmware_has_feature(FW_FEATURE_LPAR) && !radix_enabled())
+		return pseries_alloc_bootmem_huge_page(h);
+#endif
+	return __alloc_bootmem_huge_page(h);
+}
+
 #if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx)
 #define HUGEPD_FREELIST_SIZE \
 	((PAGE_SIZE - sizeof(struct hugepd_freelist)) / sizeof(pte_t))
@@ -407,8 +275,7 @@ static void hugepd_free(struct mmu_gather *tlb, void *hugepte)
 	batchp = &get_cpu_var(hugepd_freelist_cur);
 
 	if (atomic_read(&tlb->mm->mm_users) < 2 ||
-	    cpumask_equal(mm_cpumask(tlb->mm),
-			  cpumask_of(smp_processor_id()))) {
+	    mm_is_thread_local(tlb->mm)) {
 		kmem_cache_free(hugepte_cache, hugepte);
 		put_cpu_var(hugepd_freelist_cur);
 		return;
@@ -886,9 +753,8 @@ void flush_dcache_icache_hugepage(struct page *page)
  * This function need to be called with interrupts disabled. We use this variant
  * when we have MSR[EE] = 0 but the paca->soft_enabled = 1
  */
-
-pte_t *__find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea,
-				   bool *is_thp, unsigned *shift)
+pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea,
+			bool *is_thp, unsigned *hpage_shift)
 {
 	pgd_t pgd, *pgdp;
 	pud_t pud, *pudp;
@@ -897,8 +763,8 @@ pte_t *__find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea,
 	hugepd_t *hpdp = NULL;
 	unsigned pdshift = PGDIR_SHIFT;
 
-	if (shift)
-		*shift = 0;
+	if (hpage_shift)
+		*hpage_shift = 0;
 
 	if (is_thp)
 		*is_thp = false;
@@ -968,16 +834,15 @@ pte_t *__find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea,
 	ret_pte = hugepte_offset(*hpdp, ea, pdshift);
 	pdshift = hugepd_shift(*hpdp);
 out:
-	if (shift)
-		*shift = pdshift;
+	if (hpage_shift)
+		*hpage_shift = pdshift;
 	return ret_pte;
 }
-EXPORT_SYMBOL_GPL(__find_linux_pte_or_hugepte);
+EXPORT_SYMBOL_GPL(__find_linux_pte);
 
 int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
 		unsigned long end, int write, struct page **pages, int *nr)
 {
-	unsigned long mask;
 	unsigned long pte_end;
 	struct page *head, *page;
 	pte_t pte;
@@ -988,18 +853,10 @@ int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
 		end = pte_end;
 
 	pte = READ_ONCE(*ptep);
-	mask = _PAGE_PRESENT | _PAGE_READ;
 
-	/*
-	 * On some CPUs like the 8xx, _PAGE_RW hence _PAGE_WRITE is defined
-	 * as 0 and _PAGE_RO has to be set when a page is not writable
-	 */
-	if (write)
-		mask |= _PAGE_WRITE;
-	else
-		mask |= _PAGE_RO;
-
-	if ((pte_val(pte) & mask) != mask)
+	if (!pte_present(pte) || !pte_read(pte))
+		return 0;
+	if (write && !pte_write(pte))
 		return 0;
 
 	/* hugepages are never "special" */
diff --git a/arch/powerpc/mm/icswx.c b/arch/powerpc/mm/icswx.c
deleted file mode 100644
index 1fa794d7d59f..000000000000
--- a/arch/powerpc/mm/icswx.c
+++ /dev/null
@@ -1,292 +0,0 @@
-/*
- *  ICSWX and ACOP Management
- *
- *  Copyright (C) 2011 Anton Blanchard, IBM Corp. <anton@samba.org>
- *
- *  This program is free software; you can redistribute it and/or
- *  modify it under the terms of the GNU General Public License
- *  as published by the Free Software Foundation; either version
- *  2 of the License, or (at your option) any later version.
- *
- */
-
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/types.h>
-#include <linux/mm.h>
-#include <linux/spinlock.h>
-#include <linux/module.h>
-#include <linux/uaccess.h>
-
-#include "icswx.h"
-
-/*
- * The processor and its L2 cache cause the icswx instruction to
- * generate a COP_REQ transaction on PowerBus. The transaction has no
- * address, and the processor does not perform an MMU access to
- * authenticate the transaction. The command portion of the PowerBus
- * COP_REQ transaction includes the LPAR_ID (LPID) and the coprocessor
- * Process ID (PID), which the coprocessor compares to the authorized
- * LPID and PID held in the coprocessor, to determine if the process
- * is authorized to generate the transaction.  The data of the COP_REQ
- * transaction is 128-byte or less in size and is placed in cacheable
- * memory on a 128-byte cache line boundary.
- *
- * The task to use a coprocessor should use use_cop() to mark the use
- * of the Coprocessor Type (CT) and context switching. On a server
- * class processor, the PID register is used only for coprocessor
- * management + * and so a coprocessor PID is allocated before
- * executing icswx + * instruction. Drop_cop() is used to free the
- * coprocessor PID.
- *
- * Example:
- * Host Fabric Interface (HFI) is a PowerPC network coprocessor.
- * Each HFI have multiple windows. Each HFI window serves as a
- * network device sending to and receiving from HFI network.
- * HFI immediate send function uses icswx instruction. The immediate
- * send function allows small (single cache-line) packets be sent
- * without using the regular HFI send FIFO and doorbell, which are
- * much slower than immediate send.
- *
- * For each task intending to use HFI immediate send, the HFI driver
- * calls use_cop() to obtain a coprocessor PID for the task.
- * The HFI driver then allocate a free HFI window and save the
- * coprocessor PID to the HFI window to allow the task to use the
- * HFI window.
- *
- * The HFI driver repeatedly creates immediate send packets and
- * issues icswx instruction to send data through the HFI window.
- * The HFI compares the coprocessor PID in the CPU PID register
- * to the PID held in the HFI window to determine if the transaction
- * is allowed.
- *
- * When the task to release the HFI window, the HFI driver calls
- * drop_cop() to release the coprocessor PID.
- */
-
-void switch_cop(struct mm_struct *next)
-{
-#ifdef CONFIG_PPC_ICSWX_PID
-	mtspr(SPRN_PID, next->context.cop_pid);
-#endif
-	mtspr(SPRN_ACOP, next->context.acop);
-}
-
-/**
- * Start using a coprocessor.
- * @acop: mask of coprocessor to be used.
- * @mm: The mm the coprocessor to associate with. Most likely current mm.
- *
- * Return a positive PID if successful. Negative errno otherwise.
- * The returned PID will be fed to the coprocessor to determine if an
- * icswx transaction is authenticated.
- */
-int use_cop(unsigned long acop, struct mm_struct *mm)
-{
-	int ret;
-
-	if (!cpu_has_feature(CPU_FTR_ICSWX))
-		return -ENODEV;
-
-	if (!mm || !acop)
-		return -EINVAL;
-
-	/* The page_table_lock ensures mm_users won't change under us */
-	spin_lock(&mm->page_table_lock);
-	spin_lock(mm->context.cop_lockp);
-
-	ret = get_cop_pid(mm);
-	if (ret < 0)
-		goto out;
-
-	/* update acop */
-	mm->context.acop |= acop;
-
-	sync_cop(mm);
-
-	/*
-	 * If this is a threaded process then there might be other threads
-	 * running. We need to send an IPI to force them to pick up any
-	 * change in PID and ACOP.
-	 */
-	if (atomic_read(&mm->mm_users) > 1)
-		smp_call_function(sync_cop, mm, 1);
-
-out:
-	spin_unlock(mm->context.cop_lockp);
-	spin_unlock(&mm->page_table_lock);
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(use_cop);
-
-/**
- * Stop using a coprocessor.
- * @acop: mask of coprocessor to be stopped.
- * @mm: The mm the coprocessor associated with.
- */
-void drop_cop(unsigned long acop, struct mm_struct *mm)
-{
-	int free_pid;
-
-	if (!cpu_has_feature(CPU_FTR_ICSWX))
-		return;
-
-	if (WARN_ON_ONCE(!mm))
-		return;
-
-	/* The page_table_lock ensures mm_users won't change under us */
-	spin_lock(&mm->page_table_lock);
-	spin_lock(mm->context.cop_lockp);
-
-	mm->context.acop &= ~acop;
-
-	free_pid = disable_cop_pid(mm);
-	sync_cop(mm);
-
-	/*
-	 * If this is a threaded process then there might be other threads
-	 * running. We need to send an IPI to force them to pick up any
-	 * change in PID and ACOP.
-	 */
-	if (atomic_read(&mm->mm_users) > 1)
-		smp_call_function(sync_cop, mm, 1);
-
-	if (free_pid != COP_PID_NONE)
-		free_cop_pid(free_pid);
-
-	spin_unlock(mm->context.cop_lockp);
-	spin_unlock(&mm->page_table_lock);
-}
-EXPORT_SYMBOL_GPL(drop_cop);
-
-static int acop_use_cop(int ct)
-{
-	/* There is no alternate policy, yet */
-	return -1;
-}
-
-/*
- * Get the instruction word at the NIP
- */
-static u32 acop_get_inst(struct pt_regs *regs)
-{
-	u32 inst;
-	u32 __user *p;
-
-	p = (u32 __user *)regs->nip;
-	if (!access_ok(VERIFY_READ, p, sizeof(*p)))
-		return 0;
-
-	if (__get_user(inst, p))
-		return 0;
-
-	return inst;
-}
-
-/**
- * @regs: registers at time of interrupt
- * @address: storage address
- * @error_code: Fault code, usually the DSISR or ESR depending on
- *		processor type
- *
- * Return 0 if we are able to resolve the data storage fault that
- * results from a CT miss in the ACOP register.
- */
-int acop_handle_fault(struct pt_regs *regs, unsigned long address,
-		      unsigned long error_code)
-{
-	int ct;
-	u32 inst = 0;
-
-	if (!cpu_has_feature(CPU_FTR_ICSWX)) {
-		pr_info("No coprocessors available");
-		_exception(SIGILL, regs, ILL_ILLOPN, address);
-	}
-
-	if (!user_mode(regs)) {
-		/* this could happen if the HV denies the
-		 * kernel access, for now we just die */
-		die("ICSWX from kernel failed", regs, SIGSEGV);
-	}
-
-	/* Some implementations leave us a hint for the CT */
-	ct = ICSWX_GET_CT_HINT(error_code);
-	if (ct < 0) {
-		/* we have to peek at the instruction word to figure out CT */
-		u32 ccw;
-		u32 rs;
-
-		inst = acop_get_inst(regs);
-		if (inst == 0)
-			return -1;
-
-		rs = (inst >> (31 - 10)) & 0x1f;
-		ccw = regs->gpr[rs];
-		ct = (ccw >> 16) & 0x3f;
-	}
-
-	/*
-	 * We could be here because another thread has enabled acop
-	 * but the ACOP register has yet to be updated.
-	 *
-	 * This should have been taken care of by the IPI to sync all
-	 * the threads (see smp_call_function(sync_cop, mm, 1)), but
-	 * that could take forever if there are a significant amount
-	 * of threads.
-	 *
-	 * Given the number of threads on some of these systems,
-	 * perhaps this is the best way to sync ACOP rather than whack
-	 * every thread with an IPI.
-	 */
-	if ((acop_copro_type_bit(ct) & current->active_mm->context.acop) != 0) {
-		sync_cop(current->active_mm);
-		return 0;
-	}
-
-	/* check for alternate policy */
-	if (!acop_use_cop(ct))
-		return 0;
-
-	/* at this point the CT is unknown to the system */
-	pr_warn("%s[%d]: Coprocessor %d is unavailable\n",
-		current->comm, current->pid, ct);
-
-	/* get inst if we don't already have it */
-	if (inst == 0) {
-		inst = acop_get_inst(regs);
-		if (inst == 0)
-			return -1;
-	}
-
-	/* Check if the instruction is the "record form" */
-	if (inst & 1) {
-		/*
-		 * the instruction is "record" form so we can reject
-		 * using CR0
-		 */
-		regs->ccr &= ~(0xful << 28);
-		regs->ccr |= ICSWX_RC_NOT_FOUND << 28;
-
-		/* Move on to the next instruction */
-		regs->nip += 4;
-	} else {
-		/*
-		 * There is no architected mechanism to report a bad
-		 * CT so we could either SIGILL or report nothing.
-		 * Since the non-record version should only bu used
-		 * for "hints" or "don't care" we should probably do
-		 * nothing.  However, I could see how some people
-		 * might want an SIGILL so it here if you want it.
-		 */
-#ifdef CONFIG_PPC_ICSWX_USE_SIGILL
-		_exception(SIGILL, regs, ILL_ILLOPN, address);
-#else
-		regs->nip += 4;
-#endif
-	}
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(acop_handle_fault);
diff --git a/arch/powerpc/mm/icswx.h b/arch/powerpc/mm/icswx.h
deleted file mode 100644
index 6dedc08e62c8..000000000000
--- a/arch/powerpc/mm/icswx.h
+++ /dev/null
@@ -1,68 +0,0 @@
-#ifndef _ARCH_POWERPC_MM_ICSWX_H_
-#define _ARCH_POWERPC_MM_ICSWX_H_
-
-/*
- *  ICSWX and ACOP Management
- *
- *  Copyright (C) 2011 Anton Blanchard, IBM Corp. <anton@samba.org>
- *
- *  This program is free software; you can redistribute it and/or
- *  modify it under the terms of the GNU General Public License
- *  as published by the Free Software Foundation; either version
- *  2 of the License, or (at your option) any later version.
- *
- */
-
-#include <asm/mmu_context.h>
-
-/* also used to denote that PIDs are not used */
-#define COP_PID_NONE 0
-
-static inline void sync_cop(void *arg)
-{
-	struct mm_struct *mm = arg;
-
-	if (mm == current->active_mm)
-		switch_cop(current->active_mm);
-}
-
-#ifdef CONFIG_PPC_ICSWX_PID
-extern int get_cop_pid(struct mm_struct *mm);
-extern int disable_cop_pid(struct mm_struct *mm);
-extern void free_cop_pid(int free_pid);
-#else
-#define get_cop_pid(m) (COP_PID_NONE)
-#define disable_cop_pid(m) (COP_PID_NONE)
-#define free_cop_pid(p)
-#endif
-
-/*
- * These are implementation bits for architected registers.  If this
- * ever becomes architecture the should be moved to reg.h et. al.
- */
-/* UCT is the same bit for Server and Embedded */
-#define ICSWX_DSI_UCT		0x00004000  /* Unavailable Coprocessor Type */
-
-#ifdef CONFIG_PPC_BOOK3E
-/* Embedded implementation gives us no hints as to what the CT is */
-#define ICSWX_GET_CT_HINT(x) (-1)
-#else
-/* Server implementation contains the CT value in the DSISR */
-#define ICSWX_DSISR_CTMASK	0x00003f00
-#define ICSWX_GET_CT_HINT(x)	(((x) & ICSWX_DSISR_CTMASK) >> 8)
-#endif
-
-#define ICSWX_RC_STARTED	0x8	/* The request has been started */
-#define ICSWX_RC_NOT_IDLE	0x4	/* No coprocessor found idle */
-#define ICSWX_RC_NOT_FOUND	0x2	/* No coprocessor found */
-#define ICSWX_RC_UNDEFINED	0x1	/* Reserved */
-
-extern int acop_handle_fault(struct pt_regs *regs, unsigned long address,
-			     unsigned long error_code);
-
-static inline u64 acop_copro_type_bit(unsigned int type)
-{
-	return 1ULL << (63 - type);
-}
-
-#endif /* !_ARCH_POWERPC_MM_ICSWX_H_ */
diff --git a/arch/powerpc/mm/icswx_pid.c b/arch/powerpc/mm/icswx_pid.c
deleted file mode 100644
index 91e30eb7d054..000000000000
--- a/arch/powerpc/mm/icswx_pid.c
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- *  ICSWX and ACOP/PID Management
- *
- *  Copyright (C) 2011 Anton Blanchard, IBM Corp. <anton@samba.org>
- *
- *  This program is free software; you can redistribute it and/or
- *  modify it under the terms of the GNU General Public License
- *  as published by the Free Software Foundation; either version
- *  2 of the License, or (at your option) any later version.
- *
- */
-
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/types.h>
-#include <linux/mm.h>
-#include <linux/spinlock.h>
-#include <linux/idr.h>
-#include <linux/module.h>
-#include "icswx.h"
-
-#define COP_PID_MIN (COP_PID_NONE + 1)
-#define COP_PID_MAX (0xFFFF)
-
-static DEFINE_SPINLOCK(mmu_context_acop_lock);
-static DEFINE_IDA(cop_ida);
-
-static int new_cop_pid(struct ida *ida, int min_id, int max_id,
-		       spinlock_t *lock)
-{
-	int index;
-	int err;
-
-again:
-	if (!ida_pre_get(ida, GFP_KERNEL))
-		return -ENOMEM;
-
-	spin_lock(lock);
-	err = ida_get_new_above(ida, min_id, &index);
-	spin_unlock(lock);
-
-	if (err == -EAGAIN)
-		goto again;
-	else if (err)
-		return err;
-
-	if (index > max_id) {
-		spin_lock(lock);
-		ida_remove(ida, index);
-		spin_unlock(lock);
-		return -ENOMEM;
-	}
-
-	return index;
-}
-
-int get_cop_pid(struct mm_struct *mm)
-{
-	int pid;
-
-	if (mm->context.cop_pid == COP_PID_NONE) {
-		pid = new_cop_pid(&cop_ida, COP_PID_MIN, COP_PID_MAX,
-				  &mmu_context_acop_lock);
-		if (pid >= 0)
-			mm->context.cop_pid = pid;
-	}
-	return mm->context.cop_pid;
-}
-
-int disable_cop_pid(struct mm_struct *mm)
-{
-	int free_pid = COP_PID_NONE;
-
-	if ((!mm->context.acop) && (mm->context.cop_pid != COP_PID_NONE)) {
-		free_pid = mm->context.cop_pid;
-		mm->context.cop_pid = COP_PID_NONE;
-	}
-	return free_pid;
-}
-
-void free_cop_pid(int free_pid)
-{
-	spin_lock(&mmu_context_acop_lock);
-	ida_remove(&cop_ida, free_pid);
-	spin_unlock(&mmu_context_acop_lock);
-}
diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c
index 8a7c38b8d335..6419b33ca309 100644
--- a/arch/powerpc/mm/init_32.c
+++ b/arch/powerpc/mm/init_32.c
@@ -113,6 +113,12 @@ void __init MMU_setup(void)
 		__map_without_bats = 1;
 		__map_without_ltlbs = 1;
 	}
+#ifdef CONFIG_STRICT_KERNEL_RWX
+	if (rodata_enabled) {
+		__map_without_bats = 1;
+		__map_without_ltlbs = 1;
+	}
+#endif
 }
 
 /*
@@ -132,8 +138,6 @@ void __init MMU_init(void)
 	 * Reserve gigantic pages for hugetlb.  This MUST occur before
 	 * lowmem_end_addr is initialized below.
 	 */
-	reserve_hugetlb_gpages();
-
 	if (memblock.memory.cnt > 1) {
 #ifndef CONFIG_WII
 		memblock_enforce_memory_limit(memblock.memory.regions[0].size);
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index 5b4c25d12ff3..588a521966ec 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -356,7 +356,7 @@ struct page *realmode_pfn_to_page(unsigned long pfn)
 }
 EXPORT_SYMBOL_GPL(realmode_pfn_to_page);
 
-#elif defined(CONFIG_FLATMEM)
+#else
 
 struct page *realmode_pfn_to_page(unsigned long pfn)
 {
@@ -365,7 +365,7 @@ struct page *realmode_pfn_to_page(unsigned long pfn)
 }
 EXPORT_SYMBOL_GPL(realmode_pfn_to_page);
 
-#endif /* CONFIG_SPARSEMEM_VMEMMAP/CONFIG_FLATMEM */
+#endif /* CONFIG_SPARSEMEM_VMEMMAP */
 
 #ifdef CONFIG_PPC_STD_MMU_64
 static bool disable_radix;
@@ -381,7 +381,7 @@ early_param("disable_radix", parse_disable_radix);
  * /chosen/ibm,architecture-vec-5 to see if the hypervisor is willing to do
  * radix.  If not, we clear the radix feature bit so we fall back to hash.
  */
-static void early_check_vec5(void)
+static void __init early_check_vec5(void)
 {
 	unsigned long root, chosen;
 	int size;
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 46b4e67d2372..4362b86ef84c 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -436,7 +436,7 @@ void flush_dcache_icache_page(struct page *page)
 		return;
 	}
 #endif
-#if defined(CONFIG_8xx) || defined(CONFIG_PPC64)
+#if defined(CONFIG_PPC_8xx) || defined(CONFIG_PPC64)
 	/* On 8xx there is no need to kmap since highmem is not supported */
 	__flush_dcache_icache(page_address(page));
 #else
diff --git a/arch/powerpc/mm/mmu_context.c b/arch/powerpc/mm/mmu_context.c
new file mode 100644
index 000000000000..0f613bc63c50
--- /dev/null
+++ b/arch/powerpc/mm/mmu_context.c
@@ -0,0 +1,99 @@
+/*
+ *  Common implementation of switch_mm_irqs_off
+ *
+ *  Copyright IBM Corp. 2017
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/mm.h>
+#include <linux/cpu.h>
+
+#include <asm/mmu_context.h>
+
+#if defined(CONFIG_PPC32)
+static inline void switch_mm_pgdir(struct task_struct *tsk,
+				   struct mm_struct *mm)
+{
+	/* 32-bit keeps track of the current PGDIR in the thread struct */
+	tsk->thread.pgdir = mm->pgd;
+}
+#elif defined(CONFIG_PPC_BOOK3E_64)
+static inline void switch_mm_pgdir(struct task_struct *tsk,
+				   struct mm_struct *mm)
+{
+	/* 64-bit Book3E keeps track of current PGD in the PACA */
+	get_paca()->pgd = mm->pgd;
+}
+#else
+static inline void switch_mm_pgdir(struct task_struct *tsk,
+				   struct mm_struct *mm) { }
+#endif
+
+#ifdef CONFIG_PPC_BOOK3S_64
+static inline void inc_mm_active_cpus(struct mm_struct *mm)
+{
+	atomic_inc(&mm->context.active_cpus);
+}
+#else
+static inline void inc_mm_active_cpus(struct mm_struct *mm) { }
+#endif
+
+void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
+			struct task_struct *tsk)
+{
+	bool new_on_cpu = false;
+
+	/* Mark this context has been used on the new CPU */
+	if (!cpumask_test_cpu(smp_processor_id(), mm_cpumask(next))) {
+		cpumask_set_cpu(smp_processor_id(), mm_cpumask(next));
+		inc_mm_active_cpus(next);
+
+		/*
+		 * This full barrier orders the store to the cpumask above vs
+		 * a subsequent operation which allows this CPU to begin loading
+		 * translations for next.
+		 *
+		 * When using the radix MMU that operation is the load of the
+		 * MMU context id, which is then moved to SPRN_PID.
+		 *
+		 * For the hash MMU it is either the first load from slb_cache
+		 * in switch_slb(), and/or the store of paca->mm_ctx_id in
+		 * copy_mm_to_paca().
+		 *
+		 * On the read side the barrier is in pte_xchg(), which orders
+		 * the store to the PTE vs the load of mm_cpumask.
+		 */
+		smp_mb();
+
+		new_on_cpu = true;
+	}
+
+	/* Some subarchs need to track the PGD elsewhere */
+	switch_mm_pgdir(tsk, next);
+
+	/* Nothing else to do if we aren't actually switching */
+	if (prev == next)
+		return;
+
+	/*
+	 * We must stop all altivec streams before changing the HW
+	 * context
+	 */
+	if (cpu_has_feature(CPU_FTR_ALTIVEC))
+		asm volatile ("dssall");
+
+	if (new_on_cpu)
+		radix_kvm_prefetch_workaround(next);
+
+	/*
+	 * The actual HW switching method differs between the various
+	 * sub architectures. Out of line for now
+	 */
+	switch_mmu_context(prev, next, tsk);
+}
+
diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c
index a75f63833284..05e15386d4cb 100644
--- a/arch/powerpc/mm/mmu_context_book3s64.c
+++ b/arch/powerpc/mm/mmu_context_book3s64.c
@@ -25,8 +25,6 @@
 #include <asm/mmu_context.h>
 #include <asm/pgalloc.h>
 
-#include "icswx.h"
-
 static DEFINE_SPINLOCK(mmu_context_lock);
 static DEFINE_IDA(mmu_context_ida);
 
@@ -165,16 +163,6 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
 		return index;
 
 	mm->context.id = index;
-#ifdef CONFIG_PPC_ICSWX
-	mm->context.cop_lockp = kmalloc(sizeof(spinlock_t), GFP_KERNEL);
-	if (!mm->context.cop_lockp) {
-		__destroy_context(index);
-		subpage_prot_free(mm);
-		mm->context.id = MMU_NO_CONTEXT;
-		return -ENOMEM;
-	}
-	spin_lock_init(mm->context.cop_lockp);
-#endif /* CONFIG_PPC_ICSWX */
 
 #ifdef CONFIG_PPC_64K_PAGES
 	mm->context.pte_frag = NULL;
@@ -182,6 +170,8 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
 #ifdef CONFIG_SPAPR_TCE_IOMMU
 	mm_iommu_init(mm);
 #endif
+	atomic_set(&mm->context.active_cpus, 0);
+
 	return 0;
 }
 
@@ -226,12 +216,6 @@ void destroy_context(struct mm_struct *mm)
 #ifdef CONFIG_SPAPR_TCE_IOMMU
 	WARN_ON_ONCE(!list_empty(&mm->context.iommu_group_mem_list));
 #endif
-#ifdef CONFIG_PPC_ICSWX
-	drop_cop(mm->context.acop, mm);
-	kfree(mm->context.cop_lockp);
-	mm->context.cop_lockp = NULL;
-#endif /* CONFIG_PPC_ICSWX */
-
 	if (radix_enabled()) {
 		/*
 		 * Radix doesn't have a valid bit in the process table
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index d46128b22150..57fbc554c785 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -27,7 +27,7 @@
 /*
  * On 40x and 8xx, we directly inline tlbia and tlbivax
  */
-#if defined(CONFIG_40x) || defined(CONFIG_8xx)
+#if defined(CONFIG_40x) || defined(CONFIG_PPC_8xx)
 static inline void _tlbil_all(void)
 {
 	asm volatile ("sync; tlbia; isync" : : : "memory");
@@ -38,7 +38,7 @@ static inline void _tlbil_pid(unsigned int pid)
 }
 #define _tlbil_pid_noind(pid)	_tlbil_pid(pid)
 
-#else /* CONFIG_40x || CONFIG_8xx */
+#else /* CONFIG_40x || CONFIG_PPC_8xx */
 extern void _tlbil_all(void);
 extern void _tlbil_pid(unsigned int pid);
 #ifdef CONFIG_PPC_BOOK3E
@@ -46,12 +46,12 @@ extern void _tlbil_pid_noind(unsigned int pid);
 #else
 #define _tlbil_pid_noind(pid)	_tlbil_pid(pid)
 #endif
-#endif /* !(CONFIG_40x || CONFIG_8xx) */
+#endif /* !(CONFIG_40x || CONFIG_PPC_8xx) */
 
 /*
  * On 8xx, we directly inline tlbie, on others, it's extern
  */
-#ifdef CONFIG_8xx
+#ifdef CONFIG_PPC_8xx
 static inline void _tlbil_va(unsigned long address, unsigned int pid,
 			     unsigned int tsize, unsigned int ind)
 {
@@ -67,7 +67,7 @@ static inline void _tlbil_va(unsigned long address, unsigned int pid,
 {
 	__tlbil_va(address, pid);
 }
-#endif /* CONFIG_8xx */
+#endif /* CONFIG_PPC_8xx */
 
 #if defined(CONFIG_PPC_BOOK3E) || defined(CONFIG_PPC_47x)
 extern void _tlbivax_bcast(unsigned long address, unsigned int pid,
diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c
index 31eed8fa8e99..3b65917785a5 100644
--- a/arch/powerpc/mm/pgtable-book3s64.c
+++ b/arch/powerpc/mm/pgtable-book3s64.c
@@ -9,6 +9,7 @@
 
 #include <linux/sched.h>
 #include <linux/mm_types.h>
+#include <misc/cxl-base.h>
 
 #include <asm/pgalloc.h>
 #include <asm/tlb.h>
@@ -64,6 +65,27 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr,
 	trace_hugepage_set_pmd(addr, pmd_val(pmd));
 	return set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd));
 }
+
+static void do_nothing(void *unused)
+{
+
+}
+/*
+ * Serialize against find_current_mm_pte which does lock-less
+ * lookup in page tables with local interrupts disabled. For huge pages
+ * it casts pmd_t to pte_t. Since format of pte_t is different from
+ * pmd_t we want to prevent transit from pmd pointing to page table
+ * to pmd pointing to huge page (and back) while interrupts are disabled.
+ * We clear pmd to possibly replace it with page table pointer in
+ * different code paths. So make sure we wait for the parallel
+ * find_current_mm_pte to finish.
+ */
+void serialize_against_pte_lookup(struct mm_struct *mm)
+{
+	smp_mb();
+	smp_call_function_many(mm_cpumask(mm), do_nothing, NULL, 1);
+}
+
 /*
  * We use this to invalidate a pmdp entry before switching from a
  * hugepte to regular pmd entry.
@@ -77,7 +99,7 @@ void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
 	 * This ensures that generic code that rely on IRQ disabling
 	 * to prevent a parallel THP split work as expected.
 	 */
-	kick_all_cpus_sync();
+	serialize_against_pte_lookup(vma->vm_mm);
 }
 
 static pmd_t pmd_set_protbits(pmd_t pmd, pgprot_t pgprot)
diff --git a/arch/powerpc/mm/pgtable-hash64.c b/arch/powerpc/mm/pgtable-hash64.c
index 443a2c66a304..ec277913e01b 100644
--- a/arch/powerpc/mm/pgtable-hash64.c
+++ b/arch/powerpc/mm/pgtable-hash64.c
@@ -239,7 +239,7 @@ pmd_t hash__pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long addres
 	 * by sending an IPI to all the cpus and executing a dummy
 	 * function there.
 	 */
-	kick_all_cpus_sync();
+	serialize_against_pte_lookup(vma->vm_mm);
 	/*
 	 * Now invalidate the hpte entries in the range
 	 * covered by pmd. This make sure we take a
@@ -329,7 +329,6 @@ void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr,
 	unsigned int psize;
 	unsigned long vsid;
 	unsigned long flags = 0;
-	const struct cpumask *tmp;
 
 	/* get the base page size,vsid and segment size */
 #ifdef CONFIG_DEBUG_VM
@@ -350,8 +349,7 @@ void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr,
 		ssize = mmu_kernel_ssize;
 	}
 
-	tmp = cpumask_of(smp_processor_id());
-	if (cpumask_equal(mm_cpumask(mm), tmp))
+	if (mm_is_thread_local(mm))
 		flags |= HPTE_LOCAL_UPDATE;
 
 	return flush_hash_hugepage(vsid, addr, pmdp, psize, ssize, flags);
@@ -380,16 +378,16 @@ pmd_t hash__pmdp_huge_get_and_clear(struct mm_struct *mm,
 	 */
 	memset(pgtable, 0, PTE_FRAG_SIZE);
 	/*
-	 * Serialize against find_linux_pte_or_hugepte which does lock-less
+	 * Serialize against find_current_mm_pte variants which does lock-less
 	 * lookup in page tables with local interrupts disabled. For huge pages
 	 * it casts pmd_t to pte_t. Since format of pte_t is different from
 	 * pmd_t we want to prevent transit from pmd pointing to page table
 	 * to pmd pointing to huge page (and back) while interrupts are disabled.
 	 * We clear pmd to possibly replace it with page table pointer in
 	 * different code paths. So make sure we wait for the parallel
-	 * find_linux_pte_or_hugepage to finish.
+	 * find_curren_mm_pte to finish.
 	 */
-	kick_all_cpus_sync();
+	serialize_against_pte_lookup(mm);
 	return old_pmd;
 }
 
diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c
index 671a45d86c18..39c252b54d16 100644
--- a/arch/powerpc/mm/pgtable-radix.c
+++ b/arch/powerpc/mm/pgtable-radix.c
@@ -8,10 +8,15 @@
  * as published by the Free Software Foundation; either version
  * 2 of the License, or (at your option) any later version.
  */
+
+#define pr_fmt(fmt) "radix-mmu: " fmt
+
+#include <linux/kernel.h>
 #include <linux/sched/mm.h>
 #include <linux/memblock.h>
 #include <linux/of_fdt.h>
 #include <linux/mm.h>
+#include <linux/string_helpers.h>
 
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
@@ -31,9 +36,13 @@ unsigned int mmu_base_pid;
 static int native_register_process_table(unsigned long base, unsigned long pg_sz,
 					 unsigned long table_size)
 {
-	unsigned long patb1 = base | table_size | PATB_GR;
+	unsigned long patb0, patb1;
+
+	patb0 = be64_to_cpu(partition_tb[0].patb0);
+	patb1 = base | table_size | PATB_GR;
+
+	mmu_partition_table_set_entry(0, patb0, patb1);
 
-	partition_tb->patb1 = cpu_to_be64(patb1);
 	return 0;
 }
 
@@ -179,10 +188,14 @@ static inline void __meminit print_mapping(unsigned long start,
 					   unsigned long end,
 					   unsigned long size)
 {
+	char buf[10];
+
 	if (end <= start)
 		return;
 
-	pr_info("Mapped range 0x%lx - 0x%lx with 0x%lx\n", start, end, size);
+	string_get_size(size, 1, STRING_UNITS_2, buf, sizeof(buf));
+
+	pr_info("Mapped 0x%016lx-0x%016lx with %s pages\n", start, end, buf);
 }
 
 static int __meminit create_physical_mapping(unsigned long start,
@@ -526,6 +539,7 @@ void __init radix__early_init_mmu(void)
 	__kernel_virt_size = RADIX_KERN_VIRT_SIZE;
 	__vmalloc_start = RADIX_VMALLOC_START;
 	__vmalloc_end = RADIX_VMALLOC_END;
+	__kernel_io_start = RADIX_KERN_IO_START;
 	vmemmap = (struct page *)RADIX_VMEMMAP_BASE;
 	ioremap_bot = IOREMAP_BASE;
 
@@ -836,9 +850,12 @@ pmd_t radix__pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long addre
 	 */
 	pmd = *pmdp;
 	pmd_clear(pmdp);
+
 	/*FIXME!!  Verify whether we need this kick below */
-	kick_all_cpus_sync();
-	flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
+	serialize_against_pte_lookup(vma->vm_mm);
+
+	radix__flush_tlb_collapsed_pmd(vma->vm_mm, address);
+
 	return pmd;
 }
 
@@ -897,16 +914,16 @@ pmd_t radix__pmdp_huge_get_and_clear(struct mm_struct *mm,
 	old = radix__pmd_hugepage_update(mm, addr, pmdp, ~0UL, 0);
 	old_pmd = __pmd(old);
 	/*
-	 * Serialize against find_linux_pte_or_hugepte which does lock-less
+	 * Serialize against find_current_mm_pte which does lock-less
 	 * lookup in page tables with local interrupts disabled. For huge pages
 	 * it casts pmd_t to pte_t. Since format of pte_t is different from
 	 * pmd_t we want to prevent transit from pmd pointing to page table
 	 * to pmd pointing to huge page (and back) while interrupts are disabled.
 	 * We clear pmd to possibly replace it with page table pointer in
 	 * different code paths. So make sure we wait for the parallel
-	 * find_linux_pte_or_hugepage to finish.
+	 * find_current_mm_pte to finish.
 	 */
-	kick_all_cpus_sync();
+	serialize_against_pte_lookup(mm);
 	return old_pmd;
 }
 
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index a9e4bfc025bc..65eda1997c3f 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -34,6 +34,7 @@
 #include <asm/fixmap.h>
 #include <asm/io.h>
 #include <asm/setup.h>
+#include <asm/sections.h>
 
 #include "mmu_decl.h"
 
@@ -242,7 +243,7 @@ int map_kernel_page(unsigned long va, phys_addr_t pa, int flags)
 /*
  * Map in a chunk of physical memory starting at start.
  */
-void __init __mapin_ram_chunk(unsigned long offset, unsigned long top)
+static void __init __mapin_ram_chunk(unsigned long offset, unsigned long top)
 {
 	unsigned long v, s, f;
 	phys_addr_t p;
@@ -294,7 +295,7 @@ void __init mapin_ram(void)
  * Returns true (1) if PTE was found, zero otherwise.  The pointer to
  * the PTE pointer is unmodified if PTE is not found.
  */
-int
+static int
 get_pteptr(struct mm_struct *mm, unsigned long addr, pte_t **ptep, pmd_t **pmdp)
 {
         pgd_t	*pgd;
@@ -323,9 +324,7 @@ get_pteptr(struct mm_struct *mm, unsigned long addr, pte_t **ptep, pmd_t **pmdp)
         return(retval);
 }
 
-#ifdef CONFIG_DEBUG_PAGEALLOC
-
-static int __change_page_attr(struct page *page, pgprot_t prot)
+static int __change_page_attr_noflush(struct page *page, pgprot_t prot)
 {
 	pte_t *kpte;
 	pmd_t *kpmd;
@@ -339,8 +338,6 @@ static int __change_page_attr(struct page *page, pgprot_t prot)
 	if (!get_pteptr(&init_mm, address, &kpte, &kpmd))
 		return -EINVAL;
 	__set_pte_at(&init_mm, address, kpte, mk_pte(page, prot), 0);
-	wmb();
-	flush_tlb_page(NULL, address);
 	pte_unmap(kpte);
 
 	return 0;
@@ -349,44 +346,65 @@ static int __change_page_attr(struct page *page, pgprot_t prot)
 /*
  * Change the page attributes of an page in the linear mapping.
  *
- * THIS CONFLICTS WITH BAT MAPPINGS, DEBUG USE ONLY
+ * THIS DOES NOTHING WITH BAT MAPPINGS, DEBUG USE ONLY
  */
 static int change_page_attr(struct page *page, int numpages, pgprot_t prot)
 {
 	int i, err = 0;
 	unsigned long flags;
+	struct page *start = page;
 
 	local_irq_save(flags);
 	for (i = 0; i < numpages; i++, page++) {
-		err = __change_page_attr(page, prot);
+		err = __change_page_attr_noflush(page, prot);
 		if (err)
 			break;
 	}
+	wmb();
+	flush_tlb_kernel_range((unsigned long)page_address(start),
+			       (unsigned long)page_address(page));
 	local_irq_restore(flags);
 	return err;
 }
 
-
-void __kernel_map_pages(struct page *page, int numpages, int enable)
+void mark_initmem_nx(void)
 {
-	if (PageHighMem(page))
-		return;
+	struct page *page = virt_to_page(_sinittext);
+	unsigned long numpages = PFN_UP((unsigned long)_einittext) -
+				 PFN_DOWN((unsigned long)_sinittext);
 
-	change_page_attr(page, numpages, enable ? PAGE_KERNEL : __pgprot(0));
+	change_page_attr(page, numpages, PAGE_KERNEL);
 }
-#endif /* CONFIG_DEBUG_PAGEALLOC */
 
-static int fixmaps;
-
-void __set_fixmap (enum fixed_addresses idx, phys_addr_t phys, pgprot_t flags)
+#ifdef CONFIG_STRICT_KERNEL_RWX
+void mark_rodata_ro(void)
 {
-	unsigned long address = __fix_to_virt(idx);
+	struct page *page;
+	unsigned long numpages;
+
+	page = virt_to_page(_stext);
+	numpages = PFN_UP((unsigned long)_etext) -
+		   PFN_DOWN((unsigned long)_stext);
 
-	if (idx >= __end_of_fixed_addresses) {
-		BUG();
+	change_page_attr(page, numpages, PAGE_KERNEL_ROX);
+	/*
+	 * mark .rodata as read only. Use __init_begin rather than __end_rodata
+	 * to cover NOTES and EXCEPTION_TABLE.
+	 */
+	page = virt_to_page(__start_rodata);
+	numpages = PFN_UP((unsigned long)__init_begin) -
+		   PFN_DOWN((unsigned long)__start_rodata);
+
+	change_page_attr(page, numpages, PAGE_KERNEL_RO);
+}
+#endif
+
+#ifdef CONFIG_DEBUG_PAGEALLOC
+void __kernel_map_pages(struct page *page, int numpages, int enable)
+{
+	if (PageHighMem(page))
 		return;
-	}
 
-	map_kernel_page(address, phys, pgprot_val(flags));
-	fixmaps++;
+	change_page_attr(page, numpages, enable ? PAGE_KERNEL : __pgprot(0));
 }
+#endif /* CONFIG_DEBUG_PAGEALLOC */
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 0736e94c7615..ac0717a90ca6 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -104,6 +104,8 @@ unsigned long __vmalloc_start;
 EXPORT_SYMBOL(__vmalloc_start);
 unsigned long __vmalloc_end;
 EXPORT_SYMBOL(__vmalloc_end);
+unsigned long __kernel_io_start;
+EXPORT_SYMBOL(__kernel_io_start);
 struct page *vmemmap;
 EXPORT_SYMBOL(vmemmap);
 unsigned long __pte_frag_nr;
diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S
index bde378559d01..906a86fe457b 100644
--- a/arch/powerpc/mm/slb_low.S
+++ b/arch/powerpc/mm/slb_low.S
@@ -121,12 +121,25 @@ slb_miss_kernel_load_vmemmap:
 1:
 #endif /* CONFIG_SPARSEMEM_VMEMMAP */
 
-	/* vmalloc mapping gets the encoding from the PACA as the mapping
-	 * can be demoted from 64K -> 4K dynamically on some machines
+	/*
+	 * r10 contains the ESID, which is the original faulting EA shifted
+	 * right by 28 bits. We need to compare that with (H_VMALLOC_END >> 28)
+	 * which is 0xd00038000. That can't be used as an immediate, even if we
+	 * ignored the 0xd, so we have to load it into a register, and we only
+	 * have one register free. So we must load all of (H_VMALLOC_END >> 28)
+	 * into a register and compare ESID against that.
+	 */
+	lis	r11,(H_VMALLOC_END >> 32)@h	// r11 = 0xffffffffd0000000
+	ori	r11,r11,(H_VMALLOC_END >> 32)@l	// r11 = 0xffffffffd0003800
+	// Rotate left 4, then mask with 0xffffffff0
+	rldic	r11,r11,4,28			// r11 = 0xd00038000
+	cmpld	r10,r11				// if r10 >= r11
+	bge	5f				//   goto io_mapping
+
+	/*
+	 * vmalloc mapping gets the encoding from the PACA as the mapping
+	 * can be demoted from 64K -> 4K dynamically on some machines.
 	 */
-	clrldi	r11,r10,48
-	cmpldi	r11,(H_VMALLOC_SIZE >> 28) - 1
-	bgt	5f
 	lhz	r11,PACAVMALLOCSLLP(r13)
 	b	6f
 5:
diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c
index 16ae1bbe13f0..b3e849c4886e 100644
--- a/arch/powerpc/mm/tlb-radix.c
+++ b/arch/powerpc/mm/tlb-radix.c
@@ -54,23 +54,15 @@ static inline void _tlbiel_pid(unsigned long pid, unsigned long ric)
 	 */
 	__tlbiel_pid(pid, 0, ric);
 
-	if (ric == RIC_FLUSH_ALL)
-		/* For the remaining sets, just flush the TLB */
-		ric = RIC_FLUSH_TLB;
+	/* For PWC, only one flush is needed */
+	if (ric == RIC_FLUSH_PWC) {
+		asm volatile("ptesync": : :"memory");
+		return;
+	}
 
+	/* For the remaining sets, just flush the TLB */
 	for (set = 1; set < POWER9_TLB_SETS_RADIX ; set++)
-		__tlbiel_pid(pid, set, ric);
-
-	asm volatile("ptesync": : :"memory");
-	asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory");
-}
-
-static inline void tlbiel_pwc(unsigned long pid)
-{
-	asm volatile("ptesync": : :"memory");
-
-	/* For PWC flush, we don't look at set number */
-	__tlbiel_pid(pid, 0, RIC_FLUSH_PWC);
+		__tlbiel_pid(pid, set, RIC_FLUSH_TLB);
 
 	asm volatile("ptesync": : :"memory");
 	asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory");
@@ -146,31 +138,23 @@ void radix__local_flush_tlb_mm(struct mm_struct *mm)
 	preempt_disable();
 	pid = mm->context.id;
 	if (pid != MMU_NO_CONTEXT)
-		_tlbiel_pid(pid, RIC_FLUSH_ALL);
+		_tlbiel_pid(pid, RIC_FLUSH_TLB);
 	preempt_enable();
 }
 EXPORT_SYMBOL(radix__local_flush_tlb_mm);
 
-void radix__local_flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr)
+#ifndef CONFIG_SMP
+static void radix__local_flush_all_mm(struct mm_struct *mm)
 {
 	unsigned long pid;
-	struct mm_struct *mm = tlb->mm;
-	/*
-	 * If we are doing a full mm flush, we will do a tlb flush
-	 * with RIC_FLUSH_ALL later.
-	 */
-	if (tlb->fullmm)
-		return;
 
 	preempt_disable();
-
 	pid = mm->context.id;
 	if (pid != MMU_NO_CONTEXT)
-		tlbiel_pwc(pid);
-
+		_tlbiel_pid(pid, RIC_FLUSH_ALL);
 	preempt_enable();
 }
-EXPORT_SYMBOL(radix__local_flush_tlb_pwc);
+#endif /* CONFIG_SMP */
 
 void radix__local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
 				       int psize)
@@ -208,38 +192,35 @@ void radix__flush_tlb_mm(struct mm_struct *mm)
 		goto no_context;
 
 	if (!mm_is_thread_local(mm))
-		_tlbie_pid(pid, RIC_FLUSH_ALL);
+		_tlbie_pid(pid, RIC_FLUSH_TLB);
 	else
-		_tlbiel_pid(pid, RIC_FLUSH_ALL);
+		_tlbiel_pid(pid, RIC_FLUSH_TLB);
 no_context:
 	preempt_enable();
 }
 EXPORT_SYMBOL(radix__flush_tlb_mm);
 
-void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr)
+static void radix__flush_all_mm(struct mm_struct *mm)
 {
 	unsigned long pid;
-	struct mm_struct *mm = tlb->mm;
 
-	/*
-	 * If we are doing a full mm flush, we will do a tlb flush
-	 * with RIC_FLUSH_ALL later.
-	 */
-	if (tlb->fullmm)
-		return;
 	preempt_disable();
-
 	pid = mm->context.id;
 	if (unlikely(pid == MMU_NO_CONTEXT))
 		goto no_context;
 
 	if (!mm_is_thread_local(mm))
-		_tlbie_pid(pid, RIC_FLUSH_PWC);
+		_tlbie_pid(pid, RIC_FLUSH_ALL);
 	else
-		tlbiel_pwc(pid);
+		_tlbiel_pid(pid, RIC_FLUSH_ALL);
 no_context:
 	preempt_enable();
 }
+
+void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr)
+{
+	tlb->need_flush_all = 1;
+}
 EXPORT_SYMBOL(radix__flush_tlb_pwc);
 
 void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
@@ -271,6 +252,8 @@ void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
 }
 EXPORT_SYMBOL(radix__flush_tlb_page);
 
+#else /* CONFIG_SMP */
+#define radix__flush_all_mm radix__local_flush_all_mm
 #endif /* CONFIG_SMP */
 
 void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end)
@@ -288,6 +271,7 @@ void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
 
 {
 	struct mm_struct *mm = vma->vm_mm;
+
 	radix__flush_tlb_mm(mm);
 }
 EXPORT_SYMBOL(radix__flush_tlb_range);
@@ -319,7 +303,10 @@ void radix__tlb_flush(struct mmu_gather *tlb)
 	 */
 	if (psize != -1 && !tlb->fullmm && !tlb->need_flush_all)
 		radix__flush_tlb_range_psize(mm, tlb->start, tlb->end, psize);
-	else
+	else if (tlb->need_flush_all) {
+		tlb->need_flush_all = 0;
+		radix__flush_all_mm(mm);
+	} else
 		radix__flush_tlb_mm(mm);
 }
 
@@ -364,6 +351,43 @@ err_out:
 	preempt_enable();
 }
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
+{
+	int local = mm_is_thread_local(mm);
+	unsigned long ap = mmu_get_ap(mmu_virtual_psize);
+	unsigned long pid, end;
+
+
+	pid = mm ? mm->context.id : 0;
+	if (unlikely(pid == MMU_NO_CONTEXT))
+		goto no_context;
+
+	/* 4k page size, just blow the world */
+	if (PAGE_SIZE == 0x1000) {
+		radix__flush_all_mm(mm);
+		return;
+	}
+
+	/* Otherwise first do the PWC */
+	if (local)
+		_tlbiel_pid(pid, RIC_FLUSH_PWC);
+	else
+		_tlbie_pid(pid, RIC_FLUSH_PWC);
+
+	/* Then iterate the pages */
+	end = addr + HPAGE_PMD_SIZE;
+	for (; addr < end; addr += PAGE_SIZE) {
+		if (local)
+			_tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB);
+		else
+			_tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);
+	}
+no_context:
+	preempt_enable();
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
 void radix__flush_tlb_lpid_va(unsigned long lpid, unsigned long gpa,
 			      unsigned long page_size)
 {
diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/tlb_hash64.c
index b5b0fb97b9c0..881ebd53ffc2 100644
--- a/arch/powerpc/mm/tlb_hash64.c
+++ b/arch/powerpc/mm/tlb_hash64.c
@@ -29,6 +29,8 @@
 #include <asm/tlbflush.h>
 #include <asm/tlb.h>
 #include <asm/bug.h>
+#include <asm/pte-walk.h>
+
 
 #include <trace/events/thp.h>
 
@@ -138,13 +140,10 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
  */
 void __flush_tlb_pending(struct ppc64_tlb_batch *batch)
 {
-	const struct cpumask *tmp;
-	int i, local = 0;
+	int i, local;
 
 	i = batch->index;
-	tmp = cpumask_of(smp_processor_id());
-	if (cpumask_equal(mm_cpumask(batch->mm), tmp))
-		local = 1;
+	local = mm_is_thread_local(batch->mm);
 	if (i == 1)
 		flush_hash_page(batch->vpn[0], batch->pte[0],
 				batch->psize, batch->ssize, local);
@@ -207,8 +206,8 @@ void __flush_hash_table_range(struct mm_struct *mm, unsigned long start,
 	local_irq_save(flags);
 	arch_enter_lazy_mmu_mode();
 	for (; start < end; start += PAGE_SIZE) {
-		pte_t *ptep = find_linux_pte_or_hugepte(mm->pgd, start, &is_thp,
-							&hugepage_shift);
+		pte_t *ptep = find_current_mm_pte(mm->pgd, start, &is_thp,
+						  &hugepage_shift);
 		unsigned long pte;
 
 		if (ptep == NULL)
diff --git a/arch/powerpc/mm/tlb_nohash_low.S b/arch/powerpc/mm/tlb_nohash_low.S
index eabecfcaef7c..048b8e9f4492 100644
--- a/arch/powerpc/mm/tlb_nohash_low.S
+++ b/arch/powerpc/mm/tlb_nohash_low.S
@@ -60,7 +60,7 @@ _GLOBAL(__tlbil_va)
 	isync
 1:	blr
 
-#elif defined(CONFIG_8xx)
+#elif defined(CONFIG_PPC_8xx)
 
 /*
  * Nothing to do for 8xx, everything is inline
diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
index 30cf03f53428..47fc6660845d 100644
--- a/arch/powerpc/net/bpf_jit.h
+++ b/arch/powerpc/net/bpf_jit.h
@@ -263,6 +263,7 @@ static inline bool is_nearbranch(int offset)
 #define COND_EQ		(CR0_EQ | COND_CMP_TRUE)
 #define COND_NE		(CR0_EQ | COND_CMP_FALSE)
 #define COND_LT		(CR0_LT | COND_CMP_TRUE)
+#define COND_LE		(CR0_GT | COND_CMP_FALSE)
 
 #endif
 
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index 861c5af1c9c4..a66e64b0b251 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -25,11 +25,7 @@ int bpf_jit_enable __read_mostly;
 
 static void bpf_jit_fill_ill_insns(void *area, unsigned int size)
 {
-	int *p = area;
-
-	/* Fill whole space with trap instructions */
-	while (p < (int *)((char *)area + size))
-		*p++ = BREAKPOINT_INSTRUCTION;
+	memset32(area, BREAKPOINT_INSTRUCTION, size/4);
 }
 
 static inline void bpf_flush_icache(void *start, void *end)
@@ -795,12 +791,24 @@ emit_clear:
 		case BPF_JMP | BPF_JSGT | BPF_X:
 			true_cond = COND_GT;
 			goto cond_branch;
+		case BPF_JMP | BPF_JLT | BPF_K:
+		case BPF_JMP | BPF_JLT | BPF_X:
+		case BPF_JMP | BPF_JSLT | BPF_K:
+		case BPF_JMP | BPF_JSLT | BPF_X:
+			true_cond = COND_LT;
+			goto cond_branch;
 		case BPF_JMP | BPF_JGE | BPF_K:
 		case BPF_JMP | BPF_JGE | BPF_X:
 		case BPF_JMP | BPF_JSGE | BPF_K:
 		case BPF_JMP | BPF_JSGE | BPF_X:
 			true_cond = COND_GE;
 			goto cond_branch;
+		case BPF_JMP | BPF_JLE | BPF_K:
+		case BPF_JMP | BPF_JLE | BPF_X:
+		case BPF_JMP | BPF_JSLE | BPF_K:
+		case BPF_JMP | BPF_JSLE | BPF_X:
+			true_cond = COND_LE;
+			goto cond_branch;
 		case BPF_JMP | BPF_JEQ | BPF_K:
 		case BPF_JMP | BPF_JEQ | BPF_X:
 			true_cond = COND_EQ;
@@ -817,14 +825,18 @@ emit_clear:
 cond_branch:
 			switch (code) {
 			case BPF_JMP | BPF_JGT | BPF_X:
+			case BPF_JMP | BPF_JLT | BPF_X:
 			case BPF_JMP | BPF_JGE | BPF_X:
+			case BPF_JMP | BPF_JLE | BPF_X:
 			case BPF_JMP | BPF_JEQ | BPF_X:
 			case BPF_JMP | BPF_JNE | BPF_X:
 				/* unsigned comparison */
 				PPC_CMPLD(dst_reg, src_reg);
 				break;
 			case BPF_JMP | BPF_JSGT | BPF_X:
+			case BPF_JMP | BPF_JSLT | BPF_X:
 			case BPF_JMP | BPF_JSGE | BPF_X:
+			case BPF_JMP | BPF_JSLE | BPF_X:
 				/* signed comparison */
 				PPC_CMPD(dst_reg, src_reg);
 				break;
@@ -834,7 +846,9 @@ cond_branch:
 			case BPF_JMP | BPF_JNE | BPF_K:
 			case BPF_JMP | BPF_JEQ | BPF_K:
 			case BPF_JMP | BPF_JGT | BPF_K:
+			case BPF_JMP | BPF_JLT | BPF_K:
 			case BPF_JMP | BPF_JGE | BPF_K:
+			case BPF_JMP | BPF_JLE | BPF_K:
 				/*
 				 * Need sign-extended load, so only positive
 				 * values can be used as imm in cmpldi
@@ -849,7 +863,9 @@ cond_branch:
 				}
 				break;
 			case BPF_JMP | BPF_JSGT | BPF_K:
+			case BPF_JMP | BPF_JSLT | BPF_K:
 			case BPF_JMP | BPF_JSGE | BPF_K:
+			case BPF_JMP | BPF_JSLE | BPF_K:
 				/*
 				 * signed comparison, so any 16-bit value
 				 * can be used in cmpdi
diff --git a/arch/powerpc/perf/Makefile b/arch/powerpc/perf/Makefile
index 4d606b99a5cb..3f3a5ce66495 100644
--- a/arch/powerpc/perf/Makefile
+++ b/arch/powerpc/perf/Makefile
@@ -8,6 +8,7 @@ obj64-$(CONFIG_PPC_PERF_CTRS)	+= power4-pmu.o ppc970-pmu.o power5-pmu.o \
 				   isa207-common.o power8-pmu.o power9-pmu.o
 obj32-$(CONFIG_PPC_PERF_CTRS)	+= mpc7450-pmu.o
 
+obj-$(CONFIG_PPC_POWERNV)	+= imc-pmu.o
 obj-$(CONFIG_FSL_EMB_PERF_EVENT) += core-fsl-emb.o
 obj-$(CONFIG_FSL_EMB_PERF_EVENT_E500) += e500-pmu.o e6500-pmu.o
 
diff --git a/arch/powerpc/perf/callchain.c b/arch/powerpc/perf/callchain.c
index 0fc26714780a..0af051a1974e 100644
--- a/arch/powerpc/perf/callchain.c
+++ b/arch/powerpc/perf/callchain.c
@@ -22,6 +22,7 @@
 #ifdef CONFIG_PPC64
 #include "../kernel/ppc32.h"
 #endif
+#include <asm/pte-walk.h>
 
 
 /*
@@ -127,7 +128,7 @@ static int read_user_stack_slow(void __user *ptr, void *buf, int nb)
 		return -EFAULT;
 
 	local_irq_save(flags);
-	ptep = find_linux_pte_or_hugepte(pgdir, addr, NULL, &shift);
+	ptep = find_current_mm_pte(pgdir, addr, NULL, &shift);
 	if (!ptep)
 		goto err_out;
 	if (!shift)
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 6c2d4168daec..2e3eb7431571 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -2039,7 +2039,8 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
 
 		perf_sample_data_init(&data, ~0ULL, event->hw.last_period);
 
-		if (event->attr.sample_type & PERF_SAMPLE_ADDR)
+		if (event->attr.sample_type &
+		    (PERF_SAMPLE_ADDR | PERF_SAMPLE_PHYS_ADDR))
 			perf_get_data_addr(regs, &data.addr);
 
 		if (event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK) {
diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
new file mode 100644
index 000000000000..9ccac86f3463
--- /dev/null
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -0,0 +1,1306 @@
+/*
+ * In-Memory Collection (IMC) Performance Monitor counter support.
+ *
+ * Copyright (C) 2017 Madhavan Srinivasan, IBM Corporation.
+ *           (C) 2017 Anju T Sudhakar, IBM Corporation.
+ *           (C) 2017 Hemant K Shaw, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or later version.
+ */
+#include <linux/perf_event.h>
+#include <linux/slab.h>
+#include <asm/opal.h>
+#include <asm/imc-pmu.h>
+#include <asm/cputhreads.h>
+#include <asm/smp.h>
+#include <linux/string.h>
+
+/* Nest IMC data structures and variables */
+
+/*
+ * Used to avoid races in counting the nest-pmu units during hotplug
+ * register and unregister
+ */
+static DEFINE_MUTEX(nest_init_lock);
+static DEFINE_PER_CPU(struct imc_pmu_ref *, local_nest_imc_refc);
+static struct imc_pmu *per_nest_pmu_arr[IMC_MAX_PMUS];
+static cpumask_t nest_imc_cpumask;
+struct imc_pmu_ref *nest_imc_refc;
+static int nest_pmus;
+
+/* Core IMC data structures and variables */
+
+static cpumask_t core_imc_cpumask;
+struct imc_pmu_ref *core_imc_refc;
+static struct imc_pmu *core_imc_pmu;
+
+/* Thread IMC data structures and variables */
+
+static DEFINE_PER_CPU(u64 *, thread_imc_mem);
+static struct imc_pmu *thread_imc_pmu;
+static int thread_imc_mem_size;
+
+struct imc_pmu *imc_event_to_pmu(struct perf_event *event)
+{
+	return container_of(event->pmu, struct imc_pmu, pmu);
+}
+
+PMU_FORMAT_ATTR(event, "config:0-40");
+PMU_FORMAT_ATTR(offset, "config:0-31");
+PMU_FORMAT_ATTR(rvalue, "config:32");
+PMU_FORMAT_ATTR(mode, "config:33-40");
+static struct attribute *imc_format_attrs[] = {
+	&format_attr_event.attr,
+	&format_attr_offset.attr,
+	&format_attr_rvalue.attr,
+	&format_attr_mode.attr,
+	NULL,
+};
+
+static struct attribute_group imc_format_group = {
+	.name = "format",
+	.attrs = imc_format_attrs,
+};
+
+/* Get the cpumask printed to a buffer "buf" */
+static ssize_t imc_pmu_cpumask_get_attr(struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
+{
+	struct pmu *pmu = dev_get_drvdata(dev);
+	struct imc_pmu *imc_pmu = container_of(pmu, struct imc_pmu, pmu);
+	cpumask_t *active_mask;
+
+	switch(imc_pmu->domain){
+	case IMC_DOMAIN_NEST:
+		active_mask = &nest_imc_cpumask;
+		break;
+	case IMC_DOMAIN_CORE:
+		active_mask = &core_imc_cpumask;
+		break;
+	default:
+		return 0;
+	}
+
+	return cpumap_print_to_pagebuf(true, buf, active_mask);
+}
+
+static DEVICE_ATTR(cpumask, S_IRUGO, imc_pmu_cpumask_get_attr, NULL);
+
+static struct attribute *imc_pmu_cpumask_attrs[] = {
+	&dev_attr_cpumask.attr,
+	NULL,
+};
+
+static struct attribute_group imc_pmu_cpumask_attr_group = {
+	.attrs = imc_pmu_cpumask_attrs,
+};
+
+/* device_str_attr_create : Populate event "name" and string "str" in attribute */
+static struct attribute *device_str_attr_create(const char *name, const char *str)
+{
+	struct perf_pmu_events_attr *attr;
+
+	attr = kzalloc(sizeof(*attr), GFP_KERNEL);
+	if (!attr)
+		return NULL;
+	sysfs_attr_init(&attr->attr.attr);
+
+	attr->event_str = str;
+	attr->attr.attr.name = name;
+	attr->attr.attr.mode = 0444;
+	attr->attr.show = perf_event_sysfs_show;
+
+	return &attr->attr.attr;
+}
+
+struct imc_events *imc_parse_event(struct device_node *np, const char *scale,
+				  const char *unit, const char *prefix, u32 base)
+{
+	struct imc_events *event;
+	const char *s;
+	u32 reg;
+
+	event = kzalloc(sizeof(struct imc_events), GFP_KERNEL);
+	if (!event)
+		return NULL;
+
+	if (of_property_read_u32(np, "reg", &reg))
+		goto error;
+	/* Add the base_reg value to the "reg" */
+	event->value = base + reg;
+
+	if (of_property_read_string(np, "event-name", &s))
+		goto error;
+
+	event->name = kasprintf(GFP_KERNEL, "%s%s", prefix, s);
+	if (!event->name)
+		goto error;
+
+	if (of_property_read_string(np, "scale", &s))
+		s = scale;
+
+	if (s) {
+		event->scale = kstrdup(s, GFP_KERNEL);
+		if (!event->scale)
+			goto error;
+	}
+
+	if (of_property_read_string(np, "unit", &s))
+		s = unit;
+
+	if (s) {
+		event->unit = kstrdup(s, GFP_KERNEL);
+		if (!event->unit)
+			goto error;
+	}
+
+	return event;
+error:
+	kfree(event->unit);
+	kfree(event->scale);
+	kfree(event->name);
+	kfree(event);
+
+	return NULL;
+}
+
+/*
+ * update_events_in_group: Update the "events" information in an attr_group
+ *                         and assign the attr_group to the pmu "pmu".
+ */
+static int update_events_in_group(struct device_node *node, struct imc_pmu *pmu)
+{
+	struct attribute_group *attr_group;
+	struct attribute **attrs, *dev_str;
+	struct device_node *np, *pmu_events;
+	struct imc_events *ev;
+	u32 handle, base_reg;
+	int i=0, j=0, ct;
+	const char *prefix, *g_scale, *g_unit;
+	const char *ev_val_str, *ev_scale_str, *ev_unit_str;
+
+	if (!of_property_read_u32(node, "events", &handle))
+		pmu_events = of_find_node_by_phandle(handle);
+	else
+		return 0;
+
+	/* Did not find any node with a given phandle */
+	if (!pmu_events)
+		return 0;
+
+	/* Get a count of number of child nodes */
+	ct = of_get_child_count(pmu_events);
+
+	/* Get the event prefix */
+	if (of_property_read_string(node, "events-prefix", &prefix))
+		return 0;
+
+	/* Get a global unit and scale data if available */
+	if (of_property_read_string(node, "scale", &g_scale))
+		g_scale = NULL;
+
+	if (of_property_read_string(node, "unit", &g_unit))
+		g_unit = NULL;
+
+	/* "reg" property gives out the base offset of the counters data */
+	of_property_read_u32(node, "reg", &base_reg);
+
+	/* Allocate memory for the events */
+	pmu->events = kcalloc(ct, sizeof(struct imc_events), GFP_KERNEL);
+	if (!pmu->events)
+		return -ENOMEM;
+
+	ct = 0;
+	/* Parse the events and update the struct */
+	for_each_child_of_node(pmu_events, np) {
+		ev = imc_parse_event(np, g_scale, g_unit, prefix, base_reg);
+		if (ev)
+			pmu->events[ct++] = ev;
+	}
+
+	/* Allocate memory for attribute group */
+	attr_group = kzalloc(sizeof(*attr_group), GFP_KERNEL);
+	if (!attr_group)
+		return -ENOMEM;
+
+	/*
+	 * Allocate memory for attributes.
+	 * Since we have count of events for this pmu, we also allocate
+	 * memory for the scale and unit attribute for now.
+	 * "ct" has the total event structs added from the events-parent node.
+	 * So allocate three times the "ct" (this includes event, event_scale and
+	 * event_unit).
+	 */
+	attrs = kcalloc(((ct * 3) + 1), sizeof(struct attribute *), GFP_KERNEL);
+	if (!attrs) {
+		kfree(attr_group);
+		kfree(pmu->events);
+		return -ENOMEM;
+	}
+
+	attr_group->name = "events";
+	attr_group->attrs = attrs;
+	do {
+		ev_val_str = kasprintf(GFP_KERNEL, "event=0x%x", pmu->events[i]->value);
+		dev_str = device_str_attr_create(pmu->events[i]->name, ev_val_str);
+		if (!dev_str)
+			continue;
+
+		attrs[j++] = dev_str;
+		if (pmu->events[i]->scale) {
+			ev_scale_str = kasprintf(GFP_KERNEL, "%s.scale",pmu->events[i]->name);
+			dev_str = device_str_attr_create(ev_scale_str, pmu->events[i]->scale);
+			if (!dev_str)
+				continue;
+
+			attrs[j++] = dev_str;
+		}
+
+		if (pmu->events[i]->unit) {
+			ev_unit_str = kasprintf(GFP_KERNEL, "%s.unit",pmu->events[i]->name);
+			dev_str = device_str_attr_create(ev_unit_str, pmu->events[i]->unit);
+			if (!dev_str)
+				continue;
+
+			attrs[j++] = dev_str;
+		}
+	} while (++i < ct);
+
+	/* Save the event attribute */
+	pmu->attr_groups[IMC_EVENT_ATTR] = attr_group;
+
+	kfree(pmu->events);
+	return 0;
+}
+
+/* get_nest_pmu_ref: Return the imc_pmu_ref struct for the given node */
+static struct imc_pmu_ref *get_nest_pmu_ref(int cpu)
+{
+	return per_cpu(local_nest_imc_refc, cpu);
+}
+
+static void nest_change_cpu_context(int old_cpu, int new_cpu)
+{
+	struct imc_pmu **pn = per_nest_pmu_arr;
+	int i;
+
+	if (old_cpu < 0 || new_cpu < 0)
+		return;
+
+	for (i = 0; *pn && i < IMC_MAX_PMUS; i++, pn++)
+		perf_pmu_migrate_context(&(*pn)->pmu, old_cpu, new_cpu);
+}
+
+static int ppc_nest_imc_cpu_offline(unsigned int cpu)
+{
+	int nid, target = -1;
+	const struct cpumask *l_cpumask;
+	struct imc_pmu_ref *ref;
+
+	/*
+	 * Check in the designated list for this cpu. Dont bother
+	 * if not one of them.
+	 */
+	if (!cpumask_test_and_clear_cpu(cpu, &nest_imc_cpumask))
+		return 0;
+
+	/*
+	 * Now that this cpu is one of the designated,
+	 * find a next cpu a) which is online and b) in same chip.
+	 */
+	nid = cpu_to_node(cpu);
+	l_cpumask = cpumask_of_node(nid);
+	target = cpumask_any_but(l_cpumask, cpu);
+
+	/*
+	 * Update the cpumask with the target cpu and
+	 * migrate the context if needed
+	 */
+	if (target >= 0 && target < nr_cpu_ids) {
+		cpumask_set_cpu(target, &nest_imc_cpumask);
+		nest_change_cpu_context(cpu, target);
+	} else {
+		opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST,
+				       get_hard_smp_processor_id(cpu));
+		/*
+		 * If this is the last cpu in this chip then, skip the reference
+		 * count mutex lock and make the reference count on this chip zero.
+		 */
+		ref = get_nest_pmu_ref(cpu);
+		if (!ref)
+			return -EINVAL;
+
+		ref->refc = 0;
+	}
+	return 0;
+}
+
+static int ppc_nest_imc_cpu_online(unsigned int cpu)
+{
+	const struct cpumask *l_cpumask;
+	static struct cpumask tmp_mask;
+	int res;
+
+	/* Get the cpumask of this node */
+	l_cpumask = cpumask_of_node(cpu_to_node(cpu));
+
+	/*
+	 * If this is not the first online CPU on this node, then
+	 * just return.
+	 */
+	if (cpumask_and(&tmp_mask, l_cpumask, &nest_imc_cpumask))
+		return 0;
+
+	/*
+	 * If this is the first online cpu on this node
+	 * disable the nest counters by making an OPAL call.
+	 */
+	res = opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST,
+				     get_hard_smp_processor_id(cpu));
+	if (res)
+		return res;
+
+	/* Make this CPU the designated target for counter collection */
+	cpumask_set_cpu(cpu, &nest_imc_cpumask);
+	return 0;
+}
+
+static int nest_pmu_cpumask_init(void)
+{
+	return cpuhp_setup_state(CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE,
+				 "perf/powerpc/imc:online",
+				 ppc_nest_imc_cpu_online,
+				 ppc_nest_imc_cpu_offline);
+}
+
+static void nest_imc_counters_release(struct perf_event *event)
+{
+	int rc, node_id;
+	struct imc_pmu_ref *ref;
+
+	if (event->cpu < 0)
+		return;
+
+	node_id = cpu_to_node(event->cpu);
+
+	/*
+	 * See if we need to disable the nest PMU.
+	 * If no events are currently in use, then we have to take a
+	 * mutex to ensure that we don't race with another task doing
+	 * enable or disable the nest counters.
+	 */
+	ref = get_nest_pmu_ref(event->cpu);
+	if (!ref)
+		return;
+
+	/* Take the mutex lock for this node and then decrement the reference count */
+	mutex_lock(&ref->lock);
+	ref->refc--;
+	if (ref->refc == 0) {
+		rc = opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST,
+					    get_hard_smp_processor_id(event->cpu));
+		if (rc) {
+			mutex_unlock(&ref->lock);
+			pr_err("nest-imc: Unable to stop the counters for core %d\n", node_id);
+			return;
+		}
+	} else if (ref->refc < 0) {
+		WARN(1, "nest-imc: Invalid event reference count\n");
+		ref->refc = 0;
+	}
+	mutex_unlock(&ref->lock);
+}
+
+static int nest_imc_event_init(struct perf_event *event)
+{
+	int chip_id, rc, node_id;
+	u32 l_config, config = event->attr.config;
+	struct imc_mem_info *pcni;
+	struct imc_pmu *pmu;
+	struct imc_pmu_ref *ref;
+	bool flag = false;
+
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	/* Sampling not supported */
+	if (event->hw.sample_period)
+		return -EINVAL;
+
+	/* unsupported modes and filters */
+	if (event->attr.exclude_user   ||
+	    event->attr.exclude_kernel ||
+	    event->attr.exclude_hv     ||
+	    event->attr.exclude_idle   ||
+	    event->attr.exclude_host   ||
+	    event->attr.exclude_guest)
+		return -EINVAL;
+
+	if (event->cpu < 0)
+		return -EINVAL;
+
+	pmu = imc_event_to_pmu(event);
+
+	/* Sanity check for config (event offset) */
+	if ((config & IMC_EVENT_OFFSET_MASK) > pmu->counter_mem_size)
+		return -EINVAL;
+
+	/*
+	 * Nest HW counter memory resides in a per-chip reserve-memory (HOMER).
+	 * Get the base memory addresss for this cpu.
+	 */
+	chip_id = topology_physical_package_id(event->cpu);
+	pcni = pmu->mem_info;
+	do {
+		if (pcni->id == chip_id) {
+			flag = true;
+			break;
+		}
+		pcni++;
+	} while (pcni);
+
+	if (!flag)
+		return -ENODEV;
+
+	/*
+	 * Add the event offset to the base address.
+	 */
+	l_config = config & IMC_EVENT_OFFSET_MASK;
+	event->hw.event_base = (u64)pcni->vbase + l_config;
+	node_id = cpu_to_node(event->cpu);
+
+	/*
+	 * Get the imc_pmu_ref struct for this node.
+	 * Take the mutex lock and then increment the count of nest pmu events
+	 * inited.
+	 */
+	ref = get_nest_pmu_ref(event->cpu);
+	if (!ref)
+		return -EINVAL;
+
+	mutex_lock(&ref->lock);
+	if (ref->refc == 0) {
+		rc = opal_imc_counters_start(OPAL_IMC_COUNTERS_NEST,
+					     get_hard_smp_processor_id(event->cpu));
+		if (rc) {
+			mutex_unlock(&ref->lock);
+			pr_err("nest-imc: Unable to start the counters for node %d\n",
+									node_id);
+			return rc;
+		}
+	}
+	++ref->refc;
+	mutex_unlock(&ref->lock);
+
+	event->destroy = nest_imc_counters_release;
+	return 0;
+}
+
+/*
+ * core_imc_mem_init : Initializes memory for the current core.
+ *
+ * Uses alloc_pages_node() and uses the returned address as an argument to
+ * an opal call to configure the pdbar. The address sent as an argument is
+ * converted to physical address before the opal call is made. This is the
+ * base address at which the core imc counters are populated.
+ */
+static int core_imc_mem_init(int cpu, int size)
+{
+	int phys_id, rc = 0, core_id = (cpu / threads_per_core);
+	struct imc_mem_info *mem_info;
+
+	/*
+	 * alloc_pages_node() will allocate memory for core in the
+	 * local node only.
+	 */
+	phys_id = topology_physical_package_id(cpu);
+	mem_info = &core_imc_pmu->mem_info[core_id];
+	mem_info->id = core_id;
+
+	/* We need only vbase for core counters */
+	mem_info->vbase = page_address(alloc_pages_node(phys_id,
+					  GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE,
+					  get_order(size)));
+	if (!mem_info->vbase)
+		return -ENOMEM;
+
+	/* Init the mutex */
+	core_imc_refc[core_id].id = core_id;
+	mutex_init(&core_imc_refc[core_id].lock);
+
+	rc = opal_imc_counters_init(OPAL_IMC_COUNTERS_CORE,
+				__pa((void *)mem_info->vbase),
+				get_hard_smp_processor_id(cpu));
+	if (rc) {
+		free_pages((u64)mem_info->vbase, get_order(size));
+		mem_info->vbase = NULL;
+	}
+
+	return rc;
+}
+
+static bool is_core_imc_mem_inited(int cpu)
+{
+	struct imc_mem_info *mem_info;
+	int core_id = (cpu / threads_per_core);
+
+	mem_info = &core_imc_pmu->mem_info[core_id];
+	if (!mem_info->vbase)
+		return false;
+
+	return true;
+}
+
+static int ppc_core_imc_cpu_online(unsigned int cpu)
+{
+	const struct cpumask *l_cpumask;
+	static struct cpumask tmp_mask;
+	int ret = 0;
+
+	/* Get the cpumask for this core */
+	l_cpumask = cpu_sibling_mask(cpu);
+
+	/* If a cpu for this core is already set, then, don't do anything */
+	if (cpumask_and(&tmp_mask, l_cpumask, &core_imc_cpumask))
+		return 0;
+
+	if (!is_core_imc_mem_inited(cpu)) {
+		ret = core_imc_mem_init(cpu, core_imc_pmu->counter_mem_size);
+		if (ret) {
+			pr_info("core_imc memory allocation for cpu %d failed\n", cpu);
+			return ret;
+		}
+	}
+
+	/* set the cpu in the mask */
+	cpumask_set_cpu(cpu, &core_imc_cpumask);
+	return 0;
+}
+
+static int ppc_core_imc_cpu_offline(unsigned int cpu)
+{
+	unsigned int ncpu, core_id;
+	struct imc_pmu_ref *ref;
+
+	/*
+	 * clear this cpu out of the mask, if not present in the mask,
+	 * don't bother doing anything.
+	 */
+	if (!cpumask_test_and_clear_cpu(cpu, &core_imc_cpumask))
+		return 0;
+
+	/* Find any online cpu in that core except the current "cpu" */
+	ncpu = cpumask_any_but(cpu_sibling_mask(cpu), cpu);
+
+	if (ncpu >= 0 && ncpu < nr_cpu_ids) {
+		cpumask_set_cpu(ncpu, &core_imc_cpumask);
+		perf_pmu_migrate_context(&core_imc_pmu->pmu, cpu, ncpu);
+	} else {
+		/*
+		 * If this is the last cpu in this core then, skip taking refernce
+		 * count mutex lock for this core and directly zero "refc" for
+		 * this core.
+		 */
+		opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE,
+				       get_hard_smp_processor_id(cpu));
+		core_id = cpu / threads_per_core;
+		ref = &core_imc_refc[core_id];
+		if (!ref)
+			return -EINVAL;
+
+		ref->refc = 0;
+	}
+	return 0;
+}
+
+static int core_imc_pmu_cpumask_init(void)
+{
+	return cpuhp_setup_state(CPUHP_AP_PERF_POWERPC_CORE_IMC_ONLINE,
+				 "perf/powerpc/imc_core:online",
+				 ppc_core_imc_cpu_online,
+				 ppc_core_imc_cpu_offline);
+}
+
+static void core_imc_counters_release(struct perf_event *event)
+{
+	int rc, core_id;
+	struct imc_pmu_ref *ref;
+
+	if (event->cpu < 0)
+		return;
+	/*
+	 * See if we need to disable the IMC PMU.
+	 * If no events are currently in use, then we have to take a
+	 * mutex to ensure that we don't race with another task doing
+	 * enable or disable the core counters.
+	 */
+	core_id = event->cpu / threads_per_core;
+
+	/* Take the mutex lock and decrement the refernce count for this core */
+	ref = &core_imc_refc[core_id];
+	if (!ref)
+		return;
+
+	mutex_lock(&ref->lock);
+	ref->refc--;
+	if (ref->refc == 0) {
+		rc = opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE,
+					    get_hard_smp_processor_id(event->cpu));
+		if (rc) {
+			mutex_unlock(&ref->lock);
+			pr_err("IMC: Unable to stop the counters for core %d\n", core_id);
+			return;
+		}
+	} else if (ref->refc < 0) {
+		WARN(1, "core-imc: Invalid event reference count\n");
+		ref->refc = 0;
+	}
+	mutex_unlock(&ref->lock);
+}
+
+static int core_imc_event_init(struct perf_event *event)
+{
+	int core_id, rc;
+	u64 config = event->attr.config;
+	struct imc_mem_info *pcmi;
+	struct imc_pmu *pmu;
+	struct imc_pmu_ref *ref;
+
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	/* Sampling not supported */
+	if (event->hw.sample_period)
+		return -EINVAL;
+
+	/* unsupported modes and filters */
+	if (event->attr.exclude_user   ||
+	    event->attr.exclude_kernel ||
+	    event->attr.exclude_hv     ||
+	    event->attr.exclude_idle   ||
+	    event->attr.exclude_host   ||
+	    event->attr.exclude_guest)
+		return -EINVAL;
+
+	if (event->cpu < 0)
+		return -EINVAL;
+
+	event->hw.idx = -1;
+	pmu = imc_event_to_pmu(event);
+
+	/* Sanity check for config (event offset) */
+	if (((config & IMC_EVENT_OFFSET_MASK) > pmu->counter_mem_size))
+		return -EINVAL;
+
+	if (!is_core_imc_mem_inited(event->cpu))
+		return -ENODEV;
+
+	core_id = event->cpu / threads_per_core;
+	pcmi = &core_imc_pmu->mem_info[core_id];
+	if ((!pcmi->vbase))
+		return -ENODEV;
+
+	/* Get the core_imc mutex for this core */
+	ref = &core_imc_refc[core_id];
+	if (!ref)
+		return -EINVAL;
+
+	/*
+	 * Core pmu units are enabled only when it is used.
+	 * See if this is triggered for the first time.
+	 * If yes, take the mutex lock and enable the core counters.
+	 * If not, just increment the count in core_imc_refc struct.
+	 */
+	mutex_lock(&ref->lock);
+	if (ref->refc == 0) {
+		rc = opal_imc_counters_start(OPAL_IMC_COUNTERS_CORE,
+					     get_hard_smp_processor_id(event->cpu));
+		if (rc) {
+			mutex_unlock(&ref->lock);
+			pr_err("core-imc: Unable to start the counters for core %d\n",
+									core_id);
+			return rc;
+		}
+	}
+	++ref->refc;
+	mutex_unlock(&ref->lock);
+
+	event->hw.event_base = (u64)pcmi->vbase + (config & IMC_EVENT_OFFSET_MASK);
+	event->destroy = core_imc_counters_release;
+	return 0;
+}
+
+/*
+ * Allocates a page of memory for each of the online cpus, and write the
+ * physical base address of that page to the LDBAR for that cpu.
+ *
+ * LDBAR Register Layout:
+ *
+ *  0          4         8         12        16        20        24        28
+ * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
+ *   | |       [   ]    [                   Counter Address [8:50]
+ *   | * Mode    |
+ *   |           * PB Scope
+ *   * Enable/Disable
+ *
+ *  32        36        40        44        48        52        56        60
+ * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
+ *           Counter Address [8:50]              ]
+ *
+ */
+static int thread_imc_mem_alloc(int cpu_id, int size)
+{
+	u64 ldbar_value, *local_mem = per_cpu(thread_imc_mem, cpu_id);
+	int phys_id = topology_physical_package_id(cpu_id);
+
+	if (!local_mem) {
+		/*
+		 * This case could happen only once at start, since we dont
+		 * free the memory in cpu offline path.
+		 */
+		local_mem = page_address(alloc_pages_node(phys_id,
+				  GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE,
+				  get_order(size)));
+		if (!local_mem)
+			return -ENOMEM;
+
+		per_cpu(thread_imc_mem, cpu_id) = local_mem;
+	}
+
+	ldbar_value = ((u64)local_mem & THREAD_IMC_LDBAR_MASK) | THREAD_IMC_ENABLE;
+
+	mtspr(SPRN_LDBAR, ldbar_value);
+	return 0;
+}
+
+static int ppc_thread_imc_cpu_online(unsigned int cpu)
+{
+	return thread_imc_mem_alloc(cpu, thread_imc_mem_size);
+}
+
+static int ppc_thread_imc_cpu_offline(unsigned int cpu)
+{
+	mtspr(SPRN_LDBAR, 0);
+	return 0;
+}
+
+static int thread_imc_cpu_init(void)
+{
+	return cpuhp_setup_state(CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE,
+			  "perf/powerpc/imc_thread:online",
+			  ppc_thread_imc_cpu_online,
+			  ppc_thread_imc_cpu_offline);
+}
+
+void thread_imc_pmu_sched_task(struct perf_event_context *ctx,
+				      bool sched_in)
+{
+	int core_id;
+	struct imc_pmu_ref *ref;
+
+	if (!is_core_imc_mem_inited(smp_processor_id()))
+		return;
+
+	core_id = smp_processor_id() / threads_per_core;
+	/*
+	 * imc pmus are enabled only when it is used.
+	 * See if this is triggered for the first time.
+	 * If yes, take the mutex lock and enable the counters.
+	 * If not, just increment the count in ref count struct.
+	 */
+	ref = &core_imc_refc[core_id];
+	if (!ref)
+		return;
+
+	if (sched_in) {
+		mutex_lock(&ref->lock);
+		if (ref->refc == 0) {
+			if (opal_imc_counters_start(OPAL_IMC_COUNTERS_CORE,
+			     get_hard_smp_processor_id(smp_processor_id()))) {
+				mutex_unlock(&ref->lock);
+				pr_err("thread-imc: Unable to start the counter\
+							for core %d\n", core_id);
+				return;
+			}
+		}
+		++ref->refc;
+		mutex_unlock(&ref->lock);
+	} else {
+		mutex_lock(&ref->lock);
+		ref->refc--;
+		if (ref->refc == 0) {
+			if (opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE,
+			    get_hard_smp_processor_id(smp_processor_id()))) {
+				mutex_unlock(&ref->lock);
+				pr_err("thread-imc: Unable to stop the counters\
+							for core %d\n", core_id);
+				return;
+			}
+		} else if (ref->refc < 0) {
+			ref->refc = 0;
+		}
+		mutex_unlock(&ref->lock);
+	}
+
+	return;
+}
+
+static int thread_imc_event_init(struct perf_event *event)
+{
+	u32 config = event->attr.config;
+	struct task_struct *target;
+	struct imc_pmu *pmu;
+
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	/* Sampling not supported */
+	if (event->hw.sample_period)
+		return -EINVAL;
+
+	event->hw.idx = -1;
+	pmu = imc_event_to_pmu(event);
+
+	/* Sanity check for config offset */
+	if (((config & IMC_EVENT_OFFSET_MASK) > pmu->counter_mem_size))
+		return -EINVAL;
+
+	target = event->hw.target;
+	if (!target)
+		return -EINVAL;
+
+	event->pmu->task_ctx_nr = perf_sw_context;
+	return 0;
+}
+
+static bool is_thread_imc_pmu(struct perf_event *event)
+{
+	if (!strncmp(event->pmu->name, "thread_imc", strlen("thread_imc")))
+		return true;
+
+	return false;
+}
+
+static u64 * get_event_base_addr(struct perf_event *event)
+{
+	u64 addr;
+
+	if (is_thread_imc_pmu(event)) {
+		addr = (u64)per_cpu(thread_imc_mem, smp_processor_id());
+		return (u64 *)(addr + (event->attr.config & IMC_EVENT_OFFSET_MASK));
+	}
+
+	return (u64 *)event->hw.event_base;
+}
+
+static void thread_imc_pmu_start_txn(struct pmu *pmu,
+				     unsigned int txn_flags)
+{
+	if (txn_flags & ~PERF_PMU_TXN_ADD)
+		return;
+	perf_pmu_disable(pmu);
+}
+
+static void thread_imc_pmu_cancel_txn(struct pmu *pmu)
+{
+	perf_pmu_enable(pmu);
+}
+
+static int thread_imc_pmu_commit_txn(struct pmu *pmu)
+{
+	perf_pmu_enable(pmu);
+	return 0;
+}
+
+static u64 imc_read_counter(struct perf_event *event)
+{
+	u64 *addr, data;
+
+	/*
+	 * In-Memory Collection (IMC) counters are free flowing counters.
+	 * So we take a snapshot of the counter value on enable and save it
+	 * to calculate the delta at later stage to present the event counter
+	 * value.
+	 */
+	addr = get_event_base_addr(event);
+	data = be64_to_cpu(READ_ONCE(*addr));
+	local64_set(&event->hw.prev_count, data);
+
+	return data;
+}
+
+static void imc_event_update(struct perf_event *event)
+{
+	u64 counter_prev, counter_new, final_count;
+
+	counter_prev = local64_read(&event->hw.prev_count);
+	counter_new = imc_read_counter(event);
+	final_count = counter_new - counter_prev;
+
+	/* Update the delta to the event count */
+	local64_add(final_count, &event->count);
+}
+
+static void imc_event_start(struct perf_event *event, int flags)
+{
+	/*
+	 * In Memory Counters are free flowing counters. HW or the microcode
+	 * keeps adding to the counter offset in memory. To get event
+	 * counter value, we snapshot the value here and we calculate
+	 * delta at later point.
+	 */
+	imc_read_counter(event);
+}
+
+static void imc_event_stop(struct perf_event *event, int flags)
+{
+	/*
+	 * Take a snapshot and calculate the delta and update
+	 * the event counter values.
+	 */
+	imc_event_update(event);
+}
+
+static int imc_event_add(struct perf_event *event, int flags)
+{
+	if (flags & PERF_EF_START)
+		imc_event_start(event, flags);
+
+	return 0;
+}
+
+static int thread_imc_event_add(struct perf_event *event, int flags)
+{
+	if (flags & PERF_EF_START)
+		imc_event_start(event, flags);
+
+	/* Enable the sched_task to start the engine */
+	perf_sched_cb_inc(event->ctx->pmu);
+	return 0;
+}
+
+static void thread_imc_event_del(struct perf_event *event, int flags)
+{
+	/*
+	 * Take a snapshot and calculate the delta and update
+	 * the event counter values.
+	 */
+	imc_event_update(event);
+	perf_sched_cb_dec(event->ctx->pmu);
+}
+
+/* update_pmu_ops : Populate the appropriate operations for "pmu" */
+static int update_pmu_ops(struct imc_pmu *pmu)
+{
+	pmu->pmu.task_ctx_nr = perf_invalid_context;
+	pmu->pmu.add = imc_event_add;
+	pmu->pmu.del = imc_event_stop;
+	pmu->pmu.start = imc_event_start;
+	pmu->pmu.stop = imc_event_stop;
+	pmu->pmu.read = imc_event_update;
+	pmu->pmu.attr_groups = pmu->attr_groups;
+	pmu->attr_groups[IMC_FORMAT_ATTR] = &imc_format_group;
+
+	switch (pmu->domain) {
+	case IMC_DOMAIN_NEST:
+		pmu->pmu.event_init = nest_imc_event_init;
+		pmu->attr_groups[IMC_CPUMASK_ATTR] = &imc_pmu_cpumask_attr_group;
+		break;
+	case IMC_DOMAIN_CORE:
+		pmu->pmu.event_init = core_imc_event_init;
+		pmu->attr_groups[IMC_CPUMASK_ATTR] = &imc_pmu_cpumask_attr_group;
+		break;
+	case IMC_DOMAIN_THREAD:
+		pmu->pmu.event_init = thread_imc_event_init;
+		pmu->pmu.sched_task = thread_imc_pmu_sched_task;
+		pmu->pmu.add = thread_imc_event_add;
+		pmu->pmu.del = thread_imc_event_del;
+		pmu->pmu.start_txn = thread_imc_pmu_start_txn;
+		pmu->pmu.cancel_txn = thread_imc_pmu_cancel_txn;
+		pmu->pmu.commit_txn = thread_imc_pmu_commit_txn;
+		break;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+/* init_nest_pmu_ref: Initialize the imc_pmu_ref struct for all the nodes */
+static int init_nest_pmu_ref(void)
+{
+	int nid, i, cpu;
+
+	nest_imc_refc = kcalloc(num_possible_nodes(), sizeof(*nest_imc_refc),
+								GFP_KERNEL);
+
+	if (!nest_imc_refc)
+		return -ENOMEM;
+
+	i = 0;
+	for_each_node(nid) {
+		/*
+		 * Mutex lock to avoid races while tracking the number of
+		 * sessions using the chip's nest pmu units.
+		 */
+		mutex_init(&nest_imc_refc[i].lock);
+
+		/*
+		 * Loop to init the "id" with the node_id. Variable "i" initialized to
+		 * 0 and will be used as index to the array. "i" will not go off the
+		 * end of the array since the "for_each_node" loops for "N_POSSIBLE"
+		 * nodes only.
+		 */
+		nest_imc_refc[i++].id = nid;
+	}
+
+	/*
+	 * Loop to init the per_cpu "local_nest_imc_refc" with the proper
+	 * "nest_imc_refc" index. This makes get_nest_pmu_ref() alot simple.
+	 */
+	for_each_possible_cpu(cpu) {
+		nid = cpu_to_node(cpu);
+		for (i = 0; i < num_possible_nodes(); i++) {
+			if (nest_imc_refc[i].id == nid) {
+				per_cpu(local_nest_imc_refc, cpu) = &nest_imc_refc[i];
+				break;
+			}
+		}
+	}
+	return 0;
+}
+
+static void cleanup_all_core_imc_memory(void)
+{
+	int i, nr_cores = num_present_cpus() / threads_per_core;
+	struct imc_mem_info *ptr = core_imc_pmu->mem_info;
+	int size = core_imc_pmu->counter_mem_size;
+
+	/* mem_info will never be NULL */
+	for (i = 0; i < nr_cores; i++) {
+		if (ptr[i].vbase)
+			free_pages((u64)ptr->vbase, get_order(size));
+	}
+
+	kfree(ptr);
+	kfree(core_imc_refc);
+}
+
+static void thread_imc_ldbar_disable(void *dummy)
+{
+	/*
+	 * By Zeroing LDBAR, we disable thread-imc
+	 * updates.
+	 */
+	mtspr(SPRN_LDBAR, 0);
+}
+
+void thread_imc_disable(void)
+{
+	on_each_cpu(thread_imc_ldbar_disable, NULL, 1);
+}
+
+static void cleanup_all_thread_imc_memory(void)
+{
+	int i, order = get_order(thread_imc_mem_size);
+
+	for_each_online_cpu(i) {
+		if (per_cpu(thread_imc_mem, i))
+			free_pages((u64)per_cpu(thread_imc_mem, i), order);
+
+	}
+}
+
+/*
+ * Common function to unregister cpu hotplug callback and
+ * free the memory.
+ * TODO: Need to handle pmu unregistering, which will be
+ * done in followup series.
+ */
+static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr)
+{
+	if (pmu_ptr->domain == IMC_DOMAIN_NEST) {
+		mutex_lock(&nest_init_lock);
+		if (nest_pmus == 1) {
+			cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE);
+			kfree(nest_imc_refc);
+		}
+
+		if (nest_pmus > 0)
+			nest_pmus--;
+		mutex_unlock(&nest_init_lock);
+	}
+
+	/* Free core_imc memory */
+	if (pmu_ptr->domain == IMC_DOMAIN_CORE) {
+		cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_CORE_IMC_ONLINE);
+		cleanup_all_core_imc_memory();
+	}
+
+	/* Free thread_imc memory */
+	if (pmu_ptr->domain == IMC_DOMAIN_THREAD) {
+		cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE);
+		cleanup_all_thread_imc_memory();
+	}
+
+	/* Only free the attr_groups which are dynamically allocated  */
+	kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]->attrs);
+	kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]);
+	kfree(pmu_ptr);
+	return;
+}
+
+
+/*
+ * imc_mem_init : Function to support memory allocation for core imc.
+ */
+static int imc_mem_init(struct imc_pmu *pmu_ptr, struct device_node *parent,
+								int pmu_index)
+{
+	const char *s;
+	int nr_cores, cpu, res;
+
+	if (of_property_read_string(parent, "name", &s))
+		return -ENODEV;
+
+	switch (pmu_ptr->domain) {
+	case IMC_DOMAIN_NEST:
+		/* Update the pmu name */
+		pmu_ptr->pmu.name = kasprintf(GFP_KERNEL, "%s%s_imc", "nest_", s);
+		if (!pmu_ptr->pmu.name)
+			return -ENOMEM;
+
+		/* Needed for hotplug/migration */
+		per_nest_pmu_arr[pmu_index] = pmu_ptr;
+		break;
+	case IMC_DOMAIN_CORE:
+		/* Update the pmu name */
+		pmu_ptr->pmu.name = kasprintf(GFP_KERNEL, "%s%s", s, "_imc");
+		if (!pmu_ptr->pmu.name)
+			return -ENOMEM;
+
+		nr_cores = num_present_cpus() / threads_per_core;
+		pmu_ptr->mem_info = kcalloc(nr_cores, sizeof(struct imc_mem_info),
+								GFP_KERNEL);
+
+		if (!pmu_ptr->mem_info)
+			return -ENOMEM;
+
+		core_imc_refc = kcalloc(nr_cores, sizeof(struct imc_pmu_ref),
+								GFP_KERNEL);
+
+		if (!core_imc_refc)
+			return -ENOMEM;
+
+		core_imc_pmu = pmu_ptr;
+		break;
+	case IMC_DOMAIN_THREAD:
+		/* Update the pmu name */
+		pmu_ptr->pmu.name = kasprintf(GFP_KERNEL, "%s%s", s, "_imc");
+		if (!pmu_ptr->pmu.name)
+			return -ENOMEM;
+
+		thread_imc_mem_size = pmu_ptr->counter_mem_size;
+		for_each_online_cpu(cpu) {
+			res = thread_imc_mem_alloc(cpu, pmu_ptr->counter_mem_size);
+			if (res)
+				return res;
+		}
+
+		thread_imc_pmu = pmu_ptr;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * init_imc_pmu : Setup and register the IMC pmu device.
+ *
+ * @parent:	Device tree unit node
+ * @pmu_ptr:	memory allocated for this pmu
+ * @pmu_idx:	Count of nest pmc registered
+ *
+ * init_imc_pmu() setup pmu cpumask and registers for a cpu hotplug callback.
+ * Handles failure cases and accordingly frees memory.
+ */
+int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_idx)
+{
+	int ret;
+
+	ret = imc_mem_init(pmu_ptr, parent, pmu_idx);
+	if (ret)
+		goto err_free;
+
+	switch (pmu_ptr->domain) {
+	case IMC_DOMAIN_NEST:
+		/*
+		* Nest imc pmu need only one cpu per chip, we initialize the
+		* cpumask for the first nest imc pmu and use the same for the
+		* rest. To handle the cpuhotplug callback unregister, we track
+		* the number of nest pmus in "nest_pmus".
+		*/
+		mutex_lock(&nest_init_lock);
+		if (nest_pmus == 0) {
+			ret = init_nest_pmu_ref();
+			if (ret) {
+				mutex_unlock(&nest_init_lock);
+				goto err_free;
+			}
+			/* Register for cpu hotplug notification. */
+			ret = nest_pmu_cpumask_init();
+			if (ret) {
+				mutex_unlock(&nest_init_lock);
+				goto err_free;
+			}
+		}
+		nest_pmus++;
+		mutex_unlock(&nest_init_lock);
+		break;
+	case IMC_DOMAIN_CORE:
+		ret = core_imc_pmu_cpumask_init();
+		if (ret) {
+			cleanup_all_core_imc_memory();
+			return ret;
+		}
+
+		break;
+	case IMC_DOMAIN_THREAD:
+		ret = thread_imc_cpu_init();
+		if (ret) {
+			cleanup_all_thread_imc_memory();
+			return ret;
+		}
+
+		break;
+	default:
+		return  -1;	/* Unknown domain */
+	}
+
+	ret = update_events_in_group(parent, pmu_ptr);
+	if (ret)
+		goto err_free;
+
+	ret = update_pmu_ops(pmu_ptr);
+	if (ret)
+		goto err_free;
+
+	ret = perf_pmu_register(&pmu_ptr->pmu, pmu_ptr->pmu.name, -1);
+	if (ret)
+		goto err_free;
+
+	pr_info("%s performance monitor hardware support registered\n",
+							pmu_ptr->pmu.name);
+
+	return 0;
+
+err_free:
+	imc_common_cpuhp_mem_free(pmu_ptr);
+	return ret;
+}
diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c
index 3f3aa9a7063a..2efee3f196f5 100644
--- a/arch/powerpc/perf/isa207-common.c
+++ b/arch/powerpc/perf/isa207-common.c
@@ -99,7 +99,7 @@ static void mmcra_sdar_mode(u64 event, unsigned long *mmcra)
 		else if (!cpu_has_feature(CPU_FTR_POWER9_DD1) && p9_SDAR_MODE(event))
 			*mmcra |=  p9_SDAR_MODE(event) << MMCRA_SDAR_MODE_SHIFT;
 		else
-			*mmcra |= MMCRA_SDAR_MODE_TLB;
+			*mmcra |= MMCRA_SDAR_MODE_DCACHE;
 	} else
 		*mmcra |= MMCRA_SDAR_MODE_TLB;
 }
@@ -488,8 +488,8 @@ static int find_alternative(u64 event, const unsigned int ev_alt[][MAX_ALT], int
 	return -1;
 }
 
-int isa207_get_alternatives(u64 event, u64 alt[],
-				const unsigned int ev_alt[][MAX_ALT], int size)
+int isa207_get_alternatives(u64 event, u64 alt[], int size, unsigned int flags,
+					const unsigned int ev_alt[][MAX_ALT])
 {
 	int i, j, num_alt = 0;
 	u64 alt_event;
@@ -505,5 +505,30 @@ int isa207_get_alternatives(u64 event, u64 alt[],
 		}
 	}
 
+	if (flags & PPMU_ONLY_COUNT_RUN) {
+		/*
+		 * We're only counting in RUN state, so PM_CYC is equivalent to
+		 * PM_RUN_CYC and PM_INST_CMPL === PM_RUN_INST_CMPL.
+		 */
+		j = num_alt;
+		for (i = 0; i < num_alt; ++i) {
+			switch (alt[i]) {
+			case 0x1e:			/* PMC_CYC */
+				alt[j++] = 0x600f4;	/* PM_RUN_CYC */
+				break;
+			case 0x600f4:
+				alt[j++] = 0x1e;
+				break;
+			case 0x2:			/* PM_INST_CMPL */
+				alt[j++] = 0x500fa;	/* PM_RUN_INST_CMPL */
+				break;
+			case 0x500fa:
+				alt[j++] = 0x2;
+				break;
+			}
+		}
+		num_alt = j;
+	}
+
 	return num_alt;
 }
diff --git a/arch/powerpc/perf/isa207-common.h b/arch/powerpc/perf/isa207-common.h
index 8acbe6e802c7..6c737d675792 100644
--- a/arch/powerpc/perf/isa207-common.h
+++ b/arch/powerpc/perf/isa207-common.h
@@ -247,6 +247,7 @@
 #define MMCRA_SDAR_MODE_SHIFT		42
 #define MMCRA_SDAR_MODE_TLB		(1ull << MMCRA_SDAR_MODE_SHIFT)
 #define MMCRA_SDAR_MODE_NO_UPDATES	~(0x3ull << MMCRA_SDAR_MODE_SHIFT)
+#define MMCRA_SDAR_MODE_DCACHE		(2ull << MMCRA_SDAR_MODE_SHIFT)
 #define MMCRA_IFM_SHIFT			30
 #define MMCRA_THR_CTR_MANT_SHIFT	19
 #define MMCRA_THR_CTR_MANT_MASK		0x7Ful
@@ -287,8 +288,8 @@ int isa207_compute_mmcr(u64 event[], int n_ev,
 				unsigned int hwc[], unsigned long mmcr[],
 				struct perf_event *pevents[]);
 void isa207_disable_pmc(unsigned int pmc, unsigned long mmcr[]);
-int isa207_get_alternatives(u64 event, u64 alt[],
-				const unsigned int ev_alt[][MAX_ALT], int size);
+int isa207_get_alternatives(u64 event, u64 alt[], int size, unsigned int flags,
+					const unsigned int ev_alt[][MAX_ALT]);
 void isa207_get_mem_data_src(union perf_mem_data_src *dsrc, u32 flags,
 							struct pt_regs *regs);
 void isa207_get_mem_weight(u64 *weight);
diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c
index 5463516e369b..c9356955cab4 100644
--- a/arch/powerpc/perf/power8-pmu.c
+++ b/arch/powerpc/perf/power8-pmu.c
@@ -50,34 +50,11 @@ static const unsigned int event_alternatives[][MAX_ALT] = {
 
 static int power8_get_alternatives(u64 event, unsigned int flags, u64 alt[])
 {
-	int i, j, num_alt = 0;
-
-	num_alt = isa207_get_alternatives(event, alt, event_alternatives,
-					(int)ARRAY_SIZE(event_alternatives));
-	if (flags & PPMU_ONLY_COUNT_RUN) {
-		/*
-		 * We're only counting in RUN state, so PM_CYC is equivalent to
-		 * PM_RUN_CYC and PM_INST_CMPL === PM_RUN_INST_CMPL.
-		 */
-		j = num_alt;
-		for (i = 0; i < num_alt; ++i) {
-			switch (alt[i]) {
-			case PM_CYC:
-				alt[j++] = PM_RUN_CYC;
-				break;
-			case PM_RUN_CYC:
-				alt[j++] = PM_CYC;
-				break;
-			case PM_INST_CMPL:
-				alt[j++] = PM_RUN_INST_CMPL;
-				break;
-			case PM_RUN_INST_CMPL:
-				alt[j++] = PM_INST_CMPL;
-				break;
-			}
-		}
-		num_alt = j;
-	}
+	int num_alt = 0;
+
+	num_alt = isa207_get_alternatives(event, alt,
+					  ARRAY_SIZE(event_alternatives), flags,
+					  event_alternatives);
 
 	return num_alt;
 }
diff --git a/arch/powerpc/perf/power9-events-list.h b/arch/powerpc/perf/power9-events-list.h
index 50689180a6c1..e99c6bf4d391 100644
--- a/arch/powerpc/perf/power9-events-list.h
+++ b/arch/powerpc/perf/power9-events-list.h
@@ -16,13 +16,16 @@ EVENT(PM_CYC,					0x0001e)
 EVENT(PM_ICT_NOSLOT_CYC,			0x100f8)
 EVENT(PM_CMPLU_STALL,				0x1e054)
 EVENT(PM_INST_CMPL,				0x00002)
-EVENT(PM_BRU_CMPL,				0x4d05e)
+EVENT(PM_BR_CMPL,				0x4d05e)
 EVENT(PM_BR_MPRED_CMPL,				0x400f6)
 
 /* All L1 D cache load references counted at finish, gated by reject */
 EVENT(PM_LD_REF_L1,				0x100fc)
 /* Load Missed L1 */
 EVENT(PM_LD_MISS_L1_FIN,			0x2c04e)
+EVENT(PM_LD_MISS_L1,				0x3e054)
+/* Alternate event code for PM_LD_MISS_L1 */
+EVENT(PM_LD_MISS_L1_ALT,			0x400f0)
 /* Store Missed L1 */
 EVENT(PM_ST_MISS_L1,				0x300f0)
 /* L1 cache data prefetches */
@@ -62,3 +65,7 @@ EVENT(PM_INST_DISP,				0x200f2)
 EVENT(PM_INST_DISP_ALT,				0x300f2)
 /* Alternate Branch event code */
 EVENT(PM_BR_CMPL_ALT,				0x10012)
+/* Branch event that are not strongly biased */
+EVENT(PM_BR_2PATH,				0x20036)
+/* ALternate branch event that are not strongly biased */
+EVENT(PM_BR_2PATH_ALT,				0x40036)
diff --git a/arch/powerpc/perf/power9-pmu.c b/arch/powerpc/perf/power9-pmu.c
index 2280cf87ff9c..24b5b5b7a206 100644
--- a/arch/powerpc/perf/power9-pmu.c
+++ b/arch/powerpc/perf/power9-pmu.c
@@ -109,14 +109,17 @@ static const unsigned int power9_event_alternatives[][MAX_ALT] = {
 	{ PM_INST_DISP,			PM_INST_DISP_ALT },
 	{ PM_RUN_CYC_ALT,		PM_RUN_CYC },
 	{ PM_RUN_INST_CMPL_ALT,		PM_RUN_INST_CMPL },
+	{ PM_LD_MISS_L1,		PM_LD_MISS_L1_ALT },
+	{ PM_BR_2PATH,			PM_BR_2PATH_ALT },
 };
 
 static int power9_get_alternatives(u64 event, unsigned int flags, u64 alt[])
 {
 	int num_alt = 0;
 
-	num_alt = isa207_get_alternatives(event, alt, power9_event_alternatives,
-				(int)ARRAY_SIZE(power9_event_alternatives));
+	num_alt = isa207_get_alternatives(event, alt,
+					  ARRAY_SIZE(power9_event_alternatives), flags,
+					  power9_event_alternatives);
 
 	return num_alt;
 }
@@ -125,7 +128,7 @@ GENERIC_EVENT_ATTR(cpu-cycles,			PM_CYC);
 GENERIC_EVENT_ATTR(stalled-cycles-frontend,	PM_ICT_NOSLOT_CYC);
 GENERIC_EVENT_ATTR(stalled-cycles-backend,	PM_CMPLU_STALL);
 GENERIC_EVENT_ATTR(instructions,		PM_INST_CMPL);
-GENERIC_EVENT_ATTR(branch-instructions,		PM_BRU_CMPL);
+GENERIC_EVENT_ATTR(branch-instructions,		PM_BR_CMPL);
 GENERIC_EVENT_ATTR(branch-misses,		PM_BR_MPRED_CMPL);
 GENERIC_EVENT_ATTR(cache-references,		PM_LD_REF_L1);
 GENERIC_EVENT_ATTR(cache-misses,		PM_LD_MISS_L1_FIN);
@@ -143,7 +146,7 @@ CACHE_EVENT_ATTR(LLC-prefetches,		PM_L3_PREF_ALL);
 CACHE_EVENT_ATTR(LLC-store-misses,		PM_L2_ST_MISS);
 CACHE_EVENT_ATTR(LLC-stores,			PM_L2_ST);
 CACHE_EVENT_ATTR(branch-load-misses,		PM_BR_MPRED_CMPL);
-CACHE_EVENT_ATTR(branch-loads,			PM_BRU_CMPL);
+CACHE_EVENT_ATTR(branch-loads,			PM_BR_CMPL);
 CACHE_EVENT_ATTR(dTLB-load-misses,		PM_DTLB_MISS);
 CACHE_EVENT_ATTR(iTLB-load-misses,		PM_ITLB_MISS);
 
@@ -152,7 +155,7 @@ static struct attribute *power9_events_attr[] = {
 	GENERIC_EVENT_PTR(PM_ICT_NOSLOT_CYC),
 	GENERIC_EVENT_PTR(PM_CMPLU_STALL),
 	GENERIC_EVENT_PTR(PM_INST_CMPL),
-	GENERIC_EVENT_PTR(PM_BRU_CMPL),
+	GENERIC_EVENT_PTR(PM_BR_CMPL),
 	GENERIC_EVENT_PTR(PM_BR_MPRED_CMPL),
 	GENERIC_EVENT_PTR(PM_LD_REF_L1),
 	GENERIC_EVENT_PTR(PM_LD_MISS_L1_FIN),
@@ -169,7 +172,7 @@ static struct attribute *power9_events_attr[] = {
 	CACHE_EVENT_PTR(PM_L2_ST_MISS),
 	CACHE_EVENT_PTR(PM_L2_ST),
 	CACHE_EVENT_PTR(PM_BR_MPRED_CMPL),
-	CACHE_EVENT_PTR(PM_BRU_CMPL),
+	CACHE_EVENT_PTR(PM_BR_CMPL),
 	CACHE_EVENT_PTR(PM_DTLB_MISS),
 	CACHE_EVENT_PTR(PM_ITLB_MISS),
 	NULL
@@ -244,7 +247,7 @@ static int power9_generic_events[] = {
 	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =	PM_ICT_NOSLOT_CYC,
 	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =	PM_CMPLU_STALL,
 	[PERF_COUNT_HW_INSTRUCTIONS] =			PM_INST_CMPL,
-	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] =		PM_BRU_CMPL,
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] =		PM_BR_CMPL,
 	[PERF_COUNT_HW_BRANCH_MISSES] =			PM_BR_MPRED_CMPL,
 	[PERF_COUNT_HW_CACHE_REFERENCES] =		PM_LD_REF_L1,
 	[PERF_COUNT_HW_CACHE_MISSES] =			PM_LD_MISS_L1_FIN,
@@ -370,7 +373,7 @@ static int power9_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
 	},
 	[ C(BPU) ] = {
 		[ C(OP_READ) ] = {
-			[ C(RESULT_ACCESS) ] = PM_BRU_CMPL,
+			[ C(RESULT_ACCESS) ] = PM_BR_CMPL,
 			[ C(RESULT_MISS)   ] = PM_BR_MPRED_CMPL,
 		},
 		[ C(OP_WRITE) ] = {
@@ -459,8 +462,8 @@ static int __init init_power9_pmu(void)
 		 * Power9 DD1 should use PM_BR_CMPL_ALT event code for
 		 * "branches" to provide correct counter value.
 		 */
-		EVENT_VAR(PM_BRU_CMPL, _g).id = PM_BR_CMPL_ALT;
-		EVENT_VAR(PM_BRU_CMPL, _c).id = PM_BR_CMPL_ALT;
+		EVENT_VAR(PM_BR_CMPL, _g).id = PM_BR_CMPL_ALT;
+		EVENT_VAR(PM_BR_CMPL, _c).id = PM_BR_CMPL_ALT;
 		rc = register_power_pmu(&power9_isa207_pmu);
 	} else {
 		rc = register_power_pmu(&power9_pmu);
diff --git a/arch/powerpc/platforms/44x/Makefile b/arch/powerpc/platforms/44x/Makefile
index 72b824160660..2c5651992369 100644
--- a/arch/powerpc/platforms/44x/Makefile
+++ b/arch/powerpc/platforms/44x/Makefile
@@ -1,6 +1,6 @@
-obj-$(CONFIG_44x)	+= misc_44x.o
+obj-y	+= misc_44x.o machine_check.o
 ifneq ($(CONFIG_PPC4xx_CPM),y)
-obj-$(CONFIG_44x)	+= idle.o
+obj-y	+= idle.o
 endif
 obj-$(CONFIG_PPC44x_SIMPLE) += ppc44x_simple.o
 obj-$(CONFIG_EBONY)	+= ebony.o
diff --git a/arch/powerpc/platforms/44x/machine_check.c b/arch/powerpc/platforms/44x/machine_check.c
new file mode 100644
index 000000000000..034d70d6d335
--- /dev/null
+++ b/arch/powerpc/platforms/44x/machine_check.c
@@ -0,0 +1,89 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/printk.h>
+#include <linux/ptrace.h>
+
+#include <asm/reg.h>
+
+int machine_check_440A(struct pt_regs *regs)
+{
+	unsigned long reason = regs->dsisr;
+
+	printk("Machine check in kernel mode.\n");
+	if (reason & ESR_IMCP){
+		printk("Instruction Synchronous Machine Check exception\n");
+		mtspr(SPRN_ESR, reason & ~ESR_IMCP);
+	}
+	else {
+		u32 mcsr = mfspr(SPRN_MCSR);
+		if (mcsr & MCSR_IB)
+			printk("Instruction Read PLB Error\n");
+		if (mcsr & MCSR_DRB)
+			printk("Data Read PLB Error\n");
+		if (mcsr & MCSR_DWB)
+			printk("Data Write PLB Error\n");
+		if (mcsr & MCSR_TLBP)
+			printk("TLB Parity Error\n");
+		if (mcsr & MCSR_ICP){
+			flush_instruction_cache();
+			printk("I-Cache Parity Error\n");
+		}
+		if (mcsr & MCSR_DCSP)
+			printk("D-Cache Search Parity Error\n");
+		if (mcsr & MCSR_DCFP)
+			printk("D-Cache Flush Parity Error\n");
+		if (mcsr & MCSR_IMPE)
+			printk("Machine Check exception is imprecise\n");
+
+		/* Clear MCSR */
+		mtspr(SPRN_MCSR, mcsr);
+	}
+	return 0;
+}
+
+#ifdef CONFIG_PPC_47x
+int machine_check_47x(struct pt_regs *regs)
+{
+	unsigned long reason = regs->dsisr;
+	u32 mcsr;
+
+	printk(KERN_ERR "Machine check in kernel mode.\n");
+	if (reason & ESR_IMCP) {
+		printk(KERN_ERR "Instruction Synchronous Machine Check exception\n");
+		mtspr(SPRN_ESR, reason & ~ESR_IMCP);
+		return 0;
+	}
+	mcsr = mfspr(SPRN_MCSR);
+	if (mcsr & MCSR_IB)
+		printk(KERN_ERR "Instruction Read PLB Error\n");
+	if (mcsr & MCSR_DRB)
+		printk(KERN_ERR "Data Read PLB Error\n");
+	if (mcsr & MCSR_DWB)
+		printk(KERN_ERR "Data Write PLB Error\n");
+	if (mcsr & MCSR_TLBP)
+		printk(KERN_ERR "TLB Parity Error\n");
+	if (mcsr & MCSR_ICP) {
+		flush_instruction_cache();
+		printk(KERN_ERR "I-Cache Parity Error\n");
+	}
+	if (mcsr & MCSR_DCSP)
+		printk(KERN_ERR "D-Cache Search Parity Error\n");
+	if (mcsr & PPC47x_MCSR_GPR)
+		printk(KERN_ERR "GPR Parity Error\n");
+	if (mcsr & PPC47x_MCSR_FPR)
+		printk(KERN_ERR "FPR Parity Error\n");
+	if (mcsr & PPC47x_MCSR_IPR)
+		printk(KERN_ERR "Machine Check exception is imprecise\n");
+
+	/* Clear MCSR */
+	mtspr(SPRN_MCSR, mcsr);
+
+	return 0;
+}
+#endif /* CONFIG_PPC_47x */
diff --git a/arch/powerpc/platforms/4xx/Makefile b/arch/powerpc/platforms/4xx/Makefile
new file mode 100644
index 000000000000..9779c32db34e
--- /dev/null
+++ b/arch/powerpc/platforms/4xx/Makefile
@@ -0,0 +1,8 @@
+obj-y				+= uic.o machine_check.o
+obj-$(CONFIG_PPC4xx_OCM)	+= ocm.o
+obj-$(CONFIG_4xx_SOC)		+= soc.o
+obj-$(CONFIG_PCI)		+= pci.o
+obj-$(CONFIG_PPC4xx_HSTA_MSI)	+= hsta_msi.o
+obj-$(CONFIG_PPC4xx_MSI)	+= msi.o
+obj-$(CONFIG_PPC4xx_CPM)	+= cpm.o
+obj-$(CONFIG_PPC4xx_GPIO)	+= gpio.o
diff --git a/arch/powerpc/sysdev/ppc4xx_cpm.c b/arch/powerpc/platforms/4xx/cpm.c
index ba95adf81d8d..53ff81ca8a3c 100644
--- a/arch/powerpc/sysdev/ppc4xx_cpm.c
+++ b/arch/powerpc/platforms/4xx/cpm.c
@@ -240,7 +240,7 @@ static int cpm_suspend_enter(suspend_state_t state)
 	return 0;
 }
 
-static struct platform_suspend_ops cpm_suspend_ops = {
+static const struct platform_suspend_ops cpm_suspend_ops = {
 	.valid		= cpm_suspend_valid,
 	.enter		= cpm_suspend_enter,
 };
@@ -278,8 +278,8 @@ static int __init cpm_init(void)
 	dcr_len = dcr_resource_len(np, 0);
 
 	if (dcr_base == 0 || dcr_len == 0) {
-		printk(KERN_ERR "cpm: could not parse dcr property for %s\n",
-		       np->full_name);
+		printk(KERN_ERR "cpm: could not parse dcr property for %pOF\n",
+		       np);
 		ret = -EINVAL;
 		goto node_put;
 	}
@@ -287,8 +287,8 @@ static int __init cpm_init(void)
 	cpm.dcr_host = dcr_map(np, dcr_base, dcr_len);
 
 	if (!DCR_MAP_OK(cpm.dcr_host)) {
-		printk(KERN_ERR "cpm: failed to map dcr property for %s\n",
-		       np->full_name);
+		printk(KERN_ERR "cpm: failed to map dcr property for %pOF\n",
+		       np);
 		ret = -EINVAL;
 		goto node_put;
 	}
diff --git a/arch/powerpc/sysdev/ppc4xx_gpio.c b/arch/powerpc/platforms/4xx/gpio.c
index 5382d04dd872..2238e369cde4 100644
--- a/arch/powerpc/sysdev/ppc4xx_gpio.c
+++ b/arch/powerpc/platforms/4xx/gpio.c
@@ -198,8 +198,7 @@ static int __init ppc4xx_add_gpiochips(void)
 			goto err;
 		continue;
 err:
-		pr_err("%s: registration failed with status %d\n",
-		       np->full_name, ret);
+		pr_err("%pOF: registration failed with status %d\n", np, ret);
 		kfree(ppc4xx_gc);
 		/* try others anyway */
 	}
diff --git a/arch/powerpc/sysdev/ppc4xx_hsta_msi.c b/arch/powerpc/platforms/4xx/hsta_msi.c
index 9926ad67af76..9926ad67af76 100644
--- a/arch/powerpc/sysdev/ppc4xx_hsta_msi.c
+++ b/arch/powerpc/platforms/4xx/hsta_msi.c
diff --git a/arch/powerpc/platforms/4xx/machine_check.c b/arch/powerpc/platforms/4xx/machine_check.c
new file mode 100644
index 000000000000..aa039dfaf82f
--- /dev/null
+++ b/arch/powerpc/platforms/4xx/machine_check.c
@@ -0,0 +1,26 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/printk.h>
+#include <linux/ptrace.h>
+
+#include <asm/reg.h>
+
+int machine_check_4xx(struct pt_regs *regs)
+{
+	unsigned long reason = regs->dsisr;
+
+	if (reason & ESR_IMCP) {
+		printk("Instruction");
+		mtspr(SPRN_ESR, reason & ~ESR_IMCP);
+	} else
+		printk("Data");
+	printk(" machine check in kernel mode.\n");
+
+	return 0;
+}
diff --git a/arch/powerpc/sysdev/ppc4xx_msi.c b/arch/powerpc/platforms/4xx/msi.c
index 590dab4f47d6..d50417e23add 100644
--- a/arch/powerpc/sysdev/ppc4xx_msi.c
+++ b/arch/powerpc/platforms/4xx/msi.c
@@ -233,8 +233,7 @@ static int ppc4xx_msi_probe(struct platform_device *dev)
 	/* Get MSI ranges */
 	err = of_address_to_resource(dev->dev.of_node, 0, &res);
 	if (err) {
-		dev_err(&dev->dev, "%s resource error!\n",
-			dev->dev.of_node->full_name);
+		dev_err(&dev->dev, "%pOF resource error!\n", dev->dev.of_node);
 		goto error_out;
 	}
 
diff --git a/arch/powerpc/sysdev/ppc4xx_ocm.c b/arch/powerpc/platforms/4xx/ocm.c
index 85d9e37f5ccb..85d9e37f5ccb 100644
--- a/arch/powerpc/sysdev/ppc4xx_ocm.c
+++ b/arch/powerpc/platforms/4xx/ocm.c
diff --git a/arch/powerpc/sysdev/ppc4xx_pci.c b/arch/powerpc/platforms/4xx/pci.c
index 086aca69ecae..73e6b36bcd51 100644
--- a/arch/powerpc/sysdev/ppc4xx_pci.c
+++ b/arch/powerpc/platforms/4xx/pci.c
@@ -32,7 +32,7 @@
 #include <asm/dcr-regs.h>
 #include <mm/mmu_decl.h>
 
-#include "ppc4xx_pci.h"
+#include "pci.h"
 
 static int dma_offset_set;
 
@@ -127,9 +127,9 @@ static int __init ppc4xx_parse_dma_ranges(struct pci_controller *hose,
 		 * within 32 bits space
 		 */
 		if (cpu_addr != 0 || pci_addr > 0xffffffff) {
-			printk(KERN_WARNING "%s: Ignored unsupported dma range"
+			printk(KERN_WARNING "%pOF: Ignored unsupported dma range"
 			       " 0x%016llx...0x%016llx -> 0x%016llx\n",
-			       hose->dn->full_name,
+			       hose->dn,
 			       pci_addr, pci_addr + size - 1, cpu_addr);
 			continue;
 		}
@@ -152,8 +152,7 @@ static int __init ppc4xx_parse_dma_ranges(struct pci_controller *hose,
 
 	/* We only support one global DMA offset */
 	if (dma_offset_set && pci_dram_offset != res->start) {
-		printk(KERN_ERR "%s: dma-ranges(s) mismatch\n",
-		       hose->dn->full_name);
+		printk(KERN_ERR "%pOF: dma-ranges(s) mismatch\n", hose->dn);
 		return -ENXIO;
 	}
 
@@ -161,17 +160,16 @@ static int __init ppc4xx_parse_dma_ranges(struct pci_controller *hose,
 	 * DMA bounce buffers
 	 */
 	if (size < total_memory) {
-		printk(KERN_ERR "%s: dma-ranges too small "
+		printk(KERN_ERR "%pOF: dma-ranges too small "
 		       "(size=%llx total_memory=%llx)\n",
-		       hose->dn->full_name, size, (u64)total_memory);
+		       hose->dn, size, (u64)total_memory);
 		return -ENXIO;
 	}
 
 	/* Check we are a power of 2 size and that base is a multiple of size*/
 	if ((size & (size - 1)) != 0  ||
 	    (res->start & (size - 1)) != 0) {
-		printk(KERN_ERR "%s: dma-ranges unaligned\n",
-		       hose->dn->full_name);
+		printk(KERN_ERR "%pOF: dma-ranges unaligned\n", hose->dn);
 		return -ENXIO;
 	}
 
@@ -181,8 +179,8 @@ static int __init ppc4xx_parse_dma_ranges(struct pci_controller *hose,
 	if (res->end > 0xffffffff &&
 	    !(of_device_is_compatible(hose->dn, "ibm,plb-pciex-460sx")
 	      || of_device_is_compatible(hose->dn, "ibm,plb-pciex-476fpe"))) {
-		printk(KERN_ERR "%s: dma-ranges outside of 32 bits space\n",
-		       hose->dn->full_name);
+		printk(KERN_ERR "%pOF: dma-ranges outside of 32 bits space\n",
+		       hose->dn);
 		return -ENXIO;
 	}
  out:
@@ -233,8 +231,7 @@ static int __init ppc4xx_setup_one_pci_PMM(struct pci_controller	*hose,
 	 */
 	if ((plb_addr + size) > 0xffffffffull || !is_power_of_2(size) ||
 	    size < 0x1000 || (plb_addr & (size - 1)) != 0) {
-		printk(KERN_WARNING "%s: Resource out of range\n",
-		       hose->dn->full_name);
+		printk(KERN_WARNING "%pOF: Resource out of range\n", hose->dn);
 		return -1;
 	}
 	ma = (0xffffffffu << ilog2(size)) | 1;
@@ -266,8 +263,7 @@ static void __init ppc4xx_configure_pci_PMMs(struct pci_controller *hose,
 		if (!(res->flags & IORESOURCE_MEM))
 			continue;
 		if (j > 2) {
-			printk(KERN_WARNING "%s: Too many ranges\n",
-			       hose->dn->full_name);
+			printk(KERN_WARNING "%pOF: Too many ranges\n", hose->dn);
 			break;
 		}
 
@@ -292,8 +288,8 @@ static void __init ppc4xx_configure_pci_PMMs(struct pci_controller *hose,
 	if (j <= 2 && !found_isa_hole && hose->isa_mem_size)
 		if (ppc4xx_setup_one_pci_PMM(hose, reg, hose->isa_mem_phys, 0,
 					     hose->isa_mem_size, 0, j) == 0)
-			printk(KERN_INFO "%s: Legacy ISA memory support enabled\n",
-			       hose->dn->full_name);
+			printk(KERN_INFO "%pOF: Legacy ISA memory support enabled\n",
+			       hose->dn);
 }
 
 static void __init ppc4xx_configure_pci_PTMs(struct pci_controller *hose,
@@ -333,21 +329,20 @@ static void __init ppc4xx_probe_pci_bridge(struct device_node *np)
 
 	/* Check if device is enabled */
 	if (!of_device_is_available(np)) {
-		printk(KERN_INFO "%s: Port disabled via device-tree\n",
-		       np->full_name);
+		printk(KERN_INFO "%pOF: Port disabled via device-tree\n", np);
 		return;
 	}
 
 	/* Fetch config space registers address */
 	if (of_address_to_resource(np, 0, &rsrc_cfg)) {
-		printk(KERN_ERR "%s: Can't get PCI config register base !",
-		       np->full_name);
+		printk(KERN_ERR "%pOF: Can't get PCI config register base !",
+		       np);
 		return;
 	}
 	/* Fetch host bridge internal registers address */
 	if (of_address_to_resource(np, 3, &rsrc_reg)) {
-		printk(KERN_ERR "%s: Can't get PCI internal register base !",
-		       np->full_name);
+		printk(KERN_ERR "%pOF: Can't get PCI internal register base !",
+		       np);
 		return;
 	}
 
@@ -361,7 +356,7 @@ static void __init ppc4xx_probe_pci_bridge(struct device_node *np)
 	/* Map registers */
 	reg = ioremap(rsrc_reg.start, resource_size(&rsrc_reg));
 	if (reg == NULL) {
-		printk(KERN_ERR "%s: Can't map registers !", np->full_name);
+		printk(KERN_ERR "%pOF: Can't map registers !", np);
 		goto fail;
 	}
 
@@ -423,8 +418,8 @@ static int __init ppc4xx_setup_one_pcix_POM(struct pci_controller	*hose,
 
 	if (!is_power_of_2(size) || size < 0x1000 ||
 	    (plb_addr & (size - 1)) != 0) {
-		printk(KERN_WARNING "%s: Resource out of range\n",
-		       hose->dn->full_name);
+		printk(KERN_WARNING "%pOF: Resource out of range\n",
+		       hose->dn);
 		return -1;
 	}
 
@@ -467,8 +462,7 @@ static void __init ppc4xx_configure_pcix_POMs(struct pci_controller *hose,
 		if (!(res->flags & IORESOURCE_MEM))
 			continue;
 		if (j > 1) {
-			printk(KERN_WARNING "%s: Too many ranges\n",
-			       hose->dn->full_name);
+			printk(KERN_WARNING "%pOF: Too many ranges\n", hose->dn);
 			break;
 		}
 
@@ -493,8 +487,8 @@ static void __init ppc4xx_configure_pcix_POMs(struct pci_controller *hose,
 	if (j <= 1 && !found_isa_hole && hose->isa_mem_size)
 		if (ppc4xx_setup_one_pcix_POM(hose, reg, hose->isa_mem_phys, 0,
 					      hose->isa_mem_size, 0, j) == 0)
-			printk(KERN_INFO "%s: Legacy ISA memory support enabled\n",
-			       hose->dn->full_name);
+			printk(KERN_INFO "%pOF: Legacy ISA memory support enabled\n",
+			       hose->dn);
 }
 
 static void __init ppc4xx_configure_pcix_PIMs(struct pci_controller *hose,
@@ -539,14 +533,14 @@ static void __init ppc4xx_probe_pcix_bridge(struct device_node *np)
 
 	/* Fetch config space registers address */
 	if (of_address_to_resource(np, 0, &rsrc_cfg)) {
-		printk(KERN_ERR "%s:Can't get PCI-X config register base !",
-		       np->full_name);
+		printk(KERN_ERR "%pOF: Can't get PCI-X config register base !",
+		       np);
 		return;
 	}
 	/* Fetch host bridge internal registers address */
 	if (of_address_to_resource(np, 3, &rsrc_reg)) {
-		printk(KERN_ERR "%s: Can't get PCI-X internal register base !",
-		       np->full_name);
+		printk(KERN_ERR "%pOF: Can't get PCI-X internal register base !",
+		       np);
 		return;
 	}
 
@@ -568,7 +562,7 @@ static void __init ppc4xx_probe_pcix_bridge(struct device_node *np)
 	/* Map registers */
 	reg = ioremap(rsrc_reg.start, resource_size(&rsrc_reg));
 	if (reg == NULL) {
-		printk(KERN_ERR "%s: Can't map registers !", np->full_name);
+		printk(KERN_ERR "%pOF: Can't map registers !", np);
 		goto fail;
 	}
 
@@ -1246,8 +1240,8 @@ static void __init ppc460sx_pciex_check_link(struct ppc4xx_pciex_port *port)
 
 	mbase = ioremap(port->cfg_space.start + 0x10000000, 0x1000);
 	if (mbase == NULL) {
-		printk(KERN_ERR "%s: Can't map internal config space !",
-			port->node->full_name);
+		printk(KERN_ERR "%pOF: Can't map internal config space !",
+			port->node);
 		goto done;
 	}
 
@@ -1389,7 +1383,7 @@ static void __init ppc_476fpe_pciex_check_link(struct ppc4xx_pciex_port *port)
 		                    port->index);
 		return;
 	}
-		
+
 	while (timeout_ms--) {
 		val = in_le32(mbase + PECFG_TLDLP);
 
@@ -1448,8 +1442,7 @@ static int __init ppc4xx_pciex_check_core_init(struct device_node *np)
 		ppc4xx_pciex_hwops = &ppc_476fpe_pcie_hwops;
 #endif
 	if (ppc4xx_pciex_hwops == NULL) {
-		printk(KERN_WARNING "PCIE: unknown host type %s\n",
-		       np->full_name);
+		printk(KERN_WARNING "PCIE: unknown host type %pOF\n", np);
 		return -ENODEV;
 	}
 
@@ -1730,8 +1723,7 @@ static int __init ppc4xx_setup_one_pciex_POM(struct ppc4xx_pciex_port	*port,
 	    (index < 2 && size < 0x100000) ||
 	    (index == 2 && size < 0x100) ||
 	    (plb_addr & (size - 1)) != 0) {
-		printk(KERN_WARNING "%s: Resource out of range\n",
-		       hose->dn->full_name);
+		printk(KERN_WARNING "%pOF: Resource out of range\n", hose->dn);
 		return -1;
 	}
 
@@ -1807,8 +1799,8 @@ static void __init ppc4xx_configure_pciex_POMs(struct ppc4xx_pciex_port *port,
 		if (!(res->flags & IORESOURCE_MEM))
 			continue;
 		if (j > 1) {
-			printk(KERN_WARNING "%s: Too many ranges\n",
-			       port->node->full_name);
+			printk(KERN_WARNING "%pOF: Too many ranges\n",
+			       port->node);
 			break;
 		}
 
@@ -1834,8 +1826,8 @@ static void __init ppc4xx_configure_pciex_POMs(struct ppc4xx_pciex_port *port,
 		if (ppc4xx_setup_one_pciex_POM(port, hose, mbase,
 					       hose->isa_mem_phys, 0,
 					       hose->isa_mem_size, 0, j) == 0)
-			printk(KERN_INFO "%s: Legacy ISA memory support enabled\n",
-			       hose->dn->full_name);
+			printk(KERN_INFO "%pOF: Legacy ISA memory support enabled\n",
+			       hose->dn);
 
 	/* Configure IO, always 64K starting at 0. We hard wire it to 64K !
 	 * Note also that it -has- to be region index 2 on this HW
@@ -1970,8 +1962,8 @@ static void __init ppc4xx_pciex_port_setup_hose(struct ppc4xx_pciex_port *port)
 				   (hose->first_busno + 1) * 0x100000,
 				   busses * 0x100000);
 		if (cfg_data == NULL) {
-			printk(KERN_ERR "%s: Can't map external config space !",
-			       port->node->full_name);
+			printk(KERN_ERR "%pOF: Can't map external config space !",
+			       port->node);
 			goto fail;
 		}
 		hose->cfg_data = cfg_data;
@@ -1982,13 +1974,13 @@ static void __init ppc4xx_pciex_port_setup_hose(struct ppc4xx_pciex_port *port)
 	 */
 	mbase = ioremap(port->cfg_space.start + 0x10000000, 0x1000);
 	if (mbase == NULL) {
-		printk(KERN_ERR "%s: Can't map internal config space !",
-		       port->node->full_name);
+		printk(KERN_ERR "%pOF: Can't map internal config space !",
+		       port->node);
 		goto fail;
 	}
 	hose->cfg_addr = mbase;
 
-	pr_debug("PCIE %s, bus %d..%d\n", port->node->full_name,
+	pr_debug("PCIE %pOF, bus %d..%d\n", port->node,
 		 hose->first_busno, hose->last_busno);
 	pr_debug("     config space mapped at: root @0x%p, other @0x%p\n",
 		 hose->cfg_addr, hose->cfg_data);
@@ -2100,14 +2092,13 @@ static void __init ppc4xx_probe_pciex_bridge(struct device_node *np)
 	/* Get the port number from the device-tree */
 	pval = of_get_property(np, "port", NULL);
 	if (pval == NULL) {
-		printk(KERN_ERR "PCIE: Can't find port number for %s\n",
-		       np->full_name);
+		printk(KERN_ERR "PCIE: Can't find port number for %pOF\n", np);
 		return;
 	}
 	portno = *pval;
 	if (portno >= ppc4xx_pciex_port_count) {
-		printk(KERN_ERR "PCIE: port number out of range for %s\n",
-		       np->full_name);
+		printk(KERN_ERR "PCIE: port number out of range for %pOF\n",
+		       np);
 		return;
 	}
 	port = &ppc4xx_pciex_ports[portno];
@@ -2125,8 +2116,8 @@ static void __init ppc4xx_probe_pciex_bridge(struct device_node *np)
 	if (ppc4xx_pciex_hwops->want_sdr) {
 		pval = of_get_property(np, "sdr-base", NULL);
 		if (pval == NULL) {
-			printk(KERN_ERR "PCIE: missing sdr-base for %s\n",
-			       np->full_name);
+			printk(KERN_ERR "PCIE: missing sdr-base for %pOF\n",
+			       np);
 			return;
 		}
 		port->sdr_base = *pval;
@@ -2142,29 +2133,26 @@ static void __init ppc4xx_probe_pciex_bridge(struct device_node *np)
 	} else if (!strcmp(val, "pci")) {
 		port->endpoint = 0;
 	} else {
-		printk(KERN_ERR "PCIE: missing or incorrect device_type for %s\n",
-		       np->full_name);
+		printk(KERN_ERR "PCIE: missing or incorrect device_type for %pOF\n",
+		       np);
 		return;
 	}
 
 	/* Fetch config space registers address */
 	if (of_address_to_resource(np, 0, &port->cfg_space)) {
-		printk(KERN_ERR "%s: Can't get PCI-E config space !",
-		       np->full_name);
+		printk(KERN_ERR "%pOF: Can't get PCI-E config space !", np);
 		return;
 	}
 	/* Fetch host bridge internal registers address */
 	if (of_address_to_resource(np, 1, &port->utl_regs)) {
-		printk(KERN_ERR "%s: Can't get UTL register base !",
-		       np->full_name);
+		printk(KERN_ERR "%pOF: Can't get UTL register base !", np);
 		return;
 	}
 
 	/* Map DCRs */
 	dcrs = dcr_resource_start(np, 0);
 	if (dcrs == 0) {
-		printk(KERN_ERR "%s: Can't get DCR register base !",
-		       np->full_name);
+		printk(KERN_ERR "%pOF: Can't get DCR register base !", np);
 		return;
 	}
 	port->dcrs = dcr_map(np, dcrs, dcr_resource_len(np, 0));
diff --git a/arch/powerpc/sysdev/ppc4xx_pci.h b/arch/powerpc/platforms/4xx/pci.h
index bb4821938ab1..bb4821938ab1 100644
--- a/arch/powerpc/sysdev/ppc4xx_pci.h
+++ b/arch/powerpc/platforms/4xx/pci.h
diff --git a/arch/powerpc/sysdev/ppc4xx_soc.c b/arch/powerpc/platforms/4xx/soc.c
index d41134d2f786..5e36508b2a70 100644
--- a/arch/powerpc/sysdev/ppc4xx_soc.c
+++ b/arch/powerpc/platforms/4xx/soc.c
@@ -90,7 +90,7 @@ static int __init ppc4xx_l2c_probe(void)
 	/* Get l2 cache size */
 	prop = of_get_property(np, "cache-size", NULL);
 	if (prop == NULL) {
-		printk(KERN_ERR "%s: Can't get cache-size!\n", np->full_name);
+		printk(KERN_ERR "%pOF: Can't get cache-size!\n", np);
 		of_node_put(np);
 		return -ENODEV;
 	}
@@ -99,8 +99,7 @@ static int __init ppc4xx_l2c_probe(void)
 	/* Map DCRs */
 	dcrreg = of_get_property(np, "dcr-reg", &len);
 	if (!dcrreg || (len != 4 * sizeof(u32))) {
-		printk(KERN_ERR "%s: Can't get DCR register base !",
-		       np->full_name);
+		printk(KERN_ERR "%pOF: Can't get DCR register base !", np);
 		of_node_put(np);
 		return -ENODEV;
 	}
diff --git a/arch/powerpc/sysdev/uic.c b/arch/powerpc/platforms/4xx/uic.c
index a00949f3e378..8b4dd0da0839 100644
--- a/arch/powerpc/sysdev/uic.c
+++ b/arch/powerpc/platforms/4xx/uic.c
@@ -243,16 +243,16 @@ static struct uic * __init uic_init_one(struct device_node *node)
 	raw_spin_lock_init(&uic->lock);
 	indexp = of_get_property(node, "cell-index", &len);
 	if (!indexp || (len != sizeof(u32))) {
-		printk(KERN_ERR "uic: Device node %s has missing or invalid "
-		       "cell-index property\n", node->full_name);
+		printk(KERN_ERR "uic: Device node %pOF has missing or invalid "
+		       "cell-index property\n", node);
 		return NULL;
 	}
 	uic->index = *indexp;
 
 	dcrreg = of_get_property(node, "dcr-reg", &len);
 	if (!dcrreg || (len != 2*sizeof(u32))) {
-		printk(KERN_ERR "uic: Device node %s has missing or invalid "
-		       "dcr-reg property\n", node->full_name);
+		printk(KERN_ERR "uic: Device node %pOF has missing or invalid "
+		       "dcr-reg property\n", node);
 		return NULL;
 	}
 	uic->dcrbase = *dcrreg;
@@ -292,7 +292,7 @@ void __init uic_init_tree(void)
 		      * top-level interrupt controller */
 	primary_uic = uic_init_one(np);
 	if (!primary_uic)
-		panic("Unable to initialize primary UIC %s\n", np->full_name);
+		panic("Unable to initialize primary UIC %pOF\n", np);
 
 	irq_set_default_host(primary_uic->irqhost);
 	of_node_put(np);
@@ -306,8 +306,8 @@ void __init uic_init_tree(void)
 
 			uic = uic_init_one(np);
 			if (! uic)
-				panic("Unable to initialize a secondary UIC %s\n",
-				      np->full_name);
+				panic("Unable to initialize a secondary UIC %pOF\n",
+				      np);
 
 			cascade_virq = irq_of_parse_and_map(np, 0);
 
diff --git a/arch/powerpc/platforms/512x/clock-commonclk.c b/arch/powerpc/platforms/512x/clock-commonclk.c
index add5a5374fa0..b3097fe6441b 100644
--- a/arch/powerpc/platforms/512x/clock-commonclk.c
+++ b/arch/powerpc/platforms/512x/clock-commonclk.c
@@ -363,7 +363,7 @@ static int get_cpmf_mult_x2(void)
  */
 
 /* applies to the IPS_DIV, and PCI_DIV values */
-static struct clk_div_table divtab_2346[] = {
+static const struct clk_div_table divtab_2346[] = {
 	{ .val = 2, .div = 2, },
 	{ .val = 3, .div = 3, },
 	{ .val = 4, .div = 4, },
@@ -372,7 +372,7 @@ static struct clk_div_table divtab_2346[] = {
 };
 
 /* applies to the MBX_DIV, LPC_DIV, and NFC_DIV values */
-static struct clk_div_table divtab_1234[] = {
+static const struct clk_div_table divtab_1234[] = {
 	{ .val = 1, .div = 1, },
 	{ .val = 2, .div = 2, },
 	{ .val = 3, .div = 3, },
diff --git a/arch/powerpc/platforms/512x/mpc512x_shared.c b/arch/powerpc/platforms/512x/mpc512x_shared.c
index 6b4f4cb7009a..f99e79ee060e 100644
--- a/arch/powerpc/platforms/512x/mpc512x_shared.c
+++ b/arch/powerpc/platforms/512x/mpc512x_shared.c
@@ -387,8 +387,8 @@ static unsigned int __init get_fifo_size(struct device_node *np,
 	if (fp)
 		return *fp;
 
-	pr_warning("no %s property in %s node, defaulting to %d\n",
-		   prop_name, np->full_name, DEFAULT_FIFO_SIZE);
+	pr_warning("no %s property in %pOF node, defaulting to %d\n",
+		   prop_name, np, DEFAULT_FIFO_SIZE);
 
 	return DEFAULT_FIFO_SIZE;
 }
@@ -426,15 +426,15 @@ static void __init mpc512x_psc_fifo_init(void)
 
 		psc = of_iomap(np, 0);
 		if (!psc) {
-			pr_err("%s: Can't map %s device\n",
-				__func__, np->full_name);
+			pr_err("%s: Can't map %pOF device\n",
+				__func__, np);
 			continue;
 		}
 
 		/* FIFO space is 4KiB, check if requested size is available */
 		if ((fifobase + tx_fifo_size + rx_fifo_size) > 0x1000) {
-			pr_err("%s: no fifo space available for %s\n",
-				__func__, np->full_name);
+			pr_err("%s: no fifo space available for %pOF\n",
+				__func__, np);
 			iounmap(psc);
 			/*
 			 * chances are that another device requests less
diff --git a/arch/powerpc/platforms/52xx/efika.c b/arch/powerpc/platforms/52xx/efika.c
index 39b49822ace1..1ecbf176d35a 100644
--- a/arch/powerpc/platforms/52xx/efika.c
+++ b/arch/powerpc/platforms/52xx/efika.c
@@ -99,7 +99,7 @@ static void __init efika_pcisetup(void)
 	bus_range = of_get_property(pcictrl, "bus-range", &len);
 	if (bus_range == NULL || len < 2 * sizeof(int)) {
 		printk(KERN_WARNING EFIKA_PLATFORM_NAME
-		       ": Can't get bus-range for %s\n", pcictrl->full_name);
+		       ": Can't get bus-range for %pOF\n", pcictrl);
 		goto out_put;
 	}
 
@@ -109,14 +109,14 @@ static void __init efika_pcisetup(void)
 	else
 		printk(KERN_INFO EFIKA_PLATFORM_NAME ": PCI buses %d..%d",
 		       bus_range[0], bus_range[1]);
-	printk(" controlled by %s\n", pcictrl->full_name);
+	printk(" controlled by %pOF\n", pcictrl);
 	printk("\n");
 
 	hose = pcibios_alloc_controller(pcictrl);
 	if (!hose) {
 		printk(KERN_WARNING EFIKA_PLATFORM_NAME
-		       ": Can't allocate PCI controller structure for %s\n",
-		       pcictrl->full_name);
+		       ": Can't allocate PCI controller structure for %pOF\n",
+		       pcictrl);
 		goto out_put;
 	}
 
diff --git a/arch/powerpc/platforms/52xx/media5200.c b/arch/powerpc/platforms/52xx/media5200.c
index a3227040cc86..1fcab233d2f2 100644
--- a/arch/powerpc/platforms/52xx/media5200.c
+++ b/arch/powerpc/platforms/52xx/media5200.c
@@ -156,7 +156,7 @@ static void __init media5200_init_irq(void)
 	fpga_np = of_find_compatible_node(NULL, NULL, "fsl,media5200-fpga");
 	if (!fpga_np)
 		goto out;
-	pr_debug("%s: found fpga node: %s\n", __func__, fpga_np->full_name);
+	pr_debug("%s: found fpga node: %pOF\n", __func__, fpga_np);
 
 	media5200_irq.regs = of_iomap(fpga_np, 0);
 	if (!media5200_irq.regs)
diff --git a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
index 22645a7c6b8a..9e974b1e1697 100644
--- a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
+++ b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
@@ -226,7 +226,7 @@ static int mpc52xx_gpt_irq_xlate(struct irq_domain *h, struct device_node *ct,
 	dev_dbg(gpt->dev, "%s: flags=%i\n", __func__, intspec[0]);
 
 	if ((intsize < 1) || (intspec[0] > 3)) {
-		dev_err(gpt->dev, "bad irq specifier in %s\n", ct->full_name);
+		dev_err(gpt->dev, "bad irq specifier in %pOF\n", ct);
 		return -EINVAL;
 	}
 
@@ -331,7 +331,7 @@ mpc52xx_gpt_gpio_setup(struct mpc52xx_gpt_priv *gpt, struct device_node *node)
 	if (!of_find_property(node, "gpio-controller", NULL))
 		return;
 
-	gpt->gc.label = kstrdup(node->full_name, GFP_KERNEL);
+	gpt->gc.label = kasprintf(GFP_KERNEL, "%pOF", node);
 	if (!gpt->gc.label) {
 		dev_err(gpt->dev, "out of memory\n");
 		return;
diff --git a/arch/powerpc/platforms/52xx/mpc52xx_pci.c b/arch/powerpc/platforms/52xx/mpc52xx_pci.c
index 00282c2b0cae..af0f79995214 100644
--- a/arch/powerpc/platforms/52xx/mpc52xx_pci.c
+++ b/arch/powerpc/platforms/52xx/mpc52xx_pci.c
@@ -369,19 +369,19 @@ mpc52xx_add_bridge(struct device_node *node)
 	const int *bus_range;
 	struct resource rsrc;
 
-	pr_debug("Adding MPC52xx PCI host bridge %s\n", node->full_name);
+	pr_debug("Adding MPC52xx PCI host bridge %pOF\n", node);
 
 	pci_add_flags(PCI_REASSIGN_ALL_BUS);
 
 	if (of_address_to_resource(node, 0, &rsrc) != 0) {
-		printk(KERN_ERR "Can't get %s resources\n", node->full_name);
+		printk(KERN_ERR "Can't get %pOF resources\n", node);
 		return -EINVAL;
 	}
 
 	bus_range = of_get_property(node, "bus-range", &len);
 	if (bus_range == NULL || len < 2 * sizeof(int)) {
-		printk(KERN_WARNING "Can't get %s bus-range, assume bus 0\n",
-		       node->full_name);
+		printk(KERN_WARNING "Can't get %pOF bus-range, assume bus 0\n",
+		       node);
 		bus_range = NULL;
 	}
 
diff --git a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c
index 63c5ab6489c9..96bb55ca61d3 100644
--- a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c
+++ b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c
@@ -128,7 +128,7 @@ static int mcu_gpiochip_add(struct mcu *mcu)
 		return -ENODEV;
 
 	gc->owner = THIS_MODULE;
-	gc->label = np->full_name;
+	gc->label = kasprintf(GFP_KERNEL, "%pOF", np);
 	gc->can_sleep = 1;
 	gc->ngpio = MCU_NUM_GPIO;
 	gc->base = -1;
@@ -141,6 +141,7 @@ static int mcu_gpiochip_add(struct mcu *mcu)
 
 static int mcu_gpiochip_remove(struct mcu *mcu)
 {
+	kfree(mcu->gc.label);
 	gpiochip_remove(&mcu->gc);
 	return 0;
 }
diff --git a/arch/powerpc/platforms/83xx/mpc832x_rdb.c b/arch/powerpc/platforms/83xx/mpc832x_rdb.c
index 763ffca9628d..a4539c5accb0 100644
--- a/arch/powerpc/platforms/83xx/mpc832x_rdb.c
+++ b/arch/powerpc/platforms/83xx/mpc832x_rdb.c
@@ -113,7 +113,7 @@ static int __init of_fsl_spi_probe(char *type, char *compatible, u32 sysclk,
 unreg:
 		platform_device_del(pdev);
 err:
-		pr_err("%s: registration failed\n", np->full_name);
+		pr_err("%pOF: registration failed\n", np);
 next:
 		i++;
 	}
diff --git a/arch/powerpc/platforms/83xx/suspend.c b/arch/powerpc/platforms/83xx/suspend.c
index 978b85bb3233..7fa3e197871a 100644
--- a/arch/powerpc/platforms/83xx/suspend.c
+++ b/arch/powerpc/platforms/83xx/suspend.c
@@ -361,7 +361,7 @@ static int pmc_probe(struct platform_device *ofdev)
 			return -EBUSY;
 	}
 
-	pmc_regs = ioremap(res.start, sizeof(struct mpc83xx_pmc));
+	pmc_regs = ioremap(res.start, sizeof(*pmc_regs));
 
 	if (!pmc_regs) {
 		ret = -ENOMEM;
@@ -374,7 +374,7 @@ static int pmc_probe(struct platform_device *ofdev)
 		goto out_pmc;
 	}
 
-	clock_regs = ioremap(res.start, sizeof(struct mpc83xx_pmc));
+	clock_regs = ioremap(res.start, sizeof(*clock_regs));
 
 	if (!clock_regs) {
 		ret = -ENOMEM;
diff --git a/arch/powerpc/platforms/85xx/p1022_ds.c b/arch/powerpc/platforms/85xx/p1022_ds.c
index 0908abd7e36f..9fb57f78cdbe 100644
--- a/arch/powerpc/platforms/85xx/p1022_ds.c
+++ b/arch/powerpc/platforms/85xx/p1022_ds.c
@@ -508,8 +508,8 @@ static void __init p1022_ds_setup_arch(void)
 				 * allocate one static local variable for each
 				 * call to this function.
 				 */
-				pr_info("p1022ds: disabling %s node",
-					np2->full_name);
+				pr_info("p1022ds: disabling %pOF node",
+					np2);
 				of_update_property(np2, &nor_status);
 				of_node_put(np2);
 			}
@@ -524,8 +524,8 @@ static void __init p1022_ds_setup_arch(void)
 					.length = sizeof("disabled"),
 				};
 
-				pr_info("p1022ds: disabling %s node",
-					np2->full_name);
+				pr_info("p1022ds: disabling %pOF node",
+					np2);
 				of_update_property(np2, &nand_status);
 				of_node_put(np2);
 			}
diff --git a/arch/powerpc/platforms/85xx/xes_mpc85xx.c b/arch/powerpc/platforms/85xx/xes_mpc85xx.c
index cd6ce845f398..77e618dce4a8 100644
--- a/arch/powerpc/platforms/85xx/xes_mpc85xx.c
+++ b/arch/powerpc/platforms/85xx/xes_mpc85xx.c
@@ -100,8 +100,8 @@ static void xes_mpc85xx_fixups(void)
 		err = of_address_to_resource(np, 0, &r[0]);
 		if (err) {
 			printk(KERN_WARNING "xes_mpc85xx: Could not get "
-			       "resource for device tree node '%s'",
-			       np->full_name);
+			       "resource for device tree node '%pOF'",
+			       np);
 			continue;
 		}
 
diff --git a/arch/powerpc/platforms/8xx/Kconfig b/arch/powerpc/platforms/8xx/Kconfig
index 80cbcb0ad9b1..536b0c5d5ce3 100644
--- a/arch/powerpc/platforms/8xx/Kconfig
+++ b/arch/powerpc/platforms/8xx/Kconfig
@@ -5,7 +5,6 @@ config CPM1
 choice
 	prompt "8xx Machine Type"
 	depends on PPC_8xx
-	depends on 8xx
 	default MPC885ADS
 
 config MPC8XXFADS
@@ -92,7 +91,7 @@ endmenu
 #
 
 menu "MPC8xx CPM Options"
-	depends on 8xx
+	depends on PPC_8xx
 
 # This doesn't really belong here, but it is convenient to ask
 # 8xx specific questions.
diff --git a/arch/powerpc/platforms/8xx/Makefile b/arch/powerpc/platforms/8xx/Makefile
index 76a81c3350a8..f9af3218bd9c 100644
--- a/arch/powerpc/platforms/8xx/Makefile
+++ b/arch/powerpc/platforms/8xx/Makefile
@@ -1,7 +1,7 @@
 #
 # Makefile for the PowerPC 8xx linux kernel.
 #
-obj-$(CONFIG_PPC_8xx)	  += m8xx_setup.o
+obj-y			+= m8xx_setup.o machine_check.o pic.o
 obj-$(CONFIG_MPC885ADS)   += mpc885ads_setup.o
 obj-$(CONFIG_MPC86XADS)   += mpc86xads_setup.o
 obj-$(CONFIG_PPC_EP88XC)  += ep88xc.o
diff --git a/arch/powerpc/platforms/8xx/m8xx_setup.c b/arch/powerpc/platforms/8xx/m8xx_setup.c
index f81069f79a94..1917d69f84df 100644
--- a/arch/powerpc/platforms/8xx/m8xx_setup.c
+++ b/arch/powerpc/platforms/8xx/m8xx_setup.c
@@ -23,7 +23,7 @@
 #include <asm/fs_pd.h>
 #include <mm/mmu_decl.h>
 
-#include <sysdev/mpc8xx_pic.h>
+#include "pic.h"
 
 #include "mpc8xx.h"
 
diff --git a/arch/powerpc/platforms/8xx/machine_check.c b/arch/powerpc/platforms/8xx/machine_check.c
new file mode 100644
index 000000000000..402016705a39
--- /dev/null
+++ b/arch/powerpc/platforms/8xx/machine_check.c
@@ -0,0 +1,37 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/printk.h>
+#include <linux/ptrace.h>
+
+#include <asm/reg.h>
+
+int machine_check_8xx(struct pt_regs *regs)
+{
+	unsigned long reason = regs->msr;
+
+	pr_err("Machine check in kernel mode.\n");
+	pr_err("Caused by (from SRR1=%lx): ", reason);
+	if (reason & 0x40000000)
+		pr_err("Fetch error at address %lx\n", regs->nip);
+	else
+		pr_err("Data access error at address %lx\n", regs->dar);
+
+#ifdef CONFIG_PCI
+	/* the qspan pci read routines can cause machine checks -- Cort
+	 *
+	 * yuck !!! that totally needs to go away ! There are better ways
+	 * to deal with that than having a wart in the mcheck handler.
+	 * -- BenH
+	 */
+	bad_page_fault(regs, regs->dar, SIGBUS);
+	return 1;
+#else
+	return 0;
+#endif
+}
diff --git a/arch/powerpc/sysdev/mpc8xx_pic.c b/arch/powerpc/platforms/8xx/pic.c
index 2842f9d63d21..8d5a25d43ef3 100644
--- a/arch/powerpc/sysdev/mpc8xx_pic.c
+++ b/arch/powerpc/platforms/8xx/pic.c
@@ -9,7 +9,7 @@
 #include <asm/io.h>
 #include <asm/8xx_immap.h>
 
-#include "mpc8xx_pic.h"
+#include "pic.h"
 
 
 #define PIC_VEC_SPURRIOUS      15
diff --git a/arch/powerpc/sysdev/mpc8xx_pic.h b/arch/powerpc/platforms/8xx/pic.h
index 9fe00eebdc8b..9fe00eebdc8b 100644
--- a/arch/powerpc/sysdev/mpc8xx_pic.h
+++ b/arch/powerpc/platforms/8xx/pic.h
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 2f629e0551e9..13663efc1d31 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -32,7 +32,6 @@ config PPC_85xx
 config PPC_8xx
 	bool "Freescale 8xx"
 	select FSL_SOC
-	select 8xx
 	select PPC_LIB_RHEAP
 	select SYS_SUPPORTS_HUGETLBFS
 
@@ -149,10 +148,6 @@ config 6xx
 	depends on PPC32 && PPC_BOOK3S
 	select PPC_HAVE_PMU_SUPPORT
 
-# this is temp to handle compat with arch=ppc
-config 8xx
-	bool
-
 config E500
 	select FSL_EMB_PERFMON
 	select PPC_FSL_BOOK3E
@@ -271,44 +266,6 @@ config VSX
 
 	  If in doubt, say Y here.
 
-config PPC_ICSWX
-	bool "Support for PowerPC icswx coprocessor instruction"
-	depends on PPC_BOOK3S_64
-	default n
-	---help---
-
-	  This option enables kernel support for the PowerPC Initiate
-	  Coprocessor Store Word (icswx) coprocessor instruction on POWER7
-	  and POWER8 processors. POWER9 uses new copy/paste instructions
-	  to invoke the coprocessor.
-
-	  This option is only useful if you have a processor that supports
-	  the icswx coprocessor instruction. It does not have any effect
-	  on processors without the icswx coprocessor instruction.
-
-	  This option slightly increases kernel memory usage.
-
-	  If in doubt, say N here.
-
-config PPC_ICSWX_PID
-	bool "icswx requires direct PID management"
-	depends on PPC_ICSWX
-	default y
-	---help---
-	  The PID register in server is used explicitly for ICSWX.  In
-	  embedded systems PID management is done by the system.
-
-config PPC_ICSWX_USE_SIGILL
-	bool "Should a bad CT cause a SIGILL?"
-	depends on PPC_ICSWX
-	default n
-	---help---
-	  Should a bad CT used for "non-record form ICSWX" cause an
-	  illegal instruction signal or should it be silent as
-	  architected.
-
-	  If in doubt, say N here.
-
 config SPE_POSSIBLE
 	def_bool y
 	depends on E200 || (E500 && !PPC_E500MC)
@@ -413,7 +370,7 @@ config NR_CPUS
 
 config NOT_COHERENT_CACHE
 	bool
-	depends on 4xx || 8xx || E200 || PPC_MPC512x || GAMECUBE_COMMON
+	depends on 4xx || PPC_8xx || E200 || PPC_MPC512x || GAMECUBE_COMMON
 	default n if PPC_47x
 	default y
 
diff --git a/arch/powerpc/platforms/Makefile b/arch/powerpc/platforms/Makefile
index 469ef170d218..d7a55ecfaee5 100644
--- a/arch/powerpc/platforms/Makefile
+++ b/arch/powerpc/platforms/Makefile
@@ -5,6 +5,7 @@ obj-$(CONFIG_FSL_ULI1575)	+= fsl_uli1575.o
 
 obj-$(CONFIG_PPC_PMAC)		+= powermac/
 obj-$(CONFIG_PPC_CHRP)		+= chrp/
+obj-$(CONFIG_4xx)		+= 4xx/
 obj-$(CONFIG_40x)		+= 40x/
 obj-$(CONFIG_44x)		+= 44x/
 obj-$(CONFIG_PPC_MPC512x)	+= 512x/
diff --git a/arch/powerpc/platforms/amigaone/setup.c b/arch/powerpc/platforms/amigaone/setup.c
index 45cb9821173c..b9d466cc2b8a 100644
--- a/arch/powerpc/platforms/amigaone/setup.c
+++ b/arch/powerpc/platforms/amigaone/setup.c
@@ -40,7 +40,7 @@ static int __init amigaone_add_bridge(struct device_node *dev)
 	const int *bus_range;
 	struct pci_controller *hose;
 
-	printk(KERN_INFO "Adding PCI host bridge %s\n", dev->full_name);
+	printk(KERN_INFO "Adding PCI host bridge %pOF\n", dev);
 
 	cfg_addr = of_get_address(dev, 0, NULL, NULL);
 	cfg_data = of_get_address(dev, 1, NULL, NULL);
@@ -49,8 +49,8 @@ static int __init amigaone_add_bridge(struct device_node *dev)
 
 	bus_range = of_get_property(dev, "bus-range", &len);
 	if ((bus_range == NULL) || (len < 2 * sizeof(int)))
-		printk(KERN_WARNING "Can't get bus-range for %s, assume"
-		       " bus 0\n", dev->full_name);
+		printk(KERN_WARNING "Can't get bus-range for %pOF, assume"
+		       " bus 0\n", dev);
 
 	hose = pcibios_alloc_controller(dev);
 	if (hose == NULL)
diff --git a/arch/powerpc/platforms/cell/axon_msi.c b/arch/powerpc/platforms/cell/axon_msi.c
index 8d3ae2cc52bf..6ea3f248b155 100644
--- a/arch/powerpc/platforms/cell/axon_msi.c
+++ b/arch/powerpc/platforms/cell/axon_msi.c
@@ -187,8 +187,8 @@ static struct axon_msic *find_msi_translator(struct pci_dev *dev)
 
 	irq_domain = irq_find_host(dn);
 	if (!irq_domain) {
-		dev_dbg(&dev->dev, "axon_msi: no irq_domain found for node %s\n",
-			dn->full_name);
+		dev_dbg(&dev->dev, "axon_msi: no irq_domain found for node %pOF\n",
+			dn);
 		goto out_error;
 	}
 
@@ -326,8 +326,8 @@ static void axon_msi_shutdown(struct platform_device *device)
 	struct axon_msic *msic = dev_get_drvdata(&device->dev);
 	u32 tmp;
 
-	pr_devel("axon_msi: disabling %s\n",
-		 irq_domain_get_of_node(msic->irq_domain)->full_name);
+	pr_devel("axon_msi: disabling %pOF\n",
+		 irq_domain_get_of_node(msic->irq_domain));
 	tmp  = dcr_read(msic->dcr_host, MSIC_CTRL_REG);
 	tmp &= ~MSIC_CTRL_ENABLE & ~MSIC_CTRL_IRQ_ENABLE;
 	msic_dcr_write(msic, MSIC_CTRL_REG, tmp);
@@ -340,12 +340,12 @@ static int axon_msi_probe(struct platform_device *device)
 	unsigned int virq;
 	int dcr_base, dcr_len;
 
-	pr_devel("axon_msi: setting up dn %s\n", dn->full_name);
+	pr_devel("axon_msi: setting up dn %pOF\n", dn);
 
 	msic = kzalloc(sizeof(struct axon_msic), GFP_KERNEL);
 	if (!msic) {
-		printk(KERN_ERR "axon_msi: couldn't allocate msic for %s\n",
-		       dn->full_name);
+		printk(KERN_ERR "axon_msi: couldn't allocate msic for %pOF\n",
+		       dn);
 		goto out;
 	}
 
@@ -354,30 +354,30 @@ static int axon_msi_probe(struct platform_device *device)
 
 	if (dcr_base == 0 || dcr_len == 0) {
 		printk(KERN_ERR
-		       "axon_msi: couldn't parse dcr properties on %s\n",
-			dn->full_name);
+		       "axon_msi: couldn't parse dcr properties on %pOF\n",
+			dn);
 		goto out_free_msic;
 	}
 
 	msic->dcr_host = dcr_map(dn, dcr_base, dcr_len);
 	if (!DCR_MAP_OK(msic->dcr_host)) {
-		printk(KERN_ERR "axon_msi: dcr_map failed for %s\n",
-		       dn->full_name);
+		printk(KERN_ERR "axon_msi: dcr_map failed for %pOF\n",
+		       dn);
 		goto out_free_msic;
 	}
 
 	msic->fifo_virt = dma_alloc_coherent(&device->dev, MSIC_FIFO_SIZE_BYTES,
 					     &msic->fifo_phys, GFP_KERNEL);
 	if (!msic->fifo_virt) {
-		printk(KERN_ERR "axon_msi: couldn't allocate fifo for %s\n",
-		       dn->full_name);
+		printk(KERN_ERR "axon_msi: couldn't allocate fifo for %pOF\n",
+		       dn);
 		goto out_free_msic;
 	}
 
 	virq = irq_of_parse_and_map(dn, 0);
 	if (!virq) {
-		printk(KERN_ERR "axon_msi: irq parse and map failed for %s\n",
-		       dn->full_name);
+		printk(KERN_ERR "axon_msi: irq parse and map failed for %pOF\n",
+		       dn);
 		goto out_free_fifo;
 	}
 	memset(msic->fifo_virt, 0xff, MSIC_FIFO_SIZE_BYTES);
@@ -385,8 +385,8 @@ static int axon_msi_probe(struct platform_device *device)
 	/* We rely on being able to stash a virq in a u16, so limit irqs to < 65536 */
 	msic->irq_domain = irq_domain_add_nomap(dn, 65536, &msic_host_ops, msic);
 	if (!msic->irq_domain) {
-		printk(KERN_ERR "axon_msi: couldn't allocate irq_domain for %s\n",
-		       dn->full_name);
+		printk(KERN_ERR "axon_msi: couldn't allocate irq_domain for %pOF\n",
+		       dn);
 		goto out_free_fifo;
 	}
 
@@ -412,7 +412,7 @@ static int axon_msi_probe(struct platform_device *device)
 
 	axon_msi_debug_setup(dn, msic);
 
-	printk(KERN_DEBUG "axon_msi: setup MSIC on %s\n", dn->full_name);
+	printk(KERN_DEBUG "axon_msi: setup MSIC on %pOF\n", dn);
 
 	return 0;
 
diff --git a/arch/powerpc/platforms/cell/interrupt.c b/arch/powerpc/platforms/cell/interrupt.c
index 871d38479a25..6fc85e29dc08 100644
--- a/arch/powerpc/platforms/cell/interrupt.c
+++ b/arch/powerpc/platforms/cell/interrupt.c
@@ -303,8 +303,8 @@ static void __init init_one_iic(unsigned int hw_cpu, unsigned long addr,
 	iic->node = of_node_get(node);
 	out_be64(&iic->regs->prio, 0);
 
-	printk(KERN_INFO "IIC for CPU %d target id 0x%x : %s\n",
-	       hw_cpu, iic->target_id, node->full_name);
+	printk(KERN_INFO "IIC for CPU %d target id 0x%x : %pOF\n",
+	       hw_cpu, iic->target_id, node);
 }
 
 static int __init setup_iic(void)
diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c
index 29d4f96ed33e..4b91ad08eefd 100644
--- a/arch/powerpc/platforms/cell/iommu.c
+++ b/arch/powerpc/platforms/cell/iommu.c
@@ -278,8 +278,8 @@ static int cell_iommu_find_ioc(int nid, unsigned long *base)
 		if (of_node_to_nid(np) != nid)
 			continue;
 		if (of_address_to_resource(np, 0, &r)) {
-			printk(KERN_ERR "iommu: can't get address for %s\n",
-			       np->full_name);
+			printk(KERN_ERR "iommu: can't get address for %pOF\n",
+			       np);
 			continue;
 		}
 		*base = r.start;
@@ -458,8 +458,8 @@ static inline u32 cell_iommu_get_ioid(struct device_node *np)
 
 	ioid = of_get_property(np, "ioid", NULL);
 	if (ioid == NULL) {
-		printk(KERN_WARNING "iommu: missing ioid for %s using 0\n",
-		       np->full_name);
+		printk(KERN_WARNING "iommu: missing ioid for %pOF using 0\n",
+		       np);
 		return 0;
 	}
 
@@ -559,8 +559,8 @@ static struct iommu_table *cell_get_iommu_table(struct device *dev)
 	 */
 	iommu = cell_iommu_for_node(dev_to_node(dev));
 	if (iommu == NULL || list_empty(&iommu->windows)) {
-		dev_err(dev, "iommu: missing iommu for %s (node %d)\n",
-		       of_node_full_name(dev->of_node), dev_to_node(dev));
+		dev_err(dev, "iommu: missing iommu for %pOF (node %d)\n",
+		       dev->of_node, dev_to_node(dev));
 		return NULL;
 	}
 	window = list_entry(iommu->windows.next, struct iommu_window, list);
@@ -720,12 +720,12 @@ static struct cbe_iommu * __init cell_iommu_alloc(struct device_node *np)
 	/* Get node ID */
 	nid = of_node_to_nid(np);
 	if (nid < 0) {
-		printk(KERN_ERR "iommu: failed to get node for %s\n",
-		       np->full_name);
+		printk(KERN_ERR "iommu: failed to get node for %pOF\n",
+		       np);
 		return NULL;
 	}
-	pr_debug("iommu: setting up iommu for node %d (%s)\n",
-		 nid, np->full_name);
+	pr_debug("iommu: setting up iommu for node %d (%pOF)\n",
+		 nid, np);
 
 	/* XXX todo: If we can have multiple windows on the same IOMMU, which
 	 * isn't the case today, we probably want here to check whether the
@@ -736,8 +736,8 @@ static struct cbe_iommu * __init cell_iommu_alloc(struct device_node *np)
 	 */
 
 	if (cbe_nr_iommus >= NR_IOMMUS) {
-		printk(KERN_ERR "iommu: too many IOMMUs detected ! (%s)\n",
-		       np->full_name);
+		printk(KERN_ERR "iommu: too many IOMMUs detected ! (%pOF)\n",
+		       np);
 		return NULL;
 	}
 
diff --git a/arch/powerpc/platforms/cell/ras.c b/arch/powerpc/platforms/cell/ras.c
index 460ab392f0e7..2f704afe9af3 100644
--- a/arch/powerpc/platforms/cell/ras.c
+++ b/arch/powerpc/platforms/cell/ras.c
@@ -196,8 +196,8 @@ static int __init cbe_ptcal_enable(void)
 	for_each_node_by_type(np, "cpu") {
 		const u32 *nid = of_get_property(np, "node-id", NULL);
 		if (!nid) {
-			printk(KERN_ERR "%s: node %s is missing node-id?\n",
-					__func__, np->full_name);
+			printk(KERN_ERR "%s: node %pOF is missing node-id?\n",
+					__func__, np);
 			continue;
 		}
 		cbe_ptcal_enable_on_node(*nid, order);
diff --git a/arch/powerpc/platforms/cell/spider-pci.c b/arch/powerpc/platforms/cell/spider-pci.c
index f1f7878893f3..d1e61e273e64 100644
--- a/arch/powerpc/platforms/cell/spider-pci.c
+++ b/arch/powerpc/platforms/cell/spider-pci.c
@@ -130,8 +130,8 @@ int __init spiderpci_iowa_init(struct iowa_bus *bus, void *data)
 	struct resource r;
 	unsigned long offset = (unsigned long)data;
 
-	pr_debug("SPIDERPCI-IOWA:Bus initialize for spider(%s)\n",
-		 np->full_name);
+	pr_debug("SPIDERPCI-IOWA:Bus initialize for spider(%pOF)\n",
+		 np);
 
 	priv = kzalloc(sizeof(struct spiderpci_iowa_private), GFP_KERNEL);
 	if (!priv) {
diff --git a/arch/powerpc/platforms/cell/spider-pic.c b/arch/powerpc/platforms/cell/spider-pic.c
index ff924af00e78..aa44bfc46467 100644
--- a/arch/powerpc/platforms/cell/spider-pic.c
+++ b/arch/powerpc/platforms/cell/spider-pic.c
@@ -323,8 +323,8 @@ static void __init spider_init_one(struct device_node *of_node, int chip,
 	irq_set_handler_data(virq, pic);
 	irq_set_chained_handler(virq, spider_irq_cascade);
 
-	printk(KERN_INFO "spider_pic: node %d, addr: 0x%lx %s\n",
-	       pic->node_id, addr, of_node->full_name);
+	printk(KERN_INFO "spider_pic: node %d, addr: 0x%lx %pOF\n",
+	       pic->node_id, addr, of_node);
 
 	/* Enable the interrupt detection enable bit. Do this last! */
 	out_be32(pic->regs + TIR_DEN, in_be32(pic->regs + TIR_DEN) | 0x1);
diff --git a/arch/powerpc/platforms/cell/spu_manage.c b/arch/powerpc/platforms/cell/spu_manage.c
index 672d310dcf14..f636ee22b203 100644
--- a/arch/powerpc/platforms/cell/spu_manage.c
+++ b/arch/powerpc/platforms/cell/spu_manage.c
@@ -191,8 +191,8 @@ static int __init spu_map_interrupts(struct spu *spu, struct device_node *np)
 			goto err;
 		}
 		ret = -EINVAL;
-		pr_debug("  irq %d no 0x%x on %s\n", i, oirq.args[0],
-			 oirq.np->full_name);
+		pr_debug("  irq %d no 0x%x on %pOF\n", i, oirq.args[0],
+			 oirq.np);
 		spu->irqs[i] = irq_create_of_mapping(&oirq);
 		if (!spu->irqs[i]) {
 			pr_debug("spu_new: failed to map it !\n");
@@ -243,32 +243,32 @@ static int __init spu_map_device(struct spu *spu)
 	ret = spu_map_resource(spu, 0, (void __iomem**)&spu->local_store,
 			       &spu->local_store_phys);
 	if (ret) {
-		pr_debug("spu_new: failed to map %s resource 0\n",
-			 np->full_name);
+		pr_debug("spu_new: failed to map %pOF resource 0\n",
+			 np);
 		goto out;
 	}
 	ret = spu_map_resource(spu, 1, (void __iomem**)&spu->problem,
 			       &spu->problem_phys);
 	if (ret) {
-		pr_debug("spu_new: failed to map %s resource 1\n",
-			 np->full_name);
+		pr_debug("spu_new: failed to map %pOF resource 1\n",
+			 np);
 		goto out_unmap;
 	}
 	ret = spu_map_resource(spu, 2, (void __iomem**)&spu->priv2, NULL);
 	if (ret) {
-		pr_debug("spu_new: failed to map %s resource 2\n",
-			 np->full_name);
+		pr_debug("spu_new: failed to map %pOF resource 2\n",
+			 np);
 		goto out_unmap;
 	}
 	if (!firmware_has_feature(FW_FEATURE_LPAR))
 		ret = spu_map_resource(spu, 3,
 			       (void __iomem**)&spu->priv1, NULL);
 	if (ret) {
-		pr_debug("spu_new: failed to map %s resource 3\n",
-			 np->full_name);
+		pr_debug("spu_new: failed to map %pOF resource 3\n",
+			 np);
 		goto out_unmap;
 	}
-	pr_debug("spu_new: %s maps:\n", np->full_name);
+	pr_debug("spu_new: %pOF maps:\n", np);
 	pr_debug("  local store   : 0x%016lx -> 0x%p\n",
 		 spu->local_store_phys, spu->local_store);
 	pr_debug("  problem state : 0x%016lx -> 0x%p\n",
@@ -316,8 +316,8 @@ static int __init of_create_spu(struct spu *spu, void *data)
 
 	spu->node = of_node_to_nid(spe);
 	if (spu->node >= MAX_NUMNODES) {
-		printk(KERN_WARNING "SPE %s on node %d ignored,"
-		       " node number too big\n", spe->full_name, spu->node);
+		printk(KERN_WARNING "SPE %pOF on node %d ignored,"
+		       " node number too big\n", spe, spu->node);
 		printk(KERN_WARNING "Check if CONFIG_NUMA is enabled.\n");
 		ret = -ENODEV;
 		goto out;
diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c
index ae2f740a82f1..5ffcdeb1eb17 100644
--- a/arch/powerpc/platforms/cell/spufs/file.c
+++ b/arch/powerpc/platforms/cell/spufs/file.c
@@ -1749,7 +1749,7 @@ out:
 static int spufs_mfc_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 {
 	struct inode *inode = file_inode(file);
-	int err = filemap_write_and_wait_range(inode->i_mapping, start, end);
+	int err = file_write_and_wait_range(file, start, end);
 	if (!err) {
 		inode_lock(inode);
 		err = spufs_mfc_flush(file, NULL);
diff --git a/arch/powerpc/platforms/chrp/pci.c b/arch/powerpc/platforms/chrp/pci.c
index 1b87e198faa7..27264794f5c0 100644
--- a/arch/powerpc/platforms/chrp/pci.c
+++ b/arch/powerpc/platforms/chrp/pci.c
@@ -235,14 +235,14 @@ chrp_find_bridges(void)
 		++index;
 		/* The GG2 bridge on the LongTrail doesn't have an address */
 		if (of_address_to_resource(dev, 0, &r) && !is_longtrail) {
-			printk(KERN_WARNING "Can't use %s: no address\n",
-			       dev->full_name);
+			printk(KERN_WARNING "Can't use %pOF: no address\n",
+			       dev);
 			continue;
 		}
 		bus_range = of_get_property(dev, "bus-range", &len);
 		if (bus_range == NULL || len < 2 * sizeof(int)) {
-			printk(KERN_WARNING "Can't get bus-range for %s\n",
-				dev->full_name);
+			printk(KERN_WARNING "Can't get bus-range for %pOF\n",
+				dev);
 			continue;
 		}
 		if (bus_range[1] == bus_range[0])
@@ -250,15 +250,15 @@ chrp_find_bridges(void)
 		else
 			printk(KERN_INFO "PCI buses %d..%d",
 			       bus_range[0], bus_range[1]);
-		printk(" controlled by %s", dev->full_name);
+		printk(" controlled by %pOF", dev);
 		if (!is_longtrail)
 			printk(" at %llx", (unsigned long long)r.start);
 		printk("\n");
 
 		hose = pcibios_alloc_controller(dev);
 		if (!hose) {
-			printk("Can't allocate PCI controller structure for %s\n",
-				dev->full_name);
+			printk("Can't allocate PCI controller structure for %pOF\n",
+				dev);
 			continue;
 		}
 		hose->first_busno = hose->self_busno = bus_range[0];
@@ -297,8 +297,8 @@ chrp_find_bridges(void)
 				}
 			}
 		} else {
-			printk("No methods for %s (model %s), using RTAS\n",
-			       dev->full_name, model);
+			printk("No methods for %pOF (model %s), using RTAS\n",
+			       dev, model);
 			hose->ops = &rtas_pci_ops;
 		}
 
diff --git a/arch/powerpc/platforms/chrp/pegasos_eth.c b/arch/powerpc/platforms/chrp/pegasos_eth.c
index 2b4dc6abde6c..19760712b39d 100644
--- a/arch/powerpc/platforms/chrp/pegasos_eth.c
+++ b/arch/powerpc/platforms/chrp/pegasos_eth.c
@@ -63,7 +63,7 @@ static struct platform_device mv643xx_eth_mvmdio_device = {
 	.name		= "orion-mdio",
 	.id		= -1,
 	.num_resources	= ARRAY_SIZE(mv643xx_eth_mvmdio_resources),
-	.resource	= mv643xx_eth_shared_resources,
+	.resource	= mv643xx_eth_mvmdio_resources,
 };
 
 static struct resource mv643xx_eth_port1_resources[] = {
diff --git a/arch/powerpc/platforms/embedded6xx/linkstation.c b/arch/powerpc/platforms/embedded6xx/linkstation.c
index f29cf29b11f8..f514d5d28cd4 100644
--- a/arch/powerpc/platforms/embedded6xx/linkstation.c
+++ b/arch/powerpc/platforms/embedded6xx/linkstation.c
@@ -41,12 +41,12 @@ static int __init linkstation_add_bridge(struct device_node *dev)
 	struct pci_controller *hose;
 	const int *bus_range;
 
-	printk("Adding PCI host bridge %s\n", dev->full_name);
+	printk("Adding PCI host bridge %pOF\n", dev);
 
 	bus_range = of_get_property(dev, "bus-range", &len);
 	if (bus_range == NULL || len < 2 * sizeof(int))
-		printk(KERN_WARNING "Can't get bus-range for %s, assume"
-				" bus 0\n", dev->full_name);
+		printk(KERN_WARNING "Can't get bus-range for %pOF, assume"
+				" bus 0\n", dev);
 
 	hose = pcibios_alloc_controller(dev);
 	if (hose == NULL)
diff --git a/arch/powerpc/platforms/embedded6xx/mvme5100.c b/arch/powerpc/platforms/embedded6xx/mvme5100.c
index 8e3590941960..273dfa3f0252 100644
--- a/arch/powerpc/platforms/embedded6xx/mvme5100.c
+++ b/arch/powerpc/platforms/embedded6xx/mvme5100.c
@@ -115,7 +115,7 @@ static int __init mvme5100_add_bridge(struct device_node *dev)
 	struct pci_controller	*hose;
 	unsigned short		devid;
 
-	pr_info("Adding PCI host bridge %s\n", dev->full_name);
+	pr_info("Adding PCI host bridge %pOF\n", dev);
 
 	bus_range = of_get_property(dev, "bus-range", &len);
 
diff --git a/arch/powerpc/platforms/embedded6xx/storcenter.c b/arch/powerpc/platforms/embedded6xx/storcenter.c
index 471a50bcd074..ed1914dd34bb 100644
--- a/arch/powerpc/platforms/embedded6xx/storcenter.c
+++ b/arch/powerpc/platforms/embedded6xx/storcenter.c
@@ -44,7 +44,7 @@ static int __init storcenter_add_bridge(struct device_node *dev)
 	struct pci_controller *hose;
 	const int *bus_range;
 
-	printk("Adding PCI host bridge %s\n", dev->full_name);
+	printk("Adding PCI host bridge %pOF\n", dev);
 
 	hose = pcibios_alloc_controller(dev);
 	if (hose == NULL)
diff --git a/arch/powerpc/platforms/maple/pci.c b/arch/powerpc/platforms/maple/pci.c
index 69794d9389c2..e3821379e86f 100644
--- a/arch/powerpc/platforms/maple/pci.c
+++ b/arch/powerpc/platforms/maple/pci.c
@@ -73,8 +73,8 @@ static void __init fixup_bus_range(struct device_node *bridge)
 	/* Lookup the "bus-range" property for the hose */
 	prop = of_find_property(bridge, "bus-range", &len);
 	if (prop == NULL  || prop->value == NULL || len < 2 * sizeof(int)) {
-		printk(KERN_WARNING "Can't get bus-range for %s\n",
-			       bridge->full_name);
+		printk(KERN_WARNING "Can't get bus-range for %pOF\n",
+			       bridge);
 		return;
 	}
 	bus_range = prop->value;
@@ -498,12 +498,12 @@ static int __init maple_add_bridge(struct device_node *dev)
 	const int *bus_range;
 	int primary = 1;
 
-	DBG("Adding PCI host bridge %s\n", dev->full_name);
+	DBG("Adding PCI host bridge %pOF\n", dev);
 
 	bus_range = of_get_property(dev, "bus-range", &len);
 	if (bus_range == NULL || len < 2 * sizeof(int)) {
-		printk(KERN_WARNING "Can't get bus-range for %s, assume bus 0\n",
-		dev->full_name);
+		printk(KERN_WARNING "Can't get bus-range for %pOF, assume bus 0\n",
+		dev);
 	}
 
 	hose = pcibios_alloc_controller(dev);
diff --git a/arch/powerpc/platforms/pasemi/pci.c b/arch/powerpc/platforms/pasemi/pci.c
index 10c4e8fc6ea9..5ff6108f19e9 100644
--- a/arch/powerpc/platforms/pasemi/pci.c
+++ b/arch/powerpc/platforms/pasemi/pci.c
@@ -193,7 +193,7 @@ static int __init pas_add_bridge(struct device_node *dev)
 {
 	struct pci_controller *hose;
 
-	pr_debug("Adding PCI host bridge %s\n", dev->full_name);
+	pr_debug("Adding PCI host bridge %pOF\n", dev);
 
 	hose = pcibios_alloc_controller(dev);
 	if (!hose)
diff --git a/arch/powerpc/platforms/powermac/feature.c b/arch/powerpc/platforms/powermac/feature.c
index 1e02328c3f2d..9e3f39d36e88 100644
--- a/arch/powerpc/platforms/powermac/feature.c
+++ b/arch/powerpc/platforms/powermac/feature.c
@@ -2658,25 +2658,25 @@ static void __init probe_one_macio(const char *name, const char *compat, int typ
 
 	if (i >= MAX_MACIO_CHIPS) {
 		printk(KERN_ERR "pmac_feature: Please increase MAX_MACIO_CHIPS !\n");
-		printk(KERN_ERR "pmac_feature: %s skipped\n", node->full_name);
+		printk(KERN_ERR "pmac_feature: %pOF skipped\n", node);
 		return;
 	}
 	addrp = of_get_pci_address(node, 0, &size, NULL);
 	if (addrp == NULL) {
-		printk(KERN_ERR "pmac_feature: %s: can't find base !\n",
-		       node->full_name);
+		printk(KERN_ERR "pmac_feature: %pOF: can't find base !\n",
+		       node);
 		return;
 	}
 	addr = of_translate_address(node, addrp);
 	if (addr == 0) {
-		printk(KERN_ERR "pmac_feature: %s, can't translate base !\n",
-		       node->full_name);
+		printk(KERN_ERR "pmac_feature: %pOF, can't translate base !\n",
+		       node);
 		return;
 	}
 	base = ioremap(addr, (unsigned long)size);
 	if (!base) {
-		printk(KERN_ERR "pmac_feature: %s, can't map mac-io chip !\n",
-		       node->full_name);
+		printk(KERN_ERR "pmac_feature: %pOF, can't map mac-io chip !\n",
+		       node);
 		return;
 	}
 	if (type == macio_keylargo || type == macio_keylargo2) {
diff --git a/arch/powerpc/platforms/powermac/low_i2c.c b/arch/powerpc/platforms/powermac/low_i2c.c
index f627c9fd7b48..70183eb3d5c8 100644
--- a/arch/powerpc/platforms/powermac/low_i2c.c
+++ b/arch/powerpc/platforms/powermac/low_i2c.c
@@ -494,8 +494,8 @@ static struct pmac_i2c_host_kw *__init kw_i2c_host_init(struct device_node *np)
 
 	host = kzalloc(sizeof(struct pmac_i2c_host_kw), GFP_KERNEL);
 	if (host == NULL) {
-		printk(KERN_ERR "low_i2c: Can't allocate host for %s\n",
-		       np->full_name);
+		printk(KERN_ERR "low_i2c: Can't allocate host for %pOF\n",
+		       np);
 		return NULL;
 	}
 
@@ -505,8 +505,8 @@ static struct pmac_i2c_host_kw *__init kw_i2c_host_init(struct device_node *np)
 	 */
 	addrp = of_get_property(np, "AAPL,address", NULL);
 	if (addrp == NULL) {
-		printk(KERN_ERR "low_i2c: Can't find address for %s\n",
-		       np->full_name);
+		printk(KERN_ERR "low_i2c: Can't find address for %pOF\n",
+		       np);
 		kfree(host);
 		return NULL;
 	}
@@ -538,13 +538,13 @@ static struct pmac_i2c_host_kw *__init kw_i2c_host_init(struct device_node *np)
 	host->irq = irq_of_parse_and_map(np, 0);
 	if (!host->irq)
 		printk(KERN_WARNING
-		       "low_i2c: Failed to map interrupt for %s\n",
-		       np->full_name);
+		       "low_i2c: Failed to map interrupt for %pOF\n",
+		       np);
 
 	host->base = ioremap((*addrp), 0x1000);
 	if (host->base == NULL) {
-		printk(KERN_ERR "low_i2c: Can't map registers for %s\n",
-		       np->full_name);
+		printk(KERN_ERR "low_i2c: Can't map registers for %pOF\n",
+		       np);
 		kfree(host);
 		return NULL;
 	}
@@ -560,8 +560,8 @@ static struct pmac_i2c_host_kw *__init kw_i2c_host_init(struct device_node *np)
 			"keywest i2c", host))
 		host->irq = 0;
 
-	printk(KERN_INFO "KeyWest i2c @0x%08x irq %d %s\n",
-	       *addrp, host->irq, np->full_name);
+	printk(KERN_INFO "KeyWest i2c @0x%08x irq %d %pOF\n",
+	       *addrp, host->irq, np);
 
 	return host;
 }
@@ -798,7 +798,7 @@ static void __init pmu_i2c_probe(void)
 	if (busnode == NULL)
 		return;
 
-	printk(KERN_INFO "PMU i2c %s\n", busnode->full_name);
+	printk(KERN_INFO "PMU i2c %pOF\n", busnode);
 
 	/*
 	 * We add bus 1 and 2 only for now, bus 0 is "special"
@@ -913,7 +913,7 @@ static void __init smu_i2c_probe(void)
 	if (controller == NULL)
 		return;
 
-	printk(KERN_INFO "SMU i2c %s\n", controller->full_name);
+	printk(KERN_INFO "SMU i2c %pOF\n", controller);
 
 	/* Look for childs, note that they might not be of the right
 	 * type as older device trees mix i2c busses and other things
@@ -945,8 +945,8 @@ static void __init smu_i2c_probe(void)
 		bus->flags = 0;
 		list_add(&bus->link, &pmac_i2c_busses);
 
-		printk(KERN_INFO " channel %x bus %s\n",
-		       bus->channel, busnode->full_name);
+		printk(KERN_INFO " channel %x bus %pOF\n",
+		       bus->channel, busnode);
 	}
 }
 
@@ -1129,7 +1129,7 @@ int pmac_i2c_setmode(struct pmac_i2c_bus *bus, int mode)
 	 */
 	if (mode < pmac_i2c_mode_dumb || mode > pmac_i2c_mode_combined) {
 		printk(KERN_ERR "low_i2c: Invalid mode %d requested on"
-		       " bus %s !\n", mode, bus->busnode->full_name);
+		       " bus %pOF !\n", mode, bus->busnode);
 		return -EINVAL;
 	}
 	bus->mode = mode;
@@ -1146,8 +1146,8 @@ int pmac_i2c_xfer(struct pmac_i2c_bus *bus, u8 addrdir, int subsize,
 	WARN_ON(!bus->opened);
 
 	DBG("xfer() chan=%d, addrdir=0x%x, mode=%d, subsize=%d, subaddr=0x%x,"
-	    " %d bytes, bus %s\n", bus->channel, addrdir, bus->mode, subsize,
-	    subaddr, len, bus->busnode->full_name);
+	    " %d bytes, bus %pOF\n", bus->channel, addrdir, bus->mode, subsize,
+	    subaddr, len, bus->busnode);
 
 	rc = bus->xfer(bus, addrdir, subsize, subaddr, data, len);
 
@@ -1241,13 +1241,13 @@ static void* pmac_i2c_do_begin(struct pmf_function *func, struct pmf_args *args)
 
 	bus = pmac_i2c_find_bus(func->node);
 	if (bus == NULL) {
-		printk(KERN_ERR "low_i2c: Can't find bus for %s (pfunc)\n",
-		       func->node->full_name);
+		printk(KERN_ERR "low_i2c: Can't find bus for %pOF (pfunc)\n",
+		       func->node);
 		return NULL;
 	}
 	if (pmac_i2c_open(bus, 0)) {
-		printk(KERN_ERR "low_i2c: Can't open i2c bus for %s (pfunc)\n",
-		       func->node->full_name);
+		printk(KERN_ERR "low_i2c: Can't open i2c bus for %pOF (pfunc)\n",
+		       func->node);
 		return NULL;
 	}
 
@@ -1417,7 +1417,7 @@ static struct pmf_handlers pmac_i2c_pfunc_handlers = {
 
 static void __init pmac_i2c_dev_create(struct device_node *np, int quirks)
 {
-	DBG("dev_create(%s)\n", np->full_name);
+	DBG("dev_create(%pOF)\n", np);
 
 	pmf_register_driver(np, &pmac_i2c_pfunc_handlers,
 			    (void *)(long)quirks);
@@ -1425,20 +1425,20 @@ static void __init pmac_i2c_dev_create(struct device_node *np, int quirks)
 
 static void __init pmac_i2c_dev_init(struct device_node *np, int quirks)
 {
-	DBG("dev_create(%s)\n", np->full_name);
+	DBG("dev_create(%pOF)\n", np);
 
 	pmf_do_functions(np, NULL, 0, PMF_FLAGS_ON_INIT, NULL);
 }
 
 static void pmac_i2c_dev_suspend(struct device_node *np, int quirks)
 {
-	DBG("dev_suspend(%s)\n", np->full_name);
+	DBG("dev_suspend(%pOF)\n", np);
 	pmf_do_functions(np, NULL, 0, PMF_FLAGS_ON_SLEEP, NULL);
 }
 
 static void pmac_i2c_dev_resume(struct device_node *np, int quirks)
 {
-	DBG("dev_resume(%s)\n", np->full_name);
+	DBG("dev_resume(%pOF)\n", np);
 	pmf_do_functions(np, NULL, 0, PMF_FLAGS_ON_WAKE, NULL);
 }
 
diff --git a/arch/powerpc/platforms/powermac/pci.c b/arch/powerpc/platforms/powermac/pci.c
index 6e06c3be2e9a..0b8174a79993 100644
--- a/arch/powerpc/platforms/powermac/pci.c
+++ b/arch/powerpc/platforms/powermac/pci.c
@@ -783,7 +783,7 @@ static int __init pmac_add_bridge(struct device_node *dev)
 	const int *bus_range;
 	int primary = 1, has_address = 0;
 
-	DBG("Adding PCI host bridge %s\n", dev->full_name);
+	DBG("Adding PCI host bridge %pOF\n", dev);
 
 	/* Fetch host bridge registers address */
 	has_address = (of_address_to_resource(dev, 0, &rsrc) == 0);
@@ -791,8 +791,8 @@ static int __init pmac_add_bridge(struct device_node *dev)
 	/* Get bus range if any */
 	bus_range = of_get_property(dev, "bus-range", &len);
 	if (bus_range == NULL || len < 2 * sizeof(int)) {
-		printk(KERN_WARNING "Can't get bus-range for %s, assume"
-		       " bus 0\n", dev->full_name);
+		printk(KERN_WARNING "Can't get bus-range for %pOF, assume"
+		       " bus 0\n", dev);
 	}
 
 	hose = pcibios_alloc_controller(dev);
diff --git a/arch/powerpc/platforms/powermac/pfunc_base.c b/arch/powerpc/platforms/powermac/pfunc_base.c
index 459138ed4571..860159d46ab8 100644
--- a/arch/powerpc/platforms/powermac/pfunc_base.c
+++ b/arch/powerpc/platforms/powermac/pfunc_base.c
@@ -54,8 +54,8 @@ static int macio_do_gpio_write(PMF_STD_ARGS, u8 value, u8 mask)
 	raw_spin_lock_irqsave(&feature_lock, flags);
 	tmp = readb(addr);
 	tmp = (tmp & ~mask) | (value & mask);
-	DBG("Do write 0x%02x to GPIO %s (%p)\n",
-	    tmp, func->node->full_name, addr);
+	DBG("Do write 0x%02x to GPIO %pOF (%p)\n",
+	    tmp, func->node, addr);
 	writeb(tmp, addr);
 	raw_spin_unlock_irqrestore(&feature_lock, flags);
 
@@ -107,8 +107,8 @@ static void macio_gpio_init_one(struct macio_chip *macio)
 	if (gparent == NULL)
 		return;
 
-	DBG("Installing GPIO functions for macio %s\n",
-	    macio->of_node->full_name);
+	DBG("Installing GPIO functions for macio %pOF\n",
+	    macio->of_node);
 
 	/*
 	 * Ok, got one, we dont need anything special to track them down, so
@@ -129,8 +129,8 @@ static void macio_gpio_init_one(struct macio_chip *macio)
 		pmf_register_driver(gp, &macio_gpio_handlers, (void *)offset);
 	}
 
-	DBG("Calling initial GPIO functions for macio %s\n",
-	    macio->of_node->full_name);
+	DBG("Calling initial GPIO functions for macio %pOF\n",
+	    macio->of_node);
 
 	/* And now we run all the init ones */
 	for (gp = NULL; (gp = of_get_next_child(gparent, gp)) != NULL;)
@@ -267,8 +267,8 @@ static struct pmf_handlers macio_mmio_handlers = {
 
 static void macio_mmio_init_one(struct macio_chip *macio)
 {
-	DBG("Installing MMIO functions for macio %s\n",
-	    macio->of_node->full_name);
+	DBG("Installing MMIO functions for macio %pOF\n",
+	    macio->of_node);
 
 	pmf_register_driver(macio->of_node, &macio_mmio_handlers, macio);
 }
@@ -298,8 +298,8 @@ static void uninorth_install_pfunc(void)
 {
 	struct device_node *np;
 
-	DBG("Installing functions for UniN %s\n",
-	    uninorth_node->full_name);
+	DBG("Installing functions for UniN %pOF\n",
+	    uninorth_node);
 
 	/*
 	 * Install handlers for the bridge itself
@@ -317,8 +317,8 @@ static void uninorth_install_pfunc(void)
 			break;
 		}
 	if (unin_hwclock) {
-		DBG("Installing functions for UniN clock %s\n",
-		    unin_hwclock->full_name);
+		DBG("Installing functions for UniN clock %pOF\n",
+		    unin_hwclock);
 		pmf_register_driver(unin_hwclock, &unin_mmio_handlers, NULL);
 		pmf_do_functions(unin_hwclock, NULL, 0, PMF_FLAGS_ON_INIT,
 				 NULL);
diff --git a/arch/powerpc/platforms/powermac/pfunc_core.c b/arch/powerpc/platforms/powermac/pfunc_core.c
index 695e8c4d4224..df3c93bef228 100644
--- a/arch/powerpc/platforms/powermac/pfunc_core.c
+++ b/arch/powerpc/platforms/powermac/pfunc_core.c
@@ -708,7 +708,7 @@ int pmf_register_driver(struct device_node *np,
 	if (handlers == NULL)
 		return -EINVAL;
 
-	DBG("pmf: registering driver for node %s\n", np->full_name);
+	DBG("pmf: registering driver for node %pOF\n", np);
 
 	spin_lock_irqsave(&pmf_lock, flags);
 	dev = pmf_find_device(np);
@@ -781,7 +781,7 @@ void pmf_unregister_driver(struct device_node *np)
 	struct pmf_device *dev;
 	unsigned long flags;
 
-	DBG("pmf: unregistering driver for node %s\n", np->full_name);
+	DBG("pmf: unregistering driver for node %pOF\n", np);
 
 	spin_lock_irqsave(&pmf_lock, flags);
 	dev = pmf_find_device(np);
@@ -940,7 +940,7 @@ int pmf_call_one(struct pmf_function *func, struct pmf_args *args)
 	void *instdata = NULL;
 	int rc = 0;
 
-	DBG(" ** pmf_call_one(%s/%s) **\n", dev->node->full_name, func->name);
+	DBG(" ** pmf_call_one(%pOF/%s) **\n", dev->node, func->name);
 
 	if (dev->handlers->begin)
 		instdata = dev->handlers->begin(func, args);
diff --git a/arch/powerpc/platforms/powermac/pic.c b/arch/powerpc/platforms/powermac/pic.c
index f5f9ad7c3398..5e0719b27294 100644
--- a/arch/powerpc/platforms/powermac/pic.c
+++ b/arch/powerpc/platforms/powermac/pic.c
@@ -364,8 +364,8 @@ static void __init pmac_pic_probe_oldstyle(void)
 			(addr + 0x10);
 	of_node_put(master);
 
-	printk(KERN_INFO "irq: Found primary Apple PIC %s for %d irqs\n",
-	       master->full_name, max_real_irqs);
+	printk(KERN_INFO "irq: Found primary Apple PIC %pOF for %d irqs\n",
+	       master, max_real_irqs);
 
 	/* Map interrupts of cascaded controller */
 	if (slave && !of_address_to_resource(slave, 0, &r)) {
@@ -378,8 +378,8 @@ static void __init pmac_pic_probe_oldstyle(void)
 				(addr + 0x10);
 		pmac_irq_cascade = irq_of_parse_and_map(slave, 0);
 
-		printk(KERN_INFO "irq: Found slave Apple PIC %s for %d irqs"
-		       " cascade: %d\n", slave->full_name,
+		printk(KERN_INFO "irq: Found slave Apple PIC %pOF for %d irqs"
+		       " cascade: %d\n", slave,
 		       max_irqs - max_real_irqs, pmac_irq_cascade);
 	}
 	of_node_put(slave);
diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c
index 6b4e9d181126..ab668cb72263 100644
--- a/arch/powerpc/platforms/powermac/setup.c
+++ b/arch/powerpc/platforms/powermac/setup.c
@@ -556,7 +556,7 @@ static int __init check_pmac_serial_console(void)
 		pr_debug(" can't find stdout package %s !\n", name);
 		return -ENODEV;
 	}
-	pr_debug("stdout is %s\n", prom_stdout->full_name);
+	pr_debug("stdout is %pOF\n", prom_stdout);
 
 	name = of_get_property(prom_stdout, "name", NULL);
 	if (!name) {
diff --git a/arch/powerpc/platforms/powernv/Kconfig b/arch/powerpc/platforms/powernv/Kconfig
index 6a6f4ef46b9e..340cbe263b33 100644
--- a/arch/powerpc/platforms/powernv/Kconfig
+++ b/arch/powerpc/platforms/powernv/Kconfig
@@ -30,3 +30,25 @@ config OPAL_PRD
 	help
 	  This enables the opal-prd driver, a facility to run processor
 	  recovery diagnostics on OpenPower machines
+
+config PPC_MEMTRACE
+	bool "Enable removal of RAM from kernel mappings for tracing"
+	depends on PPC_POWERNV && MEMORY_HOTREMOVE
+	default n
+	help
+	  Enabling this option allows for the removal of memory (RAM)
+	  from the kernel mappings to be used for hardware tracing.
+
+config PPC_VAS
+	bool "IBM Virtual Accelerator Switchboard (VAS)"
+	depends on PPC_POWERNV && PPC_64K_PAGES
+	default y
+	help
+	  This enables support for IBM Virtual Accelerator Switchboard (VAS).
+
+	  VAS allows accelerators in co-processors like NX-GZIP and NX-842
+	  to be accessible to kernel subsystems and user processes.
+
+	  VAS adapters are found in POWER9 based systems.
+
+	  If unsure, say N.
diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile
index b5d98cb3f482..37d60f7dd86d 100644
--- a/arch/powerpc/platforms/powernv/Makefile
+++ b/arch/powerpc/platforms/powernv/Makefile
@@ -2,7 +2,7 @@ obj-y			+= setup.o opal-wrappers.o opal.o opal-async.o idle.o
 obj-y			+= opal-rtc.o opal-nvram.o opal-lpc.o opal-flash.o
 obj-y			+= rng.o opal-elog.o opal-dump.o opal-sysparam.o opal-sensor.o
 obj-y			+= opal-msglog.o opal-hmi.o opal-power.o opal-irqchip.o
-obj-y			+= opal-kmsg.o
+obj-y			+= opal-kmsg.o opal-powercap.o opal-psr.o opal-sensor-groups.o
 
 obj-$(CONFIG_SMP)	+= smp.o subcore.o subcore-asm.o
 obj-$(CONFIG_PCI)	+= pci.o pci-ioda.o npu-dma.o
@@ -12,3 +12,6 @@ obj-$(CONFIG_PPC_SCOM)	+= opal-xscom.o
 obj-$(CONFIG_MEMORY_FAILURE)	+= opal-memory-errors.o
 obj-$(CONFIG_TRACEPOINTS)	+= opal-tracepoints.o
 obj-$(CONFIG_OPAL_PRD)	+= opal-prd.o
+obj-$(CONFIG_PERF_EVENTS) += opal-imc.o
+obj-$(CONFIG_PPC_MEMTRACE)	+= memtrace.o
+obj-$(CONFIG_PPC_VAS)	+= vas.o vas-window.o
diff --git a/arch/powerpc/platforms/powernv/copy-paste.h b/arch/powerpc/platforms/powernv/copy-paste.h
new file mode 100644
index 000000000000..c9a503623431
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/copy-paste.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright 2016-17 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <asm/ppc-opcode.h>
+
+#define CR0_SHIFT	28
+#define CR0_MASK	0xF
+/*
+ * Copy/paste instructions:
+ *
+ *	copy RA,RB
+ *		Copy contents of address (RA) + effective_address(RB)
+ *		to internal copy-buffer.
+ *
+ *	paste RA,RB
+ *		Paste contents of internal copy-buffer to the address
+ *		(RA) + effective_address(RB)
+ */
+static inline int vas_copy(void *crb, int offset)
+{
+	asm volatile(PPC_COPY(%0, %1)";"
+		:
+		: "b" (offset), "b" (crb)
+		: "memory");
+
+	return 0;
+}
+
+static inline int vas_paste(void *paste_address, int offset)
+{
+	u32 cr;
+
+	cr = 0;
+	asm volatile(PPC_PASTE(%1, %2)";"
+		"mfocrf %0, 0x80;"
+		: "=r" (cr)
+		: "b" (offset), "b" (paste_address)
+		: "memory", "cr0");
+
+	return (cr >> CR0_SHIFT) & CR0_MASK;
+}
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c
index 3f48f6df1cf3..8864065eba22 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -113,7 +113,6 @@ static ssize_t pnv_eeh_ei_write(struct file *filp,
 				size_t count, loff_t *ppos)
 {
 	struct pci_controller *hose = filp->private_data;
-	struct eeh_dev *edev;
 	struct eeh_pe *pe;
 	int pe_no, type, func;
 	unsigned long addr, mask;
@@ -135,13 +134,7 @@ static ssize_t pnv_eeh_ei_write(struct file *filp,
 		return -EINVAL;
 
 	/* Retrieve PE */
-	edev = kzalloc(sizeof(*edev), GFP_KERNEL);
-	if (!edev)
-		return -ENOMEM;
-	edev->phb = hose;
-	edev->pe_config_addr = pe_no;
-	pe = eeh_pe_get(edev);
-	kfree(edev);
+	pe = eeh_pe_get(hose, pe_no, 0);
 	if (!pe)
 		return -ENODEV;
 
@@ -359,6 +352,7 @@ static void *pnv_eeh_probe(struct pci_dn *pdn, void *data)
 	struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
 	uint32_t pcie_flags;
 	int ret;
+	int config_addr = (pdn->busno << 8) | (pdn->devfn);
 
 	/*
 	 * When probing the root bridge, which doesn't have any
@@ -393,8 +387,7 @@ static void *pnv_eeh_probe(struct pci_dn *pdn, void *data)
 		}
 	}
 
-	edev->config_addr    = (pdn->busno << 8) | (pdn->devfn);
-	edev->pe_config_addr = phb->ioda.pe_rmap[edev->config_addr];
+	edev->pe_config_addr = phb->ioda.pe_rmap[config_addr];
 
 	/* Create PE */
 	ret = eeh_add_to_parent_pe(edev);
@@ -933,7 +926,6 @@ void pnv_pci_reset_secondary_bus(struct pci_dev *dev)
 static void pnv_eeh_wait_for_pending(struct pci_dn *pdn, const char *type,
 				     int pos, u16 mask)
 {
-	struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
 	int i, status = 0;
 
 	/* Wait for Transaction Pending bit to be cleared */
@@ -947,7 +939,7 @@ static void pnv_eeh_wait_for_pending(struct pci_dn *pdn, const char *type,
 
 	pr_warn("%s: Pending transaction while issuing %sFLR to %04x:%02x:%02x.%01x\n",
 		__func__, type,
-		edev->phb->global_number, pdn->busno,
+		pdn->phb->global_number, pdn->busno,
 		PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
 }
 
@@ -1381,7 +1373,6 @@ static int pnv_eeh_get_pe(struct pci_controller *hose,
 	struct pnv_phb *phb = hose->private_data;
 	struct pnv_ioda_pe *pnv_pe;
 	struct eeh_pe *dev_pe;
-	struct eeh_dev edev;
 
 	/*
 	 * If PHB supports compound PE, to fetch
@@ -1397,10 +1388,7 @@ static int pnv_eeh_get_pe(struct pci_controller *hose,
 	}
 
 	/* Find the PE according to PE# */
-	memset(&edev, 0, sizeof(struct eeh_dev));
-	edev.phb = hose;
-	edev.pe_config_addr = pe_no;
-	dev_pe = eeh_pe_get(&edev);
+	dev_pe = eeh_pe_get(hose, pe_no, 0);
 	if (!dev_pe)
 		return -EEXIST;
 
@@ -1711,6 +1699,7 @@ static int pnv_eeh_restore_config(struct pci_dn *pdn)
 	struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
 	struct pnv_phb *phb;
 	s64 ret;
+	int config_addr = (pdn->busno << 8) | (pdn->devfn);
 
 	if (!edev)
 		return -EEXIST;
@@ -1725,14 +1714,14 @@ static int pnv_eeh_restore_config(struct pci_dn *pdn)
 	if (edev->physfn) {
 		ret = pnv_eeh_restore_vf_config(pdn);
 	} else {
-		phb = edev->phb->private_data;
+		phb = pdn->phb->private_data;
 		ret = opal_pci_reinit(phb->opal_id,
-				      OPAL_REINIT_PCI_DEV, edev->config_addr);
+				      OPAL_REINIT_PCI_DEV, config_addr);
 	}
 
 	if (ret) {
 		pr_warn("%s: Can't reinit PCI dev 0x%x (%lld)\n",
-			__func__, edev->config_addr, ret);
+			__func__, config_addr, ret);
 		return -EIO;
 	}
 
diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
index a553aeea7af6..9f59041a172b 100644
--- a/arch/powerpc/platforms/powernv/idle.c
+++ b/arch/powerpc/platforms/powernv/idle.c
@@ -69,7 +69,7 @@ static int pnv_save_sprs_for_deep_states(void)
 	 * all cpus at boot. Get these reg values of current cpu and use the
 	 * same across all cpus.
 	 */
-	uint64_t lpcr_val = mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1;
+	uint64_t lpcr_val = mfspr(SPRN_LPCR);
 	uint64_t hid0_val = mfspr(SPRN_HID0);
 	uint64_t hid1_val = mfspr(SPRN_HID1);
 	uint64_t hid4_val = mfspr(SPRN_HID4);
@@ -388,6 +388,14 @@ void power9_idle(void)
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
+static void pnv_program_cpu_hotplug_lpcr(unsigned int cpu, u64 lpcr_val)
+{
+	u64 pir = get_hard_smp_processor_id(cpu);
+
+	mtspr(SPRN_LPCR, lpcr_val);
+	opal_slw_set_reg(pir, SPRN_LPCR, lpcr_val);
+}
+
 /*
  * pnv_cpu_offline: A function that puts the CPU into the deepest
  * available platform idle state on a CPU-Offline.
@@ -397,6 +405,20 @@ unsigned long pnv_cpu_offline(unsigned int cpu)
 {
 	unsigned long srr1;
 	u32 idle_states = pnv_get_supported_cpuidle_states();
+	u64 lpcr_val;
+
+	/*
+	 * We don't want to take decrementer interrupts while we are
+	 * offline, so clear LPCR:PECE1. We keep PECE2 (and
+	 * LPCR_PECE_HVEE on P9) enabled as to let IPIs in.
+	 *
+	 * If the CPU gets woken up by a special wakeup, ensure that
+	 * the SLW engine sets LPCR with decrementer bit cleared, else
+	 * the CPU will come back to the kernel due to a spurious
+	 * wakeup.
+	 */
+	lpcr_val = mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1;
+	pnv_program_cpu_hotplug_lpcr(cpu, lpcr_val);
 
 	__ppc64_runlatch_off();
 
@@ -428,6 +450,16 @@ unsigned long pnv_cpu_offline(unsigned int cpu)
 
 	__ppc64_runlatch_on();
 
+	/*
+	 * Re-enable decrementer interrupts in LPCR.
+	 *
+	 * Further, we want stop states to be woken up by decrementer
+	 * for non-hotplug cases. So program the LPCR via stop api as
+	 * well.
+	 */
+	lpcr_val = mfspr(SPRN_LPCR) | (u64)LPCR_PECE1;
+	pnv_program_cpu_hotplug_lpcr(cpu, lpcr_val);
+
 	return srr1;
 }
 #endif
diff --git a/arch/powerpc/platforms/powernv/memtrace.c b/arch/powerpc/platforms/powernv/memtrace.c
new file mode 100644
index 000000000000..de470caf0784
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/memtrace.c
@@ -0,0 +1,282 @@
+/*
+ * Copyright (C) IBM Corporation, 2014, 2017
+ * Anton Blanchard, Rashmica Gupta.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#define pr_fmt(fmt) "memtrace: " fmt
+
+#include <linux/bitops.h>
+#include <linux/string.h>
+#include <linux/memblock.h>
+#include <linux/init.h>
+#include <linux/moduleparam.h>
+#include <linux/fs.h>
+#include <linux/debugfs.h>
+#include <linux/slab.h>
+#include <linux/memory.h>
+#include <linux/memory_hotplug.h>
+#include <asm/machdep.h>
+#include <asm/debugfs.h>
+
+/* This enables us to keep track of the memory removed from each node. */
+struct memtrace_entry {
+	void *mem;
+	u64 start;
+	u64 size;
+	u32 nid;
+	struct dentry *dir;
+	char name[16];
+};
+
+static u64 memtrace_size;
+
+static struct memtrace_entry *memtrace_array;
+static unsigned int memtrace_array_nr;
+
+
+static ssize_t memtrace_read(struct file *filp, char __user *ubuf,
+			     size_t count, loff_t *ppos)
+{
+	struct memtrace_entry *ent = filp->private_data;
+
+	return simple_read_from_buffer(ubuf, count, ppos, ent->mem, ent->size);
+}
+
+static bool valid_memtrace_range(struct memtrace_entry *dev,
+				 unsigned long start, unsigned long size)
+{
+	if ((start >= dev->start) &&
+	    ((start + size) <= (dev->start + dev->size)))
+		return true;
+
+	return false;
+}
+
+static int memtrace_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+	unsigned long size = vma->vm_end - vma->vm_start;
+	struct memtrace_entry *dev = filp->private_data;
+
+	if (!valid_memtrace_range(dev, vma->vm_pgoff << PAGE_SHIFT, size))
+		return -EINVAL;
+
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+	if (remap_pfn_range(vma, vma->vm_start,
+			    vma->vm_pgoff + (dev->start >> PAGE_SHIFT),
+			    size, vma->vm_page_prot))
+		return -EAGAIN;
+
+	return 0;
+}
+
+static const struct file_operations memtrace_fops = {
+	.llseek = default_llseek,
+	.read	= memtrace_read,
+	.mmap	= memtrace_mmap,
+	.open	= simple_open,
+};
+
+static void flush_memory_region(u64 base, u64 size)
+{
+	unsigned long line_size = ppc64_caches.l1d.size;
+	u64 end = base + size;
+	u64 addr;
+
+	base = round_down(base, line_size);
+	end = round_up(end, line_size);
+
+	for (addr = base; addr < end; addr += line_size)
+		asm volatile("dcbf 0,%0" : "=r" (addr) :: "memory");
+}
+
+static int check_memblock_online(struct memory_block *mem, void *arg)
+{
+	if (mem->state != MEM_ONLINE)
+		return -1;
+
+	return 0;
+}
+
+static int change_memblock_state(struct memory_block *mem, void *arg)
+{
+	unsigned long state = (unsigned long)arg;
+
+	mem->state = state;
+
+	return 0;
+}
+
+static bool memtrace_offline_pages(u32 nid, u64 start_pfn, u64 nr_pages)
+{
+	u64 end_pfn = start_pfn + nr_pages - 1;
+
+	if (walk_memory_range(start_pfn, end_pfn, NULL,
+	    check_memblock_online))
+		return false;
+
+	walk_memory_range(start_pfn, end_pfn, (void *)MEM_GOING_OFFLINE,
+			  change_memblock_state);
+
+	if (offline_pages(start_pfn, nr_pages)) {
+		walk_memory_range(start_pfn, end_pfn, (void *)MEM_ONLINE,
+				  change_memblock_state);
+		return false;
+	}
+
+	walk_memory_range(start_pfn, end_pfn, (void *)MEM_OFFLINE,
+			  change_memblock_state);
+
+	/* RCU grace period? */
+	flush_memory_region((u64)__va(start_pfn << PAGE_SHIFT),
+			    nr_pages << PAGE_SHIFT);
+
+	lock_device_hotplug();
+	remove_memory(nid, start_pfn << PAGE_SHIFT, nr_pages << PAGE_SHIFT);
+	unlock_device_hotplug();
+
+	return true;
+}
+
+static u64 memtrace_alloc_node(u32 nid, u64 size)
+{
+	u64 start_pfn, end_pfn, nr_pages;
+	u64 base_pfn;
+
+	if (!NODE_DATA(nid) || !node_spanned_pages(nid))
+		return 0;
+
+	start_pfn = node_start_pfn(nid);
+	end_pfn = node_end_pfn(nid);
+	nr_pages = size >> PAGE_SHIFT;
+
+	/* Trace memory needs to be aligned to the size */
+	end_pfn = round_down(end_pfn - nr_pages, nr_pages);
+
+	for (base_pfn = end_pfn; base_pfn > start_pfn; base_pfn -= nr_pages) {
+		if (memtrace_offline_pages(nid, base_pfn, nr_pages) == true)
+			return base_pfn << PAGE_SHIFT;
+	}
+
+	return 0;
+}
+
+static int memtrace_init_regions_runtime(u64 size)
+{
+	u32 nid;
+	u64 m;
+
+	memtrace_array = kcalloc(num_online_nodes(),
+				sizeof(struct memtrace_entry), GFP_KERNEL);
+	if (!memtrace_array) {
+		pr_err("Failed to allocate memtrace_array\n");
+		return -EINVAL;
+	}
+
+	for_each_online_node(nid) {
+		m = memtrace_alloc_node(nid, size);
+
+		/*
+		 * A node might not have any local memory, so warn but
+		 * continue on.
+		 */
+		if (!m) {
+			pr_err("Failed to allocate trace memory on node %d\n", nid);
+			continue;
+		}
+
+		pr_info("Allocated trace memory on node %d at 0x%016llx\n", nid, m);
+
+		memtrace_array[memtrace_array_nr].start = m;
+		memtrace_array[memtrace_array_nr].size = size;
+		memtrace_array[memtrace_array_nr].nid = nid;
+		memtrace_array_nr++;
+	}
+
+	return 0;
+}
+
+static struct dentry *memtrace_debugfs_dir;
+
+static int memtrace_init_debugfs(void)
+{
+	int ret = 0;
+	int i;
+
+	for (i = 0; i < memtrace_array_nr; i++) {
+		struct dentry *dir;
+		struct memtrace_entry *ent = &memtrace_array[i];
+
+		ent->mem = ioremap(ent->start, ent->size);
+		/* Warn but continue on */
+		if (!ent->mem) {
+			pr_err("Failed to map trace memory at 0x%llx\n",
+				 ent->start);
+			ret = -1;
+			continue;
+		}
+
+		snprintf(ent->name, 16, "%08x", ent->nid);
+		dir = debugfs_create_dir(ent->name, memtrace_debugfs_dir);
+		if (!dir)
+			return -1;
+
+		ent->dir = dir;
+		debugfs_create_file("trace", 0400, dir, ent, &memtrace_fops);
+		debugfs_create_x64("start", 0400, dir, &ent->start);
+		debugfs_create_x64("size", 0400, dir, &ent->size);
+	}
+
+	return ret;
+}
+
+static int memtrace_enable_set(void *data, u64 val)
+{
+	if (memtrace_size)
+		return -EINVAL;
+
+	if (!val)
+		return -EINVAL;
+
+	/* Make sure size is aligned to a memory block */
+	if (val & (memory_block_size_bytes() - 1))
+		return -EINVAL;
+
+	if (memtrace_init_regions_runtime(val))
+		return -EINVAL;
+
+	if (memtrace_init_debugfs())
+		return -EINVAL;
+
+	memtrace_size = val;
+
+	return 0;
+}
+
+static int memtrace_enable_get(void *data, u64 *val)
+{
+	*val = memtrace_size;
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(memtrace_init_fops, memtrace_enable_get,
+					memtrace_enable_set, "0x%016llx\n");
+
+static int memtrace_init(void)
+{
+	memtrace_debugfs_dir = debugfs_create_dir("memtrace",
+						  powerpc_debugfs_root);
+	if (!memtrace_debugfs_dir)
+		return -1;
+
+	debugfs_create_file("enable", 0600, memtrace_debugfs_dir,
+			    NULL, &memtrace_init_fops);
+
+	return 0;
+}
+machine_device_initcall(powernv, memtrace_init);
diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c
index 4c7b8591f737..2cb6cbea4b3b 100644
--- a/arch/powerpc/platforms/powernv/npu-dma.c
+++ b/arch/powerpc/platforms/powernv/npu-dma.c
@@ -546,6 +546,12 @@ static void mmio_invalidate(struct npu_context *npu_context, int va,
 	unsigned long pid = npu_context->mm->context.id;
 
 	/*
+	 * Unfortunately the nest mmu does not support flushing specific
+	 * addresses so we have to flush the whole mm.
+	 */
+	flush_tlb_mm(npu_context->mm);
+
+	/*
 	 * Loop over all the NPUs this process is active on and launch
 	 * an invalidate.
 	 */
@@ -576,12 +582,6 @@ static void mmio_invalidate(struct npu_context *npu_context, int va,
 		}
 	}
 
-	/*
-	 * Unfortunately the nest mmu does not support flushing specific
-	 * addresses so we have to flush the whole mm.
-	 */
-	flush_tlb_mm(npu_context->mm);
-
 	mmio_invalidate_wait(mmio_atsd_reg, flush);
 	if (flush)
 		/* Wait for the flush to complete */
diff --git a/arch/powerpc/platforms/powernv/opal-async.c b/arch/powerpc/platforms/powernv/opal-async.c
index 83bebeec0fea..cf33769a7b72 100644
--- a/arch/powerpc/platforms/powernv/opal-async.c
+++ b/arch/powerpc/platforms/powernv/opal-async.c
@@ -171,8 +171,8 @@ int __init opal_async_comp_init(void)
 
 	async = of_get_property(opal_node, "opal-msg-async-num", NULL);
 	if (!async) {
-		pr_err("%s: %s has no opal-msg-async-num\n",
-				__func__, opal_node->full_name);
+		pr_err("%s: %pOF has no opal-msg-async-num\n",
+				__func__, opal_node);
 		err = -ENOENT;
 		goto out_opal_node;
 	}
diff --git a/arch/powerpc/platforms/powernv/opal-flash.c b/arch/powerpc/platforms/powernv/opal-flash.c
index 4ec6219287fc..2fa3ac80cb4e 100644
--- a/arch/powerpc/platforms/powernv/opal-flash.c
+++ b/arch/powerpc/platforms/powernv/opal-flash.c
@@ -520,7 +520,7 @@ out:
  *   update_flash	: Flash new firmware image
  *
  */
-static struct bin_attribute image_data_attr = {
+static const struct bin_attribute image_data_attr = {
 	.attr = {.name = "image", .mode = 0200},
 	.size = MAX_IMAGE_SIZE,	/* Limit image size */
 	.write = image_data_write,
diff --git a/arch/powerpc/platforms/powernv/opal-hmi.c b/arch/powerpc/platforms/powernv/opal-hmi.c
index 88f3c61eec95..d78fed728cdf 100644
--- a/arch/powerpc/platforms/powernv/opal-hmi.c
+++ b/arch/powerpc/platforms/powernv/opal-hmi.c
@@ -30,6 +30,8 @@
 #include <asm/cputable.h>
 #include <asm/machdep.h>
 
+#include "powernv.h"
+
 static int opal_hmi_handler_nb_init;
 struct OpalHmiEvtNode {
 	struct list_head list;
@@ -267,8 +269,6 @@ static void hmi_event_handler(struct work_struct *work)
 	spin_unlock_irqrestore(&opal_hmi_evt_lock, flags);
 
 	if (unrecoverable) {
-		int ret;
-
 		/* Pull all HMI events from OPAL before we panic. */
 		while (opal_get_msg(__pa(&msg), sizeof(msg)) == OPAL_SUCCESS) {
 			u32 type;
@@ -284,23 +284,7 @@ static void hmi_event_handler(struct work_struct *work)
 			print_hmi_event_info(hmi_evt);
 		}
 
-		/*
-		 * Unrecoverable HMI exception. We need to inform BMC/OCC
-		 * about this error so that it can collect relevant data
-		 * for error analysis before rebooting.
-		 */
-		ret = opal_cec_reboot2(OPAL_REBOOT_PLATFORM_ERROR,
-			"Unrecoverable HMI exception");
-		if (ret == OPAL_UNSUPPORTED) {
-			pr_emerg("Reboot type %d not supported\n",
-						OPAL_REBOOT_PLATFORM_ERROR);
-		}
-
-		/*
-		 * Fall through and panic if opal_cec_reboot2() returns
-		 * OPAL_UNSUPPORTED.
-		 */
-		panic("Unrecoverable HMI exception");
+		pnv_platform_error_reboot(NULL, "Unrecoverable HMI exception");
 	}
 }
 
diff --git a/arch/powerpc/platforms/powernv/opal-imc.c b/arch/powerpc/platforms/powernv/opal-imc.c
new file mode 100644
index 000000000000..21f6531fae20
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-imc.c
@@ -0,0 +1,226 @@
+/*
+ * OPAL IMC interface detection driver
+ * Supported on POWERNV platform
+ *
+ * Copyright	(C) 2017 Madhavan Srinivasan, IBM Corporation.
+ *		(C) 2017 Anju T Sudhakar, IBM Corporation.
+ *		(C) 2017 Hemant K Shaw, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or later version.
+ */
+#include <linux/kernel.h>
+#include <linux/platform_device.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_platform.h>
+#include <linux/crash_dump.h>
+#include <asm/opal.h>
+#include <asm/io.h>
+#include <asm/imc-pmu.h>
+#include <asm/cputhreads.h>
+
+/*
+ * imc_get_mem_addr_nest: Function to get nest counter memory region
+ * for each chip
+ */
+static int imc_get_mem_addr_nest(struct device_node *node,
+				 struct imc_pmu *pmu_ptr,
+				 u32 offset)
+{
+	int nr_chips = 0, i;
+	u64 *base_addr_arr, baddr;
+	u32 *chipid_arr;
+
+	nr_chips = of_property_count_u32_elems(node, "chip-id");
+	if (nr_chips <= 0)
+		return -ENODEV;
+
+	base_addr_arr = kcalloc(nr_chips, sizeof(u64), GFP_KERNEL);
+	if (!base_addr_arr)
+		return -ENOMEM;
+
+	chipid_arr = kcalloc(nr_chips, sizeof(u32), GFP_KERNEL);
+	if (!chipid_arr)
+		return -ENOMEM;
+
+	if (of_property_read_u32_array(node, "chip-id", chipid_arr, nr_chips))
+		goto error;
+
+	if (of_property_read_u64_array(node, "base-addr", base_addr_arr,
+								nr_chips))
+		goto error;
+
+	pmu_ptr->mem_info = kcalloc(nr_chips, sizeof(struct imc_mem_info),
+								GFP_KERNEL);
+	if (!pmu_ptr->mem_info)
+		goto error;
+
+	for (i = 0; i < nr_chips; i++) {
+		pmu_ptr->mem_info[i].id = chipid_arr[i];
+		baddr = base_addr_arr[i] + offset;
+		pmu_ptr->mem_info[i].vbase = phys_to_virt(baddr);
+	}
+
+	pmu_ptr->imc_counter_mmaped = true;
+	kfree(base_addr_arr);
+	kfree(chipid_arr);
+	return 0;
+
+error:
+	kfree(pmu_ptr->mem_info);
+	kfree(base_addr_arr);
+	kfree(chipid_arr);
+	return -1;
+}
+
+/*
+ * imc_pmu_create : Takes the parent device which is the pmu unit, pmu_index
+ *		    and domain as the inputs.
+ * Allocates memory for the struct imc_pmu, sets up its domain, size and offsets
+ */
+static int imc_pmu_create(struct device_node *parent, int pmu_index, int domain)
+{
+	int ret = 0;
+	struct imc_pmu *pmu_ptr;
+	u32 offset;
+
+	/* memory for pmu */
+	pmu_ptr = kzalloc(sizeof(struct imc_pmu), GFP_KERNEL);
+	if (!pmu_ptr)
+		return -ENOMEM;
+
+	/* Set the domain */
+	pmu_ptr->domain = domain;
+
+	ret = of_property_read_u32(parent, "size", &pmu_ptr->counter_mem_size);
+	if (ret) {
+		ret = -EINVAL;
+		goto free_pmu;
+	}
+
+	if (!of_property_read_u32(parent, "offset", &offset)) {
+		if (imc_get_mem_addr_nest(parent, pmu_ptr, offset)) {
+			ret = -EINVAL;
+			goto free_pmu;
+		}
+	}
+
+	/* Function to register IMC pmu */
+	ret = init_imc_pmu(parent, pmu_ptr, pmu_index);
+	if (ret)
+		pr_err("IMC PMU %s Register failed\n", pmu_ptr->pmu.name);
+
+	return 0;
+
+free_pmu:
+	kfree(pmu_ptr);
+	return ret;
+}
+
+static void disable_nest_pmu_counters(void)
+{
+	int nid, cpu;
+	const struct cpumask *l_cpumask;
+
+	get_online_cpus();
+	for_each_online_node(nid) {
+		l_cpumask = cpumask_of_node(nid);
+		cpu = cpumask_first(l_cpumask);
+		opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST,
+				       get_hard_smp_processor_id(cpu));
+	}
+	put_online_cpus();
+}
+
+static void disable_core_pmu_counters(void)
+{
+	cpumask_t cores_map;
+	int cpu, rc;
+
+	get_online_cpus();
+	/* Disable the IMC Core functions */
+	cores_map = cpu_online_cores_map();
+	for_each_cpu(cpu, &cores_map) {
+		rc = opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE,
+					    get_hard_smp_processor_id(cpu));
+		if (rc)
+			pr_err("%s: Failed to stop Core (cpu = %d)\n",
+				__FUNCTION__, cpu);
+	}
+	put_online_cpus();
+}
+
+static int opal_imc_counters_probe(struct platform_device *pdev)
+{
+	struct device_node *imc_dev = pdev->dev.of_node;
+	int pmu_count = 0, domain;
+	u32 type;
+
+	/*
+	 * Check whether this is kdump kernel. If yes, force the engines to
+	 * stop and return.
+	 */
+	if (is_kdump_kernel()) {
+		disable_nest_pmu_counters();
+		disable_core_pmu_counters();
+		return -ENODEV;
+	}
+
+	for_each_compatible_node(imc_dev, NULL, IMC_DTB_UNIT_COMPAT) {
+		if (of_property_read_u32(imc_dev, "type", &type)) {
+			pr_warn("IMC Device without type property\n");
+			continue;
+		}
+
+		switch (type) {
+		case IMC_TYPE_CHIP:
+			domain = IMC_DOMAIN_NEST;
+			break;
+		case IMC_TYPE_CORE:
+			domain =IMC_DOMAIN_CORE;
+			break;
+		case IMC_TYPE_THREAD:
+			domain = IMC_DOMAIN_THREAD;
+			break;
+		default:
+			pr_warn("IMC Unknown Device type \n");
+			domain = -1;
+			break;
+		}
+
+		if (!imc_pmu_create(imc_dev, pmu_count, domain))
+			pmu_count++;
+	}
+
+	return 0;
+}
+
+static void opal_imc_counters_shutdown(struct platform_device *pdev)
+{
+	/*
+	 * Function only stops the engines which is bare minimum.
+	 * TODO: Need to handle proper memory cleanup and pmu
+	 * unregister.
+	 */
+	disable_nest_pmu_counters();
+	disable_core_pmu_counters();
+}
+
+static const struct of_device_id opal_imc_match[] = {
+	{ .compatible = IMC_DTB_COMPAT },
+	{},
+};
+
+static struct platform_driver opal_imc_driver = {
+	.driver = {
+		.name = "opal-imc-counters",
+		.of_match_table = opal_imc_match,
+	},
+	.probe = opal_imc_counters_probe,
+	.shutdown = opal_imc_counters_shutdown,
+};
+
+builtin_platform_driver(opal_imc_driver);
diff --git a/arch/powerpc/platforms/powernv/opal-powercap.c b/arch/powerpc/platforms/powernv/opal-powercap.c
new file mode 100644
index 000000000000..badb29bde93f
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-powercap.c
@@ -0,0 +1,244 @@
+/*
+ * PowerNV OPAL Powercap interface
+ *
+ * Copyright 2017 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#define pr_fmt(fmt)     "opal-powercap: " fmt
+
+#include <linux/of.h>
+#include <linux/kobject.h>
+#include <linux/slab.h>
+
+#include <asm/opal.h>
+
+DEFINE_MUTEX(powercap_mutex);
+
+static struct kobject *powercap_kobj;
+
+struct powercap_attr {
+	u32 handle;
+	struct kobj_attribute attr;
+};
+
+static struct pcap {
+	struct attribute_group pg;
+	struct powercap_attr *pattrs;
+} *pcaps;
+
+static ssize_t powercap_show(struct kobject *kobj, struct kobj_attribute *attr,
+			     char *buf)
+{
+	struct powercap_attr *pcap_attr = container_of(attr,
+						struct powercap_attr, attr);
+	struct opal_msg msg;
+	u32 pcap;
+	int ret, token;
+
+	token = opal_async_get_token_interruptible();
+	if (token < 0) {
+		pr_devel("Failed to get token\n");
+		return token;
+	}
+
+	ret = mutex_lock_interruptible(&powercap_mutex);
+	if (ret)
+		goto out_token;
+
+	ret = opal_get_powercap(pcap_attr->handle, token, (u32 *)__pa(&pcap));
+	switch (ret) {
+	case OPAL_ASYNC_COMPLETION:
+		ret = opal_async_wait_response(token, &msg);
+		if (ret) {
+			pr_devel("Failed to wait for the async response\n");
+			ret = -EIO;
+			goto out;
+		}
+		ret = opal_error_code(opal_get_async_rc(msg));
+		if (!ret) {
+			ret = sprintf(buf, "%u\n", be32_to_cpu(pcap));
+			if (ret < 0)
+				ret = -EIO;
+		}
+		break;
+	case OPAL_SUCCESS:
+		ret = sprintf(buf, "%u\n", be32_to_cpu(pcap));
+		if (ret < 0)
+			ret = -EIO;
+		break;
+	default:
+		ret = opal_error_code(ret);
+	}
+
+out:
+	mutex_unlock(&powercap_mutex);
+out_token:
+	opal_async_release_token(token);
+	return ret;
+}
+
+static ssize_t powercap_store(struct kobject *kobj,
+			      struct kobj_attribute *attr, const char *buf,
+			      size_t count)
+{
+	struct powercap_attr *pcap_attr = container_of(attr,
+						struct powercap_attr, attr);
+	struct opal_msg msg;
+	u32 pcap;
+	int ret, token;
+
+	ret = kstrtoint(buf, 0, &pcap);
+	if (ret)
+		return ret;
+
+	token = opal_async_get_token_interruptible();
+	if (token < 0) {
+		pr_devel("Failed to get token\n");
+		return token;
+	}
+
+	ret = mutex_lock_interruptible(&powercap_mutex);
+	if (ret)
+		goto out_token;
+
+	ret = opal_set_powercap(pcap_attr->handle, token, pcap);
+	switch (ret) {
+	case OPAL_ASYNC_COMPLETION:
+		ret = opal_async_wait_response(token, &msg);
+		if (ret) {
+			pr_devel("Failed to wait for the async response\n");
+			ret = -EIO;
+			goto out;
+		}
+		ret = opal_error_code(opal_get_async_rc(msg));
+		if (!ret)
+			ret = count;
+		break;
+	case OPAL_SUCCESS:
+		ret = count;
+		break;
+	default:
+		ret = opal_error_code(ret);
+	}
+
+out:
+	mutex_unlock(&powercap_mutex);
+out_token:
+	opal_async_release_token(token);
+	return ret;
+}
+
+static void powercap_add_attr(int handle, const char *name,
+			      struct powercap_attr *attr)
+{
+	attr->handle = handle;
+	sysfs_attr_init(&attr->attr.attr);
+	attr->attr.attr.name = name;
+	attr->attr.attr.mode = 0444;
+	attr->attr.show = powercap_show;
+}
+
+void __init opal_powercap_init(void)
+{
+	struct device_node *powercap, *node;
+	int i = 0;
+
+	powercap = of_find_compatible_node(NULL, NULL, "ibm,opal-powercap");
+	if (!powercap) {
+		pr_devel("Powercap node not found\n");
+		return;
+	}
+
+	pcaps = kcalloc(of_get_child_count(powercap), sizeof(*pcaps),
+			GFP_KERNEL);
+	if (!pcaps)
+		return;
+
+	powercap_kobj = kobject_create_and_add("powercap", opal_kobj);
+	if (!powercap_kobj) {
+		pr_warn("Failed to create powercap kobject\n");
+		goto out_pcaps;
+	}
+
+	i = 0;
+	for_each_child_of_node(powercap, node) {
+		u32 cur, min, max;
+		int j = 0;
+		bool has_cur = false, has_min = false, has_max = false;
+
+		if (!of_property_read_u32(node, "powercap-min", &min)) {
+			j++;
+			has_min = true;
+		}
+
+		if (!of_property_read_u32(node, "powercap-max", &max)) {
+			j++;
+			has_max = true;
+		}
+
+		if (!of_property_read_u32(node, "powercap-current", &cur)) {
+			j++;
+			has_cur = true;
+		}
+
+		pcaps[i].pattrs = kcalloc(j, sizeof(struct powercap_attr),
+					  GFP_KERNEL);
+		if (!pcaps[i].pattrs)
+			goto out_pcaps_pattrs;
+
+		pcaps[i].pg.attrs = kcalloc(j + 1, sizeof(struct attribute *),
+					    GFP_KERNEL);
+		if (!pcaps[i].pg.attrs) {
+			kfree(pcaps[i].pattrs);
+			goto out_pcaps_pattrs;
+		}
+
+		j = 0;
+		pcaps[i].pg.name = node->name;
+		if (has_min) {
+			powercap_add_attr(min, "powercap-min",
+					  &pcaps[i].pattrs[j]);
+			pcaps[i].pg.attrs[j] = &pcaps[i].pattrs[j].attr.attr;
+			j++;
+		}
+
+		if (has_max) {
+			powercap_add_attr(max, "powercap-max",
+					  &pcaps[i].pattrs[j]);
+			pcaps[i].pg.attrs[j] = &pcaps[i].pattrs[j].attr.attr;
+			j++;
+		}
+
+		if (has_cur) {
+			powercap_add_attr(cur, "powercap-current",
+					  &pcaps[i].pattrs[j]);
+			pcaps[i].pattrs[j].attr.attr.mode |= 0220;
+			pcaps[i].pattrs[j].attr.store = powercap_store;
+			pcaps[i].pg.attrs[j] = &pcaps[i].pattrs[j].attr.attr;
+			j++;
+		}
+
+		if (sysfs_create_group(powercap_kobj, &pcaps[i].pg)) {
+			pr_warn("Failed to create powercap attribute group %s\n",
+				pcaps[i].pg.name);
+			goto out_pcaps_pattrs;
+		}
+		i++;
+	}
+
+	return;
+
+out_pcaps_pattrs:
+	while (--i >= 0) {
+		kfree(pcaps[i].pattrs);
+		kfree(pcaps[i].pg.attrs);
+	}
+	kobject_put(powercap_kobj);
+out_pcaps:
+	kfree(pcaps);
+}
diff --git a/arch/powerpc/platforms/powernv/opal-prd.c b/arch/powerpc/platforms/powernv/opal-prd.c
index 2d6ee1c5ad85..de4dd09f4a15 100644
--- a/arch/powerpc/platforms/powernv/opal-prd.c
+++ b/arch/powerpc/platforms/powernv/opal-prd.c
@@ -241,15 +241,9 @@ static ssize_t opal_prd_write(struct file *file, const char __user *buf,
 
 	size = be16_to_cpu(hdr.size);
 
-	msg = kmalloc(size, GFP_KERNEL);
-	if (!msg)
-		return -ENOMEM;
-
-	rc = copy_from_user(msg, buf, size);
-	if (rc) {
-		size = -EFAULT;
-		goto out_free;
-	}
+	msg = memdup_user(buf, size);
+	if (IS_ERR(msg))
+		return PTR_ERR(msg);
 
 	rc = opal_prd_msg(msg);
 	if (rc) {
@@ -257,7 +251,6 @@ static ssize_t opal_prd_write(struct file *file, const char __user *buf,
 		size = -EIO;
 	}
 
-out_free:
 	kfree(msg);
 
 	return size;
diff --git a/arch/powerpc/platforms/powernv/opal-psr.c b/arch/powerpc/platforms/powernv/opal-psr.c
new file mode 100644
index 000000000000..7313b7fc9071
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-psr.c
@@ -0,0 +1,175 @@
+/*
+ * PowerNV OPAL Power-Shift-Ratio interface
+ *
+ * Copyright 2017 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#define pr_fmt(fmt)     "opal-psr: " fmt
+
+#include <linux/of.h>
+#include <linux/kobject.h>
+#include <linux/slab.h>
+
+#include <asm/opal.h>
+
+DEFINE_MUTEX(psr_mutex);
+
+static struct kobject *psr_kobj;
+
+struct psr_attr {
+	u32 handle;
+	struct kobj_attribute attr;
+} *psr_attrs;
+
+static ssize_t psr_show(struct kobject *kobj, struct kobj_attribute *attr,
+			char *buf)
+{
+	struct psr_attr *psr_attr = container_of(attr, struct psr_attr, attr);
+	struct opal_msg msg;
+	int psr, ret, token;
+
+	token = opal_async_get_token_interruptible();
+	if (token < 0) {
+		pr_devel("Failed to get token\n");
+		return token;
+	}
+
+	ret = mutex_lock_interruptible(&psr_mutex);
+	if (ret)
+		goto out_token;
+
+	ret = opal_get_power_shift_ratio(psr_attr->handle, token,
+					    (u32 *)__pa(&psr));
+	switch (ret) {
+	case OPAL_ASYNC_COMPLETION:
+		ret = opal_async_wait_response(token, &msg);
+		if (ret) {
+			pr_devel("Failed to wait for the async response\n");
+			ret = -EIO;
+			goto out;
+		}
+		ret = opal_error_code(opal_get_async_rc(msg));
+		if (!ret) {
+			ret = sprintf(buf, "%u\n", be32_to_cpu(psr));
+			if (ret < 0)
+				ret = -EIO;
+		}
+		break;
+	case OPAL_SUCCESS:
+		ret = sprintf(buf, "%u\n", be32_to_cpu(psr));
+		if (ret < 0)
+			ret = -EIO;
+		break;
+	default:
+		ret = opal_error_code(ret);
+	}
+
+out:
+	mutex_unlock(&psr_mutex);
+out_token:
+	opal_async_release_token(token);
+	return ret;
+}
+
+static ssize_t psr_store(struct kobject *kobj, struct kobj_attribute *attr,
+			 const char *buf, size_t count)
+{
+	struct psr_attr *psr_attr = container_of(attr, struct psr_attr, attr);
+	struct opal_msg msg;
+	int psr, ret, token;
+
+	ret = kstrtoint(buf, 0, &psr);
+	if (ret)
+		return ret;
+
+	token = opal_async_get_token_interruptible();
+	if (token < 0) {
+		pr_devel("Failed to get token\n");
+		return token;
+	}
+
+	ret = mutex_lock_interruptible(&psr_mutex);
+	if (ret)
+		goto out_token;
+
+	ret = opal_set_power_shift_ratio(psr_attr->handle, token, psr);
+	switch (ret) {
+	case OPAL_ASYNC_COMPLETION:
+		ret = opal_async_wait_response(token, &msg);
+		if (ret) {
+			pr_devel("Failed to wait for the async response\n");
+			ret = -EIO;
+			goto out;
+		}
+		ret = opal_error_code(opal_get_async_rc(msg));
+		if (!ret)
+			ret = count;
+		break;
+	case OPAL_SUCCESS:
+		ret = count;
+		break;
+	default:
+		ret = opal_error_code(ret);
+	}
+
+out:
+	mutex_unlock(&psr_mutex);
+out_token:
+	opal_async_release_token(token);
+	return ret;
+}
+
+void __init opal_psr_init(void)
+{
+	struct device_node *psr, *node;
+	int i = 0;
+
+	psr = of_find_compatible_node(NULL, NULL,
+				      "ibm,opal-power-shift-ratio");
+	if (!psr) {
+		pr_devel("Power-shift-ratio node not found\n");
+		return;
+	}
+
+	psr_attrs = kcalloc(of_get_child_count(psr), sizeof(struct psr_attr),
+			    GFP_KERNEL);
+	if (!psr_attrs)
+		return;
+
+	psr_kobj = kobject_create_and_add("psr", opal_kobj);
+	if (!psr_kobj) {
+		pr_warn("Failed to create psr kobject\n");
+		goto out;
+	}
+
+	for_each_child_of_node(psr, node) {
+		if (of_property_read_u32(node, "handle",
+					 &psr_attrs[i].handle))
+			goto out_kobj;
+
+		sysfs_attr_init(&psr_attrs[i].attr.attr);
+		if (of_property_read_string(node, "label",
+					    &psr_attrs[i].attr.attr.name))
+			goto out_kobj;
+		psr_attrs[i].attr.attr.mode = 0664;
+		psr_attrs[i].attr.show = psr_show;
+		psr_attrs[i].attr.store = psr_store;
+		if (sysfs_create_file(psr_kobj, &psr_attrs[i].attr.attr)) {
+			pr_devel("Failed to create psr sysfs file %s\n",
+				 psr_attrs[i].attr.attr.name);
+			goto out_kobj;
+		}
+		i++;
+	}
+
+	return;
+out_kobj:
+	kobject_put(psr_kobj);
+out:
+	kfree(psr_attrs);
+}
diff --git a/arch/powerpc/platforms/powernv/opal-sensor-groups.c b/arch/powerpc/platforms/powernv/opal-sensor-groups.c
new file mode 100644
index 000000000000..7e5a235ebf76
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-sensor-groups.c
@@ -0,0 +1,212 @@
+/*
+ * PowerNV OPAL Sensor-groups interface
+ *
+ * Copyright 2017 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#define pr_fmt(fmt)     "opal-sensor-groups: " fmt
+
+#include <linux/of.h>
+#include <linux/kobject.h>
+#include <linux/slab.h>
+
+#include <asm/opal.h>
+
+DEFINE_MUTEX(sg_mutex);
+
+static struct kobject *sg_kobj;
+
+struct sg_attr {
+	u32 handle;
+	struct kobj_attribute attr;
+};
+
+static struct sensor_group {
+	char name[20];
+	struct attribute_group sg;
+	struct sg_attr *sgattrs;
+} *sgs;
+
+static ssize_t sg_store(struct kobject *kobj, struct kobj_attribute *attr,
+			const char *buf, size_t count)
+{
+	struct sg_attr *sattr = container_of(attr, struct sg_attr, attr);
+	struct opal_msg msg;
+	u32 data;
+	int ret, token;
+
+	ret = kstrtoint(buf, 0, &data);
+	if (ret)
+		return ret;
+
+	if (data != 1)
+		return -EINVAL;
+
+	token = opal_async_get_token_interruptible();
+	if (token < 0) {
+		pr_devel("Failed to get token\n");
+		return token;
+	}
+
+	ret = mutex_lock_interruptible(&sg_mutex);
+	if (ret)
+		goto out_token;
+
+	ret = opal_sensor_group_clear(sattr->handle, token);
+	switch (ret) {
+	case OPAL_ASYNC_COMPLETION:
+		ret = opal_async_wait_response(token, &msg);
+		if (ret) {
+			pr_devel("Failed to wait for the async response\n");
+			ret = -EIO;
+			goto out;
+		}
+		ret = opal_error_code(opal_get_async_rc(msg));
+		if (!ret)
+			ret = count;
+		break;
+	case OPAL_SUCCESS:
+		ret = count;
+		break;
+	default:
+		ret = opal_error_code(ret);
+	}
+
+out:
+	mutex_unlock(&sg_mutex);
+out_token:
+	opal_async_release_token(token);
+	return ret;
+}
+
+static struct sg_ops_info {
+	int opal_no;
+	const char *attr_name;
+	ssize_t (*store)(struct kobject *kobj, struct kobj_attribute *attr,
+			const char *buf, size_t count);
+} ops_info[] = {
+	{ OPAL_SENSOR_GROUP_CLEAR, "clear", sg_store },
+};
+
+static void add_attr(int handle, struct sg_attr *attr, int index)
+{
+	attr->handle = handle;
+	sysfs_attr_init(&attr->attr.attr);
+	attr->attr.attr.name = ops_info[index].attr_name;
+	attr->attr.attr.mode = 0220;
+	attr->attr.store = ops_info[index].store;
+}
+
+static int add_attr_group(const __be32 *ops, int len, struct sensor_group *sg,
+			   u32 handle)
+{
+	int i, j;
+	int count = 0;
+
+	for (i = 0; i < len; i++)
+		for (j = 0; j < ARRAY_SIZE(ops_info); j++)
+			if (be32_to_cpu(ops[i]) == ops_info[j].opal_no) {
+				add_attr(handle, &sg->sgattrs[count], j);
+				sg->sg.attrs[count] =
+					&sg->sgattrs[count].attr.attr;
+				count++;
+			}
+
+	return sysfs_create_group(sg_kobj, &sg->sg);
+}
+
+static int get_nr_attrs(const __be32 *ops, int len)
+{
+	int i, j;
+	int nr_attrs = 0;
+
+	for (i = 0; i < len; i++)
+		for (j = 0; j < ARRAY_SIZE(ops_info); j++)
+			if (be32_to_cpu(ops[i]) == ops_info[j].opal_no)
+				nr_attrs++;
+
+	return nr_attrs;
+}
+
+void __init opal_sensor_groups_init(void)
+{
+	struct device_node *sg, *node;
+	int i = 0;
+
+	sg = of_find_compatible_node(NULL, NULL, "ibm,opal-sensor-group");
+	if (!sg) {
+		pr_devel("Sensor groups node not found\n");
+		return;
+	}
+
+	sgs = kcalloc(of_get_child_count(sg), sizeof(*sgs), GFP_KERNEL);
+	if (!sgs)
+		return;
+
+	sg_kobj = kobject_create_and_add("sensor_groups", opal_kobj);
+	if (!sg_kobj) {
+		pr_warn("Failed to create sensor group kobject\n");
+		goto out_sgs;
+	}
+
+	for_each_child_of_node(sg, node) {
+		const __be32 *ops;
+		u32 sgid, len, nr_attrs, chipid;
+
+		ops = of_get_property(node, "ops", &len);
+		if (!ops)
+			continue;
+
+		nr_attrs = get_nr_attrs(ops, len);
+		if (!nr_attrs)
+			continue;
+
+		sgs[i].sgattrs = kcalloc(nr_attrs, sizeof(struct sg_attr),
+					 GFP_KERNEL);
+		if (!sgs[i].sgattrs)
+			goto out_sgs_sgattrs;
+
+		sgs[i].sg.attrs = kcalloc(nr_attrs + 1,
+					  sizeof(struct attribute *),
+					  GFP_KERNEL);
+
+		if (!sgs[i].sg.attrs) {
+			kfree(sgs[i].sgattrs);
+			goto out_sgs_sgattrs;
+		}
+
+		if (of_property_read_u32(node, "sensor-group-id", &sgid)) {
+			pr_warn("sensor-group-id property not found\n");
+			goto out_sgs_sgattrs;
+		}
+
+		if (!of_property_read_u32(node, "ibm,chip-id", &chipid))
+			sprintf(sgs[i].name, "%s%d", node->name, chipid);
+		else
+			sprintf(sgs[i].name, "%s", node->name);
+
+		sgs[i].sg.name = sgs[i].name;
+		if (add_attr_group(ops, len, &sgs[i], sgid)) {
+			pr_warn("Failed to create sensor attribute group %s\n",
+				sgs[i].sg.name);
+			goto out_sgs_sgattrs;
+		}
+		i++;
+	}
+
+	return;
+
+out_sgs_sgattrs:
+	while (--i >= 0) {
+		kfree(sgs[i].sgattrs);
+		kfree(sgs[i].sg.attrs);
+	}
+	kobject_put(sg_kobj);
+out_sgs:
+	kfree(sgs);
+}
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
index 4ca6c26a56d5..8c1ede2d3f7e 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -27,7 +27,7 @@
 
 	.globl opal_tracepoint_refcount
 opal_tracepoint_refcount:
-	.llong	0
+	.8byte	0
 
 	.section	".text"
 
@@ -310,3 +310,12 @@ OPAL_CALL(opal_xive_dump,			OPAL_XIVE_DUMP);
 OPAL_CALL(opal_npu_init_context,		OPAL_NPU_INIT_CONTEXT);
 OPAL_CALL(opal_npu_destroy_context,		OPAL_NPU_DESTROY_CONTEXT);
 OPAL_CALL(opal_npu_map_lpar,			OPAL_NPU_MAP_LPAR);
+OPAL_CALL(opal_imc_counters_init,		OPAL_IMC_COUNTERS_INIT);
+OPAL_CALL(opal_imc_counters_start,		OPAL_IMC_COUNTERS_START);
+OPAL_CALL(opal_imc_counters_stop,		OPAL_IMC_COUNTERS_STOP);
+OPAL_CALL(opal_pci_set_p2p,			OPAL_PCI_SET_P2P);
+OPAL_CALL(opal_get_powercap,			OPAL_GET_POWERCAP);
+OPAL_CALL(opal_set_powercap,			OPAL_SET_POWERCAP);
+OPAL_CALL(opal_get_power_shift_ratio,		OPAL_GET_POWER_SHIFT_RATIO);
+OPAL_CALL(opal_set_power_shift_ratio,		OPAL_SET_POWER_SHIFT_RATIO);
+OPAL_CALL(opal_sensor_group_clear,		OPAL_SENSOR_GROUP_CLEAR);
diff --git a/arch/powerpc/platforms/powernv/opal-xscom.c b/arch/powerpc/platforms/powernv/opal-xscom.c
index 28651fb25417..81c0a943dea9 100644
--- a/arch/powerpc/platforms/powernv/opal-xscom.c
+++ b/arch/powerpc/platforms/powernv/opal-xscom.c
@@ -36,14 +36,14 @@ static scom_map_t opal_scom_map(struct device_node *dev, u64 reg, u64 count)
 	const __be32 *gcid;
 
 	if (!of_get_property(dev, "scom-controller", NULL)) {
-		pr_err("%s: device %s is not a SCOM controller\n",
-			__func__, dev->full_name);
+		pr_err("%s: device %pOF is not a SCOM controller\n",
+			__func__, dev);
 		return SCOM_MAP_INVALID;
 	}
 	gcid = of_get_property(dev, "ibm,chip-id", NULL);
 	if (!gcid) {
-		pr_err("%s: device %s has no ibm,chip-id\n",
-			__func__, dev->full_name);
+		pr_err("%s: device %pOF has no ibm,chip-id\n",
+			__func__, dev);
 		return SCOM_MAP_INVALID;
 	}
 	m = kmalloc(sizeof(struct opal_scom_map), GFP_KERNEL);
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index cad6b57ce494..65c79ecf5a4d 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -16,6 +16,7 @@
 #include <linux/of.h>
 #include <linux/of_fdt.h>
 #include <linux/of_platform.h>
+#include <linux/of_address.h>
 #include <linux/interrupt.h>
 #include <linux/notifier.h>
 #include <linux/slab.h>
@@ -25,11 +26,17 @@
 #include <linux/memblock.h>
 #include <linux/kthread.h>
 #include <linux/freezer.h>
+#include <linux/printk.h>
+#include <linux/kmsg_dump.h>
+#include <linux/console.h>
+#include <linux/sched/debug.h>
 
 #include <asm/machdep.h>
 #include <asm/opal.h>
 #include <asm/firmware.h>
 #include <asm/mce.h>
+#include <asm/imc-pmu.h>
+#include <asm/bug.h>
 
 #include "powernv.h"
 
@@ -162,12 +169,9 @@ int __init early_init_dt_scan_recoverable_ranges(unsigned long node,
 			sizeof(struct mcheck_recoverable_range);
 
 	/*
-	 * Allocate a buffer to hold the MC recoverable ranges. We would be
-	 * accessing them in real mode, hence it needs to be within
-	 * RMO region.
+	 * Allocate a buffer to hold the MC recoverable ranges.
 	 */
-	mc_recoverable_range =__va(memblock_alloc_base(size, __alignof__(u64),
-							ppc64_rma_size));
+	mc_recoverable_range =__va(memblock_alloc(size, __alignof__(u64)));
 	memset(mc_recoverable_range, 0, size);
 
 	for (i = 0; i < mc_recoverable_range_len; i++) {
@@ -422,24 +426,88 @@ static int opal_recover_mce(struct pt_regs *regs,
 		/* Fatal machine check */
 		pr_err("Machine check interrupt is fatal\n");
 		recovered = 0;
-	} else if ((evt->severity == MCE_SEV_ERROR_SYNC) &&
-			(user_mode(regs) && !is_global_init(current))) {
+	}
+
+	if (!recovered && evt->severity == MCE_SEV_ERROR_SYNC) {
 		/*
-		 * For now, kill the task if we have received exception when
-		 * in userspace.
+		 * Try to kill processes if we get a synchronous machine check
+		 * (e.g., one caused by execution of this instruction). This
+		 * will devolve into a panic if we try to kill init or are in
+		 * an interrupt etc.
 		 *
 		 * TODO: Queue up this address for hwpoisioning later.
+		 * TODO: This is not quite right for d-side machine
+		 *       checks ->nip is not necessarily the important
+		 *       address.
 		 */
-		_exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
-		recovered = 1;
+		if ((user_mode(regs))) {
+			_exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
+			recovered = 1;
+		} else if (die_will_crash()) {
+			/*
+			 * die() would kill the kernel, so better to go via
+			 * the platform reboot code that will log the
+			 * machine check.
+			 */
+			recovered = 0;
+		} else {
+			die("Machine check", regs, SIGBUS);
+			recovered = 1;
+		}
 	}
+
 	return recovered;
 }
 
+void pnv_platform_error_reboot(struct pt_regs *regs, const char *msg)
+{
+	/*
+	 * This is mostly taken from kernel/panic.c, but tries to do
+	 * relatively minimal work. Don't use delay functions (TB may
+	 * be broken), don't crash dump (need to set a firmware log),
+	 * don't run notifiers. We do want to get some information to
+	 * Linux console.
+	 */
+	console_verbose();
+	bust_spinlocks(1);
+	pr_emerg("Hardware platform error: %s\n", msg);
+	if (regs)
+		show_regs(regs);
+	smp_send_stop();
+	printk_safe_flush_on_panic();
+	kmsg_dump(KMSG_DUMP_PANIC);
+	bust_spinlocks(0);
+	debug_locks_off();
+	console_flush_on_panic();
+
+	/*
+	 * Don't bother to shut things down because this will
+	 * xstop the system.
+	 */
+	if (opal_cec_reboot2(OPAL_REBOOT_PLATFORM_ERROR, msg)
+						== OPAL_UNSUPPORTED) {
+		pr_emerg("Reboot type %d not supported for %s\n",
+				OPAL_REBOOT_PLATFORM_ERROR, msg);
+	}
+
+	/*
+	 * We reached here. There can be three possibilities:
+	 * 1. We are running on a firmware level that do not support
+	 *    opal_cec_reboot2()
+	 * 2. We are running on a firmware level that do not support
+	 *    OPAL_REBOOT_PLATFORM_ERROR reboot type.
+	 * 3. We are running on FSP based system that does not need
+	 *    opal to trigger checkstop explicitly for error analysis.
+	 *    The FSP PRD component would have already got notified
+	 *    about this error through other channels.
+	 */
+
+	ppc_md.restart(NULL);
+}
+
 int opal_machine_check(struct pt_regs *regs)
 {
 	struct machine_check_event evt;
-	int ret;
 
 	if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
 		return 0;
@@ -455,43 +523,7 @@ int opal_machine_check(struct pt_regs *regs)
 	if (opal_recover_mce(regs, &evt))
 		return 1;
 
-	/*
-	 * Unrecovered machine check, we are heading to panic path.
-	 *
-	 * We may have hit this MCE in very early stage of kernel
-	 * initialization even before opal-prd has started running. If
-	 * this is the case then this MCE error may go un-noticed or
-	 * un-analyzed if we go down panic path. We need to inform
-	 * BMC/OCC about this error so that they can collect relevant
-	 * data for error analysis before rebooting.
-	 * Use opal_cec_reboot2(OPAL_REBOOT_PLATFORM_ERROR) to do so.
-	 * This function may not return on BMC based system.
-	 */
-	ret = opal_cec_reboot2(OPAL_REBOOT_PLATFORM_ERROR,
-			"Unrecoverable Machine Check exception");
-	if (ret == OPAL_UNSUPPORTED) {
-		pr_emerg("Reboot type %d not supported\n",
-					OPAL_REBOOT_PLATFORM_ERROR);
-	}
-
-	/*
-	 * We reached here. There can be three possibilities:
-	 * 1. We are running on a firmware level that do not support
-	 *    opal_cec_reboot2()
-	 * 2. We are running on a firmware level that do not support
-	 *    OPAL_REBOOT_PLATFORM_ERROR reboot type.
-	 * 3. We are running on FSP based system that does not need opal
-	 *    to trigger checkstop explicitly for error analysis. The FSP
-	 *    PRD component would have already got notified about this
-	 *    error through other channels.
-	 *
-	 * If hardware marked this as an unrecoverable MCE, we are
-	 * going to panic anyway. Even if it didn't, it's not safe to
-	 * continue at this point, so we should explicitly panic.
-	 */
-
-	panic("PowerNV Unrecovered Machine Check");
-	return 0;
+	pnv_platform_error_reboot(regs, "Unrecoverable Machine Check exception");
 }
 
 /* Early hmi handler called in real mode. */
@@ -720,6 +752,15 @@ static void opal_pdev_init(const char *compatible)
 		of_platform_device_create(np, NULL, NULL);
 }
 
+static void __init opal_imc_init_dev(void)
+{
+	struct device_node *np;
+
+	np = of_find_compatible_node(NULL, NULL, IMC_DTB_COMPAT);
+	if (np)
+		of_platform_device_create(np, NULL, NULL);
+}
+
 static int kopald(void *unused)
 {
 	unsigned long timeout = msecs_to_jiffies(opal_heartbeat) + 1;
@@ -793,6 +834,9 @@ static int __init opal_init(void)
 	/* Setup a heatbeat thread if requested by OPAL */
 	opal_init_heartbeat();
 
+	/* Detect In-Memory Collection counters and create devices*/
+	opal_imc_init_dev();
+
 	/* Create leds platform devices */
 	leds = of_find_node_by_path("/ibm,opal/leds");
 	if (leds) {
@@ -836,6 +880,15 @@ static int __init opal_init(void)
 	/* Initialise OPAL kmsg dumper for flushing console on panic */
 	opal_kmsg_init();
 
+	/* Initialise OPAL powercap interface */
+	opal_powercap_init();
+
+	/* Initialise OPAL Power-Shifting-Ratio interface */
+	opal_psr_init();
+
+	/* Initialise OPAL sensor groups */
+	opal_sensor_groups_init();
+
 	return 0;
 }
 machine_subsys_initcall(powernv, opal_init);
@@ -952,6 +1005,7 @@ int opal_error_code(int rc)
 	case OPAL_UNSUPPORTED:		return -EIO;
 	case OPAL_HARDWARE:		return -EIO;
 	case OPAL_INTERNAL_ERROR:	return -EIO;
+	case OPAL_TIMEOUT:		return -ETIMEDOUT;
 	default:
 		pr_err("%s: unexpected OPAL error %d\n", __func__, rc);
 		return -EIO;
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index b900eb1d5e17..57f9e55f4352 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -444,8 +444,8 @@ static void __init pnv_ioda_parse_m64_window(struct pnv_phb *phb)
 
 	r = of_get_property(dn, "ibm,opal-m64-window", NULL);
 	if (!r) {
-		pr_info("  No <ibm,opal-m64-window> on %s\n",
-			dn->full_name);
+		pr_info("  No <ibm,opal-m64-window> on %pOF\n",
+			dn);
 		return;
 	}
 
@@ -1408,7 +1408,6 @@ m64_failed:
 
 static long pnv_pci_ioda2_unset_window(struct iommu_table_group *table_group,
 		int num);
-static void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable);
 
 static void pnv_pci_ioda2_release_dma_pe(struct pci_dev *dev, struct pnv_ioda_pe *pe)
 {
@@ -2402,7 +2401,7 @@ static long pnv_pci_ioda2_set_window(struct iommu_table_group *table_group,
 	return 0;
 }
 
-static void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable)
+void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable)
 {
 	uint16_t window_id = (pe->pe_number << 1 ) + 1;
 	int64_t rc;
@@ -3797,8 +3796,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
 	if (!of_device_is_available(np))
 		return;
 
-	pr_info("Initializing %s PHB (%s)\n",
-		pnv_phb_names[ioda_type], of_node_full_name(np));
+	pr_info("Initializing %s PHB (%pOF)\n",	pnv_phb_names[ioda_type], np);
 
 	prop64 = of_get_property(np, "ibm,opal-phbid", NULL);
 	if (!prop64) {
@@ -3813,8 +3811,8 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
 	/* Allocate PCI controller */
 	phb->hose = hose = pcibios_alloc_controller(np);
 	if (!phb->hose) {
-		pr_err("  Can't allocate PCI controller for %s\n",
-		       np->full_name);
+		pr_err("  Can't allocate PCI controller for %pOF\n",
+		       np);
 		memblock_free(__pa(phb), sizeof(struct pnv_phb));
 		return;
 	}
@@ -3825,7 +3823,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
 		hose->first_busno = be32_to_cpu(prop32[0]);
 		hose->last_busno = be32_to_cpu(prop32[1]);
 	} else {
-		pr_warn("  Broken <bus-range> on %s\n", np->full_name);
+		pr_warn("  Broken <bus-range> on %pOF\n", np);
 		hose->first_busno = 0;
 		hose->last_busno = 0xff;
 	}
@@ -4046,7 +4044,7 @@ void __init pnv_pci_init_ioda_hub(struct device_node *np)
 	const __be64 *prop64;
 	u64 hub_id;
 
-	pr_info("Probing IODA IO-Hub %s\n", np->full_name);
+	pr_info("Probing IODA IO-Hub %pOF\n", np);
 
 	prop64 = of_get_property(np, "ibm,opal-hubid", NULL);
 	if (!prop64) {
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index 7905d179d036..5422f4a6317c 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -37,6 +37,8 @@
 #include "powernv.h"
 #include "pci.h"
 
+static DEFINE_MUTEX(p2p_mutex);
+
 int pnv_pci_get_slot_id(struct device_node *np, uint64_t *id)
 {
 	struct device_node *parent = np;
@@ -1017,6 +1019,79 @@ void pnv_pci_dma_bus_setup(struct pci_bus *bus)
 	}
 }
 
+int pnv_pci_set_p2p(struct pci_dev *initiator, struct pci_dev *target, u64 desc)
+{
+	struct pci_controller *hose;
+	struct pnv_phb *phb_init, *phb_target;
+	struct pnv_ioda_pe *pe_init;
+	int rc;
+
+	if (!opal_check_token(OPAL_PCI_SET_P2P))
+		return -ENXIO;
+
+	hose = pci_bus_to_host(initiator->bus);
+	phb_init = hose->private_data;
+
+	hose = pci_bus_to_host(target->bus);
+	phb_target = hose->private_data;
+
+	pe_init = pnv_ioda_get_pe(initiator);
+	if (!pe_init)
+		return -ENODEV;
+
+	/*
+	 * Configuring the initiator's PHB requires to adjust its
+	 * TVE#1 setting. Since the same device can be an initiator
+	 * several times for different target devices, we need to keep
+	 * a reference count to know when we can restore the default
+	 * bypass setting on its TVE#1 when disabling. Opal is not
+	 * tracking PE states, so we add a reference count on the PE
+	 * in linux.
+	 *
+	 * For the target, the configuration is per PHB, so we keep a
+	 * target reference count on the PHB.
+	 */
+	mutex_lock(&p2p_mutex);
+
+	if (desc & OPAL_PCI_P2P_ENABLE) {
+		/* always go to opal to validate the configuration */
+		rc = opal_pci_set_p2p(phb_init->opal_id, phb_target->opal_id,
+				      desc, pe_init->pe_number);
+
+		if (rc != OPAL_SUCCESS) {
+			rc = -EIO;
+			goto out;
+		}
+
+		pe_init->p2p_initiator_count++;
+		phb_target->p2p_target_count++;
+	} else {
+		if (!pe_init->p2p_initiator_count ||
+			!phb_target->p2p_target_count) {
+			rc = -EINVAL;
+			goto out;
+		}
+
+		if (--pe_init->p2p_initiator_count == 0)
+			pnv_pci_ioda2_set_bypass(pe_init, true);
+
+		if (--phb_target->p2p_target_count == 0) {
+			rc = opal_pci_set_p2p(phb_init->opal_id,
+					      phb_target->opal_id, desc,
+					      pe_init->pe_number);
+			if (rc != OPAL_SUCCESS) {
+				rc = -EIO;
+				goto out;
+			}
+		}
+	}
+	rc = 0;
+out:
+	mutex_unlock(&p2p_mutex);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(pnv_pci_set_p2p);
+
 void pnv_pci_shutdown(void)
 {
 	struct pci_controller *hose;
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index f16bc403ec03..a95273c524f6 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -78,6 +78,9 @@ struct pnv_ioda_pe {
 	struct pnv_ioda_pe	*master;
 	struct list_head	slaves;
 
+	/* PCI peer-to-peer*/
+	int			p2p_initiator_count;
+
 	/* Link in list of PE#s */
 	struct list_head	list;
 };
@@ -189,6 +192,7 @@ struct pnv_phb {
 #ifdef CONFIG_CXL_BASE
 	struct cxl_afu *cxl_afu;
 #endif
+	int p2p_target_count;
 };
 
 extern struct pci_ops pnv_pci_ops;
@@ -229,6 +233,7 @@ extern void pnv_teardown_msi_irqs(struct pci_dev *pdev);
 extern struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev);
 extern void pnv_set_msi_irq_chip(struct pnv_phb *phb, unsigned int virq);
 extern bool pnv_pci_enable_device_hook(struct pci_dev *dev);
+extern void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable);
 
 extern void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level,
 			    const char *fmt, ...);
diff --git a/arch/powerpc/platforms/powernv/powernv.h b/arch/powerpc/platforms/powernv/powernv.h
index 6dbc0a1da1f6..a159d48573d7 100644
--- a/arch/powerpc/platforms/powernv/powernv.h
+++ b/arch/powerpc/platforms/powernv/powernv.h
@@ -7,6 +7,8 @@ extern void pnv_smp_init(void);
 static inline void pnv_smp_init(void) { }
 #endif
 
+extern void pnv_platform_error_reboot(struct pt_regs *regs, const char *msg) __noreturn;
+
 struct pci_dev;
 
 #ifdef CONFIG_PCI
diff --git a/arch/powerpc/platforms/powernv/rng.c b/arch/powerpc/platforms/powernv/rng.c
index 1a9d84371a4d..718f50ed22f1 100644
--- a/arch/powerpc/platforms/powernv/rng.c
+++ b/arch/powerpc/platforms/powernv/rng.c
@@ -16,11 +16,13 @@
 #include <linux/slab.h>
 #include <linux/smp.h>
 #include <asm/archrandom.h>
+#include <asm/cputable.h>
 #include <asm/io.h>
 #include <asm/prom.h>
 #include <asm/machdep.h>
 #include <asm/smp.h>
 
+#define DARN_ERR 0xFFFFFFFFFFFFFFFFul
 
 struct powernv_rng {
 	void __iomem *regs;
@@ -67,6 +69,41 @@ int powernv_get_random_real_mode(unsigned long *v)
 	return 1;
 }
 
+int powernv_get_random_darn(unsigned long *v)
+{
+	unsigned long val;
+
+	/* Using DARN with L=1 - 64-bit conditioned random number */
+	asm volatile(PPC_DARN(%0, 1) : "=r"(val));
+
+	if (val == DARN_ERR)
+		return 0;
+
+	*v = val;
+
+	return 1;
+}
+
+static int initialise_darn(void)
+{
+	unsigned long val;
+	int i;
+
+	if (!cpu_has_feature(CPU_FTR_ARCH_300))
+		return -ENODEV;
+
+	for (i = 0; i < 10; i++) {
+		if (powernv_get_random_darn(&val)) {
+			ppc_md.get_random_seed = powernv_get_random_darn;
+			return 0;
+		}
+	}
+
+	pr_warn("Unable to use DARN for get_random_seed()\n");
+
+	return -EIO;
+}
+
 int powernv_get_random_long(unsigned long *v)
 {
 	struct powernv_rng *rng;
@@ -88,7 +125,7 @@ static __init void rng_init_per_cpu(struct powernv_rng *rng,
 
 	chip_id = of_get_ibm_chip_id(dn);
 	if (chip_id == -1)
-		pr_warn("No ibm,chip-id found for %s.\n", dn->full_name);
+		pr_warn("No ibm,chip-id found for %pOF.\n", dn);
 
 	for_each_possible_cpu(cpu) {
 		if (per_cpu(powernv_rng, cpu) == NULL ||
@@ -141,8 +178,8 @@ static __init int rng_init(void)
 	for_each_compatible_node(dn, NULL, "ibm,power-rng") {
 		rc = rng_create(dn);
 		if (rc) {
-			pr_err("Failed creating rng for %s (%d).\n",
-				dn->full_name, rc);
+			pr_err("Failed creating rng for %pOF (%d).\n",
+				dn, rc);
 			continue;
 		}
 
@@ -150,6 +187,8 @@ static __init int rng_init(void)
 		of_platform_device_create(dn, NULL, NULL);
 	}
 
+	initialise_darn();
+
 	return 0;
 }
 machine_subsys_initcall(powernv, rng_init);
diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c
index 40dae96f7e20..c17f81e433f7 100644
--- a/arch/powerpc/platforms/powernv/smp.c
+++ b/arch/powerpc/platforms/powernv/smp.c
@@ -57,7 +57,7 @@ static void pnv_smp_setup_cpu(int cpu)
 
 static int pnv_smp_kick_cpu(int nr)
 {
-	unsigned int pcpu = get_hard_smp_processor_id(nr);
+	unsigned int pcpu;
 	unsigned long start_here =
 			__pa(ppc_function_entry(generic_secondary_smp_init));
 	long rc;
@@ -66,6 +66,7 @@ static int pnv_smp_kick_cpu(int nr)
 	if (nr < 0 || nr >= nr_cpu_ids)
 		return -EINVAL;
 
+	pcpu = get_hard_smp_processor_id(nr);
 	/*
 	 * If we already started or OPAL is not supported, we just
 	 * kick the CPU via the PACA
@@ -164,12 +165,6 @@ static void pnv_smp_cpu_kill_self(void)
 	if (cpu_has_feature(CPU_FTR_ARCH_207S))
 		wmask = SRR1_WAKEMASK_P8;
 
-	/* We don't want to take decrementer interrupts while we are offline,
-	 * so clear LPCR:PECE1. We keep PECE2 (and LPCR_PECE_HVEE on P9)
-	 * enabled as to let IPIs in.
-	 */
-	mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1);
-
 	while (!generic_check_cpu_restart(cpu)) {
 		/*
 		 * Clear IPI flag, since we don't handle IPIs while
@@ -219,8 +214,6 @@ static void pnv_smp_cpu_kill_self(void)
 
 	}
 
-	/* Re-enable decrementer interrupts */
-	mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) | LPCR_PECE1);
 	DBG("CPU%d coming online...\n", cpu);
 }
 
diff --git a/arch/powerpc/platforms/powernv/vas-window.c b/arch/powerpc/platforms/powernv/vas-window.c
new file mode 100644
index 000000000000..5aae845b8cd9
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/vas-window.c
@@ -0,0 +1,1134 @@
+/*
+ * Copyright 2016-17 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#define pr_fmt(fmt) "vas: " fmt
+
+#include <linux/types.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/io.h>
+#include <linux/log2.h>
+#include <linux/rcupdate.h>
+#include <linux/cred.h>
+
+#include "vas.h"
+#include "copy-paste.h"
+
+/*
+ * Compute the paste address region for the window @window using the
+ * ->paste_base_addr and ->paste_win_id_shift we got from device tree.
+ */
+static void compute_paste_address(struct vas_window *window, u64 *addr, int *len)
+{
+	int winid;
+	u64 base, shift;
+
+	base = window->vinst->paste_base_addr;
+	shift = window->vinst->paste_win_id_shift;
+	winid = window->winid;
+
+	*addr  = base + (winid << shift);
+	if (len)
+		*len = PAGE_SIZE;
+
+	pr_debug("Txwin #%d: Paste addr 0x%llx\n", winid, *addr);
+}
+
+static inline void get_hvwc_mmio_bar(struct vas_window *window,
+			u64 *start, int *len)
+{
+	u64 pbaddr;
+
+	pbaddr = window->vinst->hvwc_bar_start;
+	*start = pbaddr + window->winid * VAS_HVWC_SIZE;
+	*len = VAS_HVWC_SIZE;
+}
+
+static inline void get_uwc_mmio_bar(struct vas_window *window,
+			u64 *start, int *len)
+{
+	u64 pbaddr;
+
+	pbaddr = window->vinst->uwc_bar_start;
+	*start = pbaddr + window->winid * VAS_UWC_SIZE;
+	*len = VAS_UWC_SIZE;
+}
+
+/*
+ * Map the paste bus address of the given send window into kernel address
+ * space. Unlike MMIO regions (map_mmio_region() below), paste region must
+ * be mapped cache-able and is only applicable to send windows.
+ */
+static void *map_paste_region(struct vas_window *txwin)
+{
+	int len;
+	void *map;
+	char *name;
+	u64 start;
+
+	name = kasprintf(GFP_KERNEL, "window-v%d-w%d", txwin->vinst->vas_id,
+				txwin->winid);
+	if (!name)
+		goto free_name;
+
+	txwin->paste_addr_name = name;
+	compute_paste_address(txwin, &start, &len);
+
+	if (!request_mem_region(start, len, name)) {
+		pr_devel("%s(): request_mem_region(0x%llx, %d) failed\n",
+				__func__, start, len);
+		goto free_name;
+	}
+
+	map = ioremap_cache(start, len);
+	if (!map) {
+		pr_devel("%s(): ioremap_cache(0x%llx, %d) failed\n", __func__,
+				start, len);
+		goto free_name;
+	}
+
+	pr_devel("Mapped paste addr 0x%llx to kaddr 0x%p\n", start, map);
+	return map;
+
+free_name:
+	kfree(name);
+	return ERR_PTR(-ENOMEM);
+}
+
+static void *map_mmio_region(char *name, u64 start, int len)
+{
+	void *map;
+
+	if (!request_mem_region(start, len, name)) {
+		pr_devel("%s(): request_mem_region(0x%llx, %d) failed\n",
+				__func__, start, len);
+		return NULL;
+	}
+
+	map = ioremap(start, len);
+	if (!map) {
+		pr_devel("%s(): ioremap(0x%llx, %d) failed\n", __func__, start,
+				len);
+		return NULL;
+	}
+
+	return map;
+}
+
+static void unmap_region(void *addr, u64 start, int len)
+{
+	iounmap(addr);
+	release_mem_region((phys_addr_t)start, len);
+}
+
+/*
+ * Unmap the paste address region for a window.
+ */
+static void unmap_paste_region(struct vas_window *window)
+{
+	int len;
+	u64 busaddr_start;
+
+	if (window->paste_kaddr) {
+		compute_paste_address(window, &busaddr_start, &len);
+		unmap_region(window->paste_kaddr, busaddr_start, len);
+		window->paste_kaddr = NULL;
+		kfree(window->paste_addr_name);
+		window->paste_addr_name = NULL;
+	}
+}
+
+/*
+ * Unmap the MMIO regions for a window.
+ */
+static void unmap_winctx_mmio_bars(struct vas_window *window)
+{
+	int len;
+	u64 busaddr_start;
+
+	if (window->hvwc_map) {
+		get_hvwc_mmio_bar(window, &busaddr_start, &len);
+		unmap_region(window->hvwc_map, busaddr_start, len);
+		window->hvwc_map = NULL;
+	}
+
+	if (window->uwc_map) {
+		get_uwc_mmio_bar(window, &busaddr_start, &len);
+		unmap_region(window->uwc_map, busaddr_start, len);
+		window->uwc_map = NULL;
+	}
+}
+
+/*
+ * Find the Hypervisor Window Context (HVWC) MMIO Base Address Region and the
+ * OS/User Window Context (UWC) MMIO Base Address Region for the given window.
+ * Map these bus addresses and save the mapped kernel addresses in @window.
+ */
+int map_winctx_mmio_bars(struct vas_window *window)
+{
+	int len;
+	u64 start;
+
+	get_hvwc_mmio_bar(window, &start, &len);
+	window->hvwc_map = map_mmio_region("HVWCM_Window", start, len);
+
+	get_uwc_mmio_bar(window, &start, &len);
+	window->uwc_map = map_mmio_region("UWCM_Window", start, len);
+
+	if (!window->hvwc_map || !window->uwc_map) {
+		unmap_winctx_mmio_bars(window);
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Reset all valid registers in the HV and OS/User Window Contexts for
+ * the window identified by @window.
+ *
+ * NOTE: We cannot really use a for loop to reset window context. Not all
+ *	 offsets in a window context are valid registers and the valid
+ *	 registers are not sequential. And, we can only write to offsets
+ *	 with valid registers.
+ */
+void reset_window_regs(struct vas_window *window)
+{
+	write_hvwc_reg(window, VREG(LPID), 0ULL);
+	write_hvwc_reg(window, VREG(PID), 0ULL);
+	write_hvwc_reg(window, VREG(XLATE_MSR), 0ULL);
+	write_hvwc_reg(window, VREG(XLATE_LPCR), 0ULL);
+	write_hvwc_reg(window, VREG(XLATE_CTL), 0ULL);
+	write_hvwc_reg(window, VREG(AMR), 0ULL);
+	write_hvwc_reg(window, VREG(SEIDR), 0ULL);
+	write_hvwc_reg(window, VREG(FAULT_TX_WIN), 0ULL);
+	write_hvwc_reg(window, VREG(OSU_INTR_SRC_RA), 0ULL);
+	write_hvwc_reg(window, VREG(HV_INTR_SRC_RA), 0ULL);
+	write_hvwc_reg(window, VREG(PSWID), 0ULL);
+	write_hvwc_reg(window, VREG(LFIFO_BAR), 0ULL);
+	write_hvwc_reg(window, VREG(LDATA_STAMP_CTL), 0ULL);
+	write_hvwc_reg(window, VREG(LDMA_CACHE_CTL), 0ULL);
+	write_hvwc_reg(window, VREG(LRFIFO_PUSH), 0ULL);
+	write_hvwc_reg(window, VREG(CURR_MSG_COUNT), 0ULL);
+	write_hvwc_reg(window, VREG(LNOTIFY_AFTER_COUNT), 0ULL);
+	write_hvwc_reg(window, VREG(LRX_WCRED), 0ULL);
+	write_hvwc_reg(window, VREG(LRX_WCRED_ADDER), 0ULL);
+	write_hvwc_reg(window, VREG(TX_WCRED), 0ULL);
+	write_hvwc_reg(window, VREG(TX_WCRED_ADDER), 0ULL);
+	write_hvwc_reg(window, VREG(LFIFO_SIZE), 0ULL);
+	write_hvwc_reg(window, VREG(WINCTL), 0ULL);
+	write_hvwc_reg(window, VREG(WIN_STATUS), 0ULL);
+	write_hvwc_reg(window, VREG(WIN_CTX_CACHING_CTL), 0ULL);
+	write_hvwc_reg(window, VREG(TX_RSVD_BUF_COUNT), 0ULL);
+	write_hvwc_reg(window, VREG(LRFIFO_WIN_PTR), 0ULL);
+	write_hvwc_reg(window, VREG(LNOTIFY_CTL), 0ULL);
+	write_hvwc_reg(window, VREG(LNOTIFY_PID), 0ULL);
+	write_hvwc_reg(window, VREG(LNOTIFY_LPID), 0ULL);
+	write_hvwc_reg(window, VREG(LNOTIFY_TID), 0ULL);
+	write_hvwc_reg(window, VREG(LNOTIFY_SCOPE), 0ULL);
+	write_hvwc_reg(window, VREG(NX_UTIL_ADDER), 0ULL);
+
+	/* Skip read-only registers: NX_UTIL and NX_UTIL_SE */
+
+	/*
+	 * The send and receive window credit adder registers are also
+	 * accessible from HVWC and have been initialized above. We don't
+	 * need to initialize from the OS/User Window Context, so skip
+	 * following calls:
+	 *
+	 *	write_uwc_reg(window, VREG(TX_WCRED_ADDER), 0ULL);
+	 *	write_uwc_reg(window, VREG(LRX_WCRED_ADDER), 0ULL);
+	 */
+}
+
+/*
+ * Initialize window context registers related to Address Translation.
+ * These registers are common to send/receive windows although they
+ * differ for user/kernel windows. As we resolve the TODOs we may
+ * want to add fields to vas_winctx and move the initialization to
+ * init_vas_winctx_regs().
+ */
+static void init_xlate_regs(struct vas_window *window, bool user_win)
+{
+	u64 lpcr, val;
+
+	/*
+	 * MSR_TA, MSR_US are false for both kernel and user.
+	 * MSR_DR and MSR_PR are false for kernel.
+	 */
+	val = 0ULL;
+	val = SET_FIELD(VAS_XLATE_MSR_HV, val, 1);
+	val = SET_FIELD(VAS_XLATE_MSR_SF, val, 1);
+	if (user_win) {
+		val = SET_FIELD(VAS_XLATE_MSR_DR, val, 1);
+		val = SET_FIELD(VAS_XLATE_MSR_PR, val, 1);
+	}
+	write_hvwc_reg(window, VREG(XLATE_MSR), val);
+
+	lpcr = mfspr(SPRN_LPCR);
+	val = 0ULL;
+	/*
+	 * NOTE: From Section 5.7.8.1 Segment Lookaside Buffer of the
+	 *	 Power ISA, v3.0B, Page size encoding is 0 = 4KB, 5 = 64KB.
+	 *
+	 * NOTE: From Section 1.3.1, Address Translation Context of the
+	 *	 Nest MMU Workbook, LPCR_SC should be 0 for Power9.
+	 */
+	val = SET_FIELD(VAS_XLATE_LPCR_PAGE_SIZE, val, 5);
+	val = SET_FIELD(VAS_XLATE_LPCR_ISL, val, lpcr & LPCR_ISL);
+	val = SET_FIELD(VAS_XLATE_LPCR_TC, val, lpcr & LPCR_TC);
+	val = SET_FIELD(VAS_XLATE_LPCR_SC, val, 0);
+	write_hvwc_reg(window, VREG(XLATE_LPCR), val);
+
+	/*
+	 * Section 1.3.1 (Address translation Context) of NMMU workbook.
+	 *	0b00	Hashed Page Table mode
+	 *	0b01	Reserved
+	 *	0b10	Radix on HPT
+	 *	0b11	Radix on Radix
+	 */
+	val = 0ULL;
+	val = SET_FIELD(VAS_XLATE_MODE, val, radix_enabled() ? 3 : 2);
+	write_hvwc_reg(window, VREG(XLATE_CTL), val);
+
+	/*
+	 * TODO: Can we mfspr(AMR) even for user windows?
+	 */
+	val = 0ULL;
+	val = SET_FIELD(VAS_AMR, val, mfspr(SPRN_AMR));
+	write_hvwc_reg(window, VREG(AMR), val);
+
+	val = 0ULL;
+	val = SET_FIELD(VAS_SEIDR, val, 0);
+	write_hvwc_reg(window, VREG(SEIDR), val);
+}
+
+/*
+ * Initialize Reserved Send Buffer Count for the send window. It involves
+ * writing to the register, reading it back to confirm that the hardware
+ * has enough buffers to reserve. See section 1.3.1.2.1 of VAS workbook.
+ *
+ * Since we can only make a best-effort attempt to fulfill the request,
+ * we don't return any errors if we cannot.
+ *
+ * TODO: Reserved (aka dedicated) send buffers are not supported yet.
+ */
+static void init_rsvd_tx_buf_count(struct vas_window *txwin,
+				struct vas_winctx *winctx)
+{
+	write_hvwc_reg(txwin, VREG(TX_RSVD_BUF_COUNT), 0ULL);
+}
+
+/*
+ * init_winctx_regs()
+ *	Initialize window context registers for a receive window.
+ *	Except for caching control and marking window open, the registers
+ *	are initialized in the order listed in Section 3.1.4 (Window Context
+ *	Cache Register Details) of the VAS workbook although they don't need
+ *	to be.
+ *
+ * Design note: For NX receive windows, NX allocates the FIFO buffer in OPAL
+ *	(so that it can get a large contiguous area) and passes that buffer
+ *	to kernel via device tree. We now write that buffer address to the
+ *	FIFO BAR. Would it make sense to do this all in OPAL? i.e have OPAL
+ *	write the per-chip RX FIFO addresses to the windows during boot-up
+ *	as a one-time task? That could work for NX but what about other
+ *	receivers?  Let the receivers tell us the rx-fifo buffers for now.
+ */
+int init_winctx_regs(struct vas_window *window, struct vas_winctx *winctx)
+{
+	u64 val;
+	int fifo_size;
+
+	reset_window_regs(window);
+
+	val = 0ULL;
+	val = SET_FIELD(VAS_LPID, val, winctx->lpid);
+	write_hvwc_reg(window, VREG(LPID), val);
+
+	val = 0ULL;
+	val = SET_FIELD(VAS_PID_ID, val, winctx->pidr);
+	write_hvwc_reg(window, VREG(PID), val);
+
+	init_xlate_regs(window, winctx->user_win);
+
+	val = 0ULL;
+	val = SET_FIELD(VAS_FAULT_TX_WIN, val, 0);
+	write_hvwc_reg(window, VREG(FAULT_TX_WIN), val);
+
+	/* In PowerNV, interrupts go to HV. */
+	write_hvwc_reg(window, VREG(OSU_INTR_SRC_RA), 0ULL);
+
+	val = 0ULL;
+	val = SET_FIELD(VAS_HV_INTR_SRC_RA, val, winctx->irq_port);
+	write_hvwc_reg(window, VREG(HV_INTR_SRC_RA), val);
+
+	val = 0ULL;
+	val = SET_FIELD(VAS_PSWID_EA_HANDLE, val, winctx->pswid);
+	write_hvwc_reg(window, VREG(PSWID), val);
+
+	write_hvwc_reg(window, VREG(SPARE1), 0ULL);
+	write_hvwc_reg(window, VREG(SPARE2), 0ULL);
+	write_hvwc_reg(window, VREG(SPARE3), 0ULL);
+
+	/*
+	 * NOTE: VAS expects the FIFO address to be copied into the LFIFO_BAR
+	 *	 register as is - do NOT shift the address into VAS_LFIFO_BAR
+	 *	 bit fields! Ok to set the page migration select fields -
+	 *	 VAS ignores the lower 10+ bits in the address anyway, because
+	 *	 the minimum FIFO size is 1K?
+	 *
+	 * See also: Design note in function header.
+	 */
+	val = __pa(winctx->rx_fifo);
+	val = SET_FIELD(VAS_PAGE_MIGRATION_SELECT, val, 0);
+	write_hvwc_reg(window, VREG(LFIFO_BAR), val);
+
+	val = 0ULL;
+	val = SET_FIELD(VAS_LDATA_STAMP, val, winctx->data_stamp);
+	write_hvwc_reg(window, VREG(LDATA_STAMP_CTL), val);
+
+	val = 0ULL;
+	val = SET_FIELD(VAS_LDMA_TYPE, val, winctx->dma_type);
+	val = SET_FIELD(VAS_LDMA_FIFO_DISABLE, val, winctx->fifo_disable);
+	write_hvwc_reg(window, VREG(LDMA_CACHE_CTL), val);
+
+	write_hvwc_reg(window, VREG(LRFIFO_PUSH), 0ULL);
+	write_hvwc_reg(window, VREG(CURR_MSG_COUNT), 0ULL);
+	write_hvwc_reg(window, VREG(LNOTIFY_AFTER_COUNT), 0ULL);
+
+	val = 0ULL;
+	val = SET_FIELD(VAS_LRX_WCRED, val, winctx->wcreds_max);
+	write_hvwc_reg(window, VREG(LRX_WCRED), val);
+
+	val = 0ULL;
+	val = SET_FIELD(VAS_TX_WCRED, val, winctx->wcreds_max);
+	write_hvwc_reg(window, VREG(TX_WCRED), val);
+
+	write_hvwc_reg(window, VREG(LRX_WCRED_ADDER), 0ULL);
+	write_hvwc_reg(window, VREG(TX_WCRED_ADDER), 0ULL);
+
+	fifo_size = winctx->rx_fifo_size / 1024;
+
+	val = 0ULL;
+	val = SET_FIELD(VAS_LFIFO_SIZE, val, ilog2(fifo_size));
+	write_hvwc_reg(window, VREG(LFIFO_SIZE), val);
+
+	/* Update window control and caching control registers last so
+	 * we mark the window open only after fully initializing it and
+	 * pushing context to cache.
+	 */
+
+	write_hvwc_reg(window, VREG(WIN_STATUS), 0ULL);
+
+	init_rsvd_tx_buf_count(window, winctx);
+
+	/* for a send window, point to the matching receive window */
+	val = 0ULL;
+	val = SET_FIELD(VAS_LRX_WIN_ID, val, winctx->rx_win_id);
+	write_hvwc_reg(window, VREG(LRFIFO_WIN_PTR), val);
+
+	write_hvwc_reg(window, VREG(SPARE4), 0ULL);
+
+	val = 0ULL;
+	val = SET_FIELD(VAS_NOTIFY_DISABLE, val, winctx->notify_disable);
+	val = SET_FIELD(VAS_INTR_DISABLE, val, winctx->intr_disable);
+	val = SET_FIELD(VAS_NOTIFY_EARLY, val, winctx->notify_early);
+	val = SET_FIELD(VAS_NOTIFY_OSU_INTR, val, winctx->notify_os_intr_reg);
+	write_hvwc_reg(window, VREG(LNOTIFY_CTL), val);
+
+	val = 0ULL;
+	val = SET_FIELD(VAS_LNOTIFY_PID, val, winctx->lnotify_pid);
+	write_hvwc_reg(window, VREG(LNOTIFY_PID), val);
+
+	val = 0ULL;
+	val = SET_FIELD(VAS_LNOTIFY_LPID, val, winctx->lnotify_lpid);
+	write_hvwc_reg(window, VREG(LNOTIFY_LPID), val);
+
+	val = 0ULL;
+	val = SET_FIELD(VAS_LNOTIFY_TID, val, winctx->lnotify_tid);
+	write_hvwc_reg(window, VREG(LNOTIFY_TID), val);
+
+	val = 0ULL;
+	val = SET_FIELD(VAS_LNOTIFY_MIN_SCOPE, val, winctx->min_scope);
+	val = SET_FIELD(VAS_LNOTIFY_MAX_SCOPE, val, winctx->max_scope);
+	write_hvwc_reg(window, VREG(LNOTIFY_SCOPE), val);
+
+	/* Skip read-only registers NX_UTIL and NX_UTIL_SE */
+
+	write_hvwc_reg(window, VREG(SPARE5), 0ULL);
+	write_hvwc_reg(window, VREG(NX_UTIL_ADDER), 0ULL);
+	write_hvwc_reg(window, VREG(SPARE6), 0ULL);
+
+	/* Finally, push window context to memory and... */
+	val = 0ULL;
+	val = SET_FIELD(VAS_PUSH_TO_MEM, val, 1);
+	write_hvwc_reg(window, VREG(WIN_CTX_CACHING_CTL), val);
+
+	/* ... mark the window open for business */
+	val = 0ULL;
+	val = SET_FIELD(VAS_WINCTL_REJ_NO_CREDIT, val, winctx->rej_no_credit);
+	val = SET_FIELD(VAS_WINCTL_PIN, val, winctx->pin_win);
+	val = SET_FIELD(VAS_WINCTL_TX_WCRED_MODE, val, winctx->tx_wcred_mode);
+	val = SET_FIELD(VAS_WINCTL_RX_WCRED_MODE, val, winctx->rx_wcred_mode);
+	val = SET_FIELD(VAS_WINCTL_TX_WORD_MODE, val, winctx->tx_word_mode);
+	val = SET_FIELD(VAS_WINCTL_RX_WORD_MODE, val, winctx->rx_word_mode);
+	val = SET_FIELD(VAS_WINCTL_FAULT_WIN, val, winctx->fault_win);
+	val = SET_FIELD(VAS_WINCTL_NX_WIN, val, winctx->nx_win);
+	val = SET_FIELD(VAS_WINCTL_OPEN, val, 1);
+	write_hvwc_reg(window, VREG(WINCTL), val);
+
+	return 0;
+}
+
+static DEFINE_SPINLOCK(vas_ida_lock);
+
+static void vas_release_window_id(struct ida *ida, int winid)
+{
+	spin_lock(&vas_ida_lock);
+	ida_remove(ida, winid);
+	spin_unlock(&vas_ida_lock);
+}
+
+static int vas_assign_window_id(struct ida *ida)
+{
+	int rc, winid;
+
+	do {
+		rc = ida_pre_get(ida, GFP_KERNEL);
+		if (!rc)
+			return -EAGAIN;
+
+		spin_lock(&vas_ida_lock);
+		rc = ida_get_new(ida, &winid);
+		spin_unlock(&vas_ida_lock);
+	} while (rc == -EAGAIN);
+
+	if (rc)
+		return rc;
+
+	if (winid > VAS_WINDOWS_PER_CHIP) {
+		pr_err("Too many (%d) open windows\n", winid);
+		vas_release_window_id(ida, winid);
+		return -EAGAIN;
+	}
+
+	return winid;
+}
+
+static void vas_window_free(struct vas_window *window)
+{
+	int winid = window->winid;
+	struct vas_instance *vinst = window->vinst;
+
+	unmap_winctx_mmio_bars(window);
+	kfree(window);
+
+	vas_release_window_id(&vinst->ida, winid);
+}
+
+static struct vas_window *vas_window_alloc(struct vas_instance *vinst)
+{
+	int winid;
+	struct vas_window *window;
+
+	winid = vas_assign_window_id(&vinst->ida);
+	if (winid < 0)
+		return ERR_PTR(winid);
+
+	window = kzalloc(sizeof(*window), GFP_KERNEL);
+	if (!window)
+		goto out_free;
+
+	window->vinst = vinst;
+	window->winid = winid;
+
+	if (map_winctx_mmio_bars(window))
+		goto out_free;
+
+	return window;
+
+out_free:
+	kfree(window);
+	vas_release_window_id(&vinst->ida, winid);
+	return ERR_PTR(-ENOMEM);
+}
+
+static void put_rx_win(struct vas_window *rxwin)
+{
+	/* Better not be a send window! */
+	WARN_ON_ONCE(rxwin->tx_win);
+
+	atomic_dec(&rxwin->num_txwins);
+}
+
+/*
+ * Get the VAS receive window associated with NX engine identified
+ * by @cop and if applicable, @pswid.
+ *
+ * See also function header of set_vinst_win().
+ */
+static struct vas_window *get_vinst_rxwin(struct vas_instance *vinst,
+			enum vas_cop_type cop, u32 pswid)
+{
+	struct vas_window *rxwin;
+
+	mutex_lock(&vinst->mutex);
+
+	if (cop == VAS_COP_TYPE_842 || cop == VAS_COP_TYPE_842_HIPRI)
+		rxwin = vinst->rxwin[cop] ?: ERR_PTR(-EINVAL);
+	else
+		rxwin = ERR_PTR(-EINVAL);
+
+	if (!IS_ERR(rxwin))
+		atomic_inc(&rxwin->num_txwins);
+
+	mutex_unlock(&vinst->mutex);
+
+	return rxwin;
+}
+
+/*
+ * We have two tables of windows in a VAS instance. The first one,
+ * ->windows[], contains all the windows in the instance and allows
+ * looking up a window by its id. It is used to look up send windows
+ * during fault handling and receive windows when pairing user space
+ * send/receive windows.
+ *
+ * The second table, ->rxwin[], contains receive windows that are
+ * associated with NX engines. This table has VAS_COP_TYPE_MAX
+ * entries and is used to look up a receive window by its
+ * coprocessor type.
+ *
+ * Here, we save @window in the ->windows[] table. If it is a receive
+ * window, we also save the window in the ->rxwin[] table.
+ */
+static void set_vinst_win(struct vas_instance *vinst,
+			struct vas_window *window)
+{
+	int id = window->winid;
+
+	mutex_lock(&vinst->mutex);
+
+	/*
+	 * There should only be one receive window for a coprocessor type
+	 * unless its a user (FTW) window.
+	 */
+	if (!window->user_win && !window->tx_win) {
+		WARN_ON_ONCE(vinst->rxwin[window->cop]);
+		vinst->rxwin[window->cop] = window;
+	}
+
+	WARN_ON_ONCE(vinst->windows[id] != NULL);
+	vinst->windows[id] = window;
+
+	mutex_unlock(&vinst->mutex);
+}
+
+/*
+ * Clear this window from the table(s) of windows for this VAS instance.
+ * See also function header of set_vinst_win().
+ */
+static void clear_vinst_win(struct vas_window *window)
+{
+	int id = window->winid;
+	struct vas_instance *vinst = window->vinst;
+
+	mutex_lock(&vinst->mutex);
+
+	if (!window->user_win && !window->tx_win) {
+		WARN_ON_ONCE(!vinst->rxwin[window->cop]);
+		vinst->rxwin[window->cop] = NULL;
+	}
+
+	WARN_ON_ONCE(vinst->windows[id] != window);
+	vinst->windows[id] = NULL;
+
+	mutex_unlock(&vinst->mutex);
+}
+
+static void init_winctx_for_rxwin(struct vas_window *rxwin,
+			struct vas_rx_win_attr *rxattr,
+			struct vas_winctx *winctx)
+{
+	/*
+	 * We first zero (memset()) all fields and only set non-zero fields.
+	 * Following fields are 0/false but maybe deserve a comment:
+	 *
+	 *	->notify_os_intr_reg	In powerNV, send intrs to HV
+	 *	->notify_disable	False for NX windows
+	 *	->intr_disable		False for Fault Windows
+	 *	->xtra_write		False for NX windows
+	 *	->notify_early		NA for NX windows
+	 *	->rsvd_txbuf_count	NA for Rx windows
+	 *	->lpid, ->pid, ->tid	NA for Rx windows
+	 */
+
+	memset(winctx, 0, sizeof(struct vas_winctx));
+
+	winctx->rx_fifo = rxattr->rx_fifo;
+	winctx->rx_fifo_size = rxattr->rx_fifo_size;
+	winctx->wcreds_max = rxattr->wcreds_max ?: VAS_WCREDS_DEFAULT;
+	winctx->pin_win = rxattr->pin_win;
+
+	winctx->nx_win = rxattr->nx_win;
+	winctx->fault_win = rxattr->fault_win;
+	winctx->rx_word_mode = rxattr->rx_win_ord_mode;
+	winctx->tx_word_mode = rxattr->tx_win_ord_mode;
+	winctx->rx_wcred_mode = rxattr->rx_wcred_mode;
+	winctx->tx_wcred_mode = rxattr->tx_wcred_mode;
+
+	if (winctx->nx_win) {
+		winctx->data_stamp = true;
+		winctx->intr_disable = true;
+		winctx->pin_win = true;
+
+		WARN_ON_ONCE(winctx->fault_win);
+		WARN_ON_ONCE(!winctx->rx_word_mode);
+		WARN_ON_ONCE(!winctx->tx_word_mode);
+		WARN_ON_ONCE(winctx->notify_after_count);
+	} else if (winctx->fault_win) {
+		winctx->notify_disable = true;
+	} else if (winctx->user_win) {
+		/*
+		 * Section 1.8.1 Low Latency Core-Core Wake up of
+		 * the VAS workbook:
+		 *
+		 *      - disable credit checks ([tr]x_wcred_mode = false)
+		 *      - disable FIFO writes
+		 *      - enable ASB_Notify, disable interrupt
+		 */
+		winctx->fifo_disable = true;
+		winctx->intr_disable = true;
+		winctx->rx_fifo = NULL;
+	}
+
+	winctx->lnotify_lpid = rxattr->lnotify_lpid;
+	winctx->lnotify_pid = rxattr->lnotify_pid;
+	winctx->lnotify_tid = rxattr->lnotify_tid;
+	winctx->pswid = rxattr->pswid;
+	winctx->dma_type = VAS_DMA_TYPE_INJECT;
+	winctx->tc_mode = rxattr->tc_mode;
+
+	winctx->min_scope = VAS_SCOPE_LOCAL;
+	winctx->max_scope = VAS_SCOPE_VECTORED_GROUP;
+}
+
+static bool rx_win_args_valid(enum vas_cop_type cop,
+			struct vas_rx_win_attr *attr)
+{
+	dump_rx_win_attr(attr);
+
+	if (cop >= VAS_COP_TYPE_MAX)
+		return false;
+
+	if (cop != VAS_COP_TYPE_FTW &&
+				attr->rx_fifo_size < VAS_RX_FIFO_SIZE_MIN)
+		return false;
+
+	if (attr->rx_fifo_size > VAS_RX_FIFO_SIZE_MAX)
+		return false;
+
+	if (attr->nx_win) {
+		/* cannot be fault or user window if it is nx */
+		if (attr->fault_win || attr->user_win)
+			return false;
+		/*
+		 * Section 3.1.4.32: NX Windows must not disable notification,
+		 *	and must not enable interrupts or early notification.
+		 */
+		if (attr->notify_disable || !attr->intr_disable ||
+				attr->notify_early)
+			return false;
+	} else if (attr->fault_win) {
+		/* cannot be both fault and user window */
+		if (attr->user_win)
+			return false;
+
+		/*
+		 * Section 3.1.4.32: Fault windows must disable notification
+		 *	but not interrupts.
+		 */
+		if (!attr->notify_disable || attr->intr_disable)
+			return false;
+
+	} else if (attr->user_win) {
+		/*
+		 * User receive windows are only for fast-thread-wakeup
+		 * (FTW). They don't need a FIFO and must disable interrupts
+		 */
+		if (attr->rx_fifo || attr->rx_fifo_size || !attr->intr_disable)
+			return false;
+	} else {
+		/* Rx window must be one of NX or Fault or User window. */
+		return false;
+	}
+
+	return true;
+}
+
+void vas_init_rx_win_attr(struct vas_rx_win_attr *rxattr, enum vas_cop_type cop)
+{
+	memset(rxattr, 0, sizeof(*rxattr));
+
+	if (cop == VAS_COP_TYPE_842 || cop == VAS_COP_TYPE_842_HIPRI) {
+		rxattr->pin_win = true;
+		rxattr->nx_win = true;
+		rxattr->fault_win = false;
+		rxattr->intr_disable = true;
+		rxattr->rx_wcred_mode = true;
+		rxattr->tx_wcred_mode = true;
+		rxattr->rx_win_ord_mode = true;
+		rxattr->tx_win_ord_mode = true;
+	} else if (cop == VAS_COP_TYPE_FAULT) {
+		rxattr->pin_win = true;
+		rxattr->fault_win = true;
+		rxattr->notify_disable = true;
+		rxattr->rx_wcred_mode = true;
+		rxattr->tx_wcred_mode = true;
+		rxattr->rx_win_ord_mode = true;
+		rxattr->tx_win_ord_mode = true;
+	} else if (cop == VAS_COP_TYPE_FTW) {
+		rxattr->user_win = true;
+		rxattr->intr_disable = true;
+
+		/*
+		 * As noted in the VAS Workbook we disable credit checks.
+		 * If we enable credit checks in the future, we must also
+		 * implement a mechanism to return the user credits or new
+		 * paste operations will fail.
+		 */
+	}
+}
+EXPORT_SYMBOL_GPL(vas_init_rx_win_attr);
+
+struct vas_window *vas_rx_win_open(int vasid, enum vas_cop_type cop,
+			struct vas_rx_win_attr *rxattr)
+{
+	struct vas_window *rxwin;
+	struct vas_winctx winctx;
+	struct vas_instance *vinst;
+
+	if (!rx_win_args_valid(cop, rxattr))
+		return ERR_PTR(-EINVAL);
+
+	vinst = find_vas_instance(vasid);
+	if (!vinst) {
+		pr_devel("vasid %d not found!\n", vasid);
+		return ERR_PTR(-EINVAL);
+	}
+	pr_devel("Found instance %d\n", vasid);
+
+	rxwin = vas_window_alloc(vinst);
+	if (IS_ERR(rxwin)) {
+		pr_devel("Unable to allocate memory for Rx window\n");
+		return rxwin;
+	}
+
+	rxwin->tx_win = false;
+	rxwin->nx_win = rxattr->nx_win;
+	rxwin->user_win = rxattr->user_win;
+	rxwin->cop = cop;
+	if (rxattr->user_win)
+		rxwin->pid = task_pid_vnr(current);
+
+	init_winctx_for_rxwin(rxwin, rxattr, &winctx);
+	init_winctx_regs(rxwin, &winctx);
+
+	set_vinst_win(vinst, rxwin);
+
+	return rxwin;
+}
+EXPORT_SYMBOL_GPL(vas_rx_win_open);
+
+void vas_init_tx_win_attr(struct vas_tx_win_attr *txattr, enum vas_cop_type cop)
+{
+	memset(txattr, 0, sizeof(*txattr));
+
+	if (cop == VAS_COP_TYPE_842 || cop == VAS_COP_TYPE_842_HIPRI) {
+		txattr->rej_no_credit = false;
+		txattr->rx_wcred_mode = true;
+		txattr->tx_wcred_mode = true;
+		txattr->rx_win_ord_mode = true;
+		txattr->tx_win_ord_mode = true;
+	} else if (cop == VAS_COP_TYPE_FTW) {
+		txattr->user_win = true;
+	}
+}
+EXPORT_SYMBOL_GPL(vas_init_tx_win_attr);
+
+static void init_winctx_for_txwin(struct vas_window *txwin,
+			struct vas_tx_win_attr *txattr,
+			struct vas_winctx *winctx)
+{
+	/*
+	 * We first zero all fields and only set non-zero ones. Following
+	 * are some fields set to 0/false for the stated reason:
+	 *
+	 *	->notify_os_intr_reg	In powernv, send intrs to HV
+	 *	->rsvd_txbuf_count	Not supported yet.
+	 *	->notify_disable	False for NX windows
+	 *	->xtra_write		False for NX windows
+	 *	->notify_early		NA for NX windows
+	 *	->lnotify_lpid		NA for Tx windows
+	 *	->lnotify_pid		NA for Tx windows
+	 *	->lnotify_tid		NA for Tx windows
+	 *	->tx_win_cred_mode	Ignore for now for NX windows
+	 *	->rx_win_cred_mode	Ignore for now for NX windows
+	 */
+	memset(winctx, 0, sizeof(struct vas_winctx));
+
+	winctx->wcreds_max = txattr->wcreds_max ?: VAS_WCREDS_DEFAULT;
+
+	winctx->user_win = txattr->user_win;
+	winctx->nx_win = txwin->rxwin->nx_win;
+	winctx->pin_win = txattr->pin_win;
+
+	winctx->rx_wcred_mode = txattr->rx_wcred_mode;
+	winctx->tx_wcred_mode = txattr->tx_wcred_mode;
+	winctx->rx_word_mode = txattr->rx_win_ord_mode;
+	winctx->tx_word_mode = txattr->tx_win_ord_mode;
+
+	if (winctx->nx_win) {
+		winctx->data_stamp = true;
+		winctx->intr_disable = true;
+	}
+
+	winctx->lpid = txattr->lpid;
+	winctx->pidr = txattr->pidr;
+	winctx->rx_win_id = txwin->rxwin->winid;
+
+	winctx->dma_type = VAS_DMA_TYPE_INJECT;
+	winctx->tc_mode = txattr->tc_mode;
+	winctx->min_scope = VAS_SCOPE_LOCAL;
+	winctx->max_scope = VAS_SCOPE_VECTORED_GROUP;
+
+	winctx->pswid = 0;
+}
+
+static bool tx_win_args_valid(enum vas_cop_type cop,
+			struct vas_tx_win_attr *attr)
+{
+	if (attr->tc_mode != VAS_THRESH_DISABLED)
+		return false;
+
+	if (cop > VAS_COP_TYPE_MAX)
+		return false;
+
+	if (attr->user_win &&
+			(cop != VAS_COP_TYPE_FTW || attr->rsvd_txbuf_count))
+		return false;
+
+	return true;
+}
+
+struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop,
+			struct vas_tx_win_attr *attr)
+{
+	int rc;
+	struct vas_window *txwin;
+	struct vas_window *rxwin;
+	struct vas_winctx winctx;
+	struct vas_instance *vinst;
+
+	if (!tx_win_args_valid(cop, attr))
+		return ERR_PTR(-EINVAL);
+
+	vinst = find_vas_instance(vasid);
+	if (!vinst) {
+		pr_devel("vasid %d not found!\n", vasid);
+		return ERR_PTR(-EINVAL);
+	}
+
+	rxwin = get_vinst_rxwin(vinst, cop, attr->pswid);
+	if (IS_ERR(rxwin)) {
+		pr_devel("No RxWin for vasid %d, cop %d\n", vasid, cop);
+		return rxwin;
+	}
+
+	txwin = vas_window_alloc(vinst);
+	if (IS_ERR(txwin)) {
+		rc = PTR_ERR(txwin);
+		goto put_rxwin;
+	}
+
+	txwin->tx_win = 1;
+	txwin->rxwin = rxwin;
+	txwin->nx_win = txwin->rxwin->nx_win;
+	txwin->pid = attr->pid;
+	txwin->user_win = attr->user_win;
+
+	init_winctx_for_txwin(txwin, attr, &winctx);
+
+	init_winctx_regs(txwin, &winctx);
+
+	/*
+	 * If its a kernel send window, map the window address into the
+	 * kernel's address space. For user windows, user must issue an
+	 * mmap() to map the window into their address space.
+	 *
+	 * NOTE: If kernel ever resubmits a user CRB after handling a page
+	 *	 fault, we will need to map this into kernel as well.
+	 */
+	if (!txwin->user_win) {
+		txwin->paste_kaddr = map_paste_region(txwin);
+		if (IS_ERR(txwin->paste_kaddr)) {
+			rc = PTR_ERR(txwin->paste_kaddr);
+			goto free_window;
+		}
+	}
+
+	set_vinst_win(vinst, txwin);
+
+	return txwin;
+
+free_window:
+	vas_window_free(txwin);
+
+put_rxwin:
+	put_rx_win(rxwin);
+	return ERR_PTR(rc);
+
+}
+EXPORT_SYMBOL_GPL(vas_tx_win_open);
+
+int vas_copy_crb(void *crb, int offset)
+{
+	return vas_copy(crb, offset);
+}
+EXPORT_SYMBOL_GPL(vas_copy_crb);
+
+#define RMA_LSMP_REPORT_ENABLE PPC_BIT(53)
+int vas_paste_crb(struct vas_window *txwin, int offset, bool re)
+{
+	int rc;
+	void *addr;
+	uint64_t val;
+
+	/*
+	 * Only NX windows are supported for now and hardware assumes
+	 * report-enable flag is set for NX windows. Ensure software
+	 * complies too.
+	 */
+	WARN_ON_ONCE(txwin->nx_win && !re);
+
+	addr = txwin->paste_kaddr;
+	if (re) {
+		/*
+		 * Set the REPORT_ENABLE bit (equivalent to writing
+		 * to 1K offset of the paste address)
+		 */
+		val = SET_FIELD(RMA_LSMP_REPORT_ENABLE, 0ULL, 1);
+		addr += val;
+	}
+
+	/*
+	 * Map the raw CR value from vas_paste() to an error code (there
+	 * is just pass or fail for now though).
+	 */
+	rc = vas_paste(addr, offset);
+	if (rc == 2)
+		rc = 0;
+	else
+		rc = -EINVAL;
+
+	print_fifo_msg_count(txwin);
+
+	return rc;
+}
+EXPORT_SYMBOL_GPL(vas_paste_crb);
+
+static void poll_window_busy_state(struct vas_window *window)
+{
+	int busy;
+	u64 val;
+
+retry:
+	/*
+	 * Poll Window Busy flag
+	 */
+	val = read_hvwc_reg(window, VREG(WIN_STATUS));
+	busy = GET_FIELD(VAS_WIN_BUSY, val);
+	if (busy) {
+		val = 0;
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		schedule_timeout(HZ);
+		goto retry;
+	}
+}
+
+static void poll_window_castout(struct vas_window *window)
+{
+	int cached;
+	u64 val;
+
+	/* Cast window context out of the cache */
+retry:
+	val = read_hvwc_reg(window, VREG(WIN_CTX_CACHING_CTL));
+	cached = GET_FIELD(VAS_WIN_CACHE_STATUS, val);
+	if (cached) {
+		val = 0ULL;
+		val = SET_FIELD(VAS_CASTOUT_REQ, val, 1);
+		val = SET_FIELD(VAS_PUSH_TO_MEM, val, 0);
+		write_hvwc_reg(window, VREG(WIN_CTX_CACHING_CTL), val);
+
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		schedule_timeout(HZ);
+		goto retry;
+	}
+}
+
+/*
+ * Close a window.
+ *
+ * See Section 1.12.1 of VAS workbook v1.05 for details on closing window:
+ *	- Disable new paste operations (unmap paste address)
+ *	- Poll for the "Window Busy" bit to be cleared
+ *	- Clear the Open/Enable bit for the Window.
+ *	- Poll for return of window Credits (implies FIFO empty for Rx win?)
+ *	- Unpin and cast window context out of cache
+ *
+ * Besides the hardware, kernel has some bookkeeping of course.
+ */
+int vas_win_close(struct vas_window *window)
+{
+	u64 val;
+
+	if (!window)
+		return 0;
+
+	if (!window->tx_win && atomic_read(&window->num_txwins) != 0) {
+		pr_devel("Attempting to close an active Rx window!\n");
+		WARN_ON_ONCE(1);
+		return -EBUSY;
+	}
+
+	unmap_paste_region(window);
+
+	clear_vinst_win(window);
+
+	poll_window_busy_state(window);
+
+	/* Unpin window from cache and close it */
+	val = read_hvwc_reg(window, VREG(WINCTL));
+	val = SET_FIELD(VAS_WINCTL_PIN, val, 0);
+	val = SET_FIELD(VAS_WINCTL_OPEN, val, 0);
+	write_hvwc_reg(window, VREG(WINCTL), val);
+
+	poll_window_castout(window);
+
+	/* if send window, drop reference to matching receive window */
+	if (window->tx_win)
+		put_rx_win(window->rxwin);
+
+	vas_window_free(window);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(vas_win_close);
diff --git a/arch/powerpc/platforms/powernv/vas.c b/arch/powerpc/platforms/powernv/vas.c
new file mode 100644
index 000000000000..565a4878fefa
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/vas.c
@@ -0,0 +1,151 @@
+/*
+ * Copyright 2016-17 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#define pr_fmt(fmt) "vas: " fmt
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/platform_device.h>
+#include <linux/of_platform.h>
+#include <linux/of_address.h>
+#include <linux/of.h>
+
+#include "vas.h"
+
+static DEFINE_MUTEX(vas_mutex);
+static LIST_HEAD(vas_instances);
+
+static int init_vas_instance(struct platform_device *pdev)
+{
+	int rc, vasid;
+	struct resource *res;
+	struct vas_instance *vinst;
+	struct device_node *dn = pdev->dev.of_node;
+
+	rc = of_property_read_u32(dn, "ibm,vas-id", &vasid);
+	if (rc) {
+		pr_err("No ibm,vas-id property for %s?\n", pdev->name);
+		return -ENODEV;
+	}
+
+	if (pdev->num_resources != 4) {
+		pr_err("Unexpected DT configuration for [%s, %d]\n",
+				pdev->name, vasid);
+		return -ENODEV;
+	}
+
+	vinst = kzalloc(sizeof(*vinst), GFP_KERNEL);
+	if (!vinst)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&vinst->node);
+	ida_init(&vinst->ida);
+	mutex_init(&vinst->mutex);
+	vinst->vas_id = vasid;
+	vinst->pdev = pdev;
+
+	res = &pdev->resource[0];
+	vinst->hvwc_bar_start = res->start;
+
+	res = &pdev->resource[1];
+	vinst->uwc_bar_start = res->start;
+
+	res = &pdev->resource[2];
+	vinst->paste_base_addr = res->start;
+
+	res = &pdev->resource[3];
+	if (res->end > 62) {
+		pr_err("Bad 'paste_win_id_shift' in DT, %llx\n", res->end);
+		goto free_vinst;
+	}
+
+	vinst->paste_win_id_shift = 63 - res->end;
+
+	pr_devel("Initialized instance [%s, %d], paste_base 0x%llx, "
+			"paste_win_id_shift 0x%llx\n", pdev->name, vasid,
+			vinst->paste_base_addr, vinst->paste_win_id_shift);
+
+	mutex_lock(&vas_mutex);
+	list_add(&vinst->node, &vas_instances);
+	mutex_unlock(&vas_mutex);
+
+	dev_set_drvdata(&pdev->dev, vinst);
+
+	return 0;
+
+free_vinst:
+	kfree(vinst);
+	return -ENODEV;
+
+}
+
+/*
+ * Although this is read/used multiple times, it is written to only
+ * during initialization.
+ */
+struct vas_instance *find_vas_instance(int vasid)
+{
+	struct list_head *ent;
+	struct vas_instance *vinst;
+
+	mutex_lock(&vas_mutex);
+	list_for_each(ent, &vas_instances) {
+		vinst = list_entry(ent, struct vas_instance, node);
+		if (vinst->vas_id == vasid) {
+			mutex_unlock(&vas_mutex);
+			return vinst;
+		}
+	}
+	mutex_unlock(&vas_mutex);
+
+	pr_devel("Instance %d not found\n", vasid);
+	return NULL;
+}
+
+static int vas_probe(struct platform_device *pdev)
+{
+	return init_vas_instance(pdev);
+}
+
+static const struct of_device_id powernv_vas_match[] = {
+	{ .compatible = "ibm,vas",},
+	{},
+};
+
+static struct platform_driver vas_driver = {
+	.driver = {
+		.name = "vas",
+		.of_match_table = powernv_vas_match,
+	},
+	.probe = vas_probe,
+};
+
+static int __init vas_init(void)
+{
+	int found = 0;
+	struct device_node *dn;
+
+	platform_driver_register(&vas_driver);
+
+	for_each_compatible_node(dn, NULL, "ibm,vas") {
+		of_platform_device_create(dn, NULL, NULL);
+		found++;
+	}
+
+	if (!found)
+		return -ENODEV;
+
+	pr_devel("Found %d instances\n", found);
+
+	return 0;
+}
+device_initcall(vas_init);
diff --git a/arch/powerpc/platforms/powernv/vas.h b/arch/powerpc/platforms/powernv/vas.h
new file mode 100644
index 000000000000..38dee5d50f31
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/vas.h
@@ -0,0 +1,467 @@
+/*
+ * Copyright 2016-17 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _VAS_H
+#define _VAS_H
+#include <linux/atomic.h>
+#include <linux/idr.h>
+#include <asm/vas.h>
+#include <linux/io.h>
+
+/*
+ * Overview of Virtual Accelerator Switchboard (VAS).
+ *
+ * VAS is a hardware "switchboard" that allows senders and receivers to
+ * exchange messages with _minimal_ kernel involvment. The receivers are
+ * typically NX coprocessor engines that perform compression or encryption
+ * in hardware, but receivers can also be other software threads.
+ *
+ * Senders are user/kernel threads that submit compression/encryption or
+ * other requests to the receivers. Senders must format their messages as
+ * Coprocessor Request Blocks (CRB)s and submit them using the "copy" and
+ * "paste" instructions which were introduced in Power9.
+ *
+ * A Power node can have (upto?) 8 Power chips. There is one instance of
+ * VAS in each Power9 chip. Each instance of VAS has 64K windows or ports,
+ * Senders and receivers must each connect to a separate window before they
+ * can exchange messages through the switchboard.
+ *
+ * Each window is described by two types of window contexts:
+ *
+ *	Hypervisor Window Context (HVWC) of size VAS_HVWC_SIZE bytes
+ *
+ *	OS/User Window Context (UWC) of size VAS_UWC_SIZE bytes.
+ *
+ * A window context can be viewed as a set of 64-bit registers. The settings
+ * in these registers configure/control/determine the behavior of the VAS
+ * hardware when messages are sent/received through the window. The registers
+ * in the HVWC are configured by the kernel while the registers in the UWC can
+ * be configured by the kernel or by the user space application that is using
+ * the window.
+ *
+ * The HVWCs for all windows on a specific instance of VAS are in a contiguous
+ * range of hardware addresses or Base address region (BAR) referred to as the
+ * HVWC BAR for the instance. Similarly the UWCs for all windows on an instance
+ * are referred to as the UWC BAR for the instance.
+ *
+ * The two BARs for each instance are defined Power9 MMIO Ranges spreadsheet
+ * and available to the kernel in the VAS node's "reg" property in the device
+ * tree:
+ *
+ *	/proc/device-tree/vasm@.../reg
+ *
+ * (see vas_probe() for details on the reg property).
+ *
+ * The kernel maps the HVWC and UWC BAR regions into the kernel address
+ * space (hvwc_map and uwc_map). The kernel can then access the window
+ * contexts of a specific window using:
+ *
+ *	 hvwc = hvwc_map + winid * VAS_HVWC_SIZE.
+ *	 uwc = uwc_map + winid * VAS_UWC_SIZE.
+ *
+ * where winid is the window index (0..64K).
+ *
+ * As mentioned, a window context is used to "configure" a window. Besides
+ * this configuration address, each _send_ window also has a unique hardware
+ * "paste" address that is used to submit requests/CRBs (see vas_paste_crb()).
+ *
+ * The hardware paste address for a window is computed using the "paste
+ * base address" and "paste win id shift" reg properties in the VAS device
+ * tree node using:
+ *
+ *	paste_addr = paste_base + ((winid << paste_win_id_shift))
+ *
+ * (again, see vas_probe() for ->paste_base_addr and ->paste_win_id_shift).
+ *
+ * The kernel maps this hardware address into the sender's address space
+ * after which they can use the 'paste' instruction (new in Power9) to
+ * send a message (submit a request aka CRB) to the coprocessor.
+ *
+ * NOTE: In the initial version, senders can only in-kernel drivers/threads.
+ *	 Support for user space threads will be added in follow-on patches.
+ *
+ * TODO: Do we need to map the UWC into user address space so they can return
+ *	 credits? Its NA for NX but may be needed for other receive windows.
+ *
+ */
+
+#define VAS_WINDOWS_PER_CHIP		(64 << 10)
+
+/*
+ * Hypervisor and OS/USer Window Context sizes
+ */
+#define VAS_HVWC_SIZE			512
+#define VAS_UWC_SIZE			PAGE_SIZE
+
+/*
+ * Initial per-process credits.
+ * Max send window credits:    4K-1 (12-bits in VAS_TX_WCRED)
+ * Max receive window credits: 64K-1 (16 bits in VAS_LRX_WCRED)
+ *
+ * TODO: Needs tuning for per-process credits
+ */
+#define VAS_WCREDS_MIN			16
+#define VAS_WCREDS_MAX			((64 << 10) - 1)
+#define VAS_WCREDS_DEFAULT		(1 << 10)
+
+/*
+ * VAS Window Context Register Offsets and bitmasks.
+ * See Section 3.1.4 of VAS Work book
+ */
+#define VAS_LPID_OFFSET			0x010
+#define VAS_LPID			PPC_BITMASK(0, 11)
+
+#define VAS_PID_OFFSET			0x018
+#define VAS_PID_ID			PPC_BITMASK(0, 19)
+
+#define VAS_XLATE_MSR_OFFSET		0x020
+#define VAS_XLATE_MSR_DR		PPC_BIT(0)
+#define VAS_XLATE_MSR_TA		PPC_BIT(1)
+#define VAS_XLATE_MSR_PR		PPC_BIT(2)
+#define VAS_XLATE_MSR_US		PPC_BIT(3)
+#define VAS_XLATE_MSR_HV		PPC_BIT(4)
+#define VAS_XLATE_MSR_SF		PPC_BIT(5)
+
+#define VAS_XLATE_LPCR_OFFSET		0x028
+#define VAS_XLATE_LPCR_PAGE_SIZE	PPC_BITMASK(0, 2)
+#define VAS_XLATE_LPCR_ISL		PPC_BIT(3)
+#define VAS_XLATE_LPCR_TC		PPC_BIT(4)
+#define VAS_XLATE_LPCR_SC		PPC_BIT(5)
+
+#define VAS_XLATE_CTL_OFFSET		0x030
+#define VAS_XLATE_MODE			PPC_BITMASK(0, 1)
+
+#define VAS_AMR_OFFSET			0x040
+#define VAS_AMR				PPC_BITMASK(0, 63)
+
+#define VAS_SEIDR_OFFSET		0x048
+#define VAS_SEIDR			PPC_BITMASK(0, 63)
+
+#define VAS_FAULT_TX_WIN_OFFSET		0x050
+#define VAS_FAULT_TX_WIN		PPC_BITMASK(48, 63)
+
+#define VAS_OSU_INTR_SRC_RA_OFFSET	0x060
+#define VAS_OSU_INTR_SRC_RA		PPC_BITMASK(8, 63)
+
+#define VAS_HV_INTR_SRC_RA_OFFSET	0x070
+#define VAS_HV_INTR_SRC_RA		PPC_BITMASK(8, 63)
+
+#define VAS_PSWID_OFFSET		0x078
+#define VAS_PSWID_EA_HANDLE		PPC_BITMASK(0, 31)
+
+#define VAS_SPARE1_OFFSET		0x080
+#define VAS_SPARE2_OFFSET		0x088
+#define VAS_SPARE3_OFFSET		0x090
+#define VAS_SPARE4_OFFSET		0x130
+#define VAS_SPARE5_OFFSET		0x160
+#define VAS_SPARE6_OFFSET		0x188
+
+#define VAS_LFIFO_BAR_OFFSET		0x0A0
+#define VAS_LFIFO_BAR			PPC_BITMASK(8, 53)
+#define VAS_PAGE_MIGRATION_SELECT	PPC_BITMASK(54, 56)
+
+#define VAS_LDATA_STAMP_CTL_OFFSET	0x0A8
+#define VAS_LDATA_STAMP			PPC_BITMASK(0, 1)
+#define VAS_XTRA_WRITE			PPC_BIT(2)
+
+#define VAS_LDMA_CACHE_CTL_OFFSET	0x0B0
+#define VAS_LDMA_TYPE			PPC_BITMASK(0, 1)
+#define VAS_LDMA_FIFO_DISABLE		PPC_BIT(2)
+
+#define VAS_LRFIFO_PUSH_OFFSET		0x0B8
+#define VAS_LRFIFO_PUSH			PPC_BITMASK(0, 15)
+
+#define VAS_CURR_MSG_COUNT_OFFSET	0x0C0
+#define VAS_CURR_MSG_COUNT		PPC_BITMASK(0, 7)
+
+#define VAS_LNOTIFY_AFTER_COUNT_OFFSET	0x0C8
+#define VAS_LNOTIFY_AFTER_COUNT		PPC_BITMASK(0, 7)
+
+#define VAS_LRX_WCRED_OFFSET		0x0E0
+#define VAS_LRX_WCRED			PPC_BITMASK(0, 15)
+
+#define VAS_LRX_WCRED_ADDER_OFFSET	0x190
+#define VAS_LRX_WCRED_ADDER		PPC_BITMASK(0, 15)
+
+#define VAS_TX_WCRED_OFFSET		0x0F0
+#define VAS_TX_WCRED			PPC_BITMASK(4, 15)
+
+#define VAS_TX_WCRED_ADDER_OFFSET	0x1A0
+#define VAS_TX_WCRED_ADDER		PPC_BITMASK(4, 15)
+
+#define VAS_LFIFO_SIZE_OFFSET		0x100
+#define VAS_LFIFO_SIZE			PPC_BITMASK(0, 3)
+
+#define VAS_WINCTL_OFFSET		0x108
+#define VAS_WINCTL_OPEN			PPC_BIT(0)
+#define VAS_WINCTL_REJ_NO_CREDIT	PPC_BIT(1)
+#define VAS_WINCTL_PIN			PPC_BIT(2)
+#define VAS_WINCTL_TX_WCRED_MODE	PPC_BIT(3)
+#define VAS_WINCTL_RX_WCRED_MODE	PPC_BIT(4)
+#define VAS_WINCTL_TX_WORD_MODE		PPC_BIT(5)
+#define VAS_WINCTL_RX_WORD_MODE		PPC_BIT(6)
+#define VAS_WINCTL_RSVD_TXBUF		PPC_BIT(7)
+#define VAS_WINCTL_THRESH_CTL		PPC_BITMASK(8, 9)
+#define VAS_WINCTL_FAULT_WIN		PPC_BIT(10)
+#define VAS_WINCTL_NX_WIN		PPC_BIT(11)
+
+#define VAS_WIN_STATUS_OFFSET		0x110
+#define VAS_WIN_BUSY			PPC_BIT(1)
+
+#define VAS_WIN_CTX_CACHING_CTL_OFFSET	0x118
+#define VAS_CASTOUT_REQ			PPC_BIT(0)
+#define VAS_PUSH_TO_MEM			PPC_BIT(1)
+#define VAS_WIN_CACHE_STATUS		PPC_BIT(4)
+
+#define VAS_TX_RSVD_BUF_COUNT_OFFSET	0x120
+#define VAS_RXVD_BUF_COUNT		PPC_BITMASK(58, 63)
+
+#define VAS_LRFIFO_WIN_PTR_OFFSET	0x128
+#define VAS_LRX_WIN_ID			PPC_BITMASK(0, 15)
+
+/*
+ * Local Notification Control Register controls what happens in _response_
+ * to a paste command and hence applies only to receive windows.
+ */
+#define VAS_LNOTIFY_CTL_OFFSET		0x138
+#define VAS_NOTIFY_DISABLE		PPC_BIT(0)
+#define VAS_INTR_DISABLE		PPC_BIT(1)
+#define VAS_NOTIFY_EARLY		PPC_BIT(2)
+#define VAS_NOTIFY_OSU_INTR		PPC_BIT(3)
+
+#define VAS_LNOTIFY_PID_OFFSET		0x140
+#define VAS_LNOTIFY_PID			PPC_BITMASK(0, 19)
+
+#define VAS_LNOTIFY_LPID_OFFSET		0x148
+#define VAS_LNOTIFY_LPID		PPC_BITMASK(0, 11)
+
+#define VAS_LNOTIFY_TID_OFFSET		0x150
+#define VAS_LNOTIFY_TID			PPC_BITMASK(0, 15)
+
+#define VAS_LNOTIFY_SCOPE_OFFSET	0x158
+#define VAS_LNOTIFY_MIN_SCOPE		PPC_BITMASK(0, 1)
+#define VAS_LNOTIFY_MAX_SCOPE		PPC_BITMASK(2, 3)
+
+#define VAS_NX_UTIL_OFFSET		0x1B0
+#define VAS_NX_UTIL			PPC_BITMASK(0, 63)
+
+/* SE: Side effects */
+#define VAS_NX_UTIL_SE_OFFSET		0x1B8
+#define VAS_NX_UTIL_SE			PPC_BITMASK(0, 63)
+
+#define VAS_NX_UTIL_ADDER_OFFSET	0x180
+#define VAS_NX_UTIL_ADDER		PPC_BITMASK(32, 63)
+
+/*
+ * Local Notify Scope Control Register. (Receive windows only).
+ */
+enum vas_notify_scope {
+	VAS_SCOPE_LOCAL,
+	VAS_SCOPE_GROUP,
+	VAS_SCOPE_VECTORED_GROUP,
+	VAS_SCOPE_UNUSED,
+};
+
+/*
+ * Local DMA Cache Control Register (Receive windows only).
+ */
+enum vas_dma_type {
+	VAS_DMA_TYPE_INJECT,
+	VAS_DMA_TYPE_WRITE,
+};
+
+/*
+ * Local Notify Scope Control Register. (Receive windows only).
+ * Not applicable to NX receive windows.
+ */
+enum vas_notify_after_count {
+	VAS_NOTIFY_AFTER_256 = 0,
+	VAS_NOTIFY_NONE,
+	VAS_NOTIFY_AFTER_2
+};
+
+/*
+ * One per instance of VAS. Each instance will have a separate set of
+ * receive windows, one per coprocessor type.
+ *
+ * See also function header of set_vinst_win() for details on ->windows[]
+ * and ->rxwin[] tables.
+ */
+struct vas_instance {
+	int vas_id;
+	struct ida ida;
+	struct list_head node;
+	struct platform_device *pdev;
+
+	u64 hvwc_bar_start;
+	u64 uwc_bar_start;
+	u64 paste_base_addr;
+	u64 paste_win_id_shift;
+
+	struct mutex mutex;
+	struct vas_window *rxwin[VAS_COP_TYPE_MAX];
+	struct vas_window *windows[VAS_WINDOWS_PER_CHIP];
+};
+
+/*
+ * In-kernel state a VAS window. One per window.
+ */
+struct vas_window {
+	/* Fields common to send and receive windows */
+	struct vas_instance *vinst;
+	int winid;
+	bool tx_win;		/* True if send window */
+	bool nx_win;		/* True if NX window */
+	bool user_win;		/* True if user space window */
+	void *hvwc_map;		/* HV window context */
+	void *uwc_map;		/* OS/User window context */
+	pid_t pid;		/* Linux process id of owner */
+
+	/* Fields applicable only to send windows */
+	void *paste_kaddr;
+	char *paste_addr_name;
+	struct vas_window *rxwin;
+
+	/* Feilds applicable only to receive windows */
+	enum vas_cop_type cop;
+	atomic_t num_txwins;
+};
+
+/*
+ * Container for the hardware state of a window. One per-window.
+ *
+ * A VAS Window context is a 512-byte area in the hardware that contains
+ * a set of 64-bit registers. Individual bit-fields in these registers
+ * determine the configuration/operation of the hardware. struct vas_winctx
+ * is a container for the register fields in the window context.
+ */
+struct vas_winctx {
+	void *rx_fifo;
+	int rx_fifo_size;
+	int wcreds_max;
+	int rsvd_txbuf_count;
+
+	bool user_win;
+	bool nx_win;
+	bool fault_win;
+	bool rsvd_txbuf_enable;
+	bool pin_win;
+	bool rej_no_credit;
+	bool tx_wcred_mode;
+	bool rx_wcred_mode;
+	bool tx_word_mode;
+	bool rx_word_mode;
+	bool data_stamp;
+	bool xtra_write;
+	bool notify_disable;
+	bool intr_disable;
+	bool fifo_disable;
+	bool notify_early;
+	bool notify_os_intr_reg;
+
+	int lpid;
+	int pidr;		/* value from SPRN_PID, not linux pid */
+	int lnotify_lpid;
+	int lnotify_pid;
+	int lnotify_tid;
+	u32 pswid;
+	int rx_win_id;
+	int fault_win_id;
+	int tc_mode;
+
+	u64 irq_port;
+
+	enum vas_dma_type dma_type;
+	enum vas_notify_scope min_scope;
+	enum vas_notify_scope max_scope;
+	enum vas_notify_after_count notify_after_count;
+};
+
+extern struct vas_instance *find_vas_instance(int vasid);
+
+/*
+ * VREG(x):
+ * Expand a register's short name (eg: LPID) into two parameters:
+ *	- the register's short name in string form ("LPID"), and
+ *	- the name of the macro (eg: VAS_LPID_OFFSET), defining the
+ *	  register's offset in the window context
+ */
+#define VREG_SFX(n, s)	__stringify(n), VAS_##n##s
+#define VREG(r)		VREG_SFX(r, _OFFSET)
+
+#ifdef vas_debug
+static inline void dump_rx_win_attr(struct vas_rx_win_attr *attr)
+{
+	pr_err("fault %d, notify %d, intr %d early %d\n",
+			attr->fault_win, attr->notify_disable,
+			attr->intr_disable, attr->notify_early);
+
+	pr_err("rx_fifo_size %d, max value %d\n",
+				attr->rx_fifo_size, VAS_RX_FIFO_SIZE_MAX);
+}
+
+static inline void vas_log_write(struct vas_window *win, char *name,
+			void *regptr, u64 val)
+{
+	if (val)
+		pr_err("%swin #%d: %s reg %p, val 0x%016llx\n",
+				win->tx_win ? "Tx" : "Rx", win->winid, name,
+				regptr, val);
+}
+
+#else	/* vas_debug */
+
+#define vas_log_write(win, name, reg, val)
+#define dump_rx_win_attr(attr)
+
+#endif	/* vas_debug */
+
+static inline void write_uwc_reg(struct vas_window *win, char *name,
+			s32 reg, u64 val)
+{
+	void *regptr;
+
+	regptr = win->uwc_map + reg;
+	vas_log_write(win, name, regptr, val);
+
+	out_be64(regptr, val);
+}
+
+static inline void write_hvwc_reg(struct vas_window *win, char *name,
+			s32 reg, u64 val)
+{
+	void *regptr;
+
+	regptr = win->hvwc_map + reg;
+	vas_log_write(win, name, regptr, val);
+
+	out_be64(regptr, val);
+}
+
+static inline u64 read_hvwc_reg(struct vas_window *win,
+			char *name __maybe_unused, s32 reg)
+{
+	return in_be64(win->hvwc_map+reg);
+}
+
+#ifdef vas_debug
+
+static void print_fifo_msg_count(struct vas_window *txwin)
+{
+	uint64_t read_hvwc_reg(struct vas_window *w, char *n, uint64_t o);
+	pr_devel("Winid %d, Msg count %llu\n", txwin->winid,
+			(uint64_t)read_hvwc_reg(txwin, VREG(LRFIFO_PUSH)));
+}
+#else	/* vas_debug */
+
+#define print_fifo_msg_count(window)
+
+#endif	/* vas_debug */
+
+#endif /* _VAS_H */
diff --git a/arch/powerpc/platforms/ps3/repository.c b/arch/powerpc/platforms/ps3/repository.c
index 814a7eaa7769..50dbaf24b1ee 100644
--- a/arch/powerpc/platforms/ps3/repository.c
+++ b/arch/powerpc/platforms/ps3/repository.c
@@ -170,14 +170,8 @@ int ps3_repository_read_bus_str(unsigned int bus_index, const char *bus_str,
 
 int ps3_repository_read_bus_id(unsigned int bus_index, u64 *bus_id)
 {
-	int result;
-
-	result = read_node(PS3_LPAR_ID_PME,
-		make_first_field("bus", bus_index),
-		make_field("id", 0),
-		0, 0,
-		bus_id, NULL);
-	return result;
+	return read_node(PS3_LPAR_ID_PME, make_first_field("bus", bus_index),
+			 make_field("id", 0), 0, 0, bus_id, NULL);
 }
 
 int ps3_repository_read_bus_type(unsigned int bus_index,
@@ -224,15 +218,9 @@ int ps3_repository_read_dev_str(unsigned int bus_index,
 int ps3_repository_read_dev_id(unsigned int bus_index, unsigned int dev_index,
 	u64 *dev_id)
 {
-	int result;
-
-	result = read_node(PS3_LPAR_ID_PME,
-		make_first_field("bus", bus_index),
-		make_field("dev", dev_index),
-		make_field("id", 0),
-		0,
-		dev_id, NULL);
-	return result;
+	return read_node(PS3_LPAR_ID_PME, make_first_field("bus", bus_index),
+			 make_field("dev", dev_index), make_field("id", 0), 0,
+			 dev_id, NULL);
 }
 
 int ps3_repository_read_dev_type(unsigned int bus_index,
diff --git a/arch/powerpc/platforms/ps3/setup.c b/arch/powerpc/platforms/ps3/setup.c
index 6244bc849469..9dabea6e1443 100644
--- a/arch/powerpc/platforms/ps3/setup.c
+++ b/arch/powerpc/platforms/ps3/setup.c
@@ -104,20 +104,6 @@ static void __noreturn ps3_halt(void)
 	ps3_sys_manager_halt(); /* never returns */
 }
 
-static void ps3_panic(char *str)
-{
-	DBG("%s:%d %s\n", __func__, __LINE__, str);
-
-	smp_send_stop();
-	printk("\n");
-	printk("   System does not reboot automatically.\n");
-	printk("   Please press POWER button.\n");
-	printk("\n");
-
-	while(1)
-		lv1_pause(1);
-}
-
 #if defined(CONFIG_FB_PS3) || defined(CONFIG_FB_PS3_MODULE) || \
     defined(CONFIG_PS3_FLASH) || defined(CONFIG_PS3_FLASH_MODULE)
 static void __init prealloc(struct ps3_prealloc *p)
@@ -269,7 +255,6 @@ define_machine(ps3) {
 	.probe				= ps3_probe,
 	.setup_arch			= ps3_setup_arch,
 	.init_IRQ			= ps3_init_IRQ,
-	.panic				= ps3_panic,
 	.get_boot_time			= ps3_get_boot_time,
 	.set_dabr			= ps3_set_dabr,
 	.calibrate_decr			= ps3_calibrate_decr,
diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
index 3a6dfd14f64b..71dd69d9ec64 100644
--- a/arch/powerpc/platforms/pseries/Kconfig
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -7,6 +7,7 @@ config PPC_PSERIES
 	select PCI
 	select PCI_MSI
 	select PPC_XICS
+	select PPC_XIVE_SPAPR
 	select PPC_ICP_NATIVE
 	select PPC_ICP_HV
 	select PPC_ICS_RTAS
diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c
index 39187696ee74..783f36364690 100644
--- a/arch/powerpc/platforms/pseries/dlpar.c
+++ b/arch/powerpc/platforms/pseries/dlpar.c
@@ -254,18 +254,15 @@ cc_error:
 	return first_dn;
 }
 
-int dlpar_attach_node(struct device_node *dn)
+int dlpar_attach_node(struct device_node *dn, struct device_node *parent)
 {
 	int rc;
 
-	dn->parent = pseries_of_derive_parent(dn->full_name);
-	if (IS_ERR(dn->parent))
-		return PTR_ERR(dn->parent);
+	dn->parent = parent;
 
 	rc = of_attach_node(dn);
 	if (rc) {
-		printk(KERN_ERR "Failed to add device node %s\n",
-		       dn->full_name);
+		printk(KERN_ERR "Failed to add device node %pOF\n", dn);
 		return rc;
 	}
 
diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c
index 1eef46d9cf30..6b812ad990e4 100644
--- a/arch/powerpc/platforms/pseries/eeh_pseries.c
+++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
@@ -247,14 +247,13 @@ static void *pseries_eeh_probe(struct pci_dn *pdn, void *data)
 
 	/* Initialize the fake PE */
 	memset(&pe, 0, sizeof(struct eeh_pe));
-	pe.phb = edev->phb;
+	pe.phb = pdn->phb;
 	pe.config_addr = (pdn->busno << 16) | (pdn->devfn << 8);
 
 	/* Enable EEH on the device */
 	ret = eeh_ops->set_option(&pe, EEH_OPT_ENABLE);
 	if (!ret) {
 		/* Retrieve PE address */
-		edev->config_addr = (pdn->busno << 16) | (pdn->devfn << 8);
 		edev->pe_config_addr = eeh_ops->get_pe_addr(&pe);
 		pe.addr = edev->pe_config_addr;
 
@@ -279,7 +278,6 @@ static void *pseries_eeh_probe(struct pci_dn *pdn, void *data)
 			/* This device doesn't support EEH, but it may have an
 			 * EEH parent, in which case we mark it as supported.
 			 */
-			edev->config_addr = pdn_to_eeh_dev(pdn->parent)->config_addr;
 			edev->pe_config_addr = pdn_to_eeh_dev(pdn->parent)->pe_config_addr;
 			eeh_add_to_parent_pe(edev);
 		}
diff --git a/arch/powerpc/platforms/pseries/event_sources.c b/arch/powerpc/platforms/pseries/event_sources.c
index 32187dc76730..6eeb0d4bab61 100644
--- a/arch/powerpc/platforms/pseries/event_sources.c
+++ b/arch/powerpc/platforms/pseries/event_sources.c
@@ -36,8 +36,8 @@ void request_event_sources_irqs(struct device_node *np,
 		virqs[count] = irq_create_of_mapping(&oirq);
 		if (!virqs[count]) {
 			pr_err("event-sources: Unable to allocate "
-			       "interrupt number for %s\n",
-			       np->full_name);
+			       "interrupt number for %pOF\n",
+			       np);
 			WARN_ON(1);
 		} else {
 			count++;
@@ -48,7 +48,7 @@ void request_event_sources_irqs(struct device_node *np,
 	for (i = 0; i < count; i++) {
 		if (request_irq(virqs[i], handler, 0, name, NULL)) {
 			pr_err("event-sources: Unable to request interrupt "
-			       "%d for %s\n", virqs[i], np->full_name);
+			       "%d for %pOF\n", virqs[i], np);
 			WARN_ON(1);
 			return;
 		}
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index 6afd1efd3633..fc0d8f97c03a 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -34,6 +34,7 @@
 #include <asm/machdep.h>
 #include <asm/vdso_datapage.h>
 #include <asm/xics.h>
+#include <asm/xive.h>
 #include <asm/plpar_wrappers.h>
 
 #include "pseries.h"
@@ -109,7 +110,10 @@ static void pseries_mach_cpu_die(void)
 
 	local_irq_disable();
 	idle_task_exit();
-	xics_teardown_cpu();
+	if (xive_enabled())
+		xive_teardown_cpu();
+	else
+		xics_teardown_cpu();
 
 	if (get_preferred_offline_state(cpu) == CPU_STATE_INACTIVE) {
 		set_cpu_current_state(cpu, CPU_STATE_INACTIVE);
@@ -174,7 +178,10 @@ static int pseries_cpu_disable(void)
 		boot_cpuid = cpumask_any(cpu_online_mask);
 
 	/* FIXME: abstract this to not be platform specific later on */
-	xics_migrate_irqs_away();
+	if (xive_enabled())
+		xive_smp_disable_cpu();
+	else
+		xics_migrate_irqs_away();
 	return 0;
 }
 
@@ -264,8 +271,8 @@ static int pseries_add_processor(struct device_node *np)
 		/* If we get here, it most likely means that NR_CPUS is
 		 * less than the partition's max processors setting.
 		 */
-		printk(KERN_ERR "Cannot add cpu %s; this system configuration"
-		       " supports %d logical cpus.\n", np->full_name,
+		printk(KERN_ERR "Cannot add cpu %pOF; this system configuration"
+		       " supports %d logical cpus.\n", np,
 		       num_possible_cpus());
 		goto out_unlock;
 	}
@@ -463,7 +470,7 @@ static ssize_t dlpar_cpu_add(u32 drc_index)
 		return -EINVAL;
 	}
 
-	rc = dlpar_attach_node(dn);
+	rc = dlpar_attach_node(dn, parent);
 	if (rc) {
 		saved_rc = rc;
 		pr_warn("Failed to attach node %s, rc: %d, drc index: %x\n",
diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
index ca9b2f4aaa22..1d48ab424bd9 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -336,7 +336,38 @@ static struct memory_block *lmb_to_memblock(struct of_drconf_cell *lmb)
 	return mem_block;
 }
 
+static int dlpar_change_lmb_state(struct of_drconf_cell *lmb, bool online)
+{
+	struct memory_block *mem_block;
+	int rc;
+
+	mem_block = lmb_to_memblock(lmb);
+	if (!mem_block)
+		return -EINVAL;
+
+	if (online && mem_block->dev.offline)
+		rc = device_online(&mem_block->dev);
+	else if (!online && !mem_block->dev.offline)
+		rc = device_offline(&mem_block->dev);
+	else
+		rc = 0;
+
+	put_device(&mem_block->dev);
+
+	return rc;
+}
+
+static int dlpar_online_lmb(struct of_drconf_cell *lmb)
+{
+	return dlpar_change_lmb_state(lmb, true);
+}
+
 #ifdef CONFIG_MEMORY_HOTREMOVE
+static int dlpar_offline_lmb(struct of_drconf_cell *lmb)
+{
+	return dlpar_change_lmb_state(lmb, false);
+}
+
 static int pseries_remove_memblock(unsigned long base, unsigned int memblock_size)
 {
 	unsigned long block_sz, start_pfn;
@@ -431,19 +462,13 @@ static int dlpar_add_lmb(struct of_drconf_cell *);
 
 static int dlpar_remove_lmb(struct of_drconf_cell *lmb)
 {
-	struct memory_block *mem_block;
 	unsigned long block_sz;
 	int nid, rc;
 
 	if (!lmb_is_removable(lmb))
 		return -EINVAL;
 
-	mem_block = lmb_to_memblock(lmb);
-	if (!mem_block)
-		return -EINVAL;
-
-	rc = device_offline(&mem_block->dev);
-	put_device(&mem_block->dev);
+	rc = dlpar_offline_lmb(lmb);
 	if (rc)
 		return rc;
 
@@ -737,20 +762,6 @@ static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index,
 }
 #endif /* CONFIG_MEMORY_HOTREMOVE */
 
-static int dlpar_online_lmb(struct of_drconf_cell *lmb)
-{
-	struct memory_block *mem_block;
-	int rc;
-
-	mem_block = lmb_to_memblock(lmb);
-	if (!mem_block)
-		return -EINVAL;
-
-	rc = device_online(&mem_block->dev);
-	put_device(&mem_block->dev);
-	return rc;
-}
-
 static int dlpar_add_lmb(struct of_drconf_cell *lmb)
 {
 	unsigned long block_sz;
@@ -817,6 +828,9 @@ static int dlpar_memory_add_by_count(u32 lmbs_to_add, struct property *prop)
 		return -EINVAL;
 
 	for (i = 0; i < num_lmbs && lmbs_to_add != lmbs_added; i++) {
+		if (lmbs[i].flags & DRCONF_MEM_ASSIGNED)
+			continue;
+
 		rc = dlpar_acquire_drc(lmbs[i].drc_index);
 		if (rc)
 			continue;
@@ -859,6 +873,7 @@ static int dlpar_memory_add_by_count(u32 lmbs_to_add, struct property *prop)
 				lmbs[i].base_addr, lmbs[i].drc_index);
 			lmbs[i].reserved = 0;
 		}
+		rc = 0;
 	}
 
 	return rc;
diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S
index 74b5b8e239c8..c511a1743a44 100644
--- a/arch/powerpc/platforms/pseries/hvCall.S
+++ b/arch/powerpc/platforms/pseries/hvCall.S
@@ -23,7 +23,7 @@
 
 	.globl hcall_tracepoint_refcount
 hcall_tracepoint_refcount:
-	.llong	0
+	.8byte	0
 
 	.section	".text"
 #endif
diff --git a/arch/powerpc/platforms/pseries/ibmebus.c b/arch/powerpc/platforms/pseries/ibmebus.c
index 52146b1356d2..408a86044133 100644
--- a/arch/powerpc/platforms/pseries/ibmebus.c
+++ b/arch/powerpc/platforms/pseries/ibmebus.c
@@ -150,8 +150,7 @@ static const struct dma_map_ops ibmebus_dma_ops = {
 static int ibmebus_match_path(struct device *dev, void *data)
 {
 	struct device_node *dn = to_platform_device(dev)->dev.of_node;
-	return (dn->full_name &&
-		(strcasecmp((char *)data, dn->full_name) == 0));
+	return (of_find_node_by_path(data) == dn);
 }
 
 static int ibmebus_match_node(struct device *dev, void *data)
@@ -395,7 +394,7 @@ static ssize_t devspec_show(struct device *dev,
 	struct platform_device *ofdev;
 
 	ofdev = to_platform_device(dev);
-	return sprintf(buf, "%s\n", ofdev->dev.of_node->full_name);
+	return sprintf(buf, "%pOF\n", ofdev->dev.of_node);
 }
 static DEVICE_ATTR_RO(devspec);
 
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index 8374adee27e3..7c181467d0ad 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -511,8 +511,8 @@ static void iommu_table_setparms(struct pci_controller *phb,
 	basep = of_get_property(node, "linux,tce-base", NULL);
 	sizep = of_get_property(node, "linux,tce-size", NULL);
 	if (basep == NULL || sizep == NULL) {
-		printk(KERN_ERR "PCI_DMA: iommu_table_setparms: %s has "
-				"missing tce entries !\n", dn->full_name);
+		printk(KERN_ERR "PCI_DMA: iommu_table_setparms: %pOF has "
+				"missing tce entries !\n", dn);
 		return;
 	}
 
@@ -587,7 +587,7 @@ static void pci_dma_bus_setup_pSeries(struct pci_bus *bus)
 
 	dn = pci_bus_to_OF_node(bus);
 
-	pr_debug("pci_dma_bus_setup_pSeries: setting up bus %s\n", dn->full_name);
+	pr_debug("pci_dma_bus_setup_pSeries: setting up bus %pOF\n", dn);
 
 	if (bus->self) {
 		/* This is not a root bus, any setup will be done for the
@@ -701,8 +701,8 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus)
 
 	dn = pci_bus_to_OF_node(bus);
 
-	pr_debug("pci_dma_bus_setup_pSeriesLP: setting up bus %s\n",
-		 dn->full_name);
+	pr_debug("pci_dma_bus_setup_pSeriesLP: setting up bus %pOF\n",
+		 dn);
 
 	/* Find nearest ibm,dma-window, walking up the device tree */
 	for (pdn = dn; pdn != NULL; pdn = pdn->parent) {
@@ -718,8 +718,8 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus)
 
 	ppci = PCI_DN(pdn);
 
-	pr_debug("  parent is %s, iommu_table: 0x%p\n",
-		 pdn->full_name, ppci->table_group);
+	pr_debug("  parent is %pOF, iommu_table: 0x%p\n",
+		 pdn, ppci->table_group);
 
 	if (!ppci->table_group) {
 		ppci->table_group = iommu_pseries_alloc_group(ppci->phb->node);
@@ -817,28 +817,28 @@ static void remove_ddw(struct device_node *np, bool remove_prop)
 	ret = tce_clearrange_multi_pSeriesLP(0,
 		1ULL << (be32_to_cpu(dwp->window_shift) - PAGE_SHIFT), dwp);
 	if (ret)
-		pr_warning("%s failed to clear tces in window.\n",
-			 np->full_name);
+		pr_warning("%pOF failed to clear tces in window.\n",
+			 np);
 	else
-		pr_debug("%s successfully cleared tces in window.\n",
-			 np->full_name);
+		pr_debug("%pOF successfully cleared tces in window.\n",
+			 np);
 
 	ret = rtas_call(ddw_avail[2], 1, 1, NULL, liobn);
 	if (ret)
-		pr_warning("%s: failed to remove direct window: rtas returned "
+		pr_warning("%pOF: failed to remove direct window: rtas returned "
 			"%d to ibm,remove-pe-dma-window(%x) %llx\n",
-			np->full_name, ret, ddw_avail[2], liobn);
+			np, ret, ddw_avail[2], liobn);
 	else
-		pr_debug("%s: successfully removed direct window: rtas returned "
+		pr_debug("%pOF: successfully removed direct window: rtas returned "
 			"%d to ibm,remove-pe-dma-window(%x) %llx\n",
-			np->full_name, ret, ddw_avail[2], liobn);
+			np, ret, ddw_avail[2], liobn);
 
 delprop:
 	if (remove_prop)
 		ret = of_remove_property(np, win64);
 	if (ret)
-		pr_warning("%s: failed to remove direct window property: %d\n",
-			np->full_name, ret);
+		pr_warning("%pOF: failed to remove direct window property: %d\n",
+			np, ret);
 }
 
 static u64 find_existing_ddw(struct device_node *pdn)
@@ -1004,7 +1004,7 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
 	 * list.
 	 */
 	list_for_each_entry(fpdn, &failed_ddw_pdn_list, list) {
-		if (!strcmp(fpdn->pdn->full_name, pdn->full_name))
+		if (fpdn->pdn == pdn)
 			goto out_unlock;
 	}
 
@@ -1087,8 +1087,8 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
 	ddwprop->tce_shift = cpu_to_be32(page_shift);
 	ddwprop->window_shift = cpu_to_be32(len);
 
-	dev_dbg(&dev->dev, "created tce table LIOBN 0x%x for %s\n",
-		  create.liobn, dn->full_name);
+	dev_dbg(&dev->dev, "created tce table LIOBN 0x%x for %pOF\n",
+		  create.liobn, dn);
 
 	window = kzalloc(sizeof(*window), GFP_KERNEL);
 	if (!window)
@@ -1097,15 +1097,15 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
 	ret = walk_system_ram_range(0, memblock_end_of_DRAM() >> PAGE_SHIFT,
 			win64->value, tce_setrange_multi_pSeriesLP_walk);
 	if (ret) {
-		dev_info(&dev->dev, "failed to map direct window for %s: %d\n",
-			 dn->full_name, ret);
+		dev_info(&dev->dev, "failed to map direct window for %pOF: %d\n",
+			 dn, ret);
 		goto out_free_window;
 	}
 
 	ret = of_add_property(pdn, win64);
 	if (ret) {
-		dev_err(&dev->dev, "unable to add dma window property for %s: %d",
-			 pdn->full_name, ret);
+		dev_err(&dev->dev, "unable to add dma window property for %pOF: %d",
+			 pdn, ret);
 		goto out_free_window;
 	}
 
@@ -1158,7 +1158,7 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev)
 	 * already allocated.
 	 */
 	dn = pci_device_to_OF_node(dev);
-	pr_debug("  node is %s\n", dn->full_name);
+	pr_debug("  node is %pOF\n", dn);
 
 	for (pdn = dn; pdn && PCI_DN(pdn) && !PCI_DN(pdn)->table_group;
 	     pdn = pdn->parent) {
@@ -1169,11 +1169,11 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev)
 
 	if (!pdn || !PCI_DN(pdn)) {
 		printk(KERN_WARNING "pci_dma_dev_setup_pSeriesLP: "
-		       "no DMA window found for pci dev=%s dn=%s\n",
-				 pci_name(dev), of_node_full_name(dn));
+		       "no DMA window found for pci dev=%s dn=%pOF\n",
+				 pci_name(dev), dn);
 		return;
 	}
-	pr_debug("  parent is %s\n", pdn->full_name);
+	pr_debug("  parent is %pOF\n", pdn);
 
 	pci = PCI_DN(pdn);
 	if (!pci->table_group) {
@@ -1213,7 +1213,7 @@ static int dma_set_mask_pSeriesLP(struct device *dev, u64 dma_mask)
 	/* only attempt to use a new window if 64-bit DMA is requested */
 	if (!disable_ddw && dma_mask == DMA_BIT_MASK(64)) {
 		dn = pci_device_to_OF_node(pdev);
-		dev_dbg(dev, "node is %s\n", dn->full_name);
+		dev_dbg(dev, "node is %pOF\n", dn);
 
 		/*
 		 * the device tree might contain the dma-window properties
diff --git a/arch/powerpc/platforms/pseries/kexec.c b/arch/powerpc/platforms/pseries/kexec.c
index 6681ac97fb18..eeb13429d685 100644
--- a/arch/powerpc/platforms/pseries/kexec.c
+++ b/arch/powerpc/platforms/pseries/kexec.c
@@ -15,6 +15,7 @@
 #include <asm/firmware.h>
 #include <asm/kexec.h>
 #include <asm/xics.h>
+#include <asm/xive.h>
 #include <asm/smp.h>
 #include <asm/plpar_wrappers.h>
 
@@ -51,5 +52,8 @@ void pseries_kexec_cpu_down(int crash_shutdown, int secondary)
 		}
 	}
 
-	xics_kexec_teardown_cpu(secondary);
+	if (xive_enabled())
+		xive_kexec_teardown_cpu(secondary);
+	else
+		xics_kexec_teardown_cpu(secondary);
 }
diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c
index 2da4851eff99..210ce632d63e 100644
--- a/arch/powerpc/platforms/pseries/mobility.c
+++ b/arch/powerpc/platforms/pseries/mobility.c
@@ -229,7 +229,7 @@ static int add_dt_node(__be32 parent_phandle, __be32 drc_index)
 	if (!dn)
 		return -ENOENT;
 
-	rc = dlpar_attach_node(dn);
+	rc = dlpar_attach_node(dn, parent_dn);
 	if (rc)
 		dlpar_free_cc_nodes(dn);
 
diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c
index 326ef0dd6038..b7496948129e 100644
--- a/arch/powerpc/platforms/pseries/msi.c
+++ b/arch/powerpc/platforms/pseries/msi.c
@@ -132,19 +132,14 @@ static void rtas_teardown_msi_irqs(struct pci_dev *pdev)
 static int check_req(struct pci_dev *pdev, int nvec, char *prop_name)
 {
 	struct device_node *dn;
-	struct pci_dn *pdn;
 	const __be32 *p;
 	u32 req_msi;
 
-	pdn = pci_get_pdn(pdev);
-	if (!pdn)
-		return -ENODEV;
-
-	dn = pdn->node;
+	dn = pci_device_to_OF_node(pdev);
 
 	p = of_get_property(dn, prop_name, NULL);
 	if (!p) {
-		pr_debug("rtas_msi: No %s on %s\n", prop_name, dn->full_name);
+		pr_debug("rtas_msi: No %s on %pOF\n", prop_name, dn);
 		return -ENOENT;
 	}
 
@@ -182,8 +177,8 @@ static struct device_node *find_pe_total_msi(struct pci_dev *dev, int *total)
 	while (dn) {
 		p = of_get_property(dn, "ibm,pe-total-#msi", NULL);
 		if (p) {
-			pr_debug("rtas_msi: found prop on dn %s\n",
-				dn->full_name);
+			pr_debug("rtas_msi: found prop on dn %pOF\n",
+				dn);
 			*total = be32_to_cpup(p);
 			return dn;
 		}
@@ -197,7 +192,6 @@ static struct device_node *find_pe_total_msi(struct pci_dev *dev, int *total)
 static struct device_node *find_pe_dn(struct pci_dev *dev, int *total)
 {
 	struct device_node *dn;
-	struct pci_dn *pdn;
 	struct eeh_dev *edev;
 
 	/* Found our PE and assume 8 at that point. */
@@ -210,8 +204,7 @@ static struct device_node *find_pe_dn(struct pci_dev *dev, int *total)
 	edev = pdn_to_eeh_dev(PCI_DN(dn));
 	if (edev->pe)
 		edev = list_first_entry(&edev->pe->edevs, struct eeh_dev, list);
-	pdn = eeh_dev_to_pdn(edev);
-	dn = pdn ? pdn->node : NULL;
+	dn = pci_device_to_OF_node(edev->pdev);
 	if (!dn)
 		return NULL;
 
@@ -222,7 +215,7 @@ static struct device_node *find_pe_dn(struct pci_dev *dev, int *total)
 
 	/* Hardcode of 8 for old firmwares */
 	*total = 8;
-	pr_debug("rtas_msi: using PE dn %s\n", dn->full_name);
+	pr_debug("rtas_msi: using PE dn %pOF\n", dn);
 
 	return dn;
 }
@@ -242,7 +235,7 @@ static void *count_non_bridge_devices(struct device_node *dn, void *data)
 	const __be32 *p;
 	u32 class;
 
-	pr_debug("rtas_msi: counting %s\n", dn->full_name);
+	pr_debug("rtas_msi: counting %pOF\n", dn);
 
 	p = of_get_property(dn, "class-code", NULL);
 	class = p ? be32_to_cpup(p) : 0;
@@ -300,7 +293,7 @@ static int msi_quota_for_device(struct pci_dev *dev, int request)
 		goto out;
 	}
 
-	pr_debug("rtas_msi: found PE %s\n", pe_dn->full_name);
+	pr_debug("rtas_msi: found PE %pOF\n", pe_dn);
 
 	memset(&counts, 0, sizeof(struct msi_counts));
 
diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c
index 547fd13e4f8e..561917fa54a8 100644
--- a/arch/powerpc/platforms/pseries/pci_dlpar.c
+++ b/arch/powerpc/platforms/pseries/pci_dlpar.c
@@ -38,7 +38,7 @@ struct pci_controller *init_phb_dynamic(struct device_node *dn)
 {
 	struct pci_controller *phb;
 
-	pr_debug("PCI: Initializing new hotplug PHB %s\n", dn->full_name);
+	pr_debug("PCI: Initializing new hotplug PHB %pOF\n", dn);
 
 	phb = pcibios_alloc_controller(dn);
 	if (!phb)
diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h
index 1361a9db534b..4470a3194311 100644
--- a/arch/powerpc/platforms/pseries/pseries.h
+++ b/arch/powerpc/platforms/pseries/pseries.h
@@ -46,7 +46,7 @@ extern void dlpar_free_cc_nodes(struct device_node *);
 extern void dlpar_free_cc_property(struct property *);
 extern struct device_node *dlpar_configure_connector(__be32,
 						struct device_node *);
-extern int dlpar_attach_node(struct device_node *);
+extern int dlpar_attach_node(struct device_node *, struct device_node *);
 extern int dlpar_detach_node(struct device_node *);
 extern int dlpar_acquire_drc(u32 drc_index);
 extern int dlpar_release_drc(u32 drc_index);
diff --git a/arch/powerpc/platforms/pseries/pseries_energy.c b/arch/powerpc/platforms/pseries/pseries_energy.c
index 164a13d3998a..35c891aabef0 100644
--- a/arch/powerpc/platforms/pseries/pseries_energy.c
+++ b/arch/powerpc/platforms/pseries/pseries_energy.c
@@ -229,10 +229,9 @@ static int __init pseries_energy_init(void)
 	int cpu, err;
 	struct device *cpu_dev;
 
-	if (!firmware_has_feature(FW_FEATURE_BEST_ENERGY)) {
-		printk(KERN_INFO "Hypercall H_BEST_ENERGY not supported\n");
-		return 0;
-	}
+	if (!firmware_has_feature(FW_FEATURE_BEST_ENERGY))
+		return 0; /* H_BEST_ENERGY hcall not supported */
+
 	/* Create the sysfs files */
 	err = device_create_file(cpu_subsys.dev_root,
 				&attr_cpu_activate_hint_list);
diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
index bb70b26334f0..4923ffe230cf 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -379,6 +379,21 @@ static void fwnmi_release_errinfo(void)
 
 int pSeries_system_reset_exception(struct pt_regs *regs)
 {
+#ifdef __LITTLE_ENDIAN__
+	/*
+	 * Some firmware byteswaps SRR registers and gives incorrect SRR1. Try
+	 * to detect the bad SRR1 pattern here. Flip the NIP back to correct
+	 * endian for reporting purposes. Unfortunately the MSR can't be fixed,
+	 * so clear it. It will be missing MSR_RI so we won't try to recover.
+	 */
+	if ((be64_to_cpu(regs->msr) &
+			(MSR_LE|MSR_RI|MSR_DR|MSR_IR|MSR_ME|MSR_PR|
+			 MSR_ILE|MSR_HV|MSR_SF)) == (MSR_DR|MSR_SF)) {
+		regs->nip = be64_to_cpu((__be64)regs->nip);
+		regs->msr = 0;
+	}
+#endif
+
 	if (fwnmi_active) {
 		struct rtas_error_log *errhdr = fwnmi_get_errinfo(regs);
 		if (errhdr) {
diff --git a/arch/powerpc/platforms/pseries/reconfig.c b/arch/powerpc/platforms/pseries/reconfig.c
index 011ef2180fe6..296c188fd5ca 100644
--- a/arch/powerpc/platforms/pseries/reconfig.c
+++ b/arch/powerpc/platforms/pseries/reconfig.c
@@ -362,20 +362,13 @@ static int do_update_property(char *buf, size_t bufsize)
 static ssize_t ofdt_write(struct file *file, const char __user *buf, size_t count,
 			  loff_t *off)
 {
-	int rv = 0;
+	int rv;
 	char *kbuf;
 	char *tmp;
 
-	if (!(kbuf = kmalloc(count + 1, GFP_KERNEL))) {
-		rv = -ENOMEM;
-		goto out;
-	}
-	if (copy_from_user(kbuf, buf, count)) {
-		rv = -EFAULT;
-		goto out;
-	}
-
-	kbuf[count] = '\0';
+	kbuf = memdup_user_nul(buf, count);
+	if (IS_ERR(kbuf))
+		return PTR_ERR(kbuf);
 
 	tmp = strchr(kbuf, ' ');
 	if (!tmp) {
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index b5d86426e97b..5f1beb8367ac 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -57,6 +57,7 @@
 #include <asm/nvram.h>
 #include <asm/pmc.h>
 #include <asm/xics.h>
+#include <asm/xive.h>
 #include <asm/ppc-pci.h>
 #include <asm/i8259.h>
 #include <asm/udbg.h>
@@ -176,8 +177,11 @@ static void __init pseries_setup_i8259_cascade(void)
 
 static void __init pseries_init_irq(void)
 {
-	xics_init();
-	pseries_setup_i8259_cascade();
+	/* Try using a XIVE if available, otherwise use a XICS */
+	if (!xive_spapr_init()) {
+		xics_init();
+		pseries_setup_i8259_cascade();
+	}
 }
 
 static void pseries_lpar_enable_pmcs(void)
@@ -722,7 +726,6 @@ define_machine(pseries) {
 	.pcibios_fixup		= pSeries_final_fixup,
 	.restart		= rtas_restart,
 	.halt			= rtas_halt,
-	.panic			= rtas_os_term,
 	.get_boot_time		= rtas_get_boot_time,
 	.get_rtc_time		= rtas_get_rtc_time,
 	.set_rtc_time		= rtas_set_rtc_time,
diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c
index 24785f63fb40..2e184829e5d4 100644
--- a/arch/powerpc/platforms/pseries/smp.c
+++ b/arch/powerpc/platforms/pseries/smp.c
@@ -41,6 +41,7 @@
 #include <asm/vdso_datapage.h>
 #include <asm/cputhreads.h>
 #include <asm/xics.h>
+#include <asm/xive.h>
 #include <asm/dbell.h>
 #include <asm/plpar_wrappers.h>
 #include <asm/code-patching.h>
@@ -136,7 +137,9 @@ out:
 
 static void smp_setup_cpu(int cpu)
 {
-	if (cpu != boot_cpuid)
+	if (xive_enabled())
+		xive_smp_setup_cpu();
+	else if (cpu != boot_cpuid)
 		xics_setup_cpu();
 
 	if (firmware_has_feature(FW_FEATURE_SPLPAR))
@@ -181,6 +184,13 @@ static int smp_pSeries_kick_cpu(int nr)
 	return 0;
 }
 
+static int pseries_smp_prepare_cpu(int cpu)
+{
+	if (xive_enabled())
+		return xive_smp_prepare_cpu(cpu);
+	return 0;
+}
+
 static void smp_pseries_cause_ipi(int cpu)
 {
 	/* POWER9 should not use this handler */
@@ -211,7 +221,7 @@ static int pseries_cause_nmi_ipi(int cpu)
 	return 0;
 }
 
-static __init void pSeries_smp_probe(void)
+static __init void pSeries_smp_probe_xics(void)
 {
 	xics_smp_probe();
 
@@ -221,11 +231,24 @@ static __init void pSeries_smp_probe(void)
 		smp_ops->cause_ipi = icp_ops->cause_ipi;
 }
 
+static __init void pSeries_smp_probe(void)
+{
+	if (xive_enabled())
+		/*
+		 * Don't use P9 doorbells when XIVE is enabled. IPIs
+		 * using MMIOs should be faster
+		 */
+		xive_smp_probe();
+	else
+		pSeries_smp_probe_xics();
+}
+
 static struct smp_ops_t pseries_smp_ops = {
 	.message_pass	= NULL,	/* Use smp_muxed_ipi_message_pass */
 	.cause_ipi	= NULL,	/* Filled at runtime by pSeries_smp_probe() */
 	.cause_nmi_ipi	= pseries_cause_nmi_ipi,
 	.probe		= pSeries_smp_probe,
+	.prepare_cpu	= pseries_smp_prepare_cpu,
 	.kick_cpu	= smp_pSeries_kick_cpu,
 	.setup_cpu	= smp_setup_cpu,
 	.cpu_bootable	= smp_generic_cpu_bootable,
diff --git a/arch/powerpc/platforms/pseries/vio.c b/arch/powerpc/platforms/pseries/vio.c
index 8a47f168476b..12277bc9fd9e 100644
--- a/arch/powerpc/platforms/pseries/vio.c
+++ b/arch/powerpc/platforms/pseries/vio.c
@@ -1357,14 +1357,14 @@ struct vio_dev *vio_register_device_node(struct device_node *of_node)
 	 */
 	parent_node = of_get_parent(of_node);
 	if (parent_node) {
-		if (!strcmp(parent_node->full_name, "/ibm,platform-facilities"))
+		if (!strcmp(parent_node->type, "ibm,platform-facilities"))
 			family = PFO;
-		else if (!strcmp(parent_node->full_name, "/vdevice"))
+		else if (!strcmp(parent_node->type, "vdevice"))
 			family = VDEVICE;
 		else {
-			pr_warn("%s: parent(%s) of %s not recognized.\n",
+			pr_warn("%s: parent(%pOF) of %s not recognized.\n",
 					__func__,
-					parent_node->full_name,
+					parent_node,
 					of_node_name);
 			of_node_put(parent_node);
 			return NULL;
@@ -1555,7 +1555,7 @@ static ssize_t devspec_show(struct device *dev,
 {
 	struct device_node *of_node = dev->of_node;
 
-	return sprintf(buf, "%s\n", of_node_full_name(of_node));
+	return sprintf(buf, "%pOF\n", of_node);
 }
 static DEVICE_ATTR_RO(devspec);
 
diff --git a/arch/powerpc/purgatory/trampoline.S b/arch/powerpc/purgatory/trampoline.S
index 3696ea6c4826..4aad9dd10ace 100644
--- a/arch/powerpc/purgatory/trampoline.S
+++ b/arch/powerpc/purgatory/trampoline.S
@@ -67,7 +67,7 @@ master:
 	mr	%r16,%r3	/* save dt address in reg16 */
 	li	%r4,20
 	LWZX_BE	%r6,%r3,%r4	/* fetch __be32 version number at byte 20 */
-	cmpwi	%r0,%r6,2	/* v2 or later? */
+	cmpwi	%cr0,%r6,2	/* v2 or later? */
 	blt	1f
 	li	%r4,28
 	STWX_BE	%r17,%r3,%r4	/* Store my cpu as __be32 at byte 28 */
@@ -104,13 +104,13 @@ master:
 	.balign 8
 	.globl kernel
 kernel:
-	.llong  0x0
+	.8byte  0x0
 	.size kernel, . - kernel
 
 	.balign 8
 	.globl dt_offset
 dt_offset:
-	.llong  0x0
+	.8byte  0x0
 	.size dt_offset, . - dt_offset
 
 
diff --git a/arch/powerpc/sysdev/Makefile b/arch/powerpc/sysdev/Makefile
index c0ae11d4f62f..79416fa2e3ba 100644
--- a/arch/powerpc/sysdev/Makefile
+++ b/arch/powerpc/sysdev/Makefile
@@ -36,25 +36,15 @@ obj-$(CONFIG_AXON_RAM)		+= axonram.o
 obj-$(CONFIG_PPC_INDIRECT_PCI)	+= indirect_pci.o
 obj-$(CONFIG_PPC_I8259)		+= i8259.o
 obj-$(CONFIG_IPIC)		+= ipic.o
-obj-$(CONFIG_4xx)		+= uic.o
-obj-$(CONFIG_PPC4xx_OCM)	+= ppc4xx_ocm.o
-obj-$(CONFIG_4xx_SOC)		+= ppc4xx_soc.o
 obj-$(CONFIG_XILINX_VIRTEX)	+= xilinx_intc.o
 obj-$(CONFIG_XILINX_PCI)	+= xilinx_pci.o
 obj-$(CONFIG_OF_RTC)		+= of_rtc.o
-ifeq ($(CONFIG_PCI),y)
-obj-$(CONFIG_4xx)		+= ppc4xx_pci.o
-endif
-obj-$(CONFIG_PPC4xx_HSTA_MSI)	+= ppc4xx_hsta_msi.o
-obj-$(CONFIG_PPC4xx_MSI)	+= ppc4xx_msi.o
-obj-$(CONFIG_PPC4xx_CPM)	+= ppc4xx_cpm.o
-obj-$(CONFIG_PPC4xx_GPIO)	+= ppc4xx_gpio.o
 
 obj-$(CONFIG_CPM)		+= cpm_common.o
+obj-$(CONFIG_CPM1)		+= cpm1.o
 obj-$(CONFIG_CPM2)		+= cpm2.o cpm2_pic.o
 obj-$(CONFIG_QUICC_ENGINE)	+= cpm_common.o
 obj-$(CONFIG_PPC_DCR)		+= dcr.o
-obj-$(CONFIG_8xx)		+= mpc8xx_pic.o cpm1.o
 obj-$(CONFIG_UCODE_PATCH)	+= micropatch.o
 
 obj-$(CONFIG_PPC_MPC512x)	+= mpc5xxx_clocks.o
diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c
index 2799706106c6..c60e84e4558d 100644
--- a/arch/powerpc/sysdev/axonram.c
+++ b/arch/powerpc/sysdev/axonram.c
@@ -110,7 +110,7 @@ axon_ram_irq_handler(int irq, void *dev)
 static blk_qc_t
 axon_ram_make_request(struct request_queue *queue, struct bio *bio)
 {
-	struct axon_ram_bank *bank = bio->bi_bdev->bd_disk->private_data;
+	struct axon_ram_bank *bank = bio->bi_disk->private_data;
 	unsigned long phys_mem, phys_end;
 	void *user_mem;
 	struct bio_vec vec;
@@ -188,15 +188,12 @@ static int axon_ram_probe(struct platform_device *device)
 
 	axon_ram_bank_id++;
 
-	dev_info(&device->dev, "Found memory controller on %s\n",
-			device->dev.of_node->full_name);
+	dev_info(&device->dev, "Found memory controller on %pOF\n",
+			device->dev.of_node);
 
-	bank = kzalloc(sizeof(struct axon_ram_bank), GFP_KERNEL);
-	if (bank == NULL) {
-		dev_err(&device->dev, "Out of memory\n");
-		rc = -ENOMEM;
-		goto failed;
-	}
+	bank = kzalloc(sizeof(*bank), GFP_KERNEL);
+	if (!bank)
+		return -ENOMEM;
 
 	device->dev.platform_data = bank;
 
@@ -292,25 +289,22 @@ static int axon_ram_probe(struct platform_device *device)
 	return 0;
 
 failed:
-	if (bank != NULL) {
-		if (bank->irq_id)
-			free_irq(bank->irq_id, device);
-		if (bank->disk != NULL) {
-			if (bank->disk->major > 0)
-				unregister_blkdev(bank->disk->major,
-						bank->disk->disk_name);
-			if (bank->disk->flags & GENHD_FL_UP)
-				del_gendisk(bank->disk);
-			put_disk(bank->disk);
-		}
-		kill_dax(bank->dax_dev);
-		put_dax(bank->dax_dev);
-		device->dev.platform_data = NULL;
-		if (bank->io_addr != 0)
-			iounmap((void __iomem *) bank->io_addr);
-		kfree(bank);
+	if (bank->irq_id)
+		free_irq(bank->irq_id, device);
+	if (bank->disk != NULL) {
+		if (bank->disk->major > 0)
+			unregister_blkdev(bank->disk->major,
+					bank->disk->disk_name);
+		if (bank->disk->flags & GENHD_FL_UP)
+			del_gendisk(bank->disk);
+		put_disk(bank->disk);
 	}
-
+	kill_dax(bank->dax_dev);
+	put_dax(bank->dax_dev);
+	device->dev.platform_data = NULL;
+	if (bank->io_addr != 0)
+		iounmap((void __iomem *) bank->io_addr);
+	kfree(bank);
 	return rc;
 }
 
diff --git a/arch/powerpc/sysdev/dcr.c b/arch/powerpc/sysdev/dcr.c
index 121e26fffd50..d72eda568b7d 100644
--- a/arch/powerpc/sysdev/dcr.c
+++ b/arch/powerpc/sysdev/dcr.c
@@ -195,8 +195,8 @@ dcr_host_mmio_t dcr_map_mmio(struct device_node *dev,
 	dcr_host_mmio_t ret = { .token = NULL, .stride = 0, .base = dcr_n };
 	u64 addr;
 
-	pr_debug("dcr_map(%s, 0x%x, 0x%x)\n",
-		 dev->full_name, dcr_n, dcr_c);
+	pr_debug("dcr_map(%pOF, 0x%x, 0x%x)\n",
+		 dev, dcr_n, dcr_c);
 
 	addr = of_translate_dcr_address(dev, dcr_n, &ret.stride);
 	pr_debug("translates to addr: 0x%llx, stride: 0x%x\n",
diff --git a/arch/powerpc/sysdev/fsl_85xx_cache_sram.c b/arch/powerpc/sysdev/fsl_85xx_cache_sram.c
index 37a69097e022..00ccf3e4fcb4 100644
--- a/arch/powerpc/sysdev/fsl_85xx_cache_sram.c
+++ b/arch/powerpc/sysdev/fsl_85xx_cache_sram.c
@@ -101,8 +101,8 @@ int __init instantiate_cache_sram(struct platform_device *dev,
 
 	if (!request_mem_region(cache_sram->base_phys, cache_sram->size,
 						"fsl_85xx_cache_sram")) {
-		dev_err(&dev->dev, "%s: request memory failed\n",
-				dev->dev.of_node->full_name);
+		dev_err(&dev->dev, "%pOF: request memory failed\n",
+				dev->dev.of_node);
 		ret = -ENXIO;
 		goto out_free;
 	}
@@ -110,16 +110,16 @@ int __init instantiate_cache_sram(struct platform_device *dev,
 	cache_sram->base_virt = ioremap_prot(cache_sram->base_phys,
 				cache_sram->size, _PAGE_COHERENT | PAGE_KERNEL);
 	if (!cache_sram->base_virt) {
-		dev_err(&dev->dev, "%s: ioremap_prot failed\n",
-				dev->dev.of_node->full_name);
+		dev_err(&dev->dev, "%pOF: ioremap_prot failed\n",
+				dev->dev.of_node);
 		ret = -ENOMEM;
 		goto out_release;
 	}
 
 	cache_sram->rh = rh_create(sizeof(unsigned int));
 	if (IS_ERR(cache_sram->rh)) {
-		dev_err(&dev->dev, "%s: Unable to create remote heap\n",
-				dev->dev.of_node->full_name);
+		dev_err(&dev->dev, "%pOF: Unable to create remote heap\n",
+				dev->dev.of_node);
 		ret = PTR_ERR(cache_sram->rh);
 		goto out_unmap;
 	}
diff --git a/arch/powerpc/sysdev/fsl_gtm.c b/arch/powerpc/sysdev/fsl_gtm.c
index a6f0b96ce2c9..d902306f4718 100644
--- a/arch/powerpc/sysdev/fsl_gtm.c
+++ b/arch/powerpc/sysdev/fsl_gtm.c
@@ -388,8 +388,8 @@ static int __init fsl_gtm_init(void)
 
 		gtm = kzalloc(sizeof(*gtm), GFP_KERNEL);
 		if (!gtm) {
-			pr_err("%s: unable to allocate memory\n",
-				np->full_name);
+			pr_err("%pOF: unable to allocate memory\n",
+				np);
 			continue;
 		}
 
@@ -397,7 +397,7 @@ static int __init fsl_gtm_init(void)
 
 		clock = of_get_property(np, "clock-frequency", &size);
 		if (!clock || size != sizeof(*clock)) {
-			pr_err("%s: no clock-frequency\n", np->full_name);
+			pr_err("%pOF: no clock-frequency\n", np);
 			goto err;
 		}
 		gtm->clock = *clock;
@@ -407,8 +407,8 @@ static int __init fsl_gtm_init(void)
 
 			irq = irq_of_parse_and_map(np, i);
 			if (!irq) {
-				pr_err("%s: not enough interrupts specified\n",
-				       np->full_name);
+				pr_err("%pOF: not enough interrupts specified\n",
+				       np);
 				goto err;
 			}
 			gtm->timers[i].irq = irq;
@@ -417,8 +417,8 @@ static int __init fsl_gtm_init(void)
 
 		gtm->regs = of_iomap(np, 0);
 		if (!gtm->regs) {
-			pr_err("%s: unable to iomap registers\n",
-			       np->full_name);
+			pr_err("%pOF: unable to iomap registers\n",
+			       np);
 			goto err;
 		}
 
diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c
index 8a244828782e..44cbf4c12ea1 100644
--- a/arch/powerpc/sysdev/fsl_msi.c
+++ b/arch/powerpc/sysdev/fsl_msi.c
@@ -214,8 +214,8 @@ static int fsl_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
 			phandle = np->phandle;
 		else {
 			dev_err(&pdev->dev,
-				"node %s has an invalid fsl,msi phandle %u\n",
-				hose->dn->full_name, np->phandle);
+				"node %pOF has an invalid fsl,msi phandle %u\n",
+				hose->dn, np->phandle);
 			return -EINVAL;
 		}
 	}
@@ -438,16 +438,16 @@ static int fsl_of_msi_probe(struct platform_device *dev)
 	if ((features->fsl_pic_ip & FSL_PIC_IP_MASK) != FSL_PIC_IP_VMPIC) {
 		err = of_address_to_resource(dev->dev.of_node, 0, &res);
 		if (err) {
-			dev_err(&dev->dev, "invalid resource for node %s\n",
-				dev->dev.of_node->full_name);
+			dev_err(&dev->dev, "invalid resource for node %pOF\n",
+				dev->dev.of_node);
 			goto error_out;
 		}
 
 		msi->msi_regs = ioremap(res.start, resource_size(&res));
 		if (!msi->msi_regs) {
 			err = -ENOMEM;
-			dev_err(&dev->dev, "could not map node %s\n",
-				dev->dev.of_node->full_name);
+			dev_err(&dev->dev, "could not map node %pOF\n",
+				dev->dev.of_node);
 			goto error_out;
 		}
 		msi->msiir_offset =
@@ -522,8 +522,8 @@ static int fsl_of_msi_probe(struct platform_device *dev)
 		for (irq_index = 0, i = 0; i < len / (2 * sizeof(u32)); i++) {
 			if (p[i * 2] % IRQS_PER_MSI_REG ||
 			    p[i * 2 + 1] % IRQS_PER_MSI_REG) {
-				pr_warn("%s: %s: msi available range of %u at %u is not IRQ-aligned\n",
-				       __func__, dev->dev.of_node->full_name,
+				pr_warn("%s: %pOF: msi available range of %u at %u is not IRQ-aligned\n",
+				       __func__, dev->dev.of_node,
 				       p[i * 2 + 1], p[i * 2]);
 				err = -EINVAL;
 				goto error_out;
diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c
index d3a597456b6e..22d98057f773 100644
--- a/arch/powerpc/sysdev/fsl_pci.c
+++ b/arch/powerpc/sysdev/fsl_pci.c
@@ -202,7 +202,6 @@ static void setup_pci_atmu(struct pci_controller *hose)
 	u32 pcicsrbar = 0, pcicsrbar_sz;
 	u32 piwar = PIWAR_EN | PIWAR_PF | PIWAR_TGI_LOCAL |
 			PIWAR_READ_SNOOP | PIWAR_WRITE_SNOOP;
-	const char *name = hose->dn->full_name;
 	const u64 *reg;
 	int len;
 	bool setup_inbound;
@@ -290,12 +289,12 @@ static void setup_pci_atmu(struct pci_controller *hose)
 	paddr_lo -= offset;
 
 	if (paddr_hi == paddr_lo) {
-		pr_err("%s: No outbound window space\n", name);
+		pr_err("%pOF: No outbound window space\n", hose->dn);
 		return;
 	}
 
 	if (paddr_lo == 0) {
-		pr_err("%s: No space for inbound window\n", name);
+		pr_err("%pOF: No space for inbound window\n", hose->dn);
 		return;
 	}
 
@@ -313,7 +312,7 @@ static void setup_pci_atmu(struct pci_controller *hose)
 
 	paddr_lo = min(paddr_lo, (u64)pcicsrbar);
 
-	pr_info("%s: PCICSRBAR @ 0x%x\n", name, pcicsrbar);
+	pr_info("%pOF: PCICSRBAR @ 0x%x\n", hose->dn, pcicsrbar);
 
 	/* Setup inbound mem window */
 	mem = memblock_end_of_DRAM();
@@ -336,12 +335,12 @@ static void setup_pci_atmu(struct pci_controller *hose)
 		u64 address = be64_to_cpup(reg);
 
 		if ((address >= mem) && (address < (mem + PAGE_SIZE))) {
-			pr_info("%s: extending DDR ATMU to cover MSIIR", name);
+			pr_info("%pOF: extending DDR ATMU to cover MSIIR", hose->dn);
 			mem += PAGE_SIZE;
 		} else {
 			/* TODO: Create a new ATMU for MSIIR */
-			pr_warn("%s: msi-address-64 address of %llx is "
-				"unsupported\n", name, address);
+			pr_warn("%pOF: msi-address-64 address of %llx is "
+				"unsupported\n", hose->dn, address);
 		}
 	}
 
@@ -354,8 +353,8 @@ static void setup_pci_atmu(struct pci_controller *hose)
 		if ((1ull << mem_log) != mem) {
 			mem_log++;
 			if ((1ull << mem_log) > mem)
-				pr_info("%s: Setting PCI inbound window "
-					"greater than memory size\n", name);
+				pr_info("%pOF: Setting PCI inbound window "
+					"greater than memory size\n", hose->dn);
 		}
 
 		piwar |= ((mem_log - 1) & PIWAR_SZ_MASK);
@@ -402,7 +401,7 @@ static void setup_pci_atmu(struct pci_controller *hose)
 			 */
 			ppc_md.dma_set_mask = fsl_pci_dma_set_mask;
 
-			pr_info("%s: Setup 64-bit PCI DMA window\n", name);
+			pr_info("%pOF: Setup 64-bit PCI DMA window\n", hose->dn);
 		}
 	} else {
 		u64 paddr = 0;
@@ -443,18 +442,18 @@ static void setup_pci_atmu(struct pci_controller *hose)
 #ifdef CONFIG_SWIOTLB
 		ppc_swiotlb_enable = 1;
 #else
-		pr_err("%s: ERROR: Memory size exceeds PCI ATMU ability to "
+		pr_err("%pOF: ERROR: Memory size exceeds PCI ATMU ability to "
 			"map - enable CONFIG_SWIOTLB to avoid dma errors.\n",
-			 name);
+			 hose->dn);
 #endif
 		/* adjusting outbound windows could reclaim space in mem map */
 		if (paddr_hi < 0xffffffffull)
-			pr_warning("%s: WARNING: Outbound window cfg leaves "
+			pr_warning("%pOF: WARNING: Outbound window cfg leaves "
 				"gaps in memory map. Adjusting the memory map "
 				"could reduce unnecessary bounce buffering.\n",
-				name);
+				hose->dn);
 
-		pr_info("%s: DMA window size is 0x%llx\n", name,
+		pr_info("%pOF: DMA window size is 0x%llx\n", hose->dn,
 			(u64)hose->dma_window_size);
 	}
 }
@@ -532,11 +531,11 @@ int fsl_add_bridge(struct platform_device *pdev, int is_primary)
 	dev = pdev->dev.of_node;
 
 	if (!of_device_is_available(dev)) {
-		pr_warning("%s: disabled\n", dev->full_name);
+		pr_warning("%pOF: disabled\n", dev);
 		return -ENODEV;
 	}
 
-	pr_debug("Adding PCI host bridge %s\n", dev->full_name);
+	pr_debug("Adding PCI host bridge %pOF\n", dev);
 
 	/* Fetch host bridge registers address */
 	if (of_address_to_resource(dev, 0, &rsrc)) {
@@ -547,8 +546,8 @@ int fsl_add_bridge(struct platform_device *pdev, int is_primary)
 	/* Get bus range if any */
 	bus_range = of_get_property(dev, "bus-range", &len);
 	if (bus_range == NULL || len < 2 * sizeof(int))
-		printk(KERN_WARNING "Can't get bus-range for %s, assume"
-			" bus 0\n", dev->full_name);
+		printk(KERN_WARNING "Can't get bus-range for %pOF, assume"
+			" bus 0\n", dev);
 
 	pci_add_flags(PCI_REASSIGN_ALL_BUS);
 	hose = pcibios_alloc_controller(dev);
@@ -809,11 +808,11 @@ int __init mpc83xx_add_bridge(struct device_node *dev)
 	is_mpc83xx_pci = 1;
 
 	if (!of_device_is_available(dev)) {
-		pr_warning("%s: disabled by the firmware.\n",
-			   dev->full_name);
+		pr_warning("%pOF: disabled by the firmware.\n",
+			   dev);
 		return -ENODEV;
 	}
-	pr_debug("Adding PCI host bridge %s\n", dev->full_name);
+	pr_debug("Adding PCI host bridge %pOF\n", dev);
 
 	/* Fetch host bridge registers address */
 	if (of_address_to_resource(dev, 0, &rsrc_reg)) {
@@ -848,8 +847,8 @@ int __init mpc83xx_add_bridge(struct device_node *dev)
 	/* Get bus range if any */
 	bus_range = of_get_property(dev, "bus-range", &len);
 	if (bus_range == NULL || len < 2 * sizeof(int)) {
-		printk(KERN_WARNING "Can't get bus-range for %s, assume"
-		       " bus 0\n", dev->full_name);
+		printk(KERN_WARNING "Can't get bus-range for %pOF, assume"
+		       " bus 0\n", dev);
 	}
 
 	pci_add_flags(PCI_REASSIGN_ALL_BUS);
diff --git a/arch/powerpc/sysdev/fsl_rio.c b/arch/powerpc/sysdev/fsl_rio.c
index 1c41c51f22cb..9234be1e66f5 100644
--- a/arch/powerpc/sysdev/fsl_rio.c
+++ b/arch/powerpc/sysdev/fsl_rio.c
@@ -450,12 +450,12 @@ int fsl_rio_setup(struct platform_device *dev)
 
 	rc = of_address_to_resource(dev->dev.of_node, 0, &regs);
 	if (rc) {
-		dev_err(&dev->dev, "Can't get %s property 'reg'\n",
-				dev->dev.of_node->full_name);
+		dev_err(&dev->dev, "Can't get %pOF property 'reg'\n",
+				dev->dev.of_node);
 		return -EFAULT;
 	}
-	dev_info(&dev->dev, "Of-device full name %s\n",
-			dev->dev.of_node->full_name);
+	dev_info(&dev->dev, "Of-device full name %pOF\n",
+			dev->dev.of_node);
 	dev_info(&dev->dev, "Regs: %pR\n", &regs);
 
 	rio_regs_win = ioremap(regs.start, resource_size(&regs));
@@ -494,8 +494,8 @@ int fsl_rio_setup(struct platform_device *dev)
 	}
 	rc = of_address_to_resource(rmu_node, 0, &rmu_regs);
 	if (rc) {
-		dev_err(&dev->dev, "Can't get %s property 'reg'\n",
-				rmu_node->full_name);
+		dev_err(&dev->dev, "Can't get %pOF property 'reg'\n",
+				rmu_node);
 		goto err_rmu;
 	}
 	rmu_regs_win = ioremap(rmu_regs.start, resource_size(&rmu_regs));
@@ -529,8 +529,8 @@ int fsl_rio_setup(struct platform_device *dev)
 	aw = of_n_addr_cells(np);
 	dt_range = of_get_property(np, "reg", &rlen);
 	if (!dt_range) {
-		pr_err("%s: unable to find 'reg' property\n",
-			np->full_name);
+		pr_err("%pOF: unable to find 'reg' property\n",
+			np);
 		rc = -ENOMEM;
 		goto err_pw;
 	}
@@ -557,8 +557,8 @@ int fsl_rio_setup(struct platform_device *dev)
 	aw = of_n_addr_cells(np);
 	dt_range = of_get_property(np, "reg", &rlen);
 	if (!dt_range) {
-		pr_err("%s: unable to find 'reg' property\n",
-			np->full_name);
+		pr_err("%pOF: unable to find 'reg' property\n",
+			np);
 		rc = -ENOMEM;
 		goto err;
 	}
@@ -569,15 +569,15 @@ int fsl_rio_setup(struct platform_device *dev)
 	for_each_child_of_node(dev->dev.of_node, np) {
 		port_index = of_get_property(np, "cell-index", NULL);
 		if (!port_index) {
-			dev_err(&dev->dev, "Can't get %s property 'cell-index'\n",
-					np->full_name);
+			dev_err(&dev->dev, "Can't get %pOF property 'cell-index'\n",
+					np);
 			continue;
 		}
 
 		dt_range = of_get_property(np, "ranges", &rlen);
 		if (!dt_range) {
-			dev_err(&dev->dev, "Can't get %s property 'ranges'\n",
-					np->full_name);
+			dev_err(&dev->dev, "Can't get %pOF property 'ranges'\n",
+					np);
 			continue;
 		}
 
@@ -598,8 +598,8 @@ int fsl_rio_setup(struct platform_device *dev)
 		range_start = of_read_number(dt_range + aw, paw);
 		range_size = of_read_number(dt_range + aw + paw, sw);
 
-		dev_info(&dev->dev, "%s: LAW start 0x%016llx, size 0x%016llx.\n",
-				np->full_name, range_start, range_size);
+		dev_info(&dev->dev, "%pOF: LAW start 0x%016llx, size 0x%016llx.\n",
+				np, range_start, range_size);
 
 		port = kzalloc(sizeof(struct rio_mport), GFP_KERNEL);
 		if (!port)
@@ -757,8 +757,8 @@ err_rio_regs:
  */
 static int fsl_of_rio_rpn_probe(struct platform_device *dev)
 {
-	printk(KERN_INFO "Setting up RapidIO peer-to-peer network %s\n",
-			dev->dev.of_node->full_name);
+	printk(KERN_INFO "Setting up RapidIO peer-to-peer network %pOF\n",
+			dev->dev.of_node);
 
 	return fsl_rio_setup(dev);
 };
diff --git a/arch/powerpc/sysdev/fsl_rmu.c b/arch/powerpc/sysdev/fsl_rmu.c
index c1826de4e749..ab7a74c75be8 100644
--- a/arch/powerpc/sysdev/fsl_rmu.c
+++ b/arch/powerpc/sysdev/fsl_rmu.c
@@ -1074,8 +1074,8 @@ int fsl_rio_setup_rmu(struct rio_mport *mport, struct device_node *node)
 	priv = mport->priv;
 
 	if (!node) {
-		dev_warn(priv->dev, "Can't get %s property 'fsl,rmu'\n",
-			priv->dev->of_node->full_name);
+		dev_warn(priv->dev, "Can't get %pOF property 'fsl,rmu'\n",
+			priv->dev->of_node);
 		return -EINVAL;
 	}
 
@@ -1086,8 +1086,8 @@ int fsl_rio_setup_rmu(struct rio_mport *mport, struct device_node *node)
 	aw = of_n_addr_cells(node);
 	msg_addr = of_get_property(node, "reg", &mlen);
 	if (!msg_addr) {
-		pr_err("%s: unable to find 'reg' property of message-unit\n",
-			node->full_name);
+		pr_err("%pOF: unable to find 'reg' property of message-unit\n",
+			node);
 		kfree(rmu);
 		return -ENOMEM;
 	}
@@ -1098,8 +1098,8 @@ int fsl_rio_setup_rmu(struct rio_mport *mport, struct device_node *node)
 
 	rmu->txirq = irq_of_parse_and_map(node, 0);
 	rmu->rxirq = irq_of_parse_and_map(node, 1);
-	printk(KERN_INFO "%s: txirq: %d, rxirq %d\n",
-		node->full_name, rmu->txirq, rmu->rxirq);
+	printk(KERN_INFO "%pOF: txirq: %d, rxirq %d\n",
+		node, rmu->txirq, rmu->rxirq);
 
 	priv->rmm_handle = rmu;
 
diff --git a/arch/powerpc/sysdev/fsl_soc.c b/arch/powerpc/sysdev/fsl_soc.c
index 19101f9cfcfc..1f614fb2be56 100644
--- a/arch/powerpc/sysdev/fsl_soc.c
+++ b/arch/powerpc/sysdev/fsl_soc.c
@@ -98,7 +98,7 @@ u32 fsl_get_sys_freq(void)
 }
 EXPORT_SYMBOL(fsl_get_sys_freq);
 
-#if defined(CONFIG_CPM2) || defined(CONFIG_QUICC_ENGINE) || defined(CONFIG_8xx)
+#if defined(CONFIG_CPM) || defined(CONFIG_QUICC_ENGINE)
 
 u32 get_brgfreq(void)
 {
diff --git a/arch/powerpc/sysdev/fsl_soc.h b/arch/powerpc/sysdev/fsl_soc.h
index d73daa4f0ccf..2640446f8bc4 100644
--- a/arch/powerpc/sysdev/fsl_soc.h
+++ b/arch/powerpc/sysdev/fsl_soc.h
@@ -7,7 +7,7 @@
 struct spi_device;
 
 extern phys_addr_t get_immrbase(void);
-#if defined(CONFIG_CPM2) || defined(CONFIG_QUICC_ENGINE) || defined(CONFIG_8xx)
+#if defined(CONFIG_CPM) || defined(CONFIG_QUICC_ENGINE)
 extern u32 get_brgfreq(void);
 extern u32 get_baudrate(void);
 #else
diff --git a/arch/powerpc/sysdev/ipic.c b/arch/powerpc/sysdev/ipic.c
index f267ee0afc08..16f1edd78c40 100644
--- a/arch/powerpc/sysdev/ipic.c
+++ b/arch/powerpc/sysdev/ipic.c
@@ -315,6 +315,7 @@ static struct ipic_info ipic_info[] = {
 		.prio_mask = 7,
 	},
 	[48] = {
+		.ack	= IPIC_SEPNR,
 		.mask	= IPIC_SEMSR,
 		.prio	= IPIC_SMPRR_A,
 		.force	= IPIC_SEFCR,
diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c
index b9aac951a90f..ead3e2549ebf 100644
--- a/arch/powerpc/sysdev/mpic.c
+++ b/arch/powerpc/sysdev/mpic.c
@@ -1650,8 +1650,8 @@ void __init mpic_init(struct mpic *mpic)
 	if (mpic->flags & MPIC_SECONDARY) {
 		int virq = irq_of_parse_and_map(mpic->node, 0);
 		if (virq) {
-			printk(KERN_INFO "%s: hooking up to IRQ %d\n",
-					mpic->node->full_name, virq);
+			printk(KERN_INFO "%pOF: hooking up to IRQ %d\n",
+					mpic->node, virq);
 			irq_set_handler_data(virq, mpic);
 			irq_set_chained_handler(virq, &mpic_cascade);
 		}
diff --git a/arch/powerpc/sysdev/mpic_msgr.c b/arch/powerpc/sysdev/mpic_msgr.c
index db2286be5d9a..eb69a5186243 100644
--- a/arch/powerpc/sysdev/mpic_msgr.c
+++ b/arch/powerpc/sysdev/mpic_msgr.c
@@ -192,7 +192,7 @@ static int mpic_msgr_probe(struct platform_device *dev)
 			return -ENOMEM;
 		}
 	}
-	dev_info(&dev->dev, "Of-device full name %s\n", np->full_name);
+	dev_info(&dev->dev, "Of-device full name %pOF\n", np);
 
 	/* IO map the message register block. */
 	of_address_to_resource(np, 0, &rsrc);
diff --git a/arch/powerpc/sysdev/mpic_msi.c b/arch/powerpc/sysdev/mpic_msi.c
index 1d48a5385905..9ed860aee9c3 100644
--- a/arch/powerpc/sysdev/mpic_msi.c
+++ b/arch/powerpc/sysdev/mpic_msi.c
@@ -60,7 +60,7 @@ static int mpic_msi_reserve_u3_hwirqs(struct mpic *mpic)
 
 	np = NULL;
 	while ((np = of_find_all_nodes(np))) {
-		pr_debug("mpic: mapping hwirqs for %s\n", np->full_name);
+		pr_debug("mpic: mapping hwirqs for %pOF\n", np);
 
 		index = 0;
 		while (of_irq_parse_one(np, index++, &oirq) == 0) {
diff --git a/arch/powerpc/sysdev/mpic_timer.c b/arch/powerpc/sysdev/mpic_timer.c
index 9d9b06217f8b..a418579591be 100644
--- a/arch/powerpc/sysdev/mpic_timer.c
+++ b/arch/powerpc/sysdev/mpic_timer.c
@@ -466,8 +466,7 @@ static int timer_group_get_irq(struct device_node *np,
 
 	p = of_get_property(np, "fsl,available-ranges", &len);
 	if (p && len % (2 * sizeof(u32)) != 0) {
-		pr_err("%s: malformed available-ranges property.\n",
-				np->full_name);
+		pr_err("%pOF: malformed available-ranges property.\n", np);
 		return -EINVAL;
 	}
 
@@ -484,8 +483,7 @@ static int timer_group_get_irq(struct device_node *np,
 		for (j = 0; j < count; j++) {
 			irq = irq_of_parse_and_map(np, irq_index);
 			if (!irq) {
-				pr_err("%s: irq parse and map failed.\n",
-						np->full_name);
+				pr_err("%pOF: irq parse and map failed.\n", np);
 				return -EINVAL;
 			}
 
@@ -508,8 +506,7 @@ static void timer_group_init(struct device_node *np)
 
 	priv = kzalloc(sizeof(struct timer_group_priv), GFP_KERNEL);
 	if (!priv) {
-		pr_err("%s: cannot allocate memory for group.\n",
-				np->full_name);
+		pr_err("%pOF: cannot allocate memory for group.\n", np);
 		return;
 	}
 
@@ -518,29 +515,27 @@ static void timer_group_init(struct device_node *np)
 
 	priv->regs = of_iomap(np, i++);
 	if (!priv->regs) {
-		pr_err("%s: cannot ioremap timer register address.\n",
-				np->full_name);
+		pr_err("%pOF: cannot ioremap timer register address.\n", np);
 		goto out;
 	}
 
 	if (priv->flags & FSL_GLOBAL_TIMER) {
 		priv->group_tcr = of_iomap(np, i++);
 		if (!priv->group_tcr) {
-			pr_err("%s: cannot ioremap tcr address.\n",
-					np->full_name);
+			pr_err("%pOF: cannot ioremap tcr address.\n", np);
 			goto out;
 		}
 	}
 
 	ret = timer_group_get_freq(np, priv);
 	if (ret < 0) {
-		pr_err("%s: cannot get timer frequency.\n", np->full_name);
+		pr_err("%pOF: cannot get timer frequency.\n", np);
 		goto out;
 	}
 
 	ret = timer_group_get_irq(np, priv);
 	if (ret < 0) {
-		pr_err("%s: cannot get timer irqs.\n", np->full_name);
+		pr_err("%pOF: cannot get timer irqs.\n", np);
 		goto out;
 	}
 
diff --git a/arch/powerpc/sysdev/msi_bitmap.c b/arch/powerpc/sysdev/msi_bitmap.c
index 5ebd3f018295..c4dae27172b3 100644
--- a/arch/powerpc/sysdev/msi_bitmap.c
+++ b/arch/powerpc/sysdev/msi_bitmap.c
@@ -86,13 +86,13 @@ int msi_bitmap_reserve_dt_hwirqs(struct msi_bitmap *bmp)
 	p = of_get_property(bmp->of_node, "msi-available-ranges", &len);
 	if (!p) {
 		pr_debug("msi_bitmap: no msi-available-ranges property " \
-			 "found on %s\n", bmp->of_node->full_name);
+			 "found on %pOF\n", bmp->of_node);
 		return 1;
 	}
 
 	if (len % (2 * sizeof(u32)) != 0) {
 		printk(KERN_WARNING "msi_bitmap: Malformed msi-available-ranges"
-		       " property on %s\n", bmp->of_node->full_name);
+		       " property on %pOF\n", bmp->of_node);
 		return -EINVAL;
 	}
 
diff --git a/arch/powerpc/sysdev/mv64x60_dev.c b/arch/powerpc/sysdev/mv64x60_dev.c
index 026bbc3b2c47..185a67e742a6 100644
--- a/arch/powerpc/sysdev/mv64x60_dev.c
+++ b/arch/powerpc/sysdev/mv64x60_dev.c
@@ -452,8 +452,8 @@ static int __init mv64x60_device_setup(void)
 		err = mv64x60_mpsc_device_setup(np, id++);
 		if (err)
 			printk(KERN_ERR "Failed to initialize MV64x60 "
-					"serial device %s: error %d.\n",
-					np->full_name, err);
+					"serial device %pOF: error %d.\n",
+					np, err);
 	}
 
 	id = 0;
@@ -463,8 +463,8 @@ static int __init mv64x60_device_setup(void)
 		if (IS_ERR(pdev)) {
 			err = PTR_ERR(pdev);
 			printk(KERN_ERR "Failed to initialize MV64x60 "
-					"network block %s: error %d.\n",
-					np->full_name, err);
+					"network block %pOF: error %d.\n",
+					np, err);
 			continue;
 		}
 		for_each_child_of_node(np, np2) {
@@ -474,9 +474,9 @@ static int __init mv64x60_device_setup(void)
 			err = mv64x60_eth_device_setup(np2, id2++, pdev);
 			if (err)
 				printk(KERN_ERR "Failed to initialize "
-						"MV64x60 network device %s: "
+						"MV64x60 network device %pOF: "
 						"error %d.\n",
-						np2->full_name, err);
+						np2, err);
 		}
 	}
 
@@ -485,8 +485,8 @@ static int __init mv64x60_device_setup(void)
 		err = mv64x60_i2c_device_setup(np, id++);
 		if (err)
 			printk(KERN_ERR "Failed to initialize MV64x60 I2C "
-					"bus %s: error %d.\n",
-					np->full_name, err);
+					"bus %pOF: error %d.\n",
+					np, err);
 	}
 
 	/* support up to one watchdog timer */
@@ -494,8 +494,8 @@ static int __init mv64x60_device_setup(void)
 	if (np) {
 		if ((err = mv64x60_wdt_device_setup(np, id)))
 			printk(KERN_ERR "Failed to initialize MV64x60 "
-					"Watchdog %s: error %d.\n",
-					np->full_name, err);
+					"Watchdog %pOF: error %d.\n",
+					np, err);
 		of_node_put(np);
 	}
 
diff --git a/arch/powerpc/sysdev/mv64x60_pci.c b/arch/powerpc/sysdev/mv64x60_pci.c
index 330d56613c5a..d52b3b81e05f 100644
--- a/arch/powerpc/sysdev/mv64x60_pci.c
+++ b/arch/powerpc/sysdev/mv64x60_pci.c
@@ -70,7 +70,7 @@ static ssize_t mv64x60_hs_reg_write(struct file *filp, struct kobject *kobj,
 	return count;
 }
 
-static struct bin_attribute mv64x60_hs_reg_attr = { /* Hotswap register */
+static const struct bin_attribute mv64x60_hs_reg_attr = { /* Hotswap register */
 	.attr = {
 		.name = "hs_reg",
 		.mode = S_IRUGO | S_IWUSR,
@@ -136,8 +136,8 @@ static int __init mv64x60_add_bridge(struct device_node *dev)
 	/* Get bus range if any */
 	bus_range = of_get_property(dev, "bus-range", &len);
 	if (bus_range == NULL || len < 2 * sizeof(int))
-		printk(KERN_WARNING "Can't get bus-range for %s, assume"
-		       " bus 0\n", dev->full_name);
+		printk(KERN_WARNING "Can't get bus-range for %pOF, assume"
+		       " bus 0\n", dev);
 
 	hose = pcibios_alloc_controller(dev);
 	if (!hose)
diff --git a/arch/powerpc/sysdev/of_rtc.c b/arch/powerpc/sysdev/of_rtc.c
index 6f54b54b1328..153fdac4720f 100644
--- a/arch/powerpc/sysdev/of_rtc.c
+++ b/arch/powerpc/sysdev/of_rtc.c
@@ -38,21 +38,21 @@ void __init of_instantiate_rtc(void)
 			res = kmalloc(sizeof(*res), GFP_KERNEL);
 			if (!res) {
 				printk(KERN_ERR "OF RTC: Out of memory "
-				       "allocating resource structure for %s\n",
-				       node->full_name);
+				       "allocating resource structure for %pOF\n",
+				       node);
 				continue;
 			}
 
 			err = of_address_to_resource(node, 0, res);
 			if (err) {
 				printk(KERN_ERR "OF RTC: Error "
-				       "translating resources for %s\n",
-				       node->full_name);
+				       "translating resources for %pOF\n",
+				       node);
 				continue;
 			}
 
-			printk(KERN_INFO "OF_RTC: %s is a %s @ 0x%llx-0x%llx\n",
-			       node->full_name, plat_name,
+			printk(KERN_INFO "OF_RTC: %pOF is a %s @ 0x%llx-0x%llx\n",
+			       node, plat_name,
 			       (unsigned long long)res->start,
 			       (unsigned long long)res->end);
 			platform_device_register_simple(plat_name, -1, res, 1);
diff --git a/arch/powerpc/sysdev/scom.c b/arch/powerpc/sysdev/scom.c
index 76ea32c1b664..0f6fd5d04d33 100644
--- a/arch/powerpc/sysdev/scom.c
+++ b/arch/powerpc/sysdev/scom.c
@@ -194,12 +194,13 @@ static int scom_debug_init_one(struct dentry *root, struct device_node *dn,
 
 	ent->dn = of_node_get(dn);
 	snprintf(ent->name, 16, "%08x", i);
-	ent->path.data = (void*) dn->full_name;
-	ent->path.size = strlen(dn->full_name);
+	ent->path.data = (void*)kasprintf(GFP_KERNEL, "%pOF", dn);
+	ent->path.size = strlen((char *)ent->path.data);
 
 	dir = debugfs_create_dir(ent->name, root);
 	if (!dir) {
 		of_node_put(dn);
+		kfree(ent->path.data);
 		kfree(ent);
 		return -1;
 	}
diff --git a/arch/powerpc/sysdev/simple_gpio.c b/arch/powerpc/sysdev/simple_gpio.c
index 6afddae2fb47..f02d4576138c 100644
--- a/arch/powerpc/sysdev/simple_gpio.c
+++ b/arch/powerpc/sysdev/simple_gpio.c
@@ -142,7 +142,6 @@ void __init simple_gpiochip_init(const char *compatible)
 		}
 		continue;
 err:
-		pr_err("%s: registration failed, status %d\n",
-		       np->full_name, ret);
+		pr_err("%pOF: registration failed, status %d\n", np, ret);
 	}
 }
diff --git a/arch/powerpc/sysdev/tsi108_pci.c b/arch/powerpc/sysdev/tsi108_pci.c
index 5692dd569b9b..28ff1f53cefc 100644
--- a/arch/powerpc/sysdev/tsi108_pci.c
+++ b/arch/powerpc/sysdev/tsi108_pci.c
@@ -213,8 +213,8 @@ int __init tsi108_setup_pci(struct device_node *dev, u32 cfg_phys, int primary)
 	/* Get bus range if any */
 	bus_range = of_get_property(dev, "bus-range", &len);
 	if (bus_range == NULL || len < 2 * sizeof(int)) {
-		printk(KERN_WARNING "Can't get bus-range for %s, assume"
-		       " bus 0\n", dev->full_name);
+		printk(KERN_WARNING "Can't get bus-range for %pOF, assume"
+		       " bus 0\n", dev);
 	}
 
 	hose = pcibios_alloc_controller(dev);
diff --git a/arch/powerpc/sysdev/xive/Kconfig b/arch/powerpc/sysdev/xive/Kconfig
index 12ccd7373d2f..3e3e25b5e30d 100644
--- a/arch/powerpc/sysdev/xive/Kconfig
+++ b/arch/powerpc/sysdev/xive/Kconfig
@@ -9,3 +9,8 @@ config PPC_XIVE_NATIVE
 	default n
 	select PPC_XIVE
 	depends on PPC_POWERNV
+
+config PPC_XIVE_SPAPR
+	bool
+	default n
+	select PPC_XIVE
diff --git a/arch/powerpc/sysdev/xive/Makefile b/arch/powerpc/sysdev/xive/Makefile
index 3fab303fc169..536d6e5706e3 100644
--- a/arch/powerpc/sysdev/xive/Makefile
+++ b/arch/powerpc/sysdev/xive/Makefile
@@ -2,3 +2,4 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
 
 obj-y				+= common.o
 obj-$(CONFIG_PPC_XIVE_NATIVE)	+= native.o
+obj-$(CONFIG_PPC_XIVE_SPAPR)	+= spapr.o
diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c
index 6595462b1fc8..f387318678b9 100644
--- a/arch/powerpc/sysdev/xive/common.c
+++ b/arch/powerpc/sysdev/xive/common.c
@@ -40,7 +40,8 @@
 #undef DEBUG_ALL
 
 #ifdef DEBUG_ALL
-#define DBG_VERBOSE(fmt...)	pr_devel(fmt)
+#define DBG_VERBOSE(fmt, ...)	pr_devel("cpu %d - " fmt, \
+					 smp_processor_id(), ## __VA_ARGS__)
 #else
 #define DBG_VERBOSE(fmt...)	do { } while(0)
 #endif
@@ -190,7 +191,7 @@ static u32 xive_scan_interrupts(struct xive_cpu *xc, bool just_peek)
  * This is used to perform the magic loads from an ESB
  * described in xive.h
  */
-static u8 xive_poke_esb(struct xive_irq_data *xd, u32 offset)
+static notrace u8 xive_esb_read(struct xive_irq_data *xd, u32 offset)
 {
 	u64 val;
 
@@ -198,13 +199,28 @@ static u8 xive_poke_esb(struct xive_irq_data *xd, u32 offset)
 	if (xd->flags & XIVE_IRQ_FLAG_SHIFT_BUG)
 		offset |= offset << 4;
 
-	val = in_be64(xd->eoi_mmio + offset);
+	if ((xd->flags & XIVE_IRQ_FLAG_H_INT_ESB) && xive_ops->esb_rw)
+		val = xive_ops->esb_rw(xd->hw_irq, offset, 0, 0);
+	else
+		val = in_be64(xd->eoi_mmio + offset);
 
 	return (u8)val;
 }
 
+static void xive_esb_write(struct xive_irq_data *xd, u32 offset, u64 data)
+{
+	/* Handle HW errata */
+	if (xd->flags & XIVE_IRQ_FLAG_SHIFT_BUG)
+		offset |= offset << 4;
+
+	if ((xd->flags & XIVE_IRQ_FLAG_H_INT_ESB) && xive_ops->esb_rw)
+		xive_ops->esb_rw(xd->hw_irq, offset, data, 1);
+	else
+		out_be64(xd->eoi_mmio + offset, data);
+}
+
 #ifdef CONFIG_XMON
-static void xive_dump_eq(const char *name, struct xive_q *q)
+static notrace void xive_dump_eq(const char *name, struct xive_q *q)
 {
 	u32 i0, i1, idx;
 
@@ -218,7 +234,7 @@ static void xive_dump_eq(const char *name, struct xive_q *q)
 		    q->toggle, i0, i1);
 }
 
-void xmon_xive_do_dump(int cpu)
+notrace void xmon_xive_do_dump(int cpu)
 {
 	struct xive_cpu *xc = per_cpu(xive_cpu, cpu);
 
@@ -227,7 +243,7 @@ void xmon_xive_do_dump(int cpu)
 	xive_dump_eq("IRQ", &xc->queue[xive_irq_priority]);
 #ifdef CONFIG_SMP
 	{
-		u64 val = xive_poke_esb(&xc->ipi_data, XIVE_ESB_GET);
+		u64 val = xive_esb_read(&xc->ipi_data, XIVE_ESB_GET);
 		xmon_printf("  IPI state: %x:%c%c\n", xc->hw_ipi,
 			val & XIVE_ESB_VAL_P ? 'P' : 'p',
 			val & XIVE_ESB_VAL_P ? 'Q' : 'q');
@@ -297,7 +313,7 @@ void xive_do_source_eoi(u32 hw_irq, struct xive_irq_data *xd)
 {
 	/* If the XIVE supports the new "store EOI facility, use it */
 	if (xd->flags & XIVE_IRQ_FLAG_STORE_EOI)
-		out_be64(xd->eoi_mmio + XIVE_ESB_STORE_EOI, 0);
+		xive_esb_write(xd, XIVE_ESB_STORE_EOI, 0);
 	else if (hw_irq && xd->flags & XIVE_IRQ_FLAG_EOI_FW) {
 		/*
 		 * The FW told us to call it. This happens for some
@@ -326,10 +342,10 @@ void xive_do_source_eoi(u32 hw_irq, struct xive_irq_data *xd)
 		 * properly.
 		 */
 		if (xd->flags & XIVE_IRQ_FLAG_LSI)
-			in_be64(xd->eoi_mmio);
+			xive_esb_read(xd, XIVE_ESB_LOAD_EOI);
 		else {
-			eoi_val = xive_poke_esb(xd, XIVE_ESB_SET_PQ_00);
-			DBG_VERBOSE("eoi_val=%x\n", offset, eoi_val);
+			eoi_val = xive_esb_read(xd, XIVE_ESB_SET_PQ_00);
+			DBG_VERBOSE("eoi_val=%x\n", eoi_val);
 
 			/* Re-trigger if needed */
 			if ((eoi_val & XIVE_ESB_VAL_Q) && xd->trig_mmio)
@@ -383,12 +399,12 @@ static void xive_do_source_set_mask(struct xive_irq_data *xd,
 	 * ESB accordingly on unmask.
 	 */
 	if (mask) {
-		val = xive_poke_esb(xd, XIVE_ESB_SET_PQ_01);
+		val = xive_esb_read(xd, XIVE_ESB_SET_PQ_01);
 		xd->saved_p = !!(val & XIVE_ESB_VAL_P);
 	} else if (xd->saved_p)
-		xive_poke_esb(xd, XIVE_ESB_SET_PQ_10);
+		xive_esb_read(xd, XIVE_ESB_SET_PQ_10);
 	else
-		xive_poke_esb(xd, XIVE_ESB_SET_PQ_00);
+		xive_esb_read(xd, XIVE_ESB_SET_PQ_00);
 }
 
 /*
@@ -447,7 +463,7 @@ static int xive_find_target_in_mask(const struct cpumask *mask,
 	int cpu, first, num, i;
 
 	/* Pick up a starting point CPU in the mask based on  fuzz */
-	num = cpumask_weight(mask);
+	num = min_t(int, cpumask_weight(mask), nr_cpu_ids);
 	first = fuzz % num;
 
 	/* Locate it */
@@ -672,6 +688,10 @@ static int xive_irq_set_affinity(struct irq_data *d,
 	if (cpumask_any_and(cpumask, cpu_online_mask) >= nr_cpu_ids)
 		return -EINVAL;
 
+	/* Don't do anything if the interrupt isn't started */
+	if (!irqd_is_started(d))
+		return IRQ_SET_MASK_OK;
+
 	/*
 	 * If existing target is already in the new mask, and is
 	 * online then do nothing.
@@ -768,7 +788,7 @@ static int xive_irq_retrigger(struct irq_data *d)
 	 * To perform a retrigger, we first set the PQ bits to
 	 * 11, then perform an EOI.
 	 */
-	xive_poke_esb(xd, XIVE_ESB_SET_PQ_11);
+	xive_esb_read(xd, XIVE_ESB_SET_PQ_11);
 
 	/*
 	 * Note: We pass "0" to the hw_irq argument in order to
@@ -803,7 +823,7 @@ static int xive_irq_set_vcpu_affinity(struct irq_data *d, void *state)
 		irqd_set_forwarded_to_vcpu(d);
 
 		/* Set it to PQ=10 state to prevent further sends */
-		pq = xive_poke_esb(xd, XIVE_ESB_SET_PQ_10);
+		pq = xive_esb_read(xd, XIVE_ESB_SET_PQ_10);
 
 		/* No target ? nothing to do */
 		if (xd->target == XIVE_INVALID_TARGET) {
@@ -832,7 +852,7 @@ static int xive_irq_set_vcpu_affinity(struct irq_data *d, void *state)
 		 * for sure the queue slot is no longer in use.
 		 */
 		if (pq & 2) {
-			pq = xive_poke_esb(xd, XIVE_ESB_SET_PQ_11);
+			pq = xive_esb_read(xd, XIVE_ESB_SET_PQ_11);
 			xd->saved_p = true;
 
 			/*
@@ -989,6 +1009,9 @@ static void xive_ipi_eoi(struct irq_data *d)
 {
 	struct xive_cpu *xc = __this_cpu_read(xive_cpu);
 
+	DBG_VERBOSE("IPI eoi: irq=%d [0x%lx] (HW IRQ 0x%x) pending=%02x\n",
+		    d->irq, irqd_to_hwirq(d), xc->hw_ipi, xc->pending_prio);
+
 	/* Handle possible race with unplug and drop stale IPIs */
 	if (!xc)
 		return;
@@ -1368,6 +1391,19 @@ void xive_flush_interrupt(void)
 
 #endif /* CONFIG_SMP */
 
+void xive_teardown_cpu(void)
+{
+	struct xive_cpu *xc = __this_cpu_read(xive_cpu);
+	unsigned int cpu = smp_processor_id();
+
+	/* Set CPPR to 0 to disable flow of interrupts */
+	xc->cppr = 0;
+	out_8(xive_tima + xive_tima_offset + TM_CPPR, 0);
+
+	if (xive_ops->teardown_cpu)
+		xive_ops->teardown_cpu(cpu, xc);
+}
+
 void xive_kexec_teardown_cpu(int secondary)
 {
 	struct xive_cpu *xc = __this_cpu_read(xive_cpu);
@@ -1395,8 +1431,8 @@ void xive_shutdown(void)
 	xive_ops->shutdown();
 }
 
-bool xive_core_init(const struct xive_ops *ops, void __iomem *area, u32 offset,
-		    u8 max_prio)
+bool __init xive_core_init(const struct xive_ops *ops, void __iomem *area, u32 offset,
+			   u8 max_prio)
 {
 	xive_tima = area;
 	xive_tima_offset = offset;
@@ -1424,6 +1460,22 @@ bool xive_core_init(const struct xive_ops *ops, void __iomem *area, u32 offset,
 	return true;
 }
 
+__be32 *xive_queue_page_alloc(unsigned int cpu, u32 queue_shift)
+{
+	unsigned int alloc_order;
+	struct page *pages;
+	__be32 *qpage;
+
+	alloc_order = xive_alloc_order(queue_shift);
+	pages = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL, alloc_order);
+	if (!pages)
+		return ERR_PTR(-ENOMEM);
+	qpage = (__be32 *)page_address(pages);
+	memset(qpage, 0, 1 << queue_shift);
+
+	return qpage;
+}
+
 static int __init xive_off(char *arg)
 {
 	xive_cmdline_disabled = true;
diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c
index 0f95476b01f6..44f3a25ca630 100644
--- a/arch/powerpc/sysdev/xive/native.c
+++ b/arch/powerpc/sysdev/xive/native.c
@@ -82,6 +82,8 @@ int xive_native_populate_irq_data(u32 hw_irq, struct xive_irq_data *data)
 		return -ENOMEM;
 	}
 
+	data->hw_irq = hw_irq;
+
 	if (!data->trig_page)
 		return 0;
 	if (data->trig_page == data->eoi_page) {
@@ -202,17 +204,12 @@ EXPORT_SYMBOL_GPL(xive_native_disable_queue);
 static int xive_native_setup_queue(unsigned int cpu, struct xive_cpu *xc, u8 prio)
 {
 	struct xive_q *q = &xc->queue[prio];
-	unsigned int alloc_order;
-	struct page *pages;
 	__be32 *qpage;
 
-	alloc_order = (xive_queue_shift > PAGE_SHIFT) ?
-		(xive_queue_shift - PAGE_SHIFT) : 0;
-	pages = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL, alloc_order);
-	if (!pages)
-		return -ENOMEM;
-	qpage = (__be32 *)page_address(pages);
-	memset(qpage, 0, 1 << xive_queue_shift);
+	qpage = xive_queue_page_alloc(cpu, xive_queue_shift);
+	if (IS_ERR(qpage))
+		return PTR_ERR(qpage);
+
 	return xive_native_configure_queue(get_hard_smp_processor_id(cpu),
 					   q, prio, qpage, xive_queue_shift, false);
 }
@@ -227,8 +224,7 @@ static void xive_native_cleanup_queue(unsigned int cpu, struct xive_cpu *xc, u8
 	 * from an IPI and iounmap isn't safe
 	 */
 	__xive_native_disable_queue(get_hard_smp_processor_id(cpu), q, prio);
-	alloc_order = (xive_queue_shift > PAGE_SHIFT) ?
-		(xive_queue_shift - PAGE_SHIFT) : 0;
+	alloc_order = xive_alloc_order(xive_queue_shift);
 	free_pages((unsigned long)q->qpage, alloc_order);
 	q->qpage = NULL;
 }
@@ -531,7 +527,7 @@ u32 xive_native_default_eq_shift(void)
 }
 EXPORT_SYMBOL_GPL(xive_native_default_eq_shift);
 
-bool xive_native_init(void)
+bool __init xive_native_init(void)
 {
 	struct device_node *np;
 	struct resource r;
@@ -551,7 +547,7 @@ bool xive_native_init(void)
 		pr_devel("not found !\n");
 		return false;
 	}
-	pr_devel("Found %s\n", np->full_name);
+	pr_devel("Found %pOF\n", np);
 
 	/* Resource 1 is HV window */
 	if (of_address_to_resource(np, 1, &r)) {
diff --git a/arch/powerpc/sysdev/xive/spapr.c b/arch/powerpc/sysdev/xive/spapr.c
new file mode 100644
index 000000000000..f24a70bc6855
--- /dev/null
+++ b/arch/powerpc/sysdev/xive/spapr.c
@@ -0,0 +1,662 @@
+/*
+ * Copyright 2016,2017 IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#define pr_fmt(fmt) "xive: " fmt
+
+#include <linux/types.h>
+#include <linux/irq.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/of.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/cpumask.h>
+#include <linux/mm.h>
+
+#include <asm/prom.h>
+#include <asm/io.h>
+#include <asm/smp.h>
+#include <asm/irq.h>
+#include <asm/errno.h>
+#include <asm/xive.h>
+#include <asm/xive-regs.h>
+#include <asm/hvcall.h>
+
+#include "xive-internal.h"
+
+static u32 xive_queue_shift;
+
+struct xive_irq_bitmap {
+	unsigned long		*bitmap;
+	unsigned int		base;
+	unsigned int		count;
+	spinlock_t		lock;
+	struct list_head	list;
+};
+
+static LIST_HEAD(xive_irq_bitmaps);
+
+static int xive_irq_bitmap_add(int base, int count)
+{
+	struct xive_irq_bitmap *xibm;
+
+	xibm = kzalloc(sizeof(*xibm), GFP_ATOMIC);
+	if (!xibm)
+		return -ENOMEM;
+
+	spin_lock_init(&xibm->lock);
+	xibm->base = base;
+	xibm->count = count;
+	xibm->bitmap = kzalloc(xibm->count, GFP_KERNEL);
+	list_add(&xibm->list, &xive_irq_bitmaps);
+
+	pr_info("Using IRQ range [%x-%x]", xibm->base,
+		xibm->base + xibm->count - 1);
+	return 0;
+}
+
+static int __xive_irq_bitmap_alloc(struct xive_irq_bitmap *xibm)
+{
+	int irq;
+
+	irq = find_first_zero_bit(xibm->bitmap, xibm->count);
+	if (irq != xibm->count) {
+		set_bit(irq, xibm->bitmap);
+		irq += xibm->base;
+	} else {
+		irq = -ENOMEM;
+	}
+
+	return irq;
+}
+
+static int xive_irq_bitmap_alloc(void)
+{
+	struct xive_irq_bitmap *xibm;
+	unsigned long flags;
+	int irq = -ENOENT;
+
+	list_for_each_entry(xibm, &xive_irq_bitmaps, list) {
+		spin_lock_irqsave(&xibm->lock, flags);
+		irq = __xive_irq_bitmap_alloc(xibm);
+		spin_unlock_irqrestore(&xibm->lock, flags);
+		if (irq >= 0)
+			break;
+	}
+	return irq;
+}
+
+static void xive_irq_bitmap_free(int irq)
+{
+	unsigned long flags;
+	struct xive_irq_bitmap *xibm;
+
+	list_for_each_entry(xibm, &xive_irq_bitmaps, list) {
+		if ((irq >= xibm->base) && (irq < xibm->base + xibm->count)) {
+			spin_lock_irqsave(&xibm->lock, flags);
+			clear_bit(irq - xibm->base, xibm->bitmap);
+			spin_unlock_irqrestore(&xibm->lock, flags);
+			break;
+		}
+	}
+}
+
+static long plpar_int_get_source_info(unsigned long flags,
+				      unsigned long lisn,
+				      unsigned long *src_flags,
+				      unsigned long *eoi_page,
+				      unsigned long *trig_page,
+				      unsigned long *esb_shift)
+{
+	unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+	long rc;
+
+	rc = plpar_hcall(H_INT_GET_SOURCE_INFO, retbuf, flags, lisn);
+	if (rc) {
+		pr_err("H_INT_GET_SOURCE_INFO lisn=%ld failed %ld\n", lisn, rc);
+		return rc;
+	}
+
+	*src_flags = retbuf[0];
+	*eoi_page  = retbuf[1];
+	*trig_page = retbuf[2];
+	*esb_shift = retbuf[3];
+
+	pr_devel("H_INT_GET_SOURCE_INFO flags=%lx eoi=%lx trig=%lx shift=%lx\n",
+		retbuf[0], retbuf[1], retbuf[2], retbuf[3]);
+
+	return 0;
+}
+
+#define XIVE_SRC_SET_EISN (1ull << (63 - 62))
+#define XIVE_SRC_MASK     (1ull << (63 - 63)) /* unused */
+
+static long plpar_int_set_source_config(unsigned long flags,
+					unsigned long lisn,
+					unsigned long target,
+					unsigned long prio,
+					unsigned long sw_irq)
+{
+	long rc;
+
+
+	pr_devel("H_INT_SET_SOURCE_CONFIG flags=%lx lisn=%lx target=%lx prio=%lx sw_irq=%lx\n",
+		flags, lisn, target, prio, sw_irq);
+
+
+	rc = plpar_hcall_norets(H_INT_SET_SOURCE_CONFIG, flags, lisn,
+				target, prio, sw_irq);
+	if (rc) {
+		pr_err("H_INT_SET_SOURCE_CONFIG lisn=%ld target=%lx prio=%lx failed %ld\n",
+		       lisn, target, prio, rc);
+		return rc;
+	}
+
+	return 0;
+}
+
+static long plpar_int_get_queue_info(unsigned long flags,
+				     unsigned long target,
+				     unsigned long priority,
+				     unsigned long *esn_page,
+				     unsigned long *esn_size)
+{
+	unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+	long rc;
+
+	rc = plpar_hcall(H_INT_GET_QUEUE_INFO, retbuf, flags, target, priority);
+	if (rc) {
+		pr_err("H_INT_GET_QUEUE_INFO cpu=%ld prio=%ld failed %ld\n",
+		       target, priority, rc);
+		return rc;
+	}
+
+	*esn_page = retbuf[0];
+	*esn_size = retbuf[1];
+
+	pr_devel("H_INT_GET_QUEUE_INFO page=%lx size=%lx\n",
+		retbuf[0], retbuf[1]);
+
+	return 0;
+}
+
+#define XIVE_EQ_ALWAYS_NOTIFY (1ull << (63 - 63))
+
+static long plpar_int_set_queue_config(unsigned long flags,
+				       unsigned long target,
+				       unsigned long priority,
+				       unsigned long qpage,
+				       unsigned long qsize)
+{
+	long rc;
+
+	pr_devel("H_INT_SET_QUEUE_CONFIG flags=%lx target=%lx priority=%lx qpage=%lx qsize=%lx\n",
+		flags,  target, priority, qpage, qsize);
+
+	rc = plpar_hcall_norets(H_INT_SET_QUEUE_CONFIG, flags, target,
+				priority, qpage, qsize);
+	if (rc) {
+		pr_err("H_INT_SET_QUEUE_CONFIG cpu=%ld prio=%ld qpage=%lx returned %ld\n",
+		       target, priority, qpage, rc);
+		return  rc;
+	}
+
+	return 0;
+}
+
+static long plpar_int_sync(unsigned long flags, unsigned long lisn)
+{
+	long rc;
+
+	rc = plpar_hcall_norets(H_INT_SYNC, flags, lisn);
+	if (rc) {
+		pr_err("H_INT_SYNC lisn=%ld returned %ld\n", lisn, rc);
+		return  rc;
+	}
+
+	return 0;
+}
+
+#define XIVE_ESB_FLAG_STORE (1ull << (63 - 63))
+
+static long plpar_int_esb(unsigned long flags,
+			  unsigned long lisn,
+			  unsigned long offset,
+			  unsigned long in_data,
+			  unsigned long *out_data)
+{
+	unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+	long rc;
+
+	pr_devel("H_INT_ESB flags=%lx lisn=%lx offset=%lx in=%lx\n",
+		flags,  lisn, offset, in_data);
+
+	rc = plpar_hcall(H_INT_ESB, retbuf, flags, lisn, offset, in_data);
+	if (rc) {
+		pr_err("H_INT_ESB lisn=%ld offset=%ld returned %ld\n",
+		       lisn, offset, rc);
+		return  rc;
+	}
+
+	*out_data = retbuf[0];
+
+	return 0;
+}
+
+static u64 xive_spapr_esb_rw(u32 lisn, u32 offset, u64 data, bool write)
+{
+	unsigned long read_data;
+	long rc;
+
+	rc = plpar_int_esb(write ? XIVE_ESB_FLAG_STORE : 0,
+			   lisn, offset, data, &read_data);
+	if (rc)
+		return -1;
+
+	return write ? 0 : read_data;
+}
+
+#define XIVE_SRC_H_INT_ESB     (1ull << (63 - 60))
+#define XIVE_SRC_LSI           (1ull << (63 - 61))
+#define XIVE_SRC_TRIGGER       (1ull << (63 - 62))
+#define XIVE_SRC_STORE_EOI     (1ull << (63 - 63))
+
+static int xive_spapr_populate_irq_data(u32 hw_irq, struct xive_irq_data *data)
+{
+	long rc;
+	unsigned long flags;
+	unsigned long eoi_page;
+	unsigned long trig_page;
+	unsigned long esb_shift;
+
+	memset(data, 0, sizeof(*data));
+
+	rc = plpar_int_get_source_info(0, hw_irq, &flags, &eoi_page, &trig_page,
+				       &esb_shift);
+	if (rc)
+		return  -EINVAL;
+
+	if (flags & XIVE_SRC_H_INT_ESB)
+		data->flags  |= XIVE_IRQ_FLAG_H_INT_ESB;
+	if (flags & XIVE_SRC_STORE_EOI)
+		data->flags  |= XIVE_IRQ_FLAG_STORE_EOI;
+	if (flags & XIVE_SRC_LSI)
+		data->flags  |= XIVE_IRQ_FLAG_LSI;
+	data->eoi_page  = eoi_page;
+	data->esb_shift = esb_shift;
+	data->trig_page = trig_page;
+
+	/*
+	 * No chip-id for the sPAPR backend. This has an impact how we
+	 * pick a target. See xive_pick_irq_target().
+	 */
+	data->src_chip = XIVE_INVALID_CHIP_ID;
+
+	data->eoi_mmio = ioremap(data->eoi_page, 1u << data->esb_shift);
+	if (!data->eoi_mmio) {
+		pr_err("Failed to map EOI page for irq 0x%x\n", hw_irq);
+		return -ENOMEM;
+	}
+
+	data->hw_irq = hw_irq;
+
+	/* Full function page supports trigger */
+	if (flags & XIVE_SRC_TRIGGER) {
+		data->trig_mmio = data->eoi_mmio;
+		return 0;
+	}
+
+	data->trig_mmio = ioremap(data->trig_page, 1u << data->esb_shift);
+	if (!data->trig_mmio) {
+		pr_err("Failed to map trigger page for irq 0x%x\n", hw_irq);
+		return -ENOMEM;
+	}
+	return 0;
+}
+
+static int xive_spapr_configure_irq(u32 hw_irq, u32 target, u8 prio, u32 sw_irq)
+{
+	long rc;
+
+	rc = plpar_int_set_source_config(XIVE_SRC_SET_EISN, hw_irq, target,
+					 prio, sw_irq);
+
+	return rc == 0 ? 0 : -ENXIO;
+}
+
+/* This can be called multiple time to change a queue configuration */
+static int xive_spapr_configure_queue(u32 target, struct xive_q *q, u8 prio,
+				   __be32 *qpage, u32 order)
+{
+	s64 rc = 0;
+	unsigned long esn_page;
+	unsigned long esn_size;
+	u64 flags, qpage_phys;
+
+	/* If there's an actual queue page, clean it */
+	if (order) {
+		if (WARN_ON(!qpage))
+			return -EINVAL;
+		qpage_phys = __pa(qpage);
+	} else {
+		qpage_phys = 0;
+	}
+
+	/* Initialize the rest of the fields */
+	q->msk = order ? ((1u << (order - 2)) - 1) : 0;
+	q->idx = 0;
+	q->toggle = 0;
+
+	rc = plpar_int_get_queue_info(0, target, prio, &esn_page, &esn_size);
+	if (rc) {
+		pr_err("Error %lld getting queue info prio %d\n", rc, prio);
+		rc = -EIO;
+		goto fail;
+	}
+
+	/* TODO: add support for the notification page */
+	q->eoi_phys = esn_page;
+
+	/* Default is to always notify */
+	flags = XIVE_EQ_ALWAYS_NOTIFY;
+
+	/* Configure and enable the queue in HW */
+	rc = plpar_int_set_queue_config(flags, target, prio, qpage_phys, order);
+	if (rc) {
+		pr_err("Error %lld setting queue for prio %d\n", rc, prio);
+		rc = -EIO;
+	} else {
+		q->qpage = qpage;
+	}
+fail:
+	return rc;
+}
+
+static int xive_spapr_setup_queue(unsigned int cpu, struct xive_cpu *xc,
+				  u8 prio)
+{
+	struct xive_q *q = &xc->queue[prio];
+	__be32 *qpage;
+
+	qpage = xive_queue_page_alloc(cpu, xive_queue_shift);
+	if (IS_ERR(qpage))
+		return PTR_ERR(qpage);
+
+	return xive_spapr_configure_queue(cpu, q, prio, qpage,
+					  xive_queue_shift);
+}
+
+static void xive_spapr_cleanup_queue(unsigned int cpu, struct xive_cpu *xc,
+				  u8 prio)
+{
+	struct xive_q *q = &xc->queue[prio];
+	unsigned int alloc_order;
+	long rc;
+
+	rc = plpar_int_set_queue_config(0, cpu, prio, 0, 0);
+	if (rc)
+		pr_err("Error %ld setting queue for prio %d\n", rc, prio);
+
+	alloc_order = xive_alloc_order(xive_queue_shift);
+	free_pages((unsigned long)q->qpage, alloc_order);
+	q->qpage = NULL;
+}
+
+static bool xive_spapr_match(struct device_node *node)
+{
+	/* Ignore cascaded controllers for the moment */
+	return 1;
+}
+
+#ifdef CONFIG_SMP
+static int xive_spapr_get_ipi(unsigned int cpu, struct xive_cpu *xc)
+{
+	int irq = xive_irq_bitmap_alloc();
+
+	if (irq < 0) {
+		pr_err("Failed to allocate IPI on CPU %d\n", cpu);
+		return -ENXIO;
+	}
+
+	xc->hw_ipi = irq;
+	return 0;
+}
+
+static void xive_spapr_put_ipi(unsigned int cpu, struct xive_cpu *xc)
+{
+	xive_irq_bitmap_free(xc->hw_ipi);
+}
+#endif /* CONFIG_SMP */
+
+static void xive_spapr_shutdown(void)
+{
+	long rc;
+
+	rc = plpar_hcall_norets(H_INT_RESET, 0);
+	if (rc)
+		pr_err("H_INT_RESET failed %ld\n", rc);
+}
+
+/*
+ * Perform an "ack" cycle on the current thread. Grab the pending
+ * active priorities and update the CPPR to the most favored one.
+ */
+static void xive_spapr_update_pending(struct xive_cpu *xc)
+{
+	u8 nsr, cppr;
+	u16 ack;
+
+	/*
+	 * Perform the "Acknowledge O/S to Register" cycle.
+	 *
+	 * Let's speedup the access to the TIMA using the raw I/O
+	 * accessor as we don't need the synchronisation routine of
+	 * the higher level ones
+	 */
+	ack = be16_to_cpu(__raw_readw(xive_tima + TM_SPC_ACK_OS_REG));
+
+	/* Synchronize subsequent queue accesses */
+	mb();
+
+	/*
+	 * Grab the CPPR and the "NSR" field which indicates the source
+	 * of the interrupt (if any)
+	 */
+	cppr = ack & 0xff;
+	nsr = ack >> 8;
+
+	if (nsr & TM_QW1_NSR_EO) {
+		if (cppr == 0xff)
+			return;
+		/* Mark the priority pending */
+		xc->pending_prio |= 1 << cppr;
+
+		/*
+		 * A new interrupt should never have a CPPR less favored
+		 * than our current one.
+		 */
+		if (cppr >= xc->cppr)
+			pr_err("CPU %d odd ack CPPR, got %d at %d\n",
+			       smp_processor_id(), cppr, xc->cppr);
+
+		/* Update our idea of what the CPPR is */
+		xc->cppr = cppr;
+	}
+}
+
+static void xive_spapr_eoi(u32 hw_irq)
+{
+	/* Not used */;
+}
+
+static void xive_spapr_setup_cpu(unsigned int cpu, struct xive_cpu *xc)
+{
+	/* Only some debug on the TIMA settings */
+	pr_debug("(HW value: %08x %08x %08x)\n",
+		 in_be32(xive_tima + TM_QW1_OS + TM_WORD0),
+		 in_be32(xive_tima + TM_QW1_OS + TM_WORD1),
+		 in_be32(xive_tima + TM_QW1_OS + TM_WORD2));
+}
+
+static void xive_spapr_teardown_cpu(unsigned int cpu, struct xive_cpu *xc)
+{
+	/* Nothing to do */;
+}
+
+static void xive_spapr_sync_source(u32 hw_irq)
+{
+	/* Specs are unclear on what this is doing */
+	plpar_int_sync(0, hw_irq);
+}
+
+static const struct xive_ops xive_spapr_ops = {
+	.populate_irq_data	= xive_spapr_populate_irq_data,
+	.configure_irq		= xive_spapr_configure_irq,
+	.setup_queue		= xive_spapr_setup_queue,
+	.cleanup_queue		= xive_spapr_cleanup_queue,
+	.match			= xive_spapr_match,
+	.shutdown		= xive_spapr_shutdown,
+	.update_pending		= xive_spapr_update_pending,
+	.eoi			= xive_spapr_eoi,
+	.setup_cpu		= xive_spapr_setup_cpu,
+	.teardown_cpu		= xive_spapr_teardown_cpu,
+	.sync_source		= xive_spapr_sync_source,
+	.esb_rw			= xive_spapr_esb_rw,
+#ifdef CONFIG_SMP
+	.get_ipi		= xive_spapr_get_ipi,
+	.put_ipi		= xive_spapr_put_ipi,
+#endif /* CONFIG_SMP */
+	.name			= "spapr",
+};
+
+/*
+ * get max priority from "/ibm,plat-res-int-priorities"
+ */
+static bool xive_get_max_prio(u8 *max_prio)
+{
+	struct device_node *rootdn;
+	const __be32 *reg;
+	u32 len;
+	int prio, found;
+
+	rootdn = of_find_node_by_path("/");
+	if (!rootdn) {
+		pr_err("not root node found !\n");
+		return false;
+	}
+
+	reg = of_get_property(rootdn, "ibm,plat-res-int-priorities", &len);
+	if (!reg) {
+		pr_err("Failed to read 'ibm,plat-res-int-priorities' property\n");
+		return false;
+	}
+
+	if (len % (2 * sizeof(u32)) != 0) {
+		pr_err("invalid 'ibm,plat-res-int-priorities' property\n");
+		return false;
+	}
+
+	/* HW supports priorities in the range [0-7] and 0xFF is a
+	 * wildcard priority used to mask. We scan the ranges reserved
+	 * by the hypervisor to find the lowest priority we can use.
+	 */
+	found = 0xFF;
+	for (prio = 0; prio < 8; prio++) {
+		int reserved = 0;
+		int i;
+
+		for (i = 0; i < len / (2 * sizeof(u32)); i++) {
+			int base  = be32_to_cpu(reg[2 * i]);
+			int range = be32_to_cpu(reg[2 * i + 1]);
+
+			if (prio >= base && prio < base + range)
+				reserved++;
+		}
+
+		if (!reserved)
+			found = prio;
+	}
+
+	if (found == 0xFF) {
+		pr_err("no valid priority found in 'ibm,plat-res-int-priorities'\n");
+		return false;
+	}
+
+	*max_prio = found;
+	return true;
+}
+
+bool __init xive_spapr_init(void)
+{
+	struct device_node *np;
+	struct resource r;
+	void __iomem *tima;
+	struct property *prop;
+	u8 max_prio;
+	u32 val;
+	u32 len;
+	const __be32 *reg;
+	int i;
+
+	if (xive_cmdline_disabled)
+		return false;
+
+	pr_devel("%s()\n", __func__);
+	np = of_find_compatible_node(NULL, NULL, "ibm,power-ivpe");
+	if (!np) {
+		pr_devel("not found !\n");
+		return false;
+	}
+	pr_devel("Found %s\n", np->full_name);
+
+	/* Resource 1 is the OS ring TIMA */
+	if (of_address_to_resource(np, 1, &r)) {
+		pr_err("Failed to get thread mgmnt area resource\n");
+		return false;
+	}
+	tima = ioremap(r.start, resource_size(&r));
+	if (!tima) {
+		pr_err("Failed to map thread mgmnt area\n");
+		return false;
+	}
+
+	if (!xive_get_max_prio(&max_prio))
+		return false;
+
+	/* Feed the IRQ number allocator with the ranges given in the DT */
+	reg = of_get_property(np, "ibm,xive-lisn-ranges", &len);
+	if (!reg) {
+		pr_err("Failed to read 'ibm,xive-lisn-ranges' property\n");
+		return false;
+	}
+
+	if (len % (2 * sizeof(u32)) != 0) {
+		pr_err("invalid 'ibm,xive-lisn-ranges' property\n");
+		return false;
+	}
+
+	for (i = 0; i < len / (2 * sizeof(u32)); i++, reg += 2)
+		xive_irq_bitmap_add(be32_to_cpu(reg[0]),
+				    be32_to_cpu(reg[1]));
+
+	/* Iterate the EQ sizes and pick one */
+	of_property_for_each_u32(np, "ibm,xive-eq-sizes", prop, reg, val) {
+		xive_queue_shift = val;
+		if (val == PAGE_SHIFT)
+			break;
+	}
+
+	/* Initialize XIVE core with our backend */
+	if (!xive_core_init(&xive_spapr_ops, tima, TM_QW1_OS, max_prio))
+		return false;
+
+	pr_info("Using %dkB queues\n", 1 << (xive_queue_shift - 10));
+	return true;
+}
diff --git a/arch/powerpc/sysdev/xive/xive-internal.h b/arch/powerpc/sysdev/xive/xive-internal.h
index d07ef2d29caf..f34abed0c05f 100644
--- a/arch/powerpc/sysdev/xive/xive-internal.h
+++ b/arch/powerpc/sysdev/xive/xive-internal.h
@@ -47,6 +47,7 @@ struct xive_ops {
 	void	(*update_pending)(struct xive_cpu *xc);
 	void	(*eoi)(u32 hw_irq);
 	void	(*sync_source)(u32 hw_irq);
+	u64	(*esb_rw)(u32 hw_irq, u32 offset, u64 data, bool write);
 #ifdef CONFIG_SMP
 	int	(*get_ipi)(unsigned int cpu, struct xive_cpu *xc);
 	void	(*put_ipi)(unsigned int cpu, struct xive_cpu *xc);
@@ -56,6 +57,12 @@ struct xive_ops {
 
 bool xive_core_init(const struct xive_ops *ops, void __iomem *area, u32 offset,
 		    u8 max_prio);
+__be32 *xive_queue_page_alloc(unsigned int cpu, u32 queue_shift);
+
+static inline u32 xive_alloc_order(u32 queue_shift)
+{
+	return (queue_shift > PAGE_SHIFT) ? (queue_shift - PAGE_SHIFT) : 0;
+}
 
 extern bool xive_cmdline_disabled;
 
diff --git a/arch/powerpc/xmon/Makefile b/arch/powerpc/xmon/Makefile
index 0b2f771593eb..1dd88315cff4 100644
--- a/arch/powerpc/xmon/Makefile
+++ b/arch/powerpc/xmon/Makefile
@@ -5,6 +5,10 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
 GCOV_PROFILE := n
 UBSAN_SANITIZE := n
 
+# Disable ftrace for the entire directory
+ORIG_CFLAGS := $(KBUILD_CFLAGS)
+KBUILD_CFLAGS = $(subst -mno-sched-epilog,,$(subst $(CC_FLAGS_FTRACE),,$(ORIG_CFLAGS)))
+
 ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC)
 
 obj-y			+= xmon.o nonstdio.o spr_access.o
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 08e367e3e8c3..33351c6704b1 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -89,6 +89,7 @@ static unsigned long nidump = 16;
 static unsigned long ncsum = 4096;
 static int termch;
 static char tmpstr[128];
+static int tracing_enabled;
 
 static long bus_error_jmp[JMP_BUF_LEN];
 static int catch_memory_errors;
@@ -234,6 +235,7 @@ Commands:\n\
   "\
   dr	dump stream of raw bytes\n\
   dt	dump the tracing buffers (uses printk)\n\
+  dtc	dump the tracing buffers for current CPU (uses printk)\n\
 "
 #ifdef CONFIG_PPC_POWERNV
 "  dx#   dump xive on CPU #\n\
@@ -461,6 +463,9 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
 	local_irq_save(flags);
 	hard_irq_disable();
 
+	tracing_enabled = tracing_is_on();
+	tracing_off();
+
 	bp = in_breakpoint_table(regs->nip, &offset);
 	if (bp != NULL) {
 		regs->nip = bp->address + offset;
@@ -981,6 +986,8 @@ cmds(struct pt_regs *excp)
 			break;
 		case 'x':
 		case 'X':
+			if (tracing_enabled)
+				tracing_on();
 			return cmd;
 		case EOF:
 			printf(" <no input ...>\n");
@@ -1732,23 +1739,25 @@ static void dump_206_sprs(void)
 
 	/* Actually some of these pre-date 2.06, but whatevs */
 
-	printf("srr0   = %.16x  srr1  = %.16x dsisr  = %.8x\n",
+	printf("srr0   = %.16lx  srr1  = %.16lx dsisr  = %.8x\n",
 		mfspr(SPRN_SRR0), mfspr(SPRN_SRR1), mfspr(SPRN_DSISR));
-	printf("dscr   = %.16x  ppr   = %.16x pir    = %.8x\n",
+	printf("dscr   = %.16lx  ppr   = %.16lx pir    = %.8x\n",
 		mfspr(SPRN_DSCR), mfspr(SPRN_PPR), mfspr(SPRN_PIR));
+	printf("amr    = %.16lx  uamor = %.16lx\n",
+		mfspr(SPRN_AMR), mfspr(SPRN_UAMOR));
 
 	if (!(mfmsr() & MSR_HV))
 		return;
 
-	printf("sdr1   = %.16x  hdar  = %.16x hdsisr = %.8x\n",
+	printf("sdr1   = %.16lx  hdar  = %.16lx hdsisr = %.8x\n",
 		mfspr(SPRN_SDR1), mfspr(SPRN_HDAR), mfspr(SPRN_HDSISR));
-	printf("hsrr0  = %.16x hsrr1  = %.16x hdec = %.8x\n",
+	printf("hsrr0  = %.16lx hsrr1  = %.16lx hdec   = %.16lx\n",
 		mfspr(SPRN_HSRR0), mfspr(SPRN_HSRR1), mfspr(SPRN_HDEC));
-	printf("lpcr   = %.16x  pcr   = %.16x lpidr = %.8x\n",
+	printf("lpcr   = %.16lx  pcr   = %.16lx lpidr  = %.8x\n",
 		mfspr(SPRN_LPCR), mfspr(SPRN_PCR), mfspr(SPRN_LPID));
-	printf("hsprg0 = %.16x hsprg1 = %.16x\n",
-		mfspr(SPRN_HSPRG0), mfspr(SPRN_HSPRG1));
-	printf("dabr   = %.16x dabrx  = %.16x\n",
+	printf("hsprg0 = %.16lx hsprg1 = %.16lx amor   = %.16lx\n",
+		mfspr(SPRN_HSPRG0), mfspr(SPRN_HSPRG1), mfspr(SPRN_AMOR));
+	printf("dabr   = %.16lx dabrx  = %.16lx\n",
 		mfspr(SPRN_DABR), mfspr(SPRN_DABRX));
 #endif
 }
@@ -1761,42 +1770,65 @@ static void dump_207_sprs(void)
 	if (!cpu_has_feature(CPU_FTR_ARCH_207S))
 		return;
 
-	printf("dpdes  = %.16x  tir   = %.16x cir    = %.8x\n",
+	printf("dpdes  = %.16lx  tir   = %.16lx cir    = %.8x\n",
 		mfspr(SPRN_DPDES), mfspr(SPRN_TIR), mfspr(SPRN_CIR));
 
-	printf("fscr   = %.16x  tar   = %.16x pspb   = %.8x\n",
+	printf("fscr   = %.16lx  tar   = %.16lx pspb   = %.8x\n",
 		mfspr(SPRN_FSCR), mfspr(SPRN_TAR), mfspr(SPRN_PSPB));
 
 	msr = mfmsr();
 	if (msr & MSR_TM) {
 		/* Only if TM has been enabled in the kernel */
-		printf("tfhar  = %.16x  tfiar = %.16x texasr = %.16x\n",
+		printf("tfhar  = %.16lx  tfiar = %.16lx texasr = %.16lx\n",
 			mfspr(SPRN_TFHAR), mfspr(SPRN_TFIAR),
 			mfspr(SPRN_TEXASR));
 	}
 
-	printf("mmcr0  = %.16x  mmcr1 = %.16x mmcr2  = %.16x\n",
+	printf("mmcr0  = %.16lx  mmcr1 = %.16lx mmcr2  = %.16lx\n",
 		mfspr(SPRN_MMCR0), mfspr(SPRN_MMCR1), mfspr(SPRN_MMCR2));
 	printf("pmc1   = %.8x pmc2 = %.8x  pmc3 = %.8x  pmc4   = %.8x\n",
 		mfspr(SPRN_PMC1), mfspr(SPRN_PMC2),
 		mfspr(SPRN_PMC3), mfspr(SPRN_PMC4));
-	printf("mmcra  = %.16x   siar = %.16x pmc5   = %.8x\n",
+	printf("mmcra  = %.16lx   siar = %.16lx pmc5   = %.8x\n",
 		mfspr(SPRN_MMCRA), mfspr(SPRN_SIAR), mfspr(SPRN_PMC5));
-	printf("sdar   = %.16x   sier = %.16x pmc6   = %.8x\n",
+	printf("sdar   = %.16lx   sier = %.16lx pmc6   = %.8x\n",
 		mfspr(SPRN_SDAR), mfspr(SPRN_SIER), mfspr(SPRN_PMC6));
-	printf("ebbhr  = %.16x  ebbrr = %.16x bescr  = %.16x\n",
+	printf("ebbhr  = %.16lx  ebbrr = %.16lx bescr  = %.16lx\n",
 		mfspr(SPRN_EBBHR), mfspr(SPRN_EBBRR), mfspr(SPRN_BESCR));
+	printf("iamr   = %.16lx\n", mfspr(SPRN_IAMR));
 
 	if (!(msr & MSR_HV))
 		return;
 
-	printf("hfscr  = %.16x  dhdes = %.16x rpr    = %.16x\n",
+	printf("hfscr  = %.16lx  dhdes = %.16lx rpr    = %.16lx\n",
 		mfspr(SPRN_HFSCR), mfspr(SPRN_DHDES), mfspr(SPRN_RPR));
-	printf("dawr   = %.16x  dawrx = %.16x ciabr  = %.16x\n",
+	printf("dawr   = %.16lx  dawrx = %.16lx ciabr  = %.16lx\n",
 		mfspr(SPRN_DAWR), mfspr(SPRN_DAWRX), mfspr(SPRN_CIABR));
 #endif
 }
 
+static void dump_300_sprs(void)
+{
+#ifdef CONFIG_PPC64
+	bool hv = mfmsr() & MSR_HV;
+
+	if (!cpu_has_feature(CPU_FTR_ARCH_300))
+		return;
+
+	printf("pidr   = %.16lx  tidr  = %.16lx\n",
+		mfspr(SPRN_PID), mfspr(SPRN_TIDR));
+	printf("asdr   = %.16lx  psscr = %.16lx\n",
+		mfspr(SPRN_ASDR), hv ? mfspr(SPRN_PSSCR)
+					: mfspr(SPRN_PSSCR_PR));
+
+	if (!hv)
+		return;
+
+	printf("ptcr   = %.16lx\n",
+		mfspr(SPRN_PTCR));
+#endif
+}
+
 static void dump_one_spr(int spr, bool show_unimplemented)
 {
 	unsigned long val;
@@ -1850,6 +1882,7 @@ static void super_regs(void)
 
 		dump_206_sprs();
 		dump_207_sprs();
+		dump_300_sprs();
 
 		return;
 	}
@@ -2231,6 +2264,17 @@ static void xmon_rawdump (unsigned long adrs, long ndump)
 	printf("\n");
 }
 
+static void dump_tracing(void)
+{
+	int c;
+
+	c = inchar();
+	if (c == 'c')
+		ftrace_dump(DUMP_ORIG);
+	else
+		ftrace_dump(DUMP_ALL);
+}
+
 #ifdef CONFIG_PPC64
 static void dump_one_paca(int cpu)
 {
@@ -2507,6 +2551,11 @@ dump(void)
 	}
 #endif
 
+	if (c == 't') {
+		dump_tracing();
+		return;
+	}
+
 	if (c == '\n')
 		termch = c;
 
@@ -2525,9 +2574,6 @@ dump(void)
 		dump_log_buf();
 	} else if (c == 'o') {
 		dump_opal_msglog();
-	} else if (c == 't') {
-		ftrace_dump(DUMP_ALL);
-		tracing_on();
 	} else if (c == 'r') {
 		scanhex(&ndump);
 		if (ndump == 0)
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 7eeb75d758c1..48af970320cb 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -222,6 +222,10 @@ config HAVE_MARCH_Z13_FEATURES
 	def_bool n
 	select HAVE_MARCH_ZEC12_FEATURES
 
+config HAVE_MARCH_Z14_FEATURES
+	def_bool n
+	select HAVE_MARCH_Z13_FEATURES
+
 choice
 	prompt "Processor type"
 	default MARCH_Z196
@@ -282,6 +286,14 @@ config MARCH_Z13
 	  2964 series). The kernel will be slightly faster but will not work on
 	  older machines.
 
+config MARCH_Z14
+	bool "IBM z14"
+	select HAVE_MARCH_Z14_FEATURES
+	help
+	  Select this to enable optimizations for IBM z14 (3906 series).
+	  The kernel will be slightly faster but will not work on older
+	  machines.
+
 endchoice
 
 config MARCH_Z900_TUNE
@@ -305,6 +317,9 @@ config MARCH_ZEC12_TUNE
 config MARCH_Z13_TUNE
 	def_bool TUNE_Z13 || MARCH_Z13 && TUNE_DEFAULT
 
+config MARCH_Z14_TUNE
+	def_bool TUNE_Z14 || MARCH_Z14 && TUNE_DEFAULT
+
 choice
 	prompt "Tune code generation"
 	default TUNE_DEFAULT
@@ -343,6 +358,9 @@ config TUNE_ZEC12
 config TUNE_Z13
 	bool "IBM z13"
 
+config TUNE_Z14
+	bool "IBM z14"
+
 endchoice
 
 config 64BIT
diff --git a/arch/s390/Makefile b/arch/s390/Makefile
index 54e00526b8df..dac821cfcd43 100644
--- a/arch/s390/Makefile
+++ b/arch/s390/Makefile
@@ -31,7 +31,8 @@ mflags-$(CONFIG_MARCH_Z9_109) := -march=z9-109
 mflags-$(CONFIG_MARCH_Z10)    := -march=z10
 mflags-$(CONFIG_MARCH_Z196)   := -march=z196
 mflags-$(CONFIG_MARCH_ZEC12)  := -march=zEC12
-mflags-$(CONFIG_MARCH_Z13)   := -march=z13
+mflags-$(CONFIG_MARCH_Z13)    := -march=z13
+mflags-$(CONFIG_MARCH_Z14)    := -march=z14
 
 export CC_FLAGS_MARCH := $(mflags-y)
 
@@ -44,7 +45,8 @@ cflags-$(CONFIG_MARCH_Z9_109_TUNE)	+= -mtune=z9-109
 cflags-$(CONFIG_MARCH_Z10_TUNE)		+= -mtune=z10
 cflags-$(CONFIG_MARCH_Z196_TUNE)	+= -mtune=z196
 cflags-$(CONFIG_MARCH_ZEC12_TUNE)	+= -mtune=zEC12
-cflags-$(CONFIG_MARCH_Z13_TUNE)	+= -mtune=z13
+cflags-$(CONFIG_MARCH_Z13_TUNE)		+= -mtune=z13
+cflags-$(CONFIG_MARCH_Z14_TUNE)		+= -mtune=z14
 
 cflags-y += -Wa,-I$(srctree)/arch/$(ARCH)/include
 
diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild
index b3c88479feba..6e2c9f7e47fa 100644
--- a/arch/s390/include/asm/Kbuild
+++ b/arch/s390/include/asm/Kbuild
@@ -16,4 +16,5 @@ generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
 generic-y += preempt.h
 generic-y += trace_clock.h
+generic-y += unaligned.h
 generic-y += word-at-a-time.h
diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h
index b9300f8aee10..07a82bc933a7 100644
--- a/arch/s390/include/asm/compat.h
+++ b/arch/s390/include/asm/compat.h
@@ -8,11 +8,12 @@
 #include <linux/sched/task_stack.h>
 #include <linux/thread_info.h>
 
-#define __TYPE_IS_PTR(t) (!__builtin_types_compatible_p(typeof(0?(t)0:0ULL), u64))
+#define __TYPE_IS_PTR(t) (!__builtin_types_compatible_p( \
+				typeof(0?(__force t)0:0ULL), u64))
 
 #define __SC_DELOUSE(t,v) ({ \
 	BUILD_BUG_ON(sizeof(t) > 4 && !__TYPE_IS_PTR(t)); \
-	(t)(__TYPE_IS_PTR(t) ? ((v) & 0x7fffffff) : (v)); \
+	(__force t)(__TYPE_IS_PTR(t) ? ((v) & 0x7fffffff) : (v)); \
 })
 
 #define PSW32_MASK_PER		0x40000000UL
diff --git a/arch/s390/include/asm/cpcmd.h b/arch/s390/include/asm/cpcmd.h
index 3dfadb5d648f..ca2b0624ad46 100644
--- a/arch/s390/include/asm/cpcmd.h
+++ b/arch/s390/include/asm/cpcmd.h
@@ -10,9 +10,8 @@
 
 /*
  * the lowlevel function for cpcmd
- * the caller of __cpcmd has to ensure that the response buffer is below 2 GB
  */
-extern int __cpcmd(const char *cmd, char *response, int rlen, int *response_code);
+int __cpcmd(const char *cmd, char *response, int rlen, int *response_code);
 
 /*
  * cpcmd is the in-kernel interface for issuing CP commands
@@ -25,8 +24,8 @@ extern int __cpcmd(const char *cmd, char *response, int rlen, int *response_code
  * response_code: return pointer for VM's error code
  * return value: the size of the response. The caller can check if the buffer
  *		was large enough by comparing the return value and rlen
- * NOTE: If the response buffer is not below 2 GB, cpcmd can sleep
+ * NOTE: If the response buffer is not in real storage, cpcmd can sleep
  */
-extern int cpcmd(const char *cmd, char *response, int rlen, int *response_code);
+int cpcmd(const char *cmd, char *response, int rlen, int *response_code);
 
 #endif /* _ASM_S390_CPCMD_H */
diff --git a/arch/s390/include/asm/ebcdic.h b/arch/s390/include/asm/ebcdic.h
index c5befc5a3bf5..b71735eab23f 100644
--- a/arch/s390/include/asm/ebcdic.h
+++ b/arch/s390/include/asm/ebcdic.h
@@ -9,9 +9,7 @@
 #ifndef _EBCDIC_H
 #define _EBCDIC_H
 
-#ifndef _S390_TYPES_H
-#include <types.h>
-#endif
+#include <linux/types.h>
 
 extern __u8 _ascebc_500[256];   /* ASCII -> EBCDIC 500 conversion table */
 extern __u8 _ebcasc_500[256];   /* EBCDIC 500 -> ASCII conversion table */
diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h
index c92ed0170be2..65998a1f5d43 100644
--- a/arch/s390/include/asm/elf.h
+++ b/arch/s390/include/asm/elf.h
@@ -191,7 +191,7 @@ struct arch_elf_state {
 	} while (0)
 
 #define CORE_DUMP_USE_REGSET
-#define ELF_EXEC_PAGESIZE	4096
+#define ELF_EXEC_PAGESIZE	PAGE_SIZE
 
 /*
  * This is the base location for PIE (ET_DYN with INTERP) loads. On
diff --git a/arch/s390/include/asm/futex.h b/arch/s390/include/asm/futex.h
index a4811aa0304d..8f8eec9e1198 100644
--- a/arch/s390/include/asm/futex.h
+++ b/arch/s390/include/asm/futex.h
@@ -21,17 +21,12 @@
 		: "0" (-EFAULT), "d" (oparg), "a" (uaddr),		\
 		  "m" (*uaddr) : "cc");
 
-static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
+static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
+		u32 __user *uaddr)
 {
-	int op = (encoded_op >> 28) & 7;
-	int cmp = (encoded_op >> 24) & 15;
-	int oparg = (encoded_op << 8) >> 20;
-	int cmparg = (encoded_op << 20) >> 20;
 	int oldval = 0, newval, ret;
 
 	load_kernel_asce();
-	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
-		oparg = 1 << oparg;
 
 	pagefault_disable();
 	switch (op) {
@@ -60,17 +55,9 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
 	}
 	pagefault_enable();
 
-	if (!ret) {
-		switch (cmp) {
-		case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
-		case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
-		case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
-		case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
-		case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
-		case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
-		default: ret = -ENOSYS;
-		}
-	}
+	if (!ret)
+		*oval = oldval;
+
 	return ret;
 }
 
diff --git a/arch/s390/include/asm/ipl.h b/arch/s390/include/asm/ipl.h
index edb5161df7e2..6810bd757312 100644
--- a/arch/s390/include/asm/ipl.h
+++ b/arch/s390/include/asm/ipl.h
@@ -81,7 +81,7 @@ struct ipl_parameter_block {
 		struct ipl_block_fcp fcp;
 		struct ipl_block_ccw ccw;
 	} ipl_info;
-} __attribute__((packed,aligned(4096)));
+} __packed __aligned(PAGE_SIZE);
 
 /*
  * IPL validity flags
diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index 8a5b082797f8..a6870ea6ea8b 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -95,46 +95,46 @@ struct lowcore {
 	__u64	int_clock;			/* 0x0310 */
 	__u64	mcck_clock;			/* 0x0318 */
 	__u64	clock_comparator;		/* 0x0320 */
+	__u64	boot_clock[2];			/* 0x0328 */
 
 	/* Current process. */
-	__u64	current_task;			/* 0x0328 */
-	__u8	pad_0x318[0x320-0x318];		/* 0x0330 */
-	__u64	kernel_stack;			/* 0x0338 */
+	__u64	current_task;			/* 0x0338 */
+	__u64	kernel_stack;			/* 0x0340 */
 
 	/* Interrupt, panic and restart stack. */
-	__u64	async_stack;			/* 0x0340 */
-	__u64	panic_stack;			/* 0x0348 */
-	__u64	restart_stack;			/* 0x0350 */
+	__u64	async_stack;			/* 0x0348 */
+	__u64	panic_stack;			/* 0x0350 */
+	__u64	restart_stack;			/* 0x0358 */
 
 	/* Restart function and parameter. */
-	__u64	restart_fn;			/* 0x0358 */
-	__u64	restart_data;			/* 0x0360 */
-	__u64	restart_source;			/* 0x0368 */
+	__u64	restart_fn;			/* 0x0360 */
+	__u64	restart_data;			/* 0x0368 */
+	__u64	restart_source;			/* 0x0370 */
 
 	/* Address space pointer. */
-	__u64	kernel_asce;			/* 0x0370 */
-	__u64	user_asce;			/* 0x0378 */
+	__u64	kernel_asce;			/* 0x0378 */
+	__u64	user_asce;			/* 0x0380 */
 
 	/*
 	 * The lpp and current_pid fields form a
 	 * 64-bit value that is set as program
 	 * parameter with the LPP instruction.
 	 */
-	__u32	lpp;				/* 0x0380 */
-	__u32	current_pid;			/* 0x0384 */
+	__u32	lpp;				/* 0x0388 */
+	__u32	current_pid;			/* 0x038c */
 
 	/* SMP info area */
-	__u32	cpu_nr;				/* 0x0388 */
-	__u32	softirq_pending;		/* 0x038c */
-	__u64	percpu_offset;			/* 0x0390 */
-	__u64	vdso_per_cpu_data;		/* 0x0398 */
-	__u64	machine_flags;			/* 0x03a0 */
-	__u32	preempt_count;			/* 0x03a8 */
-	__u8	pad_0x03ac[0x03b0-0x03ac];	/* 0x03ac */
-	__u64	gmap;				/* 0x03b0 */
-	__u32	spinlock_lockval;		/* 0x03b8 */
-	__u32	fpu_flags;			/* 0x03bc */
-	__u8	pad_0x03c0[0x0400-0x03c0];	/* 0x03c0 */
+	__u32	cpu_nr;				/* 0x0390 */
+	__u32	softirq_pending;		/* 0x0394 */
+	__u64	percpu_offset;			/* 0x0398 */
+	__u64	vdso_per_cpu_data;		/* 0x03a0 */
+	__u64	machine_flags;			/* 0x03a8 */
+	__u32	preempt_count;			/* 0x03b0 */
+	__u8	pad_0x03b4[0x03b8-0x03b4];	/* 0x03b4 */
+	__u64	gmap;				/* 0x03b8 */
+	__u32	spinlock_lockval;		/* 0x03c0 */
+	__u32	fpu_flags;			/* 0x03c4 */
+	__u8	pad_0x03c8[0x0400-0x03c8];	/* 0x03c8 */
 
 	/* Per cpu primary space access list */
 	__u32	paste[16];			/* 0x0400 */
diff --git a/arch/s390/include/asm/mman.h b/arch/s390/include/asm/mman.h
deleted file mode 100644
index b79813d9cf68..000000000000
--- a/arch/s390/include/asm/mman.h
+++ /dev/null
@@ -1,11 +0,0 @@
-/*
- *  S390 version
- *
- *  Derived from "include/asm-i386/mman.h"
- */
-#ifndef __S390_MMAN_H__
-#define __S390_MMAN_H__
-
-#include <uapi/asm/mman.h>
-
-#endif /* __S390_MMAN_H__ */
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
index 24bc41622a98..72e9ca83a668 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -12,6 +12,7 @@
 #include <linux/mm_types.h>
 #include <asm/tlbflush.h>
 #include <asm/ctl_reg.h>
+#include <asm-generic/mm_hooks.h>
 
 static inline int init_new_context(struct task_struct *tsk,
 				   struct mm_struct *mm)
@@ -33,7 +34,7 @@ static inline int init_new_context(struct task_struct *tsk,
 	mm->context.use_cmma = 0;
 #endif
 	switch (mm->context.asce_limit) {
-	case 1UL << 42:
+	case _REGION2_SIZE:
 		/*
 		 * forked 3-level task, fall through to set new asce with new
 		 * mm->pgd
@@ -49,12 +50,12 @@ static inline int init_new_context(struct task_struct *tsk,
 		mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
 			_ASCE_USER_BITS | _ASCE_TYPE_REGION1;
 		break;
-	case 1UL << 53:
+	case _REGION1_SIZE:
 		/* forked 4-level task, set new asce with new mm->pgd */
 		mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
 				   _ASCE_USER_BITS | _ASCE_TYPE_REGION2;
 		break;
-	case 1UL << 31:
+	case _REGION3_SIZE:
 		/* forked 2-level compat task, set new asce with new mm->pgd */
 		mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
 				   _ASCE_USER_BITS | _ASCE_TYPE_SEGMENT;
@@ -138,30 +139,4 @@ static inline void activate_mm(struct mm_struct *prev,
 	set_user_asce(next);
 }
 
-static inline void arch_dup_mmap(struct mm_struct *oldmm,
-				 struct mm_struct *mm)
-{
-}
-
-static inline void arch_exit_mmap(struct mm_struct *mm)
-{
-}
-
-static inline void arch_unmap(struct mm_struct *mm,
-			struct vm_area_struct *vma,
-			unsigned long start, unsigned long end)
-{
-}
-
-static inline void arch_bprm_mm_init(struct mm_struct *mm,
-				     struct vm_area_struct *vma)
-{
-}
-
-static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
-		bool write, bool execute, bool foreign)
-{
-	/* by default, allow everything */
-	return true;
-}
 #endif /* __S390_MMU_CONTEXT_H */
diff --git a/arch/s390/include/asm/nmi.h b/arch/s390/include/asm/nmi.h
index 9d91cf3e427f..c8e211b9a002 100644
--- a/arch/s390/include/asm/nmi.h
+++ b/arch/s390/include/asm/nmi.h
@@ -72,7 +72,7 @@ union mci {
 		u64 ar :  1; /* 33 access register validity */
 		u64 da :  1; /* 34 delayed access exception */
 		u64    :  1; /* 35 */
-		u64 gs :  1; /* 36 guarded storage registers */
+		u64 gs :  1; /* 36 guarded storage registers validity */
 		u64    :  5; /* 37-41 */
 		u64 pr :  1; /* 42 tod programmable register validity */
 		u64 fc :  1; /* 43 fp control register validity */
diff --git a/arch/s390/include/asm/page-states.h b/arch/s390/include/asm/page-states.h
index 42267a2fe29e..ca21b28a7b17 100644
--- a/arch/s390/include/asm/page-states.h
+++ b/arch/s390/include/asm/page-states.h
@@ -13,6 +13,7 @@
 #define ESSA_SET_POT_VOLATILE		4
 #define ESSA_SET_STABLE_RESIDENT	5
 #define ESSA_SET_STABLE_IF_RESIDENT	6
+#define ESSA_SET_STABLE_NODAT		7
 
 #define ESSA_MAX	ESSA_SET_STABLE_IF_RESIDENT
 
diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
index 624deaa44230..5d5c2b3500a4 100644
--- a/arch/s390/include/asm/page.h
+++ b/arch/s390/include/asm/page.h
@@ -10,10 +10,14 @@
 #include <linux/const.h>
 #include <asm/types.h>
 
+#define _PAGE_SHIFT	12
+#define _PAGE_SIZE	(_AC(1, UL) << _PAGE_SHIFT)
+#define _PAGE_MASK	(~(_PAGE_SIZE - 1))
+
 /* PAGE_SHIFT determines the page size */
-#define PAGE_SHIFT      12
-#define PAGE_SIZE	(_AC(1,UL) << PAGE_SHIFT)
-#define PAGE_MASK       (~(PAGE_SIZE-1))
+#define PAGE_SHIFT	_PAGE_SHIFT
+#define PAGE_SIZE	_PAGE_SIZE
+#define PAGE_MASK	_PAGE_MASK
 #define PAGE_DEFAULT_ACC	0
 #define PAGE_DEFAULT_KEY	(PAGE_DEFAULT_ACC << 4)
 
@@ -133,6 +137,9 @@ static inline int page_reset_referenced(unsigned long addr)
 struct page;
 void arch_free_page(struct page *page, int order);
 void arch_alloc_page(struct page *page, int order);
+void arch_set_page_dat(struct page *page, int order);
+void arch_set_page_nodat(struct page *page, int order);
+int arch_test_page_nodat(struct page *page);
 void arch_set_page_states(int make_stable);
 
 static inline int devmem_is_allowed(unsigned long pfn)
@@ -145,16 +152,26 @@ static inline int devmem_is_allowed(unsigned long pfn)
 
 #endif /* !__ASSEMBLY__ */
 
-#define __PAGE_OFFSET           0x0UL
-#define PAGE_OFFSET             0x0UL
-#define __pa(x)                 (unsigned long)(x)
-#define __va(x)                 (void *)(unsigned long)(x)
-#define virt_to_page(kaddr)	pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
-#define page_to_phys(page)	(page_to_pfn(page) << PAGE_SHIFT)
-#define virt_addr_valid(kaddr)	pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
+#define __PAGE_OFFSET		0x0UL
+#define PAGE_OFFSET		0x0UL
+
+#define __pa(x)			((unsigned long)(x))
+#define __va(x)			((void *)(unsigned long)(x))
+
+#define virt_to_pfn(kaddr)	(__pa(kaddr) >> PAGE_SHIFT)
 #define pfn_to_virt(pfn)	__va((pfn) << PAGE_SHIFT)
+
+#define virt_to_page(kaddr)	pfn_to_page(virt_to_pfn(kaddr))
 #define page_to_virt(page)	pfn_to_virt(page_to_pfn(page))
 
+#define phys_to_pfn(kaddr)	((kaddr) >> PAGE_SHIFT)
+#define pfn_to_phys(pfn)	((pfn) << PAGE_SHIFT)
+
+#define phys_to_page(kaddr)	pfn_to_page(phys_to_pfn(kaddr))
+#define page_to_phys(page)	(page_to_pfn(page) << PAGE_SHIFT)
+
+#define virt_addr_valid(kaddr)	pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
+
 #define VM_DATA_DEFAULT_FLAGS	(VM_READ | VM_WRITE | \
 				 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
 
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index bb0ff1bb0c4a..a0d9167519b1 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -15,6 +15,8 @@
 #include <linux/gfp.h>
 #include <linux/mm.h>
 
+#define CRST_ALLOC_ORDER 2
+
 unsigned long *crst_table_alloc(struct mm_struct *);
 void crst_table_free(struct mm_struct *, unsigned long *);
 
@@ -42,16 +44,16 @@ static inline void clear_table(unsigned long *s, unsigned long val, size_t n)
 
 static inline void crst_table_init(unsigned long *crst, unsigned long entry)
 {
-	clear_table(crst, entry, sizeof(unsigned long)*2048);
+	clear_table(crst, entry, _CRST_TABLE_SIZE);
 }
 
 static inline unsigned long pgd_entry_type(struct mm_struct *mm)
 {
-	if (mm->context.asce_limit <= (1UL << 31))
+	if (mm->context.asce_limit <= _REGION3_SIZE)
 		return _SEGMENT_ENTRY_EMPTY;
-	if (mm->context.asce_limit <= (1UL << 42))
+	if (mm->context.asce_limit <= _REGION2_SIZE)
 		return _REGION3_ENTRY_EMPTY;
-	if (mm->context.asce_limit <= (1UL << 53))
+	if (mm->context.asce_limit <= _REGION1_SIZE)
 		return _REGION2_ENTRY_EMPTY;
 	return _REGION1_ENTRY_EMPTY;
 }
@@ -119,7 +121,7 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 
 	if (!table)
 		return NULL;
-	if (mm->context.asce_limit == (1UL << 31)) {
+	if (mm->context.asce_limit == _REGION3_SIZE) {
 		/* Forking a compat process with 2 page table levels */
 		if (!pgtable_pmd_page_ctor(virt_to_page(table))) {
 			crst_table_free(mm, table);
@@ -131,7 +133,7 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 
 static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 {
-	if (mm->context.asce_limit == (1UL << 31))
+	if (mm->context.asce_limit == _REGION3_SIZE)
 		pgtable_pmd_page_dtor(virt_to_page(pgd));
 	crst_table_free(mm, (unsigned long *) pgd);
 }
@@ -158,4 +160,8 @@ static inline void pmd_populate(struct mm_struct *mm,
 
 extern void rcu_table_freelist_finish(void);
 
+void vmem_map_init(void);
+void *vmem_crst_alloc(unsigned long val);
+pte_t *vmem_pte_alloc(void);
+
 #endif /* _S390_PGALLOC_H */
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 57057fb1cc07..dce708e061ea 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -11,19 +11,6 @@
 #ifndef _ASM_S390_PGTABLE_H
 #define _ASM_S390_PGTABLE_H
 
-/*
- * The Linux memory management assumes a three-level page table setup.
- * For s390 64 bit we use up to four of the five levels the hardware
- * provides (region first tables are not used).
- *
- * The "pgd_xxx()" functions are trivial for a folded two-level
- * setup: the pgd is never bad, and a pmd always exists (as it's folded
- * into the pgd entry)
- *
- * This file contains the functions and defines necessary to modify and use
- * the S390 page table tree.
- */
-#ifndef __ASSEMBLY__
 #include <linux/sched.h>
 #include <linux/mm_types.h>
 #include <linux/page-flags.h>
@@ -34,9 +21,6 @@
 
 extern pgd_t swapper_pg_dir[];
 extern void paging_init(void);
-extern void vmem_map_init(void);
-pmd_t *vmem_pmd_alloc(void);
-pte_t *vmem_pte_alloc(void);
 
 enum {
 	PG_DIRECT_MAP_4K = 0,
@@ -77,38 +61,6 @@ extern unsigned long zero_page_mask;
 #define __HAVE_COLOR_ZERO_PAGE
 
 /* TODO: s390 cannot support io_remap_pfn_range... */
-#endif /* !__ASSEMBLY__ */
-
-/*
- * PMD_SHIFT determines the size of the area a second-level page
- * table can map
- * PGDIR_SHIFT determines what a third-level page table entry can map
- */
-#define PMD_SHIFT	20
-#define PUD_SHIFT	31
-#define P4D_SHIFT	42
-#define PGDIR_SHIFT	53
-
-#define PMD_SIZE        (1UL << PMD_SHIFT)
-#define PMD_MASK        (~(PMD_SIZE-1))
-#define PUD_SIZE	(1UL << PUD_SHIFT)
-#define PUD_MASK	(~(PUD_SIZE-1))
-#define P4D_SIZE	(1UL << P4D_SHIFT)
-#define P4D_MASK	(~(P4D_SIZE-1))
-#define PGDIR_SIZE	(1UL << PGDIR_SHIFT)
-#define PGDIR_MASK	(~(PGDIR_SIZE-1))
-
-/*
- * entries per page directory level: the S390 is two-level, so
- * we don't really have any PMD directory physically.
- * for S390 segment-table entries are combined to one PGD
- * that leads to 1024 pte per pgd
- */
-#define PTRS_PER_PTE	256
-#define PTRS_PER_PMD	2048
-#define PTRS_PER_PUD	2048
-#define PTRS_PER_P4D	2048
-#define PTRS_PER_PGD	2048
 
 #define FIRST_USER_ADDRESS  0UL
 
@@ -123,7 +75,6 @@ extern unsigned long zero_page_mask;
 #define pgd_ERROR(e) \
 	printk("%s:%d: bad pgd %p.\n", __FILE__, __LINE__, (void *) pgd_val(e))
 
-#ifndef __ASSEMBLY__
 /*
  * The vmalloc and module area will always be on the topmost area of the
  * kernel mapping. We reserve 128GB (64bit) for vmalloc and modules.
@@ -269,7 +220,7 @@ static inline int is_module_addr(void *addr)
  */
 
 /* Bits in the segment/region table address-space-control-element */
-#define _ASCE_ORIGIN		~0xfffUL/* segment table origin		    */
+#define _ASCE_ORIGIN		~0xfffUL/* region/segment table origin	    */
 #define _ASCE_PRIVATE_SPACE	0x100	/* private space control	    */
 #define _ASCE_ALT_EVENT		0x80	/* storage alteration event control */
 #define _ASCE_SPACE_SWITCH	0x40	/* space switch event		    */
@@ -320,9 +271,9 @@ static inline int is_module_addr(void *addr)
 #define _SEGMENT_ENTRY_BITS	0xfffffffffffffe33UL
 #define _SEGMENT_ENTRY_BITS_LARGE 0xfffffffffff0ff33UL
 #define _SEGMENT_ENTRY_ORIGIN_LARGE ~0xfffffUL /* large page address	    */
-#define _SEGMENT_ENTRY_ORIGIN	~0x7ffUL/* segment table origin		    */
-#define _SEGMENT_ENTRY_PROTECT	0x200	/* page protection bit		    */
-#define _SEGMENT_ENTRY_NOEXEC	0x100	/* region no-execute bit	    */
+#define _SEGMENT_ENTRY_ORIGIN	~0x7ffUL/* page table origin		    */
+#define _SEGMENT_ENTRY_PROTECT	0x200	/* segment protection bit	    */
+#define _SEGMENT_ENTRY_NOEXEC	0x100	/* segment no-execute bit	    */
 #define _SEGMENT_ENTRY_INVALID	0x20	/* invalid segment table entry	    */
 
 #define _SEGMENT_ENTRY		(0)
@@ -340,6 +291,54 @@ static inline int is_module_addr(void *addr)
 #define _SEGMENT_ENTRY_SOFT_DIRTY 0x0000 /* SW segment soft dirty bit */
 #endif
 
+#define _CRST_ENTRIES	2048	/* number of region/segment table entries */
+#define _PAGE_ENTRIES	256	/* number of page table entries	*/
+
+#define _CRST_TABLE_SIZE (_CRST_ENTRIES * 8)
+#define _PAGE_TABLE_SIZE (_PAGE_ENTRIES * 8)
+
+#define _REGION1_SHIFT	53
+#define _REGION2_SHIFT	42
+#define _REGION3_SHIFT	31
+#define _SEGMENT_SHIFT	20
+
+#define _REGION1_INDEX	(0x7ffUL << _REGION1_SHIFT)
+#define _REGION2_INDEX	(0x7ffUL << _REGION2_SHIFT)
+#define _REGION3_INDEX	(0x7ffUL << _REGION3_SHIFT)
+#define _SEGMENT_INDEX	(0x7ffUL << _SEGMENT_SHIFT)
+#define _PAGE_INDEX	(0xffUL  << _PAGE_SHIFT)
+
+#define _REGION1_SIZE	(1UL << _REGION1_SHIFT)
+#define _REGION2_SIZE	(1UL << _REGION2_SHIFT)
+#define _REGION3_SIZE	(1UL << _REGION3_SHIFT)
+#define _SEGMENT_SIZE	(1UL << _SEGMENT_SHIFT)
+
+#define _REGION1_MASK	(~(_REGION1_SIZE - 1))
+#define _REGION2_MASK	(~(_REGION2_SIZE - 1))
+#define _REGION3_MASK	(~(_REGION3_SIZE - 1))
+#define _SEGMENT_MASK	(~(_SEGMENT_SIZE - 1))
+
+#define PMD_SHIFT	_SEGMENT_SHIFT
+#define PUD_SHIFT	_REGION3_SHIFT
+#define P4D_SHIFT	_REGION2_SHIFT
+#define PGDIR_SHIFT	_REGION1_SHIFT
+
+#define PMD_SIZE	_SEGMENT_SIZE
+#define PUD_SIZE	_REGION3_SIZE
+#define P4D_SIZE	_REGION2_SIZE
+#define PGDIR_SIZE	_REGION1_SIZE
+
+#define PMD_MASK	_SEGMENT_MASK
+#define PUD_MASK	_REGION3_MASK
+#define P4D_MASK	_REGION2_MASK
+#define PGDIR_MASK	_REGION1_MASK
+
+#define PTRS_PER_PTE	_PAGE_ENTRIES
+#define PTRS_PER_PMD	_CRST_ENTRIES
+#define PTRS_PER_PUD	_CRST_ENTRIES
+#define PTRS_PER_P4D	_CRST_ENTRIES
+#define PTRS_PER_PGD	_CRST_ENTRIES
+
 /*
  * Segment table and region3 table entry encoding
  * (R = read-only, I = invalid, y = young bit):
@@ -376,6 +375,7 @@ static inline int is_module_addr(void *addr)
 
 /* Guest Page State used for virtualization */
 #define _PGSTE_GPS_ZERO			0x0000000080000000UL
+#define _PGSTE_GPS_NODAT		0x0000000040000000UL
 #define _PGSTE_GPS_USAGE_MASK		0x0000000003000000UL
 #define _PGSTE_GPS_USAGE_STABLE		0x0000000000000000UL
 #define _PGSTE_GPS_USAGE_UNUSED		0x0000000001000000UL
@@ -505,7 +505,7 @@ static inline int mm_alloc_pgste(struct mm_struct *mm)
  * In the case that a guest uses storage keys
  * faults should no longer be backed by zero pages
  */
-#define mm_forbids_zeropage mm_use_skey
+#define mm_forbids_zeropage mm_has_pgste
 static inline int mm_use_skey(struct mm_struct *mm)
 {
 #ifdef CONFIG_PGSTE
@@ -952,15 +952,30 @@ static inline pte_t pte_mkhuge(pte_t pte)
 #define IPTE_GLOBAL	0
 #define	IPTE_LOCAL	1
 
-static inline void __ptep_ipte(unsigned long address, pte_t *ptep, int local)
+#define IPTE_NODAT	0x400
+#define IPTE_GUEST_ASCE	0x800
+
+static inline void __ptep_ipte(unsigned long address, pte_t *ptep,
+			       unsigned long opt, unsigned long asce,
+			       int local)
 {
 	unsigned long pto = (unsigned long) ptep;
 
-	/* Invalidation + TLB flush for the pte */
+	if (__builtin_constant_p(opt) && opt == 0) {
+		/* Invalidation + TLB flush for the pte */
+		asm volatile(
+			"	.insn	rrf,0xb2210000,%[r1],%[r2],0,%[m4]"
+			: "+m" (*ptep) : [r1] "a" (pto), [r2] "a" (address),
+			  [m4] "i" (local));
+		return;
+	}
+
+	/* Invalidate ptes with options + TLB flush of the ptes */
+	opt = opt | (asce & _ASCE_ORIGIN);
 	asm volatile(
-		"       .insn rrf,0xb2210000,%[r1],%[r2],0,%[m4]"
-		: "+m" (*ptep) : [r1] "a" (pto), [r2] "a" (address),
-		  [m4] "i" (local));
+		"	.insn	rrf,0xb2210000,%[r1],%[r2],%[r3],%[m4]"
+		: [r2] "+a" (address), [r3] "+a" (opt)
+		: [r1] "a" (pto), [m4] "i" (local) : "memory");
 }
 
 static inline void __ptep_ipte_range(unsigned long address, int nr,
@@ -1341,31 +1356,61 @@ static inline void __pmdp_csp(pmd_t *pmdp)
 #define IDTE_GLOBAL	0
 #define IDTE_LOCAL	1
 
-static inline void __pmdp_idte(unsigned long address, pmd_t *pmdp, int local)
+#define IDTE_PTOA	0x0800
+#define IDTE_NODAT	0x1000
+#define IDTE_GUEST_ASCE	0x2000
+
+static inline void __pmdp_idte(unsigned long addr, pmd_t *pmdp,
+			       unsigned long opt, unsigned long asce,
+			       int local)
 {
 	unsigned long sto;
 
-	sto = (unsigned long) pmdp - pmd_index(address) * sizeof(pmd_t);
-	asm volatile(
-		"	.insn	rrf,0xb98e0000,%[r1],%[r2],0,%[m4]"
-		: "+m" (*pmdp)
-		: [r1] "a" (sto), [r2] "a" ((address & HPAGE_MASK)),
-		  [m4] "i" (local)
-		: "cc" );
+	sto = (unsigned long) pmdp - pmd_index(addr) * sizeof(pmd_t);
+	if (__builtin_constant_p(opt) && opt == 0) {
+		/* flush without guest asce */
+		asm volatile(
+			"	.insn	rrf,0xb98e0000,%[r1],%[r2],0,%[m4]"
+			: "+m" (*pmdp)
+			: [r1] "a" (sto), [r2] "a" ((addr & HPAGE_MASK)),
+			  [m4] "i" (local)
+			: "cc" );
+	} else {
+		/* flush with guest asce */
+		asm volatile(
+			"	.insn	rrf,0xb98e0000,%[r1],%[r2],%[r3],%[m4]"
+			: "+m" (*pmdp)
+			: [r1] "a" (sto), [r2] "a" ((addr & HPAGE_MASK) | opt),
+			  [r3] "a" (asce), [m4] "i" (local)
+			: "cc" );
+	}
 }
 
-static inline void __pudp_idte(unsigned long address, pud_t *pudp, int local)
+static inline void __pudp_idte(unsigned long addr, pud_t *pudp,
+			       unsigned long opt, unsigned long asce,
+			       int local)
 {
 	unsigned long r3o;
 
-	r3o = (unsigned long) pudp - pud_index(address) * sizeof(pud_t);
+	r3o = (unsigned long) pudp - pud_index(addr) * sizeof(pud_t);
 	r3o |= _ASCE_TYPE_REGION3;
-	asm volatile(
-		"	.insn	rrf,0xb98e0000,%[r1],%[r2],0,%[m4]"
-		: "+m" (*pudp)
-		: [r1] "a" (r3o), [r2] "a" ((address & PUD_MASK)),
-		  [m4] "i" (local)
-		: "cc");
+	if (__builtin_constant_p(opt) && opt == 0) {
+		/* flush without guest asce */
+		asm volatile(
+			"	.insn	rrf,0xb98e0000,%[r1],%[r2],0,%[m4]"
+			: "+m" (*pudp)
+			: [r1] "a" (r3o), [r2] "a" ((addr & PUD_MASK)),
+			  [m4] "i" (local)
+			: "cc");
+	} else {
+		/* flush with guest asce */
+		asm volatile(
+			"	.insn	rrf,0xb98e0000,%[r1],%[r2],%[r3],%[m4]"
+			: "+m" (*pudp)
+			: [r1] "a" (r3o), [r2] "a" ((addr & PUD_MASK) | opt),
+			  [r3] "a" (asce), [m4] "i" (local)
+			: "cc" );
+	}
 }
 
 pmd_t pmdp_xchg_direct(struct mm_struct *, unsigned long, pmd_t *, pmd_t);
@@ -1548,8 +1593,6 @@ static inline swp_entry_t __swp_entry(unsigned long type, unsigned long offset)
 #define __pte_to_swp_entry(pte)	((swp_entry_t) { pte_val(pte) })
 #define __swp_entry_to_pte(x)	((pte_t) { (x).val })
 
-#endif /* !__ASSEMBLY__ */
-
 #define kern_addr_valid(addr)   (1)
 
 extern int vmem_add_mapping(unsigned long start, unsigned long size);
diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h
index 998b61cd0e56..eaee69e7c42a 100644
--- a/arch/s390/include/asm/qdio.h
+++ b/arch/s390/include/asm/qdio.h
@@ -80,7 +80,7 @@ struct qdr {
 	u32 qkey   : 4;
 	u32	   : 28;
 	struct qdesfmt0 qdf0[126];
-} __attribute__ ((packed, aligned(4096)));
+} __packed __aligned(PAGE_SIZE);
 
 #define QIB_AC_OUTBOUND_PCI_SUPPORTED	0x40
 #define QIB_RFLAGS_ENABLE_QEBSM		0x80
diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h
index cd78155b1829..490e035b3716 100644
--- a/arch/s390/include/asm/setup.h
+++ b/arch/s390/include/asm/setup.h
@@ -29,8 +29,10 @@
 #define MACHINE_FLAG_TE		_BITUL(11)
 #define MACHINE_FLAG_TLB_LC	_BITUL(12)
 #define MACHINE_FLAG_VX		_BITUL(13)
-#define MACHINE_FLAG_NX		_BITUL(14)
-#define MACHINE_FLAG_GS		_BITUL(15)
+#define MACHINE_FLAG_TLB_GUEST	_BITUL(14)
+#define MACHINE_FLAG_NX		_BITUL(15)
+#define MACHINE_FLAG_GS		_BITUL(16)
+#define MACHINE_FLAG_SCC	_BITUL(17)
 
 #define LPP_MAGIC		_BITUL(31)
 #define LPP_PFAULT_PID_MASK	_AC(0xffffffff, UL)
@@ -68,8 +70,10 @@ extern void detect_memory_memblock(void);
 #define MACHINE_HAS_TE		(S390_lowcore.machine_flags & MACHINE_FLAG_TE)
 #define MACHINE_HAS_TLB_LC	(S390_lowcore.machine_flags & MACHINE_FLAG_TLB_LC)
 #define MACHINE_HAS_VX		(S390_lowcore.machine_flags & MACHINE_FLAG_VX)
+#define MACHINE_HAS_TLB_GUEST	(S390_lowcore.machine_flags & MACHINE_FLAG_TLB_GUEST)
 #define MACHINE_HAS_NX		(S390_lowcore.machine_flags & MACHINE_FLAG_NX)
 #define MACHINE_HAS_GS		(S390_lowcore.machine_flags & MACHINE_FLAG_GS)
+#define MACHINE_HAS_SCC		(S390_lowcore.machine_flags & MACHINE_FLAG_SCC)
 
 /*
  * Console mode. Override with conmode=
@@ -104,9 +108,16 @@ extern void pfault_fini(void);
 #define pfault_fini()		do { } while (0)
 #endif /* CONFIG_PFAULT */
 
+#ifdef CONFIG_VMCP
+void vmcp_cma_reserve(void);
+#else
+static inline void vmcp_cma_reserve(void) { }
+#endif
+
 void report_user_fault(struct pt_regs *regs, long signr, int is_mm_fault);
 
-extern void cmma_init(void);
+void cmma_init(void);
+void cmma_init_nodat(void);
 
 extern void (*_machine_restart)(char *command);
 extern void (*_machine_halt)(void);
diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h
index f7838ecd83c6..8182b521c42f 100644
--- a/arch/s390/include/asm/spinlock.h
+++ b/arch/s390/include/asm/spinlock.h
@@ -92,17 +92,11 @@ static inline void arch_spin_unlock(arch_spinlock_t *lp)
 {
 	typecheck(int, lp->lock);
 	asm volatile(
-		"st	%1,%0\n"
-		: "+Q" (lp->lock)
-		: "d" (0)
-		: "cc", "memory");
-}
-
-static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
-{
-	while (arch_spin_is_locked(lock))
-		arch_spin_relax(lock);
-	smp_acquire__after_ctrl_dep();
+#ifdef CONFIG_HAVE_MARCH_ZEC12_FEATURES
+		"	.long	0xb2fa0070\n"	/* NIAI 7 */
+#endif
+		"	st	%1,%0\n"
+		: "=Q" (lp->lock) : "d" (0) : "cc", "memory");
 }
 
 /*
diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h
index 118535123f34..93f2eb3f277c 100644
--- a/arch/s390/include/asm/timex.h
+++ b/arch/s390/include/asm/timex.h
@@ -15,6 +15,8 @@
 /* The value of the TOD clock for 1.1.1970. */
 #define TOD_UNIX_EPOCH 0x7d91048bca000000ULL
 
+extern u64 clock_comparator_max;
+
 /* Inline functions for clock register access. */
 static inline int set_tod_clock(__u64 time)
 {
@@ -126,7 +128,7 @@ static inline unsigned long long local_tick_disable(void)
 	unsigned long long old;
 
 	old = S390_lowcore.clock_comparator;
-	S390_lowcore.clock_comparator = -1ULL;
+	S390_lowcore.clock_comparator = clock_comparator_max;
 	set_clock_comparator(S390_lowcore.clock_comparator);
 	return old;
 }
@@ -174,24 +176,24 @@ static inline cycles_t get_cycles(void)
 	return (cycles_t) get_tod_clock() >> 2;
 }
 
-int get_phys_clock(unsigned long long *clock);
+int get_phys_clock(unsigned long *clock);
 void init_cpu_timer(void);
 unsigned long long monotonic_clock(void);
 
-extern u64 sched_clock_base_cc;
+extern unsigned char tod_clock_base[16] __aligned(8);
 
 /**
  * get_clock_monotonic - returns current time in clock rate units
  *
  * The caller must ensure that preemption is disabled.
- * The clock and sched_clock_base get changed via stop_machine.
+ * The clock and tod_clock_base get changed via stop_machine.
  * Therefore preemption must be disabled when calling this
  * function, otherwise the returned value is not guaranteed to
  * be monotonic.
  */
 static inline unsigned long long get_tod_clock_monotonic(void)
 {
-	return get_tod_clock() - sched_clock_base_cc;
+	return get_tod_clock() - *(unsigned long long *) &tod_clock_base[1];
 }
 
 /**
@@ -218,4 +220,32 @@ static inline unsigned long long tod_to_ns(unsigned long long todval)
 	return ((todval >> 9) * 125) + (((todval & 0x1ff) * 125) >> 9);
 }
 
+/**
+ * tod_after - compare two 64 bit TOD values
+ * @a: first 64 bit TOD timestamp
+ * @b: second 64 bit TOD timestamp
+ *
+ * Returns: true if a is later than b
+ */
+static inline int tod_after(unsigned long long a, unsigned long long b)
+{
+	if (MACHINE_HAS_SCC)
+		return (long long) a > (long long) b;
+	return a > b;
+}
+
+/**
+ * tod_after_eq - compare two 64 bit TOD values
+ * @a: first 64 bit TOD timestamp
+ * @b: second 64 bit TOD timestamp
+ *
+ * Returns: true if a is later than b
+ */
+static inline int tod_after_eq(unsigned long long a, unsigned long long b)
+{
+	if (MACHINE_HAS_SCC)
+		return (long long) a >= (long long) b;
+	return a >= b;
+}
+
 #endif
diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
index 2eb8ff0d6fca..3a14b864b2e3 100644
--- a/arch/s390/include/asm/tlb.h
+++ b/arch/s390/include/asm/tlb.h
@@ -135,7 +135,7 @@ static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
 static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd,
 				unsigned long address)
 {
-	if (tlb->mm->context.asce_limit <= (1UL << 31))
+	if (tlb->mm->context.asce_limit <= _REGION3_SIZE)
 		return;
 	pgtable_pmd_page_dtor(virt_to_page(pmd));
 	tlb_remove_table(tlb, pmd);
@@ -151,7 +151,7 @@ static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd,
 static inline void p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d,
 				unsigned long address)
 {
-	if (tlb->mm->context.asce_limit <= (1UL << 53))
+	if (tlb->mm->context.asce_limit <= _REGION1_SIZE)
 		return;
 	tlb_remove_table(tlb, p4d);
 }
@@ -166,7 +166,7 @@ static inline void p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d,
 static inline void pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
 				unsigned long address)
 {
-	if (tlb->mm->context.asce_limit <= (1UL << 42))
+	if (tlb->mm->context.asce_limit <= _REGION2_SIZE)
 		return;
 	tlb_remove_table(tlb, pud);
 }
diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h
index 39846100682a..4d759f8f4bc7 100644
--- a/arch/s390/include/asm/tlbflush.h
+++ b/arch/s390/include/asm/tlbflush.h
@@ -20,10 +20,15 @@ static inline void __tlb_flush_local(void)
  */
 static inline void __tlb_flush_idte(unsigned long asce)
 {
+	unsigned long opt;
+
+	opt = IDTE_PTOA;
+	if (MACHINE_HAS_TLB_GUEST)
+		opt |= IDTE_GUEST_ASCE;
 	/* Global TLB flush for the mm */
 	asm volatile(
 		"	.insn	rrf,0xb98e0000,0,%0,%1,0"
-		: : "a" (2048), "a" (asce) : "cc");
+		: : "a" (opt), "a" (asce) : "cc");
 }
 
 #ifdef CONFIG_SMP
diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h
index fa1bfce10370..5222da162b69 100644
--- a/arch/s390/include/asm/topology.h
+++ b/arch/s390/include/asm/topology.h
@@ -77,12 +77,6 @@ static inline const struct cpumask *cpumask_of_node(int node)
 	return &node_to_cpumask_map[node];
 }
 
-/*
- * Returns the number of the node containing node 'node'. This
- * architecture is flat, so it is a pretty simple function!
- */
-#define parent_node(node) (node)
-
 #define pcibus_to_node(bus) __pcibus_to_node(bus)
 
 #define node_distance(a, b) __node_distance(a, b)
diff --git a/arch/s390/include/asm/types.h b/arch/s390/include/asm/types.h
deleted file mode 100644
index 6740f4f9781f..000000000000
--- a/arch/s390/include/asm/types.h
+++ /dev/null
@@ -1,11 +0,0 @@
-/*
- *  S390 version
- *
- *  Derived from "include/asm-i386/types.h"
- */
-#ifndef _S390_TYPES_H
-#define _S390_TYPES_H
-
-#include <uapi/asm/types.h>
-
-#endif /* _S390_TYPES_H */
diff --git a/arch/s390/include/asm/unaligned.h b/arch/s390/include/asm/unaligned.h
deleted file mode 100644
index da9627afe5d8..000000000000
--- a/arch/s390/include/asm/unaligned.h
+++ /dev/null
@@ -1,13 +0,0 @@
-#ifndef _ASM_S390_UNALIGNED_H
-#define _ASM_S390_UNALIGNED_H
-
-/*
- * The S390 can do unaligned accesses itself. 
- */
-#include <linux/unaligned/access_ok.h>
-#include <linux/unaligned/generic.h>
-
-#define get_unaligned	__get_unaligned_be
-#define put_unaligned	__put_unaligned_be
-
-#endif /* _ASM_S390_UNALIGNED_H */
diff --git a/arch/s390/include/uapi/asm/Kbuild b/arch/s390/include/uapi/asm/Kbuild
index ca62066895e0..098f28778a13 100644
--- a/arch/s390/include/uapi/asm/Kbuild
+++ b/arch/s390/include/uapi/asm/Kbuild
@@ -9,4 +9,5 @@ generic-y += param.h
 generic-y += poll.h
 generic-y += resource.h
 generic-y += sockios.h
+generic-y += swab.h
 generic-y += termbits.h
diff --git a/arch/s390/include/uapi/asm/dasd.h b/arch/s390/include/uapi/asm/dasd.h
index 1340311dab77..ab5797cdc1b7 100644
--- a/arch/s390/include/uapi/asm/dasd.h
+++ b/arch/s390/include/uapi/asm/dasd.h
@@ -72,7 +72,10 @@ typedef struct dasd_information2_t {
  * 0x02: use diag discipline (diag)
  * 0x04: set the device initially online (internal use only)
  * 0x08: enable ERP related logging
- * 0x20: give access to raw eckd data
+ * 0x10: allow I/O to fail on lost paths
+ * 0x20: allow I/O to fail when a lock was stolen
+ * 0x40: give access to raw eckd data
+ * 0x80: enable discard support
  */
 #define DASD_FEATURE_DEFAULT	     0x00
 #define DASD_FEATURE_READONLY	     0x01
@@ -82,6 +85,7 @@ typedef struct dasd_information2_t {
 #define DASD_FEATURE_FAILFAST	     0x10
 #define DASD_FEATURE_FAILONSLCK      0x20
 #define DASD_FEATURE_USERAW	     0x40
+#define DASD_FEATURE_DISCARD	     0x80
 
 #define DASD_PARTN_BITS 2
 
diff --git a/arch/s390/include/uapi/asm/socket.h b/arch/s390/include/uapi/asm/socket.h
index 52a63f4175cb..a56916c83565 100644
--- a/arch/s390/include/uapi/asm/socket.h
+++ b/arch/s390/include/uapi/asm/socket.h
@@ -108,4 +108,6 @@
 
 #define SO_PEERGROUPS		59
 
+#define SO_ZEROCOPY		60
+
 #endif /* _ASM_SOCKET_H */
diff --git a/arch/s390/include/uapi/asm/swab.h b/arch/s390/include/uapi/asm/swab.h
deleted file mode 100644
index da3bfe5cc161..000000000000
--- a/arch/s390/include/uapi/asm/swab.h
+++ /dev/null
@@ -1,89 +0,0 @@
-#ifndef _S390_SWAB_H
-#define _S390_SWAB_H
-
-/*
- *  S390 version
- *    Copyright IBM Corp. 1999
- *    Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
- */
-
-#include <linux/types.h>
-
-#ifndef __s390x__
-# define __SWAB_64_THRU_32__
-#endif
-
-#ifdef __s390x__
-static inline __u64 __arch_swab64p(const __u64 *x)
-{
-	__u64 result;
-
-	asm volatile("lrvg %0,%1" : "=d" (result) : "m" (*x));
-	return result;
-}
-#define __arch_swab64p __arch_swab64p
-
-static inline __u64 __arch_swab64(__u64 x)
-{
-	__u64 result;
-
-	asm volatile("lrvgr %0,%1" : "=d" (result) : "d" (x));
-	return result;
-}
-#define __arch_swab64 __arch_swab64
-
-static inline void __arch_swab64s(__u64 *x)
-{
-	*x = __arch_swab64p(x);
-}
-#define __arch_swab64s __arch_swab64s
-#endif /* __s390x__ */
-
-static inline __u32 __arch_swab32p(const __u32 *x)
-{
-	__u32 result;
-	
-	asm volatile(
-#ifndef __s390x__
-		"	icm	%0,8,%O1+3(%R1)\n"
-		"	icm	%0,4,%O1+2(%R1)\n"
-		"	icm	%0,2,%O1+1(%R1)\n"
-		"	ic	%0,%1"
-		: "=&d" (result) : "Q" (*x) : "cc");
-#else /* __s390x__ */
-		"	lrv	%0,%1"
-		: "=d" (result) : "m" (*x));
-#endif /* __s390x__ */
-	return result;
-}
-#define __arch_swab32p __arch_swab32p
-
-#ifdef __s390x__
-static inline __u32 __arch_swab32(__u32 x)
-{
-	__u32 result;
-	
-	asm volatile("lrvr  %0,%1" : "=d" (result) : "d" (x));
-	return result;
-}
-#define __arch_swab32 __arch_swab32
-#endif /* __s390x__ */
-
-static inline __u16 __arch_swab16p(const __u16 *x)
-{
-	__u16 result;
-	
-	asm volatile(
-#ifndef __s390x__
-		"	icm	%0,2,%O1+1(%R1)\n"
-		"	ic	%0,%1\n"
-		: "=&d" (result) : "Q" (*x) : "cc");
-#else /* __s390x__ */
-		"	lrvh	%0,%1"
-		: "=d" (result) : "m" (*x));
-#endif /* __s390x__ */
-	return result;
-}
-#define __arch_swab16p __arch_swab16p
-
-#endif /* _S390_SWAB_H */
diff --git a/arch/s390/include/uapi/asm/vmcp.h b/arch/s390/include/uapi/asm/vmcp.h
new file mode 100644
index 000000000000..4caf71714a55
--- /dev/null
+++ b/arch/s390/include/uapi/asm/vmcp.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright IBM Corp. 2004, 2005
+ * Interface implementation for communication with the z/VM control program
+ * Version 1.0
+ * Author(s): Christian Borntraeger <cborntra@de.ibm.com>
+ *
+ *
+ * z/VMs CP offers the possibility to issue commands via the diagnose code 8
+ * this driver implements a character device that issues these commands and
+ * returns the answer of CP.
+ *
+ * The idea of this driver is based on cpint from Neale Ferguson
+ */
+
+#ifndef _UAPI_ASM_VMCP_H
+#define _UAPI_ASM_VMCP_H
+
+#include <linux/ioctl.h>
+
+#define VMCP_GETCODE	_IOR(0x10, 1, int)
+#define VMCP_SETBUF	_IOW(0x10, 2, int)
+#define VMCP_GETSIZE	_IOR(0x10, 3, int)
+
+#endif /* _UAPI_ASM_VMCP_H */
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index b65c414b6c0e..3d42f91c95fd 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -158,6 +158,7 @@ int main(void)
 	OFFSET(__LC_LAST_UPDATE_CLOCK, lowcore, last_update_clock);
 	OFFSET(__LC_INT_CLOCK, lowcore, int_clock);
 	OFFSET(__LC_MCCK_CLOCK, lowcore, mcck_clock);
+	OFFSET(__LC_BOOT_CLOCK, lowcore, boot_clock);
 	OFFSET(__LC_CURRENT, lowcore, current_task);
 	OFFSET(__LC_KERNEL_STACK, lowcore, kernel_stack);
 	OFFSET(__LC_ASYNC_STACK, lowcore, async_stack);
diff --git a/arch/s390/kernel/cpcmd.c b/arch/s390/kernel/cpcmd.c
index 9f0e4a2785f7..63bc6603e0ed 100644
--- a/arch/s390/kernel/cpcmd.c
+++ b/arch/s390/kernel/cpcmd.c
@@ -14,6 +14,7 @@
 #include <linux/spinlock.h>
 #include <linux/stddef.h>
 #include <linux/string.h>
+#include <linux/mm.h>
 #include <asm/diag.h>
 #include <asm/ebcdic.h>
 #include <asm/cpcmd.h>
@@ -28,9 +29,7 @@ static int diag8_noresponse(int cmdlen)
 	register unsigned long reg3 asm ("3") = cmdlen;
 
 	asm volatile(
-		"	sam31\n"
 		"	diag	%1,%0,0x8\n"
-		"	sam64\n"
 		: "+d" (reg3) : "d" (reg2) : "cc");
 	return reg3;
 }
@@ -43,9 +42,7 @@ static int diag8_response(int cmdlen, char *response, int *rlen)
 	register unsigned long reg5 asm ("5") = *rlen;
 
 	asm volatile(
-		"	sam31\n"
 		"	diag	%2,%0,0x8\n"
-		"	sam64\n"
 		"	brc	8,1f\n"
 		"	agr	%1,%4\n"
 		"1:\n"
@@ -57,7 +54,6 @@ static int diag8_response(int cmdlen, char *response, int *rlen)
 
 /*
  * __cpcmd has some restrictions over cpcmd
- *  - the response buffer must reside below 2GB (if any)
  *  - __cpcmd is unlocked and therefore not SMP-safe
  */
 int  __cpcmd(const char *cmd, char *response, int rlen, int *response_code)
@@ -88,13 +84,12 @@ EXPORT_SYMBOL(__cpcmd);
 
 int cpcmd(const char *cmd, char *response, int rlen, int *response_code)
 {
+	unsigned long flags;
 	char *lowbuf;
 	int len;
-	unsigned long flags;
 
-	if ((virt_to_phys(response) != (unsigned long) response) ||
-			(((unsigned long)response + rlen) >> 31)) {
-		lowbuf = kmalloc(rlen, GFP_KERNEL | GFP_DMA);
+	if (is_vmalloc_or_module_addr(response)) {
+		lowbuf = kmalloc(rlen, GFP_KERNEL);
 		if (!lowbuf) {
 			pr_warn("The cpcmd kernel function failed to allocate a response buffer\n");
 			return -ENOMEM;
diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c
index 86b3e74f569e..1d9e83c401fc 100644
--- a/arch/s390/kernel/debug.c
+++ b/arch/s390/kernel/debug.c
@@ -866,7 +866,8 @@ static inline void
 debug_finish_entry(debug_info_t * id, debug_entry_t* active, int level,
 			int exception)
 {
-	active->id.stck = get_tod_clock_fast() - sched_clock_base_cc;
+	active->id.stck = get_tod_clock_fast() -
+		*(unsigned long long *) &tod_clock_base[1];
 	active->id.fields.cpuid = smp_processor_id();
 	active->caller = __builtin_return_address(0);
 	active->id.fields.exception = exception;
@@ -1455,15 +1456,15 @@ int
 debug_dflt_header_fn(debug_info_t * id, struct debug_view *view,
 			 int area, debug_entry_t * entry, char *out_buf)
 {
-	unsigned long sec, usec;
+	unsigned long base, sec, usec;
 	char *except_str;
 	unsigned long caller;
 	int rc = 0;
 	unsigned int level;
 
 	level = entry->id.fields.level;
-	sec = (entry->id.stck >> 12) + (sched_clock_base_cc >> 12);
-	sec = sec - (TOD_UNIX_EPOCH >> 12);
+	base = (*(unsigned long *) &tod_clock_base[0]) >> 4;
+	sec = (entry->id.stck >> 12) + base - (TOD_UNIX_EPOCH >> 12);
 	usec = do_div(sec, USEC_PER_SEC);
 
 	if (entry->id.fields.exception)
diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c
index dab78babfab6..2aa545dca4d5 100644
--- a/arch/s390/kernel/dumpstack.c
+++ b/arch/s390/kernel/dumpstack.c
@@ -76,7 +76,7 @@ void dump_trace(dump_trace_func_t func, void *data, struct task_struct *task,
 	frame_size = STACK_FRAME_OVERHEAD + sizeof(struct pt_regs);
 #ifdef CONFIG_CHECK_STACK
 	sp = __dump_trace(func, data, sp,
-			  S390_lowcore.panic_stack + frame_size - 4096,
+			  S390_lowcore.panic_stack + frame_size - PAGE_SIZE,
 			  S390_lowcore.panic_stack + frame_size);
 #endif
 	sp = __dump_trace(func, data, sp,
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index 5d20182ee8ae..ca8cd80e8feb 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -53,8 +53,9 @@ static void __init reset_tod_clock(void)
 	if (set_tod_clock(TOD_UNIX_EPOCH) != 0 || store_tod_clock(&time) != 0)
 		disabled_wait(0);
 
-	sched_clock_base_cc = TOD_UNIX_EPOCH;
-	S390_lowcore.last_update_clock = sched_clock_base_cc;
+	memset(tod_clock_base, 0, 16);
+	*(__u64 *) &tod_clock_base[1] = TOD_UNIX_EPOCH;
+	S390_lowcore.last_update_clock = TOD_UNIX_EPOCH;
 }
 
 #ifdef CONFIG_SHARED_KERNEL
@@ -165,8 +166,8 @@ static noinline __init void create_kernel_nss(void)
 	}
 
 	/* re-initialize cputime accounting. */
-	sched_clock_base_cc = get_tod_clock();
-	S390_lowcore.last_update_clock = sched_clock_base_cc;
+	get_tod_clock_ext(tod_clock_base);
+	S390_lowcore.last_update_clock = *(__u64 *) &tod_clock_base[1];
 	S390_lowcore.last_update_timer = 0x7fffffffffffffffULL;
 	S390_lowcore.user_timer = 0;
 	S390_lowcore.system_timer = 0;
@@ -387,6 +388,12 @@ static __init void detect_machine_facilities(void)
 	}
 	if (test_facility(133))
 		S390_lowcore.machine_flags |= MACHINE_FLAG_GS;
+	if (test_facility(139) && (tod_clock_base[1] & 0x80)) {
+		/* Enabled signed clock comparator comparisons */
+		S390_lowcore.machine_flags |= MACHINE_FLAG_SCC;
+		clock_comparator_max = -1ULL >> 1;
+		__ctl_set_bit(0, 53);
+	}
 }
 
 static inline void save_vector_registers(void)
@@ -413,7 +420,7 @@ static int __init disable_vector_extension(char *str)
 {
 	S390_lowcore.machine_flags &= ~MACHINE_FLAG_VX;
 	__ctl_clear_bit(0, 17);
-	return 1;
+	return 0;
 }
 early_param("novx", disable_vector_extension);
 
diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S
index eff5b31671d4..8ed753c72d9b 100644
--- a/arch/s390/kernel/head.S
+++ b/arch/s390/kernel/head.S
@@ -302,7 +302,8 @@ ENTRY(startup_kdump)
 	xc	0xe00(256),0xe00
 	xc	0xf00(256),0xf00
 	lctlg	%c0,%c15,0x200(%r0)	# initialize control registers
-	stck	__LC_LAST_UPDATE_CLOCK
+	stcke	__LC_BOOT_CLOCK
+	mvc	__LC_LAST_UPDATE_CLOCK(8),__LC_BOOT_CLOCK+1
 	spt	6f-.LPG0(%r13)
 	mvc	__LC_LAST_UPDATE_TIMER(8),6f-.LPG0(%r13)
 	l	%r15,.Lstack-.LPG0(%r13)
diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S
index 31c91f24e562..0d8f2a858ced 100644
--- a/arch/s390/kernel/head64.S
+++ b/arch/s390/kernel/head64.S
@@ -21,8 +21,8 @@ ENTRY(startup_continue)
 	xc	__LC_LPP+1(7,0),__LC_LPP+1	# clear lpp and current_pid
 	mvi	__LC_LPP,0x80			#   and set LPP_MAGIC
 	.insn	s,0xb2800000,__LC_LPP		# load program parameter
-0:	larl	%r1,sched_clock_base_cc
-	mvc	0(8,%r1),__LC_LAST_UPDATE_CLOCK
+0:	larl	%r1,tod_clock_base
+	mvc	0(16,%r1),__LC_BOOT_CLOCK
 	larl	%r13,.LPG1		# get base
 	lctlg	%c0,%c15,.Lctl-.LPG1(%r13)	# load control registers
 	lg	%r12,.Lparmaddr-.LPG1(%r13)	# pointer to parameter area
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c
index 6dca93b29bed..a2fdff0e730b 100644
--- a/arch/s390/kernel/irq.c
+++ b/arch/s390/kernel/irq.c
@@ -105,7 +105,8 @@ void do_IRQ(struct pt_regs *regs, int irq)
 
 	old_regs = set_irq_regs(regs);
 	irq_enter();
-	if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator)
+	if (tod_after_eq(S390_lowcore.int_clock,
+			 S390_lowcore.clock_comparator))
 		/* Serve timer interrupts first. */
 		clock_comparator_work();
 	generic_handle_irq(irq);
diff --git a/arch/s390/kernel/relocate_kernel.S b/arch/s390/kernel/relocate_kernel.S
index cfac28330b03..4bdc65636603 100644
--- a/arch/s390/kernel/relocate_kernel.S
+++ b/arch/s390/kernel/relocate_kernel.S
@@ -7,6 +7,7 @@
  */
 
 #include <linux/linkage.h>
+#include <asm/page.h>
 #include <asm/sigp.h>
 
 /*
@@ -55,8 +56,8 @@ ENTRY(relocate_kernel)
 	.back_pgm:
 		lmg	%r0,%r15,gprregs-.base(%r13)
 	.top:
-		lghi	%r7,4096	# load PAGE_SIZE in r7
-		lghi	%r9,4096	# load PAGE_SIZE in r9
+		lghi	%r7,PAGE_SIZE	# load PAGE_SIZE in r7
+		lghi	%r9,PAGE_SIZE	# load PAGE_SIZE in r9
 		lg	%r5,0(%r2)	# read another word for indirection page
 		aghi	%r2,8		# increment pointer
 		tml	%r5,0x1		# is it a destination page?
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 3d1d808ea8a9..164a1e16b53e 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -305,7 +305,7 @@ static void __init setup_lowcore(void)
 	/*
 	 * Setup lowcore for boot cpu
 	 */
-	BUILD_BUG_ON(sizeof(struct lowcore) != LC_PAGES * 4096);
+	BUILD_BUG_ON(sizeof(struct lowcore) != LC_PAGES * PAGE_SIZE);
 	lc = memblock_virt_alloc_low(sizeof(*lc), sizeof(*lc));
 	lc->restart_psw.mask = PSW_KERNEL_BITS;
 	lc->restart_psw.addr = (unsigned long) restart_int_handler;
@@ -323,7 +323,7 @@ static void __init setup_lowcore(void)
 	lc->io_new_psw.mask = PSW_KERNEL_BITS |
 		PSW_MASK_DAT | PSW_MASK_MCHECK;
 	lc->io_new_psw.addr = (unsigned long) io_int_handler;
-	lc->clock_comparator = -1ULL;
+	lc->clock_comparator = clock_comparator_max;
 	lc->kernel_stack = ((unsigned long) &init_thread_union)
 		+ THREAD_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
 	lc->async_stack = (unsigned long)
@@ -469,10 +469,10 @@ static void __init setup_memory_end(void)
 	vmalloc_size = VMALLOC_END ?: (128UL << 30) - MODULES_LEN;
 	tmp = (memory_end ?: max_physmem_end) / PAGE_SIZE;
 	tmp = tmp * (sizeof(struct page) + PAGE_SIZE);
-	if (tmp + vmalloc_size + MODULES_LEN <= (1UL << 42))
-		vmax = 1UL << 42;	/* 3-level kernel page table */
+	if (tmp + vmalloc_size + MODULES_LEN <= _REGION2_SIZE)
+		vmax = _REGION2_SIZE; /* 3-level kernel page table */
 	else
-		vmax = 1UL << 53;	/* 4-level kernel page table */
+		vmax = _REGION1_SIZE; /* 4-level kernel page table */
 	/* module area is at the end of the kernel address space. */
 	MODULES_END = vmax;
 	MODULES_VADDR = MODULES_END - MODULES_LEN;
@@ -818,6 +818,9 @@ static int __init setup_hwcaps(void)
 	case 0x2965:
 		strcpy(elf_platform, "z13");
 		break;
+	case 0x3906:
+		strcpy(elf_platform, "z14");
+		break;
 	}
 
 	/*
@@ -922,6 +925,7 @@ void __init setup_arch(char **cmdline_p)
 	setup_memory_end();
 	setup_memory();
 	dma_contiguous_reserve(memory_end);
+	vmcp_cma_reserve();
 
 	check_initrd();
 	reserve_crashkernel();
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 1020a11a24e5..1cee6753d47a 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -1181,6 +1181,7 @@ static int __init s390_smp_init(void)
 
 	rc = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "s390/smp:online",
 			       smp_cpu_online, smp_cpu_pre_down);
+	rc = rc <= 0 ? rc : 0;
 out:
 	return rc;
 }
diff --git a/arch/s390/kernel/suspend.c b/arch/s390/kernel/suspend.c
index 39e2f41b6cf0..c8ea715bfe10 100644
--- a/arch/s390/kernel/suspend.c
+++ b/arch/s390/kernel/suspend.c
@@ -98,10 +98,16 @@ int page_key_alloc(unsigned long pages)
  */
 void page_key_read(unsigned long *pfn)
 {
+	struct page *page;
 	unsigned long addr;
-
-	addr = (unsigned long) page_address(pfn_to_page(*pfn));
-	*(unsigned char *) pfn = (unsigned char) page_get_storage_key(addr);
+	unsigned char key;
+
+	page = pfn_to_page(*pfn);
+	addr = (unsigned long) page_address(page);
+	key = (unsigned char) page_get_storage_key(addr) & 0x7f;
+	if (arch_test_page_nodat(page))
+		key |= 0x80;
+	*(unsigned char *) pfn = key;
 }
 
 /*
@@ -126,8 +132,16 @@ void page_key_memorize(unsigned long *pfn)
  */
 void page_key_write(void *address)
 {
-	page_set_storage_key((unsigned long) address,
-			     page_key_rp->data[page_key_rx], 0);
+	struct page *page;
+	unsigned char key;
+
+	key = page_key_rp->data[page_key_rx];
+	page_set_storage_key((unsigned long) address, key & 0x7f, 0);
+	page = virt_to_page(address);
+	if (key & 0x80)
+		arch_set_page_nodat(page, 0);
+	else
+		arch_set_page_dat(page, 0);
 	if (++page_key_rx >= PAGE_KEY_DATA_SIZE)
 		return;
 	page_key_rp = page_key_rp->next;
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index 192efdfac918..5cbd52169348 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -51,8 +51,15 @@
 #include <asm/cio.h>
 #include "entry.h"
 
-u64 sched_clock_base_cc = -1;	/* Force to data section. */
-EXPORT_SYMBOL_GPL(sched_clock_base_cc);
+unsigned char tod_clock_base[16] __aligned(8) = {
+	/* Force to data section. */
+	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+};
+EXPORT_SYMBOL_GPL(tod_clock_base);
+
+u64 clock_comparator_max = -1ULL;
+EXPORT_SYMBOL_GPL(clock_comparator_max);
 
 static DEFINE_PER_CPU(struct clock_event_device, comparators);
 
@@ -75,7 +82,7 @@ void __init time_early_init(void)
 	struct ptff_qui qui;
 
 	/* Initialize TOD steering parameters */
-	tod_steering_end = sched_clock_base_cc;
+	tod_steering_end = *(unsigned long long *) &tod_clock_base[1];
 	vdso_data->ts_end = tod_steering_end;
 
 	if (!test_facility(28))
@@ -111,22 +118,27 @@ unsigned long long monotonic_clock(void)
 }
 EXPORT_SYMBOL(monotonic_clock);
 
-static void tod_to_timeval(__u64 todval, struct timespec64 *xt)
+static void ext_to_timespec64(unsigned char *clk, struct timespec64 *xt)
 {
-	unsigned long long sec;
+	unsigned long long high, low, rem, sec, nsec;
+
+	/* Split extendnd TOD clock to micro-seconds and sub-micro-seconds */
+	high = (*(unsigned long long *) clk) >> 4;
+	low = (*(unsigned long long *)&clk[7]) << 4;
+	/* Calculate seconds and nano-seconds */
+	sec = high;
+	rem = do_div(sec, 1000000);
+	nsec = (((low >> 32) + (rem << 32)) * 1000) >> 32;
 
-	sec = todval >> 12;
-	do_div(sec, 1000000);
 	xt->tv_sec = sec;
-	todval -= (sec * 1000000) << 12;
-	xt->tv_nsec = ((todval * 1000) >> 12);
+	xt->tv_nsec = nsec;
 }
 
 void clock_comparator_work(void)
 {
 	struct clock_event_device *cd;
 
-	S390_lowcore.clock_comparator = -1ULL;
+	S390_lowcore.clock_comparator = clock_comparator_max;
 	cd = this_cpu_ptr(&comparators);
 	cd->event_handler(cd);
 }
@@ -148,7 +160,7 @@ void init_cpu_timer(void)
 	struct clock_event_device *cd;
 	int cpu;
 
-	S390_lowcore.clock_comparator = -1ULL;
+	S390_lowcore.clock_comparator = clock_comparator_max;
 	set_clock_comparator(S390_lowcore.clock_comparator);
 
 	cpu = smp_processor_id();
@@ -179,7 +191,7 @@ static void clock_comparator_interrupt(struct ext_code ext_code,
 				       unsigned long param64)
 {
 	inc_irq_stat(IRQEXT_CLK);
-	if (S390_lowcore.clock_comparator == -1ULL)
+	if (S390_lowcore.clock_comparator == clock_comparator_max)
 		set_clock_comparator(S390_lowcore.clock_comparator);
 }
 
@@ -197,18 +209,28 @@ static void stp_reset(void);
 
 void read_persistent_clock64(struct timespec64 *ts)
 {
-	__u64 clock;
+	unsigned char clk[STORE_CLOCK_EXT_SIZE];
+	__u64 delta;
 
-	clock = get_tod_clock() - initial_leap_seconds;
-	tod_to_timeval(clock - TOD_UNIX_EPOCH, ts);
+	delta = initial_leap_seconds + TOD_UNIX_EPOCH;
+	get_tod_clock_ext(clk);
+	*(__u64 *) &clk[1] -= delta;
+	if (*(__u64 *) &clk[1] > delta)
+		clk[0]--;
+	ext_to_timespec64(clk, ts);
 }
 
 void read_boot_clock64(struct timespec64 *ts)
 {
-	__u64 clock;
+	unsigned char clk[STORE_CLOCK_EXT_SIZE];
+	__u64 delta;
 
-	clock = sched_clock_base_cc - initial_leap_seconds;
-	tod_to_timeval(clock - TOD_UNIX_EPOCH, ts);
+	delta = initial_leap_seconds + TOD_UNIX_EPOCH;
+	memcpy(clk, tod_clock_base, 16);
+	*(__u64 *) &clk[1] -= delta;
+	if (*(__u64 *) &clk[1] > delta)
+		clk[0]--;
+	ext_to_timespec64(clk, ts);
 }
 
 static u64 read_tod_clock(struct clocksource *cs)
@@ -335,7 +357,7 @@ static unsigned long clock_sync_flags;
  * source. If the clock mode is local it will return -EOPNOTSUPP and
  * -EAGAIN if the clock is not in sync with the external reference.
  */
-int get_phys_clock(unsigned long long *clock)
+int get_phys_clock(unsigned long *clock)
 {
 	atomic_t *sw_ptr;
 	unsigned int sw0, sw1;
@@ -406,7 +428,10 @@ static void clock_sync_global(unsigned long long delta)
 	struct ptff_qto qto;
 
 	/* Fixup the monotonic sched clock. */
-	sched_clock_base_cc += delta;
+	*(unsigned long long *) &tod_clock_base[1] += delta;
+	if (*(unsigned long long *) &tod_clock_base[1] < delta)
+		/* Epoch overflow */
+		tod_clock_base[0]++;
 	/* Adjust TOD steering parameters. */
 	vdso_data->tb_update_count++;
 	now = get_tod_clock();
@@ -437,7 +462,7 @@ static void clock_sync_global(unsigned long long delta)
 static void clock_sync_local(unsigned long long delta)
 {
 	/* Add the delta to the clock comparator. */
-	if (S390_lowcore.clock_comparator != -1ULL) {
+	if (S390_lowcore.clock_comparator != clock_comparator_max) {
 		S390_lowcore.clock_comparator += delta;
 		set_clock_comparator(S390_lowcore.clock_comparator);
 	}
diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c
index b89d19f6f2ab..eacda05b45d7 100644
--- a/arch/s390/kernel/vdso.c
+++ b/arch/s390/kernel/vdso.c
@@ -157,6 +157,8 @@ int vdso_alloc_per_cpu(struct lowcore *lowcore)
 	page_frame = get_zeroed_page(GFP_KERNEL);
 	if (!segment_table || !page_table || !page_frame)
 		goto out;
+	arch_set_page_dat(virt_to_page(segment_table), SEGMENT_ORDER);
+	arch_set_page_dat(virt_to_page(page_table), 0);
 
 	/* Initialize per-cpu vdso data page */
 	vd = (struct vdso_per_cpu_data *) page_frame;
diff --git a/arch/s390/kernel/vdso32/vdso32.lds.S b/arch/s390/kernel/vdso32/vdso32.lds.S
index 8f048c2d6d13..263a7f9eee1e 100644
--- a/arch/s390/kernel/vdso32/vdso32.lds.S
+++ b/arch/s390/kernel/vdso32/vdso32.lds.S
@@ -2,6 +2,8 @@
  * This is the infamous ld script for the 32 bits vdso
  * library
  */
+
+#include <asm/page.h>
 #include <asm/vdso.h>
 
 OUTPUT_FORMAT("elf32-s390", "elf32-s390", "elf32-s390")
@@ -91,7 +93,7 @@ SECTIONS
 	.debug_ranges	0 : { *(.debug_ranges) }
 	.gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
 
-	. = ALIGN(4096);
+	. = ALIGN(PAGE_SIZE);
 	PROVIDE(_vdso_data = .);
 
 	/DISCARD/	: {
diff --git a/arch/s390/kernel/vdso64/vdso64.lds.S b/arch/s390/kernel/vdso64/vdso64.lds.S
index f35455d497fe..9e3dbbcc1cfc 100644
--- a/arch/s390/kernel/vdso64/vdso64.lds.S
+++ b/arch/s390/kernel/vdso64/vdso64.lds.S
@@ -2,6 +2,8 @@
  * This is the infamous ld script for the 64 bits vdso
  * library
  */
+
+#include <asm/page.h>
 #include <asm/vdso.h>
 
 OUTPUT_FORMAT("elf64-s390", "elf64-s390", "elf64-s390")
@@ -91,7 +93,7 @@ SECTIONS
 	.debug_ranges	0 : { *(.debug_ranges) }
 	.gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
 
-	. = ALIGN(4096);
+	. = ALIGN(PAGE_SIZE);
 	PROVIDE(_vdso_data = .);
 
 	/DISCARD/	: {
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index ce865bd4f81d..e4d36094aceb 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -27,7 +27,7 @@ static int diag_release_pages(struct kvm_vcpu *vcpu)
 	unsigned long prefix  = kvm_s390_get_prefix(vcpu);
 
 	start = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4];
-	end = vcpu->run->s.regs.gprs[vcpu->arch.sie_block->ipa & 0xf] + 4096;
+	end = vcpu->run->s.regs.gprs[vcpu->arch.sie_block->ipa & 0xf] + PAGE_SIZE;
 	vcpu->stat.diagnose_10++;
 
 	if (start & ~PAGE_MASK || end & ~PAGE_MASK || start >= end
@@ -51,9 +51,9 @@ static int diag_release_pages(struct kvm_vcpu *vcpu)
 		 */
 		gmap_discard(vcpu->arch.gmap, start, prefix);
 		if (start <= prefix)
-			gmap_discard(vcpu->arch.gmap, 0, 4096);
-		if (end > prefix + 4096)
-			gmap_discard(vcpu->arch.gmap, 4096, 8192);
+			gmap_discard(vcpu->arch.gmap, 0, PAGE_SIZE);
+		if (end > prefix + PAGE_SIZE)
+			gmap_discard(vcpu->arch.gmap, PAGE_SIZE, 2 * PAGE_SIZE);
 		gmap_discard(vcpu->arch.gmap, prefix + 2 * PAGE_SIZE, end);
 	}
 	return 0;
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index 653cae5e1ee1..3cc77391a102 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -629,7 +629,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
 	iep = ctlreg0.iep && test_kvm_facility(vcpu->kvm, 130);
 	if (asce.r)
 		goto real_address;
-	ptr = asce.origin * 4096;
+	ptr = asce.origin * PAGE_SIZE;
 	switch (asce.dt) {
 	case ASCE_TYPE_REGION1:
 		if (vaddr.rfx01 > asce.tl)
@@ -674,7 +674,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
 			return PGM_REGION_SECOND_TRANS;
 		if (edat1)
 			dat_protection |= rfte.p;
-		ptr = rfte.rto * 4096 + vaddr.rsx * 8;
+		ptr = rfte.rto * PAGE_SIZE + vaddr.rsx * 8;
 	}
 		/* fallthrough */
 	case ASCE_TYPE_REGION2: {
@@ -692,7 +692,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
 			return PGM_REGION_THIRD_TRANS;
 		if (edat1)
 			dat_protection |= rste.p;
-		ptr = rste.rto * 4096 + vaddr.rtx * 8;
+		ptr = rste.rto * PAGE_SIZE + vaddr.rtx * 8;
 	}
 		/* fallthrough */
 	case ASCE_TYPE_REGION3: {
@@ -720,7 +720,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
 			return PGM_SEGMENT_TRANSLATION;
 		if (edat1)
 			dat_protection |= rtte.fc0.p;
-		ptr = rtte.fc0.sto * 4096 + vaddr.sx * 8;
+		ptr = rtte.fc0.sto * PAGE_SIZE + vaddr.sx * 8;
 	}
 		/* fallthrough */
 	case ASCE_TYPE_SEGMENT: {
@@ -743,7 +743,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
 			goto absolute_address;
 		}
 		dat_protection |= ste.fc0.p;
-		ptr = ste.fc0.pto * 2048 + vaddr.px * 8;
+		ptr = ste.fc0.pto * (PAGE_SIZE / 2) + vaddr.px * 8;
 	}
 	}
 	if (kvm_is_error_gpa(vcpu->kvm, ptr))
@@ -993,7 +993,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
 	parent = sg->parent;
 	vaddr.addr = saddr;
 	asce.val = sg->orig_asce;
-	ptr = asce.origin * 4096;
+	ptr = asce.origin * PAGE_SIZE;
 	if (asce.r) {
 		*fake = 1;
 		ptr = 0;
@@ -1029,7 +1029,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
 		union region1_table_entry rfte;
 
 		if (*fake) {
-			ptr += (unsigned long) vaddr.rfx << 53;
+			ptr += vaddr.rfx * _REGION1_SIZE;
 			rfte.val = ptr;
 			goto shadow_r2t;
 		}
@@ -1044,7 +1044,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
 			return PGM_REGION_SECOND_TRANS;
 		if (sg->edat_level >= 1)
 			*dat_protection |= rfte.p;
-		ptr = rfte.rto << 12UL;
+		ptr = rfte.rto * PAGE_SIZE;
 shadow_r2t:
 		rc = gmap_shadow_r2t(sg, saddr, rfte.val, *fake);
 		if (rc)
@@ -1055,7 +1055,7 @@ shadow_r2t:
 		union region2_table_entry rste;
 
 		if (*fake) {
-			ptr += (unsigned long) vaddr.rsx << 42;
+			ptr += vaddr.rsx * _REGION2_SIZE;
 			rste.val = ptr;
 			goto shadow_r3t;
 		}
@@ -1070,7 +1070,7 @@ shadow_r2t:
 			return PGM_REGION_THIRD_TRANS;
 		if (sg->edat_level >= 1)
 			*dat_protection |= rste.p;
-		ptr = rste.rto << 12UL;
+		ptr = rste.rto * PAGE_SIZE;
 shadow_r3t:
 		rste.p |= *dat_protection;
 		rc = gmap_shadow_r3t(sg, saddr, rste.val, *fake);
@@ -1082,7 +1082,7 @@ shadow_r3t:
 		union region3_table_entry rtte;
 
 		if (*fake) {
-			ptr += (unsigned long) vaddr.rtx << 31;
+			ptr += vaddr.rtx * _REGION3_SIZE;
 			rtte.val = ptr;
 			goto shadow_sgt;
 		}
@@ -1098,7 +1098,7 @@ shadow_r3t:
 		if (rtte.fc && sg->edat_level >= 2) {
 			*dat_protection |= rtte.fc0.p;
 			*fake = 1;
-			ptr = rtte.fc1.rfaa << 31UL;
+			ptr = rtte.fc1.rfaa * _REGION3_SIZE;
 			rtte.val = ptr;
 			goto shadow_sgt;
 		}
@@ -1106,7 +1106,7 @@ shadow_r3t:
 			return PGM_SEGMENT_TRANSLATION;
 		if (sg->edat_level >= 1)
 			*dat_protection |= rtte.fc0.p;
-		ptr = rtte.fc0.sto << 12UL;
+		ptr = rtte.fc0.sto * PAGE_SIZE;
 shadow_sgt:
 		rtte.fc0.p |= *dat_protection;
 		rc = gmap_shadow_sgt(sg, saddr, rtte.val, *fake);
@@ -1118,7 +1118,7 @@ shadow_sgt:
 		union segment_table_entry ste;
 
 		if (*fake) {
-			ptr += (unsigned long) vaddr.sx << 20;
+			ptr += vaddr.sx * _SEGMENT_SIZE;
 			ste.val = ptr;
 			goto shadow_pgt;
 		}
@@ -1134,11 +1134,11 @@ shadow_sgt:
 		*dat_protection |= ste.fc0.p;
 		if (ste.fc && sg->edat_level >= 1) {
 			*fake = 1;
-			ptr = ste.fc1.sfaa << 20UL;
+			ptr = ste.fc1.sfaa * _SEGMENT_SIZE;
 			ste.val = ptr;
 			goto shadow_pgt;
 		}
-		ptr = ste.fc0.pto << 11UL;
+		ptr = ste.fc0.pto * (PAGE_SIZE / 2);
 shadow_pgt:
 		ste.fc0.p |= *dat_protection;
 		rc = gmap_shadow_pgt(sg, saddr, ste.val, *fake);
@@ -1187,8 +1187,7 @@ int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg,
 
 	vaddr.addr = saddr;
 	if (fake) {
-		/* offset in 1MB guest memory block */
-		pte.val = pgt + ((unsigned long) vaddr.px << 12UL);
+		pte.val = pgt + vaddr.px * PAGE_SIZE;
 		goto shadow_page;
 	}
 	if (!rc)
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 8a1dac793d6b..785ad028bde6 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -329,7 +329,7 @@ static int handle_sske(struct kvm_vcpu *vcpu)
 	start = kvm_s390_logical_to_effective(vcpu, start);
 	if (m3 & SSKE_MB) {
 		/* start already designates an absolute address */
-		end = (start + (1UL << 20)) & ~((1UL << 20) - 1);
+		end = (start + _SEGMENT_SIZE) & ~(_SEGMENT_SIZE - 1);
 	} else {
 		start = kvm_s390_real_to_abs(vcpu, start);
 		end = start + PAGE_SIZE;
@@ -893,10 +893,10 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
 	case 0x00000000:
 		/* only 4k frames specify a real address */
 		start = kvm_s390_real_to_abs(vcpu, start);
-		end = (start + (1UL << 12)) & ~((1UL << 12) - 1);
+		end = (start + PAGE_SIZE) & ~(PAGE_SIZE - 1);
 		break;
 	case 0x00001000:
-		end = (start + (1UL << 20)) & ~((1UL << 20) - 1);
+		end = (start + _SEGMENT_SIZE) & ~(_SEGMENT_SIZE - 1);
 		break;
 	case 0x00002000:
 		/* only support 2G frame size if EDAT2 is available and we are
@@ -904,7 +904,7 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
 		if (!test_kvm_facility(vcpu->kvm, 78) ||
 		    psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_BITS_AMODE_24BIT)
 			return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
-		end = (start + (1UL << 31)) & ~((1UL << 31) - 1);
+		end = (start + _REGION3_SIZE) & ~(_REGION3_SIZE - 1);
 		break;
 	default:
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index 715c19c45d9a..ba8203e4d516 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -1069,7 +1069,7 @@ int kvm_s390_handle_vsie(struct kvm_vcpu *vcpu)
 	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
 		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
 
-	BUILD_BUG_ON(sizeof(struct vsie_page) != 4096);
+	BUILD_BUG_ON(sizeof(struct vsie_page) != PAGE_SIZE);
 	scb_addr = kvm_s390_get_base_disp_s(vcpu, NULL);
 
 	/* 512 byte alignment */
diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c
index 92e90e40b6fb..7f17555ad4d5 100644
--- a/arch/s390/lib/delay.c
+++ b/arch/s390/lib/delay.c
@@ -57,7 +57,7 @@ static void __udelay_enabled(unsigned long long usecs)
 	end = get_tod_clock_fast() + (usecs << 12);
 	do {
 		clock_saved = 0;
-		if (end < S390_lowcore.clock_comparator) {
+		if (tod_after(S390_lowcore.clock_comparator, end)) {
 			clock_saved = local_tick_disable();
 			set_clock_comparator(end);
 		}
diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c
index ffb15bd4c593..b12663d653d8 100644
--- a/arch/s390/lib/spinlock.c
+++ b/arch/s390/lib/spinlock.c
@@ -32,42 +32,63 @@ static int __init spin_retry_setup(char *str)
 }
 __setup("spin_retry=", spin_retry_setup);
 
+static inline int arch_load_niai4(int *lock)
+{
+	int owner;
+
+	asm volatile(
+#ifdef CONFIG_HAVE_MARCH_ZEC12_FEATURES
+		"	.long	0xb2fa0040\n"	/* NIAI 4 */
+#endif
+		"	l	%0,%1\n"
+		: "=d" (owner) : "Q" (*lock) : "memory");
+       return owner;
+}
+
+static inline int arch_cmpxchg_niai8(int *lock, int old, int new)
+{
+	int expected = old;
+
+	asm volatile(
+#ifdef CONFIG_HAVE_MARCH_ZEC12_FEATURES
+		"	.long	0xb2fa0080\n"	/* NIAI 8 */
+#endif
+		"	cs	%0,%3,%1\n"
+		: "=d" (old), "=Q" (*lock)
+		: "0" (old), "d" (new), "Q" (*lock)
+		: "cc", "memory");
+	return expected == old;
+}
+
 void arch_spin_lock_wait(arch_spinlock_t *lp)
 {
 	int cpu = SPINLOCK_LOCKVAL;
-	int owner, count, first_diag;
+	int owner, count;
+
+	/* Pass the virtual CPU to the lock holder if it is not running */
+	owner = arch_load_niai4(&lp->lock);
+	if (owner && arch_vcpu_is_preempted(~owner))
+		smp_yield_cpu(~owner);
 
-	first_diag = 1;
+	count = spin_retry;
 	while (1) {
-		owner = ACCESS_ONCE(lp->lock);
+		owner = arch_load_niai4(&lp->lock);
 		/* Try to get the lock if it is free. */
 		if (!owner) {
-			if (__atomic_cmpxchg_bool(&lp->lock, 0, cpu))
+			if (arch_cmpxchg_niai8(&lp->lock, 0, cpu))
 				return;
 			continue;
 		}
-		/* First iteration: check if the lock owner is running. */
-		if (first_diag && arch_vcpu_is_preempted(~owner)) {
-			smp_yield_cpu(~owner);
-			first_diag = 0;
+		if (count-- >= 0)
 			continue;
-		}
-		/* Loop for a while on the lock value. */
 		count = spin_retry;
-		do {
-			owner = ACCESS_ONCE(lp->lock);
-		} while (owner && count-- > 0);
-		if (!owner)
-			continue;
 		/*
 		 * For multiple layers of hypervisors, e.g. z/VM + LPAR
 		 * yield the CPU unconditionally. For LPAR rely on the
 		 * sense running status.
 		 */
-		if (!MACHINE_IS_LPAR || arch_vcpu_is_preempted(~owner)) {
+		if (!MACHINE_IS_LPAR || arch_vcpu_is_preempted(~owner))
 			smp_yield_cpu(~owner);
-			first_diag = 0;
-		}
 	}
 }
 EXPORT_SYMBOL(arch_spin_lock_wait);
@@ -75,42 +96,36 @@ EXPORT_SYMBOL(arch_spin_lock_wait);
 void arch_spin_lock_wait_flags(arch_spinlock_t *lp, unsigned long flags)
 {
 	int cpu = SPINLOCK_LOCKVAL;
-	int owner, count, first_diag;
+	int owner, count;
 
 	local_irq_restore(flags);
-	first_diag = 1;
+
+	/* Pass the virtual CPU to the lock holder if it is not running */
+	owner = arch_load_niai4(&lp->lock);
+	if (owner && arch_vcpu_is_preempted(~owner))
+		smp_yield_cpu(~owner);
+
+	count = spin_retry;
 	while (1) {
-		owner = ACCESS_ONCE(lp->lock);
+		owner = arch_load_niai4(&lp->lock);
 		/* Try to get the lock if it is free. */
 		if (!owner) {
 			local_irq_disable();
-			if (__atomic_cmpxchg_bool(&lp->lock, 0, cpu))
+			if (arch_cmpxchg_niai8(&lp->lock, 0, cpu))
 				return;
 			local_irq_restore(flags);
 			continue;
 		}
-		/* Check if the lock owner is running. */
-		if (first_diag && arch_vcpu_is_preempted(~owner)) {
-			smp_yield_cpu(~owner);
-			first_diag = 0;
+		if (count-- >= 0)
 			continue;
-		}
-		/* Loop for a while on the lock value. */
 		count = spin_retry;
-		do {
-			owner = ACCESS_ONCE(lp->lock);
-		} while (owner && count-- > 0);
-		if (!owner)
-			continue;
 		/*
 		 * For multiple layers of hypervisors, e.g. z/VM + LPAR
 		 * yield the CPU unconditionally. For LPAR rely on the
 		 * sense running status.
 		 */
-		if (!MACHINE_IS_LPAR || arch_vcpu_is_preempted(~owner)) {
+		if (!MACHINE_IS_LPAR || arch_vcpu_is_preempted(~owner))
 			smp_yield_cpu(~owner);
-			first_diag = 0;
-		}
 	}
 }
 EXPORT_SYMBOL(arch_spin_lock_wait_flags);
diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c
index b3bd3f23b8e8..4ea9106417ee 100644
--- a/arch/s390/lib/uaccess.c
+++ b/arch/s390/lib/uaccess.c
@@ -15,8 +15,30 @@
 #include <asm/mmu_context.h>
 #include <asm/facility.h>
 
+#ifndef CONFIG_HAVE_MARCH_Z10_FEATURES
 static DEFINE_STATIC_KEY_FALSE(have_mvcos);
 
+static int __init uaccess_init(void)
+{
+	if (test_facility(27))
+		static_branch_enable(&have_mvcos);
+	return 0;
+}
+early_initcall(uaccess_init);
+
+static inline int copy_with_mvcos(void)
+{
+	if (static_branch_likely(&have_mvcos))
+		return 1;
+	return 0;
+}
+#else
+static inline int copy_with_mvcos(void)
+{
+	return 1;
+}
+#endif
+
 static inline unsigned long copy_from_user_mvcos(void *x, const void __user *ptr,
 						 unsigned long size)
 {
@@ -84,7 +106,7 @@ static inline unsigned long copy_from_user_mvcp(void *x, const void __user *ptr,
 
 unsigned long raw_copy_from_user(void *to, const void __user *from, unsigned long n)
 {
-	if (static_branch_likely(&have_mvcos))
+	if (copy_with_mvcos())
 		return copy_from_user_mvcos(to, from, n);
 	return copy_from_user_mvcp(to, from, n);
 }
@@ -157,7 +179,7 @@ static inline unsigned long copy_to_user_mvcs(void __user *ptr, const void *x,
 
 unsigned long raw_copy_to_user(void __user *to, const void *from, unsigned long n)
 {
-	if (static_branch_likely(&have_mvcos))
+	if (copy_with_mvcos())
 		return copy_to_user_mvcos(to, from, n);
 	return copy_to_user_mvcs(to, from, n);
 }
@@ -220,7 +242,7 @@ static inline unsigned long copy_in_user_mvc(void __user *to, const void __user
 
 unsigned long raw_copy_in_user(void __user *to, const void __user *from, unsigned long n)
 {
-	if (static_branch_likely(&have_mvcos))
+	if (copy_with_mvcos())
 		return copy_in_user_mvcos(to, from, n);
 	return copy_in_user_mvc(to, from, n);
 }
@@ -292,7 +314,7 @@ static inline unsigned long clear_user_xc(void __user *to, unsigned long size)
 
 unsigned long __clear_user(void __user *to, unsigned long size)
 {
-	if (static_branch_likely(&have_mvcos))
+	if (copy_with_mvcos())
 			return clear_user_mvcos(to, size);
 	return clear_user_xc(to, size);
 }
@@ -349,11 +371,3 @@ long __strncpy_from_user(char *dst, const char __user *src, long size)
 	return done;
 }
 EXPORT_SYMBOL(__strncpy_from_user);
-
-static int __init uaccess_init(void)
-{
-	if (test_facility(27))
-		static_branch_enable(&have_mvcos);
-	return 0;
-}
-early_initcall(uaccess_init);
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 14f25798b001..bdabb013537b 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -135,7 +135,7 @@ static void dump_pagetable(unsigned long asce, unsigned long address)
 	pr_alert("AS:%016lx ", asce);
 	switch (asce & _ASCE_TYPE_MASK) {
 	case _ASCE_TYPE_REGION1:
-		table = table + ((address >> 53) & 0x7ff);
+		table += (address & _REGION1_INDEX) >> _REGION1_SHIFT;
 		if (bad_address(table))
 			goto bad;
 		pr_cont("R1:%016lx ", *table);
@@ -144,7 +144,7 @@ static void dump_pagetable(unsigned long asce, unsigned long address)
 		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
 		/* fallthrough */
 	case _ASCE_TYPE_REGION2:
-		table = table + ((address >> 42) & 0x7ff);
+		table += (address & _REGION2_INDEX) >> _REGION2_SHIFT;
 		if (bad_address(table))
 			goto bad;
 		pr_cont("R2:%016lx ", *table);
@@ -153,7 +153,7 @@ static void dump_pagetable(unsigned long asce, unsigned long address)
 		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
 		/* fallthrough */
 	case _ASCE_TYPE_REGION3:
-		table = table + ((address >> 31) & 0x7ff);
+		table += (address & _REGION3_INDEX) >> _REGION3_SHIFT;
 		if (bad_address(table))
 			goto bad;
 		pr_cont("R3:%016lx ", *table);
@@ -162,7 +162,7 @@ static void dump_pagetable(unsigned long asce, unsigned long address)
 		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
 		/* fallthrough */
 	case _ASCE_TYPE_SEGMENT:
-		table = table + ((address >> 20) & 0x7ff);
+		table += (address & _SEGMENT_INDEX) >> _SEGMENT_SHIFT;
 		if (bad_address(table))
 			goto bad;
 		pr_cont("S:%016lx ", *table);
@@ -170,7 +170,7 @@ static void dump_pagetable(unsigned long asce, unsigned long address)
 			goto out;
 		table = (unsigned long *)(*table & _SEGMENT_ENTRY_ORIGIN);
 	}
-	table = table + ((address >> 12) & 0xff);
+	table += (address & _PAGE_INDEX) >> _PAGE_SHIFT;
 	if (bad_address(table))
 		goto bad;
 	pr_cont("P:%016lx ", *table);
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index 4fb3d3cdb370..9e1494e3d849 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -36,16 +36,16 @@ static struct gmap *gmap_alloc(unsigned long limit)
 	unsigned long *table;
 	unsigned long etype, atype;
 
-	if (limit < (1UL << 31)) {
-		limit = (1UL << 31) - 1;
+	if (limit < _REGION3_SIZE) {
+		limit = _REGION3_SIZE - 1;
 		atype = _ASCE_TYPE_SEGMENT;
 		etype = _SEGMENT_ENTRY_EMPTY;
-	} else if (limit < (1UL << 42)) {
-		limit = (1UL << 42) - 1;
+	} else if (limit < _REGION2_SIZE) {
+		limit = _REGION2_SIZE - 1;
 		atype = _ASCE_TYPE_REGION3;
 		etype = _REGION3_ENTRY_EMPTY;
-	} else if (limit < (1UL << 53)) {
-		limit = (1UL << 53) - 1;
+	} else if (limit < _REGION1_SIZE) {
+		limit = _REGION1_SIZE - 1;
 		atype = _ASCE_TYPE_REGION2;
 		etype = _REGION2_ENTRY_EMPTY;
 	} else {
@@ -65,7 +65,7 @@ static struct gmap *gmap_alloc(unsigned long limit)
 	spin_lock_init(&gmap->guest_table_lock);
 	spin_lock_init(&gmap->shadow_lock);
 	atomic_set(&gmap->ref_count, 1);
-	page = alloc_pages(GFP_KERNEL, 2);
+	page = alloc_pages(GFP_KERNEL, CRST_ALLOC_ORDER);
 	if (!page)
 		goto out_free;
 	page->index = 0;
@@ -186,7 +186,7 @@ static void gmap_free(struct gmap *gmap)
 		gmap_flush_tlb(gmap);
 	/* Free all segment & region tables. */
 	list_for_each_entry_safe(page, next, &gmap->crst_list, lru)
-		__free_pages(page, 2);
+		__free_pages(page, CRST_ALLOC_ORDER);
 	gmap_radix_tree_free(&gmap->guest_to_host);
 	gmap_radix_tree_free(&gmap->host_to_guest);
 
@@ -306,7 +306,7 @@ static int gmap_alloc_table(struct gmap *gmap, unsigned long *table,
 	unsigned long *new;
 
 	/* since we dont free the gmap table until gmap_free we can unlock */
-	page = alloc_pages(GFP_KERNEL, 2);
+	page = alloc_pages(GFP_KERNEL, CRST_ALLOC_ORDER);
 	if (!page)
 		return -ENOMEM;
 	new = (unsigned long *) page_to_phys(page);
@@ -321,7 +321,7 @@ static int gmap_alloc_table(struct gmap *gmap, unsigned long *table,
 	}
 	spin_unlock(&gmap->guest_table_lock);
 	if (page)
-		__free_pages(page, 2);
+		__free_pages(page, CRST_ALLOC_ORDER);
 	return 0;
 }
 
@@ -546,30 +546,30 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
 	/* Create higher level tables in the gmap page table */
 	table = gmap->table;
 	if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION1) {
-		table += (gaddr >> 53) & 0x7ff;
+		table += (gaddr & _REGION1_INDEX) >> _REGION1_SHIFT;
 		if ((*table & _REGION_ENTRY_INVALID) &&
 		    gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY,
-				     gaddr & 0xffe0000000000000UL))
+				     gaddr & _REGION1_MASK))
 			return -ENOMEM;
 		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
 	}
 	if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION2) {
-		table += (gaddr >> 42) & 0x7ff;
+		table += (gaddr & _REGION2_INDEX) >> _REGION2_SHIFT;
 		if ((*table & _REGION_ENTRY_INVALID) &&
 		    gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY,
-				     gaddr & 0xfffffc0000000000UL))
+				     gaddr & _REGION2_MASK))
 			return -ENOMEM;
 		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
 	}
 	if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION3) {
-		table += (gaddr >> 31) & 0x7ff;
+		table += (gaddr & _REGION3_INDEX) >> _REGION3_SHIFT;
 		if ((*table & _REGION_ENTRY_INVALID) &&
 		    gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY,
-				     gaddr & 0xffffffff80000000UL))
+				     gaddr & _REGION3_MASK))
 			return -ENOMEM;
 		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
 	}
-	table += (gaddr >> 20) & 0x7ff;
+	table += (gaddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT;
 	/* Walk the parent mm page table */
 	mm = gmap->mm;
 	pgd = pgd_offset(mm, vmaddr);
@@ -771,7 +771,7 @@ static inline unsigned long *gmap_table_walk(struct gmap *gmap,
 	table = gmap->table;
 	switch (gmap->asce & _ASCE_TYPE_MASK) {
 	case _ASCE_TYPE_REGION1:
-		table += (gaddr >> 53) & 0x7ff;
+		table += (gaddr & _REGION1_INDEX) >> _REGION1_SHIFT;
 		if (level == 4)
 			break;
 		if (*table & _REGION_ENTRY_INVALID)
@@ -779,7 +779,7 @@ static inline unsigned long *gmap_table_walk(struct gmap *gmap,
 		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
 		/* Fallthrough */
 	case _ASCE_TYPE_REGION2:
-		table += (gaddr >> 42) & 0x7ff;
+		table += (gaddr & _REGION2_INDEX) >> _REGION2_SHIFT;
 		if (level == 3)
 			break;
 		if (*table & _REGION_ENTRY_INVALID)
@@ -787,7 +787,7 @@ static inline unsigned long *gmap_table_walk(struct gmap *gmap,
 		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
 		/* Fallthrough */
 	case _ASCE_TYPE_REGION3:
-		table += (gaddr >> 31) & 0x7ff;
+		table += (gaddr & _REGION3_INDEX) >> _REGION3_SHIFT;
 		if (level == 2)
 			break;
 		if (*table & _REGION_ENTRY_INVALID)
@@ -795,13 +795,13 @@ static inline unsigned long *gmap_table_walk(struct gmap *gmap,
 		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
 		/* Fallthrough */
 	case _ASCE_TYPE_SEGMENT:
-		table += (gaddr >> 20) & 0x7ff;
+		table += (gaddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT;
 		if (level == 1)
 			break;
 		if (*table & _REGION_ENTRY_INVALID)
 			return NULL;
 		table = (unsigned long *)(*table & _SEGMENT_ENTRY_ORIGIN);
-		table += (gaddr >> 12) & 0xff;
+		table += (gaddr & _PAGE_INDEX) >> _PAGE_SHIFT;
 	}
 	return table;
 }
@@ -1126,7 +1126,7 @@ static void gmap_unshadow_page(struct gmap *sg, unsigned long raddr)
 	table = gmap_table_walk(sg, raddr, 0); /* get page table pointer */
 	if (!table || *table & _PAGE_INVALID)
 		return;
-	gmap_call_notifier(sg, raddr, raddr + (1UL << 12) - 1);
+	gmap_call_notifier(sg, raddr, raddr + _PAGE_SIZE - 1);
 	ptep_unshadow_pte(sg->mm, raddr, (pte_t *) table);
 }
 
@@ -1144,7 +1144,7 @@ static void __gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr,
 	int i;
 
 	BUG_ON(!gmap_is_shadow(sg));
-	for (i = 0; i < 256; i++, raddr += 1UL << 12)
+	for (i = 0; i < _PAGE_ENTRIES; i++, raddr += _PAGE_SIZE)
 		pgt[i] = _PAGE_INVALID;
 }
 
@@ -1164,8 +1164,8 @@ static void gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr)
 	ste = gmap_table_walk(sg, raddr, 1); /* get segment pointer */
 	if (!ste || !(*ste & _SEGMENT_ENTRY_ORIGIN))
 		return;
-	gmap_call_notifier(sg, raddr, raddr + (1UL << 20) - 1);
-	sto = (unsigned long) (ste - ((raddr >> 20) & 0x7ff));
+	gmap_call_notifier(sg, raddr, raddr + _SEGMENT_SIZE - 1);
+	sto = (unsigned long) (ste - ((raddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT));
 	gmap_idte_one(sto | _ASCE_TYPE_SEGMENT, raddr);
 	pgt = (unsigned long *)(*ste & _SEGMENT_ENTRY_ORIGIN);
 	*ste = _SEGMENT_ENTRY_EMPTY;
@@ -1193,7 +1193,7 @@ static void __gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr,
 
 	BUG_ON(!gmap_is_shadow(sg));
 	asce = (unsigned long) sgt | _ASCE_TYPE_SEGMENT;
-	for (i = 0; i < 2048; i++, raddr += 1UL << 20) {
+	for (i = 0; i < _CRST_ENTRIES; i++, raddr += _SEGMENT_SIZE) {
 		if (!(sgt[i] & _SEGMENT_ENTRY_ORIGIN))
 			continue;
 		pgt = (unsigned long *)(sgt[i] & _REGION_ENTRY_ORIGIN);
@@ -1222,8 +1222,8 @@ static void gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr)
 	r3e = gmap_table_walk(sg, raddr, 2); /* get region-3 pointer */
 	if (!r3e || !(*r3e & _REGION_ENTRY_ORIGIN))
 		return;
-	gmap_call_notifier(sg, raddr, raddr + (1UL << 31) - 1);
-	r3o = (unsigned long) (r3e - ((raddr >> 31) & 0x7ff));
+	gmap_call_notifier(sg, raddr, raddr + _REGION3_SIZE - 1);
+	r3o = (unsigned long) (r3e - ((raddr & _REGION3_INDEX) >> _REGION3_SHIFT));
 	gmap_idte_one(r3o | _ASCE_TYPE_REGION3, raddr);
 	sgt = (unsigned long *)(*r3e & _REGION_ENTRY_ORIGIN);
 	*r3e = _REGION3_ENTRY_EMPTY;
@@ -1231,7 +1231,7 @@ static void gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr)
 	/* Free segment table */
 	page = pfn_to_page(__pa(sgt) >> PAGE_SHIFT);
 	list_del(&page->lru);
-	__free_pages(page, 2);
+	__free_pages(page, CRST_ALLOC_ORDER);
 }
 
 /**
@@ -1251,7 +1251,7 @@ static void __gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr,
 
 	BUG_ON(!gmap_is_shadow(sg));
 	asce = (unsigned long) r3t | _ASCE_TYPE_REGION3;
-	for (i = 0; i < 2048; i++, raddr += 1UL << 31) {
+	for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION3_SIZE) {
 		if (!(r3t[i] & _REGION_ENTRY_ORIGIN))
 			continue;
 		sgt = (unsigned long *)(r3t[i] & _REGION_ENTRY_ORIGIN);
@@ -1260,7 +1260,7 @@ static void __gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr,
 		/* Free segment table */
 		page = pfn_to_page(__pa(sgt) >> PAGE_SHIFT);
 		list_del(&page->lru);
-		__free_pages(page, 2);
+		__free_pages(page, CRST_ALLOC_ORDER);
 	}
 }
 
@@ -1280,8 +1280,8 @@ static void gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr)
 	r2e = gmap_table_walk(sg, raddr, 3); /* get region-2 pointer */
 	if (!r2e || !(*r2e & _REGION_ENTRY_ORIGIN))
 		return;
-	gmap_call_notifier(sg, raddr, raddr + (1UL << 42) - 1);
-	r2o = (unsigned long) (r2e - ((raddr >> 42) & 0x7ff));
+	gmap_call_notifier(sg, raddr, raddr + _REGION2_SIZE - 1);
+	r2o = (unsigned long) (r2e - ((raddr & _REGION2_INDEX) >> _REGION2_SHIFT));
 	gmap_idte_one(r2o | _ASCE_TYPE_REGION2, raddr);
 	r3t = (unsigned long *)(*r2e & _REGION_ENTRY_ORIGIN);
 	*r2e = _REGION2_ENTRY_EMPTY;
@@ -1289,7 +1289,7 @@ static void gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr)
 	/* Free region 3 table */
 	page = pfn_to_page(__pa(r3t) >> PAGE_SHIFT);
 	list_del(&page->lru);
-	__free_pages(page, 2);
+	__free_pages(page, CRST_ALLOC_ORDER);
 }
 
 /**
@@ -1309,7 +1309,7 @@ static void __gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr,
 
 	BUG_ON(!gmap_is_shadow(sg));
 	asce = (unsigned long) r2t | _ASCE_TYPE_REGION2;
-	for (i = 0; i < 2048; i++, raddr += 1UL << 42) {
+	for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION2_SIZE) {
 		if (!(r2t[i] & _REGION_ENTRY_ORIGIN))
 			continue;
 		r3t = (unsigned long *)(r2t[i] & _REGION_ENTRY_ORIGIN);
@@ -1318,7 +1318,7 @@ static void __gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr,
 		/* Free region 3 table */
 		page = pfn_to_page(__pa(r3t) >> PAGE_SHIFT);
 		list_del(&page->lru);
-		__free_pages(page, 2);
+		__free_pages(page, CRST_ALLOC_ORDER);
 	}
 }
 
@@ -1338,8 +1338,8 @@ static void gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr)
 	r1e = gmap_table_walk(sg, raddr, 4); /* get region-1 pointer */
 	if (!r1e || !(*r1e & _REGION_ENTRY_ORIGIN))
 		return;
-	gmap_call_notifier(sg, raddr, raddr + (1UL << 53) - 1);
-	r1o = (unsigned long) (r1e - ((raddr >> 53) & 0x7ff));
+	gmap_call_notifier(sg, raddr, raddr + _REGION1_SIZE - 1);
+	r1o = (unsigned long) (r1e - ((raddr & _REGION1_INDEX) >> _REGION1_SHIFT));
 	gmap_idte_one(r1o | _ASCE_TYPE_REGION1, raddr);
 	r2t = (unsigned long *)(*r1e & _REGION_ENTRY_ORIGIN);
 	*r1e = _REGION1_ENTRY_EMPTY;
@@ -1347,7 +1347,7 @@ static void gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr)
 	/* Free region 2 table */
 	page = pfn_to_page(__pa(r2t) >> PAGE_SHIFT);
 	list_del(&page->lru);
-	__free_pages(page, 2);
+	__free_pages(page, CRST_ALLOC_ORDER);
 }
 
 /**
@@ -1367,7 +1367,7 @@ static void __gmap_unshadow_r1t(struct gmap *sg, unsigned long raddr,
 
 	BUG_ON(!gmap_is_shadow(sg));
 	asce = (unsigned long) r1t | _ASCE_TYPE_REGION1;
-	for (i = 0; i < 2048; i++, raddr += 1UL << 53) {
+	for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION1_SIZE) {
 		if (!(r1t[i] & _REGION_ENTRY_ORIGIN))
 			continue;
 		r2t = (unsigned long *)(r1t[i] & _REGION_ENTRY_ORIGIN);
@@ -1378,7 +1378,7 @@ static void __gmap_unshadow_r1t(struct gmap *sg, unsigned long raddr,
 		/* Free region 2 table */
 		page = pfn_to_page(__pa(r2t) >> PAGE_SHIFT);
 		list_del(&page->lru);
-		__free_pages(page, 2);
+		__free_pages(page, CRST_ALLOC_ORDER);
 	}
 }
 
@@ -1535,7 +1535,7 @@ struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce,
 	/* protect after insertion, so it will get properly invalidated */
 	down_read(&parent->mm->mmap_sem);
 	rc = gmap_protect_range(parent, asce & _ASCE_ORIGIN,
-				((asce & _ASCE_TABLE_LENGTH) + 1) * 4096,
+				((asce & _ASCE_TABLE_LENGTH) + 1) * PAGE_SIZE,
 				PROT_READ, PGSTE_VSIE_BIT);
 	up_read(&parent->mm->mmap_sem);
 	spin_lock(&parent->shadow_lock);
@@ -1578,7 +1578,7 @@ int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t,
 
 	BUG_ON(!gmap_is_shadow(sg));
 	/* Allocate a shadow region second table */
-	page = alloc_pages(GFP_KERNEL, 2);
+	page = alloc_pages(GFP_KERNEL, CRST_ALLOC_ORDER);
 	if (!page)
 		return -ENOMEM;
 	page->index = r2t & _REGION_ENTRY_ORIGIN;
@@ -1614,10 +1614,10 @@ int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t,
 	}
 	spin_unlock(&sg->guest_table_lock);
 	/* Make r2t read-only in parent gmap page table */
-	raddr = (saddr & 0xffe0000000000000UL) | _SHADOW_RMAP_REGION1;
+	raddr = (saddr & _REGION1_MASK) | _SHADOW_RMAP_REGION1;
 	origin = r2t & _REGION_ENTRY_ORIGIN;
-	offset = ((r2t & _REGION_ENTRY_OFFSET) >> 6) * 4096;
-	len = ((r2t & _REGION_ENTRY_LENGTH) + 1) * 4096 - offset;
+	offset = ((r2t & _REGION_ENTRY_OFFSET) >> 6) * PAGE_SIZE;
+	len = ((r2t & _REGION_ENTRY_LENGTH) + 1) * PAGE_SIZE - offset;
 	rc = gmap_protect_rmap(sg, raddr, origin + offset, len, PROT_READ);
 	spin_lock(&sg->guest_table_lock);
 	if (!rc) {
@@ -1634,7 +1634,7 @@ int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t,
 	return rc;
 out_free:
 	spin_unlock(&sg->guest_table_lock);
-	__free_pages(page, 2);
+	__free_pages(page, CRST_ALLOC_ORDER);
 	return rc;
 }
 EXPORT_SYMBOL_GPL(gmap_shadow_r2t);
@@ -1662,7 +1662,7 @@ int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t,
 
 	BUG_ON(!gmap_is_shadow(sg));
 	/* Allocate a shadow region second table */
-	page = alloc_pages(GFP_KERNEL, 2);
+	page = alloc_pages(GFP_KERNEL, CRST_ALLOC_ORDER);
 	if (!page)
 		return -ENOMEM;
 	page->index = r3t & _REGION_ENTRY_ORIGIN;
@@ -1697,10 +1697,10 @@ int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t,
 	}
 	spin_unlock(&sg->guest_table_lock);
 	/* Make r3t read-only in parent gmap page table */
-	raddr = (saddr & 0xfffffc0000000000UL) | _SHADOW_RMAP_REGION2;
+	raddr = (saddr & _REGION2_MASK) | _SHADOW_RMAP_REGION2;
 	origin = r3t & _REGION_ENTRY_ORIGIN;
-	offset = ((r3t & _REGION_ENTRY_OFFSET) >> 6) * 4096;
-	len = ((r3t & _REGION_ENTRY_LENGTH) + 1) * 4096 - offset;
+	offset = ((r3t & _REGION_ENTRY_OFFSET) >> 6) * PAGE_SIZE;
+	len = ((r3t & _REGION_ENTRY_LENGTH) + 1) * PAGE_SIZE - offset;
 	rc = gmap_protect_rmap(sg, raddr, origin + offset, len, PROT_READ);
 	spin_lock(&sg->guest_table_lock);
 	if (!rc) {
@@ -1717,7 +1717,7 @@ int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t,
 	return rc;
 out_free:
 	spin_unlock(&sg->guest_table_lock);
-	__free_pages(page, 2);
+	__free_pages(page, CRST_ALLOC_ORDER);
 	return rc;
 }
 EXPORT_SYMBOL_GPL(gmap_shadow_r3t);
@@ -1745,7 +1745,7 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt,
 
 	BUG_ON(!gmap_is_shadow(sg) || (sgt & _REGION3_ENTRY_LARGE));
 	/* Allocate a shadow segment table */
-	page = alloc_pages(GFP_KERNEL, 2);
+	page = alloc_pages(GFP_KERNEL, CRST_ALLOC_ORDER);
 	if (!page)
 		return -ENOMEM;
 	page->index = sgt & _REGION_ENTRY_ORIGIN;
@@ -1781,10 +1781,10 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt,
 	}
 	spin_unlock(&sg->guest_table_lock);
 	/* Make sgt read-only in parent gmap page table */
-	raddr = (saddr & 0xffffffff80000000UL) | _SHADOW_RMAP_REGION3;
+	raddr = (saddr & _REGION3_MASK) | _SHADOW_RMAP_REGION3;
 	origin = sgt & _REGION_ENTRY_ORIGIN;
-	offset = ((sgt & _REGION_ENTRY_OFFSET) >> 6) * 4096;
-	len = ((sgt & _REGION_ENTRY_LENGTH) + 1) * 4096 - offset;
+	offset = ((sgt & _REGION_ENTRY_OFFSET) >> 6) * PAGE_SIZE;
+	len = ((sgt & _REGION_ENTRY_LENGTH) + 1) * PAGE_SIZE - offset;
 	rc = gmap_protect_rmap(sg, raddr, origin + offset, len, PROT_READ);
 	spin_lock(&sg->guest_table_lock);
 	if (!rc) {
@@ -1801,7 +1801,7 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt,
 	return rc;
 out_free:
 	spin_unlock(&sg->guest_table_lock);
-	__free_pages(page, 2);
+	__free_pages(page, CRST_ALLOC_ORDER);
 	return rc;
 }
 EXPORT_SYMBOL_GPL(gmap_shadow_sgt);
@@ -1902,7 +1902,7 @@ int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt,
 	}
 	spin_unlock(&sg->guest_table_lock);
 	/* Make pgt read-only in parent gmap page table (not the pgste) */
-	raddr = (saddr & 0xfffffffffff00000UL) | _SHADOW_RMAP_SEGMENT;
+	raddr = (saddr & _SEGMENT_MASK) | _SHADOW_RMAP_SEGMENT;
 	origin = pgt & _SEGMENT_ENTRY_ORIGIN & PAGE_MASK;
 	rc = gmap_protect_rmap(sg, raddr, origin, PAGE_SIZE, PROT_READ);
 	spin_lock(&sg->guest_table_lock);
@@ -2021,7 +2021,7 @@ static void gmap_shadow_notify(struct gmap *sg, unsigned long vmaddr,
 	}
 	/* Check for top level table */
 	start = sg->orig_asce & _ASCE_ORIGIN;
-	end = start + ((sg->orig_asce & _ASCE_TABLE_LENGTH) + 1) * 4096;
+	end = start + ((sg->orig_asce & _ASCE_TABLE_LENGTH) + 1) * PAGE_SIZE;
 	if (!(sg->orig_asce & _ASCE_REAL_SPACE) && gaddr >= start &&
 	    gaddr < end) {
 		/* The complete shadow table has to go */
@@ -2032,7 +2032,7 @@ static void gmap_shadow_notify(struct gmap *sg, unsigned long vmaddr,
 		return;
 	}
 	/* Remove the page table tree from on specific entry */
-	head = radix_tree_delete(&sg->host_to_rmap, vmaddr >> 12);
+	head = radix_tree_delete(&sg->host_to_rmap, vmaddr >> PAGE_SHIFT);
 	gmap_for_each_rmap_safe(rmap, rnext, head) {
 		bits = rmap->raddr & _SHADOW_RMAP_MASK;
 		raddr = rmap->raddr ^ bits;
@@ -2076,7 +2076,7 @@ void ptep_notify(struct mm_struct *mm, unsigned long vmaddr,
 	struct gmap *gmap, *sg, *next;
 
 	offset = ((unsigned long) pte) & (255 * sizeof(pte_t));
-	offset = offset * (4096 / sizeof(pte_t));
+	offset = offset * (PAGE_SIZE / sizeof(pte_t));
 	rcu_read_lock();
 	list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
 		spin_lock(&gmap->guest_table_lock);
@@ -2121,6 +2121,37 @@ static inline void thp_split_mm(struct mm_struct *mm)
 }
 
 /*
+ * Remove all empty zero pages from the mapping for lazy refaulting
+ * - This must be called after mm->context.has_pgste is set, to avoid
+ *   future creation of zero pages
+ * - This must be called after THP was enabled
+ */
+static int __zap_zero_pages(pmd_t *pmd, unsigned long start,
+			   unsigned long end, struct mm_walk *walk)
+{
+	unsigned long addr;
+
+	for (addr = start; addr != end; addr += PAGE_SIZE) {
+		pte_t *ptep;
+		spinlock_t *ptl;
+
+		ptep = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
+		if (is_zero_pfn(pte_pfn(*ptep)))
+			ptep_xchg_direct(walk->mm, addr, ptep, __pte(_PAGE_INVALID));
+		pte_unmap_unlock(ptep, ptl);
+	}
+	return 0;
+}
+
+static inline void zap_zero_pages(struct mm_struct *mm)
+{
+	struct mm_walk walk = { .pmd_entry = __zap_zero_pages };
+
+	walk.mm = mm;
+	walk_page_range(0, TASK_SIZE, &walk);
+}
+
+/*
  * switch on pgstes for its userspace process (for kvm)
  */
 int s390_enable_sie(void)
@@ -2137,6 +2168,7 @@ int s390_enable_sie(void)
 	mm->context.has_pgste = 1;
 	/* split thp mappings and disable thp for future mappings */
 	thp_split_mm(mm);
+	zap_zero_pages(mm);
 	up_write(&mm->mmap_sem);
 	return 0;
 }
@@ -2149,13 +2181,6 @@ EXPORT_SYMBOL_GPL(s390_enable_sie);
 static int __s390_enable_skey(pte_t *pte, unsigned long addr,
 			      unsigned long next, struct mm_walk *walk)
 {
-	/*
-	 * Remove all zero page mappings,
-	 * after establishing a policy to forbid zero page mappings
-	 * following faults for that page will get fresh anonymous pages
-	 */
-	if (is_zero_pfn(pte_pfn(*pte)))
-		ptep_xchg_direct(walk->mm, addr, pte, __pte(_PAGE_INVALID));
 	/* Clear storage key */
 	ptep_zap_key(walk->mm, addr, pte);
 	return 0;
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 8111694ce55a..3b567838b905 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -26,6 +26,7 @@
 #include <linux/poison.h>
 #include <linux/initrd.h>
 #include <linux/export.h>
+#include <linux/cma.h>
 #include <linux/gfp.h>
 #include <linux/memblock.h>
 #include <asm/processor.h>
@@ -84,7 +85,7 @@ void __init paging_init(void)
 	psw_t psw;
 
 	init_mm.pgd = swapper_pg_dir;
-	if (VMALLOC_END > (1UL << 42)) {
+	if (VMALLOC_END > _REGION2_SIZE) {
 		asce_bits = _ASCE_TYPE_REGION2 | _ASCE_TABLE_LENGTH;
 		pgd_type = _REGION2_ENTRY_EMPTY;
 	} else {
@@ -93,8 +94,7 @@ void __init paging_init(void)
 	}
 	init_mm.context.asce = (__pa(init_mm.pgd) & PAGE_MASK) | asce_bits;
 	S390_lowcore.kernel_asce = init_mm.context.asce;
-	clear_table((unsigned long *) init_mm.pgd, pgd_type,
-		    sizeof(unsigned long)*2048);
+	crst_table_init((unsigned long *) init_mm.pgd, pgd_type);
 	vmem_map_init();
 
         /* enable virtual mapping in kernel mode */
@@ -137,6 +137,8 @@ void __init mem_init(void)
 	free_all_bootmem();
 	setup_zero_pages();	/* Setup zeroed pages. */
 
+	cmma_init_nodat();
+
 	mem_init_print_info(NULL);
 }
 
@@ -166,6 +168,58 @@ unsigned long memory_block_size_bytes(void)
 }
 
 #ifdef CONFIG_MEMORY_HOTPLUG
+
+#ifdef CONFIG_CMA
+
+/* Prevent memory blocks which contain cma regions from going offline */
+
+struct s390_cma_mem_data {
+	unsigned long start;
+	unsigned long end;
+};
+
+static int s390_cma_check_range(struct cma *cma, void *data)
+{
+	struct s390_cma_mem_data *mem_data;
+	unsigned long start, end;
+
+	mem_data = data;
+	start = cma_get_base(cma);
+	end = start + cma_get_size(cma);
+	if (end < mem_data->start)
+		return 0;
+	if (start >= mem_data->end)
+		return 0;
+	return -EBUSY;
+}
+
+static int s390_cma_mem_notifier(struct notifier_block *nb,
+				 unsigned long action, void *data)
+{
+	struct s390_cma_mem_data mem_data;
+	struct memory_notify *arg;
+	int rc = 0;
+
+	arg = data;
+	mem_data.start = arg->start_pfn << PAGE_SHIFT;
+	mem_data.end = mem_data.start + (arg->nr_pages << PAGE_SHIFT);
+	if (action == MEM_GOING_OFFLINE)
+		rc = cma_for_each_area(s390_cma_check_range, &mem_data);
+	return notifier_from_errno(rc);
+}
+
+static struct notifier_block s390_cma_mem_nb = {
+	.notifier_call = s390_cma_mem_notifier,
+};
+
+static int __init s390_cma_mem_init(void)
+{
+	return register_memory_notifier(&s390_cma_mem_nb);
+}
+device_initcall(s390_cma_mem_init);
+
+#endif /* CONFIG_CMA */
+
 int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock)
 {
 	unsigned long start_pfn = PFN_DOWN(start);
diff --git a/arch/s390/mm/page-states.c b/arch/s390/mm/page-states.c
index 69a7b01ae746..07fa7b8ae233 100644
--- a/arch/s390/mm/page-states.c
+++ b/arch/s390/mm/page-states.c
@@ -10,9 +10,10 @@
 #include <linux/errno.h>
 #include <linux/types.h>
 #include <linux/mm.h>
+#include <linux/memblock.h>
 #include <linux/gfp.h>
 #include <linux/init.h>
-
+#include <asm/facility.h>
 #include <asm/page-states.h>
 
 static int cmma_flag = 1;
@@ -36,14 +37,16 @@ __setup("cmma=", cmma);
 static inline int cmma_test_essa(void)
 {
 	register unsigned long tmp asm("0") = 0;
-	register int rc asm("1") = -EOPNOTSUPP;
+	register int rc asm("1");
 
+	/* test ESSA_GET_STATE */
 	asm volatile(
-		"       .insn rrf,0xb9ab0000,%1,%1,0,0\n"
+		"	.insn	rrf,0xb9ab0000,%1,%1,%2,0\n"
 		"0:     la      %0,0\n"
 		"1:\n"
 		EX_TABLE(0b,1b)
-		: "+&d" (rc), "+&d" (tmp));
+		: "=&d" (rc), "+&d" (tmp)
+		: "i" (ESSA_GET_STATE), "0" (-EOPNOTSUPP));
 	return rc;
 }
 
@@ -51,11 +54,26 @@ void __init cmma_init(void)
 {
 	if (!cmma_flag)
 		return;
-	if (cmma_test_essa())
+	if (cmma_test_essa()) {
 		cmma_flag = 0;
+		return;
+	}
+	if (test_facility(147))
+		cmma_flag = 2;
 }
 
-static inline void set_page_unstable(struct page *page, int order)
+static inline unsigned char get_page_state(struct page *page)
+{
+	unsigned char state;
+
+	asm volatile("	.insn	rrf,0xb9ab0000,%0,%1,%2,0"
+		     : "=&d" (state)
+		     : "a" (page_to_phys(page)),
+		       "i" (ESSA_GET_STATE));
+	return state & 0x3f;
+}
+
+static inline void set_page_unused(struct page *page, int order)
 {
 	int i, rc;
 
@@ -66,14 +84,18 @@ static inline void set_page_unstable(struct page *page, int order)
 			       "i" (ESSA_SET_UNUSED));
 }
 
-void arch_free_page(struct page *page, int order)
+static inline void set_page_stable_dat(struct page *page, int order)
 {
-	if (!cmma_flag)
-		return;
-	set_page_unstable(page, order);
+	int i, rc;
+
+	for (i = 0; i < (1 << order); i++)
+		asm volatile(".insn rrf,0xb9ab0000,%0,%1,%2,0"
+			     : "=&d" (rc)
+			     : "a" (page_to_phys(page + i)),
+			       "i" (ESSA_SET_STABLE));
 }
 
-static inline void set_page_stable(struct page *page, int order)
+static inline void set_page_stable_nodat(struct page *page, int order)
 {
 	int i, rc;
 
@@ -81,14 +103,154 @@ static inline void set_page_stable(struct page *page, int order)
 		asm volatile(".insn rrf,0xb9ab0000,%0,%1,%2,0"
 			     : "=&d" (rc)
 			     : "a" (page_to_phys(page + i)),
-			       "i" (ESSA_SET_STABLE));
+			       "i" (ESSA_SET_STABLE_NODAT));
+}
+
+static void mark_kernel_pmd(pud_t *pud, unsigned long addr, unsigned long end)
+{
+	unsigned long next;
+	struct page *page;
+	pmd_t *pmd;
+
+	pmd = pmd_offset(pud, addr);
+	do {
+		next = pmd_addr_end(addr, end);
+		if (pmd_none(*pmd) || pmd_large(*pmd))
+			continue;
+		page = virt_to_page(pmd_val(*pmd));
+		set_bit(PG_arch_1, &page->flags);
+	} while (pmd++, addr = next, addr != end);
+}
+
+static void mark_kernel_pud(p4d_t *p4d, unsigned long addr, unsigned long end)
+{
+	unsigned long next;
+	struct page *page;
+	pud_t *pud;
+	int i;
+
+	pud = pud_offset(p4d, addr);
+	do {
+		next = pud_addr_end(addr, end);
+		if (pud_none(*pud) || pud_large(*pud))
+			continue;
+		if (!pud_folded(*pud)) {
+			page = virt_to_page(pud_val(*pud));
+			for (i = 0; i < 3; i++)
+				set_bit(PG_arch_1, &page[i].flags);
+		}
+		mark_kernel_pmd(pud, addr, next);
+	} while (pud++, addr = next, addr != end);
+}
+
+static void mark_kernel_p4d(pgd_t *pgd, unsigned long addr, unsigned long end)
+{
+	unsigned long next;
+	struct page *page;
+	p4d_t *p4d;
+	int i;
+
+	p4d = p4d_offset(pgd, addr);
+	do {
+		next = p4d_addr_end(addr, end);
+		if (p4d_none(*p4d))
+			continue;
+		if (!p4d_folded(*p4d)) {
+			page = virt_to_page(p4d_val(*p4d));
+			for (i = 0; i < 3; i++)
+				set_bit(PG_arch_1, &page[i].flags);
+		}
+		mark_kernel_pud(p4d, addr, next);
+	} while (p4d++, addr = next, addr != end);
+}
+
+static void mark_kernel_pgd(void)
+{
+	unsigned long addr, next;
+	struct page *page;
+	pgd_t *pgd;
+	int i;
+
+	addr = 0;
+	pgd = pgd_offset_k(addr);
+	do {
+		next = pgd_addr_end(addr, MODULES_END);
+		if (pgd_none(*pgd))
+			continue;
+		if (!pgd_folded(*pgd)) {
+			page = virt_to_page(pgd_val(*pgd));
+			for (i = 0; i < 3; i++)
+				set_bit(PG_arch_1, &page[i].flags);
+		}
+		mark_kernel_p4d(pgd, addr, next);
+	} while (pgd++, addr = next, addr != MODULES_END);
+}
+
+void __init cmma_init_nodat(void)
+{
+	struct memblock_region *reg;
+	struct page *page;
+	unsigned long start, end, ix;
+
+	if (cmma_flag < 2)
+		return;
+	/* Mark pages used in kernel page tables */
+	mark_kernel_pgd();
+
+	/* Set all kernel pages not used for page tables to stable/no-dat */
+	for_each_memblock(memory, reg) {
+		start = memblock_region_memory_base_pfn(reg);
+		end = memblock_region_memory_end_pfn(reg);
+		page = pfn_to_page(start);
+		for (ix = start; ix < end; ix++, page++) {
+			if (__test_and_clear_bit(PG_arch_1, &page->flags))
+				continue;	/* skip page table pages */
+			if (!list_empty(&page->lru))
+				continue;	/* skip free pages */
+			set_page_stable_nodat(page, 0);
+		}
+	}
+}
+
+void arch_free_page(struct page *page, int order)
+{
+	if (!cmma_flag)
+		return;
+	set_page_unused(page, order);
 }
 
 void arch_alloc_page(struct page *page, int order)
 {
 	if (!cmma_flag)
 		return;
-	set_page_stable(page, order);
+	if (cmma_flag < 2)
+		set_page_stable_dat(page, order);
+	else
+		set_page_stable_nodat(page, order);
+}
+
+void arch_set_page_dat(struct page *page, int order)
+{
+	if (!cmma_flag)
+		return;
+	set_page_stable_dat(page, order);
+}
+
+void arch_set_page_nodat(struct page *page, int order)
+{
+	if (cmma_flag < 2)
+		return;
+	set_page_stable_nodat(page, order);
+}
+
+int arch_test_page_nodat(struct page *page)
+{
+	unsigned char state;
+
+	if (cmma_flag < 2)
+		return 0;
+	state = get_page_state(page);
+	return !!(state & 0x20);
 }
 
 void arch_set_page_states(int make_stable)
@@ -108,9 +270,9 @@ void arch_set_page_states(int make_stable)
 			list_for_each(l, &zone->free_area[order].free_list[t]) {
 				page = list_entry(l, struct page, lru);
 				if (make_stable)
-					set_page_stable(page, order);
+					set_page_stable_dat(page, 0);
 				else
-					set_page_unstable(page, order);
+					set_page_unused(page, order);
 			}
 		}
 		spin_unlock_irqrestore(&zone->lock, flags);
diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c
index 180481589246..552f898dfa74 100644
--- a/arch/s390/mm/pageattr.c
+++ b/arch/s390/mm/pageattr.c
@@ -7,6 +7,7 @@
 #include <asm/cacheflush.h>
 #include <asm/facility.h>
 #include <asm/pgtable.h>
+#include <asm/pgalloc.h>
 #include <asm/page.h>
 #include <asm/set_memory.h>
 
@@ -191,7 +192,7 @@ static int split_pud_page(pud_t *pudp, unsigned long addr)
 	pud_t new;
 	int i, ro, nx;
 
-	pm_dir = vmem_pmd_alloc();
+	pm_dir = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY);
 	if (!pm_dir)
 		return -ENOMEM;
 	pmd_addr = pud_pfn(*pudp) << PAGE_SHIFT;
@@ -328,7 +329,7 @@ static void ipte_range(pte_t *pte, unsigned long address, int nr)
 		return;
 	}
 	for (i = 0; i < nr; i++) {
-		__ptep_ipte(address, pte, IPTE_GLOBAL);
+		__ptep_ipte(address, pte, 0, 0, IPTE_GLOBAL);
 		address += PAGE_SIZE;
 		pte++;
 	}
diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c
index 18918e394ce4..c5b74dd61197 100644
--- a/arch/s390/mm/pgalloc.c
+++ b/arch/s390/mm/pgalloc.c
@@ -57,6 +57,7 @@ unsigned long *crst_table_alloc(struct mm_struct *mm)
 
 	if (!page)
 		return NULL;
+	arch_set_page_dat(page, 2);
 	return (unsigned long *) page_to_phys(page);
 }
 
@@ -82,7 +83,7 @@ int crst_table_upgrade(struct mm_struct *mm, unsigned long end)
 	int rc, notify;
 
 	/* upgrade should only happen from 3 to 4, 3 to 5, or 4 to 5 levels */
-	BUG_ON(mm->context.asce_limit < (1UL << 42));
+	BUG_ON(mm->context.asce_limit < _REGION2_SIZE);
 	if (end >= TASK_SIZE_MAX)
 		return -ENOMEM;
 	rc = 0;
@@ -95,11 +96,11 @@ int crst_table_upgrade(struct mm_struct *mm, unsigned long end)
 		}
 		spin_lock_bh(&mm->page_table_lock);
 		pgd = (unsigned long *) mm->pgd;
-		if (mm->context.asce_limit == (1UL << 42)) {
+		if (mm->context.asce_limit == _REGION2_SIZE) {
 			crst_table_init(table, _REGION2_ENTRY_EMPTY);
 			p4d_populate(mm, (p4d_t *) table, (pud_t *) pgd);
 			mm->pgd = (pgd_t *) table;
-			mm->context.asce_limit = 1UL << 53;
+			mm->context.asce_limit = _REGION1_SIZE;
 			mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
 				_ASCE_USER_BITS | _ASCE_TYPE_REGION2;
 		} else {
@@ -123,7 +124,7 @@ void crst_table_downgrade(struct mm_struct *mm)
 	pgd_t *pgd;
 
 	/* downgrade should only happen from 3 to 2 levels (compat only) */
-	BUG_ON(mm->context.asce_limit != (1UL << 42));
+	BUG_ON(mm->context.asce_limit != _REGION2_SIZE);
 
 	if (current->active_mm == mm) {
 		clear_user_asce();
@@ -132,7 +133,7 @@ void crst_table_downgrade(struct mm_struct *mm)
 
 	pgd = mm->pgd;
 	mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN);
-	mm->context.asce_limit = 1UL << 31;
+	mm->context.asce_limit = _REGION3_SIZE;
 	mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
 			   _ASCE_USER_BITS | _ASCE_TYPE_SEGMENT;
 	crst_table_free(mm, (unsigned long *) pgd);
@@ -214,6 +215,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
 		__free_page(page);
 		return NULL;
 	}
+	arch_set_page_dat(page, 0);
 	/* Initialize page table */
 	table = (unsigned long *) page_to_phys(page);
 	if (mm_alloc_pgste(mm)) {
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 4a1f7366b17a..4198a71b8fdd 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -25,8 +25,49 @@
 #include <asm/mmu_context.h>
 #include <asm/page-states.h>
 
+static inline void ptep_ipte_local(struct mm_struct *mm, unsigned long addr,
+				   pte_t *ptep, int nodat)
+{
+	unsigned long opt, asce;
+
+	if (MACHINE_HAS_TLB_GUEST) {
+		opt = 0;
+		asce = READ_ONCE(mm->context.gmap_asce);
+		if (asce == 0UL || nodat)
+			opt |= IPTE_NODAT;
+		if (asce != -1UL) {
+			asce = asce ? : mm->context.asce;
+			opt |= IPTE_GUEST_ASCE;
+		}
+		__ptep_ipte(addr, ptep, opt, asce, IPTE_LOCAL);
+	} else {
+		__ptep_ipte(addr, ptep, 0, 0, IPTE_LOCAL);
+	}
+}
+
+static inline void ptep_ipte_global(struct mm_struct *mm, unsigned long addr,
+				    pte_t *ptep, int nodat)
+{
+	unsigned long opt, asce;
+
+	if (MACHINE_HAS_TLB_GUEST) {
+		opt = 0;
+		asce = READ_ONCE(mm->context.gmap_asce);
+		if (asce == 0UL || nodat)
+			opt |= IPTE_NODAT;
+		if (asce != -1UL) {
+			asce = asce ? : mm->context.asce;
+			opt |= IPTE_GUEST_ASCE;
+		}
+		__ptep_ipte(addr, ptep, opt, asce, IPTE_GLOBAL);
+	} else {
+		__ptep_ipte(addr, ptep, 0, 0, IPTE_GLOBAL);
+	}
+}
+
 static inline pte_t ptep_flush_direct(struct mm_struct *mm,
-				      unsigned long addr, pte_t *ptep)
+				      unsigned long addr, pte_t *ptep,
+				      int nodat)
 {
 	pte_t old;
 
@@ -36,15 +77,16 @@ static inline pte_t ptep_flush_direct(struct mm_struct *mm,
 	atomic_inc(&mm->context.flush_count);
 	if (MACHINE_HAS_TLB_LC &&
 	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
-		__ptep_ipte(addr, ptep, IPTE_LOCAL);
+		ptep_ipte_local(mm, addr, ptep, nodat);
 	else
-		__ptep_ipte(addr, ptep, IPTE_GLOBAL);
+		ptep_ipte_global(mm, addr, ptep, nodat);
 	atomic_dec(&mm->context.flush_count);
 	return old;
 }
 
 static inline pte_t ptep_flush_lazy(struct mm_struct *mm,
-				    unsigned long addr, pte_t *ptep)
+				    unsigned long addr, pte_t *ptep,
+				    int nodat)
 {
 	pte_t old;
 
@@ -57,7 +99,7 @@ static inline pte_t ptep_flush_lazy(struct mm_struct *mm,
 		pte_val(*ptep) |= _PAGE_INVALID;
 		mm->context.flush_mm = 1;
 	} else
-		__ptep_ipte(addr, ptep, IPTE_GLOBAL);
+		ptep_ipte_global(mm, addr, ptep, nodat);
 	atomic_dec(&mm->context.flush_count);
 	return old;
 }
@@ -229,10 +271,12 @@ pte_t ptep_xchg_direct(struct mm_struct *mm, unsigned long addr,
 {
 	pgste_t pgste;
 	pte_t old;
+	int nodat;
 
 	preempt_disable();
 	pgste = ptep_xchg_start(mm, addr, ptep);
-	old = ptep_flush_direct(mm, addr, ptep);
+	nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
+	old = ptep_flush_direct(mm, addr, ptep, nodat);
 	old = ptep_xchg_commit(mm, addr, ptep, pgste, old, new);
 	preempt_enable();
 	return old;
@@ -244,10 +288,12 @@ pte_t ptep_xchg_lazy(struct mm_struct *mm, unsigned long addr,
 {
 	pgste_t pgste;
 	pte_t old;
+	int nodat;
 
 	preempt_disable();
 	pgste = ptep_xchg_start(mm, addr, ptep);
-	old = ptep_flush_lazy(mm, addr, ptep);
+	nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
+	old = ptep_flush_lazy(mm, addr, ptep, nodat);
 	old = ptep_xchg_commit(mm, addr, ptep, pgste, old, new);
 	preempt_enable();
 	return old;
@@ -259,10 +305,12 @@ pte_t ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr,
 {
 	pgste_t pgste;
 	pte_t old;
+	int nodat;
 
 	preempt_disable();
 	pgste = ptep_xchg_start(mm, addr, ptep);
-	old = ptep_flush_lazy(mm, addr, ptep);
+	nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
+	old = ptep_flush_lazy(mm, addr, ptep, nodat);
 	if (mm_has_pgste(mm)) {
 		pgste = pgste_update_all(old, pgste, mm);
 		pgste_set(ptep, pgste);
@@ -290,6 +338,28 @@ void ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
 }
 EXPORT_SYMBOL(ptep_modify_prot_commit);
 
+static inline void pmdp_idte_local(struct mm_struct *mm,
+				   unsigned long addr, pmd_t *pmdp)
+{
+	if (MACHINE_HAS_TLB_GUEST)
+		__pmdp_idte(addr, pmdp, IDTE_NODAT | IDTE_GUEST_ASCE,
+			    mm->context.asce, IDTE_LOCAL);
+	else
+		__pmdp_idte(addr, pmdp, 0, 0, IDTE_LOCAL);
+}
+
+static inline void pmdp_idte_global(struct mm_struct *mm,
+				    unsigned long addr, pmd_t *pmdp)
+{
+	if (MACHINE_HAS_TLB_GUEST)
+		__pmdp_idte(addr, pmdp, IDTE_NODAT | IDTE_GUEST_ASCE,
+			    mm->context.asce, IDTE_GLOBAL);
+	else if (MACHINE_HAS_IDTE)
+		__pmdp_idte(addr, pmdp, 0, 0, IDTE_GLOBAL);
+	else
+		__pmdp_csp(pmdp);
+}
+
 static inline pmd_t pmdp_flush_direct(struct mm_struct *mm,
 				      unsigned long addr, pmd_t *pmdp)
 {
@@ -298,16 +368,12 @@ static inline pmd_t pmdp_flush_direct(struct mm_struct *mm,
 	old = *pmdp;
 	if (pmd_val(old) & _SEGMENT_ENTRY_INVALID)
 		return old;
-	if (!MACHINE_HAS_IDTE) {
-		__pmdp_csp(pmdp);
-		return old;
-	}
 	atomic_inc(&mm->context.flush_count);
 	if (MACHINE_HAS_TLB_LC &&
 	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
-		__pmdp_idte(addr, pmdp, IDTE_LOCAL);
+		pmdp_idte_local(mm, addr, pmdp);
 	else
-		__pmdp_idte(addr, pmdp, IDTE_GLOBAL);
+		pmdp_idte_global(mm, addr, pmdp);
 	atomic_dec(&mm->context.flush_count);
 	return old;
 }
@@ -325,10 +391,9 @@ static inline pmd_t pmdp_flush_lazy(struct mm_struct *mm,
 			  cpumask_of(smp_processor_id()))) {
 		pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID;
 		mm->context.flush_mm = 1;
-	} else if (MACHINE_HAS_IDTE)
-		__pmdp_idte(addr, pmdp, IDTE_GLOBAL);
-	else
-		__pmdp_csp(pmdp);
+	} else {
+		pmdp_idte_global(mm, addr, pmdp);
+	}
 	atomic_dec(&mm->context.flush_count);
 	return old;
 }
@@ -359,28 +424,46 @@ pmd_t pmdp_xchg_lazy(struct mm_struct *mm, unsigned long addr,
 }
 EXPORT_SYMBOL(pmdp_xchg_lazy);
 
-static inline pud_t pudp_flush_direct(struct mm_struct *mm,
-				      unsigned long addr, pud_t *pudp)
+static inline void pudp_idte_local(struct mm_struct *mm,
+				   unsigned long addr, pud_t *pudp)
 {
-	pud_t old;
+	if (MACHINE_HAS_TLB_GUEST)
+		__pudp_idte(addr, pudp, IDTE_NODAT | IDTE_GUEST_ASCE,
+			    mm->context.asce, IDTE_LOCAL);
+	else
+		__pudp_idte(addr, pudp, 0, 0, IDTE_LOCAL);
+}
 
-	old = *pudp;
-	if (pud_val(old) & _REGION_ENTRY_INVALID)
-		return old;
-	if (!MACHINE_HAS_IDTE) {
+static inline void pudp_idte_global(struct mm_struct *mm,
+				    unsigned long addr, pud_t *pudp)
+{
+	if (MACHINE_HAS_TLB_GUEST)
+		__pudp_idte(addr, pudp, IDTE_NODAT | IDTE_GUEST_ASCE,
+			    mm->context.asce, IDTE_GLOBAL);
+	else if (MACHINE_HAS_IDTE)
+		__pudp_idte(addr, pudp, 0, 0, IDTE_GLOBAL);
+	else
 		/*
 		 * Invalid bit position is the same for pmd and pud, so we can
 		 * re-use _pmd_csp() here
 		 */
 		__pmdp_csp((pmd_t *) pudp);
+}
+
+static inline pud_t pudp_flush_direct(struct mm_struct *mm,
+				      unsigned long addr, pud_t *pudp)
+{
+	pud_t old;
+
+	old = *pudp;
+	if (pud_val(old) & _REGION_ENTRY_INVALID)
 		return old;
-	}
 	atomic_inc(&mm->context.flush_count);
 	if (MACHINE_HAS_TLB_LC &&
 	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
-		__pudp_idte(addr, pudp, IDTE_LOCAL);
+		pudp_idte_local(mm, addr, pudp);
 	else
-		__pudp_idte(addr, pudp, IDTE_GLOBAL);
+		pudp_idte_global(mm, addr, pudp);
 	atomic_dec(&mm->context.flush_count);
 	return old;
 }
@@ -482,7 +565,7 @@ int ptep_force_prot(struct mm_struct *mm, unsigned long addr,
 {
 	pte_t entry;
 	pgste_t pgste;
-	int pte_i, pte_p;
+	int pte_i, pte_p, nodat;
 
 	pgste = pgste_get_lock(ptep);
 	entry = *ptep;
@@ -495,13 +578,14 @@ int ptep_force_prot(struct mm_struct *mm, unsigned long addr,
 		return -EAGAIN;
 	}
 	/* Change access rights and set pgste bit */
+	nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
 	if (prot == PROT_NONE && !pte_i) {
-		ptep_flush_direct(mm, addr, ptep);
+		ptep_flush_direct(mm, addr, ptep, nodat);
 		pgste = pgste_update_all(entry, pgste, mm);
 		pte_val(entry) |= _PAGE_INVALID;
 	}
 	if (prot == PROT_READ && !pte_p) {
-		ptep_flush_direct(mm, addr, ptep);
+		ptep_flush_direct(mm, addr, ptep, nodat);
 		pte_val(entry) &= ~_PAGE_INVALID;
 		pte_val(entry) |= _PAGE_PROTECT;
 	}
@@ -541,10 +625,12 @@ int ptep_shadow_pte(struct mm_struct *mm, unsigned long saddr,
 void ptep_unshadow_pte(struct mm_struct *mm, unsigned long saddr, pte_t *ptep)
 {
 	pgste_t pgste;
+	int nodat;
 
 	pgste = pgste_get_lock(ptep);
 	/* notifier is called by the caller */
-	ptep_flush_direct(mm, saddr, ptep);
+	nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
+	ptep_flush_direct(mm, saddr, ptep, nodat);
 	/* don't touch the storage key - it belongs to parent pgste */
 	pgste = pgste_set_pte(ptep, pgste, __pte(_PAGE_INVALID));
 	pgste_set_unlock(ptep, pgste);
@@ -617,6 +703,7 @@ bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr)
 	pte_t *ptep;
 	pte_t pte;
 	bool dirty;
+	int nodat;
 
 	pgd = pgd_offset(mm, addr);
 	p4d = p4d_alloc(mm, pgd, addr);
@@ -645,7 +732,8 @@ bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr)
 	pte = *ptep;
 	if (dirty && (pte_val(pte) & _PAGE_PRESENT)) {
 		pgste = pgste_pte_notify(mm, addr, ptep, pgste);
-		__ptep_ipte(addr, ptep, IPTE_GLOBAL);
+		nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
+		ptep_ipte_global(mm, addr, ptep, nodat);
 		if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE))
 			pte_val(pte) |= _PAGE_PROTECT;
 		else
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index d8398962a723..c0af0d7b6e5f 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -38,37 +38,14 @@ static void __ref *vmem_alloc_pages(unsigned int order)
 	return (void *) memblock_alloc(size, size);
 }
 
-static inline p4d_t *vmem_p4d_alloc(void)
+void *vmem_crst_alloc(unsigned long val)
 {
-	p4d_t *p4d = NULL;
+	unsigned long *table;
 
-	p4d = vmem_alloc_pages(2);
-	if (!p4d)
-		return NULL;
-	clear_table((unsigned long *) p4d, _REGION2_ENTRY_EMPTY, PAGE_SIZE * 4);
-	return p4d;
-}
-
-static inline pud_t *vmem_pud_alloc(void)
-{
-	pud_t *pud = NULL;
-
-	pud = vmem_alloc_pages(2);
-	if (!pud)
-		return NULL;
-	clear_table((unsigned long *) pud, _REGION3_ENTRY_EMPTY, PAGE_SIZE * 4);
-	return pud;
-}
-
-pmd_t *vmem_pmd_alloc(void)
-{
-	pmd_t *pmd = NULL;
-
-	pmd = vmem_alloc_pages(2);
-	if (!pmd)
-		return NULL;
-	clear_table((unsigned long *) pmd, _SEGMENT_ENTRY_EMPTY, PAGE_SIZE * 4);
-	return pmd;
+	table = vmem_alloc_pages(CRST_ALLOC_ORDER);
+	if (table)
+		crst_table_init(table, val);
+	return table;
 }
 
 pte_t __ref *vmem_pte_alloc(void)
@@ -114,14 +91,14 @@ static int vmem_add_mem(unsigned long start, unsigned long size)
 	while (address < end) {
 		pg_dir = pgd_offset_k(address);
 		if (pgd_none(*pg_dir)) {
-			p4_dir = vmem_p4d_alloc();
+			p4_dir = vmem_crst_alloc(_REGION2_ENTRY_EMPTY);
 			if (!p4_dir)
 				goto out;
 			pgd_populate(&init_mm, pg_dir, p4_dir);
 		}
 		p4_dir = p4d_offset(pg_dir, address);
 		if (p4d_none(*p4_dir)) {
-			pu_dir = vmem_pud_alloc();
+			pu_dir = vmem_crst_alloc(_REGION3_ENTRY_EMPTY);
 			if (!pu_dir)
 				goto out;
 			p4d_populate(&init_mm, p4_dir, pu_dir);
@@ -136,7 +113,7 @@ static int vmem_add_mem(unsigned long start, unsigned long size)
 			continue;
 		}
 		if (pud_none(*pu_dir)) {
-			pm_dir = vmem_pmd_alloc();
+			pm_dir = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY);
 			if (!pm_dir)
 				goto out;
 			pud_populate(&init_mm, pu_dir, pm_dir);
@@ -253,7 +230,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
 	for (address = start; address < end;) {
 		pg_dir = pgd_offset_k(address);
 		if (pgd_none(*pg_dir)) {
-			p4_dir = vmem_p4d_alloc();
+			p4_dir = vmem_crst_alloc(_REGION2_ENTRY_EMPTY);
 			if (!p4_dir)
 				goto out;
 			pgd_populate(&init_mm, pg_dir, p4_dir);
@@ -261,7 +238,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
 
 		p4_dir = p4d_offset(pg_dir, address);
 		if (p4d_none(*p4_dir)) {
-			pu_dir = vmem_pud_alloc();
+			pu_dir = vmem_crst_alloc(_REGION3_ENTRY_EMPTY);
 			if (!pu_dir)
 				goto out;
 			p4d_populate(&init_mm, p4_dir, pu_dir);
@@ -269,7 +246,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
 
 		pu_dir = pud_offset(p4_dir, address);
 		if (pud_none(*pu_dir)) {
-			pm_dir = vmem_pmd_alloc();
+			pm_dir = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY);
 			if (!pm_dir)
 				goto out;
 			pud_populate(&init_mm, pu_dir, pm_dir);
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index 1803797fc885..8ec88497a28d 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -1093,15 +1093,27 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
 	case BPF_JMP | BPF_JSGT | BPF_K: /* ((s64) dst > (s64) imm) */
 		mask = 0x2000; /* jh */
 		goto branch_ks;
+	case BPF_JMP | BPF_JSLT | BPF_K: /* ((s64) dst < (s64) imm) */
+		mask = 0x4000; /* jl */
+		goto branch_ks;
 	case BPF_JMP | BPF_JSGE | BPF_K: /* ((s64) dst >= (s64) imm) */
 		mask = 0xa000; /* jhe */
 		goto branch_ks;
+	case BPF_JMP | BPF_JSLE | BPF_K: /* ((s64) dst <= (s64) imm) */
+		mask = 0xc000; /* jle */
+		goto branch_ks;
 	case BPF_JMP | BPF_JGT | BPF_K: /* (dst_reg > imm) */
 		mask = 0x2000; /* jh */
 		goto branch_ku;
+	case BPF_JMP | BPF_JLT | BPF_K: /* (dst_reg < imm) */
+		mask = 0x4000; /* jl */
+		goto branch_ku;
 	case BPF_JMP | BPF_JGE | BPF_K: /* (dst_reg >= imm) */
 		mask = 0xa000; /* jhe */
 		goto branch_ku;
+	case BPF_JMP | BPF_JLE | BPF_K: /* (dst_reg <= imm) */
+		mask = 0xc000; /* jle */
+		goto branch_ku;
 	case BPF_JMP | BPF_JNE | BPF_K: /* (dst_reg != imm) */
 		mask = 0x7000; /* jne */
 		goto branch_ku;
@@ -1119,15 +1131,27 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
 	case BPF_JMP | BPF_JSGT | BPF_X: /* ((s64) dst > (s64) src) */
 		mask = 0x2000; /* jh */
 		goto branch_xs;
+	case BPF_JMP | BPF_JSLT | BPF_X: /* ((s64) dst < (s64) src) */
+		mask = 0x4000; /* jl */
+		goto branch_xs;
 	case BPF_JMP | BPF_JSGE | BPF_X: /* ((s64) dst >= (s64) src) */
 		mask = 0xa000; /* jhe */
 		goto branch_xs;
+	case BPF_JMP | BPF_JSLE | BPF_X: /* ((s64) dst <= (s64) src) */
+		mask = 0xc000; /* jle */
+		goto branch_xs;
 	case BPF_JMP | BPF_JGT | BPF_X: /* (dst > src) */
 		mask = 0x2000; /* jh */
 		goto branch_xu;
+	case BPF_JMP | BPF_JLT | BPF_X: /* (dst < src) */
+		mask = 0x4000; /* jl */
+		goto branch_xu;
 	case BPF_JMP | BPF_JGE | BPF_X: /* (dst >= src) */
 		mask = 0xa000; /* jhe */
 		goto branch_xu;
+	case BPF_JMP | BPF_JLE | BPF_X: /* (dst <= src) */
+		mask = 0xc000; /* jle */
+		goto branch_xu;
 	case BPF_JMP | BPF_JNE | BPF_X: /* (dst != src) */
 		mask = 0x7000; /* jne */
 		goto branch_xu;
diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c
index bd534b4d40e3..0ae3936e266f 100644
--- a/arch/s390/pci/pci_clp.c
+++ b/arch/s390/pci/pci_clp.c
@@ -24,6 +24,14 @@
 
 bool zpci_unique_uid;
 
+static void update_uid_checking(bool new)
+{
+	if (zpci_unique_uid != new)
+		zpci_dbg(1, "uid checking:%d\n", new);
+
+	zpci_unique_uid = new;
+}
+
 static inline void zpci_err_clp(unsigned int rsp, int rc)
 {
 	struct {
@@ -319,7 +327,7 @@ static int clp_list_pci(struct clp_req_rsp_list_pci *rrb, void *data,
 			goto out;
 		}
 
-		zpci_unique_uid = rrb->response.uid_checking;
+		update_uid_checking(rrb->response.uid_checking);
 		WARN_ON_ONCE(rrb->response.entry_size !=
 			sizeof(struct clp_fh_list_entry));
 
diff --git a/arch/s390/tools/gen_facilities.c b/arch/s390/tools/gen_facilities.c
index 025ea20fc4b4..29d72bf8ed2b 100644
--- a/arch/s390/tools/gen_facilities.c
+++ b/arch/s390/tools/gen_facilities.c
@@ -41,7 +41,7 @@ static struct facility_def facility_defs[] = {
 			27, /* mvcos */
 			32, /* compare and swap and store */
 			33, /* compare and swap and store 2 */
-			34, /* general extension facility */
+			34, /* general instructions extension */
 			35, /* execute extensions */
 #endif
 #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
@@ -54,6 +54,9 @@ static struct facility_def facility_defs[] = {
 #ifdef CONFIG_HAVE_MARCH_Z13_FEATURES
 			53, /* load-and-zero-rightmost-byte, etc. */
 #endif
+#ifdef CONFIG_HAVE_MARCH_Z14_FEATURES
+			58, /* miscellaneous-instruction-extension 2 */
+#endif
 			-1 /* END */
 		}
 	},
diff --git a/arch/sh/configs/se7751_defconfig b/arch/sh/configs/se7751_defconfig
index 75c92fc1876b..56b5e4ce8d4a 100644
--- a/arch/sh/configs/se7751_defconfig
+++ b/arch/sh/configs/se7751_defconfig
@@ -28,7 +28,6 @@ CONFIG_IP_PNP_RARP=y
 # CONFIG_INET_LRO is not set
 # CONFIG_IPV6 is not set
 CONFIG_NETFILTER=y
-CONFIG_NETFILTER_DEBUG=y
 CONFIG_IP_NF_QUEUE=y
 CONFIG_MTD=y
 CONFIG_MTD_PARTITIONS=y
diff --git a/arch/sh/include/asm/futex.h b/arch/sh/include/asm/futex.h
index d0078747d308..8f8cf941a8cd 100644
--- a/arch/sh/include/asm/futex.h
+++ b/arch/sh/include/asm/futex.h
@@ -27,21 +27,12 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
 	return atomic_futex_op_cmpxchg_inatomic(uval, uaddr, oldval, newval);
 }
 
-static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
+static inline int arch_futex_atomic_op_inuser(int op, u32 oparg, int *oval,
+		u32 __user *uaddr)
 {
-	int op = (encoded_op >> 28) & 7;
-	int cmp = (encoded_op >> 24) & 15;
-	u32 oparg = (encoded_op << 8) >> 20;
-	u32 cmparg = (encoded_op << 20) >> 20;
 	u32 oldval, newval, prev;
 	int ret;
 
-	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
-		oparg = 1 << oparg;
-
-	if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
-		return -EFAULT;
-
 	pagefault_disable();
 
 	do {
@@ -80,17 +71,8 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
 
 	pagefault_enable();
 
-	if (!ret) {
-		switch (cmp) {
-		case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
-		case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
-		case FUTEX_OP_CMP_LT: ret = ((int)oldval < (int)cmparg); break;
-		case FUTEX_OP_CMP_GE: ret = ((int)oldval >= (int)cmparg); break;
-		case FUTEX_OP_CMP_LE: ret = ((int)oldval <= (int)cmparg); break;
-		case FUTEX_OP_CMP_GT: ret = ((int)oldval > (int)cmparg); break;
-		default: ret = -ENOSYS;
-		}
-	}
+	if (!ret)
+		*oval = oldval;
 
 	return ret;
 }
diff --git a/arch/sh/include/asm/spinlock-cas.h b/arch/sh/include/asm/spinlock-cas.h
index c46e8cc7b515..5ed7dbbd94ff 100644
--- a/arch/sh/include/asm/spinlock-cas.h
+++ b/arch/sh/include/asm/spinlock-cas.h
@@ -29,11 +29,6 @@ static inline unsigned __sl_cas(volatile unsigned *p, unsigned old, unsigned new
 #define arch_spin_is_locked(x)		((x)->lock <= 0)
 #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
 
-static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
-{
-	smp_cond_load_acquire(&lock->lock, VAL > 0);
-}
-
 static inline void arch_spin_lock(arch_spinlock_t *lock)
 {
 	while (!__sl_cas(&lock->lock, 1, 0));
diff --git a/arch/sh/include/asm/spinlock-llsc.h b/arch/sh/include/asm/spinlock-llsc.h
index cec78143fa83..f77263aae760 100644
--- a/arch/sh/include/asm/spinlock-llsc.h
+++ b/arch/sh/include/asm/spinlock-llsc.h
@@ -21,11 +21,6 @@
 #define arch_spin_is_locked(x)		((x)->lock <= 0)
 #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
 
-static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
-{
-	smp_cond_load_acquire(&lock->lock, VAL > 0);
-}
-
 /*
  * Simple spin lock operations.  There are two variants, one clears IRQ's
  * on the local processor, one does not.
diff --git a/arch/sparc/crypto/aes_glue.c b/arch/sparc/crypto/aes_glue.c
index c90930de76ba..3cd4f6b198b6 100644
--- a/arch/sparc/crypto/aes_glue.c
+++ b/arch/sparc/crypto/aes_glue.c
@@ -344,8 +344,7 @@ static void ctr_crypt_final(struct crypto_sparc64_aes_ctx *ctx,
 
 	ctx->ops->ecb_encrypt(&ctx->key[0], (const u64 *)ctrblk,
 			      keystream, AES_BLOCK_SIZE);
-	crypto_xor((u8 *) keystream, src, nbytes);
-	memcpy(dst, keystream, nbytes);
+	crypto_xor_cpy(dst, (u8 *) keystream, src, nbytes);
 	crypto_inc(ctrblk, AES_BLOCK_SIZE);
 }
 
diff --git a/arch/sparc/include/asm/atomic_32.h b/arch/sparc/include/asm/atomic_32.h
index ee3f11c43cda..7643e979e333 100644
--- a/arch/sparc/include/asm/atomic_32.h
+++ b/arch/sparc/include/asm/atomic_32.h
@@ -29,6 +29,8 @@ int atomic_xchg(atomic_t *, int);
 int __atomic_add_unless(atomic_t *, int, int);
 void atomic_set(atomic_t *, int);
 
+#define atomic_set_release(v, i)	atomic_set((v), (i))
+
 #define atomic_read(v)          ACCESS_ONCE((v)->counter)
 
 #define atomic_add(i, v)	((void)atomic_add_return( (int)(i), (v)))
diff --git a/arch/sparc/include/asm/futex_64.h b/arch/sparc/include/asm/futex_64.h
index 4e899b0dabf7..1cfd89d92208 100644
--- a/arch/sparc/include/asm/futex_64.h
+++ b/arch/sparc/include/asm/futex_64.h
@@ -29,22 +29,14 @@
 	: "r" (uaddr), "r" (oparg), "i" (-EFAULT)	\
 	: "memory")
 
-static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
+static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
+		u32 __user *uaddr)
 {
-	int op = (encoded_op >> 28) & 7;
-	int cmp = (encoded_op >> 24) & 15;
-	int oparg = (encoded_op << 8) >> 20;
-	int cmparg = (encoded_op << 20) >> 20;
 	int oldval = 0, ret, tem;
 
-	if (unlikely(!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))))
-		return -EFAULT;
 	if (unlikely((((unsigned long) uaddr) & 0x3UL)))
 		return -EINVAL;
 
-	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
-		oparg = 1 << oparg;
-
 	pagefault_disable();
 
 	switch (op) {
@@ -69,17 +61,9 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
 
 	pagefault_enable();
 
-	if (!ret) {
-		switch (cmp) {
-		case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
-		case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
-		case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
-		case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
-		case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
-		case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
-		default: ret = -ENOSYS;
-		}
-	}
+	if (!ret)
+		*oval = oldval;
+
 	return ret;
 }
 
diff --git a/arch/sparc/include/asm/spinlock_32.h b/arch/sparc/include/asm/spinlock_32.h
index 8011e79f59c9..67345b2dc408 100644
--- a/arch/sparc/include/asm/spinlock_32.h
+++ b/arch/sparc/include/asm/spinlock_32.h
@@ -14,11 +14,6 @@
 
 #define arch_spin_is_locked(lock) (*((volatile unsigned char *)(lock)) != 0)
 
-static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
-{
-	smp_cond_load_acquire(&lock->lock, !VAL);
-}
-
 static inline void arch_spin_lock(arch_spinlock_t *lock)
 {
 	__asm__ __volatile__(
diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h
index 186fd8199f54..b2f5c50d0947 100644
--- a/arch/sparc/include/uapi/asm/socket.h
+++ b/arch/sparc/include/uapi/asm/socket.h
@@ -98,6 +98,8 @@
 
 #define SO_PEERGROUPS		0x003d
 
+#define SO_ZEROCOPY		0x003e
+
 /* Security levels - as per NRL IPv6 - don't actually do anything */
 #define SO_SECURITY_AUTHENTICATION		0x5001
 #define SO_SECURITY_ENCRYPTION_TRANSPORT	0x5002
diff --git a/arch/sparc/net/bpf_jit_comp_64.c b/arch/sparc/net/bpf_jit_comp_64.c
index 8799ae9a8788..c340af7b1371 100644
--- a/arch/sparc/net/bpf_jit_comp_64.c
+++ b/arch/sparc/net/bpf_jit_comp_64.c
@@ -128,6 +128,8 @@ static u32 WDISP10(u32 off)
 
 #define BA		(BRANCH | CONDA)
 #define BG		(BRANCH | CONDG)
+#define BL		(BRANCH | CONDL)
+#define BLE		(BRANCH | CONDLE)
 #define BGU		(BRANCH | CONDGU)
 #define BLEU		(BRANCH | CONDLEU)
 #define BGE		(BRANCH | CONDGE)
@@ -715,9 +717,15 @@ static int emit_compare_and_branch(const u8 code, const u8 dst, u8 src,
 		case BPF_JGT:
 			br_opcode = BGU;
 			break;
+		case BPF_JLT:
+			br_opcode = BLU;
+			break;
 		case BPF_JGE:
 			br_opcode = BGEU;
 			break;
+		case BPF_JLE:
+			br_opcode = BLEU;
+			break;
 		case BPF_JSET:
 		case BPF_JNE:
 			br_opcode = BNE;
@@ -725,9 +733,15 @@ static int emit_compare_and_branch(const u8 code, const u8 dst, u8 src,
 		case BPF_JSGT:
 			br_opcode = BG;
 			break;
+		case BPF_JSLT:
+			br_opcode = BL;
+			break;
 		case BPF_JSGE:
 			br_opcode = BGE;
 			break;
+		case BPF_JSLE:
+			br_opcode = BLE;
+			break;
 		default:
 			/* Make sure we dont leak kernel information to the
 			 * user.
@@ -746,18 +760,30 @@ static int emit_compare_and_branch(const u8 code, const u8 dst, u8 src,
 		case BPF_JGT:
 			cbcond_opcode = CBCONDGU;
 			break;
+		case BPF_JLT:
+			cbcond_opcode = CBCONDLU;
+			break;
 		case BPF_JGE:
 			cbcond_opcode = CBCONDGEU;
 			break;
+		case BPF_JLE:
+			cbcond_opcode = CBCONDLEU;
+			break;
 		case BPF_JNE:
 			cbcond_opcode = CBCONDNE;
 			break;
 		case BPF_JSGT:
 			cbcond_opcode = CBCONDG;
 			break;
+		case BPF_JSLT:
+			cbcond_opcode = CBCONDL;
+			break;
 		case BPF_JSGE:
 			cbcond_opcode = CBCONDGE;
 			break;
+		case BPF_JSLE:
+			cbcond_opcode = CBCONDLE;
+			break;
 		default:
 			/* Make sure we dont leak kernel information to the
 			 * user.
@@ -1176,10 +1202,14 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
 	/* IF (dst COND src) JUMP off */
 	case BPF_JMP | BPF_JEQ | BPF_X:
 	case BPF_JMP | BPF_JGT | BPF_X:
+	case BPF_JMP | BPF_JLT | BPF_X:
 	case BPF_JMP | BPF_JGE | BPF_X:
+	case BPF_JMP | BPF_JLE | BPF_X:
 	case BPF_JMP | BPF_JNE | BPF_X:
 	case BPF_JMP | BPF_JSGT | BPF_X:
+	case BPF_JMP | BPF_JSLT | BPF_X:
 	case BPF_JMP | BPF_JSGE | BPF_X:
+	case BPF_JMP | BPF_JSLE | BPF_X:
 	case BPF_JMP | BPF_JSET | BPF_X: {
 		int err;
 
@@ -1191,10 +1221,14 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
 	/* IF (dst COND imm) JUMP off */
 	case BPF_JMP | BPF_JEQ | BPF_K:
 	case BPF_JMP | BPF_JGT | BPF_K:
+	case BPF_JMP | BPF_JLT | BPF_K:
 	case BPF_JMP | BPF_JGE | BPF_K:
+	case BPF_JMP | BPF_JLE | BPF_K:
 	case BPF_JMP | BPF_JNE | BPF_K:
 	case BPF_JMP | BPF_JSGT | BPF_K:
+	case BPF_JMP | BPF_JSLT | BPF_K:
 	case BPF_JMP | BPF_JSGE | BPF_K:
+	case BPF_JMP | BPF_JSLE | BPF_K:
 	case BPF_JMP | BPF_JSET | BPF_K: {
 		int err;
 
diff --git a/arch/tile/include/asm/atomic_32.h b/arch/tile/include/asm/atomic_32.h
index a93774255136..53a423e7cb92 100644
--- a/arch/tile/include/asm/atomic_32.h
+++ b/arch/tile/include/asm/atomic_32.h
@@ -101,6 +101,8 @@ static inline void atomic_set(atomic_t *v, int n)
 	_atomic_xchg(&v->counter, n);
 }
 
+#define atomic_set_release(v, i)	atomic_set((v), (i))
+
 /* A 64bit atomic type */
 
 typedef struct {
diff --git a/arch/tile/include/asm/futex.h b/arch/tile/include/asm/futex.h
index e64a1b75fc38..83c1e639b411 100644
--- a/arch/tile/include/asm/futex.h
+++ b/arch/tile/include/asm/futex.h
@@ -106,12 +106,9 @@
 	lock = __atomic_hashed_lock((int __force *)uaddr)
 #endif
 
-static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
+static inline int arch_futex_atomic_op_inuser(int op, u32 oparg, int *oval,
+		u32 __user *uaddr)
 {
-	int op = (encoded_op >> 28) & 7;
-	int cmp = (encoded_op >> 24) & 15;
-	int oparg = (encoded_op << 8) >> 20;
-	int cmparg = (encoded_op << 20) >> 20;
 	int uninitialized_var(val), ret;
 
 	__futex_prolog();
@@ -119,12 +116,6 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
 	/* The 32-bit futex code makes this assumption, so validate it here. */
 	BUILD_BUG_ON(sizeof(atomic_t) != sizeof(int));
 
-	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
-		oparg = 1 << oparg;
-
-	if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
-		return -EFAULT;
-
 	pagefault_disable();
 	switch (op) {
 	case FUTEX_OP_SET:
@@ -148,30 +139,9 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
 	}
 	pagefault_enable();
 
-	if (!ret) {
-		switch (cmp) {
-		case FUTEX_OP_CMP_EQ:
-			ret = (val == cmparg);
-			break;
-		case FUTEX_OP_CMP_NE:
-			ret = (val != cmparg);
-			break;
-		case FUTEX_OP_CMP_LT:
-			ret = (val < cmparg);
-			break;
-		case FUTEX_OP_CMP_GE:
-			ret = (val >= cmparg);
-			break;
-		case FUTEX_OP_CMP_LE:
-			ret = (val <= cmparg);
-			break;
-		case FUTEX_OP_CMP_GT:
-			ret = (val > cmparg);
-			break;
-		default:
-			ret = -ENOSYS;
-		}
-	}
+	if (!ret)
+		*oval = val;
+
 	return ret;
 }
 
diff --git a/arch/tile/include/asm/spinlock_32.h b/arch/tile/include/asm/spinlock_32.h
index b14b1ba5bf9c..cba8ba9b8da6 100644
--- a/arch/tile/include/asm/spinlock_32.h
+++ b/arch/tile/include/asm/spinlock_32.h
@@ -64,8 +64,6 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock)
 	lock->current_ticket = old_ticket + TICKET_QUANTUM;
 }
 
-void arch_spin_unlock_wait(arch_spinlock_t *lock);
-
 /*
  * Read-write spinlocks, allowing multiple readers
  * but only one writer.
diff --git a/arch/tile/include/asm/spinlock_64.h b/arch/tile/include/asm/spinlock_64.h
index b9718fb4e74a..9a2c2d605752 100644
--- a/arch/tile/include/asm/spinlock_64.h
+++ b/arch/tile/include/asm/spinlock_64.h
@@ -58,8 +58,6 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock)
 	__insn_fetchadd4(&lock->lock, 1U << __ARCH_SPIN_CURRENT_SHIFT);
 }
 
-void arch_spin_unlock_wait(arch_spinlock_t *lock);
-
 void arch_spin_lock_slow(arch_spinlock_t *lock, u32 val);
 
 /* Grab the "next" ticket number and bump it atomically.
diff --git a/arch/tile/lib/spinlock_32.c b/arch/tile/lib/spinlock_32.c
index 076c6cc43113..db9333f2447c 100644
--- a/arch/tile/lib/spinlock_32.c
+++ b/arch/tile/lib/spinlock_32.c
@@ -62,29 +62,6 @@ int arch_spin_trylock(arch_spinlock_t *lock)
 }
 EXPORT_SYMBOL(arch_spin_trylock);
 
-void arch_spin_unlock_wait(arch_spinlock_t *lock)
-{
-	u32 iterations = 0;
-	int curr = READ_ONCE(lock->current_ticket);
-	int next = READ_ONCE(lock->next_ticket);
-
-	/* Return immediately if unlocked. */
-	if (next == curr)
-		return;
-
-	/* Wait until the current locker has released the lock. */
-	do {
-		delay_backoff(iterations++);
-	} while (READ_ONCE(lock->current_ticket) == curr);
-
-	/*
-	 * The TILE architecture doesn't do read speculation; therefore
-	 * a control dependency guarantees a LOAD->{LOAD,STORE} order.
-	 */
-	barrier();
-}
-EXPORT_SYMBOL(arch_spin_unlock_wait);
-
 /*
  * The low byte is always reserved to be the marker for a "tns" operation
  * since the low bit is set to "1" by a tns.  The next seven bits are
diff --git a/arch/tile/lib/spinlock_64.c b/arch/tile/lib/spinlock_64.c
index a4b5b2cbce93..de414c22892f 100644
--- a/arch/tile/lib/spinlock_64.c
+++ b/arch/tile/lib/spinlock_64.c
@@ -62,28 +62,6 @@ int arch_spin_trylock(arch_spinlock_t *lock)
 }
 EXPORT_SYMBOL(arch_spin_trylock);
 
-void arch_spin_unlock_wait(arch_spinlock_t *lock)
-{
-	u32 iterations = 0;
-	u32 val = READ_ONCE(lock->lock);
-	u32 curr = arch_spin_current(val);
-
-	/* Return immediately if unlocked. */
-	if (arch_spin_next(val) == curr)
-		return;
-
-	/* Wait until the current locker has released the lock. */
-	do {
-		delay_backoff(iterations++);
-	} while (arch_spin_current(READ_ONCE(lock->lock)) == curr);
-
-	/*
-	 * The TILE architecture doesn't do read speculation; therefore
-	 * a control dependency guarantees a LOAD->{LOAD,STORE} order.
-	 */
-	barrier();
-}
-EXPORT_SYMBOL(arch_spin_unlock_wait);
 
 /*
  * If the read lock fails due to a writer, we retry periodically
diff --git a/arch/um/include/asm/unwind.h b/arch/um/include/asm/unwind.h
new file mode 100644
index 000000000000..7ffa5437b761
--- /dev/null
+++ b/arch/um/include/asm/unwind.h
@@ -0,0 +1,8 @@
+#ifndef _ASM_UML_UNWIND_H
+#define _ASM_UML_UNWIND_H
+
+static inline void
+unwind_module_init(struct module *mod, void *orc_ip, size_t orc_ip_size,
+		   void *orc, size_t orc_size) {}
+
+#endif /* _ASM_UML_UNWIND_H */
diff --git a/arch/x86/Kbuild b/arch/x86/Kbuild
index 586b786b3edf..0038a2d10a7a 100644
--- a/arch/x86/Kbuild
+++ b/arch/x86/Kbuild
@@ -8,10 +8,7 @@ obj-$(CONFIG_KVM) += kvm/
 obj-$(CONFIG_XEN) += xen/
 
 # Hyper-V paravirtualization support
-obj-$(CONFIG_HYPERVISOR_GUEST) += hyperv/
-
-# lguest paravirtualization support
-obj-$(CONFIG_LGUEST_GUEST) += lguest/
+obj-$(subst m,y,$(CONFIG_HYPERV)) += hyperv/
 
 obj-y += realmode/
 obj-y += kernel/
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 323cb065be5e..4b278a33ccbb 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -55,6 +55,8 @@ config X86
 	select ARCH_HAS_KCOV			if X86_64
 	select ARCH_HAS_MMIO_FLUSH
 	select ARCH_HAS_PMEM_API		if X86_64
+	# Causing hangs/crashes, see the commit that added this change for details.
+	select ARCH_HAS_REFCOUNT		if BROKEN
 	select ARCH_HAS_UACCESS_FLUSHCACHE	if X86_64
 	select ARCH_HAS_SET_MEMORY
 	select ARCH_HAS_SG_CHAIN
@@ -73,7 +75,6 @@ config X86
 	select ARCH_USE_QUEUED_RWLOCKS
 	select ARCH_USE_QUEUED_SPINLOCKS
 	select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
-	select ARCH_WANT_FRAME_POINTERS
 	select ARCH_WANTS_DYNAMIC_TASK_STRUCT
 	select ARCH_WANTS_THP_SWAP		if X86_64
 	select BUILDTIME_EXTABLE_SORT
@@ -158,6 +159,7 @@ config X86
 	select HAVE_MEMBLOCK
 	select HAVE_MEMBLOCK_NODE_MAP
 	select HAVE_MIXED_BREAKPOINTS_REGS
+	select HAVE_MOD_ARCH_SPECIFIC
 	select HAVE_NMI
 	select HAVE_OPROFILE
 	select HAVE_OPTPROBES
@@ -167,8 +169,9 @@ config X86
 	select HAVE_HARDLOCKUP_DETECTOR_PERF	if PERF_EVENTS && HAVE_PERF_EVENTS_NMI
 	select HAVE_PERF_REGS
 	select HAVE_PERF_USER_STACK_DUMP
+	select HAVE_RCU_TABLE_FREE
 	select HAVE_REGS_AND_STACK_ACCESS_API
-	select HAVE_RELIABLE_STACKTRACE		if X86_64 && FRAME_POINTER && STACK_VALIDATION
+	select HAVE_RELIABLE_STACKTRACE		if X86_64 && FRAME_POINTER_UNWINDER && STACK_VALIDATION
 	select HAVE_STACK_VALIDATION		if X86_64
 	select HAVE_SYSCALL_TRACEPOINTS
 	select HAVE_UNSTABLE_SCHED_CLOCK
@@ -327,6 +330,7 @@ config FIX_EARLYCON_MEM
 
 config PGTABLE_LEVELS
 	int
+	default 5 if X86_5LEVEL
 	default 4 if X86_64
 	default 3 if X86_PAE
 	default 2
@@ -425,16 +429,16 @@ config GOLDFISH
        def_bool y
        depends on X86_GOLDFISH
 
-config INTEL_RDT_A
-	bool "Intel Resource Director Technology Allocation support"
+config INTEL_RDT
+	bool "Intel Resource Director Technology support"
 	default n
 	depends on X86 && CPU_SUP_INTEL
 	select KERNFS
 	help
-	  Select to enable resource allocation which is a sub-feature of
-	  Intel Resource Director Technology(RDT). More information about
-	  RDT can be found in the Intel x86 Architecture Software
-	  Developer Manual.
+	  Select to enable resource allocation and monitoring which are
+	  sub-features of Intel Resource Director Technology(RDT). More
+	  information about RDT can be found in the Intel x86
+	  Architecture Software Developer Manual.
 
 	  Say N if unsure.
 
@@ -778,8 +782,6 @@ config KVM_DEBUG_FS
 	  Statistics are displayed in debugfs filesystem. Enabling this option
 	  may incur significant overhead.
 
-source "arch/x86/lguest/Kconfig"
-
 config PARAVIRT_TIME_ACCOUNTING
 	bool "Paravirtual steal time accounting"
 	depends on PARAVIRT
@@ -1399,6 +1401,24 @@ config X86_PAE
 	  has the cost of more pagetable lookup overhead, and also
 	  consumes more pagetable space per process.
 
+config X86_5LEVEL
+	bool "Enable 5-level page tables support"
+	depends on X86_64
+	---help---
+	  5-level paging enables access to larger address space:
+	  upto 128 PiB of virtual address space and 4 PiB of
+	  physical address space.
+
+	  It will be supported by future Intel CPUs.
+
+	  Note: a kernel with this option enabled can only be booted
+	  on machines that support the feature.
+
+	  See Documentation/x86/x86_64/5level-paging.txt for more
+	  information.
+
+	  Say N if unsure.
+
 config ARCH_PHYS_ADDR_T_64BIT
 	def_bool y
 	depends on X86_64 || X86_PAE
@@ -1416,6 +1436,35 @@ config X86_DIRECT_GBPAGES
 	  supports them), so don't confuse the user by printing
 	  that we have them enabled.
 
+config ARCH_HAS_MEM_ENCRYPT
+	def_bool y
+
+config AMD_MEM_ENCRYPT
+	bool "AMD Secure Memory Encryption (SME) support"
+	depends on X86_64 && CPU_SUP_AMD
+	---help---
+	  Say yes to enable support for the encryption of system memory.
+	  This requires an AMD processor that supports Secure Memory
+	  Encryption (SME).
+
+config AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT
+	bool "Activate AMD Secure Memory Encryption (SME) by default"
+	default y
+	depends on AMD_MEM_ENCRYPT
+	---help---
+	  Say yes to have system memory encrypted by default if running on
+	  an AMD processor that supports Secure Memory Encryption (SME).
+
+	  If set to Y, then the encryption of system memory can be
+	  deactivated with the mem_encrypt=off command line option.
+
+	  If set to N, then the encryption of system memory can be
+	  activated with the mem_encrypt=on command line option.
+
+config ARCH_USE_MEMREMAP_PROT
+	def_bool y
+	depends on AMD_MEM_ENCRYPT
+
 # Common NUMA Features
 config NUMA
 	bool "Numa Memory Allocation and Scheduler Support"
@@ -1757,7 +1806,9 @@ config X86_SMAP
 config X86_INTEL_MPX
 	prompt "Intel MPX (Memory Protection Extensions)"
 	def_bool n
-	depends on CPU_SUP_INTEL
+	# Note: only available in 64-bit mode due to VMA flags shortage
+	depends on CPU_SUP_INTEL && X86_64
+	select ARCH_USES_HIGH_VMA_FLAGS
 	---help---
 	  MPX provides hardware features that can be used in
 	  conjunction with compiler-instrumented code to check
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index cd20ca0b4043..71a48a30fc84 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -305,8 +305,6 @@ config DEBUG_ENTRY
 	  Some of these sanity checks may slow down kernel entries and
 	  exits or otherwise impact performance.
 
-	  This is currently used to help test NMI code.
-
 	  If unsure, say N.
 
 config DEBUG_NMI_SELFTEST
@@ -358,4 +356,61 @@ config PUNIT_ATOM_DEBUG
 	  The current power state can be read from
 	  /sys/kernel/debug/punit_atom/dev_power_state
 
+choice
+	prompt "Choose kernel unwinder"
+	default FRAME_POINTER_UNWINDER
+	---help---
+	  This determines which method will be used for unwinding kernel stack
+	  traces for panics, oopses, bugs, warnings, perf, /proc/<pid>/stack,
+	  livepatch, lockdep, and more.
+
+config FRAME_POINTER_UNWINDER
+	bool "Frame pointer unwinder"
+	select FRAME_POINTER
+	---help---
+	  This option enables the frame pointer unwinder for unwinding kernel
+	  stack traces.
+
+	  The unwinder itself is fast and it uses less RAM than the ORC
+	  unwinder, but the kernel text size will grow by ~3% and the kernel's
+	  overall performance will degrade by roughly 5-10%.
+
+	  This option is recommended if you want to use the livepatch
+	  consistency model, as this is currently the only way to get a
+	  reliable stack trace (CONFIG_HAVE_RELIABLE_STACKTRACE).
+
+config ORC_UNWINDER
+	bool "ORC unwinder"
+	depends on X86_64
+	select STACK_VALIDATION
+	---help---
+	  This option enables the ORC (Oops Rewind Capability) unwinder for
+	  unwinding kernel stack traces.  It uses a custom data format which is
+	  a simplified version of the DWARF Call Frame Information standard.
+
+	  This unwinder is more accurate across interrupt entry frames than the
+	  frame pointer unwinder.  It also enables a 5-10% performance
+	  improvement across the entire kernel compared to frame pointers.
+
+	  Enabling this option will increase the kernel's runtime memory usage
+	  by roughly 2-4MB, depending on your kernel config.
+
+config GUESS_UNWINDER
+	bool "Guess unwinder"
+	depends on EXPERT
+	---help---
+	  This option enables the "guess" unwinder for unwinding kernel stack
+	  traces.  It scans the stack and reports every kernel text address it
+	  finds.  Some of the addresses it reports may be incorrect.
+
+	  While this option often produces false positives, it can still be
+	  useful in many cases.  Unlike the other unwinders, it has no runtime
+	  overhead.
+
+endchoice
+
+config FRAME_POINTER
+	depends on !ORC_UNWINDER && !GUESS_UNWINDER
+	bool
+
 endmenu
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 1e902f926be3..6276572259c8 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -14,9 +14,11 @@ endif
 # For gcc stack alignment is specified with -mpreferred-stack-boundary,
 # clang has the option -mstack-alignment for that purpose.
 ifneq ($(call cc-option, -mpreferred-stack-boundary=4),)
-        cc_stack_align_opt := -mpreferred-stack-boundary
-else ifneq ($(call cc-option, -mstack-alignment=4),)
-        cc_stack_align_opt := -mstack-alignment
+      cc_stack_align4 := -mpreferred-stack-boundary=2
+      cc_stack_align8 := -mpreferred-stack-boundary=3
+else ifneq ($(call cc-option, -mstack-alignment=16),)
+      cc_stack_align4 := -mstack-alignment=4
+      cc_stack_align8 := -mstack-alignment=8
 endif
 
 # How to compile the 16-bit code.  Note we always compile for -march=i386;
@@ -36,7 +38,7 @@ REALMODE_CFLAGS	:= $(M16_CFLAGS) -g -Os -D__KERNEL__ \
 
 REALMODE_CFLAGS += $(call __cc-option, $(CC), $(REALMODE_CFLAGS), -ffreestanding)
 REALMODE_CFLAGS += $(call __cc-option, $(CC), $(REALMODE_CFLAGS), -fno-stack-protector)
-REALMODE_CFLAGS += $(call __cc-option, $(CC), $(REALMODE_CFLAGS), $(cc_stack_align_opt)=2)
+REALMODE_CFLAGS += $(call __cc-option, $(CC), $(REALMODE_CFLAGS), $(cc_stack_align4))
 export REALMODE_CFLAGS
 
 # BITS is used as extension for files which are available in a 32 bit
@@ -76,7 +78,7 @@ ifeq ($(CONFIG_X86_32),y)
         # Align the stack to the register width instead of using the default
         # alignment of 16 bytes. This reduces stack usage and the number of
         # alignment instructions.
-        KBUILD_CFLAGS += $(call cc-option,$(cc_stack_align_opt)=2)
+        KBUILD_CFLAGS += $(call cc-option,$(cc_stack_align4))
 
         # Disable unit-at-a-time mode on pre-gcc-4.0 compilers, it makes gcc use
         # a lot more stack due to the lack of sharing of stacklots:
@@ -115,7 +117,7 @@ else
         # default alignment which keep the stack *mis*aligned.
         # Furthermore an alignment to the register width reduces stack usage
         # and the number of alignment instructions.
-        KBUILD_CFLAGS += $(call cc-option,$(cc_stack_align_opt)=3)
+        KBUILD_CFLAGS += $(call cc-option,$(cc_stack_align8))
 
 	# Use -mskip-rax-setup if supported.
 	KBUILD_CFLAGS += $(call cc-option,-mskip-rax-setup)
@@ -232,9 +234,6 @@ KBUILD_CFLAGS += -Wno-sign-compare
 #
 KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
 
-KBUILD_CFLAGS += $(mflags-y)
-KBUILD_AFLAGS += $(mflags-y)
-
 archscripts: scripts_basic
 	$(Q)$(MAKE) $(build)=arch/x86/tools relocs
 
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index c3e869eaef0c..e56dbc67e837 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -767,7 +767,7 @@ static efi_status_t setup_e820(struct boot_params *params,
 		m |= (u64)efi->efi_memmap_hi << 32;
 #endif
 
-		d = (efi_memory_desc_t *)(m + (i * efi->efi_memdesc_size));
+		d = efi_early_memdesc_ptr(m, efi->efi_memdesc_size, i);
 		switch (d->type) {
 		case EFI_RESERVED_TYPE:
 		case EFI_RUNTIME_SERVICES_CODE:
@@ -997,6 +997,9 @@ struct boot_params *efi_main(struct efi_config *c,
 	if (boot_params->secure_boot == efi_secureboot_mode_unset)
 		boot_params->secure_boot = efi_get_secureboot(sys_table);
 
+	/* Ask the firmware to clear memory on unclean shutdown */
+	efi_enable_reset_attack_mitigation(sys_table);
+
 	setup_graphics(boot_params);
 
 	setup_efi_pci(boot_params);
@@ -1058,7 +1061,7 @@ struct boot_params *efi_main(struct efi_config *c,
 		desc->s = DESC_TYPE_CODE_DATA;
 		desc->dpl = 0;
 		desc->p = 1;
-		desc->limit = 0xf;
+		desc->limit1 = 0xf;
 		desc->avl = 0;
 		desc->l = 0;
 		desc->d = SEG_OP_SIZE_32BIT;
@@ -1078,7 +1081,7 @@ struct boot_params *efi_main(struct efi_config *c,
 	desc->s = DESC_TYPE_CODE_DATA;
 	desc->dpl = 0;
 	desc->p = 1;
-	desc->limit = 0xf;
+	desc->limit1 = 0xf;
 	desc->avl = 0;
 	if (IS_ENABLED(CONFIG_X86_64)) {
 		desc->l = 1;
@@ -1099,7 +1102,7 @@ struct boot_params *efi_main(struct efi_config *c,
 	desc->s = DESC_TYPE_CODE_DATA;
 	desc->dpl = 0;
 	desc->p = 1;
-	desc->limit = 0xf;
+	desc->limit1 = 0xf;
 	desc->avl = 0;
 	desc->l = 0;
 	desc->d = SEG_OP_SIZE_32BIT;
@@ -1116,7 +1119,7 @@ struct boot_params *efi_main(struct efi_config *c,
 		desc->s = 0;
 		desc->dpl = 0;
 		desc->p = 1;
-		desc->limit = 0x0;
+		desc->limit1 = 0x0;
 		desc->avl = 0;
 		desc->l = 0;
 		desc->d = 0;
diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index d85b9625e836..11c68cf53d4e 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -61,71 +61,6 @@
 
 	__HEAD
 ENTRY(startup_32)
-#ifdef CONFIG_EFI_STUB
-	jmp	preferred_addr
-
-	/*
-	 * We don't need the return address, so set up the stack so
-	 * efi_main() can find its arguments.
-	 */
-ENTRY(efi_pe_entry)
-	add	$0x4, %esp
-
-	call	1f
-1:	popl	%esi
-	subl	$1b, %esi
-
-	popl	%ecx
-	movl	%ecx, efi32_config(%esi)	/* Handle */
-	popl	%ecx
-	movl	%ecx, efi32_config+8(%esi)	/* EFI System table pointer */
-
-	/* Relocate efi_config->call() */
-	leal	efi32_config(%esi), %eax
-	add	%esi, 40(%eax)
-	pushl	%eax
-
-	call	make_boot_params
-	cmpl	$0, %eax
-	je	fail
-	movl	%esi, BP_code32_start(%eax)
-	popl	%ecx
-	pushl	%eax
-	pushl	%ecx
-	jmp	2f		/* Skip efi_config initialization */
-
-ENTRY(efi32_stub_entry)
-	add	$0x4, %esp
-	popl	%ecx
-	popl	%edx
-
-	call	1f
-1:	popl	%esi
-	subl	$1b, %esi
-
-	movl	%ecx, efi32_config(%esi)	/* Handle */
-	movl	%edx, efi32_config+8(%esi)	/* EFI System table pointer */
-
-	/* Relocate efi_config->call() */
-	leal	efi32_config(%esi), %eax
-	add	%esi, 40(%eax)
-	pushl	%eax
-2:
-	call	efi_main
-	cmpl	$0, %eax
-	movl	%eax, %esi
-	jne	2f
-fail:
-	/* EFI init failed, so hang. */
-	hlt
-	jmp	fail
-2:
-	movl	BP_code32_start(%esi), %eax
-	leal	preferred_addr(%eax), %eax
-	jmp	*%eax
-
-preferred_addr:
-#endif
 	cld
 	/*
 	 * Test KEEP_SEGMENTS flag to see if the bootloader is asking
@@ -208,6 +143,70 @@ preferred_addr:
 	jmp	*%eax
 ENDPROC(startup_32)
 
+#ifdef CONFIG_EFI_STUB
+/*
+ * We don't need the return address, so set up the stack so efi_main() can find
+ * its arguments.
+ */
+ENTRY(efi_pe_entry)
+	add	$0x4, %esp
+
+	call	1f
+1:	popl	%esi
+	subl	$1b, %esi
+
+	popl	%ecx
+	movl	%ecx, efi32_config(%esi)	/* Handle */
+	popl	%ecx
+	movl	%ecx, efi32_config+8(%esi)	/* EFI System table pointer */
+
+	/* Relocate efi_config->call() */
+	leal	efi32_config(%esi), %eax
+	add	%esi, 40(%eax)
+	pushl	%eax
+
+	call	make_boot_params
+	cmpl	$0, %eax
+	je	fail
+	movl	%esi, BP_code32_start(%eax)
+	popl	%ecx
+	pushl	%eax
+	pushl	%ecx
+	jmp	2f		/* Skip efi_config initialization */
+ENDPROC(efi_pe_entry)
+
+ENTRY(efi32_stub_entry)
+	add	$0x4, %esp
+	popl	%ecx
+	popl	%edx
+
+	call	1f
+1:	popl	%esi
+	subl	$1b, %esi
+
+	movl	%ecx, efi32_config(%esi)	/* Handle */
+	movl	%edx, efi32_config+8(%esi)	/* EFI System table pointer */
+
+	/* Relocate efi_config->call() */
+	leal	efi32_config(%esi), %eax
+	add	%esi, 40(%eax)
+	pushl	%eax
+2:
+	call	efi_main
+	cmpl	$0, %eax
+	movl	%eax, %esi
+	jne	2f
+fail:
+	/* EFI init failed, so hang. */
+	hlt
+	jmp	fail
+2:
+	movl	BP_code32_start(%esi), %eax
+	leal	startup_32(%eax), %eax
+	jmp	*%eax
+ENDPROC(efi32_stub_entry)
+#endif
+
 	.text
 relocated:
 
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index fbf4c32d0b62..b4a5d284391c 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -243,65 +243,6 @@ ENTRY(startup_64)
 	 * that maps our entire kernel(text+data+bss+brk), zero page
 	 * and command line.
 	 */
-#ifdef CONFIG_EFI_STUB
-	/*
-	 * The entry point for the PE/COFF executable is efi_pe_entry, so
-	 * only legacy boot loaders will execute this jmp.
-	 */
-	jmp	preferred_addr
-
-ENTRY(efi_pe_entry)
-	movq	%rcx, efi64_config(%rip)	/* Handle */
-	movq	%rdx, efi64_config+8(%rip) /* EFI System table pointer */
-
-	leaq	efi64_config(%rip), %rax
-	movq	%rax, efi_config(%rip)
-
-	call	1f
-1:	popq	%rbp
-	subq	$1b, %rbp
-
-	/*
-	 * Relocate efi_config->call().
-	 */
-	addq	%rbp, efi64_config+40(%rip)
-
-	movq	%rax, %rdi
-	call	make_boot_params
-	cmpq	$0,%rax
-	je	fail
-	mov	%rax, %rsi
-	leaq	startup_32(%rip), %rax
-	movl	%eax, BP_code32_start(%rsi)
-	jmp	2f		/* Skip the relocation */
-
-handover_entry:
-	call	1f
-1:	popq	%rbp
-	subq	$1b, %rbp
-
-	/*
-	 * Relocate efi_config->call().
-	 */
-	movq	efi_config(%rip), %rax
-	addq	%rbp, 40(%rax)
-2:
-	movq	efi_config(%rip), %rdi
-	call	efi_main
-	movq	%rax,%rsi
-	cmpq	$0,%rax
-	jne	2f
-fail:
-	/* EFI init failed, so hang. */
-	hlt
-	jmp	fail
-2:
-	movl	BP_code32_start(%esi), %eax
-	leaq	preferred_addr(%rax), %rax
-	jmp	*%rax
-
-preferred_addr:
-#endif
 
 	/* Setup data segments. */
 	xorl	%eax, %eax
@@ -413,6 +354,59 @@ lvl5:
 	jmp	*%rax
 
 #ifdef CONFIG_EFI_STUB
+
+/* The entry point for the PE/COFF executable is efi_pe_entry. */
+ENTRY(efi_pe_entry)
+	movq	%rcx, efi64_config(%rip)	/* Handle */
+	movq	%rdx, efi64_config+8(%rip) /* EFI System table pointer */
+
+	leaq	efi64_config(%rip), %rax
+	movq	%rax, efi_config(%rip)
+
+	call	1f
+1:	popq	%rbp
+	subq	$1b, %rbp
+
+	/*
+	 * Relocate efi_config->call().
+	 */
+	addq	%rbp, efi64_config+40(%rip)
+
+	movq	%rax, %rdi
+	call	make_boot_params
+	cmpq	$0,%rax
+	je	fail
+	mov	%rax, %rsi
+	leaq	startup_32(%rip), %rax
+	movl	%eax, BP_code32_start(%rsi)
+	jmp	2f		/* Skip the relocation */
+
+handover_entry:
+	call	1f
+1:	popq	%rbp
+	subq	$1b, %rbp
+
+	/*
+	 * Relocate efi_config->call().
+	 */
+	movq	efi_config(%rip), %rax
+	addq	%rbp, 40(%rax)
+2:
+	movq	efi_config(%rip), %rdi
+	call	efi_main
+	movq	%rax,%rsi
+	cmpq	$0,%rax
+	jne	2f
+fail:
+	/* EFI init failed, so hang. */
+	hlt
+	jmp	fail
+2:
+	movl	BP_code32_start(%esi), %eax
+	leaq	startup_64(%rax), %rax
+	jmp	*%rax
+ENDPROC(efi_pe_entry)
+
 	.org 0x390
 ENTRY(efi64_stub_entry)
 	movq	%rdi, efi64_config(%rip)	/* Handle */
diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
index 91f27ab970ef..17818ba6906f 100644
--- a/arch/x86/boot/compressed/kaslr.c
+++ b/arch/x86/boot/compressed/kaslr.c
@@ -37,7 +37,9 @@
 #include <linux/uts.h>
 #include <linux/utsname.h>
 #include <linux/ctype.h>
+#include <linux/efi.h>
 #include <generated/utsrelease.h>
+#include <asm/efi.h>
 
 /* Macros used by the included decompressor code below. */
 #define STATIC
@@ -479,35 +481,31 @@ static unsigned long slots_fetch_random(void)
 	return 0;
 }
 
-static void process_e820_entry(struct boot_e820_entry *entry,
+static void process_mem_region(struct mem_vector *entry,
 			       unsigned long minimum,
 			       unsigned long image_size)
 {
 	struct mem_vector region, overlap;
 	struct slot_area slot_area;
 	unsigned long start_orig, end;
-	struct boot_e820_entry cur_entry;
-
-	/* Skip non-RAM entries. */
-	if (entry->type != E820_TYPE_RAM)
-		return;
+	struct mem_vector cur_entry;
 
 	/* On 32-bit, ignore entries entirely above our maximum. */
-	if (IS_ENABLED(CONFIG_X86_32) && entry->addr >= KERNEL_IMAGE_SIZE)
+	if (IS_ENABLED(CONFIG_X86_32) && entry->start >= KERNEL_IMAGE_SIZE)
 		return;
 
 	/* Ignore entries entirely below our minimum. */
-	if (entry->addr + entry->size < minimum)
+	if (entry->start + entry->size < minimum)
 		return;
 
 	/* Ignore entries above memory limit */
-	end = min(entry->size + entry->addr, mem_limit);
-	if (entry->addr >= end)
+	end = min(entry->size + entry->start, mem_limit);
+	if (entry->start >= end)
 		return;
-	cur_entry.addr = entry->addr;
-	cur_entry.size = end - entry->addr;
+	cur_entry.start = entry->start;
+	cur_entry.size = end - entry->start;
 
-	region.start = cur_entry.addr;
+	region.start = cur_entry.start;
 	region.size = cur_entry.size;
 
 	/* Give up if slot area array is full. */
@@ -521,8 +519,8 @@ static void process_e820_entry(struct boot_e820_entry *entry,
 		/* Potentially raise address to meet alignment needs. */
 		region.start = ALIGN(region.start, CONFIG_PHYSICAL_ALIGN);
 
-		/* Did we raise the address above this e820 region? */
-		if (region.start > cur_entry.addr + cur_entry.size)
+		/* Did we raise the address above the passed in memory entry? */
+		if (region.start > cur_entry.start + cur_entry.size)
 			return;
 
 		/* Reduce size by any delta from the original address. */
@@ -562,31 +560,126 @@ static void process_e820_entry(struct boot_e820_entry *entry,
 	}
 }
 
-static unsigned long find_random_phys_addr(unsigned long minimum,
-					   unsigned long image_size)
+#ifdef CONFIG_EFI
+/*
+ * Returns true if mirror region found (and must have been processed
+ * for slots adding)
+ */
+static bool
+process_efi_entries(unsigned long minimum, unsigned long image_size)
 {
+	struct efi_info *e = &boot_params->efi_info;
+	bool efi_mirror_found = false;
+	struct mem_vector region;
+	efi_memory_desc_t *md;
+	unsigned long pmap;
+	char *signature;
+	u32 nr_desc;
 	int i;
-	unsigned long addr;
 
-	/* Check if we had too many memmaps. */
-	if (memmap_too_large) {
-		debug_putstr("Aborted e820 scan (more than 4 memmap= args)!\n");
-		return 0;
+	signature = (char *)&e->efi_loader_signature;
+	if (strncmp(signature, EFI32_LOADER_SIGNATURE, 4) &&
+	    strncmp(signature, EFI64_LOADER_SIGNATURE, 4))
+		return false;
+
+#ifdef CONFIG_X86_32
+	/* Can't handle data above 4GB at this time */
+	if (e->efi_memmap_hi) {
+		warn("EFI memmap is above 4GB, can't be handled now on x86_32. EFI should be disabled.\n");
+		return false;
 	}
+	pmap =  e->efi_memmap;
+#else
+	pmap = (e->efi_memmap | ((__u64)e->efi_memmap_hi << 32));
+#endif
 
-	/* Make sure minimum is aligned. */
-	minimum = ALIGN(minimum, CONFIG_PHYSICAL_ALIGN);
+	nr_desc = e->efi_memmap_size / e->efi_memdesc_size;
+	for (i = 0; i < nr_desc; i++) {
+		md = efi_early_memdesc_ptr(pmap, e->efi_memdesc_size, i);
+		if (md->attribute & EFI_MEMORY_MORE_RELIABLE) {
+			efi_mirror_found = true;
+			break;
+		}
+	}
+
+	for (i = 0; i < nr_desc; i++) {
+		md = efi_early_memdesc_ptr(pmap, e->efi_memdesc_size, i);
+
+		/*
+		 * Here we are more conservative in picking free memory than
+		 * the EFI spec allows:
+		 *
+		 * According to the spec, EFI_BOOT_SERVICES_{CODE|DATA} are also
+		 * free memory and thus available to place the kernel image into,
+		 * but in practice there's firmware where using that memory leads
+		 * to crashes.
+		 *
+		 * Only EFI_CONVENTIONAL_MEMORY is guaranteed to be free.
+		 */
+		if (md->type != EFI_CONVENTIONAL_MEMORY)
+			continue;
+
+		if (efi_mirror_found &&
+		    !(md->attribute & EFI_MEMORY_MORE_RELIABLE))
+			continue;
+
+		region.start = md->phys_addr;
+		region.size = md->num_pages << EFI_PAGE_SHIFT;
+		process_mem_region(&region, minimum, image_size);
+		if (slot_area_index == MAX_SLOT_AREA) {
+			debug_putstr("Aborted EFI scan (slot_areas full)!\n");
+			break;
+		}
+	}
+	return true;
+}
+#else
+static inline bool
+process_efi_entries(unsigned long minimum, unsigned long image_size)
+{
+	return false;
+}
+#endif
+
+static void process_e820_entries(unsigned long minimum,
+				 unsigned long image_size)
+{
+	int i;
+	struct mem_vector region;
+	struct boot_e820_entry *entry;
 
 	/* Verify potential e820 positions, appending to slots list. */
 	for (i = 0; i < boot_params->e820_entries; i++) {
-		process_e820_entry(&boot_params->e820_table[i], minimum,
-				   image_size);
+		entry = &boot_params->e820_table[i];
+		/* Skip non-RAM entries. */
+		if (entry->type != E820_TYPE_RAM)
+			continue;
+		region.start = entry->addr;
+		region.size = entry->size;
+		process_mem_region(&region, minimum, image_size);
 		if (slot_area_index == MAX_SLOT_AREA) {
 			debug_putstr("Aborted e820 scan (slot_areas full)!\n");
 			break;
 		}
 	}
+}
+
+static unsigned long find_random_phys_addr(unsigned long minimum,
+					   unsigned long image_size)
+{
+	/* Check if we had too many memmaps. */
+	if (memmap_too_large) {
+		debug_putstr("Aborted memory entries scan (more than 4 memmap= args)!\n");
+		return 0;
+	}
+
+	/* Make sure minimum is aligned. */
+	minimum = ALIGN(minimum, CONFIG_PHYSICAL_ALIGN);
+
+	if (process_efi_entries(minimum, image_size))
+		return slots_fetch_random();
 
+	process_e820_entries(minimum, image_size);
 	return slots_fetch_random();
 }
 
@@ -645,7 +738,7 @@ void choose_random_location(unsigned long input,
 	 */
 	min_addr = min(*output, 512UL << 20);
 
-	/* Walk e820 and find a random address. */
+	/* Walk available memory entries to find a random address. */
 	random_addr = find_random_phys_addr(min_addr, output_size);
 	if (!random_addr) {
 		warn("Physical KASLR disabled: no suitable memory region!");
diff --git a/arch/x86/boot/compressed/pagetable.c b/arch/x86/boot/compressed/pagetable.c
index 28029be47fbb..f1aa43854bed 100644
--- a/arch/x86/boot/compressed/pagetable.c
+++ b/arch/x86/boot/compressed/pagetable.c
@@ -15,6 +15,13 @@
 #define __pa(x)  ((unsigned long)(x))
 #define __va(x)  ((void *)((unsigned long)(x)))
 
+/*
+ * The pgtable.h and mm/ident_map.c includes make use of the SME related
+ * information which is not used in the compressed image support. Un-define
+ * the SME support to avoid any compile and link errors.
+ */
+#undef CONFIG_AMD_MEM_ENCRYPT
+
 #include "misc.h"
 
 /* These actually do the work of building the kernel identity maps. */
diff --git a/arch/x86/configs/tiny.config b/arch/x86/configs/tiny.config
index 4b429df40d7a..550cd5012b73 100644
--- a/arch/x86/configs/tiny.config
+++ b/arch/x86/configs/tiny.config
@@ -1,3 +1,5 @@
 CONFIG_NOHIGHMEM=y
 # CONFIG_HIGHMEM4G is not set
 # CONFIG_HIGHMEM64G is not set
+CONFIG_GUESS_UNWINDER=y
+# CONFIG_FRAME_POINTER_UNWINDER is not set
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 4a55cdcdc008..5c15d6b57329 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -475,8 +475,8 @@ static void ctr_crypt_final(struct crypto_aes_ctx *ctx,
 	unsigned int nbytes = walk->nbytes;
 
 	aesni_enc(ctx, keystream, ctrblk);
-	crypto_xor(keystream, src, nbytes);
-	memcpy(dst, keystream, nbytes);
+	crypto_xor_cpy(dst, keystream, src, nbytes);
+
 	crypto_inc(ctrblk, AES_BLOCK_SIZE);
 }
 
diff --git a/arch/x86/crypto/blowfish_glue.c b/arch/x86/crypto/blowfish_glue.c
index 17c05531dfd1..f9eca34301e2 100644
--- a/arch/x86/crypto/blowfish_glue.c
+++ b/arch/x86/crypto/blowfish_glue.c
@@ -271,8 +271,7 @@ static void ctr_crypt_final(struct bf_ctx *ctx, struct blkcipher_walk *walk)
 	unsigned int nbytes = walk->nbytes;
 
 	blowfish_enc_blk(ctx, keystream, ctrblk);
-	crypto_xor(keystream, src, nbytes);
-	memcpy(dst, keystream, nbytes);
+	crypto_xor_cpy(dst, keystream, src, nbytes);
 
 	crypto_inc(ctrblk, BF_BLOCK_SIZE);
 }
diff --git a/arch/x86/crypto/cast5_avx_glue.c b/arch/x86/crypto/cast5_avx_glue.c
index 8648158f3916..dbea6020ffe7 100644
--- a/arch/x86/crypto/cast5_avx_glue.c
+++ b/arch/x86/crypto/cast5_avx_glue.c
@@ -256,8 +256,7 @@ static void ctr_crypt_final(struct blkcipher_desc *desc,
 	unsigned int nbytes = walk->nbytes;
 
 	__cast5_encrypt(ctx, keystream, ctrblk);
-	crypto_xor(keystream, src, nbytes);
-	memcpy(dst, keystream, nbytes);
+	crypto_xor_cpy(dst, keystream, src, nbytes);
 
 	crypto_inc(ctrblk, CAST5_BLOCK_SIZE);
 }
diff --git a/arch/x86/crypto/des3_ede_glue.c b/arch/x86/crypto/des3_ede_glue.c
index d6fc59aaaadf..30c0a37f4882 100644
--- a/arch/x86/crypto/des3_ede_glue.c
+++ b/arch/x86/crypto/des3_ede_glue.c
@@ -277,8 +277,7 @@ static void ctr_crypt_final(struct des3_ede_x86_ctx *ctx,
 	unsigned int nbytes = walk->nbytes;
 
 	des3_ede_enc_blk(ctx, keystream, ctrblk);
-	crypto_xor(keystream, src, nbytes);
-	memcpy(dst, keystream, nbytes);
+	crypto_xor_cpy(dst, keystream, src, nbytes);
 
 	crypto_inc(ctrblk, DES3_EDE_BLOCK_SIZE);
 }
diff --git a/arch/x86/entry/Makefile b/arch/x86/entry/Makefile
index 9976fcecd17e..af28a8a24366 100644
--- a/arch/x86/entry/Makefile
+++ b/arch/x86/entry/Makefile
@@ -2,7 +2,6 @@
 # Makefile for the x86 low level entry code
 #
 
-OBJECT_FILES_NON_STANDARD_entry_$(BITS).o   := y
 OBJECT_FILES_NON_STANDARD_entry_64_compat.o := y
 
 CFLAGS_syscall_64.o		+= $(call cc-option,-Wno-override-init,)
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index 05ed3d393da7..640aafebdc00 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -1,4 +1,5 @@
 #include <linux/jump_label.h>
+#include <asm/unwind_hints.h>
 
 /*
 
@@ -112,6 +113,7 @@ For 32-bit we have the following conventions - kernel is built with
 	movq %rdx, 12*8+\offset(%rsp)
 	movq %rsi, 13*8+\offset(%rsp)
 	movq %rdi, 14*8+\offset(%rsp)
+	UNWIND_HINT_REGS offset=\offset extra=0
 	.endm
 	.macro SAVE_C_REGS offset=0
 	SAVE_C_REGS_HELPER \offset, 1, 1, 1, 1
@@ -136,6 +138,7 @@ For 32-bit we have the following conventions - kernel is built with
 	movq %r12, 3*8+\offset(%rsp)
 	movq %rbp, 4*8+\offset(%rsp)
 	movq %rbx, 5*8+\offset(%rsp)
+	UNWIND_HINT_REGS offset=\offset
 	.endm
 
 	.macro RESTORE_EXTRA_REGS offset=0
@@ -145,6 +148,7 @@ For 32-bit we have the following conventions - kernel is built with
 	movq 3*8+\offset(%rsp), %r12
 	movq 4*8+\offset(%rsp), %rbp
 	movq 5*8+\offset(%rsp), %rbx
+	UNWIND_HINT_REGS offset=\offset extra=0
 	.endm
 
 	.macro RESTORE_C_REGS_HELPER rstor_rax=1, rstor_rcx=1, rstor_r11=1, rstor_r8910=1, rstor_rdx=1
@@ -167,6 +171,7 @@ For 32-bit we have the following conventions - kernel is built with
 	.endif
 	movq 13*8(%rsp), %rsi
 	movq 14*8(%rsp), %rdi
+	UNWIND_HINT_IRET_REGS offset=16*8
 	.endm
 	.macro RESTORE_C_REGS
 	RESTORE_C_REGS_HELPER 1,1,1,1,1
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index cdefcfdd9e63..03505ffbe1b6 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -23,6 +23,7 @@
 #include <linux/user-return-notifier.h>
 #include <linux/uprobes.h>
 #include <linux/livepatch.h>
+#include <linux/syscalls.h>
 
 #include <asm/desc.h>
 #include <asm/traps.h>
@@ -183,6 +184,8 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
 	struct thread_info *ti = current_thread_info();
 	u32 cached_flags;
 
+	addr_limit_user_check();
+
 	if (IS_ENABLED(CONFIG_PROVE_LOCKING) && WARN_ON(!irqs_disabled()))
 		local_irq_disable();
 
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 48ef7bb32c42..8a13d468635a 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -673,16 +673,8 @@ ENTRY(name)				\
 	jmp	ret_from_intr;		\
 ENDPROC(name)
 
-
-#ifdef CONFIG_TRACING
-# define TRACE_BUILD_INTERRUPT(name, nr)	BUILD_INTERRUPT3(trace_##name, nr, smp_trace_##name)
-#else
-# define TRACE_BUILD_INTERRUPT(name, nr)
-#endif
-
 #define BUILD_INTERRUPT(name, nr)		\
 	BUILD_INTERRUPT3(name, nr, smp_##name);	\
-	TRACE_BUILD_INTERRUPT(name, nr)
 
 /* The include is where all of the SMP etc. interrupts come from */
 #include <asm/entry_arch.h>
@@ -880,25 +872,17 @@ ENTRY(xen_failsafe_callback)
 ENDPROC(xen_failsafe_callback)
 
 BUILD_INTERRUPT3(xen_hvm_callback_vector, HYPERVISOR_CALLBACK_VECTOR,
-		xen_evtchn_do_upcall)
+		 xen_evtchn_do_upcall)
 
 #endif /* CONFIG_XEN */
 
 #if IS_ENABLED(CONFIG_HYPERV)
 
 BUILD_INTERRUPT3(hyperv_callback_vector, HYPERVISOR_CALLBACK_VECTOR,
-	hyperv_vector_handler)
+		 hyperv_vector_handler)
 
 #endif /* CONFIG_HYPERV */
 
-#ifdef CONFIG_TRACING
-ENTRY(trace_page_fault)
-	ASM_CLAC
-	pushl	$trace_do_page_fault
-	jmp	common_exception
-END(trace_page_fault)
-#endif
-
 ENTRY(page_fault)
 	ASM_CLAC
 	pushl	$do_page_fault
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 6d078b89a5e8..49167258d587 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -36,6 +36,7 @@
 #include <asm/smap.h>
 #include <asm/pgtable_types.h>
 #include <asm/export.h>
+#include <asm/frame.h>
 #include <linux/err.h>
 
 .code64
@@ -43,9 +44,10 @@
 
 #ifdef CONFIG_PARAVIRT
 ENTRY(native_usergs_sysret64)
+	UNWIND_HINT_EMPTY
 	swapgs
 	sysretq
-ENDPROC(native_usergs_sysret64)
+END(native_usergs_sysret64)
 #endif /* CONFIG_PARAVIRT */
 
 .macro TRACE_IRQS_IRETQ
@@ -134,19 +136,14 @@ ENDPROC(native_usergs_sysret64)
  */
 
 ENTRY(entry_SYSCALL_64)
+	UNWIND_HINT_EMPTY
 	/*
 	 * Interrupts are off on entry.
 	 * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
 	 * it is too small to ever cause noticeable irq latency.
 	 */
-	SWAPGS_UNSAFE_STACK
-	/*
-	 * A hypervisor implementation might want to use a label
-	 * after the swapgs, so that it can do the swapgs
-	 * for the guest and jump here on syscall.
-	 */
-GLOBAL(entry_SYSCALL_64_after_swapgs)
 
+	swapgs
 	movq	%rsp, PER_CPU_VAR(rsp_scratch)
 	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
 
@@ -158,6 +155,7 @@ GLOBAL(entry_SYSCALL_64_after_swapgs)
 	pushq	%r11				/* pt_regs->flags */
 	pushq	$__USER_CS			/* pt_regs->cs */
 	pushq	%rcx				/* pt_regs->ip */
+GLOBAL(entry_SYSCALL_64_after_hwframe)
 	pushq	%rax				/* pt_regs->orig_ax */
 	pushq	%rdi				/* pt_regs->di */
 	pushq	%rsi				/* pt_regs->si */
@@ -169,6 +167,7 @@ GLOBAL(entry_SYSCALL_64_after_swapgs)
 	pushq	%r10				/* pt_regs->r10 */
 	pushq	%r11				/* pt_regs->r11 */
 	sub	$(6*8), %rsp			/* pt_regs->bp, bx, r12-15 not saved */
+	UNWIND_HINT_REGS extra=0
 
 	/*
 	 * If we need to do entry work or if we guess we'll need to do
@@ -223,6 +222,7 @@ entry_SYSCALL_64_fastpath:
 	movq	EFLAGS(%rsp), %r11
 	RESTORE_C_REGS_EXCEPT_RCX_R11
 	movq	RSP(%rsp), %rsp
+	UNWIND_HINT_EMPTY
 	USERGS_SYSRET64
 
 1:
@@ -316,6 +316,7 @@ syscall_return_via_sysret:
 	/* rcx and r11 are already restored (see code above) */
 	RESTORE_C_REGS_EXCEPT_RCX_R11
 	movq	RSP(%rsp), %rsp
+	UNWIND_HINT_EMPTY
 	USERGS_SYSRET64
 
 opportunistic_sysret_failed:
@@ -343,6 +344,7 @@ ENTRY(stub_ptregs_64)
 	DISABLE_INTERRUPTS(CLBR_ANY)
 	TRACE_IRQS_OFF
 	popq	%rax
+	UNWIND_HINT_REGS extra=0
 	jmp	entry_SYSCALL64_slow_path
 
 1:
@@ -351,6 +353,7 @@ END(stub_ptregs_64)
 
 .macro ptregs_stub func
 ENTRY(ptregs_\func)
+	UNWIND_HINT_FUNC
 	leaq	\func(%rip), %rax
 	jmp	stub_ptregs_64
 END(ptregs_\func)
@@ -367,6 +370,7 @@ END(ptregs_\func)
  * %rsi: next task
  */
 ENTRY(__switch_to_asm)
+	UNWIND_HINT_FUNC
 	/*
 	 * Save callee-saved registers
 	 * This must match the order in inactive_task_frame
@@ -406,6 +410,7 @@ END(__switch_to_asm)
  * r12: kernel thread arg
  */
 ENTRY(ret_from_fork)
+	UNWIND_HINT_EMPTY
 	movq	%rax, %rdi
 	call	schedule_tail			/* rdi: 'prev' task parameter */
 
@@ -413,6 +418,7 @@ ENTRY(ret_from_fork)
 	jnz	1f				/* kernel threads are uncommon */
 
 2:
+	UNWIND_HINT_REGS
 	movq	%rsp, %rdi
 	call	syscall_return_slowpath	/* returns with IRQs disabled */
 	TRACE_IRQS_ON			/* user mode is traced as IRQS on */
@@ -440,13 +446,102 @@ END(ret_from_fork)
 ENTRY(irq_entries_start)
     vector=FIRST_EXTERNAL_VECTOR
     .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR)
+	UNWIND_HINT_IRET_REGS
 	pushq	$(~vector+0x80)			/* Note: always in signed byte range */
-    vector=vector+1
 	jmp	common_interrupt
 	.align	8
+	vector=vector+1
     .endr
 END(irq_entries_start)
 
+.macro DEBUG_ENTRY_ASSERT_IRQS_OFF
+#ifdef CONFIG_DEBUG_ENTRY
+	pushfq
+	testl $X86_EFLAGS_IF, (%rsp)
+	jz .Lokay_\@
+	ud2
+.Lokay_\@:
+	addq $8, %rsp
+#endif
+.endm
+
+/*
+ * Enters the IRQ stack if we're not already using it.  NMI-safe.  Clobbers
+ * flags and puts old RSP into old_rsp, and leaves all other GPRs alone.
+ * Requires kernel GSBASE.
+ *
+ * The invariant is that, if irq_count != -1, then the IRQ stack is in use.
+ */
+.macro ENTER_IRQ_STACK regs=1 old_rsp
+	DEBUG_ENTRY_ASSERT_IRQS_OFF
+	movq	%rsp, \old_rsp
+
+	.if \regs
+	UNWIND_HINT_REGS base=\old_rsp
+	.endif
+
+	incl	PER_CPU_VAR(irq_count)
+	jnz	.Lirq_stack_push_old_rsp_\@
+
+	/*
+	 * Right now, if we just incremented irq_count to zero, we've
+	 * claimed the IRQ stack but we haven't switched to it yet.
+	 *
+	 * If anything is added that can interrupt us here without using IST,
+	 * it must be *extremely* careful to limit its stack usage.  This
+	 * could include kprobes and a hypothetical future IST-less #DB
+	 * handler.
+	 *
+	 * The OOPS unwinder relies on the word at the top of the IRQ
+	 * stack linking back to the previous RSP for the entire time we're
+	 * on the IRQ stack.  For this to work reliably, we need to write
+	 * it before we actually move ourselves to the IRQ stack.
+	 */
+
+	movq	\old_rsp, PER_CPU_VAR(irq_stack_union + IRQ_STACK_SIZE - 8)
+	movq	PER_CPU_VAR(irq_stack_ptr), %rsp
+
+#ifdef CONFIG_DEBUG_ENTRY
+	/*
+	 * If the first movq above becomes wrong due to IRQ stack layout
+	 * changes, the only way we'll notice is if we try to unwind right
+	 * here.  Assert that we set up the stack right to catch this type
+	 * of bug quickly.
+	 */
+	cmpq	-8(%rsp), \old_rsp
+	je	.Lirq_stack_okay\@
+	ud2
+	.Lirq_stack_okay\@:
+#endif
+
+.Lirq_stack_push_old_rsp_\@:
+	pushq	\old_rsp
+
+	.if \regs
+	UNWIND_HINT_REGS indirect=1
+	.endif
+.endm
+
+/*
+ * Undoes ENTER_IRQ_STACK.
+ */
+.macro LEAVE_IRQ_STACK regs=1
+	DEBUG_ENTRY_ASSERT_IRQS_OFF
+	/* We need to be off the IRQ stack before decrementing irq_count. */
+	popq	%rsp
+
+	.if \regs
+	UNWIND_HINT_REGS
+	.endif
+
+	/*
+	 * As in ENTER_IRQ_STACK, irq_count == 0, we are still claiming
+	 * the irq stack but we're not on it.
+	 */
+
+	decl	PER_CPU_VAR(irq_count)
+.endm
+
 /*
  * Interrupt entry/exit.
  *
@@ -485,17 +580,7 @@ END(irq_entries_start)
 	CALL_enter_from_user_mode
 
 1:
-	/*
-	 * Save previous stack pointer, optionally switch to interrupt stack.
-	 * irq_count is used to check if a CPU is already on an interrupt stack
-	 * or not. While this is essentially redundant with preempt_count it is
-	 * a little cheaper to use a separate counter in the PDA (short of
-	 * moving irq_enter into assembly, which would be too much work)
-	 */
-	movq	%rsp, %rdi
-	incl	PER_CPU_VAR(irq_count)
-	cmovzq	PER_CPU_VAR(irq_stack_ptr), %rsp
-	pushq	%rdi
+	ENTER_IRQ_STACK old_rsp=%rdi
 	/* We entered an interrupt context - irqs are off: */
 	TRACE_IRQS_OFF
 
@@ -515,10 +600,8 @@ common_interrupt:
 ret_from_intr:
 	DISABLE_INTERRUPTS(CLBR_ANY)
 	TRACE_IRQS_OFF
-	decl	PER_CPU_VAR(irq_count)
 
-	/* Restore saved previous stack */
-	popq	%rsp
+	LEAVE_IRQ_STACK
 
 	testb	$3, CS(%rsp)
 	jz	retint_kernel
@@ -561,6 +644,7 @@ restore_c_regs_and_iret:
 	INTERRUPT_RETURN
 
 ENTRY(native_iret)
+	UNWIND_HINT_IRET_REGS
 	/*
 	 * Are we returning to a stack segment from the LDT?  Note: in
 	 * 64-bit mode SS:RSP on the exception stack is always valid.
@@ -633,6 +717,7 @@ native_irq_return_ldt:
 	orq	PER_CPU_VAR(espfix_stack), %rax
 	SWAPGS
 	movq	%rax, %rsp
+	UNWIND_HINT_IRET_REGS offset=8
 
 	/*
 	 * At this point, we cannot write to the stack any more, but we can
@@ -654,6 +739,7 @@ END(common_interrupt)
  */
 .macro apicinterrupt3 num sym do_sym
 ENTRY(\sym)
+	UNWIND_HINT_IRET_REGS
 	ASM_CLAC
 	pushq	$~(\num)
 .Lcommon_\sym:
@@ -662,31 +748,13 @@ ENTRY(\sym)
 END(\sym)
 .endm
 
-#ifdef CONFIG_TRACING
-#define trace(sym) trace_##sym
-#define smp_trace(sym) smp_trace_##sym
-
-.macro trace_apicinterrupt num sym
-apicinterrupt3 \num trace(\sym) smp_trace(\sym)
-.endm
-#else
-.macro trace_apicinterrupt num sym do_sym
-.endm
-#endif
-
 /* Make sure APIC interrupt handlers end up in the irqentry section: */
-#if defined(CONFIG_FUNCTION_GRAPH_TRACER) || defined(CONFIG_KASAN)
-# define PUSH_SECTION_IRQENTRY	.pushsection .irqentry.text, "ax"
-# define POP_SECTION_IRQENTRY	.popsection
-#else
-# define PUSH_SECTION_IRQENTRY
-# define POP_SECTION_IRQENTRY
-#endif
+#define PUSH_SECTION_IRQENTRY	.pushsection .irqentry.text, "ax"
+#define POP_SECTION_IRQENTRY	.popsection
 
 .macro apicinterrupt num sym do_sym
 PUSH_SECTION_IRQENTRY
 apicinterrupt3 \num \sym \do_sym
-trace_apicinterrupt \num \sym
 POP_SECTION_IRQENTRY
 .endm
 
@@ -740,13 +808,14 @@ apicinterrupt IRQ_WORK_VECTOR			irq_work_interrupt		smp_irq_work_interrupt
 
 .macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
 ENTRY(\sym)
+	UNWIND_HINT_IRET_REGS offset=8
+
 	/* Sanity check */
 	.if \shift_ist != -1 && \paranoid == 0
 	.error "using shift_ist requires paranoid=1"
 	.endif
 
 	ASM_CLAC
-	PARAVIRT_ADJUST_EXCEPTION_FRAME
 
 	.ifeq \has_error_code
 	pushq	$-1				/* ORIG_RAX: no syscall to restart */
@@ -763,6 +832,7 @@ ENTRY(\sym)
 	.else
 	call	error_entry
 	.endif
+	UNWIND_HINT_REGS
 	/* returned flag: ebx=0: need swapgs on exit, ebx=1: don't need it */
 
 	.if \paranoid
@@ -829,17 +899,6 @@ ENTRY(\sym)
 END(\sym)
 .endm
 
-#ifdef CONFIG_TRACING
-.macro trace_idtentry sym do_sym has_error_code:req
-idtentry trace(\sym) trace(\do_sym) has_error_code=\has_error_code
-idtentry \sym \do_sym has_error_code=\has_error_code
-.endm
-#else
-.macro trace_idtentry sym do_sym has_error_code:req
-idtentry \sym \do_sym has_error_code=\has_error_code
-.endm
-#endif
-
 idtentry divide_error			do_divide_error			has_error_code=0
 idtentry overflow			do_overflow			has_error_code=0
 idtentry bounds				do_bounds			has_error_code=0
@@ -860,6 +919,7 @@ idtentry simd_coprocessor_error		do_simd_coprocessor_error	has_error_code=0
 	 * edi:  new selector
 	 */
 ENTRY(native_load_gs_index)
+	FRAME_BEGIN
 	pushfq
 	DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI)
 	SWAPGS
@@ -868,8 +928,9 @@ ENTRY(native_load_gs_index)
 2:	ALTERNATIVE "", "mfence", X86_BUG_SWAPGS_FENCE
 	SWAPGS
 	popfq
+	FRAME_END
 	ret
-END(native_load_gs_index)
+ENDPROC(native_load_gs_index)
 EXPORT_SYMBOL(native_load_gs_index)
 
 	_ASM_EXTABLE(.Lgs_change, bad_gs)
@@ -892,17 +953,15 @@ bad_gs:
 ENTRY(do_softirq_own_stack)
 	pushq	%rbp
 	mov	%rsp, %rbp
-	incl	PER_CPU_VAR(irq_count)
-	cmove	PER_CPU_VAR(irq_stack_ptr), %rsp
-	push	%rbp				/* frame pointer backlink */
+	ENTER_IRQ_STACK regs=0 old_rsp=%r11
 	call	__do_softirq
+	LEAVE_IRQ_STACK regs=0
 	leaveq
-	decl	PER_CPU_VAR(irq_count)
 	ret
-END(do_softirq_own_stack)
+ENDPROC(do_softirq_own_stack)
 
 #ifdef CONFIG_XEN
-idtentry xen_hypervisor_callback xen_do_hypervisor_callback has_error_code=0
+idtentry hypervisor_callback xen_do_hypervisor_callback has_error_code=0
 
 /*
  * A note on the "critical region" in our callback handler.
@@ -923,14 +982,14 @@ ENTRY(xen_do_hypervisor_callback)		/* do_hypervisor_callback(struct *pt_regs) */
  * Since we don't modify %rdi, evtchn_do_upall(struct *pt_regs) will
  * see the correct pointer to the pt_regs
  */
+	UNWIND_HINT_FUNC
 	movq	%rdi, %rsp			/* we don't return, adjust the stack frame */
-11:	incl	PER_CPU_VAR(irq_count)
-	movq	%rsp, %rbp
-	cmovzq	PER_CPU_VAR(irq_stack_ptr), %rsp
-	pushq	%rbp				/* frame pointer backlink */
+	UNWIND_HINT_REGS
+
+	ENTER_IRQ_STACK old_rsp=%r10
 	call	xen_evtchn_do_upcall
-	popq	%rsp
-	decl	PER_CPU_VAR(irq_count)
+	LEAVE_IRQ_STACK
+
 #ifndef CONFIG_PREEMPT
 	call	xen_maybe_preempt_hcall
 #endif
@@ -951,6 +1010,7 @@ END(xen_do_hypervisor_callback)
  * with its current contents: any discrepancy means we in category 1.
  */
 ENTRY(xen_failsafe_callback)
+	UNWIND_HINT_EMPTY
 	movl	%ds, %ecx
 	cmpw	%cx, 0x10(%rsp)
 	jne	1f
@@ -968,13 +1028,13 @@ ENTRY(xen_failsafe_callback)
 	movq	8(%rsp), %r11
 	addq	$0x30, %rsp
 	pushq	$0				/* RIP */
-	pushq	%r11
-	pushq	%rcx
+	UNWIND_HINT_IRET_REGS offset=8
 	jmp	general_protection
 1:	/* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */
 	movq	(%rsp), %rcx
 	movq	8(%rsp), %r11
 	addq	$0x30, %rsp
+	UNWIND_HINT_IRET_REGS
 	pushq	$-1 /* orig_ax = -1 => not a system call */
 	ALLOC_PT_GPREGS_ON_STACK
 	SAVE_C_REGS
@@ -998,13 +1058,12 @@ idtentry int3			do_int3			has_error_code=0	paranoid=1 shift_ist=DEBUG_STACK
 idtentry stack_segment		do_stack_segment	has_error_code=1
 
 #ifdef CONFIG_XEN
-idtentry xen_debug		do_debug		has_error_code=0
-idtentry xen_int3		do_int3			has_error_code=0
-idtentry xen_stack_segment	do_stack_segment	has_error_code=1
+idtentry xendebug		do_debug		has_error_code=0
+idtentry xenint3		do_int3			has_error_code=0
 #endif
 
 idtentry general_protection	do_general_protection	has_error_code=1
-trace_idtentry page_fault	do_page_fault		has_error_code=1
+idtentry page_fault		do_page_fault		has_error_code=1
 
 #ifdef CONFIG_KVM_GUEST
 idtentry async_page_fault	do_async_page_fault	has_error_code=1
@@ -1020,6 +1079,7 @@ idtentry machine_check					has_error_code=0	paranoid=1 do_sym=*machine_check_vec
  * Return: ebx=0: need swapgs on exit, ebx=1: otherwise
  */
 ENTRY(paranoid_entry)
+	UNWIND_HINT_FUNC
 	cld
 	SAVE_C_REGS 8
 	SAVE_EXTRA_REGS 8
@@ -1047,6 +1107,7 @@ END(paranoid_entry)
  * On entry, ebx is "no swapgs" flag (1: don't need swapgs, 0: need it)
  */
 ENTRY(paranoid_exit)
+	UNWIND_HINT_REGS
 	DISABLE_INTERRUPTS(CLBR_ANY)
 	TRACE_IRQS_OFF_DEBUG
 	testl	%ebx, %ebx			/* swapgs needed? */
@@ -1068,6 +1129,7 @@ END(paranoid_exit)
  * Return: EBX=0: came from user mode; EBX=1: otherwise
  */
 ENTRY(error_entry)
+	UNWIND_HINT_FUNC
 	cld
 	SAVE_C_REGS 8
 	SAVE_EXTRA_REGS 8
@@ -1152,6 +1214,7 @@ END(error_entry)
  *   0: user gsbase is loaded, we need SWAPGS and standard preparation for return to usermode
  */
 ENTRY(error_exit)
+	UNWIND_HINT_REGS
 	DISABLE_INTERRUPTS(CLBR_ANY)
 	TRACE_IRQS_OFF
 	testl	%ebx, %ebx
@@ -1160,19 +1223,9 @@ ENTRY(error_exit)
 END(error_exit)
 
 /* Runs on exception stack */
+/* XXX: broken on Xen PV */
 ENTRY(nmi)
-	/*
-	 * Fix up the exception frame if we're on Xen.
-	 * PARAVIRT_ADJUST_EXCEPTION_FRAME is guaranteed to push at most
-	 * one value to the stack on native, so it may clobber the rdx
-	 * scratch slot, but it won't clobber any of the important
-	 * slots past it.
-	 *
-	 * Xen is a different story, because the Xen frame itself overlaps
-	 * the "NMI executing" variable.
-	 */
-	PARAVIRT_ADJUST_EXCEPTION_FRAME
-
+	UNWIND_HINT_IRET_REGS
 	/*
 	 * We allow breakpoints in NMIs. If a breakpoint occurs, then
 	 * the iretq it performs will take us out of NMI context.
@@ -1234,11 +1287,13 @@ ENTRY(nmi)
 	cld
 	movq	%rsp, %rdx
 	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+	UNWIND_HINT_IRET_REGS base=%rdx offset=8
 	pushq	5*8(%rdx)	/* pt_regs->ss */
 	pushq	4*8(%rdx)	/* pt_regs->rsp */
 	pushq	3*8(%rdx)	/* pt_regs->flags */
 	pushq	2*8(%rdx)	/* pt_regs->cs */
 	pushq	1*8(%rdx)	/* pt_regs->rip */
+	UNWIND_HINT_IRET_REGS
 	pushq   $-1		/* pt_regs->orig_ax */
 	pushq   %rdi		/* pt_regs->di */
 	pushq   %rsi		/* pt_regs->si */
@@ -1255,6 +1310,7 @@ ENTRY(nmi)
 	pushq	%r13		/* pt_regs->r13 */
 	pushq	%r14		/* pt_regs->r14 */
 	pushq	%r15		/* pt_regs->r15 */
+	UNWIND_HINT_REGS
 	ENCODE_FRAME_POINTER
 
 	/*
@@ -1409,6 +1465,7 @@ first_nmi:
 	.rept 5
 	pushq	11*8(%rsp)
 	.endr
+	UNWIND_HINT_IRET_REGS
 
 	/* Everything up to here is safe from nested NMIs */
 
@@ -1424,6 +1481,7 @@ first_nmi:
 	pushq	$__KERNEL_CS	/* CS */
 	pushq	$1f		/* RIP */
 	INTERRUPT_RETURN	/* continues at repeat_nmi below */
+	UNWIND_HINT_IRET_REGS
 1:
 #endif
 
@@ -1473,6 +1531,7 @@ end_repeat_nmi:
 	 * exceptions might do.
 	 */
 	call	paranoid_entry
+	UNWIND_HINT_REGS
 
 	/* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
 	movq	%rsp, %rdi
@@ -1510,17 +1569,19 @@ nmi_restore:
 END(nmi)
 
 ENTRY(ignore_sysret)
+	UNWIND_HINT_EMPTY
 	mov	$-ENOSYS, %eax
 	sysret
 END(ignore_sysret)
 
 ENTRY(rewind_stack_do_exit)
+	UNWIND_HINT_FUNC
 	/* Prevent any naive code from trying to unwind to our caller. */
 	xorl	%ebp, %ebp
 
 	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rax
-	leaq	-TOP_OF_KERNEL_STACK_PADDING-PTREGS_SIZE(%rax), %rsp
+	leaq	-PTREGS_SIZE(%rax), %rsp
+	UNWIND_HINT_FUNC sp_offset=PTREGS_SIZE
 
 	call	do_exit
-1:	jmp 1b
 END(rewind_stack_do_exit)
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index e1721dafbcb1..e26c25ca7756 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -183,21 +183,20 @@ ENDPROC(entry_SYSENTER_compat)
  */
 ENTRY(entry_SYSCALL_compat)
 	/* Interrupts are off on entry. */
-	SWAPGS_UNSAFE_STACK
+	swapgs
 
 	/* Stash user ESP and switch to the kernel stack. */
 	movl	%esp, %r8d
 	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
 
-	/* Zero-extending 32-bit regs, do not remove */
-	movl	%eax, %eax
-
 	/* Construct struct pt_regs on stack */
 	pushq	$__USER32_DS		/* pt_regs->ss */
 	pushq	%r8			/* pt_regs->sp */
 	pushq	%r11			/* pt_regs->flags */
 	pushq	$__USER32_CS		/* pt_regs->cs */
 	pushq	%rcx			/* pt_regs->ip */
+GLOBAL(entry_SYSCALL_compat_after_hwframe)
+	movl	%eax, %eax		/* discard orig_ax high bits */
 	pushq	%rax			/* pt_regs->orig_ax */
 	pushq	%rdi			/* pt_regs->di */
 	pushq	%rsi			/* pt_regs->si */
@@ -294,7 +293,6 @@ ENTRY(entry_INT80_compat)
 	/*
 	 * Interrupts are off on entry.
 	 */
-	PARAVIRT_ADJUST_EXCEPTION_FRAME
 	ASM_CLAC			/* Do this early to minimize exposure */
 	SWAPGS
 
@@ -342,8 +340,7 @@ ENTRY(entry_INT80_compat)
 	jmp	restore_regs_and_iret
 END(entry_INT80_compat)
 
-	ALIGN
-GLOBAL(stub32_clone)
+ENTRY(stub32_clone)
 	/*
 	 * The 32-bit clone ABI is: clone(..., int tls_val, int *child_tidptr).
 	 * The 64-bit clone ABI is: clone(..., int *child_tidptr, int tls_val).
@@ -353,3 +350,4 @@ GLOBAL(stub32_clone)
 	 */
 	xchg	%r8, %rcx
 	jmp	sys_clone
+ENDPROC(stub32_clone)
diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index 726355ce8497..1911310959f8 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -351,7 +351,7 @@ static void vgetcpu_cpu_init(void *arg)
 	 * and 8 bits for the node)
 	 */
 	d.limit0 = cpu | ((node & 0xf) << 12);
-	d.limit = node >> 4;
+	d.limit1 = node >> 4;
 	d.type = 5;		/* RO data, expand down, accessed */
 	d.dpl = 3;		/* Visible to user code */
 	d.s = 1;		/* Not a system segment */
diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c
index ad44af0dd667..f5cbbba99283 100644
--- a/arch/x86/events/amd/uncore.c
+++ b/arch/x86/events/amd/uncore.c
@@ -400,11 +400,24 @@ static int amd_uncore_cpu_starting(unsigned int cpu)
 
 	if (amd_uncore_llc) {
 		unsigned int apicid = cpu_data(cpu).apicid;
-		unsigned int nshared;
+		unsigned int nshared, subleaf, prev_eax = 0;
 
 		uncore = *per_cpu_ptr(amd_uncore_llc, cpu);
-		cpuid_count(0x8000001d, 2, &eax, &ebx, &ecx, &edx);
-		nshared = ((eax >> 14) & 0xfff) + 1;
+		/*
+		 * Iterate over Cache Topology Definition leaves until no
+		 * more cache descriptions are available.
+		 */
+		for (subleaf = 0; subleaf < 5; subleaf++) {
+			cpuid_count(0x8000001d, subleaf, &eax, &ebx, &ecx, &edx);
+
+			/* EAX[0:4] gives type of cache */
+			if (!(eax & 0x1f))
+				break;
+
+			prev_eax = eax;
+		}
+		nshared = ((prev_eax >> 14) & 0xfff) + 1;
+
 		uncore->id = apicid - (apicid % nshared);
 
 		uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_llc);
@@ -555,7 +568,7 @@ static int __init amd_uncore_init(void)
 		ret = 0;
 	}
 
-	if (boot_cpu_has(X86_FEATURE_PERFCTR_L2)) {
+	if (boot_cpu_has(X86_FEATURE_PERFCTR_LLC)) {
 		amd_uncore_llc = alloc_percpu(struct amd_uncore *);
 		if (!amd_uncore_llc) {
 			ret = -ENOMEM;
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 939050169d12..80534d3c2480 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -487,22 +487,28 @@ static inline int precise_br_compat(struct perf_event *event)
 	return m == b;
 }
 
-int x86_pmu_hw_config(struct perf_event *event)
+int x86_pmu_max_precise(void)
 {
-	if (event->attr.precise_ip) {
-		int precise = 0;
+	int precise = 0;
+
+	/* Support for constant skid */
+	if (x86_pmu.pebs_active && !x86_pmu.pebs_broken) {
+		precise++;
 
-		/* Support for constant skid */
-		if (x86_pmu.pebs_active && !x86_pmu.pebs_broken) {
+		/* Support for IP fixup */
+		if (x86_pmu.lbr_nr || x86_pmu.intel_cap.pebs_format >= 2)
 			precise++;
 
-			/* Support for IP fixup */
-			if (x86_pmu.lbr_nr || x86_pmu.intel_cap.pebs_format >= 2)
-				precise++;
+		if (x86_pmu.pebs_prec_dist)
+			precise++;
+	}
+	return precise;
+}
 
-			if (x86_pmu.pebs_prec_dist)
-				precise++;
-		}
+int x86_pmu_hw_config(struct perf_event *event)
+{
+	if (event->attr.precise_ip) {
+		int precise = x86_pmu_max_precise();
 
 		if (event->attr.precise_ip > precise)
 			return -EOPNOTSUPP;
@@ -1751,6 +1757,7 @@ ssize_t x86_event_sysfs_show(char *page, u64 config, u64 event)
 }
 
 static struct attribute_group x86_pmu_attr_group;
+static struct attribute_group x86_pmu_caps_group;
 
 static int __init init_hw_perf_events(void)
 {
@@ -1799,6 +1806,14 @@ static int __init init_hw_perf_events(void)
 
 	x86_pmu_format_group.attrs = x86_pmu.format_attrs;
 
+	if (x86_pmu.caps_attrs) {
+		struct attribute **tmp;
+
+		tmp = merge_attr(x86_pmu_caps_group.attrs, x86_pmu.caps_attrs);
+		if (!WARN_ON(!tmp))
+			x86_pmu_caps_group.attrs = tmp;
+	}
+
 	if (x86_pmu.event_attrs)
 		x86_pmu_events_group.attrs = x86_pmu.event_attrs;
 
@@ -2213,10 +2228,30 @@ static struct attribute_group x86_pmu_attr_group = {
 	.attrs = x86_pmu_attrs,
 };
 
+static ssize_t max_precise_show(struct device *cdev,
+				  struct device_attribute *attr,
+				  char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "%d\n", x86_pmu_max_precise());
+}
+
+static DEVICE_ATTR_RO(max_precise);
+
+static struct attribute *x86_pmu_caps_attrs[] = {
+	&dev_attr_max_precise.attr,
+	NULL
+};
+
+static struct attribute_group x86_pmu_caps_group = {
+	.name = "caps",
+	.attrs = x86_pmu_caps_attrs,
+};
+
 static const struct attribute_group *x86_pmu_attr_groups[] = {
 	&x86_pmu_attr_group,
 	&x86_pmu_format_group,
 	&x86_pmu_events_group,
+	&x86_pmu_caps_group,
 	NULL,
 };
 
diff --git a/arch/x86/events/intel/Makefile b/arch/x86/events/intel/Makefile
index 06c2baa51814..e9d8520a801a 100644
--- a/arch/x86/events/intel/Makefile
+++ b/arch/x86/events/intel/Makefile
@@ -1,4 +1,4 @@
-obj-$(CONFIG_CPU_SUP_INTEL)		+= core.o bts.o cqm.o
+obj-$(CONFIG_CPU_SUP_INTEL)		+= core.o bts.o
 obj-$(CONFIG_CPU_SUP_INTEL)		+= ds.o knc.o
 obj-$(CONFIG_CPU_SUP_INTEL)		+= lbr.o p4.o p6.o pt.o
 obj-$(CONFIG_PERF_EVENTS_INTEL_RAPL)	+= intel-rapl-perf.o
diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c
index ddd8d3516bfc..16076eb34699 100644
--- a/arch/x86/events/intel/bts.c
+++ b/arch/x86/events/intel/bts.c
@@ -268,7 +268,7 @@ static void bts_event_start(struct perf_event *event, int flags)
 	bts->ds_back.bts_absolute_maximum = cpuc->ds->bts_absolute_maximum;
 	bts->ds_back.bts_interrupt_threshold = cpuc->ds->bts_interrupt_threshold;
 
-	event->hw.itrace_started = 1;
+	perf_event_itrace_started(event);
 	event->hw.state = 0;
 
 	__bts_event_start(event);
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 98b0f0729527..829e89cfcee2 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3415,12 +3415,26 @@ static struct attribute *intel_arch3_formats_attr[] = {
 	&format_attr_any.attr,
 	&format_attr_inv.attr,
 	&format_attr_cmask.attr,
+	NULL,
+};
+
+static struct attribute *hsw_format_attr[] = {
 	&format_attr_in_tx.attr,
 	&format_attr_in_tx_cp.attr,
+	&format_attr_offcore_rsp.attr,
+	&format_attr_ldlat.attr,
+	NULL
+};
 
-	&format_attr_offcore_rsp.attr, /* XXX do NHM/WSM + SNB breakout */
-	&format_attr_ldlat.attr, /* PEBS load latency */
-	NULL,
+static struct attribute *nhm_format_attr[] = {
+	&format_attr_offcore_rsp.attr,
+	&format_attr_ldlat.attr,
+	NULL
+};
+
+static struct attribute *slm_format_attr[] = {
+	&format_attr_offcore_rsp.attr,
+	NULL
 };
 
 static struct attribute *skl_format_attr[] = {
@@ -3781,6 +3795,36 @@ done:
 
 static DEVICE_ATTR_RW(freeze_on_smi);
 
+static ssize_t branches_show(struct device *cdev,
+			     struct device_attribute *attr,
+			     char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "%d\n", x86_pmu.lbr_nr);
+}
+
+static DEVICE_ATTR_RO(branches);
+
+static struct attribute *lbr_attrs[] = {
+	&dev_attr_branches.attr,
+	NULL
+};
+
+static char pmu_name_str[30];
+
+static ssize_t pmu_name_show(struct device *cdev,
+			     struct device_attribute *attr,
+			     char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "%s\n", pmu_name_str);
+}
+
+static DEVICE_ATTR_RO(pmu_name);
+
+static struct attribute *intel_pmu_caps_attrs[] = {
+       &dev_attr_pmu_name.attr,
+       NULL
+};
+
 static struct attribute *intel_pmu_attrs[] = {
 	&dev_attr_freeze_on_smi.attr,
 	NULL,
@@ -3795,6 +3839,8 @@ __init int intel_pmu_init(void)
 	unsigned int unused;
 	struct extra_reg *er;
 	int version, i;
+	struct attribute **extra_attr = NULL;
+	char *name;
 
 	if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
 		switch (boot_cpu_data.x86) {
@@ -3862,6 +3908,7 @@ __init int intel_pmu_init(void)
 	switch (boot_cpu_data.x86_model) {
 	case INTEL_FAM6_CORE_YONAH:
 		pr_cont("Core events, ");
+		name = "core";
 		break;
 
 	case INTEL_FAM6_CORE2_MEROM:
@@ -3877,6 +3924,7 @@ __init int intel_pmu_init(void)
 		x86_pmu.event_constraints = intel_core2_event_constraints;
 		x86_pmu.pebs_constraints = intel_core2_pebs_event_constraints;
 		pr_cont("Core2 events, ");
+		name = "core2";
 		break;
 
 	case INTEL_FAM6_NEHALEM:
@@ -3905,8 +3953,11 @@ __init int intel_pmu_init(void)
 
 		intel_pmu_pebs_data_source_nhm();
 		x86_add_quirk(intel_nehalem_quirk);
+		x86_pmu.pebs_no_tlb = 1;
+		extra_attr = nhm_format_attr;
 
 		pr_cont("Nehalem events, ");
+		name = "nehalem";
 		break;
 
 	case INTEL_FAM6_ATOM_PINEVIEW:
@@ -3923,6 +3974,7 @@ __init int intel_pmu_init(void)
 		x86_pmu.pebs_constraints = intel_atom_pebs_event_constraints;
 		x86_pmu.pebs_aliases = intel_pebs_aliases_core2;
 		pr_cont("Atom events, ");
+		name = "bonnell";
 		break;
 
 	case INTEL_FAM6_ATOM_SILVERMONT1:
@@ -3940,7 +3992,9 @@ __init int intel_pmu_init(void)
 		x86_pmu.extra_regs = intel_slm_extra_regs;
 		x86_pmu.flags |= PMU_FL_HAS_RSP_1;
 		x86_pmu.cpu_events = slm_events_attrs;
+		extra_attr = slm_format_attr;
 		pr_cont("Silvermont events, ");
+		name = "silvermont";
 		break;
 
 	case INTEL_FAM6_ATOM_GOLDMONT:
@@ -3965,7 +4019,9 @@ __init int intel_pmu_init(void)
 		x86_pmu.lbr_pt_coexist = true;
 		x86_pmu.flags |= PMU_FL_HAS_RSP_1;
 		x86_pmu.cpu_events = glm_events_attrs;
+		extra_attr = slm_format_attr;
 		pr_cont("Goldmont events, ");
+		name = "goldmont";
 		break;
 
 	case INTEL_FAM6_ATOM_GEMINI_LAKE:
@@ -3991,7 +4047,9 @@ __init int intel_pmu_init(void)
 		x86_pmu.cpu_events = glm_events_attrs;
 		/* Goldmont Plus has 4-wide pipeline */
 		event_attr_td_total_slots_scale_glm.event_str = "4";
+		extra_attr = slm_format_attr;
 		pr_cont("Goldmont plus events, ");
+		name = "goldmont_plus";
 		break;
 
 	case INTEL_FAM6_WESTMERE:
@@ -4020,7 +4078,9 @@ __init int intel_pmu_init(void)
 			X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1);
 
 		intel_pmu_pebs_data_source_nhm();
+		extra_attr = nhm_format_attr;
 		pr_cont("Westmere events, ");
+		name = "westmere";
 		break;
 
 	case INTEL_FAM6_SANDYBRIDGE:
@@ -4056,7 +4116,10 @@ __init int intel_pmu_init(void)
 		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
 			X86_CONFIG(.event=0xb1, .umask=0x01, .inv=1, .cmask=1);
 
+		extra_attr = nhm_format_attr;
+
 		pr_cont("SandyBridge events, ");
+		name = "sandybridge";
 		break;
 
 	case INTEL_FAM6_IVYBRIDGE:
@@ -4090,7 +4153,10 @@ __init int intel_pmu_init(void)
 		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
 			X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
 
+		extra_attr = nhm_format_attr;
+
 		pr_cont("IvyBridge events, ");
+		name = "ivybridge";
 		break;
 
 
@@ -4118,7 +4184,10 @@ __init int intel_pmu_init(void)
 		x86_pmu.get_event_constraints = hsw_get_event_constraints;
 		x86_pmu.cpu_events = hsw_events_attrs;
 		x86_pmu.lbr_double_abort = true;
+		extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
+			hsw_format_attr : nhm_format_attr;
 		pr_cont("Haswell events, ");
+		name = "haswell";
 		break;
 
 	case INTEL_FAM6_BROADWELL_CORE:
@@ -4154,7 +4223,10 @@ __init int intel_pmu_init(void)
 		x86_pmu.get_event_constraints = hsw_get_event_constraints;
 		x86_pmu.cpu_events = hsw_events_attrs;
 		x86_pmu.limit_period = bdw_limit_period;
+		extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
+			hsw_format_attr : nhm_format_attr;
 		pr_cont("Broadwell events, ");
+		name = "broadwell";
 		break;
 
 	case INTEL_FAM6_XEON_PHI_KNL:
@@ -4172,8 +4244,9 @@ __init int intel_pmu_init(void)
 		/* all extra regs are per-cpu when HT is on */
 		x86_pmu.flags |= PMU_FL_HAS_RSP_1;
 		x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
-
+		extra_attr = slm_format_attr;
 		pr_cont("Knights Landing/Mill events, ");
+		name = "knights-landing";
 		break;
 
 	case INTEL_FAM6_SKYLAKE_MOBILE:
@@ -4203,11 +4276,14 @@ __init int intel_pmu_init(void)
 
 		x86_pmu.hw_config = hsw_hw_config;
 		x86_pmu.get_event_constraints = hsw_get_event_constraints;
-		x86_pmu.format_attrs = merge_attr(intel_arch3_formats_attr,
-						  skl_format_attr);
-		WARN_ON(!x86_pmu.format_attrs);
+		extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
+			hsw_format_attr : nhm_format_attr;
+		extra_attr = merge_attr(extra_attr, skl_format_attr);
 		x86_pmu.cpu_events = hsw_events_attrs;
+		intel_pmu_pebs_data_source_skl(
+			boot_cpu_data.x86_model == INTEL_FAM6_SKYLAKE_X);
 		pr_cont("Skylake events, ");
+		name = "skylake";
 		break;
 
 	default:
@@ -4215,6 +4291,7 @@ __init int intel_pmu_init(void)
 		case 1:
 			x86_pmu.event_constraints = intel_v1_event_constraints;
 			pr_cont("generic architected perfmon v1, ");
+			name = "generic_arch_v1";
 			break;
 		default:
 			/*
@@ -4222,10 +4299,19 @@ __init int intel_pmu_init(void)
 			 */
 			x86_pmu.event_constraints = intel_gen_event_constraints;
 			pr_cont("generic architected perfmon, ");
+			name = "generic_arch_v2+";
 			break;
 		}
 	}
 
+	snprintf(pmu_name_str, sizeof pmu_name_str, "%s", name);
+
+	if (version >= 2 && extra_attr) {
+		x86_pmu.format_attrs = merge_attr(intel_arch3_formats_attr,
+						  extra_attr);
+		WARN_ON(!x86_pmu.format_attrs);
+	}
+
 	if (x86_pmu.num_counters > INTEL_PMC_MAX_GENERIC) {
 		WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!",
 		     x86_pmu.num_counters, INTEL_PMC_MAX_GENERIC);
@@ -4272,8 +4358,13 @@ __init int intel_pmu_init(void)
 			x86_pmu.lbr_nr = 0;
 	}
 
-	if (x86_pmu.lbr_nr)
+	x86_pmu.caps_attrs = intel_pmu_caps_attrs;
+
+	if (x86_pmu.lbr_nr) {
+		x86_pmu.caps_attrs = merge_attr(x86_pmu.caps_attrs, lbr_attrs);
 		pr_cont("%d-deep LBR, ", x86_pmu.lbr_nr);
+	}
+
 	/*
 	 * Access extra MSR may cause #GP under certain circumstances.
 	 * E.g. KVM doesn't support offcore event
diff --git a/arch/x86/events/intel/cqm.c b/arch/x86/events/intel/cqm.c
deleted file mode 100644
index 2521f771f2f5..000000000000
--- a/arch/x86/events/intel/cqm.c
+++ /dev/null
@@ -1,1766 +0,0 @@
-/*
- * Intel Cache Quality-of-Service Monitoring (CQM) support.
- *
- * Based very, very heavily on work by Peter Zijlstra.
- */
-
-#include <linux/perf_event.h>
-#include <linux/slab.h>
-#include <asm/cpu_device_id.h>
-#include <asm/intel_rdt_common.h>
-#include "../perf_event.h"
-
-#define MSR_IA32_QM_CTR		0x0c8e
-#define MSR_IA32_QM_EVTSEL	0x0c8d
-
-#define MBM_CNTR_WIDTH		24
-/*
- * Guaranteed time in ms as per SDM where MBM counters will not overflow.
- */
-#define MBM_CTR_OVERFLOW_TIME	1000
-
-static u32 cqm_max_rmid = -1;
-static unsigned int cqm_l3_scale; /* supposedly cacheline size */
-static bool cqm_enabled, mbm_enabled;
-unsigned int mbm_socket_max;
-
-/*
- * The cached intel_pqr_state is strictly per CPU and can never be
- * updated from a remote CPU. Both functions which modify the state
- * (intel_cqm_event_start and intel_cqm_event_stop) are called with
- * interrupts disabled, which is sufficient for the protection.
- */
-DEFINE_PER_CPU(struct intel_pqr_state, pqr_state);
-static struct hrtimer *mbm_timers;
-/**
- * struct sample - mbm event's (local or total) data
- * @total_bytes    #bytes since we began monitoring
- * @prev_msr       previous value of MSR
- */
-struct sample {
-	u64	total_bytes;
-	u64	prev_msr;
-};
-
-/*
- * samples profiled for total memory bandwidth type events
- */
-static struct sample *mbm_total;
-/*
- * samples profiled for local memory bandwidth type events
- */
-static struct sample *mbm_local;
-
-#define pkg_id	topology_physical_package_id(smp_processor_id())
-/*
- * rmid_2_index returns the index for the rmid in mbm_local/mbm_total array.
- * mbm_total[] and mbm_local[] are linearly indexed by socket# * max number of
- * rmids per socket, an example is given below
- * RMID1 of Socket0:  vrmid =  1
- * RMID1 of Socket1:  vrmid =  1 * (cqm_max_rmid + 1) + 1
- * RMID1 of Socket2:  vrmid =  2 * (cqm_max_rmid + 1) + 1
- */
-#define rmid_2_index(rmid)  ((pkg_id * (cqm_max_rmid + 1)) + rmid)
-/*
- * Protects cache_cgroups and cqm_rmid_free_lru and cqm_rmid_limbo_lru.
- * Also protects event->hw.cqm_rmid
- *
- * Hold either for stability, both for modification of ->hw.cqm_rmid.
- */
-static DEFINE_MUTEX(cache_mutex);
-static DEFINE_RAW_SPINLOCK(cache_lock);
-
-/*
- * Groups of events that have the same target(s), one RMID per group.
- */
-static LIST_HEAD(cache_groups);
-
-/*
- * Mask of CPUs for reading CQM values. We only need one per-socket.
- */
-static cpumask_t cqm_cpumask;
-
-#define RMID_VAL_ERROR		(1ULL << 63)
-#define RMID_VAL_UNAVAIL	(1ULL << 62)
-
-/*
- * Event IDs are used to program IA32_QM_EVTSEL before reading event
- * counter from IA32_QM_CTR
- */
-#define QOS_L3_OCCUP_EVENT_ID	0x01
-#define QOS_MBM_TOTAL_EVENT_ID	0x02
-#define QOS_MBM_LOCAL_EVENT_ID	0x03
-
-/*
- * This is central to the rotation algorithm in __intel_cqm_rmid_rotate().
- *
- * This rmid is always free and is guaranteed to have an associated
- * near-zero occupancy value, i.e. no cachelines are tagged with this
- * RMID, once __intel_cqm_rmid_rotate() returns.
- */
-static u32 intel_cqm_rotation_rmid;
-
-#define INVALID_RMID		(-1)
-
-/*
- * Is @rmid valid for programming the hardware?
- *
- * rmid 0 is reserved by the hardware for all non-monitored tasks, which
- * means that we should never come across an rmid with that value.
- * Likewise, an rmid value of -1 is used to indicate "no rmid currently
- * assigned" and is used as part of the rotation code.
- */
-static inline bool __rmid_valid(u32 rmid)
-{
-	if (!rmid || rmid == INVALID_RMID)
-		return false;
-
-	return true;
-}
-
-static u64 __rmid_read(u32 rmid)
-{
-	u64 val;
-
-	/*
-	 * Ignore the SDM, this thing is _NOTHING_ like a regular perfcnt,
-	 * it just says that to increase confusion.
-	 */
-	wrmsr(MSR_IA32_QM_EVTSEL, QOS_L3_OCCUP_EVENT_ID, rmid);
-	rdmsrl(MSR_IA32_QM_CTR, val);
-
-	/*
-	 * Aside from the ERROR and UNAVAIL bits, assume this thing returns
-	 * the number of cachelines tagged with @rmid.
-	 */
-	return val;
-}
-
-enum rmid_recycle_state {
-	RMID_YOUNG = 0,
-	RMID_AVAILABLE,
-	RMID_DIRTY,
-};
-
-struct cqm_rmid_entry {
-	u32 rmid;
-	enum rmid_recycle_state state;
-	struct list_head list;
-	unsigned long queue_time;
-};
-
-/*
- * cqm_rmid_free_lru - A least recently used list of RMIDs.
- *
- * Oldest entry at the head, newest (most recently used) entry at the
- * tail. This list is never traversed, it's only used to keep track of
- * the lru order. That is, we only pick entries of the head or insert
- * them on the tail.
- *
- * All entries on the list are 'free', and their RMIDs are not currently
- * in use. To mark an RMID as in use, remove its entry from the lru
- * list.
- *
- *
- * cqm_rmid_limbo_lru - list of currently unused but (potentially) dirty RMIDs.
- *
- * This list is contains RMIDs that no one is currently using but that
- * may have a non-zero occupancy value associated with them. The
- * rotation worker moves RMIDs from the limbo list to the free list once
- * the occupancy value drops below __intel_cqm_threshold.
- *
- * Both lists are protected by cache_mutex.
- */
-static LIST_HEAD(cqm_rmid_free_lru);
-static LIST_HEAD(cqm_rmid_limbo_lru);
-
-/*
- * We use a simple array of pointers so that we can lookup a struct
- * cqm_rmid_entry in O(1). This alleviates the callers of __get_rmid()
- * and __put_rmid() from having to worry about dealing with struct
- * cqm_rmid_entry - they just deal with rmids, i.e. integers.
- *
- * Once this array is initialized it is read-only. No locks are required
- * to access it.
- *
- * All entries for all RMIDs can be looked up in the this array at all
- * times.
- */
-static struct cqm_rmid_entry **cqm_rmid_ptrs;
-
-static inline struct cqm_rmid_entry *__rmid_entry(u32 rmid)
-{
-	struct cqm_rmid_entry *entry;
-
-	entry = cqm_rmid_ptrs[rmid];
-	WARN_ON(entry->rmid != rmid);
-
-	return entry;
-}
-
-/*
- * Returns < 0 on fail.
- *
- * We expect to be called with cache_mutex held.
- */
-static u32 __get_rmid(void)
-{
-	struct cqm_rmid_entry *entry;
-
-	lockdep_assert_held(&cache_mutex);
-
-	if (list_empty(&cqm_rmid_free_lru))
-		return INVALID_RMID;
-
-	entry = list_first_entry(&cqm_rmid_free_lru, struct cqm_rmid_entry, list);
-	list_del(&entry->list);
-
-	return entry->rmid;
-}
-
-static void __put_rmid(u32 rmid)
-{
-	struct cqm_rmid_entry *entry;
-
-	lockdep_assert_held(&cache_mutex);
-
-	WARN_ON(!__rmid_valid(rmid));
-	entry = __rmid_entry(rmid);
-
-	entry->queue_time = jiffies;
-	entry->state = RMID_YOUNG;
-
-	list_add_tail(&entry->list, &cqm_rmid_limbo_lru);
-}
-
-static void cqm_cleanup(void)
-{
-	int i;
-
-	if (!cqm_rmid_ptrs)
-		return;
-
-	for (i = 0; i < cqm_max_rmid; i++)
-		kfree(cqm_rmid_ptrs[i]);
-
-	kfree(cqm_rmid_ptrs);
-	cqm_rmid_ptrs = NULL;
-	cqm_enabled = false;
-}
-
-static int intel_cqm_setup_rmid_cache(void)
-{
-	struct cqm_rmid_entry *entry;
-	unsigned int nr_rmids;
-	int r = 0;
-
-	nr_rmids = cqm_max_rmid + 1;
-	cqm_rmid_ptrs = kzalloc(sizeof(struct cqm_rmid_entry *) *
-				nr_rmids, GFP_KERNEL);
-	if (!cqm_rmid_ptrs)
-		return -ENOMEM;
-
-	for (; r <= cqm_max_rmid; r++) {
-		struct cqm_rmid_entry *entry;
-
-		entry = kmalloc(sizeof(*entry), GFP_KERNEL);
-		if (!entry)
-			goto fail;
-
-		INIT_LIST_HEAD(&entry->list);
-		entry->rmid = r;
-		cqm_rmid_ptrs[r] = entry;
-
-		list_add_tail(&entry->list, &cqm_rmid_free_lru);
-	}
-
-	/*
-	 * RMID 0 is special and is always allocated. It's used for all
-	 * tasks that are not monitored.
-	 */
-	entry = __rmid_entry(0);
-	list_del(&entry->list);
-
-	mutex_lock(&cache_mutex);
-	intel_cqm_rotation_rmid = __get_rmid();
-	mutex_unlock(&cache_mutex);
-
-	return 0;
-
-fail:
-	cqm_cleanup();
-	return -ENOMEM;
-}
-
-/*
- * Determine if @a and @b measure the same set of tasks.
- *
- * If @a and @b measure the same set of tasks then we want to share a
- * single RMID.
- */
-static bool __match_event(struct perf_event *a, struct perf_event *b)
-{
-	/* Per-cpu and task events don't mix */
-	if ((a->attach_state & PERF_ATTACH_TASK) !=
-	    (b->attach_state & PERF_ATTACH_TASK))
-		return false;
-
-#ifdef CONFIG_CGROUP_PERF
-	if (a->cgrp != b->cgrp)
-		return false;
-#endif
-
-	/* If not task event, we're machine wide */
-	if (!(b->attach_state & PERF_ATTACH_TASK))
-		return true;
-
-	/*
-	 * Events that target same task are placed into the same cache group.
-	 * Mark it as a multi event group, so that we update ->count
-	 * for every event rather than just the group leader later.
-	 */
-	if (a->hw.target == b->hw.target) {
-		b->hw.is_group_event = true;
-		return true;
-	}
-
-	/*
-	 * Are we an inherited event?
-	 */
-	if (b->parent == a)
-		return true;
-
-	return false;
-}
-
-#ifdef CONFIG_CGROUP_PERF
-static inline struct perf_cgroup *event_to_cgroup(struct perf_event *event)
-{
-	if (event->attach_state & PERF_ATTACH_TASK)
-		return perf_cgroup_from_task(event->hw.target, event->ctx);
-
-	return event->cgrp;
-}
-#endif
-
-/*
- * Determine if @a's tasks intersect with @b's tasks
- *
- * There are combinations of events that we explicitly prohibit,
- *
- *		   PROHIBITS
- *     system-wide    -> 	cgroup and task
- *     cgroup 	      ->	system-wide
- *     		      ->	task in cgroup
- *     task 	      -> 	system-wide
- *     		      ->	task in cgroup
- *
- * Call this function before allocating an RMID.
- */
-static bool __conflict_event(struct perf_event *a, struct perf_event *b)
-{
-#ifdef CONFIG_CGROUP_PERF
-	/*
-	 * We can have any number of cgroups but only one system-wide
-	 * event at a time.
-	 */
-	if (a->cgrp && b->cgrp) {
-		struct perf_cgroup *ac = a->cgrp;
-		struct perf_cgroup *bc = b->cgrp;
-
-		/*
-		 * This condition should have been caught in
-		 * __match_event() and we should be sharing an RMID.
-		 */
-		WARN_ON_ONCE(ac == bc);
-
-		if (cgroup_is_descendant(ac->css.cgroup, bc->css.cgroup) ||
-		    cgroup_is_descendant(bc->css.cgroup, ac->css.cgroup))
-			return true;
-
-		return false;
-	}
-
-	if (a->cgrp || b->cgrp) {
-		struct perf_cgroup *ac, *bc;
-
-		/*
-		 * cgroup and system-wide events are mutually exclusive
-		 */
-		if ((a->cgrp && !(b->attach_state & PERF_ATTACH_TASK)) ||
-		    (b->cgrp && !(a->attach_state & PERF_ATTACH_TASK)))
-			return true;
-
-		/*
-		 * Ensure neither event is part of the other's cgroup
-		 */
-		ac = event_to_cgroup(a);
-		bc = event_to_cgroup(b);
-		if (ac == bc)
-			return true;
-
-		/*
-		 * Must have cgroup and non-intersecting task events.
-		 */
-		if (!ac || !bc)
-			return false;
-
-		/*
-		 * We have cgroup and task events, and the task belongs
-		 * to a cgroup. Check for for overlap.
-		 */
-		if (cgroup_is_descendant(ac->css.cgroup, bc->css.cgroup) ||
-		    cgroup_is_descendant(bc->css.cgroup, ac->css.cgroup))
-			return true;
-
-		return false;
-	}
-#endif
-	/*
-	 * If one of them is not a task, same story as above with cgroups.
-	 */
-	if (!(a->attach_state & PERF_ATTACH_TASK) ||
-	    !(b->attach_state & PERF_ATTACH_TASK))
-		return true;
-
-	/*
-	 * Must be non-overlapping.
-	 */
-	return false;
-}
-
-struct rmid_read {
-	u32 rmid;
-	u32 evt_type;
-	atomic64_t value;
-};
-
-static void __intel_cqm_event_count(void *info);
-static void init_mbm_sample(u32 rmid, u32 evt_type);
-static void __intel_mbm_event_count(void *info);
-
-static bool is_cqm_event(int e)
-{
-	return (e == QOS_L3_OCCUP_EVENT_ID);
-}
-
-static bool is_mbm_event(int e)
-{
-	return (e >= QOS_MBM_TOTAL_EVENT_ID && e <= QOS_MBM_LOCAL_EVENT_ID);
-}
-
-static void cqm_mask_call(struct rmid_read *rr)
-{
-	if (is_mbm_event(rr->evt_type))
-		on_each_cpu_mask(&cqm_cpumask, __intel_mbm_event_count, rr, 1);
-	else
-		on_each_cpu_mask(&cqm_cpumask, __intel_cqm_event_count, rr, 1);
-}
-
-/*
- * Exchange the RMID of a group of events.
- */
-static u32 intel_cqm_xchg_rmid(struct perf_event *group, u32 rmid)
-{
-	struct perf_event *event;
-	struct list_head *head = &group->hw.cqm_group_entry;
-	u32 old_rmid = group->hw.cqm_rmid;
-
-	lockdep_assert_held(&cache_mutex);
-
-	/*
-	 * If our RMID is being deallocated, perform a read now.
-	 */
-	if (__rmid_valid(old_rmid) && !__rmid_valid(rmid)) {
-		struct rmid_read rr = {
-			.rmid = old_rmid,
-			.evt_type = group->attr.config,
-			.value = ATOMIC64_INIT(0),
-		};
-
-		cqm_mask_call(&rr);
-		local64_set(&group->count, atomic64_read(&rr.value));
-	}
-
-	raw_spin_lock_irq(&cache_lock);
-
-	group->hw.cqm_rmid = rmid;
-	list_for_each_entry(event, head, hw.cqm_group_entry)
-		event->hw.cqm_rmid = rmid;
-
-	raw_spin_unlock_irq(&cache_lock);
-
-	/*
-	 * If the allocation is for mbm, init the mbm stats.
-	 * Need to check if each event in the group is mbm event
-	 * because there could be multiple type of events in the same group.
-	 */
-	if (__rmid_valid(rmid)) {
-		event = group;
-		if (is_mbm_event(event->attr.config))
-			init_mbm_sample(rmid, event->attr.config);
-
-		list_for_each_entry(event, head, hw.cqm_group_entry) {
-			if (is_mbm_event(event->attr.config))
-				init_mbm_sample(rmid, event->attr.config);
-		}
-	}
-
-	return old_rmid;
-}
-
-/*
- * If we fail to assign a new RMID for intel_cqm_rotation_rmid because
- * cachelines are still tagged with RMIDs in limbo, we progressively
- * increment the threshold until we find an RMID in limbo with <=
- * __intel_cqm_threshold lines tagged. This is designed to mitigate the
- * problem where cachelines tagged with an RMID are not steadily being
- * evicted.
- *
- * On successful rotations we decrease the threshold back towards zero.
- *
- * __intel_cqm_max_threshold provides an upper bound on the threshold,
- * and is measured in bytes because it's exposed to userland.
- */
-static unsigned int __intel_cqm_threshold;
-static unsigned int __intel_cqm_max_threshold;
-
-/*
- * Test whether an RMID has a zero occupancy value on this cpu.
- */
-static void intel_cqm_stable(void *arg)
-{
-	struct cqm_rmid_entry *entry;
-
-	list_for_each_entry(entry, &cqm_rmid_limbo_lru, list) {
-		if (entry->state != RMID_AVAILABLE)
-			break;
-
-		if (__rmid_read(entry->rmid) > __intel_cqm_threshold)
-			entry->state = RMID_DIRTY;
-	}
-}
-
-/*
- * If we have group events waiting for an RMID that don't conflict with
- * events already running, assign @rmid.
- */
-static bool intel_cqm_sched_in_event(u32 rmid)
-{
-	struct perf_event *leader, *event;
-
-	lockdep_assert_held(&cache_mutex);
-
-	leader = list_first_entry(&cache_groups, struct perf_event,
-				  hw.cqm_groups_entry);
-	event = leader;
-
-	list_for_each_entry_continue(event, &cache_groups,
-				     hw.cqm_groups_entry) {
-		if (__rmid_valid(event->hw.cqm_rmid))
-			continue;
-
-		if (__conflict_event(event, leader))
-			continue;
-
-		intel_cqm_xchg_rmid(event, rmid);
-		return true;
-	}
-
-	return false;
-}
-
-/*
- * Initially use this constant for both the limbo queue time and the
- * rotation timer interval, pmu::hrtimer_interval_ms.
- *
- * They don't need to be the same, but the two are related since if you
- * rotate faster than you recycle RMIDs, you may run out of available
- * RMIDs.
- */
-#define RMID_DEFAULT_QUEUE_TIME 250	/* ms */
-
-static unsigned int __rmid_queue_time_ms = RMID_DEFAULT_QUEUE_TIME;
-
-/*
- * intel_cqm_rmid_stabilize - move RMIDs from limbo to free list
- * @nr_available: number of freeable RMIDs on the limbo list
- *
- * Quiescent state; wait for all 'freed' RMIDs to become unused, i.e. no
- * cachelines are tagged with those RMIDs. After this we can reuse them
- * and know that the current set of active RMIDs is stable.
- *
- * Return %true or %false depending on whether stabilization needs to be
- * reattempted.
- *
- * If we return %true then @nr_available is updated to indicate the
- * number of RMIDs on the limbo list that have been queued for the
- * minimum queue time (RMID_AVAILABLE), but whose data occupancy values
- * are above __intel_cqm_threshold.
- */
-static bool intel_cqm_rmid_stabilize(unsigned int *available)
-{
-	struct cqm_rmid_entry *entry, *tmp;
-
-	lockdep_assert_held(&cache_mutex);
-
-	*available = 0;
-	list_for_each_entry(entry, &cqm_rmid_limbo_lru, list) {
-		unsigned long min_queue_time;
-		unsigned long now = jiffies;
-
-		/*
-		 * We hold RMIDs placed into limbo for a minimum queue
-		 * time. Before the minimum queue time has elapsed we do
-		 * not recycle RMIDs.
-		 *
-		 * The reasoning is that until a sufficient time has
-		 * passed since we stopped using an RMID, any RMID
-		 * placed onto the limbo list will likely still have
-		 * data tagged in the cache, which means we'll probably
-		 * fail to recycle it anyway.
-		 *
-		 * We can save ourselves an expensive IPI by skipping
-		 * any RMIDs that have not been queued for the minimum
-		 * time.
-		 */
-		min_queue_time = entry->queue_time +
-			msecs_to_jiffies(__rmid_queue_time_ms);
-
-		if (time_after(min_queue_time, now))
-			break;
-
-		entry->state = RMID_AVAILABLE;
-		(*available)++;
-	}
-
-	/*
-	 * Fast return if none of the RMIDs on the limbo list have been
-	 * sitting on the queue for the minimum queue time.
-	 */
-	if (!*available)
-		return false;
-
-	/*
-	 * Test whether an RMID is free for each package.
-	 */
-	on_each_cpu_mask(&cqm_cpumask, intel_cqm_stable, NULL, true);
-
-	list_for_each_entry_safe(entry, tmp, &cqm_rmid_limbo_lru, list) {
-		/*
-		 * Exhausted all RMIDs that have waited min queue time.
-		 */
-		if (entry->state == RMID_YOUNG)
-			break;
-
-		if (entry->state == RMID_DIRTY)
-			continue;
-
-		list_del(&entry->list);	/* remove from limbo */
-
-		/*
-		 * The rotation RMID gets priority if it's
-		 * currently invalid. In which case, skip adding
-		 * the RMID to the the free lru.
-		 */
-		if (!__rmid_valid(intel_cqm_rotation_rmid)) {
-			intel_cqm_rotation_rmid = entry->rmid;
-			continue;
-		}
-
-		/*
-		 * If we have groups waiting for RMIDs, hand
-		 * them one now provided they don't conflict.
-		 */
-		if (intel_cqm_sched_in_event(entry->rmid))
-			continue;
-
-		/*
-		 * Otherwise place it onto the free list.
-		 */
-		list_add_tail(&entry->list, &cqm_rmid_free_lru);
-	}
-
-
-	return __rmid_valid(intel_cqm_rotation_rmid);
-}
-
-/*
- * Pick a victim group and move it to the tail of the group list.
- * @next: The first group without an RMID
- */
-static void __intel_cqm_pick_and_rotate(struct perf_event *next)
-{
-	struct perf_event *rotor;
-	u32 rmid;
-
-	lockdep_assert_held(&cache_mutex);
-
-	rotor = list_first_entry(&cache_groups, struct perf_event,
-				 hw.cqm_groups_entry);
-
-	/*
-	 * The group at the front of the list should always have a valid
-	 * RMID. If it doesn't then no groups have RMIDs assigned and we
-	 * don't need to rotate the list.
-	 */
-	if (next == rotor)
-		return;
-
-	rmid = intel_cqm_xchg_rmid(rotor, INVALID_RMID);
-	__put_rmid(rmid);
-
-	list_rotate_left(&cache_groups);
-}
-
-/*
- * Deallocate the RMIDs from any events that conflict with @event, and
- * place them on the back of the group list.
- */
-static void intel_cqm_sched_out_conflicting_events(struct perf_event *event)
-{
-	struct perf_event *group, *g;
-	u32 rmid;
-
-	lockdep_assert_held(&cache_mutex);
-
-	list_for_each_entry_safe(group, g, &cache_groups, hw.cqm_groups_entry) {
-		if (group == event)
-			continue;
-
-		rmid = group->hw.cqm_rmid;
-
-		/*
-		 * Skip events that don't have a valid RMID.
-		 */
-		if (!__rmid_valid(rmid))
-			continue;
-
-		/*
-		 * No conflict? No problem! Leave the event alone.
-		 */
-		if (!__conflict_event(group, event))
-			continue;
-
-		intel_cqm_xchg_rmid(group, INVALID_RMID);
-		__put_rmid(rmid);
-	}
-}
-
-/*
- * Attempt to rotate the groups and assign new RMIDs.
- *
- * We rotate for two reasons,
- *   1. To handle the scheduling of conflicting events
- *   2. To recycle RMIDs
- *
- * Rotating RMIDs is complicated because the hardware doesn't give us
- * any clues.
- *
- * There's problems with the hardware interface; when you change the
- * task:RMID map cachelines retain their 'old' tags, giving a skewed
- * picture. In order to work around this, we must always keep one free
- * RMID - intel_cqm_rotation_rmid.
- *
- * Rotation works by taking away an RMID from a group (the old RMID),
- * and assigning the free RMID to another group (the new RMID). We must
- * then wait for the old RMID to not be used (no cachelines tagged).
- * This ensure that all cachelines are tagged with 'active' RMIDs. At
- * this point we can start reading values for the new RMID and treat the
- * old RMID as the free RMID for the next rotation.
- *
- * Return %true or %false depending on whether we did any rotating.
- */
-static bool __intel_cqm_rmid_rotate(void)
-{
-	struct perf_event *group, *start = NULL;
-	unsigned int threshold_limit;
-	unsigned int nr_needed = 0;
-	unsigned int nr_available;
-	bool rotated = false;
-
-	mutex_lock(&cache_mutex);
-
-again:
-	/*
-	 * Fast path through this function if there are no groups and no
-	 * RMIDs that need cleaning.
-	 */
-	if (list_empty(&cache_groups) && list_empty(&cqm_rmid_limbo_lru))
-		goto out;
-
-	list_for_each_entry(group, &cache_groups, hw.cqm_groups_entry) {
-		if (!__rmid_valid(group->hw.cqm_rmid)) {
-			if (!start)
-				start = group;
-			nr_needed++;
-		}
-	}
-
-	/*
-	 * We have some event groups, but they all have RMIDs assigned
-	 * and no RMIDs need cleaning.
-	 */
-	if (!nr_needed && list_empty(&cqm_rmid_limbo_lru))
-		goto out;
-
-	if (!nr_needed)
-		goto stabilize;
-
-	/*
-	 * We have more event groups without RMIDs than available RMIDs,
-	 * or we have event groups that conflict with the ones currently
-	 * scheduled.
-	 *
-	 * We force deallocate the rmid of the group at the head of
-	 * cache_groups. The first event group without an RMID then gets
-	 * assigned intel_cqm_rotation_rmid. This ensures we always make
-	 * forward progress.
-	 *
-	 * Rotate the cache_groups list so the previous head is now the
-	 * tail.
-	 */
-	__intel_cqm_pick_and_rotate(start);
-
-	/*
-	 * If the rotation is going to succeed, reduce the threshold so
-	 * that we don't needlessly reuse dirty RMIDs.
-	 */
-	if (__rmid_valid(intel_cqm_rotation_rmid)) {
-		intel_cqm_xchg_rmid(start, intel_cqm_rotation_rmid);
-		intel_cqm_rotation_rmid = __get_rmid();
-
-		intel_cqm_sched_out_conflicting_events(start);
-
-		if (__intel_cqm_threshold)
-			__intel_cqm_threshold--;
-	}
-
-	rotated = true;
-
-stabilize:
-	/*
-	 * We now need to stablize the RMID we freed above (if any) to
-	 * ensure that the next time we rotate we have an RMID with zero
-	 * occupancy value.
-	 *
-	 * Alternatively, if we didn't need to perform any rotation,
-	 * we'll have a bunch of RMIDs in limbo that need stabilizing.
-	 */
-	threshold_limit = __intel_cqm_max_threshold / cqm_l3_scale;
-
-	while (intel_cqm_rmid_stabilize(&nr_available) &&
-	       __intel_cqm_threshold < threshold_limit) {
-		unsigned int steal_limit;
-
-		/*
-		 * Don't spin if nobody is actively waiting for an RMID,
-		 * the rotation worker will be kicked as soon as an
-		 * event needs an RMID anyway.
-		 */
-		if (!nr_needed)
-			break;
-
-		/* Allow max 25% of RMIDs to be in limbo. */
-		steal_limit = (cqm_max_rmid + 1) / 4;
-
-		/*
-		 * We failed to stabilize any RMIDs so our rotation
-		 * logic is now stuck. In order to make forward progress
-		 * we have a few options:
-		 *
-		 *   1. rotate ("steal") another RMID
-		 *   2. increase the threshold
-		 *   3. do nothing
-		 *
-		 * We do both of 1. and 2. until we hit the steal limit.
-		 *
-		 * The steal limit prevents all RMIDs ending up on the
-		 * limbo list. This can happen if every RMID has a
-		 * non-zero occupancy above threshold_limit, and the
-		 * occupancy values aren't dropping fast enough.
-		 *
-		 * Note that there is prioritisation at work here - we'd
-		 * rather increase the number of RMIDs on the limbo list
-		 * than increase the threshold, because increasing the
-		 * threshold skews the event data (because we reuse
-		 * dirty RMIDs) - threshold bumps are a last resort.
-		 */
-		if (nr_available < steal_limit)
-			goto again;
-
-		__intel_cqm_threshold++;
-	}
-
-out:
-	mutex_unlock(&cache_mutex);
-	return rotated;
-}
-
-static void intel_cqm_rmid_rotate(struct work_struct *work);
-
-static DECLARE_DELAYED_WORK(intel_cqm_rmid_work, intel_cqm_rmid_rotate);
-
-static struct pmu intel_cqm_pmu;
-
-static void intel_cqm_rmid_rotate(struct work_struct *work)
-{
-	unsigned long delay;
-
-	__intel_cqm_rmid_rotate();
-
-	delay = msecs_to_jiffies(intel_cqm_pmu.hrtimer_interval_ms);
-	schedule_delayed_work(&intel_cqm_rmid_work, delay);
-}
-
-static u64 update_sample(unsigned int rmid, u32 evt_type, int first)
-{
-	struct sample *mbm_current;
-	u32 vrmid = rmid_2_index(rmid);
-	u64 val, bytes, shift;
-	u32 eventid;
-
-	if (evt_type == QOS_MBM_LOCAL_EVENT_ID) {
-		mbm_current = &mbm_local[vrmid];
-		eventid     = QOS_MBM_LOCAL_EVENT_ID;
-	} else {
-		mbm_current = &mbm_total[vrmid];
-		eventid     = QOS_MBM_TOTAL_EVENT_ID;
-	}
-
-	wrmsr(MSR_IA32_QM_EVTSEL, eventid, rmid);
-	rdmsrl(MSR_IA32_QM_CTR, val);
-	if (val & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL))
-		return mbm_current->total_bytes;
-
-	if (first) {
-		mbm_current->prev_msr = val;
-		mbm_current->total_bytes = 0;
-		return mbm_current->total_bytes;
-	}
-
-	/*
-	 * The h/w guarantees that counters will not overflow
-	 * so long as we poll them at least once per second.
-	 */
-	shift = 64 - MBM_CNTR_WIDTH;
-	bytes = (val << shift) - (mbm_current->prev_msr << shift);
-	bytes >>= shift;
-
-	bytes *= cqm_l3_scale;
-
-	mbm_current->total_bytes += bytes;
-	mbm_current->prev_msr = val;
-
-	return mbm_current->total_bytes;
-}
-
-static u64 rmid_read_mbm(unsigned int rmid, u32 evt_type)
-{
-	return update_sample(rmid, evt_type, 0);
-}
-
-static void __intel_mbm_event_init(void *info)
-{
-	struct rmid_read *rr = info;
-
-	update_sample(rr->rmid, rr->evt_type, 1);
-}
-
-static void init_mbm_sample(u32 rmid, u32 evt_type)
-{
-	struct rmid_read rr = {
-		.rmid = rmid,
-		.evt_type = evt_type,
-		.value = ATOMIC64_INIT(0),
-	};
-
-	/* on each socket, init sample */
-	on_each_cpu_mask(&cqm_cpumask, __intel_mbm_event_init, &rr, 1);
-}
-
-/*
- * Find a group and setup RMID.
- *
- * If we're part of a group, we use the group's RMID.
- */
-static void intel_cqm_setup_event(struct perf_event *event,
-				  struct perf_event **group)
-{
-	struct perf_event *iter;
-	bool conflict = false;
-	u32 rmid;
-
-	event->hw.is_group_event = false;
-	list_for_each_entry(iter, &cache_groups, hw.cqm_groups_entry) {
-		rmid = iter->hw.cqm_rmid;
-
-		if (__match_event(iter, event)) {
-			/* All tasks in a group share an RMID */
-			event->hw.cqm_rmid = rmid;
-			*group = iter;
-			if (is_mbm_event(event->attr.config) && __rmid_valid(rmid))
-				init_mbm_sample(rmid, event->attr.config);
-			return;
-		}
-
-		/*
-		 * We only care about conflicts for events that are
-		 * actually scheduled in (and hence have a valid RMID).
-		 */
-		if (__conflict_event(iter, event) && __rmid_valid(rmid))
-			conflict = true;
-	}
-
-	if (conflict)
-		rmid = INVALID_RMID;
-	else
-		rmid = __get_rmid();
-
-	if (is_mbm_event(event->attr.config) && __rmid_valid(rmid))
-		init_mbm_sample(rmid, event->attr.config);
-
-	event->hw.cqm_rmid = rmid;
-}
-
-static void intel_cqm_event_read(struct perf_event *event)
-{
-	unsigned long flags;
-	u32 rmid;
-	u64 val;
-
-	/*
-	 * Task events are handled by intel_cqm_event_count().
-	 */
-	if (event->cpu == -1)
-		return;
-
-	raw_spin_lock_irqsave(&cache_lock, flags);
-	rmid = event->hw.cqm_rmid;
-
-	if (!__rmid_valid(rmid))
-		goto out;
-
-	if (is_mbm_event(event->attr.config))
-		val = rmid_read_mbm(rmid, event->attr.config);
-	else
-		val = __rmid_read(rmid);
-
-	/*
-	 * Ignore this reading on error states and do not update the value.
-	 */
-	if (val & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL))
-		goto out;
-
-	local64_set(&event->count, val);
-out:
-	raw_spin_unlock_irqrestore(&cache_lock, flags);
-}
-
-static void __intel_cqm_event_count(void *info)
-{
-	struct rmid_read *rr = info;
-	u64 val;
-
-	val = __rmid_read(rr->rmid);
-
-	if (val & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL))
-		return;
-
-	atomic64_add(val, &rr->value);
-}
-
-static inline bool cqm_group_leader(struct perf_event *event)
-{
-	return !list_empty(&event->hw.cqm_groups_entry);
-}
-
-static void __intel_mbm_event_count(void *info)
-{
-	struct rmid_read *rr = info;
-	u64 val;
-
-	val = rmid_read_mbm(rr->rmid, rr->evt_type);
-	if (val & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL))
-		return;
-	atomic64_add(val, &rr->value);
-}
-
-static enum hrtimer_restart mbm_hrtimer_handle(struct hrtimer *hrtimer)
-{
-	struct perf_event *iter, *iter1;
-	int ret = HRTIMER_RESTART;
-	struct list_head *head;
-	unsigned long flags;
-	u32 grp_rmid;
-
-	/*
-	 * Need to cache_lock as the timer Event Select MSR reads
-	 * can race with the mbm/cqm count() and mbm_init() reads.
-	 */
-	raw_spin_lock_irqsave(&cache_lock, flags);
-
-	if (list_empty(&cache_groups)) {
-		ret = HRTIMER_NORESTART;
-		goto out;
-	}
-
-	list_for_each_entry(iter, &cache_groups, hw.cqm_groups_entry) {
-		grp_rmid = iter->hw.cqm_rmid;
-		if (!__rmid_valid(grp_rmid))
-			continue;
-		if (is_mbm_event(iter->attr.config))
-			update_sample(grp_rmid, iter->attr.config, 0);
-
-		head = &iter->hw.cqm_group_entry;
-		if (list_empty(head))
-			continue;
-		list_for_each_entry(iter1, head, hw.cqm_group_entry) {
-			if (!iter1->hw.is_group_event)
-				break;
-			if (is_mbm_event(iter1->attr.config))
-				update_sample(iter1->hw.cqm_rmid,
-					      iter1->attr.config, 0);
-		}
-	}
-
-	hrtimer_forward_now(hrtimer, ms_to_ktime(MBM_CTR_OVERFLOW_TIME));
-out:
-	raw_spin_unlock_irqrestore(&cache_lock, flags);
-
-	return ret;
-}
-
-static void __mbm_start_timer(void *info)
-{
-	hrtimer_start(&mbm_timers[pkg_id], ms_to_ktime(MBM_CTR_OVERFLOW_TIME),
-			     HRTIMER_MODE_REL_PINNED);
-}
-
-static void __mbm_stop_timer(void *info)
-{
-	hrtimer_cancel(&mbm_timers[pkg_id]);
-}
-
-static void mbm_start_timers(void)
-{
-	on_each_cpu_mask(&cqm_cpumask, __mbm_start_timer, NULL, 1);
-}
-
-static void mbm_stop_timers(void)
-{
-	on_each_cpu_mask(&cqm_cpumask, __mbm_stop_timer, NULL, 1);
-}
-
-static void mbm_hrtimer_init(void)
-{
-	struct hrtimer *hr;
-	int i;
-
-	for (i = 0; i < mbm_socket_max; i++) {
-		hr = &mbm_timers[i];
-		hrtimer_init(hr, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
-		hr->function = mbm_hrtimer_handle;
-	}
-}
-
-static u64 intel_cqm_event_count(struct perf_event *event)
-{
-	unsigned long flags;
-	struct rmid_read rr = {
-		.evt_type = event->attr.config,
-		.value = ATOMIC64_INIT(0),
-	};
-
-	/*
-	 * We only need to worry about task events. System-wide events
-	 * are handled like usual, i.e. entirely with
-	 * intel_cqm_event_read().
-	 */
-	if (event->cpu != -1)
-		return __perf_event_count(event);
-
-	/*
-	 * Only the group leader gets to report values except in case of
-	 * multiple events in the same group, we still need to read the
-	 * other events.This stops us
-	 * reporting duplicate values to userspace, and gives us a clear
-	 * rule for which task gets to report the values.
-	 *
-	 * Note that it is impossible to attribute these values to
-	 * specific packages - we forfeit that ability when we create
-	 * task events.
-	 */
-	if (!cqm_group_leader(event) && !event->hw.is_group_event)
-		return 0;
-
-	/*
-	 * Getting up-to-date values requires an SMP IPI which is not
-	 * possible if we're being called in interrupt context. Return
-	 * the cached values instead.
-	 */
-	if (unlikely(in_interrupt()))
-		goto out;
-
-	/*
-	 * Notice that we don't perform the reading of an RMID
-	 * atomically, because we can't hold a spin lock across the
-	 * IPIs.
-	 *
-	 * Speculatively perform the read, since @event might be
-	 * assigned a different (possibly invalid) RMID while we're
-	 * busying performing the IPI calls. It's therefore necessary to
-	 * check @event's RMID afterwards, and if it has changed,
-	 * discard the result of the read.
-	 */
-	rr.rmid = ACCESS_ONCE(event->hw.cqm_rmid);
-
-	if (!__rmid_valid(rr.rmid))
-		goto out;
-
-	cqm_mask_call(&rr);
-
-	raw_spin_lock_irqsave(&cache_lock, flags);
-	if (event->hw.cqm_rmid == rr.rmid)
-		local64_set(&event->count, atomic64_read(&rr.value));
-	raw_spin_unlock_irqrestore(&cache_lock, flags);
-out:
-	return __perf_event_count(event);
-}
-
-static void intel_cqm_event_start(struct perf_event *event, int mode)
-{
-	struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
-	u32 rmid = event->hw.cqm_rmid;
-
-	if (!(event->hw.cqm_state & PERF_HES_STOPPED))
-		return;
-
-	event->hw.cqm_state &= ~PERF_HES_STOPPED;
-
-	if (state->rmid_usecnt++) {
-		if (!WARN_ON_ONCE(state->rmid != rmid))
-			return;
-	} else {
-		WARN_ON_ONCE(state->rmid);
-	}
-
-	state->rmid = rmid;
-	wrmsr(MSR_IA32_PQR_ASSOC, rmid, state->closid);
-}
-
-static void intel_cqm_event_stop(struct perf_event *event, int mode)
-{
-	struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
-
-	if (event->hw.cqm_state & PERF_HES_STOPPED)
-		return;
-
-	event->hw.cqm_state |= PERF_HES_STOPPED;
-
-	intel_cqm_event_read(event);
-
-	if (!--state->rmid_usecnt) {
-		state->rmid = 0;
-		wrmsr(MSR_IA32_PQR_ASSOC, 0, state->closid);
-	} else {
-		WARN_ON_ONCE(!state->rmid);
-	}
-}
-
-static int intel_cqm_event_add(struct perf_event *event, int mode)
-{
-	unsigned long flags;
-	u32 rmid;
-
-	raw_spin_lock_irqsave(&cache_lock, flags);
-
-	event->hw.cqm_state = PERF_HES_STOPPED;
-	rmid = event->hw.cqm_rmid;
-
-	if (__rmid_valid(rmid) && (mode & PERF_EF_START))
-		intel_cqm_event_start(event, mode);
-
-	raw_spin_unlock_irqrestore(&cache_lock, flags);
-
-	return 0;
-}
-
-static void intel_cqm_event_destroy(struct perf_event *event)
-{
-	struct perf_event *group_other = NULL;
-	unsigned long flags;
-
-	mutex_lock(&cache_mutex);
-	/*
-	* Hold the cache_lock as mbm timer handlers could be
-	* scanning the list of events.
-	*/
-	raw_spin_lock_irqsave(&cache_lock, flags);
-
-	/*
-	 * If there's another event in this group...
-	 */
-	if (!list_empty(&event->hw.cqm_group_entry)) {
-		group_other = list_first_entry(&event->hw.cqm_group_entry,
-					       struct perf_event,
-					       hw.cqm_group_entry);
-		list_del(&event->hw.cqm_group_entry);
-	}
-
-	/*
-	 * And we're the group leader..
-	 */
-	if (cqm_group_leader(event)) {
-		/*
-		 * If there was a group_other, make that leader, otherwise
-		 * destroy the group and return the RMID.
-		 */
-		if (group_other) {
-			list_replace(&event->hw.cqm_groups_entry,
-				     &group_other->hw.cqm_groups_entry);
-		} else {
-			u32 rmid = event->hw.cqm_rmid;
-
-			if (__rmid_valid(rmid))
-				__put_rmid(rmid);
-			list_del(&event->hw.cqm_groups_entry);
-		}
-	}
-
-	raw_spin_unlock_irqrestore(&cache_lock, flags);
-
-	/*
-	 * Stop the mbm overflow timers when the last event is destroyed.
-	*/
-	if (mbm_enabled && list_empty(&cache_groups))
-		mbm_stop_timers();
-
-	mutex_unlock(&cache_mutex);
-}
-
-static int intel_cqm_event_init(struct perf_event *event)
-{
-	struct perf_event *group = NULL;
-	bool rotate = false;
-	unsigned long flags;
-
-	if (event->attr.type != intel_cqm_pmu.type)
-		return -ENOENT;
-
-	if ((event->attr.config < QOS_L3_OCCUP_EVENT_ID) ||
-	     (event->attr.config > QOS_MBM_LOCAL_EVENT_ID))
-		return -EINVAL;
-
-	if ((is_cqm_event(event->attr.config) && !cqm_enabled) ||
-	    (is_mbm_event(event->attr.config) && !mbm_enabled))
-		return -EINVAL;
-
-	/* unsupported modes and filters */
-	if (event->attr.exclude_user   ||
-	    event->attr.exclude_kernel ||
-	    event->attr.exclude_hv     ||
-	    event->attr.exclude_idle   ||
-	    event->attr.exclude_host   ||
-	    event->attr.exclude_guest  ||
-	    event->attr.sample_period) /* no sampling */
-		return -EINVAL;
-
-	INIT_LIST_HEAD(&event->hw.cqm_group_entry);
-	INIT_LIST_HEAD(&event->hw.cqm_groups_entry);
-
-	event->destroy = intel_cqm_event_destroy;
-
-	mutex_lock(&cache_mutex);
-
-	/*
-	 * Start the mbm overflow timers when the first event is created.
-	*/
-	if (mbm_enabled && list_empty(&cache_groups))
-		mbm_start_timers();
-
-	/* Will also set rmid */
-	intel_cqm_setup_event(event, &group);
-
-	/*
-	* Hold the cache_lock as mbm timer handlers be
-	* scanning the list of events.
-	*/
-	raw_spin_lock_irqsave(&cache_lock, flags);
-
-	if (group) {
-		list_add_tail(&event->hw.cqm_group_entry,
-			      &group->hw.cqm_group_entry);
-	} else {
-		list_add_tail(&event->hw.cqm_groups_entry,
-			      &cache_groups);
-
-		/*
-		 * All RMIDs are either in use or have recently been
-		 * used. Kick the rotation worker to clean/free some.
-		 *
-		 * We only do this for the group leader, rather than for
-		 * every event in a group to save on needless work.
-		 */
-		if (!__rmid_valid(event->hw.cqm_rmid))
-			rotate = true;
-	}
-
-	raw_spin_unlock_irqrestore(&cache_lock, flags);
-	mutex_unlock(&cache_mutex);
-
-	if (rotate)
-		schedule_delayed_work(&intel_cqm_rmid_work, 0);
-
-	return 0;
-}
-
-EVENT_ATTR_STR(llc_occupancy, intel_cqm_llc, "event=0x01");
-EVENT_ATTR_STR(llc_occupancy.per-pkg, intel_cqm_llc_pkg, "1");
-EVENT_ATTR_STR(llc_occupancy.unit, intel_cqm_llc_unit, "Bytes");
-EVENT_ATTR_STR(llc_occupancy.scale, intel_cqm_llc_scale, NULL);
-EVENT_ATTR_STR(llc_occupancy.snapshot, intel_cqm_llc_snapshot, "1");
-
-EVENT_ATTR_STR(total_bytes, intel_cqm_total_bytes, "event=0x02");
-EVENT_ATTR_STR(total_bytes.per-pkg, intel_cqm_total_bytes_pkg, "1");
-EVENT_ATTR_STR(total_bytes.unit, intel_cqm_total_bytes_unit, "MB");
-EVENT_ATTR_STR(total_bytes.scale, intel_cqm_total_bytes_scale, "1e-6");
-
-EVENT_ATTR_STR(local_bytes, intel_cqm_local_bytes, "event=0x03");
-EVENT_ATTR_STR(local_bytes.per-pkg, intel_cqm_local_bytes_pkg, "1");
-EVENT_ATTR_STR(local_bytes.unit, intel_cqm_local_bytes_unit, "MB");
-EVENT_ATTR_STR(local_bytes.scale, intel_cqm_local_bytes_scale, "1e-6");
-
-static struct attribute *intel_cqm_events_attr[] = {
-	EVENT_PTR(intel_cqm_llc),
-	EVENT_PTR(intel_cqm_llc_pkg),
-	EVENT_PTR(intel_cqm_llc_unit),
-	EVENT_PTR(intel_cqm_llc_scale),
-	EVENT_PTR(intel_cqm_llc_snapshot),
-	NULL,
-};
-
-static struct attribute *intel_mbm_events_attr[] = {
-	EVENT_PTR(intel_cqm_total_bytes),
-	EVENT_PTR(intel_cqm_local_bytes),
-	EVENT_PTR(intel_cqm_total_bytes_pkg),
-	EVENT_PTR(intel_cqm_local_bytes_pkg),
-	EVENT_PTR(intel_cqm_total_bytes_unit),
-	EVENT_PTR(intel_cqm_local_bytes_unit),
-	EVENT_PTR(intel_cqm_total_bytes_scale),
-	EVENT_PTR(intel_cqm_local_bytes_scale),
-	NULL,
-};
-
-static struct attribute *intel_cmt_mbm_events_attr[] = {
-	EVENT_PTR(intel_cqm_llc),
-	EVENT_PTR(intel_cqm_total_bytes),
-	EVENT_PTR(intel_cqm_local_bytes),
-	EVENT_PTR(intel_cqm_llc_pkg),
-	EVENT_PTR(intel_cqm_total_bytes_pkg),
-	EVENT_PTR(intel_cqm_local_bytes_pkg),
-	EVENT_PTR(intel_cqm_llc_unit),
-	EVENT_PTR(intel_cqm_total_bytes_unit),
-	EVENT_PTR(intel_cqm_local_bytes_unit),
-	EVENT_PTR(intel_cqm_llc_scale),
-	EVENT_PTR(intel_cqm_total_bytes_scale),
-	EVENT_PTR(intel_cqm_local_bytes_scale),
-	EVENT_PTR(intel_cqm_llc_snapshot),
-	NULL,
-};
-
-static struct attribute_group intel_cqm_events_group = {
-	.name = "events",
-	.attrs = NULL,
-};
-
-PMU_FORMAT_ATTR(event, "config:0-7");
-static struct attribute *intel_cqm_formats_attr[] = {
-	&format_attr_event.attr,
-	NULL,
-};
-
-static struct attribute_group intel_cqm_format_group = {
-	.name = "format",
-	.attrs = intel_cqm_formats_attr,
-};
-
-static ssize_t
-max_recycle_threshold_show(struct device *dev, struct device_attribute *attr,
-			   char *page)
-{
-	ssize_t rv;
-
-	mutex_lock(&cache_mutex);
-	rv = snprintf(page, PAGE_SIZE-1, "%u\n", __intel_cqm_max_threshold);
-	mutex_unlock(&cache_mutex);
-
-	return rv;
-}
-
-static ssize_t
-max_recycle_threshold_store(struct device *dev,
-			    struct device_attribute *attr,
-			    const char *buf, size_t count)
-{
-	unsigned int bytes, cachelines;
-	int ret;
-
-	ret = kstrtouint(buf, 0, &bytes);
-	if (ret)
-		return ret;
-
-	mutex_lock(&cache_mutex);
-
-	__intel_cqm_max_threshold = bytes;
-	cachelines = bytes / cqm_l3_scale;
-
-	/*
-	 * The new maximum takes effect immediately.
-	 */
-	if (__intel_cqm_threshold > cachelines)
-		__intel_cqm_threshold = cachelines;
-
-	mutex_unlock(&cache_mutex);
-
-	return count;
-}
-
-static DEVICE_ATTR_RW(max_recycle_threshold);
-
-static struct attribute *intel_cqm_attrs[] = {
-	&dev_attr_max_recycle_threshold.attr,
-	NULL,
-};
-
-static const struct attribute_group intel_cqm_group = {
-	.attrs = intel_cqm_attrs,
-};
-
-static const struct attribute_group *intel_cqm_attr_groups[] = {
-	&intel_cqm_events_group,
-	&intel_cqm_format_group,
-	&intel_cqm_group,
-	NULL,
-};
-
-static struct pmu intel_cqm_pmu = {
-	.hrtimer_interval_ms = RMID_DEFAULT_QUEUE_TIME,
-	.attr_groups	     = intel_cqm_attr_groups,
-	.task_ctx_nr	     = perf_sw_context,
-	.event_init	     = intel_cqm_event_init,
-	.add		     = intel_cqm_event_add,
-	.del		     = intel_cqm_event_stop,
-	.start		     = intel_cqm_event_start,
-	.stop		     = intel_cqm_event_stop,
-	.read		     = intel_cqm_event_read,
-	.count		     = intel_cqm_event_count,
-};
-
-static inline void cqm_pick_event_reader(int cpu)
-{
-	int reader;
-
-	/* First online cpu in package becomes the reader */
-	reader = cpumask_any_and(&cqm_cpumask, topology_core_cpumask(cpu));
-	if (reader >= nr_cpu_ids)
-		cpumask_set_cpu(cpu, &cqm_cpumask);
-}
-
-static int intel_cqm_cpu_starting(unsigned int cpu)
-{
-	struct intel_pqr_state *state = &per_cpu(pqr_state, cpu);
-	struct cpuinfo_x86 *c = &cpu_data(cpu);
-
-	state->rmid = 0;
-	state->closid = 0;
-	state->rmid_usecnt = 0;
-
-	WARN_ON(c->x86_cache_max_rmid != cqm_max_rmid);
-	WARN_ON(c->x86_cache_occ_scale != cqm_l3_scale);
-
-	cqm_pick_event_reader(cpu);
-	return 0;
-}
-
-static int intel_cqm_cpu_exit(unsigned int cpu)
-{
-	int target;
-
-	/* Is @cpu the current cqm reader for this package ? */
-	if (!cpumask_test_and_clear_cpu(cpu, &cqm_cpumask))
-		return 0;
-
-	/* Find another online reader in this package */
-	target = cpumask_any_but(topology_core_cpumask(cpu), cpu);
-
-	if (target < nr_cpu_ids)
-		cpumask_set_cpu(target, &cqm_cpumask);
-
-	return 0;
-}
-
-static const struct x86_cpu_id intel_cqm_match[] = {
-	{ .vendor = X86_VENDOR_INTEL, .feature = X86_FEATURE_CQM_OCCUP_LLC },
-	{}
-};
-
-static void mbm_cleanup(void)
-{
-	if (!mbm_enabled)
-		return;
-
-	kfree(mbm_local);
-	kfree(mbm_total);
-	mbm_enabled = false;
-}
-
-static const struct x86_cpu_id intel_mbm_local_match[] = {
-	{ .vendor = X86_VENDOR_INTEL, .feature = X86_FEATURE_CQM_MBM_LOCAL },
-	{}
-};
-
-static const struct x86_cpu_id intel_mbm_total_match[] = {
-	{ .vendor = X86_VENDOR_INTEL, .feature = X86_FEATURE_CQM_MBM_TOTAL },
-	{}
-};
-
-static int intel_mbm_init(void)
-{
-	int ret = 0, array_size, maxid = cqm_max_rmid + 1;
-
-	mbm_socket_max = topology_max_packages();
-	array_size = sizeof(struct sample) * maxid * mbm_socket_max;
-	mbm_local = kmalloc(array_size, GFP_KERNEL);
-	if (!mbm_local)
-		return -ENOMEM;
-
-	mbm_total = kmalloc(array_size, GFP_KERNEL);
-	if (!mbm_total) {
-		ret = -ENOMEM;
-		goto out;
-	}
-
-	array_size = sizeof(struct hrtimer) * mbm_socket_max;
-	mbm_timers = kmalloc(array_size, GFP_KERNEL);
-	if (!mbm_timers) {
-		ret = -ENOMEM;
-		goto out;
-	}
-	mbm_hrtimer_init();
-
-out:
-	if (ret)
-		mbm_cleanup();
-
-	return ret;
-}
-
-static int __init intel_cqm_init(void)
-{
-	char *str = NULL, scale[20];
-	int cpu, ret;
-
-	if (x86_match_cpu(intel_cqm_match))
-		cqm_enabled = true;
-
-	if (x86_match_cpu(intel_mbm_local_match) &&
-	     x86_match_cpu(intel_mbm_total_match))
-		mbm_enabled = true;
-
-	if (!cqm_enabled && !mbm_enabled)
-		return -ENODEV;
-
-	cqm_l3_scale = boot_cpu_data.x86_cache_occ_scale;
-
-	/*
-	 * It's possible that not all resources support the same number
-	 * of RMIDs. Instead of making scheduling much more complicated
-	 * (where we have to match a task's RMID to a cpu that supports
-	 * that many RMIDs) just find the minimum RMIDs supported across
-	 * all cpus.
-	 *
-	 * Also, check that the scales match on all cpus.
-	 */
-	cpus_read_lock();
-	for_each_online_cpu(cpu) {
-		struct cpuinfo_x86 *c = &cpu_data(cpu);
-
-		if (c->x86_cache_max_rmid < cqm_max_rmid)
-			cqm_max_rmid = c->x86_cache_max_rmid;
-
-		if (c->x86_cache_occ_scale != cqm_l3_scale) {
-			pr_err("Multiple LLC scale values, disabling\n");
-			ret = -EINVAL;
-			goto out;
-		}
-	}
-
-	/*
-	 * A reasonable upper limit on the max threshold is the number
-	 * of lines tagged per RMID if all RMIDs have the same number of
-	 * lines tagged in the LLC.
-	 *
-	 * For a 35MB LLC and 56 RMIDs, this is ~1.8% of the LLC.
-	 */
-	__intel_cqm_max_threshold =
-		boot_cpu_data.x86_cache_size * 1024 / (cqm_max_rmid + 1);
-
-	snprintf(scale, sizeof(scale), "%u", cqm_l3_scale);
-	str = kstrdup(scale, GFP_KERNEL);
-	if (!str) {
-		ret = -ENOMEM;
-		goto out;
-	}
-
-	event_attr_intel_cqm_llc_scale.event_str = str;
-
-	ret = intel_cqm_setup_rmid_cache();
-	if (ret)
-		goto out;
-
-	if (mbm_enabled)
-		ret = intel_mbm_init();
-	if (ret && !cqm_enabled)
-		goto out;
-
-	if (cqm_enabled && mbm_enabled)
-		intel_cqm_events_group.attrs = intel_cmt_mbm_events_attr;
-	else if (!cqm_enabled && mbm_enabled)
-		intel_cqm_events_group.attrs = intel_mbm_events_attr;
-	else if (cqm_enabled && !mbm_enabled)
-		intel_cqm_events_group.attrs = intel_cqm_events_attr;
-
-	ret = perf_pmu_register(&intel_cqm_pmu, "intel_cqm", -1);
-	if (ret) {
-		pr_err("Intel CQM perf registration failed: %d\n", ret);
-		goto out;
-	}
-
-	if (cqm_enabled)
-		pr_info("Intel CQM monitoring enabled\n");
-	if (mbm_enabled)
-		pr_info("Intel MBM enabled\n");
-
-	/*
-	 * Setup the hot cpu notifier once we are sure cqm
-	 * is enabled to avoid notifier leak.
-	 */
-	cpuhp_setup_state_cpuslocked(CPUHP_AP_PERF_X86_CQM_STARTING,
-				     "perf/x86/cqm:starting",
-				     intel_cqm_cpu_starting, NULL);
-	cpuhp_setup_state_cpuslocked(CPUHP_AP_PERF_X86_CQM_ONLINE,
-				     "perf/x86/cqm:online",
-				     NULL, intel_cqm_cpu_exit);
-out:
-	cpus_read_unlock();
-
-	if (ret) {
-		kfree(str);
-		cqm_cleanup();
-		mbm_cleanup();
-	}
-
-	return ret;
-}
-device_initcall(intel_cqm_init);
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index a322fed5f8ed..e1965e5ff570 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -49,34 +49,47 @@ union intel_x86_pebs_dse {
  */
 #define P(a, b) PERF_MEM_S(a, b)
 #define OP_LH (P(OP, LOAD) | P(LVL, HIT))
+#define LEVEL(x) P(LVLNUM, x)
+#define REM P(REMOTE, REMOTE)
 #define SNOOP_NONE_MISS (P(SNOOP, NONE) | P(SNOOP, MISS))
 
 /* Version for Sandy Bridge and later */
 static u64 pebs_data_source[] = {
-	P(OP, LOAD) | P(LVL, MISS) | P(LVL, L3) | P(SNOOP, NA),/* 0x00:ukn L3 */
-	OP_LH | P(LVL, L1)  | P(SNOOP, NONE),	/* 0x01: L1 local */
-	OP_LH | P(LVL, LFB) | P(SNOOP, NONE),	/* 0x02: LFB hit */
-	OP_LH | P(LVL, L2)  | P(SNOOP, NONE),	/* 0x03: L2 hit */
-	OP_LH | P(LVL, L3)  | P(SNOOP, NONE),	/* 0x04: L3 hit */
-	OP_LH | P(LVL, L3)  | P(SNOOP, MISS),	/* 0x05: L3 hit, snoop miss */
-	OP_LH | P(LVL, L3)  | P(SNOOP, HIT),	/* 0x06: L3 hit, snoop hit */
-	OP_LH | P(LVL, L3)  | P(SNOOP, HITM),	/* 0x07: L3 hit, snoop hitm */
-	OP_LH | P(LVL, REM_CCE1) | P(SNOOP, HIT),  /* 0x08: L3 miss snoop hit */
-	OP_LH | P(LVL, REM_CCE1) | P(SNOOP, HITM), /* 0x09: L3 miss snoop hitm*/
-	OP_LH | P(LVL, LOC_RAM)  | P(SNOOP, HIT),  /* 0x0a: L3 miss, shared */
-	OP_LH | P(LVL, REM_RAM1) | P(SNOOP, HIT),  /* 0x0b: L3 miss, shared */
-	OP_LH | P(LVL, LOC_RAM)  | SNOOP_NONE_MISS,/* 0x0c: L3 miss, excl */
-	OP_LH | P(LVL, REM_RAM1) | SNOOP_NONE_MISS,/* 0x0d: L3 miss, excl */
-	OP_LH | P(LVL, IO)  | P(SNOOP, NONE), /* 0x0e: I/O */
-	OP_LH | P(LVL, UNC) | P(SNOOP, NONE), /* 0x0f: uncached */
+	P(OP, LOAD) | P(LVL, MISS) | LEVEL(L3) | P(SNOOP, NA),/* 0x00:ukn L3 */
+	OP_LH | P(LVL, L1)  | LEVEL(L1) | P(SNOOP, NONE),  /* 0x01: L1 local */
+	OP_LH | P(LVL, LFB) | LEVEL(LFB) | P(SNOOP, NONE), /* 0x02: LFB hit */
+	OP_LH | P(LVL, L2)  | LEVEL(L2) | P(SNOOP, NONE),  /* 0x03: L2 hit */
+	OP_LH | P(LVL, L3)  | LEVEL(L3) | P(SNOOP, NONE),  /* 0x04: L3 hit */
+	OP_LH | P(LVL, L3)  | LEVEL(L3) | P(SNOOP, MISS),  /* 0x05: L3 hit, snoop miss */
+	OP_LH | P(LVL, L3)  | LEVEL(L3) | P(SNOOP, HIT),   /* 0x06: L3 hit, snoop hit */
+	OP_LH | P(LVL, L3)  | LEVEL(L3) | P(SNOOP, HITM),  /* 0x07: L3 hit, snoop hitm */
+	OP_LH | P(LVL, REM_CCE1) | REM | LEVEL(L3) | P(SNOOP, HIT),  /* 0x08: L3 miss snoop hit */
+	OP_LH | P(LVL, REM_CCE1) | REM | LEVEL(L3) | P(SNOOP, HITM), /* 0x09: L3 miss snoop hitm*/
+	OP_LH | P(LVL, LOC_RAM)  | LEVEL(RAM) | P(SNOOP, HIT),       /* 0x0a: L3 miss, shared */
+	OP_LH | P(LVL, REM_RAM1) | REM | LEVEL(L3) | P(SNOOP, HIT),  /* 0x0b: L3 miss, shared */
+	OP_LH | P(LVL, LOC_RAM)  | LEVEL(RAM) | SNOOP_NONE_MISS,     /* 0x0c: L3 miss, excl */
+	OP_LH | P(LVL, REM_RAM1) | LEVEL(RAM) | REM | SNOOP_NONE_MISS, /* 0x0d: L3 miss, excl */
+	OP_LH | P(LVL, IO)  | LEVEL(NA) | P(SNOOP, NONE), /* 0x0e: I/O */
+	OP_LH | P(LVL, UNC) | LEVEL(NA) | P(SNOOP, NONE), /* 0x0f: uncached */
 };
 
 /* Patch up minor differences in the bits */
 void __init intel_pmu_pebs_data_source_nhm(void)
 {
-	pebs_data_source[0x05] = OP_LH | P(LVL, L3)  | P(SNOOP, HIT);
-	pebs_data_source[0x06] = OP_LH | P(LVL, L3)  | P(SNOOP, HITM);
-	pebs_data_source[0x07] = OP_LH | P(LVL, L3)  | P(SNOOP, HITM);
+	pebs_data_source[0x05] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT);
+	pebs_data_source[0x06] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM);
+	pebs_data_source[0x07] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM);
+}
+
+void __init intel_pmu_pebs_data_source_skl(bool pmem)
+{
+	u64 pmem_or_l4 = pmem ? LEVEL(PMEM) : LEVEL(L4);
+
+	pebs_data_source[0x08] = OP_LH | pmem_or_l4 | P(SNOOP, HIT);
+	pebs_data_source[0x09] = OP_LH | pmem_or_l4 | REM | P(SNOOP, HIT);
+	pebs_data_source[0x0b] = OP_LH | LEVEL(RAM) | REM | P(SNOOP, NONE);
+	pebs_data_source[0x0c] = OP_LH | LEVEL(ANY_CACHE) | REM | P(SNOOPX, FWD);
+	pebs_data_source[0x0d] = OP_LH | LEVEL(ANY_CACHE) | REM | P(SNOOP, HITM);
 }
 
 static u64 precise_store_data(u64 status)
@@ -149,8 +162,6 @@ static u64 load_latency_data(u64 status)
 {
 	union intel_x86_pebs_dse dse;
 	u64 val;
-	int model = boot_cpu_data.x86_model;
-	int fam = boot_cpu_data.x86;
 
 	dse.val = status;
 
@@ -162,8 +173,7 @@ static u64 load_latency_data(u64 status)
 	/*
 	 * Nehalem models do not support TLB, Lock infos
 	 */
-	if (fam == 0x6 && (model == 26 || model == 30
-	    || model == 31 || model == 46)) {
+	if (x86_pmu.pebs_no_tlb) {
 		val |= P(TLB, NA) | P(LOCK, NA);
 		return val;
 	}
@@ -1175,7 +1185,7 @@ static void setup_pebs_sample_data(struct perf_event *event,
 	else
 		regs->flags &= ~PERF_EFLAGS_EXACT;
 
-	if ((sample_type & PERF_SAMPLE_ADDR) &&
+	if ((sample_type & (PERF_SAMPLE_ADDR | PERF_SAMPLE_PHYS_ADDR)) &&
 	    x86_pmu.intel_cap.pebs_format >= 1)
 		data->addr = pebs->dla;
 
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index 955457a30197..8a6bbacd17dc 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -109,6 +109,9 @@ enum {
 	X86_BR_ZERO_CALL	= 1 << 15,/* zero length call */
 	X86_BR_CALL_STACK	= 1 << 16,/* call stack */
 	X86_BR_IND_JMP		= 1 << 17,/* indirect jump */
+
+	X86_BR_TYPE_SAVE	= 1 << 18,/* indicate to save branch type */
+
 };
 
 #define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL)
@@ -514,6 +517,7 @@ static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
 		cpuc->lbr_entries[i].in_tx	= 0;
 		cpuc->lbr_entries[i].abort	= 0;
 		cpuc->lbr_entries[i].cycles	= 0;
+		cpuc->lbr_entries[i].type	= 0;
 		cpuc->lbr_entries[i].reserved	= 0;
 	}
 	cpuc->lbr_stack.nr = i;
@@ -600,6 +604,7 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
 		cpuc->lbr_entries[out].in_tx	 = in_tx;
 		cpuc->lbr_entries[out].abort	 = abort;
 		cpuc->lbr_entries[out].cycles	 = cycles;
+		cpuc->lbr_entries[out].type	 = 0;
 		cpuc->lbr_entries[out].reserved	 = 0;
 		out++;
 	}
@@ -677,6 +682,10 @@ static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
 
 	if (br_type & PERF_SAMPLE_BRANCH_CALL)
 		mask |= X86_BR_CALL | X86_BR_ZERO_CALL;
+
+	if (br_type & PERF_SAMPLE_BRANCH_TYPE_SAVE)
+		mask |= X86_BR_TYPE_SAVE;
+
 	/*
 	 * stash actual user request into reg, it may
 	 * be used by fixup code for some CPU
@@ -930,6 +939,43 @@ static int branch_type(unsigned long from, unsigned long to, int abort)
 	return ret;
 }
 
+#define X86_BR_TYPE_MAP_MAX	16
+
+static int branch_map[X86_BR_TYPE_MAP_MAX] = {
+	PERF_BR_CALL,		/* X86_BR_CALL */
+	PERF_BR_RET,		/* X86_BR_RET */
+	PERF_BR_SYSCALL,	/* X86_BR_SYSCALL */
+	PERF_BR_SYSRET,		/* X86_BR_SYSRET */
+	PERF_BR_UNKNOWN,	/* X86_BR_INT */
+	PERF_BR_UNKNOWN,	/* X86_BR_IRET */
+	PERF_BR_COND,		/* X86_BR_JCC */
+	PERF_BR_UNCOND,		/* X86_BR_JMP */
+	PERF_BR_UNKNOWN,	/* X86_BR_IRQ */
+	PERF_BR_IND_CALL,	/* X86_BR_IND_CALL */
+	PERF_BR_UNKNOWN,	/* X86_BR_ABORT */
+	PERF_BR_UNKNOWN,	/* X86_BR_IN_TX */
+	PERF_BR_UNKNOWN,	/* X86_BR_NO_TX */
+	PERF_BR_CALL,		/* X86_BR_ZERO_CALL */
+	PERF_BR_UNKNOWN,	/* X86_BR_CALL_STACK */
+	PERF_BR_IND,		/* X86_BR_IND_JMP */
+};
+
+static int
+common_branch_type(int type)
+{
+	int i;
+
+	type >>= 2; /* skip X86_BR_USER and X86_BR_KERNEL */
+
+	if (type) {
+		i = __ffs(type);
+		if (i < X86_BR_TYPE_MAP_MAX)
+			return branch_map[i];
+	}
+
+	return PERF_BR_UNKNOWN;
+}
+
 /*
  * implement actual branch filter based on user demand.
  * Hardware may not exactly satisfy that request, thus
@@ -946,7 +992,8 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
 	bool compress = false;
 
 	/* if sampling all branches, then nothing to filter */
-	if ((br_sel & X86_BR_ALL) == X86_BR_ALL)
+	if (((br_sel & X86_BR_ALL) == X86_BR_ALL) &&
+	    ((br_sel & X86_BR_TYPE_SAVE) != X86_BR_TYPE_SAVE))
 		return;
 
 	for (i = 0; i < cpuc->lbr_stack.nr; i++) {
@@ -967,6 +1014,9 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
 			cpuc->lbr_entries[i].from = 0;
 			compress = true;
 		}
+
+		if ((br_sel & X86_BR_TYPE_SAVE) == X86_BR_TYPE_SAVE)
+			cpuc->lbr_entries[i].type = common_branch_type(type);
 	}
 
 	if (!compress)
diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c
index ae8324d65e61..81fd41d5a0d9 100644
--- a/arch/x86/events/intel/pt.c
+++ b/arch/x86/events/intel/pt.c
@@ -471,8 +471,9 @@ static void pt_config(struct perf_event *event)
 	struct pt *pt = this_cpu_ptr(&pt_ctx);
 	u64 reg;
 
-	if (!event->hw.itrace_started) {
-		event->hw.itrace_started = 1;
+	/* First round: clear STATUS, in particular the PSB byte counter. */
+	if (!event->hw.config) {
+		perf_event_itrace_started(event);
 		wrmsrl(MSR_IA32_RTIT_STATUS, 0);
 	}
 
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 476aec3a4cab..4196f81ec0e1 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -91,7 +91,7 @@ struct amd_nb {
 	(PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_ADDR | \
 	PERF_SAMPLE_ID | PERF_SAMPLE_CPU | PERF_SAMPLE_STREAM_ID | \
 	PERF_SAMPLE_DATA_SRC | PERF_SAMPLE_IDENTIFIER | \
-	PERF_SAMPLE_TRANSACTION)
+	PERF_SAMPLE_TRANSACTION | PERF_SAMPLE_PHYS_ADDR)
 
 /*
  * A debug store configuration.
@@ -558,6 +558,7 @@ struct x86_pmu {
 	int		attr_rdpmc;
 	struct attribute **format_attrs;
 	struct attribute **event_attrs;
+	struct attribute **caps_attrs;
 
 	ssize_t		(*events_sysfs_show)(char *page, u64 config);
 	struct attribute **cpu_events;
@@ -591,7 +592,8 @@ struct x86_pmu {
 			pebs		:1,
 			pebs_active	:1,
 			pebs_broken	:1,
-			pebs_prec_dist	:1;
+			pebs_prec_dist	:1,
+			pebs_no_tlb	:1;
 	int		pebs_record_size;
 	int		pebs_buffer_size;
 	void		(*drain_pebs)(struct pt_regs *regs);
@@ -741,6 +743,8 @@ int x86_reserve_hardware(void);
 
 void x86_release_hardware(void);
 
+int x86_pmu_max_precise(void);
+
 void hw_perf_lbr_event_destroy(struct perf_event *event);
 
 int x86_setup_perfctr(struct perf_event *event);
@@ -947,6 +951,8 @@ void intel_pmu_lbr_init_knl(void);
 
 void intel_pmu_pebs_data_source_nhm(void);
 
+void intel_pmu_pebs_data_source_skl(bool pmem);
+
 int intel_pmu_setup_lbr_filter(struct perf_event *event);
 
 void intel_pt_interrupt(void);
diff --git a/arch/x86/hyperv/Makefile b/arch/x86/hyperv/Makefile
index 171ae09864d7..367a8203cfcf 100644
--- a/arch/x86/hyperv/Makefile
+++ b/arch/x86/hyperv/Makefile
@@ -1 +1 @@
-obj-y		:= hv_init.o
+obj-y		:= hv_init.o mmu.o
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index 5b882cc0c0e9..1a8eb550c40f 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -26,6 +26,8 @@
 #include <linux/mm.h>
 #include <linux/clockchips.h>
 #include <linux/hyperv.h>
+#include <linux/slab.h>
+#include <linux/cpuhotplug.h>
 
 #ifdef CONFIG_HYPERV_TSCPAGE
 
@@ -75,10 +77,25 @@ static struct clocksource hyperv_cs_msr = {
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
-static void *hypercall_pg;
+void *hv_hypercall_pg;
+EXPORT_SYMBOL_GPL(hv_hypercall_pg);
 struct clocksource *hyperv_cs;
 EXPORT_SYMBOL_GPL(hyperv_cs);
 
+u32 *hv_vp_index;
+EXPORT_SYMBOL_GPL(hv_vp_index);
+
+static int hv_cpu_init(unsigned int cpu)
+{
+	u64 msr_vp_index;
+
+	hv_get_vp_index(msr_vp_index);
+
+	hv_vp_index[smp_processor_id()] = msr_vp_index;
+
+	return 0;
+}
+
 /*
  * This function is to be invoked early in the boot sequence after the
  * hypervisor has been detected.
@@ -94,6 +111,16 @@ void hyperv_init(void)
 	if (x86_hyper != &x86_hyper_ms_hyperv)
 		return;
 
+	/* Allocate percpu VP index */
+	hv_vp_index = kmalloc_array(num_possible_cpus(), sizeof(*hv_vp_index),
+				    GFP_KERNEL);
+	if (!hv_vp_index)
+		return;
+
+	if (cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/hyperv_init:online",
+			      hv_cpu_init, NULL) < 0)
+		goto free_vp_index;
+
 	/*
 	 * Setup the hypercall page and enable hypercalls.
 	 * 1. Register the guest ID
@@ -102,17 +129,19 @@ void hyperv_init(void)
 	guest_id = generate_guest_id(0, LINUX_VERSION_CODE, 0);
 	wrmsrl(HV_X64_MSR_GUEST_OS_ID, guest_id);
 
-	hypercall_pg  = __vmalloc(PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL_RX);
-	if (hypercall_pg == NULL) {
+	hv_hypercall_pg  = __vmalloc(PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL_RX);
+	if (hv_hypercall_pg == NULL) {
 		wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0);
-		return;
+		goto free_vp_index;
 	}
 
 	rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
 	hypercall_msr.enable = 1;
-	hypercall_msr.guest_physical_address = vmalloc_to_pfn(hypercall_pg);
+	hypercall_msr.guest_physical_address = vmalloc_to_pfn(hv_hypercall_pg);
 	wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
 
+	hyper_alloc_mmu();
+
 	/*
 	 * Register Hyper-V specific clocksource.
 	 */
@@ -148,6 +177,12 @@ register_msr_cs:
 	hyperv_cs = &hyperv_cs_msr;
 	if (ms_hyperv.features & HV_X64_MSR_TIME_REF_COUNT_AVAILABLE)
 		clocksource_register_hz(&hyperv_cs_msr, NSEC_PER_SEC/100);
+
+	return;
+
+free_vp_index:
+	kfree(hv_vp_index);
+	hv_vp_index = NULL;
 }
 
 /*
@@ -170,51 +205,6 @@ void hyperv_cleanup(void)
 }
 EXPORT_SYMBOL_GPL(hyperv_cleanup);
 
-/*
- * hv_do_hypercall- Invoke the specified hypercall
- */
-u64 hv_do_hypercall(u64 control, void *input, void *output)
-{
-	u64 input_address = (input) ? virt_to_phys(input) : 0;
-	u64 output_address = (output) ? virt_to_phys(output) : 0;
-#ifdef CONFIG_X86_64
-	u64 hv_status = 0;
-
-	if (!hypercall_pg)
-		return (u64)ULLONG_MAX;
-
-	__asm__ __volatile__("mov %0, %%r8" : : "r" (output_address) : "r8");
-	__asm__ __volatile__("call *%3" : "=a" (hv_status) :
-			     "c" (control), "d" (input_address),
-			     "m" (hypercall_pg));
-
-	return hv_status;
-
-#else
-
-	u32 control_hi = control >> 32;
-	u32 control_lo = control & 0xFFFFFFFF;
-	u32 hv_status_hi = 1;
-	u32 hv_status_lo = 1;
-	u32 input_address_hi = input_address >> 32;
-	u32 input_address_lo = input_address & 0xFFFFFFFF;
-	u32 output_address_hi = output_address >> 32;
-	u32 output_address_lo = output_address & 0xFFFFFFFF;
-
-	if (!hypercall_pg)
-		return (u64)ULLONG_MAX;
-
-	__asm__ __volatile__ ("call *%8" : "=d"(hv_status_hi),
-			      "=a"(hv_status_lo) : "d" (control_hi),
-			      "a" (control_lo), "b" (input_address_hi),
-			      "c" (input_address_lo), "D"(output_address_hi),
-			      "S"(output_address_lo), "m" (hypercall_pg));
-
-	return hv_status_lo | ((u64)hv_status_hi << 32);
-#endif /* !x86_64 */
-}
-EXPORT_SYMBOL_GPL(hv_do_hypercall);
-
 void hyperv_report_panic(struct pt_regs *regs)
 {
 	static bool panic_reported;
diff --git a/arch/x86/hyperv/mmu.c b/arch/x86/hyperv/mmu.c
new file mode 100644
index 000000000000..39e7f6e50919
--- /dev/null
+++ b/arch/x86/hyperv/mmu.c
@@ -0,0 +1,272 @@
+#define pr_fmt(fmt)  "Hyper-V: " fmt
+
+#include <linux/hyperv.h>
+#include <linux/log2.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+
+#include <asm/fpu/api.h>
+#include <asm/mshyperv.h>
+#include <asm/msr.h>
+#include <asm/tlbflush.h>
+
+#define CREATE_TRACE_POINTS
+#include <asm/trace/hyperv.h>
+
+/* HvFlushVirtualAddressSpace, HvFlushVirtualAddressList hypercalls */
+struct hv_flush_pcpu {
+	u64 address_space;
+	u64 flags;
+	u64 processor_mask;
+	u64 gva_list[];
+};
+
+/* HvFlushVirtualAddressSpaceEx, HvFlushVirtualAddressListEx hypercalls */
+struct hv_flush_pcpu_ex {
+	u64 address_space;
+	u64 flags;
+	struct {
+		u64 format;
+		u64 valid_bank_mask;
+		u64 bank_contents[];
+	} hv_vp_set;
+	u64 gva_list[];
+};
+
+/* Each gva in gva_list encodes up to 4096 pages to flush */
+#define HV_TLB_FLUSH_UNIT (4096 * PAGE_SIZE)
+
+static struct hv_flush_pcpu __percpu *pcpu_flush;
+
+static struct hv_flush_pcpu_ex __percpu *pcpu_flush_ex;
+
+/*
+ * Fills in gva_list starting from offset. Returns the number of items added.
+ */
+static inline int fill_gva_list(u64 gva_list[], int offset,
+				unsigned long start, unsigned long end)
+{
+	int gva_n = offset;
+	unsigned long cur = start, diff;
+
+	do {
+		diff = end > cur ? end - cur : 0;
+
+		gva_list[gva_n] = cur & PAGE_MASK;
+		/*
+		 * Lower 12 bits encode the number of additional
+		 * pages to flush (in addition to the 'cur' page).
+		 */
+		if (diff >= HV_TLB_FLUSH_UNIT)
+			gva_list[gva_n] |= ~PAGE_MASK;
+		else if (diff)
+			gva_list[gva_n] |= (diff - 1) >> PAGE_SHIFT;
+
+		cur += HV_TLB_FLUSH_UNIT;
+		gva_n++;
+
+	} while (cur < end);
+
+	return gva_n - offset;
+}
+
+/* Return the number of banks in the resulting vp_set */
+static inline int cpumask_to_vp_set(struct hv_flush_pcpu_ex *flush,
+				    const struct cpumask *cpus)
+{
+	int cpu, vcpu, vcpu_bank, vcpu_offset, nr_bank = 1;
+
+	/*
+	 * Some banks may end up being empty but this is acceptable.
+	 */
+	for_each_cpu(cpu, cpus) {
+		vcpu = hv_cpu_number_to_vp_number(cpu);
+		vcpu_bank = vcpu / 64;
+		vcpu_offset = vcpu % 64;
+
+		/* valid_bank_mask can represent up to 64 banks */
+		if (vcpu_bank >= 64)
+			return 0;
+
+		__set_bit(vcpu_offset, (unsigned long *)
+			  &flush->hv_vp_set.bank_contents[vcpu_bank]);
+		if (vcpu_bank >= nr_bank)
+			nr_bank = vcpu_bank + 1;
+	}
+	flush->hv_vp_set.valid_bank_mask = GENMASK_ULL(nr_bank - 1, 0);
+
+	return nr_bank;
+}
+
+static void hyperv_flush_tlb_others(const struct cpumask *cpus,
+				    const struct flush_tlb_info *info)
+{
+	int cpu, vcpu, gva_n, max_gvas;
+	struct hv_flush_pcpu *flush;
+	u64 status = U64_MAX;
+	unsigned long flags;
+
+	trace_hyperv_mmu_flush_tlb_others(cpus, info);
+
+	if (!pcpu_flush || !hv_hypercall_pg)
+		goto do_native;
+
+	if (cpumask_empty(cpus))
+		return;
+
+	local_irq_save(flags);
+
+	flush = this_cpu_ptr(pcpu_flush);
+
+	if (info->mm) {
+		flush->address_space = virt_to_phys(info->mm->pgd);
+		flush->flags = 0;
+	} else {
+		flush->address_space = 0;
+		flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
+	}
+
+	flush->processor_mask = 0;
+	if (cpumask_equal(cpus, cpu_present_mask)) {
+		flush->flags |= HV_FLUSH_ALL_PROCESSORS;
+	} else {
+		for_each_cpu(cpu, cpus) {
+			vcpu = hv_cpu_number_to_vp_number(cpu);
+			if (vcpu >= 64)
+				goto do_native;
+
+			__set_bit(vcpu, (unsigned long *)
+				  &flush->processor_mask);
+		}
+	}
+
+	/*
+	 * We can flush not more than max_gvas with one hypercall. Flush the
+	 * whole address space if we were asked to do more.
+	 */
+	max_gvas = (PAGE_SIZE - sizeof(*flush)) / sizeof(flush->gva_list[0]);
+
+	if (info->end == TLB_FLUSH_ALL) {
+		flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY;
+		status = hv_do_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE,
+					 flush, NULL);
+	} else if (info->end &&
+		   ((info->end - info->start)/HV_TLB_FLUSH_UNIT) > max_gvas) {
+		status = hv_do_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE,
+					 flush, NULL);
+	} else {
+		gva_n = fill_gva_list(flush->gva_list, 0,
+				      info->start, info->end);
+		status = hv_do_rep_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST,
+					     gva_n, 0, flush, NULL);
+	}
+
+	local_irq_restore(flags);
+
+	if (!(status & HV_HYPERCALL_RESULT_MASK))
+		return;
+do_native:
+	native_flush_tlb_others(cpus, info);
+}
+
+static void hyperv_flush_tlb_others_ex(const struct cpumask *cpus,
+				       const struct flush_tlb_info *info)
+{
+	int nr_bank = 0, max_gvas, gva_n;
+	struct hv_flush_pcpu_ex *flush;
+	u64 status = U64_MAX;
+	unsigned long flags;
+
+	trace_hyperv_mmu_flush_tlb_others(cpus, info);
+
+	if (!pcpu_flush_ex || !hv_hypercall_pg)
+		goto do_native;
+
+	if (cpumask_empty(cpus))
+		return;
+
+	local_irq_save(flags);
+
+	flush = this_cpu_ptr(pcpu_flush_ex);
+
+	if (info->mm) {
+		flush->address_space = virt_to_phys(info->mm->pgd);
+		flush->flags = 0;
+	} else {
+		flush->address_space = 0;
+		flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
+	}
+
+	flush->hv_vp_set.valid_bank_mask = 0;
+
+	if (!cpumask_equal(cpus, cpu_present_mask)) {
+		flush->hv_vp_set.format = HV_GENERIC_SET_SPARCE_4K;
+		nr_bank = cpumask_to_vp_set(flush, cpus);
+	}
+
+	if (!nr_bank) {
+		flush->hv_vp_set.format = HV_GENERIC_SET_ALL;
+		flush->flags |= HV_FLUSH_ALL_PROCESSORS;
+	}
+
+	/*
+	 * We can flush not more than max_gvas with one hypercall. Flush the
+	 * whole address space if we were asked to do more.
+	 */
+	max_gvas =
+		(PAGE_SIZE - sizeof(*flush) - nr_bank *
+		 sizeof(flush->hv_vp_set.bank_contents[0])) /
+		sizeof(flush->gva_list[0]);
+
+	if (info->end == TLB_FLUSH_ALL) {
+		flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY;
+		status = hv_do_rep_hypercall(
+			HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
+			0, nr_bank + 2, flush, NULL);
+	} else if (info->end &&
+		   ((info->end - info->start)/HV_TLB_FLUSH_UNIT) > max_gvas) {
+		status = hv_do_rep_hypercall(
+			HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
+			0, nr_bank + 2, flush, NULL);
+	} else {
+		gva_n = fill_gva_list(flush->gva_list, nr_bank,
+				      info->start, info->end);
+		status = hv_do_rep_hypercall(
+			HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX,
+			gva_n, nr_bank + 2, flush, NULL);
+	}
+
+	local_irq_restore(flags);
+
+	if (!(status & HV_HYPERCALL_RESULT_MASK))
+		return;
+do_native:
+	native_flush_tlb_others(cpus, info);
+}
+
+void hyperv_setup_mmu_ops(void)
+{
+	if (!(ms_hyperv.hints & HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED))
+		return;
+
+	setup_clear_cpu_cap(X86_FEATURE_PCID);
+
+	if (!(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED)) {
+		pr_info("Using hypercall for remote TLB flush\n");
+		pv_mmu_ops.flush_tlb_others = hyperv_flush_tlb_others;
+	} else {
+		pr_info("Using ext hypercall for remote TLB flush\n");
+		pv_mmu_ops.flush_tlb_others = hyperv_flush_tlb_others_ex;
+	}
+}
+
+void hyper_alloc_mmu(void)
+{
+	if (!(ms_hyperv.hints & HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED))
+		return;
+
+	if (!(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED))
+		pcpu_flush = __alloc_percpu(PAGE_SIZE, PAGE_SIZE);
+	else
+		pcpu_flush_ex = __alloc_percpu(PAGE_SIZE, PAGE_SIZE);
+}
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index 724153797209..e0bb46c02857 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -226,7 +226,7 @@ static void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs,
 	if (ksig->ka.sa.sa_flags & SA_ONSTACK)
 		sp = sigsp(sp, ksig);
 	/* This is the legacy signal stack switching. */
-	else if ((regs->ss & 0xffff) != __USER32_DS &&
+	else if (regs->ss != __USER32_DS &&
 		!(ksig->ka.sa.sa_flags & SA_RESTORER) &&
 		 ksig->ka.sa.sa_restorer)
 		sp = (unsigned long) ksig->ka.sa.sa_restorer;
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 2efc768e4362..72d867f6b518 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -150,8 +150,6 @@ static inline void disable_acpi(void) { }
 extern int x86_acpi_numa_init(void);
 #endif /* CONFIG_ACPI_NUMA */
 
-#define acpi_unlazy_tlb(x)	leave_mm(x)
-
 #ifdef CONFIG_ACPI_APEI
 static inline pgprot_t arch_apei_get_mem_attribute(phys_addr_t addr)
 {
@@ -162,12 +160,13 @@ static inline pgprot_t arch_apei_get_mem_attribute(phys_addr_t addr)
 	 * you call efi_mem_attributes() during boot and at runtime,
 	 * you could theoretically see different attributes.
 	 *
-	 * Since we are yet to see any x86 platforms that require
-	 * anything other than PAGE_KERNEL (some arm64 platforms
-	 * require the equivalent of PAGE_KERNEL_NOCACHE), return that
-	 * until we know differently.
+	 * We are yet to see any x86 platforms that require anything
+	 * other than PAGE_KERNEL (some ARM64 platforms require the
+	 * equivalent of PAGE_KERNEL_NOCACHE). Additionally, if SME
+	 * is active, the ACPI information will not be encrypted,
+	 * so return PAGE_KERNEL_NOENC until we know differently.
 	 */
-	 return PAGE_KERNEL;
+	return PAGE_KERNEL_NOENC;
 }
 #endif
 
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index 7a9df3beb89b..676ee5807d86 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -74,6 +74,9 @@
 # define _ASM_EXTABLE_EX(from, to)				\
 	_ASM_EXTABLE_HANDLE(from, to, ex_handler_ext)
 
+# define _ASM_EXTABLE_REFCOUNT(from, to)			\
+	_ASM_EXTABLE_HANDLE(from, to, ex_handler_refcount)
+
 # define _ASM_NOKPROBE(entry)					\
 	.pushsection "_kprobe_blacklist","aw" ;			\
 	_ASM_ALIGN ;						\
@@ -123,6 +126,9 @@
 # define _ASM_EXTABLE_EX(from, to)				\
 	_ASM_EXTABLE_HANDLE(from, to, ex_handler_ext)
 
+# define _ASM_EXTABLE_REFCOUNT(from, to)			\
+	_ASM_EXTABLE_HANDLE(from, to, ex_handler_refcount)
+
 /* For C file, we already have NOKPROBE_SYMBOL macro */
 #endif
 
diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index 33380b871463..0874ebda3069 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -197,35 +197,56 @@ static inline int atomic_xchg(atomic_t *v, int new)
 	return xchg(&v->counter, new);
 }
 
-#define ATOMIC_OP(op)							\
-static inline void atomic_##op(int i, atomic_t *v)			\
-{									\
-	asm volatile(LOCK_PREFIX #op"l %1,%0"				\
-			: "+m" (v->counter)				\
-			: "ir" (i)					\
-			: "memory");					\
+static inline void atomic_and(int i, atomic_t *v)
+{
+	asm volatile(LOCK_PREFIX "andl %1,%0"
+			: "+m" (v->counter)
+			: "ir" (i)
+			: "memory");
+}
+
+static inline int atomic_fetch_and(int i, atomic_t *v)
+{
+	int val = atomic_read(v);
+
+	do { } while (!atomic_try_cmpxchg(v, &val, val & i));
+
+	return val;
 }
 
-#define ATOMIC_FETCH_OP(op, c_op)					\
-static inline int atomic_fetch_##op(int i, atomic_t *v)			\
-{									\
-	int val = atomic_read(v);					\
-	do {								\
-	} while (!atomic_try_cmpxchg(v, &val, val c_op i));		\
-	return val;							\
+static inline void atomic_or(int i, atomic_t *v)
+{
+	asm volatile(LOCK_PREFIX "orl %1,%0"
+			: "+m" (v->counter)
+			: "ir" (i)
+			: "memory");
 }
 
-#define ATOMIC_OPS(op, c_op)						\
-	ATOMIC_OP(op)							\
-	ATOMIC_FETCH_OP(op, c_op)
+static inline int atomic_fetch_or(int i, atomic_t *v)
+{
+	int val = atomic_read(v);
 
-ATOMIC_OPS(and, &)
-ATOMIC_OPS(or , |)
-ATOMIC_OPS(xor, ^)
+	do { } while (!atomic_try_cmpxchg(v, &val, val | i));
 
-#undef ATOMIC_OPS
-#undef ATOMIC_FETCH_OP
-#undef ATOMIC_OP
+	return val;
+}
+
+static inline void atomic_xor(int i, atomic_t *v)
+{
+	asm volatile(LOCK_PREFIX "xorl %1,%0"
+			: "+m" (v->counter)
+			: "ir" (i)
+			: "memory");
+}
+
+static inline int atomic_fetch_xor(int i, atomic_t *v)
+{
+	int val = atomic_read(v);
+
+	do { } while (!atomic_try_cmpxchg(v, &val, val ^ i));
+
+	return val;
+}
 
 /**
  * __atomic_add_unless - add unless the number is already a given value
@@ -239,10 +260,12 @@ ATOMIC_OPS(xor, ^)
 static __always_inline int __atomic_add_unless(atomic_t *v, int a, int u)
 {
 	int c = atomic_read(v);
+
 	do {
 		if (unlikely(c == u))
 			break;
 	} while (!atomic_try_cmpxchg(v, &c, c + a));
+
 	return c;
 }
 
diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h
index 71d7705fb303..9e206f31ce2a 100644
--- a/arch/x86/include/asm/atomic64_32.h
+++ b/arch/x86/include/asm/atomic64_32.h
@@ -312,37 +312,70 @@ static inline long long atomic64_dec_if_positive(atomic64_t *v)
 #undef alternative_atomic64
 #undef __alternative_atomic64
 
-#define ATOMIC64_OP(op, c_op)						\
-static inline void atomic64_##op(long long i, atomic64_t *v)		\
-{									\
-	long long old, c = 0;						\
-	while ((old = atomic64_cmpxchg(v, c, c c_op i)) != c)		\
-		c = old;						\
+static inline void atomic64_and(long long i, atomic64_t *v)
+{
+	long long old, c = 0;
+
+	while ((old = atomic64_cmpxchg(v, c, c & i)) != c)
+		c = old;
 }
 
-#define ATOMIC64_FETCH_OP(op, c_op)					\
-static inline long long atomic64_fetch_##op(long long i, atomic64_t *v)	\
-{									\
-	long long old, c = 0;						\
-	while ((old = atomic64_cmpxchg(v, c, c c_op i)) != c)		\
-		c = old;						\
-	return old;							\
+static inline long long atomic64_fetch_and(long long i, atomic64_t *v)
+{
+	long long old, c = 0;
+
+	while ((old = atomic64_cmpxchg(v, c, c & i)) != c)
+		c = old;
+
+	return old;
 }
 
-ATOMIC64_FETCH_OP(add, +)
+static inline void atomic64_or(long long i, atomic64_t *v)
+{
+	long long old, c = 0;
 
-#define atomic64_fetch_sub(i, v)	atomic64_fetch_add(-(i), (v))
+	while ((old = atomic64_cmpxchg(v, c, c | i)) != c)
+		c = old;
+}
+
+static inline long long atomic64_fetch_or(long long i, atomic64_t *v)
+{
+	long long old, c = 0;
+
+	while ((old = atomic64_cmpxchg(v, c, c | i)) != c)
+		c = old;
+
+	return old;
+}
 
-#define ATOMIC64_OPS(op, c_op)						\
-	ATOMIC64_OP(op, c_op)						\
-	ATOMIC64_FETCH_OP(op, c_op)
+static inline void atomic64_xor(long long i, atomic64_t *v)
+{
+	long long old, c = 0;
+
+	while ((old = atomic64_cmpxchg(v, c, c ^ i)) != c)
+		c = old;
+}
 
-ATOMIC64_OPS(and, &)
-ATOMIC64_OPS(or, |)
-ATOMIC64_OPS(xor, ^)
+static inline long long atomic64_fetch_xor(long long i, atomic64_t *v)
+{
+	long long old, c = 0;
+
+	while ((old = atomic64_cmpxchg(v, c, c ^ i)) != c)
+		c = old;
+
+	return old;
+}
 
-#undef ATOMIC64_OPS
-#undef ATOMIC64_FETCH_OP
-#undef ATOMIC64_OP
+static inline long long atomic64_fetch_add(long long i, atomic64_t *v)
+{
+	long long old, c = 0;
+
+	while ((old = atomic64_cmpxchg(v, c, c + i)) != c)
+		c = old;
+
+	return old;
+}
+
+#define atomic64_fetch_sub(i, v)	atomic64_fetch_add(-(i), (v))
 
 #endif /* _ASM_X86_ATOMIC64_32_H */
diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h
index 6189a433c9a9..5d9de36a2f04 100644
--- a/arch/x86/include/asm/atomic64_64.h
+++ b/arch/x86/include/asm/atomic64_64.h
@@ -177,7 +177,7 @@ static inline long atomic64_cmpxchg(atomic64_t *v, long old, long new)
 }
 
 #define atomic64_try_cmpxchg atomic64_try_cmpxchg
-static __always_inline bool atomic64_try_cmpxchg(atomic64_t *v, long *old, long new)
+static __always_inline bool atomic64_try_cmpxchg(atomic64_t *v, s64 *old, long new)
 {
 	return try_cmpxchg(&v->counter, old, new);
 }
@@ -198,7 +198,7 @@ static inline long atomic64_xchg(atomic64_t *v, long new)
  */
 static inline bool atomic64_add_unless(atomic64_t *v, long a, long u)
 {
-	long c = atomic64_read(v);
+	s64 c = atomic64_read(v);
 	do {
 		if (unlikely(c == u))
 			return false;
@@ -217,7 +217,7 @@ static inline bool atomic64_add_unless(atomic64_t *v, long a, long u)
  */
 static inline long atomic64_dec_if_positive(atomic64_t *v)
 {
-	long dec, c = atomic64_read(v);
+	s64 dec, c = atomic64_read(v);
 	do {
 		dec = c - 1;
 		if (unlikely(dec < 0))
@@ -226,34 +226,55 @@ static inline long atomic64_dec_if_positive(atomic64_t *v)
 	return dec;
 }
 
-#define ATOMIC64_OP(op)							\
-static inline void atomic64_##op(long i, atomic64_t *v)			\
-{									\
-	asm volatile(LOCK_PREFIX #op"q %1,%0"				\
-			: "+m" (v->counter)				\
-			: "er" (i)					\
-			: "memory");					\
+static inline void atomic64_and(long i, atomic64_t *v)
+{
+	asm volatile(LOCK_PREFIX "andq %1,%0"
+			: "+m" (v->counter)
+			: "er" (i)
+			: "memory");
 }
 
-#define ATOMIC64_FETCH_OP(op, c_op)					\
-static inline long atomic64_fetch_##op(long i, atomic64_t *v)		\
-{									\
-	long val = atomic64_read(v);					\
-	do {								\
-	} while (!atomic64_try_cmpxchg(v, &val, val c_op i));		\
-	return val;							\
+static inline long atomic64_fetch_and(long i, atomic64_t *v)
+{
+	s64 val = atomic64_read(v);
+
+	do {
+	} while (!atomic64_try_cmpxchg(v, &val, val & i));
+	return val;
 }
 
-#define ATOMIC64_OPS(op, c_op)						\
-	ATOMIC64_OP(op)							\
-	ATOMIC64_FETCH_OP(op, c_op)
+static inline void atomic64_or(long i, atomic64_t *v)
+{
+	asm volatile(LOCK_PREFIX "orq %1,%0"
+			: "+m" (v->counter)
+			: "er" (i)
+			: "memory");
+}
 
-ATOMIC64_OPS(and, &)
-ATOMIC64_OPS(or, |)
-ATOMIC64_OPS(xor, ^)
+static inline long atomic64_fetch_or(long i, atomic64_t *v)
+{
+	s64 val = atomic64_read(v);
 
-#undef ATOMIC64_OPS
-#undef ATOMIC64_FETCH_OP
-#undef ATOMIC64_OP
+	do {
+	} while (!atomic64_try_cmpxchg(v, &val, val | i));
+	return val;
+}
+
+static inline void atomic64_xor(long i, atomic64_t *v)
+{
+	asm volatile(LOCK_PREFIX "xorq %1,%0"
+			: "+m" (v->counter)
+			: "er" (i)
+			: "memory");
+}
+
+static inline long atomic64_fetch_xor(long i, atomic64_t *v)
+{
+	s64 val = atomic64_read(v);
+
+	do {
+	} while (!atomic64_try_cmpxchg(v, &val, val ^ i));
+	return val;
+}
 
 #endif /* _ASM_X86_ATOMIC64_64_H */
diff --git a/arch/x86/include/asm/cmdline.h b/arch/x86/include/asm/cmdline.h
index e01f7f7ccb0c..84ae170bc3d0 100644
--- a/arch/x86/include/asm/cmdline.h
+++ b/arch/x86/include/asm/cmdline.h
@@ -2,5 +2,7 @@
 #define _ASM_X86_CMDLINE_H
 
 int cmdline_find_option_bool(const char *cmdline_ptr, const char *option);
+int cmdline_find_option(const char *cmdline_ptr, const char *option,
+			char *buffer, int bufsize);
 
 #endif /* _ASM_X86_CMDLINE_H */
diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h
index d90296d061e8..b5069e802d5c 100644
--- a/arch/x86/include/asm/cmpxchg.h
+++ b/arch/x86/include/asm/cmpxchg.h
@@ -157,7 +157,7 @@ extern void __add_wrong_size(void)
 #define __raw_try_cmpxchg(_ptr, _pold, _new, size, lock)		\
 ({									\
 	bool success;							\
-	__typeof__(_ptr) _old = (_pold);				\
+	__typeof__(_ptr) _old = (__typeof__(_ptr))(_pold);		\
 	__typeof__(*(_ptr)) __old = *_old;				\
 	__typeof__(*(_ptr)) __new = (_new);				\
 	switch (size) {							\
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 5a28e8e55e36..42bbbf0f173d 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -177,7 +177,7 @@
 #define X86_FEATURE_PERFCTR_NB  ( 6*32+24) /* NB performance counter extensions */
 #define X86_FEATURE_BPEXT	(6*32+26) /* data breakpoint extension */
 #define X86_FEATURE_PTSC	( 6*32+27) /* performance time-stamp counter */
-#define X86_FEATURE_PERFCTR_L2	( 6*32+28) /* L2 performance counter extensions */
+#define X86_FEATURE_PERFCTR_LLC	( 6*32+28) /* Last Level Cache performance counter extensions */
 #define X86_FEATURE_MWAITX	( 6*32+29) /* MWAIT extension (MONITORX/MWAITX) */
 
 /*
@@ -196,6 +196,7 @@
 
 #define X86_FEATURE_HW_PSTATE	( 7*32+ 8) /* AMD HW-PState */
 #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
+#define X86_FEATURE_SME		( 7*32+10) /* AMD Secure Memory Encryption */
 
 #define X86_FEATURE_INTEL_PPIN	( 7*32+14) /* Intel Processor Inventory Number */
 #define X86_FEATURE_INTEL_PT	( 7*32+15) /* Intel Processor Trace */
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index d0a21b12dd58..1a2ba368da39 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -5,6 +5,7 @@
 #include <asm/ldt.h>
 #include <asm/mmu.h>
 #include <asm/fixmap.h>
+#include <asm/irq_vectors.h>
 
 #include <linux/smp.h>
 #include <linux/percpu.h>
@@ -22,7 +23,7 @@ static inline void fill_ldt(struct desc_struct *desc, const struct user_desc *in
 	desc->s			= 1;
 	desc->dpl		= 0x3;
 	desc->p			= info->seg_not_present ^ 1;
-	desc->limit		= (info->limit & 0xf0000) >> 16;
+	desc->limit1		= (info->limit & 0xf0000) >> 16;
 	desc->avl		= info->useable;
 	desc->d			= info->seg_32bit;
 	desc->g			= info->limit_in_pages;
@@ -83,33 +84,25 @@ static inline phys_addr_t get_cpu_gdt_paddr(unsigned int cpu)
 	return per_cpu_ptr_to_phys(get_cpu_gdt_rw(cpu));
 }
 
-#ifdef CONFIG_X86_64
-
 static inline void pack_gate(gate_desc *gate, unsigned type, unsigned long func,
 			     unsigned dpl, unsigned ist, unsigned seg)
 {
-	gate->offset_low	= PTR_LOW(func);
+	gate->offset_low	= (u16) func;
+	gate->bits.p		= 1;
+	gate->bits.dpl		= dpl;
+	gate->bits.zero		= 0;
+	gate->bits.type		= type;
+	gate->offset_middle	= (u16) (func >> 16);
+#ifdef CONFIG_X86_64
 	gate->segment		= __KERNEL_CS;
-	gate->ist		= ist;
-	gate->p			= 1;
-	gate->dpl		= dpl;
-	gate->zero0		= 0;
-	gate->zero1		= 0;
-	gate->type		= type;
-	gate->offset_middle	= PTR_MIDDLE(func);
-	gate->offset_high	= PTR_HIGH(func);
-}
-
+	gate->bits.ist		= ist;
+	gate->reserved		= 0;
+	gate->offset_high	= (u32) (func >> 32);
 #else
-static inline void pack_gate(gate_desc *gate, unsigned char type,
-			     unsigned long base, unsigned dpl, unsigned flags,
-			     unsigned short seg)
-{
-	gate->a = (seg << 16) | (base & 0xffff);
-	gate->b = (base & 0xffff0000) | (((0x80 | type | (dpl << 5)) & 0xff) << 8);
-}
-
+	gate->segment		= seg;
+	gate->bits.ist		= 0;
 #endif
+}
 
 static inline int desc_empty(const void *ptr)
 {
@@ -173,35 +166,22 @@ native_write_gdt_entry(struct desc_struct *gdt, int entry, const void *desc, int
 	memcpy(&gdt[entry], desc, size);
 }
 
-static inline void pack_descriptor(struct desc_struct *desc, unsigned long base,
-				   unsigned long limit, unsigned char type,
-				   unsigned char flags)
-{
-	desc->a = ((base & 0xffff) << 16) | (limit & 0xffff);
-	desc->b = (base & 0xff000000) | ((base & 0xff0000) >> 16) |
-		(limit & 0x000f0000) | ((type & 0xff) << 8) |
-		((flags & 0xf) << 20);
-	desc->p = 1;
-}
-
-
-static inline void set_tssldt_descriptor(void *d, unsigned long addr, unsigned type, unsigned size)
+static inline void set_tssldt_descriptor(void *d, unsigned long addr,
+					 unsigned type, unsigned size)
 {
-#ifdef CONFIG_X86_64
-	struct ldttss_desc64 *desc = d;
+	struct ldttss_desc *desc = d;
 
 	memset(desc, 0, sizeof(*desc));
 
-	desc->limit0		= size & 0xFFFF;
-	desc->base0		= PTR_LOW(addr);
-	desc->base1		= PTR_MIDDLE(addr) & 0xFF;
+	desc->limit0		= (u16) size;
+	desc->base0		= (u16) addr;
+	desc->base1		= (addr >> 16) & 0xFF;
 	desc->type		= type;
 	desc->p			= 1;
 	desc->limit1		= (size >> 16) & 0xF;
-	desc->base2		= (PTR_MIDDLE(addr) >> 8) & 0xFF;
-	desc->base3		= PTR_HIGH(addr);
-#else
-	pack_descriptor((struct desc_struct *)d, addr, size, 0x80 | type, 0);
+	desc->base2		= (addr >> 24) & 0xFF;
+#ifdef CONFIG_X86_64
+	desc->base3		= (u32) (addr >> 32);
 #endif
 }
 
@@ -401,147 +381,20 @@ static inline void set_desc_base(struct desc_struct *desc, unsigned long base)
 
 static inline unsigned long get_desc_limit(const struct desc_struct *desc)
 {
-	return desc->limit0 | (desc->limit << 16);
+	return desc->limit0 | (desc->limit1 << 16);
 }
 
 static inline void set_desc_limit(struct desc_struct *desc, unsigned long limit)
 {
 	desc->limit0 = limit & 0xffff;
-	desc->limit = (limit >> 16) & 0xf;
-}
-
-#ifdef CONFIG_X86_64
-static inline void set_nmi_gate(int gate, void *addr)
-{
-	gate_desc s;
-
-	pack_gate(&s, GATE_INTERRUPT, (unsigned long)addr, 0, 0, __KERNEL_CS);
-	write_idt_entry(debug_idt_table, gate, &s);
+	desc->limit1 = (limit >> 16) & 0xf;
 }
-#endif
 
-#ifdef CONFIG_TRACING
-extern struct desc_ptr trace_idt_descr;
-extern gate_desc trace_idt_table[];
-static inline void write_trace_idt_entry(int entry, const gate_desc *gate)
-{
-	write_idt_entry(trace_idt_table, entry, gate);
-}
+void update_intr_gate(unsigned int n, const void *addr);
+void alloc_intr_gate(unsigned int n, const void *addr);
 
-static inline void _trace_set_gate(int gate, unsigned type, void *addr,
-				   unsigned dpl, unsigned ist, unsigned seg)
-{
-	gate_desc s;
-
-	pack_gate(&s, type, (unsigned long)addr, dpl, ist, seg);
-	/*
-	 * does not need to be atomic because it is only done once at
-	 * setup time
-	 */
-	write_trace_idt_entry(gate, &s);
-}
-#else
-static inline void write_trace_idt_entry(int entry, const gate_desc *gate)
-{
-}
-
-#define _trace_set_gate(gate, type, addr, dpl, ist, seg)
-#endif
-
-static inline void _set_gate(int gate, unsigned type, void *addr,
-			     unsigned dpl, unsigned ist, unsigned seg)
-{
-	gate_desc s;
-
-	pack_gate(&s, type, (unsigned long)addr, dpl, ist, seg);
-	/*
-	 * does not need to be atomic because it is only done once at
-	 * setup time
-	 */
-	write_idt_entry(idt_table, gate, &s);
-	write_trace_idt_entry(gate, &s);
-}
-
-/*
- * This needs to use 'idt_table' rather than 'idt', and
- * thus use the _nonmapped_ version of the IDT, as the
- * Pentium F0 0F bugfix can have resulted in the mapped
- * IDT being write-protected.
- */
-#define set_intr_gate_notrace(n, addr)					\
-	do {								\
-		BUG_ON((unsigned)n > 0xFF);				\
-		_set_gate(n, GATE_INTERRUPT, (void *)addr, 0, 0,	\
-			  __KERNEL_CS);					\
-	} while (0)
-
-#define set_intr_gate(n, addr)						\
-	do {								\
-		set_intr_gate_notrace(n, addr);				\
-		_trace_set_gate(n, GATE_INTERRUPT, (void *)trace_##addr,\
-				0, 0, __KERNEL_CS);			\
-	} while (0)
-
-extern int first_system_vector;
-/* used_vectors is BITMAP for irq is not managed by percpu vector_irq */
 extern unsigned long used_vectors[];
 
-static inline void alloc_system_vector(int vector)
-{
-	if (!test_bit(vector, used_vectors)) {
-		set_bit(vector, used_vectors);
-		if (first_system_vector > vector)
-			first_system_vector = vector;
-	} else {
-		BUG();
-	}
-}
-
-#define alloc_intr_gate(n, addr)				\
-	do {							\
-		alloc_system_vector(n);				\
-		set_intr_gate(n, addr);				\
-	} while (0)
-
-/*
- * This routine sets up an interrupt gate at directory privilege level 3.
- */
-static inline void set_system_intr_gate(unsigned int n, void *addr)
-{
-	BUG_ON((unsigned)n > 0xFF);
-	_set_gate(n, GATE_INTERRUPT, addr, 0x3, 0, __KERNEL_CS);
-}
-
-static inline void set_system_trap_gate(unsigned int n, void *addr)
-{
-	BUG_ON((unsigned)n > 0xFF);
-	_set_gate(n, GATE_TRAP, addr, 0x3, 0, __KERNEL_CS);
-}
-
-static inline void set_trap_gate(unsigned int n, void *addr)
-{
-	BUG_ON((unsigned)n > 0xFF);
-	_set_gate(n, GATE_TRAP, addr, 0, 0, __KERNEL_CS);
-}
-
-static inline void set_task_gate(unsigned int n, unsigned int gdt_entry)
-{
-	BUG_ON((unsigned)n > 0xFF);
-	_set_gate(n, GATE_TASK, (void *)0, 0, 0, (gdt_entry<<3));
-}
-
-static inline void set_intr_gate_ist(int n, void *addr, unsigned ist)
-{
-	BUG_ON((unsigned)n > 0xFF);
-	_set_gate(n, GATE_INTERRUPT, addr, 0, ist, __KERNEL_CS);
-}
-
-static inline void set_system_intr_gate_ist(int n, void *addr, unsigned ist)
-{
-	BUG_ON((unsigned)n > 0xFF);
-	_set_gate(n, GATE_INTERRUPT, addr, 0x3, ist, __KERNEL_CS);
-}
-
 #ifdef CONFIG_X86_64
 DECLARE_PER_CPU(u32, debug_idt_ctr);
 static inline bool is_debug_idt_enabled(void)
@@ -567,31 +420,6 @@ static inline void load_debug_idt(void)
 }
 #endif
 
-#ifdef CONFIG_TRACING
-extern atomic_t trace_idt_ctr;
-static inline bool is_trace_idt_enabled(void)
-{
-	if (atomic_read(&trace_idt_ctr))
-		return true;
-
-	return false;
-}
-
-static inline void load_trace_idt(void)
-{
-	load_idt((const struct desc_ptr *)&trace_idt_descr);
-}
-#else
-static inline bool is_trace_idt_enabled(void)
-{
-	return false;
-}
-
-static inline void load_trace_idt(void)
-{
-}
-#endif
-
 /*
  * The load_current_idt() must be called with interrupts disabled
  * to avoid races. That way the IDT will always be set back to the expected
@@ -603,9 +431,25 @@ static inline void load_current_idt(void)
 {
 	if (is_debug_idt_enabled())
 		load_debug_idt();
-	else if (is_trace_idt_enabled())
-		load_trace_idt();
 	else
 		load_idt((const struct desc_ptr *)&idt_descr);
 }
+
+extern void idt_setup_early_handler(void);
+extern void idt_setup_early_traps(void);
+extern void idt_setup_traps(void);
+extern void idt_setup_apic_and_irq_gates(void);
+
+#ifdef CONFIG_X86_64
+extern void idt_setup_early_pf(void);
+extern void idt_setup_ist_traps(void);
+extern void idt_setup_debugidt_traps(void);
+#else
+static inline void idt_setup_early_pf(void) { }
+static inline void idt_setup_ist_traps(void) { }
+static inline void idt_setup_debugidt_traps(void) { }
+#endif
+
+extern void idt_invalidate(void *addr);
+
 #endif /* _ASM_X86_DESC_H */
diff --git a/arch/x86/include/asm/desc_defs.h b/arch/x86/include/asm/desc_defs.h
index 49265345d4d2..346d252029b7 100644
--- a/arch/x86/include/asm/desc_defs.h
+++ b/arch/x86/include/asm/desc_defs.h
@@ -11,34 +11,30 @@
 
 #include <linux/types.h>
 
-/*
- * FIXME: Accessing the desc_struct through its fields is more elegant,
- * and should be the one valid thing to do. However, a lot of open code
- * still touches the a and b accessors, and doing this allow us to do it
- * incrementally. We keep the signature as a struct, rather than a union,
- * so we can get rid of it transparently in the future -- glommer
- */
 /* 8 byte segment descriptor */
 struct desc_struct {
-	union {
-		struct {
-			unsigned int a;
-			unsigned int b;
-		};
-		struct {
-			u16 limit0;
-			u16 base0;
-			unsigned base1: 8, type: 4, s: 1, dpl: 2, p: 1;
-			unsigned limit: 4, avl: 1, l: 1, d: 1, g: 1, base2: 8;
-		};
-	};
+	u16	limit0;
+	u16	base0;
+	u16	base1: 8, type: 4, s: 1, dpl: 2, p: 1;
+	u16	limit1: 4, avl: 1, l: 1, d: 1, g: 1, base2: 8;
 } __attribute__((packed));
 
-#define GDT_ENTRY_INIT(flags, base, limit) { { { \
-		.a = ((limit) & 0xffff) | (((base) & 0xffff) << 16), \
-		.b = (((base) & 0xff0000) >> 16) | (((flags) & 0xf0ff) << 8) | \
-			((limit) & 0xf0000) | ((base) & 0xff000000), \
-	} } }
+#define GDT_ENTRY_INIT(flags, base, limit)			\
+	{							\
+		.limit0		= (u16) (limit),		\
+		.limit1		= ((limit) >> 16) & 0x0F,	\
+		.base0		= (u16) (base),			\
+		.base1		= ((base) >> 16) & 0xFF,	\
+		.base2		= ((base) >> 24) & 0xFF,	\
+		.type		= (flags & 0x0f),		\
+		.s		= (flags >> 4) & 0x01,		\
+		.dpl		= (flags >> 5) & 0x03,		\
+		.p		= (flags >> 7) & 0x01,		\
+		.avl		= (flags >> 12) & 0x01,		\
+		.l		= (flags >> 13) & 0x01,		\
+		.d		= (flags >> 14) & 0x01,		\
+		.g		= (flags >> 15) & 0x01,		\
+	}
 
 enum {
 	GATE_INTERRUPT = 0xE,
@@ -47,49 +43,63 @@ enum {
 	GATE_TASK = 0x5,
 };
 
-/* 16byte gate */
-struct gate_struct64 {
-	u16 offset_low;
-	u16 segment;
-	unsigned ist : 3, zero0 : 5, type : 5, dpl : 2, p : 1;
-	u16 offset_middle;
-	u32 offset_high;
-	u32 zero1;
-} __attribute__((packed));
-
-#define PTR_LOW(x) ((unsigned long long)(x) & 0xFFFF)
-#define PTR_MIDDLE(x) (((unsigned long long)(x) >> 16) & 0xFFFF)
-#define PTR_HIGH(x) ((unsigned long long)(x) >> 32)
-
 enum {
 	DESC_TSS = 0x9,
 	DESC_LDT = 0x2,
 	DESCTYPE_S = 0x10,	/* !system */
 };
 
-/* LDT or TSS descriptor in the GDT. 16 bytes. */
-struct ldttss_desc64 {
-	u16 limit0;
-	u16 base0;
-	unsigned base1 : 8, type : 5, dpl : 2, p : 1;
-	unsigned limit1 : 4, zero0 : 3, g : 1, base2 : 8;
-	u32 base3;
-	u32 zero1;
+/* LDT or TSS descriptor in the GDT. */
+struct ldttss_desc {
+	u16	limit0;
+	u16	base0;
+
+	u16	base1 : 8, type : 5, dpl : 2, p : 1;
+	u16	limit1 : 4, zero0 : 3, g : 1, base2 : 8;
+#ifdef CONFIG_X86_64
+	u32	base3;
+	u32	zero1;
+#endif
 } __attribute__((packed));
 
+typedef struct ldttss_desc ldt_desc;
+typedef struct ldttss_desc tss_desc;
+
+struct idt_bits {
+	u16		ist	: 3,
+			zero	: 5,
+			type	: 5,
+			dpl	: 2,
+			p	: 1;
+} __attribute__((packed));
+
+struct gate_struct {
+	u16		offset_low;
+	u16		segment;
+	struct idt_bits	bits;
+	u16		offset_middle;
+#ifdef CONFIG_X86_64
+	u32		offset_high;
+	u32		reserved;
+#endif
+} __attribute__((packed));
+
+typedef struct gate_struct gate_desc;
+
+static inline unsigned long gate_offset(const gate_desc *g)
+{
 #ifdef CONFIG_X86_64
-typedef struct gate_struct64 gate_desc;
-typedef struct ldttss_desc64 ldt_desc;
-typedef struct ldttss_desc64 tss_desc;
-#define gate_offset(g) ((g).offset_low | ((unsigned long)(g).offset_middle << 16) | ((unsigned long)(g).offset_high << 32))
-#define gate_segment(g) ((g).segment)
+	return g->offset_low | ((unsigned long)g->offset_middle << 16) |
+		((unsigned long) g->offset_high << 32);
 #else
-typedef struct desc_struct gate_desc;
-typedef struct desc_struct ldt_desc;
-typedef struct desc_struct tss_desc;
-#define gate_offset(g)		(((g).b & 0xffff0000) | ((g).a & 0x0000ffff))
-#define gate_segment(g)		((g).a >> 16)
+	return g->offset_low | ((unsigned long)g->offset_middle << 16);
 #endif
+}
+
+static inline unsigned long gate_segment(const gate_desc *g)
+{
+	return g->segment;
+}
 
 struct desc_ptr {
 	unsigned short size;
diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
index 5dff775af7cd..c10c9128f54e 100644
--- a/arch/x86/include/asm/disabled-features.h
+++ b/arch/x86/include/asm/disabled-features.h
@@ -21,11 +21,13 @@
 # define DISABLE_K6_MTRR	(1<<(X86_FEATURE_K6_MTRR & 31))
 # define DISABLE_CYRIX_ARR	(1<<(X86_FEATURE_CYRIX_ARR & 31))
 # define DISABLE_CENTAUR_MCR	(1<<(X86_FEATURE_CENTAUR_MCR & 31))
+# define DISABLE_PCID		0
 #else
 # define DISABLE_VME		0
 # define DISABLE_K6_MTRR	0
 # define DISABLE_CYRIX_ARR	0
 # define DISABLE_CENTAUR_MCR	0
+# define DISABLE_PCID		(1<<(X86_FEATURE_PCID & 31))
 #endif /* CONFIG_X86_64 */
 
 #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
@@ -49,7 +51,7 @@
 #define DISABLED_MASK1	0
 #define DISABLED_MASK2	0
 #define DISABLED_MASK3	(DISABLE_CYRIX_ARR|DISABLE_CENTAUR_MCR|DISABLE_K6_MTRR)
-#define DISABLED_MASK4	0
+#define DISABLED_MASK4	(DISABLE_PCID)
 #define DISABLED_MASK5	0
 #define DISABLED_MASK6	0
 #define DISABLED_MASK7	0
diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h
index 398c79889f5c..1387dafdba2d 100644
--- a/arch/x86/include/asm/dma-mapping.h
+++ b/arch/x86/include/asm/dma-mapping.h
@@ -12,6 +12,7 @@
 #include <asm/io.h>
 #include <asm/swiotlb.h>
 #include <linux/dma-contiguous.h>
+#include <linux/mem_encrypt.h>
 
 #ifdef CONFIG_ISA
 # define ISA_DMA_BIT_MASK DMA_BIT_MASK(24)
@@ -57,12 +58,12 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
 
 static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
 {
-	return paddr;
+	return __sme_set(paddr);
 }
 
 static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr)
 {
-	return daddr;
+	return __sme_clr(daddr);
 }
 #endif /* CONFIG_X86_DMA_REMAP */
 
diff --git a/arch/x86/include/asm/dmi.h b/arch/x86/include/asm/dmi.h
index 3c69fed215c5..a8e15b04565b 100644
--- a/arch/x86/include/asm/dmi.h
+++ b/arch/x86/include/asm/dmi.h
@@ -13,9 +13,9 @@ static __always_inline __init void *dmi_alloc(unsigned len)
 }
 
 /* Use early IO mappings for DMI because it's initialized early */
-#define dmi_early_remap		early_ioremap
-#define dmi_early_unmap		early_iounmap
-#define dmi_remap		ioremap_cache
-#define dmi_unmap		iounmap
+#define dmi_early_remap		early_memremap
+#define dmi_early_unmap		early_memunmap
+#define dmi_remap(_x, _l)	memremap(_x, _l, MEMREMAP_WB)
+#define dmi_unmap(_x)		memunmap(_x)
 
 #endif /* _ASM_X86_DMI_H */
diff --git a/arch/x86/include/asm/e820/api.h b/arch/x86/include/asm/e820/api.h
index a504adc661a4..cd266d830e49 100644
--- a/arch/x86/include/asm/e820/api.h
+++ b/arch/x86/include/asm/e820/api.h
@@ -39,6 +39,8 @@ extern void e820__setup_pci_gap(void);
 extern void e820__reallocate_tables(void);
 extern void e820__register_nosave_regions(unsigned long limit_pfn);
 
+extern int  e820__get_entry_type(u64 start, u64 end);
+
 /*
  * Returns true iff the specified range [start,end) is completely contained inside
  * the ISA region.
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index 9aeb91935ce0..04330c8d9af9 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -126,15 +126,15 @@ do {						\
 	pr_reg[4] = regs->di;			\
 	pr_reg[5] = regs->bp;			\
 	pr_reg[6] = regs->ax;			\
-	pr_reg[7] = regs->ds & 0xffff;		\
-	pr_reg[8] = regs->es & 0xffff;		\
-	pr_reg[9] = regs->fs & 0xffff;		\
+	pr_reg[7] = regs->ds;			\
+	pr_reg[8] = regs->es;			\
+	pr_reg[9] = regs->fs;			\
 	pr_reg[11] = regs->orig_ax;		\
 	pr_reg[12] = regs->ip;			\
-	pr_reg[13] = regs->cs & 0xffff;		\
+	pr_reg[13] = regs->cs;			\
 	pr_reg[14] = regs->flags;		\
 	pr_reg[15] = regs->sp;			\
-	pr_reg[16] = regs->ss & 0xffff;		\
+	pr_reg[16] = regs->ss;			\
 } while (0);
 
 #define ELF_CORE_COPY_REGS(pr_reg, regs)	\
@@ -204,6 +204,7 @@ void set_personality_ia32(bool);
 
 #define ELF_CORE_COPY_REGS(pr_reg, regs)			\
 do {								\
+	unsigned long base;					\
 	unsigned v;						\
 	(pr_reg)[0] = (regs)->r15;				\
 	(pr_reg)[1] = (regs)->r14;				\
@@ -226,8 +227,8 @@ do {								\
 	(pr_reg)[18] = (regs)->flags;				\
 	(pr_reg)[19] = (regs)->sp;				\
 	(pr_reg)[20] = (regs)->ss;				\
-	(pr_reg)[21] = current->thread.fsbase;			\
-	(pr_reg)[22] = current->thread.gsbase;			\
+	rdmsrl(MSR_FS_BASE, base); (pr_reg)[21] = base;		\
+	rdmsrl(MSR_KERNEL_GS_BASE, base); (pr_reg)[22] = base;	\
 	asm("movl %%ds,%0" : "=r" (v)); (pr_reg)[23] = v;	\
 	asm("movl %%es,%0" : "=r" (v)); (pr_reg)[24] = v;	\
 	asm("movl %%fs,%0" : "=r" (v)); (pr_reg)[25] = v;	\
@@ -304,8 +305,8 @@ static inline int mmap_is_ia32(void)
 		test_thread_flag(TIF_ADDR32));
 }
 
-extern unsigned long tasksize_32bit(void);
-extern unsigned long tasksize_64bit(void);
+extern unsigned long task_size_32bit(void);
+extern unsigned long task_size_64bit(int full_addr_space);
 extern unsigned long get_mmap_base(int is_legacy);
 
 #ifdef CONFIG_X86_32
diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h
index 07b06955a05d..aa15d1f7e530 100644
--- a/arch/x86/include/asm/entry_arch.h
+++ b/arch/x86/include/asm/entry_arch.h
@@ -13,20 +13,14 @@
 BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR)
 BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR)
 BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR)
-BUILD_INTERRUPT3(irq_move_cleanup_interrupt, IRQ_MOVE_CLEANUP_VECTOR,
-		 smp_irq_move_cleanup_interrupt)
-BUILD_INTERRUPT3(reboot_interrupt, REBOOT_VECTOR, smp_reboot_interrupt)
+BUILD_INTERRUPT(irq_move_cleanup_interrupt, IRQ_MOVE_CLEANUP_VECTOR)
+BUILD_INTERRUPT(reboot_interrupt, REBOOT_VECTOR)
 #endif
 
-BUILD_INTERRUPT(x86_platform_ipi, X86_PLATFORM_IPI_VECTOR)
-
 #ifdef CONFIG_HAVE_KVM
-BUILD_INTERRUPT3(kvm_posted_intr_ipi, POSTED_INTR_VECTOR,
-		 smp_kvm_posted_intr_ipi)
-BUILD_INTERRUPT3(kvm_posted_intr_wakeup_ipi, POSTED_INTR_WAKEUP_VECTOR,
-		 smp_kvm_posted_intr_wakeup_ipi)
-BUILD_INTERRUPT3(kvm_posted_intr_nested_ipi, POSTED_INTR_NESTED_VECTOR,
-		 smp_kvm_posted_intr_nested_ipi)
+BUILD_INTERRUPT(kvm_posted_intr_ipi, POSTED_INTR_VECTOR)
+BUILD_INTERRUPT(kvm_posted_intr_wakeup_ipi, POSTED_INTR_WAKEUP_VECTOR)
+BUILD_INTERRUPT(kvm_posted_intr_nested_ipi, POSTED_INTR_NESTED_VECTOR)
 #endif
 
 /*
@@ -41,6 +35,7 @@ BUILD_INTERRUPT3(kvm_posted_intr_nested_ipi, POSTED_INTR_NESTED_VECTOR,
 BUILD_INTERRUPT(apic_timer_interrupt,LOCAL_TIMER_VECTOR)
 BUILD_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR)
 BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR)
+BUILD_INTERRUPT(x86_platform_ipi, X86_PLATFORM_IPI_VECTOR)
 
 #ifdef CONFIG_IRQ_WORK
 BUILD_INTERRUPT(irq_work_interrupt, IRQ_WORK_VECTOR)
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index b65155cc3760..dcd9fb55e679 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -157,6 +157,26 @@ static inline void __set_fixmap(enum fixed_addresses idx,
 }
 #endif
 
+/*
+ * FIXMAP_PAGE_NOCACHE is used for MMIO. Memory encryption is not
+ * supported for MMIO addresses, so make sure that the memory encryption
+ * mask is not part of the page attributes.
+ */
+#define FIXMAP_PAGE_NOCACHE PAGE_KERNEL_IO_NOCACHE
+
+/*
+ * Early memremap routines used for in-place encryption. The mappings created
+ * by these routines are intended to be used as temporary mappings.
+ */
+void __init *early_memremap_encrypted(resource_size_t phys_addr,
+				      unsigned long size);
+void __init *early_memremap_encrypted_wp(resource_size_t phys_addr,
+					 unsigned long size);
+void __init *early_memremap_decrypted(resource_size_t phys_addr,
+				      unsigned long size);
+void __init *early_memremap_decrypted_wp(resource_size_t phys_addr,
+					 unsigned long size);
+
 #include <asm-generic/fixmap.h>
 
 #define __late_set_fixmap(idx, phys, flags) __set_fixmap(idx, phys, flags)
diff --git a/arch/x86/include/asm/futex.h b/arch/x86/include/asm/futex.h
index b4c1f5453436..f4dc9b63bdda 100644
--- a/arch/x86/include/asm/futex.h
+++ b/arch/x86/include/asm/futex.h
@@ -41,20 +41,11 @@
 		       "+m" (*uaddr), "=&r" (tem)		\
 		     : "r" (oparg), "i" (-EFAULT), "1" (0))
 
-static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
+static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
+		u32 __user *uaddr)
 {
-	int op = (encoded_op >> 28) & 7;
-	int cmp = (encoded_op >> 24) & 15;
-	int oparg = (encoded_op << 8) >> 20;
-	int cmparg = (encoded_op << 20) >> 20;
 	int oldval = 0, ret, tem;
 
-	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
-		oparg = 1 << oparg;
-
-	if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
-		return -EFAULT;
-
 	pagefault_disable();
 
 	switch (op) {
@@ -80,30 +71,9 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
 
 	pagefault_enable();
 
-	if (!ret) {
-		switch (cmp) {
-		case FUTEX_OP_CMP_EQ:
-			ret = (oldval == cmparg);
-			break;
-		case FUTEX_OP_CMP_NE:
-			ret = (oldval != cmparg);
-			break;
-		case FUTEX_OP_CMP_LT:
-			ret = (oldval < cmparg);
-			break;
-		case FUTEX_OP_CMP_GE:
-			ret = (oldval >= cmparg);
-			break;
-		case FUTEX_OP_CMP_LE:
-			ret = (oldval <= cmparg);
-			break;
-		case FUTEX_OP_CMP_GT:
-			ret = (oldval > cmparg);
-			break;
-		default:
-			ret = -ENOSYS;
-		}
-	}
+	if (!ret)
+		*oval = oldval;
+
 	return ret;
 }
 
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index d6dbafbd4207..6dfe366a8804 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -46,26 +46,6 @@ extern asmlinkage void deferred_error_interrupt(void);
 extern asmlinkage void call_function_interrupt(void);
 extern asmlinkage void call_function_single_interrupt(void);
 
-#ifdef CONFIG_TRACING
-/* Interrupt handlers registered during init_IRQ */
-extern void trace_apic_timer_interrupt(void);
-extern void trace_x86_platform_ipi(void);
-extern void trace_error_interrupt(void);
-extern void trace_irq_work_interrupt(void);
-extern void trace_spurious_interrupt(void);
-extern void trace_thermal_interrupt(void);
-extern void trace_reschedule_interrupt(void);
-extern void trace_threshold_interrupt(void);
-extern void trace_deferred_error_interrupt(void);
-extern void trace_call_function_interrupt(void);
-extern void trace_call_function_single_interrupt(void);
-#define trace_irq_move_cleanup_interrupt  irq_move_cleanup_interrupt
-#define trace_reboot_interrupt  reboot_interrupt
-#define trace_kvm_posted_intr_ipi kvm_posted_intr_ipi
-#define trace_kvm_posted_intr_wakeup_ipi kvm_posted_intr_wakeup_ipi
-#define trace_kvm_posted_intr_nested_ipi kvm_posted_intr_nested_ipi
-#endif /* CONFIG_TRACING */
-
 #ifdef	CONFIG_X86_LOCAL_APIC
 struct irq_data;
 struct pci_dev;
diff --git a/arch/x86/include/asm/init.h b/arch/x86/include/asm/init.h
index 474eb8c66fee..05c4aa00cc86 100644
--- a/arch/x86/include/asm/init.h
+++ b/arch/x86/include/asm/init.h
@@ -7,6 +7,7 @@ struct x86_mapping_info {
 	unsigned long page_flag;	 /* page flag for PMD or PUD entry */
 	unsigned long offset;		 /* ident mapping offset */
 	bool direct_gbpages;		 /* PUD level 1GB page support */
+	unsigned long kernpg_flag;	 /* kernel pagetable flag override */
 };
 
 int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
diff --git a/arch/x86/include/asm/intel_rdt.h b/arch/x86/include/asm/intel_rdt.h
deleted file mode 100644
index 597dc4995678..000000000000
--- a/arch/x86/include/asm/intel_rdt.h
+++ /dev/null
@@ -1,286 +0,0 @@
-#ifndef _ASM_X86_INTEL_RDT_H
-#define _ASM_X86_INTEL_RDT_H
-
-#ifdef CONFIG_INTEL_RDT_A
-
-#include <linux/sched.h>
-#include <linux/kernfs.h>
-#include <linux/jump_label.h>
-
-#include <asm/intel_rdt_common.h>
-
-#define IA32_L3_QOS_CFG		0xc81
-#define IA32_L3_CBM_BASE	0xc90
-#define IA32_L2_CBM_BASE	0xd10
-#define IA32_MBA_THRTL_BASE	0xd50
-
-#define L3_QOS_CDP_ENABLE	0x01ULL
-
-/**
- * struct rdtgroup - store rdtgroup's data in resctrl file system.
- * @kn:				kernfs node
- * @rdtgroup_list:		linked list for all rdtgroups
- * @closid:			closid for this rdtgroup
- * @cpu_mask:			CPUs assigned to this rdtgroup
- * @flags:			status bits
- * @waitcount:			how many cpus expect to find this
- *				group when they acquire rdtgroup_mutex
- */
-struct rdtgroup {
-	struct kernfs_node	*kn;
-	struct list_head	rdtgroup_list;
-	int			closid;
-	struct cpumask		cpu_mask;
-	int			flags;
-	atomic_t		waitcount;
-};
-
-/* rdtgroup.flags */
-#define	RDT_DELETED		1
-
-/* rftype.flags */
-#define RFTYPE_FLAGS_CPUS_LIST	1
-
-/* List of all resource groups */
-extern struct list_head rdt_all_groups;
-
-extern int max_name_width, max_data_width;
-
-int __init rdtgroup_init(void);
-
-/**
- * struct rftype - describe each file in the resctrl file system
- * @name:	File name
- * @mode:	Access mode
- * @kf_ops:	File operations
- * @flags:	File specific RFTYPE_FLAGS_* flags
- * @seq_show:	Show content of the file
- * @write:	Write to the file
- */
-struct rftype {
-	char			*name;
-	umode_t			mode;
-	struct kernfs_ops	*kf_ops;
-	unsigned long		flags;
-
-	int (*seq_show)(struct kernfs_open_file *of,
-			struct seq_file *sf, void *v);
-	/*
-	 * write() is the generic write callback which maps directly to
-	 * kernfs write operation and overrides all other operations.
-	 * Maximum write size is determined by ->max_write_len.
-	 */
-	ssize_t (*write)(struct kernfs_open_file *of,
-			 char *buf, size_t nbytes, loff_t off);
-};
-
-/**
- * struct rdt_domain - group of cpus sharing an RDT resource
- * @list:	all instances of this resource
- * @id:		unique id for this instance
- * @cpu_mask:	which cpus share this resource
- * @ctrl_val:	array of cache or mem ctrl values (indexed by CLOSID)
- * @new_ctrl:	new ctrl value to be loaded
- * @have_new_ctrl: did user provide new_ctrl for this domain
- */
-struct rdt_domain {
-	struct list_head	list;
-	int			id;
-	struct cpumask		cpu_mask;
-	u32			*ctrl_val;
-	u32			new_ctrl;
-	bool			have_new_ctrl;
-};
-
-/**
- * struct msr_param - set a range of MSRs from a domain
- * @res:       The resource to use
- * @low:       Beginning index from base MSR
- * @high:      End index
- */
-struct msr_param {
-	struct rdt_resource	*res;
-	int			low;
-	int			high;
-};
-
-/**
- * struct rdt_cache - Cache allocation related data
- * @cbm_len:		Length of the cache bit mask
- * @min_cbm_bits:	Minimum number of consecutive bits to be set
- * @cbm_idx_mult:	Multiplier of CBM index
- * @cbm_idx_offset:	Offset of CBM index. CBM index is computed by:
- *			closid * cbm_idx_multi + cbm_idx_offset
- *			in a cache bit mask
- */
-struct rdt_cache {
-	unsigned int	cbm_len;
-	unsigned int	min_cbm_bits;
-	unsigned int	cbm_idx_mult;
-	unsigned int	cbm_idx_offset;
-};
-
-/**
- * struct rdt_membw - Memory bandwidth allocation related data
- * @max_delay:		Max throttle delay. Delay is the hardware
- *			representation for memory bandwidth.
- * @min_bw:		Minimum memory bandwidth percentage user can request
- * @bw_gran:		Granularity at which the memory bandwidth is allocated
- * @delay_linear:	True if memory B/W delay is in linear scale
- * @mb_map:		Mapping of memory B/W percentage to memory B/W delay
- */
-struct rdt_membw {
-	u32		max_delay;
-	u32		min_bw;
-	u32		bw_gran;
-	u32		delay_linear;
-	u32		*mb_map;
-};
-
-/**
- * struct rdt_resource - attributes of an RDT resource
- * @enabled:		Is this feature enabled on this machine
- * @capable:		Is this feature available on this machine
- * @name:		Name to use in "schemata" file
- * @num_closid:		Number of CLOSIDs available
- * @cache_level:	Which cache level defines scope of this resource
- * @default_ctrl:	Specifies default cache cbm or memory B/W percent.
- * @msr_base:		Base MSR address for CBMs
- * @msr_update:		Function pointer to update QOS MSRs
- * @data_width:		Character width of data when displaying
- * @domains:		All domains for this resource
- * @cache:		Cache allocation related data
- * @info_files:		resctrl info files for the resource
- * @nr_info_files:	Number of info files
- * @format_str:		Per resource format string to show domain value
- * @parse_ctrlval:	Per resource function pointer to parse control values
- */
-struct rdt_resource {
-	bool			enabled;
-	bool			capable;
-	char			*name;
-	int			num_closid;
-	int			cache_level;
-	u32			default_ctrl;
-	unsigned int		msr_base;
-	void (*msr_update)	(struct rdt_domain *d, struct msr_param *m,
-				 struct rdt_resource *r);
-	int			data_width;
-	struct list_head	domains;
-	struct rdt_cache	cache;
-	struct rdt_membw	membw;
-	struct rftype		*info_files;
-	int			nr_info_files;
-	const char		*format_str;
-	int (*parse_ctrlval)	(char *buf, struct rdt_resource *r,
-				 struct rdt_domain *d);
-};
-
-void rdt_get_cache_infofile(struct rdt_resource *r);
-void rdt_get_mba_infofile(struct rdt_resource *r);
-int parse_cbm(char *buf, struct rdt_resource *r, struct rdt_domain *d);
-int parse_bw(char *buf, struct rdt_resource *r,  struct rdt_domain *d);
-
-extern struct mutex rdtgroup_mutex;
-
-extern struct rdt_resource rdt_resources_all[];
-extern struct rdtgroup rdtgroup_default;
-DECLARE_STATIC_KEY_FALSE(rdt_enable_key);
-
-int __init rdtgroup_init(void);
-
-enum {
-	RDT_RESOURCE_L3,
-	RDT_RESOURCE_L3DATA,
-	RDT_RESOURCE_L3CODE,
-	RDT_RESOURCE_L2,
-	RDT_RESOURCE_MBA,
-
-	/* Must be the last */
-	RDT_NUM_RESOURCES,
-};
-
-#define for_each_capable_rdt_resource(r)				      \
-	for (r = rdt_resources_all; r < rdt_resources_all + RDT_NUM_RESOURCES;\
-	     r++)							      \
-		if (r->capable)
-
-#define for_each_enabled_rdt_resource(r)				      \
-	for (r = rdt_resources_all; r < rdt_resources_all + RDT_NUM_RESOURCES;\
-	     r++)							      \
-		if (r->enabled)
-
-/* CPUID.(EAX=10H, ECX=ResID=1).EAX */
-union cpuid_0x10_1_eax {
-	struct {
-		unsigned int cbm_len:5;
-	} split;
-	unsigned int full;
-};
-
-/* CPUID.(EAX=10H, ECX=ResID=3).EAX */
-union cpuid_0x10_3_eax {
-	struct {
-		unsigned int max_delay:12;
-	} split;
-	unsigned int full;
-};
-
-/* CPUID.(EAX=10H, ECX=ResID).EDX */
-union cpuid_0x10_x_edx {
-	struct {
-		unsigned int cos_max:16;
-	} split;
-	unsigned int full;
-};
-
-DECLARE_PER_CPU_READ_MOSTLY(int, cpu_closid);
-
-void rdt_ctrl_update(void *arg);
-struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn);
-void rdtgroup_kn_unlock(struct kernfs_node *kn);
-ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
-				char *buf, size_t nbytes, loff_t off);
-int rdtgroup_schemata_show(struct kernfs_open_file *of,
-			   struct seq_file *s, void *v);
-
-/*
- * intel_rdt_sched_in() - Writes the task's CLOSid to IA32_PQR_MSR
- *
- * Following considerations are made so that this has minimal impact
- * on scheduler hot path:
- * - This will stay as no-op unless we are running on an Intel SKU
- *   which supports resource control and we enable by mounting the
- *   resctrl file system.
- * - Caches the per cpu CLOSid values and does the MSR write only
- *   when a task with a different CLOSid is scheduled in.
- *
- * Must be called with preemption disabled.
- */
-static inline void intel_rdt_sched_in(void)
-{
-	if (static_branch_likely(&rdt_enable_key)) {
-		struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
-		int closid;
-
-		/*
-		 * If this task has a closid assigned, use it.
-		 * Else use the closid assigned to this cpu.
-		 */
-		closid = current->closid;
-		if (closid == 0)
-			closid = this_cpu_read(cpu_closid);
-
-		if (closid != state->closid) {
-			state->closid = closid;
-			wrmsr(MSR_IA32_PQR_ASSOC, state->rmid, closid);
-		}
-	}
-}
-
-#else
-
-static inline void intel_rdt_sched_in(void) {}
-
-#endif /* CONFIG_INTEL_RDT_A */
-#endif /* _ASM_X86_INTEL_RDT_H */
diff --git a/arch/x86/include/asm/intel_rdt_common.h b/arch/x86/include/asm/intel_rdt_common.h
deleted file mode 100644
index b31081b89407..000000000000
--- a/arch/x86/include/asm/intel_rdt_common.h
+++ /dev/null
@@ -1,27 +0,0 @@
-#ifndef _ASM_X86_INTEL_RDT_COMMON_H
-#define _ASM_X86_INTEL_RDT_COMMON_H
-
-#define MSR_IA32_PQR_ASSOC	0x0c8f
-
-/**
- * struct intel_pqr_state - State cache for the PQR MSR
- * @rmid:		The cached Resource Monitoring ID
- * @closid:		The cached Class Of Service ID
- * @rmid_usecnt:	The usage counter for rmid
- *
- * The upper 32 bits of MSR_IA32_PQR_ASSOC contain closid and the
- * lower 10 bits rmid. The update to MSR_IA32_PQR_ASSOC always
- * contains both parts, so we need to cache them.
- *
- * The cache also helps to avoid pointless updates if the value does
- * not change.
- */
-struct intel_pqr_state {
-	u32			rmid;
-	u32			closid;
-	int			rmid_usecnt;
-};
-
-DECLARE_PER_CPU(struct intel_pqr_state, pqr_state);
-
-#endif /* _ASM_X86_INTEL_RDT_COMMON_H */
diff --git a/arch/x86/include/asm/intel_rdt_sched.h b/arch/x86/include/asm/intel_rdt_sched.h
new file mode 100644
index 000000000000..b4bbf8b21512
--- /dev/null
+++ b/arch/x86/include/asm/intel_rdt_sched.h
@@ -0,0 +1,92 @@
+#ifndef _ASM_X86_INTEL_RDT_SCHED_H
+#define _ASM_X86_INTEL_RDT_SCHED_H
+
+#ifdef CONFIG_INTEL_RDT
+
+#include <linux/sched.h>
+#include <linux/jump_label.h>
+
+#define IA32_PQR_ASSOC	0x0c8f
+
+/**
+ * struct intel_pqr_state - State cache for the PQR MSR
+ * @cur_rmid:		The cached Resource Monitoring ID
+ * @cur_closid:	The cached Class Of Service ID
+ * @default_rmid:	The user assigned Resource Monitoring ID
+ * @default_closid:	The user assigned cached Class Of Service ID
+ *
+ * The upper 32 bits of IA32_PQR_ASSOC contain closid and the
+ * lower 10 bits rmid. The update to IA32_PQR_ASSOC always
+ * contains both parts, so we need to cache them. This also
+ * stores the user configured per cpu CLOSID and RMID.
+ *
+ * The cache also helps to avoid pointless updates if the value does
+ * not change.
+ */
+struct intel_pqr_state {
+	u32			cur_rmid;
+	u32			cur_closid;
+	u32			default_rmid;
+	u32			default_closid;
+};
+
+DECLARE_PER_CPU(struct intel_pqr_state, pqr_state);
+
+DECLARE_STATIC_KEY_FALSE(rdt_enable_key);
+DECLARE_STATIC_KEY_FALSE(rdt_alloc_enable_key);
+DECLARE_STATIC_KEY_FALSE(rdt_mon_enable_key);
+
+/*
+ * __intel_rdt_sched_in() - Writes the task's CLOSid/RMID to IA32_PQR_MSR
+ *
+ * Following considerations are made so that this has minimal impact
+ * on scheduler hot path:
+ * - This will stay as no-op unless we are running on an Intel SKU
+ *   which supports resource control or monitoring and we enable by
+ *   mounting the resctrl file system.
+ * - Caches the per cpu CLOSid/RMID values and does the MSR write only
+ *   when a task with a different CLOSid/RMID is scheduled in.
+ * - We allocate RMIDs/CLOSids globally in order to keep this as
+ *   simple as possible.
+ * Must be called with preemption disabled.
+ */
+static void __intel_rdt_sched_in(void)
+{
+	struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
+	u32 closid = state->default_closid;
+	u32 rmid = state->default_rmid;
+
+	/*
+	 * If this task has a closid/rmid assigned, use it.
+	 * Else use the closid/rmid assigned to this cpu.
+	 */
+	if (static_branch_likely(&rdt_alloc_enable_key)) {
+		if (current->closid)
+			closid = current->closid;
+	}
+
+	if (static_branch_likely(&rdt_mon_enable_key)) {
+		if (current->rmid)
+			rmid = current->rmid;
+	}
+
+	if (closid != state->cur_closid || rmid != state->cur_rmid) {
+		state->cur_closid = closid;
+		state->cur_rmid = rmid;
+		wrmsr(IA32_PQR_ASSOC, rmid, closid);
+	}
+}
+
+static inline void intel_rdt_sched_in(void)
+{
+	if (static_branch_likely(&rdt_enable_key))
+		__intel_rdt_sched_in();
+}
+
+#else
+
+static inline void intel_rdt_sched_in(void) {}
+
+#endif /* CONFIG_INTEL_RDT */
+
+#endif /* _ASM_X86_INTEL_RDT_SCHED_H */
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index 48febf07e828..c40a95c33bb8 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -69,6 +69,9 @@ build_mmio_write(__writeb, "b", unsigned char, "q", )
 build_mmio_write(__writew, "w", unsigned short, "r", )
 build_mmio_write(__writel, "l", unsigned int, "r", )
 
+#define readb readb
+#define readw readw
+#define readl readl
 #define readb_relaxed(a) __readb(a)
 #define readw_relaxed(a) __readw(a)
 #define readl_relaxed(a) __readl(a)
@@ -76,6 +79,9 @@ build_mmio_write(__writel, "l", unsigned int, "r", )
 #define __raw_readw __readw
 #define __raw_readl __readl
 
+#define writeb writeb
+#define writew writew
+#define writel writel
 #define writeb_relaxed(v, a) __writeb(v, a)
 #define writew_relaxed(v, a) __writew(v, a)
 #define writel_relaxed(v, a) __writel(v, a)
@@ -88,13 +94,15 @@ build_mmio_write(__writel, "l", unsigned int, "r", )
 #ifdef CONFIG_X86_64
 
 build_mmio_read(readq, "q", unsigned long, "=r", :"memory")
+build_mmio_read(__readq, "q", unsigned long, "=r", )
 build_mmio_write(writeq, "q", unsigned long, "r", :"memory")
+build_mmio_write(__writeq, "q", unsigned long, "r", )
 
-#define readq_relaxed(a)	readq(a)
-#define writeq_relaxed(v, a)	writeq(v, a)
+#define readq_relaxed(a)	__readq(a)
+#define writeq_relaxed(v, a)	__writeq(v, a)
 
-#define __raw_readq(a)		readq(a)
-#define __raw_writeq(val, addr)	writeq(val, addr)
+#define __raw_readq		__readq
+#define __raw_writeq		__writeq
 
 /* Let people know that we have them */
 #define readq			readq
@@ -119,6 +127,7 @@ static inline phys_addr_t virt_to_phys(volatile void *address)
 {
 	return __pa(address);
 }
+#define virt_to_phys virt_to_phys
 
 /**
  *	phys_to_virt	-	map physical address to virtual
@@ -137,6 +146,7 @@ static inline void *phys_to_virt(phys_addr_t address)
 {
 	return __va(address);
 }
+#define phys_to_virt phys_to_virt
 
 /*
  * Change "struct page" to physical address.
@@ -169,11 +179,14 @@ static inline unsigned int isa_virt_to_bus(volatile void *address)
  * else, you probably want one of the following.
  */
 extern void __iomem *ioremap_nocache(resource_size_t offset, unsigned long size);
+#define ioremap_nocache ioremap_nocache
 extern void __iomem *ioremap_uc(resource_size_t offset, unsigned long size);
 #define ioremap_uc ioremap_uc
 
 extern void __iomem *ioremap_cache(resource_size_t offset, unsigned long size);
+#define ioremap_cache ioremap_cache
 extern void __iomem *ioremap_prot(resource_size_t offset, unsigned long size, unsigned long prot_val);
+#define ioremap_prot ioremap_prot
 
 /**
  * ioremap     -   map bus memory into CPU space
@@ -193,8 +206,10 @@ static inline void __iomem *ioremap(resource_size_t offset, unsigned long size)
 {
 	return ioremap_nocache(offset, size);
 }
+#define ioremap ioremap
 
 extern void iounmap(volatile void __iomem *addr);
+#define iounmap iounmap
 
 extern void set_iounmap_nonlazy(void);
 
@@ -203,53 +218,6 @@ extern void set_iounmap_nonlazy(void);
 #include <asm-generic/iomap.h>
 
 /*
- * Convert a virtual cached pointer to an uncached pointer
- */
-#define xlate_dev_kmem_ptr(p)	p
-
-/**
- * memset_io	Set a range of I/O memory to a constant value
- * @addr:	The beginning of the I/O-memory range to set
- * @val:	The value to set the memory to
- * @count:	The number of bytes to set
- *
- * Set a range of I/O memory to a given value.
- */
-static inline void
-memset_io(volatile void __iomem *addr, unsigned char val, size_t count)
-{
-	memset((void __force *)addr, val, count);
-}
-
-/**
- * memcpy_fromio	Copy a block of data from I/O memory
- * @dst:		The (RAM) destination for the copy
- * @src:		The (I/O memory) source for the data
- * @count:		The number of bytes to copy
- *
- * Copy a block of data from I/O memory.
- */
-static inline void
-memcpy_fromio(void *dst, const volatile void __iomem *src, size_t count)
-{
-	memcpy(dst, (const void __force *)src, count);
-}
-
-/**
- * memcpy_toio		Copy a block of data into I/O memory
- * @dst:		The (I/O memory) destination for the copy
- * @src:		The (RAM) source for the data
- * @count:		The number of bytes to copy
- *
- * Copy a block of data to I/O memory.
- */
-static inline void
-memcpy_toio(volatile void __iomem *dst, const void *src, size_t count)
-{
-	memcpy((void __force *)dst, src, count);
-}
-
-/*
  * ISA space is 'always mapped' on a typical x86 system, no need to
  * explicitly ioremap() it. The fact that the ISA IO space is mapped
  * to PAGE_OFFSET is pure coincidence - it does not mean ISA values
@@ -341,13 +309,38 @@ BUILDIO(b, b, char)
 BUILDIO(w, w, short)
 BUILDIO(l, , int)
 
+#define inb inb
+#define inw inw
+#define inl inl
+#define inb_p inb_p
+#define inw_p inw_p
+#define inl_p inl_p
+#define insb insb
+#define insw insw
+#define insl insl
+
+#define outb outb
+#define outw outw
+#define outl outl
+#define outb_p outb_p
+#define outw_p outw_p
+#define outl_p outl_p
+#define outsb outsb
+#define outsw outsw
+#define outsl outsl
+
 extern void *xlate_dev_mem_ptr(phys_addr_t phys);
 extern void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr);
 
+#define xlate_dev_mem_ptr xlate_dev_mem_ptr
+#define unxlate_dev_mem_ptr unxlate_dev_mem_ptr
+
 extern int ioremap_change_attr(unsigned long vaddr, unsigned long size,
 				enum page_cache_mode pcm);
 extern void __iomem *ioremap_wc(resource_size_t offset, unsigned long size);
+#define ioremap_wc ioremap_wc
 extern void __iomem *ioremap_wt(resource_size_t offset, unsigned long size);
+#define ioremap_wt ioremap_wt
 
 extern bool is_early_ioremap_ptep(pte_t *ptep);
 
@@ -365,6 +358,9 @@ extern bool xen_biovec_phys_mergeable(const struct bio_vec *vec1,
 
 #define IO_SPACE_LIMIT 0xffff
 
+#include <asm-generic/io.h>
+#undef PCI_IOBASE
+
 #ifdef CONFIG_MTRR
 extern int __must_check arch_phys_wc_index(int handle);
 #define arch_phys_wc_index arch_phys_wc_index
@@ -381,4 +377,12 @@ extern void arch_io_free_memtype_wc(resource_size_t start, resource_size_t size)
 #define arch_io_reserve_memtype_wc arch_io_reserve_memtype_wc
 #endif
 
+extern bool arch_memremap_can_ram_remap(resource_size_t offset,
+					unsigned long size,
+					unsigned long flags);
+#define arch_memremap_can_ram_remap arch_memremap_can_ram_remap
+
+extern bool phys_mem_access_encrypted(unsigned long phys_addr,
+				      unsigned long size);
+
 #endif /* _ASM_X86_IO_H */
diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h
index 668cca540025..9958ceea2fa3 100644
--- a/arch/x86/include/asm/irq.h
+++ b/arch/x86/include/asm/irq.h
@@ -42,10 +42,6 @@ extern bool handle_irq(struct irq_desc *desc, struct pt_regs *regs);
 
 extern __visible unsigned int do_IRQ(struct pt_regs *regs);
 
-/* Interrupt vector management */
-extern DECLARE_BITMAP(used_vectors, NR_VECTORS);
-extern int vector_used_by_percpu_irq(unsigned int vector);
-
 extern void init_ISA_irqs(void);
 
 #ifdef CONFIG_X86_LOCAL_APIC
diff --git a/arch/x86/include/asm/irq_work.h b/arch/x86/include/asm/irq_work.h
index f70604125286..ddbb8ea0f5a9 100644
--- a/arch/x86/include/asm/irq_work.h
+++ b/arch/x86/include/asm/irq_work.h
@@ -3,9 +3,17 @@
 
 #include <asm/cpufeature.h>
 
+#ifdef CONFIG_X86_LOCAL_APIC
 static inline bool arch_irq_work_has_interrupt(void)
 {
 	return boot_cpu_has(X86_FEATURE_APIC);
 }
+extern void arch_irq_work_raise(void);
+#else
+static inline bool arch_irq_work_has_interrupt(void)
+{
+	return false;
+}
+#endif
 
 #endif /* _ASM_IRQ_WORK_H */
diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h
index 70ef205489f0..942c1f444da8 100644
--- a/arch/x86/include/asm/kexec.h
+++ b/arch/x86/include/asm/kexec.h
@@ -147,7 +147,8 @@ unsigned long
 relocate_kernel(unsigned long indirection_page,
 		unsigned long page_list,
 		unsigned long start_address,
-		unsigned int preserve_context);
+		unsigned int preserve_context,
+		unsigned int sme_active);
 #endif
 
 #define ARCH_HAS_KIMAGE_ARCH
@@ -207,6 +208,14 @@ struct kexec_entry64_regs {
 	uint64_t r15;
 	uint64_t rip;
 };
+
+extern int arch_kexec_post_alloc_pages(void *vaddr, unsigned int pages,
+				       gfp_t gfp);
+#define arch_kexec_post_alloc_pages arch_kexec_post_alloc_pages
+
+extern void arch_kexec_pre_free_pages(void *vaddr, unsigned int pages);
+#define arch_kexec_pre_free_pages arch_kexec_pre_free_pages
+
 #endif
 
 typedef void crash_vmclear_fn(void);
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 92c9032502d8..369e41c23f07 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1079,7 +1079,7 @@ void kvm_mmu_init_vm(struct kvm *kvm);
 void kvm_mmu_uninit_vm(struct kvm *kvm);
 void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
 		u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 p_mask,
-		u64 acc_track_mask);
+		u64 acc_track_mask, u64 me_mask);
 
 void kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
 void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
diff --git a/arch/x86/include/asm/lguest.h b/arch/x86/include/asm/lguest.h
deleted file mode 100644
index 73d0c9b92087..000000000000
--- a/arch/x86/include/asm/lguest.h
+++ /dev/null
@@ -1,91 +0,0 @@
-#ifndef _ASM_X86_LGUEST_H
-#define _ASM_X86_LGUEST_H
-
-#define GDT_ENTRY_LGUEST_CS	10
-#define GDT_ENTRY_LGUEST_DS	11
-#define LGUEST_CS		(GDT_ENTRY_LGUEST_CS * 8)
-#define LGUEST_DS		(GDT_ENTRY_LGUEST_DS * 8)
-
-#ifndef __ASSEMBLY__
-#include <asm/desc.h>
-
-#define GUEST_PL 1
-
-/* Page for Switcher text itself, then two pages per cpu */
-#define SWITCHER_TEXT_PAGES (1)
-#define SWITCHER_STACK_PAGES (2 * nr_cpu_ids)
-#define TOTAL_SWITCHER_PAGES (SWITCHER_TEXT_PAGES + SWITCHER_STACK_PAGES)
-
-/* Where we map the Switcher, in both Host and Guest. */
-extern unsigned long switcher_addr;
-
-/* Found in switcher.S */
-extern unsigned long default_idt_entries[];
-
-/* Declarations for definitions in arch/x86/lguest/head_32.S */
-extern char lguest_noirq_iret[];
-extern const char lgstart_cli[], lgend_cli[];
-extern const char lgstart_pushf[], lgend_pushf[];
-
-extern void lguest_iret(void);
-extern void lguest_init(void);
-
-struct lguest_regs {
-	/* Manually saved part. */
-	unsigned long eax, ebx, ecx, edx;
-	unsigned long esi, edi, ebp;
-	unsigned long gs;
-	unsigned long fs, ds, es;
-	unsigned long trapnum, errcode;
-	/* Trap pushed part */
-	unsigned long eip;
-	unsigned long cs;
-	unsigned long eflags;
-	unsigned long esp;
-	unsigned long ss;
-};
-
-/* This is a guest-specific page (mapped ro) into the guest. */
-struct lguest_ro_state {
-	/* Host information we need to restore when we switch back. */
-	u32 host_cr3;
-	struct desc_ptr host_idt_desc;
-	struct desc_ptr host_gdt_desc;
-	u32 host_sp;
-
-	/* Fields which are used when guest is running. */
-	struct desc_ptr guest_idt_desc;
-	struct desc_ptr guest_gdt_desc;
-	struct x86_hw_tss guest_tss;
-	struct desc_struct guest_idt[IDT_ENTRIES];
-	struct desc_struct guest_gdt[GDT_ENTRIES];
-};
-
-struct lg_cpu_arch {
-	/* The GDT entries copied into lguest_ro_state when running. */
-	struct desc_struct gdt[GDT_ENTRIES];
-
-	/* The IDT entries: some copied into lguest_ro_state when running. */
-	struct desc_struct idt[IDT_ENTRIES];
-
-	/* The address of the last guest-visible pagefault (ie. cr2). */
-	unsigned long last_pagefault;
-};
-
-static inline void lguest_set_ts(void)
-{
-	u32 cr0;
-
-	cr0 = read_cr0();
-	if (!(cr0 & 8))
-		write_cr0(cr0 | 8);
-}
-
-/* Full 4G segment descriptors, suitable for CS and DS. */
-#define FULL_EXEC_SEGMENT \
-	((struct desc_struct)GDT_ENTRY_INIT(0xc09b, 0, 0xfffff))
-#define FULL_SEGMENT ((struct desc_struct)GDT_ENTRY_INIT(0xc093, 0, 0xfffff))
-
-#endif /* __ASSEMBLY__ */
-
-#endif /* _ASM_X86_LGUEST_H */
diff --git a/arch/x86/include/asm/lguest_hcall.h b/arch/x86/include/asm/lguest_hcall.h
deleted file mode 100644
index 6c119cfae218..000000000000
--- a/arch/x86/include/asm/lguest_hcall.h
+++ /dev/null
@@ -1,74 +0,0 @@
-/* Architecture specific portion of the lguest hypercalls */
-#ifndef _ASM_X86_LGUEST_HCALL_H
-#define _ASM_X86_LGUEST_HCALL_H
-
-#define LHCALL_FLUSH_ASYNC	0
-#define LHCALL_LGUEST_INIT	1
-#define LHCALL_SHUTDOWN		2
-#define LHCALL_NEW_PGTABLE	4
-#define LHCALL_FLUSH_TLB	5
-#define LHCALL_LOAD_IDT_ENTRY	6
-#define LHCALL_SET_STACK	7
-#define LHCALL_SET_CLOCKEVENT	9
-#define LHCALL_HALT		10
-#define LHCALL_SET_PMD		13
-#define LHCALL_SET_PTE		14
-#define LHCALL_SET_PGD		15
-#define LHCALL_LOAD_TLS		16
-#define LHCALL_LOAD_GDT_ENTRY	18
-#define LHCALL_SEND_INTERRUPTS	19
-
-#define LGUEST_TRAP_ENTRY 0x1F
-
-/* Argument number 3 to LHCALL_LGUEST_SHUTDOWN */
-#define LGUEST_SHUTDOWN_POWEROFF	1
-#define LGUEST_SHUTDOWN_RESTART		2
-
-#ifndef __ASSEMBLY__
-#include <asm/hw_irq.h>
-
-/*G:030
- * But first, how does our Guest contact the Host to ask for privileged
- * operations?  There are two ways: the direct way is to make a "hypercall",
- * to make requests of the Host Itself.
- *
- * Our hypercall mechanism uses the highest unused trap code (traps 32 and
- * above are used by real hardware interrupts).  Seventeen hypercalls are
- * available: the hypercall number is put in the %eax register, and the
- * arguments (when required) are placed in %ebx, %ecx, %edx and %esi.
- * If a return value makes sense, it's returned in %eax.
- *
- * Grossly invalid calls result in Sudden Death at the hands of the vengeful
- * Host, rather than returning failure.  This reflects Winston Churchill's
- * definition of a gentleman: "someone who is only rude intentionally".
- */
-static inline unsigned long
-hcall(unsigned long call,
-      unsigned long arg1, unsigned long arg2, unsigned long arg3,
-      unsigned long arg4)
-{
-	/* "int" is the Intel instruction to trigger a trap. */
-	asm volatile("int $" __stringify(LGUEST_TRAP_ENTRY)
-		     /* The call in %eax (aka "a") might be overwritten */
-		     : "=a"(call)
-		       /* The arguments are in %eax, %ebx, %ecx, %edx & %esi */
-		     : "a"(call), "b"(arg1), "c"(arg2), "d"(arg3), "S"(arg4)
-		       /* "memory" means this might write somewhere in memory.
-			* This isn't true for all calls, but it's safe to tell
-			* gcc that it might happen so it doesn't get clever. */
-		     : "memory");
-	return call;
-}
-/*:*/
-
-/* Can't use our min() macro here: needs to be a constant */
-#define LGUEST_IRQS (NR_IRQS < 32 ? NR_IRQS: 32)
-
-#define LHCALL_RING_SIZE 64
-struct hcall_args {
-	/* These map directly onto eax/ebx/ecx/edx/esi in struct lguest_regs */
-	unsigned long arg0, arg1, arg2, arg3, arg4;
-};
-
-#endif /* !__ASSEMBLY__ */
-#endif /* _ASM_X86_LGUEST_HCALL_H */
diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h
new file mode 100644
index 000000000000..8e618fcf1f7c
--- /dev/null
+++ b/arch/x86/include/asm/mem_encrypt.h
@@ -0,0 +1,80 @@
+/*
+ * AMD Memory Encryption Support
+ *
+ * Copyright (C) 2016 Advanced Micro Devices, Inc.
+ *
+ * Author: Tom Lendacky <thomas.lendacky@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __X86_MEM_ENCRYPT_H__
+#define __X86_MEM_ENCRYPT_H__
+
+#ifndef __ASSEMBLY__
+
+#include <linux/init.h>
+
+#include <asm/bootparam.h>
+
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+
+extern unsigned long sme_me_mask;
+
+void sme_encrypt_execute(unsigned long encrypted_kernel_vaddr,
+			 unsigned long decrypted_kernel_vaddr,
+			 unsigned long kernel_len,
+			 unsigned long encryption_wa,
+			 unsigned long encryption_pgd);
+
+void __init sme_early_encrypt(resource_size_t paddr,
+			      unsigned long size);
+void __init sme_early_decrypt(resource_size_t paddr,
+			      unsigned long size);
+
+void __init sme_map_bootdata(char *real_mode_data);
+void __init sme_unmap_bootdata(char *real_mode_data);
+
+void __init sme_early_init(void);
+
+void __init sme_encrypt_kernel(void);
+void __init sme_enable(struct boot_params *bp);
+
+/* Architecture __weak replacement functions */
+void __init mem_encrypt_init(void);
+
+void swiotlb_set_mem_attributes(void *vaddr, unsigned long size);
+
+#else	/* !CONFIG_AMD_MEM_ENCRYPT */
+
+#define sme_me_mask	0UL
+
+static inline void __init sme_early_encrypt(resource_size_t paddr,
+					    unsigned long size) { }
+static inline void __init sme_early_decrypt(resource_size_t paddr,
+					    unsigned long size) { }
+
+static inline void __init sme_map_bootdata(char *real_mode_data) { }
+static inline void __init sme_unmap_bootdata(char *real_mode_data) { }
+
+static inline void __init sme_early_init(void) { }
+
+static inline void __init sme_encrypt_kernel(void) { }
+static inline void __init sme_enable(struct boot_params *bp) { }
+
+#endif	/* CONFIG_AMD_MEM_ENCRYPT */
+
+/*
+ * The __sme_pa() and __sme_pa_nodebug() macros are meant for use when
+ * writing to or comparing values from the cr3 register.  Having the
+ * encryption mask set in cr3 enables the PGD entry to be encrypted and
+ * avoid special case handling of PGD allocations.
+ */
+#define __sme_pa(x)		(__pa(x) | sme_me_mask)
+#define __sme_pa_nodebug(x)	(__pa_nodebug(x) | sme_me_mask)
+
+#endif	/* __ASSEMBLY__ */
+
+#endif	/* __X86_MEM_ENCRYPT_H__ */
diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
index 79b647a7ebd0..bb8c597c2248 100644
--- a/arch/x86/include/asm/mmu.h
+++ b/arch/x86/include/asm/mmu.h
@@ -3,12 +3,28 @@
 
 #include <linux/spinlock.h>
 #include <linux/mutex.h>
+#include <linux/atomic.h>
 
 /*
- * The x86 doesn't have a mmu context, but
- * we put the segment information here.
+ * x86 has arch-specific MMU state beyond what lives in mm_struct.
  */
 typedef struct {
+	/*
+	 * ctx_id uniquely identifies this mm_struct.  A ctx_id will never
+	 * be reused, and zero is not a valid ctx_id.
+	 */
+	u64 ctx_id;
+
+	/*
+	 * Any code that needs to do any sort of TLB flushing for this
+	 * mm will first make its changes to the page tables, then
+	 * increment tlb_gen, then flush.  This lets the low-level
+	 * flushing code keep track of what needs flushing.
+	 *
+	 * This is not used on Xen PV.
+	 */
+	atomic64_t tlb_gen;
+
 #ifdef CONFIG_MODIFY_LDT_SYSCALL
 	struct ldt_struct *ldt;
 #endif
@@ -37,6 +53,11 @@ typedef struct {
 #endif
 } mm_context_t;
 
+#define INIT_MM_CONTEXT(mm)						\
+	.context = {							\
+		.ctx_id = 1,						\
+	}
+
 void leave_mm(int cpu);
 
 #endif /* _ASM_X86_MMU_H */
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 7a234be7e298..7ae318c340d9 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -12,6 +12,9 @@
 #include <asm/tlbflush.h>
 #include <asm/paravirt.h>
 #include <asm/mpx.h>
+
+extern atomic64_t last_mm_ctx_id;
+
 #ifndef CONFIG_PARAVIRT
 static inline void paravirt_activate_mm(struct mm_struct *prev,
 					struct mm_struct *next)
@@ -125,13 +128,18 @@ static inline void switch_ldt(struct mm_struct *prev, struct mm_struct *next)
 
 static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
 {
-	if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
-		this_cpu_write(cpu_tlbstate.state, TLBSTATE_LAZY);
+	int cpu = smp_processor_id();
+
+	if (cpumask_test_cpu(cpu, mm_cpumask(mm)))
+		cpumask_clear_cpu(cpu, mm_cpumask(mm));
 }
 
 static inline int init_new_context(struct task_struct *tsk,
 				   struct mm_struct *mm)
 {
+	mm->context.ctx_id = atomic64_inc_return(&last_mm_ctx_id);
+	atomic64_set(&mm->context.tlb_gen, 0);
+
 	#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
 	if (cpu_feature_enabled(X86_FEATURE_OSPKE)) {
 		/* pkey 0 is the default and always allocated */
@@ -290,6 +298,9 @@ static inline unsigned long __get_current_cr3_fast(void)
 {
 	unsigned long cr3 = __pa(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd);
 
+	if (static_cpu_has(X86_FEATURE_PCID))
+		cr3 |= this_cpu_read(cpu_tlbstate.loaded_mm_asid);
+
 	/* For now, be very restrictive about when this can be called. */
 	VM_WARN_ON(in_nmi() || preemptible());
 
diff --git a/arch/x86/include/asm/module.h b/arch/x86/include/asm/module.h
index e3b7819caeef..9eb7c718aaf8 100644
--- a/arch/x86/include/asm/module.h
+++ b/arch/x86/include/asm/module.h
@@ -2,6 +2,15 @@
 #define _ASM_X86_MODULE_H
 
 #include <asm-generic/module.h>
+#include <asm/orc_types.h>
+
+struct mod_arch_specific {
+#ifdef CONFIG_ORC_UNWINDER
+	unsigned int num_orcs;
+	int *orc_unwind_ip;
+	struct orc_entry *orc_unwind;
+#endif
+};
 
 #ifdef CONFIG_X86_64
 /* X86_64 does not define MODULE_PROC_FAMILY */
diff --git a/arch/x86/include/asm/mpx.h b/arch/x86/include/asm/mpx.h
index a0d662be4c5b..7d7404756bb4 100644
--- a/arch/x86/include/asm/mpx.h
+++ b/arch/x86/include/asm/mpx.h
@@ -73,6 +73,9 @@ static inline void mpx_mm_init(struct mm_struct *mm)
 }
 void mpx_notify_unmap(struct mm_struct *mm, struct vm_area_struct *vma,
 		      unsigned long start, unsigned long end);
+
+unsigned long mpx_unmapped_area_check(unsigned long addr, unsigned long len,
+		unsigned long flags);
 #else
 static inline siginfo_t *mpx_generate_siginfo(struct pt_regs *regs)
 {
@@ -94,6 +97,12 @@ static inline void mpx_notify_unmap(struct mm_struct *mm,
 				    unsigned long start, unsigned long end)
 {
 }
+
+static inline unsigned long mpx_unmapped_area_check(unsigned long addr,
+		unsigned long len, unsigned long flags)
+{
+	return addr;
+}
 #endif /* CONFIG_X86_INTEL_MPX */
 
 #endif /* _ASM_X86_MPX_H */
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 2b58c8c1eeaa..63cc96f064dc 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -3,6 +3,8 @@
 
 #include <linux/types.h>
 #include <linux/atomic.h>
+#include <linux/nmi.h>
+#include <asm/io.h>
 #include <asm/hyperv.h>
 
 /*
@@ -28,6 +30,8 @@ struct ms_hyperv_info {
 	u32 features;
 	u32 misc_features;
 	u32 hints;
+	u32 max_vp_index;
+	u32 max_lp_index;
 };
 
 extern struct ms_hyperv_info ms_hyperv;
@@ -168,12 +172,155 @@ void hv_remove_crash_handler(void);
 
 #if IS_ENABLED(CONFIG_HYPERV)
 extern struct clocksource *hyperv_cs;
+extern void *hv_hypercall_pg;
+
+static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
+{
+	u64 input_address = input ? virt_to_phys(input) : 0;
+	u64 output_address = output ? virt_to_phys(output) : 0;
+	u64 hv_status;
+	register void *__sp asm(_ASM_SP);
+
+#ifdef CONFIG_X86_64
+	if (!hv_hypercall_pg)
+		return U64_MAX;
+
+	__asm__ __volatile__("mov %4, %%r8\n"
+			     "call *%5"
+			     : "=a" (hv_status), "+r" (__sp),
+			       "+c" (control), "+d" (input_address)
+			     :  "r" (output_address), "m" (hv_hypercall_pg)
+			     : "cc", "memory", "r8", "r9", "r10", "r11");
+#else
+	u32 input_address_hi = upper_32_bits(input_address);
+	u32 input_address_lo = lower_32_bits(input_address);
+	u32 output_address_hi = upper_32_bits(output_address);
+	u32 output_address_lo = lower_32_bits(output_address);
+
+	if (!hv_hypercall_pg)
+		return U64_MAX;
+
+	__asm__ __volatile__("call *%7"
+			     : "=A" (hv_status),
+			       "+c" (input_address_lo), "+r" (__sp)
+			     : "A" (control),
+			       "b" (input_address_hi),
+			       "D"(output_address_hi), "S"(output_address_lo),
+			       "m" (hv_hypercall_pg)
+			     : "cc", "memory");
+#endif /* !x86_64 */
+	return hv_status;
+}
+
+#define HV_HYPERCALL_RESULT_MASK	GENMASK_ULL(15, 0)
+#define HV_HYPERCALL_FAST_BIT		BIT(16)
+#define HV_HYPERCALL_VARHEAD_OFFSET	17
+#define HV_HYPERCALL_REP_COMP_OFFSET	32
+#define HV_HYPERCALL_REP_COMP_MASK	GENMASK_ULL(43, 32)
+#define HV_HYPERCALL_REP_START_OFFSET	48
+#define HV_HYPERCALL_REP_START_MASK	GENMASK_ULL(59, 48)
+
+/* Fast hypercall with 8 bytes of input and no output */
+static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
+{
+	u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT;
+	register void *__sp asm(_ASM_SP);
+
+#ifdef CONFIG_X86_64
+	{
+		__asm__ __volatile__("call *%4"
+				     : "=a" (hv_status), "+r" (__sp),
+				       "+c" (control), "+d" (input1)
+				     : "m" (hv_hypercall_pg)
+				     : "cc", "r8", "r9", "r10", "r11");
+	}
+#else
+	{
+		u32 input1_hi = upper_32_bits(input1);
+		u32 input1_lo = lower_32_bits(input1);
+
+		__asm__ __volatile__ ("call *%5"
+				      : "=A"(hv_status),
+					"+c"(input1_lo),
+					"+r"(__sp)
+				      :	"A" (control),
+					"b" (input1_hi),
+					"m" (hv_hypercall_pg)
+				      : "cc", "edi", "esi");
+	}
+#endif
+		return hv_status;
+}
+
+/*
+ * Rep hypercalls. Callers of this functions are supposed to ensure that
+ * rep_count and varhead_size comply with Hyper-V hypercall definition.
+ */
+static inline u64 hv_do_rep_hypercall(u16 code, u16 rep_count, u16 varhead_size,
+				      void *input, void *output)
+{
+	u64 control = code;
+	u64 status;
+	u16 rep_comp;
+
+	control |= (u64)varhead_size << HV_HYPERCALL_VARHEAD_OFFSET;
+	control |= (u64)rep_count << HV_HYPERCALL_REP_COMP_OFFSET;
+
+	do {
+		status = hv_do_hypercall(control, input, output);
+		if ((status & HV_HYPERCALL_RESULT_MASK) != HV_STATUS_SUCCESS)
+			return status;
+
+		/* Bits 32-43 of status have 'Reps completed' data. */
+		rep_comp = (status & HV_HYPERCALL_REP_COMP_MASK) >>
+			HV_HYPERCALL_REP_COMP_OFFSET;
+
+		control &= ~HV_HYPERCALL_REP_START_MASK;
+		control |= (u64)rep_comp << HV_HYPERCALL_REP_START_OFFSET;
+
+		touch_nmi_watchdog();
+	} while (rep_comp < rep_count);
+
+	return status;
+}
+
+/*
+ * Hypervisor's notion of virtual processor ID is different from
+ * Linux' notion of CPU ID. This information can only be retrieved
+ * in the context of the calling CPU. Setup a map for easy access
+ * to this information.
+ */
+extern u32 *hv_vp_index;
+
+/**
+ * hv_cpu_number_to_vp_number() - Map CPU to VP.
+ * @cpu_number: CPU number in Linux terms
+ *
+ * This function returns the mapping between the Linux processor
+ * number and the hypervisor's virtual processor number, useful
+ * in making hypercalls and such that talk about specific
+ * processors.
+ *
+ * Return: Virtual processor number in Hyper-V terms
+ */
+static inline int hv_cpu_number_to_vp_number(int cpu_number)
+{
+	return hv_vp_index[cpu_number];
+}
 
 void hyperv_init(void);
+void hyperv_setup_mmu_ops(void);
+void hyper_alloc_mmu(void);
 void hyperv_report_panic(struct pt_regs *regs);
 bool hv_is_hypercall_page_setup(void);
 void hyperv_cleanup(void);
-#endif
+#else /* CONFIG_HYPERV */
+static inline void hyperv_init(void) {}
+static inline bool hv_is_hypercall_page_setup(void) { return false; }
+static inline void hyperv_cleanup(void) {}
+static inline void hyperv_setup_mmu_ops(void) {}
+#endif /* CONFIG_HYPERV */
+
 #ifdef CONFIG_HYPERV_TSCPAGE
 struct ms_hyperv_tsc_page *hv_get_tsc_page(void);
 static inline u64 hv_read_tsc_page(const struct ms_hyperv_tsc_page *tsc_pg)
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 5573c75f8e4c..17f5c12e1afd 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -356,6 +356,8 @@
 #define MSR_K8_TOP_MEM1			0xc001001a
 #define MSR_K8_TOP_MEM2			0xc001001d
 #define MSR_K8_SYSCFG			0xc0010010
+#define MSR_K8_SYSCFG_MEM_ENCRYPT_BIT	23
+#define MSR_K8_SYSCFG_MEM_ENCRYPT	BIT_ULL(MSR_K8_SYSCFG_MEM_ENCRYPT_BIT)
 #define MSR_K8_INT_PENDING_MSG		0xc0010055
 /* C1E active bits in int pending message */
 #define K8_INTP_C1E_ACTIVE_MASK		0x18000000
diff --git a/arch/x86/include/asm/orc_lookup.h b/arch/x86/include/asm/orc_lookup.h
new file mode 100644
index 000000000000..91c8d868424d
--- /dev/null
+++ b/arch/x86/include/asm/orc_lookup.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (C) 2017 Josh Poimboeuf <jpoimboe@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef _ORC_LOOKUP_H
+#define _ORC_LOOKUP_H
+
+/*
+ * This is a lookup table for speeding up access to the .orc_unwind table.
+ * Given an input address offset, the corresponding lookup table entry
+ * specifies a subset of the .orc_unwind table to search.
+ *
+ * Each block represents the end of the previous range and the start of the
+ * next range.  An extra block is added to give the last range an end.
+ *
+ * The block size should be a power of 2 to avoid a costly 'div' instruction.
+ *
+ * A block size of 256 was chosen because it roughly doubles unwinder
+ * performance while only adding ~5% to the ORC data footprint.
+ */
+#define LOOKUP_BLOCK_ORDER	8
+#define LOOKUP_BLOCK_SIZE	(1 << LOOKUP_BLOCK_ORDER)
+
+#ifndef LINKER_SCRIPT
+
+extern unsigned int orc_lookup[];
+extern unsigned int orc_lookup_end[];
+
+#define LOOKUP_START_IP		(unsigned long)_stext
+#define LOOKUP_STOP_IP		(unsigned long)_etext
+
+#endif /* LINKER_SCRIPT */
+
+#endif /* _ORC_LOOKUP_H */
diff --git a/arch/x86/include/asm/orc_types.h b/arch/x86/include/asm/orc_types.h
new file mode 100644
index 000000000000..9c9dc579bd7d
--- /dev/null
+++ b/arch/x86/include/asm/orc_types.h
@@ -0,0 +1,107 @@
+/*
+ * Copyright (C) 2017 Josh Poimboeuf <jpoimboe@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _ORC_TYPES_H
+#define _ORC_TYPES_H
+
+#include <linux/types.h>
+#include <linux/compiler.h>
+
+/*
+ * The ORC_REG_* registers are base registers which are used to find other
+ * registers on the stack.
+ *
+ * ORC_REG_PREV_SP, also known as DWARF Call Frame Address (CFA), is the
+ * address of the previous frame: the caller's SP before it called the current
+ * function.
+ *
+ * ORC_REG_UNDEFINED means the corresponding register's value didn't change in
+ * the current frame.
+ *
+ * The most commonly used base registers are SP and BP -- which the previous SP
+ * is usually based on -- and PREV_SP and UNDEFINED -- which the previous BP is
+ * usually based on.
+ *
+ * The rest of the base registers are needed for special cases like entry code
+ * and GCC realigned stacks.
+ */
+#define ORC_REG_UNDEFINED		0
+#define ORC_REG_PREV_SP			1
+#define ORC_REG_DX			2
+#define ORC_REG_DI			3
+#define ORC_REG_BP			4
+#define ORC_REG_SP			5
+#define ORC_REG_R10			6
+#define ORC_REG_R13			7
+#define ORC_REG_BP_INDIRECT		8
+#define ORC_REG_SP_INDIRECT		9
+#define ORC_REG_MAX			15
+
+/*
+ * ORC_TYPE_CALL: Indicates that sp_reg+sp_offset resolves to PREV_SP (the
+ * caller's SP right before it made the call).  Used for all callable
+ * functions, i.e. all C code and all callable asm functions.
+ *
+ * ORC_TYPE_REGS: Used in entry code to indicate that sp_reg+sp_offset points
+ * to a fully populated pt_regs from a syscall, interrupt, or exception.
+ *
+ * ORC_TYPE_REGS_IRET: Used in entry code to indicate that sp_reg+sp_offset
+ * points to the iret return frame.
+ *
+ * The UNWIND_HINT macros are used only for the unwind_hint struct.  They
+ * aren't used in struct orc_entry due to size and complexity constraints.
+ * Objtool converts them to real types when it converts the hints to orc
+ * entries.
+ */
+#define ORC_TYPE_CALL			0
+#define ORC_TYPE_REGS			1
+#define ORC_TYPE_REGS_IRET		2
+#define UNWIND_HINT_TYPE_SAVE		3
+#define UNWIND_HINT_TYPE_RESTORE	4
+
+#ifndef __ASSEMBLY__
+/*
+ * This struct is more or less a vastly simplified version of the DWARF Call
+ * Frame Information standard.  It contains only the necessary parts of DWARF
+ * CFI, simplified for ease of access by the in-kernel unwinder.  It tells the
+ * unwinder how to find the previous SP and BP (and sometimes entry regs) on
+ * the stack for a given code address.  Each instance of the struct corresponds
+ * to one or more code locations.
+ */
+struct orc_entry {
+	s16		sp_offset;
+	s16		bp_offset;
+	unsigned	sp_reg:4;
+	unsigned	bp_reg:4;
+	unsigned	type:2;
+} __packed;
+
+/*
+ * This struct is used by asm and inline asm code to manually annotate the
+ * location of registers on the stack for the ORC unwinder.
+ *
+ * Type can be either ORC_TYPE_* or UNWIND_HINT_TYPE_*.
+ */
+struct unwind_hint {
+	u32		ip;
+	s16		sp_offset;
+	u8		sp_reg;
+	u8		type;
+};
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ORC_TYPES_H */
diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h
index b4a0d43248cf..b50df06ad251 100644
--- a/arch/x86/include/asm/page_64.h
+++ b/arch/x86/include/asm/page_64.h
@@ -51,6 +51,10 @@ static inline void clear_page(void *page)
 
 void copy_page(void *to, void *from);
 
+#ifdef CONFIG_X86_MCE
+#define arch_unmap_kpfn arch_unmap_kpfn
+#endif
+
 #endif	/* !__ASSEMBLY__ */
 
 #ifdef CONFIG_X86_VSYSCALL_EMULATION
diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h
index 7bd0099384ca..b98ed9d14630 100644
--- a/arch/x86/include/asm/page_types.h
+++ b/arch/x86/include/asm/page_types.h
@@ -3,6 +3,7 @@
 
 #include <linux/const.h>
 #include <linux/types.h>
+#include <linux/mem_encrypt.h>
 
 /* PAGE_SHIFT determines the page size */
 #define PAGE_SHIFT		12
@@ -15,7 +16,7 @@
 #define PUD_PAGE_SIZE		(_AC(1, UL) << PUD_SHIFT)
 #define PUD_PAGE_MASK		(~(PUD_PAGE_SIZE-1))
 
-#define __PHYSICAL_MASK		((phys_addr_t)((1ULL << __PHYSICAL_MASK_SHIFT) - 1))
+#define __PHYSICAL_MASK		((phys_addr_t)(__sme_clr((1ULL << __PHYSICAL_MASK_SHIFT) - 1)))
 #define __VIRTUAL_MASK		((1UL << __VIRTUAL_MASK_SHIFT) - 1)
 
 /* Cast *PAGE_MASK to a signed type so that it is sign-extended if
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 9ccac1926587..c25dd22f7c70 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -960,11 +960,6 @@ extern void default_banner(void);
 #define GET_CR2_INTO_RAX				\
 	call PARA_INDIRECT(pv_mmu_ops+PV_MMU_read_cr2)
 
-#define PARAVIRT_ADJUST_EXCEPTION_FRAME					\
-	PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_adjust_exception_frame), \
-		  CLBR_NONE,						\
-		  call PARA_INDIRECT(pv_irq_ops+PV_IRQ_adjust_exception_frame))
-
 #define USERGS_SYSRET64							\
 	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64),	\
 		  CLBR_NONE,						\
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 9ffc36bfe4cd..6b64fc6367f2 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -196,9 +196,6 @@ struct pv_irq_ops {
 	void (*safe_halt)(void);
 	void (*halt)(void);
 
-#ifdef CONFIG_X86_64
-	void (*adjust_exception_frame)(void);
-#endif
 } __no_randomize_layout;
 
 struct pv_mmu_ops {
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 77037b6f1caa..bbeae4a2bd01 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -1,6 +1,7 @@
 #ifndef _ASM_X86_PGTABLE_H
 #define _ASM_X86_PGTABLE_H
 
+#include <linux/mem_encrypt.h>
 #include <asm/page.h>
 #include <asm/pgtable_types.h>
 
@@ -13,9 +14,18 @@
 		     cachemode2protval(_PAGE_CACHE_MODE_UC_MINUS)))	\
 	 : (prot))
 
+/*
+ * Macros to add or remove encryption attribute
+ */
+#define pgprot_encrypted(prot)	__pgprot(__sme_set(pgprot_val(prot)))
+#define pgprot_decrypted(prot)	__pgprot(__sme_clr(pgprot_val(prot)))
+
 #ifndef __ASSEMBLY__
 #include <asm/x86_init.h>
 
+extern pgd_t early_top_pgt[PTRS_PER_PGD];
+int __init __early_make_pgtable(unsigned long address, pmdval_t pmd);
+
 void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd);
 void ptdump_walk_pgd_level_checkwx(void);
 
@@ -38,6 +48,8 @@ extern struct list_head pgd_list;
 
 extern struct mm_struct *pgd_page_get_mm(struct page *page);
 
+extern pmdval_t early_pmd_flags;
+
 #ifdef CONFIG_PARAVIRT
 #include <asm/paravirt.h>
 #else  /* !CONFIG_PARAVIRT */
@@ -195,6 +207,11 @@ static inline unsigned long p4d_pfn(p4d_t p4d)
 	return (p4d_val(p4d) & p4d_pfn_mask(p4d)) >> PAGE_SHIFT;
 }
 
+static inline unsigned long pgd_pfn(pgd_t pgd)
+{
+	return (pgd_val(pgd) & PTE_PFN_MASK) >> PAGE_SHIFT;
+}
+
 static inline int p4d_large(p4d_t p4d)
 {
 	/* No 512 GiB pages yet */
@@ -704,8 +721,7 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd)
  * Currently stuck as a macro due to indirect forward reference to
  * linux/mmzone.h's __section_mem_map_addr() definition:
  */
-#define pmd_page(pmd)		\
-	pfn_to_page((pmd_val(pmd) & pmd_pfn_mask(pmd)) >> PAGE_SHIFT)
+#define pmd_page(pmd)	pfn_to_page(pmd_pfn(pmd))
 
 /*
  * the pmd page can be thought of an array like this: pmd_t[PTRS_PER_PMD]
@@ -773,8 +789,7 @@ static inline unsigned long pud_page_vaddr(pud_t pud)
  * Currently stuck as a macro due to indirect forward reference to
  * linux/mmzone.h's __section_mem_map_addr() definition:
  */
-#define pud_page(pud)		\
-	pfn_to_page((pud_val(pud) & pud_pfn_mask(pud)) >> PAGE_SHIFT)
+#define pud_page(pud)	pfn_to_page(pud_pfn(pud))
 
 /* Find an entry in the second-level page table.. */
 static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address)
@@ -824,8 +839,7 @@ static inline unsigned long p4d_page_vaddr(p4d_t p4d)
  * Currently stuck as a macro due to indirect forward reference to
  * linux/mmzone.h's __section_mem_map_addr() definition:
  */
-#define p4d_page(p4d)		\
-	pfn_to_page((p4d_val(p4d) & p4d_pfn_mask(p4d)) >> PAGE_SHIFT)
+#define p4d_page(p4d)	pfn_to_page(p4d_pfn(p4d))
 
 /* Find an entry in the third-level page table.. */
 static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
@@ -859,7 +873,7 @@ static inline unsigned long pgd_page_vaddr(pgd_t pgd)
  * Currently stuck as a macro due to indirect forward reference to
  * linux/mmzone.h's __section_mem_map_addr() definition:
  */
-#define pgd_page(pgd)		pfn_to_page(pgd_val(pgd) >> PAGE_SHIFT)
+#define pgd_page(pgd)	pfn_to_page(pgd_pfn(pgd))
 
 /* to find an entry in a page-table-directory. */
 static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index bf9638e1ee42..399261ce904c 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -2,6 +2,8 @@
 #define _ASM_X86_PGTABLE_DEFS_H
 
 #include <linux/const.h>
+#include <linux/mem_encrypt.h>
+
 #include <asm/page_types.h>
 
 #define FIRST_USER_ADDRESS	0UL
@@ -121,10 +123,10 @@
 
 #define _PAGE_PROTNONE	(_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE)
 
-#define _PAGE_TABLE	(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER |	\
-			 _PAGE_ACCESSED | _PAGE_DIRTY)
-#define _KERNPG_TABLE	(_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED |	\
-			 _PAGE_DIRTY)
+#define _PAGE_TABLE_NOENC	(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER |\
+				 _PAGE_ACCESSED | _PAGE_DIRTY)
+#define _KERNPG_TABLE_NOENC	(_PAGE_PRESENT | _PAGE_RW |		\
+				 _PAGE_ACCESSED | _PAGE_DIRTY)
 
 /*
  * Set of bits not changed in pte_modify.  The pte's
@@ -159,6 +161,7 @@ enum page_cache_mode {
 
 #define _PAGE_CACHE_MASK	(_PAGE_PAT | _PAGE_PCD | _PAGE_PWT)
 #define _PAGE_NOCACHE		(cachemode2protval(_PAGE_CACHE_MODE_UC))
+#define _PAGE_CACHE_WP		(cachemode2protval(_PAGE_CACHE_MODE_WP))
 
 #define PAGE_NONE	__pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED)
 #define PAGE_SHARED	__pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \
@@ -187,22 +190,42 @@ enum page_cache_mode {
 #define __PAGE_KERNEL_VVAR		(__PAGE_KERNEL_RO | _PAGE_USER)
 #define __PAGE_KERNEL_LARGE		(__PAGE_KERNEL | _PAGE_PSE)
 #define __PAGE_KERNEL_LARGE_EXEC	(__PAGE_KERNEL_EXEC | _PAGE_PSE)
+#define __PAGE_KERNEL_WP		(__PAGE_KERNEL | _PAGE_CACHE_WP)
 
 #define __PAGE_KERNEL_IO		(__PAGE_KERNEL)
 #define __PAGE_KERNEL_IO_NOCACHE	(__PAGE_KERNEL_NOCACHE)
 
-#define PAGE_KERNEL			__pgprot(__PAGE_KERNEL)
-#define PAGE_KERNEL_RO			__pgprot(__PAGE_KERNEL_RO)
-#define PAGE_KERNEL_EXEC		__pgprot(__PAGE_KERNEL_EXEC)
-#define PAGE_KERNEL_RX			__pgprot(__PAGE_KERNEL_RX)
-#define PAGE_KERNEL_NOCACHE		__pgprot(__PAGE_KERNEL_NOCACHE)
-#define PAGE_KERNEL_LARGE		__pgprot(__PAGE_KERNEL_LARGE)
-#define PAGE_KERNEL_LARGE_EXEC		__pgprot(__PAGE_KERNEL_LARGE_EXEC)
-#define PAGE_KERNEL_VSYSCALL		__pgprot(__PAGE_KERNEL_VSYSCALL)
-#define PAGE_KERNEL_VVAR		__pgprot(__PAGE_KERNEL_VVAR)
+#ifndef __ASSEMBLY__
+
+#define _PAGE_ENC	(_AT(pteval_t, sme_me_mask))
+
+#define _PAGE_TABLE	(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER |	\
+			 _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_ENC)
+#define _KERNPG_TABLE	(_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED |	\
+			 _PAGE_DIRTY | _PAGE_ENC)
+
+#define __PAGE_KERNEL_ENC	(__PAGE_KERNEL | _PAGE_ENC)
+#define __PAGE_KERNEL_ENC_WP	(__PAGE_KERNEL_WP | _PAGE_ENC)
+
+#define __PAGE_KERNEL_NOENC	(__PAGE_KERNEL)
+#define __PAGE_KERNEL_NOENC_WP	(__PAGE_KERNEL_WP)
+
+#define PAGE_KERNEL		__pgprot(__PAGE_KERNEL | _PAGE_ENC)
+#define PAGE_KERNEL_NOENC	__pgprot(__PAGE_KERNEL)
+#define PAGE_KERNEL_RO		__pgprot(__PAGE_KERNEL_RO | _PAGE_ENC)
+#define PAGE_KERNEL_EXEC	__pgprot(__PAGE_KERNEL_EXEC | _PAGE_ENC)
+#define PAGE_KERNEL_EXEC_NOENC	__pgprot(__PAGE_KERNEL_EXEC)
+#define PAGE_KERNEL_RX		__pgprot(__PAGE_KERNEL_RX | _PAGE_ENC)
+#define PAGE_KERNEL_NOCACHE	__pgprot(__PAGE_KERNEL_NOCACHE | _PAGE_ENC)
+#define PAGE_KERNEL_LARGE	__pgprot(__PAGE_KERNEL_LARGE | _PAGE_ENC)
+#define PAGE_KERNEL_LARGE_EXEC	__pgprot(__PAGE_KERNEL_LARGE_EXEC | _PAGE_ENC)
+#define PAGE_KERNEL_VSYSCALL	__pgprot(__PAGE_KERNEL_VSYSCALL | _PAGE_ENC)
+#define PAGE_KERNEL_VVAR	__pgprot(__PAGE_KERNEL_VVAR | _PAGE_ENC)
+
+#define PAGE_KERNEL_IO		__pgprot(__PAGE_KERNEL_IO)
+#define PAGE_KERNEL_IO_NOCACHE	__pgprot(__PAGE_KERNEL_IO_NOCACHE)
 
-#define PAGE_KERNEL_IO			__pgprot(__PAGE_KERNEL_IO)
-#define PAGE_KERNEL_IO_NOCACHE		__pgprot(__PAGE_KERNEL_IO_NOCACHE)
+#endif	/* __ASSEMBLY__ */
 
 /*         xwr */
 #define __P000	PAGE_NONE
@@ -287,6 +310,11 @@ static inline p4dval_t native_p4d_val(p4d_t p4d)
 #else
 #include <asm-generic/pgtable-nop4d.h>
 
+static inline p4d_t native_make_p4d(pudval_t val)
+{
+	return (p4d_t) { .pgd = native_make_pgd((pgdval_t)val) };
+}
+
 static inline p4dval_t native_p4d_val(p4d_t p4d)
 {
 	return native_pgd_val(p4d.pgd);
diff --git a/arch/x86/include/asm/processor-flags.h b/arch/x86/include/asm/processor-flags.h
index 79aa2f98398d..dc723b64acf0 100644
--- a/arch/x86/include/asm/processor-flags.h
+++ b/arch/x86/include/asm/processor-flags.h
@@ -2,6 +2,7 @@
 #define _ASM_X86_PROCESSOR_FLAGS_H
 
 #include <uapi/asm/processor-flags.h>
+#include <linux/mem_encrypt.h>
 
 #ifdef CONFIG_VM86
 #define X86_VM_MASK	X86_EFLAGS_VM
@@ -32,16 +33,18 @@
  * CR3_ADDR_MASK is the mask used by read_cr3_pa().
  */
 #ifdef CONFIG_X86_64
-/* Mask off the address space ID bits. */
-#define CR3_ADDR_MASK 0x7FFFFFFFFFFFF000ull
-#define CR3_PCID_MASK 0xFFFull
+/* Mask off the address space ID and SME encryption bits. */
+#define CR3_ADDR_MASK	__sme_clr(0x7FFFFFFFFFFFF000ull)
+#define CR3_PCID_MASK	0xFFFull
+#define CR3_NOFLUSH	BIT_ULL(63)
 #else
 /*
  * CR3_ADDR_MASK needs at least bits 31:5 set on PAE systems, and we save
  * a tiny bit of code size by setting all the bits.
  */
-#define CR3_ADDR_MASK 0xFFFFFFFFull
-#define CR3_PCID_MASK 0ull
+#define CR3_ADDR_MASK	0xFFFFFFFFull
+#define CR3_PCID_MASK	0ull
+#define CR3_NOFLUSH	0
 #endif
 
 #endif /* _ASM_X86_PROCESSOR_FLAGS_H */
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 028245e1c42b..3fa26a61eabc 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -22,6 +22,7 @@ struct vm86;
 #include <asm/nops.h>
 #include <asm/special_insns.h>
 #include <asm/fpu/types.h>
+#include <asm/unwind_hints.h>
 
 #include <linux/personality.h>
 #include <linux/cache.h>
@@ -29,6 +30,7 @@ struct vm86;
 #include <linux/math64.h>
 #include <linux/err.h>
 #include <linux/irqflags.h>
+#include <linux/mem_encrypt.h>
 
 /*
  * We handle most unaligned accesses in hardware.  On the other hand
@@ -239,9 +241,14 @@ static inline unsigned long read_cr3_pa(void)
 	return __read_cr3() & CR3_ADDR_MASK;
 }
 
+static inline unsigned long native_read_cr3_pa(void)
+{
+	return __native_read_cr3() & CR3_ADDR_MASK;
+}
+
 static inline void load_cr3(pgd_t *pgdir)
 {
-	write_cr3(__pa(pgdir));
+	write_cr3(__sme_pa(pgdir));
 }
 
 #ifdef CONFIG_X86_32
@@ -661,7 +668,7 @@ static inline void sync_core(void)
 	 * In case NMI unmasking or performance ever becomes a problem,
 	 * the next best option appears to be MOV-to-CR2 and an
 	 * unconditional jump.  That sequence also works on all CPUs,
-	 * but it will fault at CPL3 (i.e. Xen PV and lguest).
+	 * but it will fault at CPL3 (i.e. Xen PV).
 	 *
 	 * CPUID is the conventional way, but it's nasty: it doesn't
 	 * exist on some 486-like CPUs, and it usually exits to a
@@ -684,6 +691,7 @@ static inline void sync_core(void)
 	unsigned int tmp;
 
 	asm volatile (
+		UNWIND_HINT_SAVE
 		"mov %%ss, %0\n\t"
 		"pushq %q0\n\t"
 		"pushq %%rsp\n\t"
@@ -693,6 +701,7 @@ static inline void sync_core(void)
 		"pushq %q0\n\t"
 		"pushq $1f\n\t"
 		"iretq\n\t"
+		UNWIND_HINT_RESTORE
 		"1:"
 		: "=&r" (tmp), "+r" (__sp) : : "cc", "memory");
 #endif
@@ -802,7 +811,9 @@ static inline void spin_lock_prefetch(const void *x)
  */
 #define IA32_PAGE_OFFSET	PAGE_OFFSET
 #define TASK_SIZE		PAGE_OFFSET
+#define TASK_SIZE_LOW		TASK_SIZE
 #define TASK_SIZE_MAX		TASK_SIZE
+#define DEFAULT_MAP_WINDOW	TASK_SIZE
 #define STACK_TOP		TASK_SIZE
 #define STACK_TOP_MAX		STACK_TOP
 
@@ -842,7 +853,9 @@ static inline void spin_lock_prefetch(const void *x)
  * particular problem by preventing anything from being mapped
  * at the maximum canonical address.
  */
-#define TASK_SIZE_MAX	((1UL << 47) - PAGE_SIZE)
+#define TASK_SIZE_MAX	((1UL << __VIRTUAL_MASK_SHIFT) - PAGE_SIZE)
+
+#define DEFAULT_MAP_WINDOW	((1UL << 47) - PAGE_SIZE)
 
 /* This decides where the kernel will search for a free chunk of vm
  * space during mmap's.
@@ -850,12 +863,14 @@ static inline void spin_lock_prefetch(const void *x)
 #define IA32_PAGE_OFFSET	((current->personality & ADDR_LIMIT_3GB) ? \
 					0xc0000000 : 0xFFFFe000)
 
+#define TASK_SIZE_LOW		(test_thread_flag(TIF_ADDR32) ? \
+					IA32_PAGE_OFFSET : DEFAULT_MAP_WINDOW)
 #define TASK_SIZE		(test_thread_flag(TIF_ADDR32) ? \
 					IA32_PAGE_OFFSET : TASK_SIZE_MAX)
 #define TASK_SIZE_OF(child)	((test_tsk_thread_flag(child, TIF_ADDR32)) ? \
 					IA32_PAGE_OFFSET : TASK_SIZE_MAX)
 
-#define STACK_TOP		TASK_SIZE
+#define STACK_TOP		TASK_SIZE_LOW
 #define STACK_TOP_MAX		TASK_SIZE_MAX
 
 #define INIT_THREAD  {						\
@@ -876,7 +891,7 @@ extern void start_thread(struct pt_regs *regs, unsigned long new_ip,
  * space during mmap's.
  */
 #define __TASK_UNMAPPED_BASE(task_size)	(PAGE_ALIGN(task_size / 3))
-#define TASK_UNMAPPED_BASE		__TASK_UNMAPPED_BASE(TASK_SIZE)
+#define TASK_UNMAPPED_BASE		__TASK_UNMAPPED_BASE(TASK_SIZE_LOW)
 
 #define KSTK_EIP(task)		(task_pt_regs(task)->ip)
 
diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h
index 8d3964fc5f91..b408b1886195 100644
--- a/arch/x86/include/asm/proto.h
+++ b/arch/x86/include/asm/proto.h
@@ -24,6 +24,9 @@ void entry_SYSENTER_compat(void);
 void __end_entry_SYSENTER_compat(void);
 void entry_SYSCALL_compat(void);
 void entry_INT80_compat(void);
+#if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV)
+void xen_entry_INT80_compat(void);
+#endif
 #endif
 
 void x86_configure_nx(void);
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 2b5d686ea9f3..91c04c8e67fa 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -9,6 +9,20 @@
 #ifdef __i386__
 
 struct pt_regs {
+	/*
+	 * NB: 32-bit x86 CPUs are inconsistent as what happens in the
+	 * following cases (where %seg represents a segment register):
+	 *
+	 * - pushl %seg: some do a 16-bit write and leave the high
+	 *   bits alone
+	 * - movl %seg, [mem]: some do a 16-bit write despite the movl
+	 * - IDT entry: some (e.g. 486) will leave the high bits of CS
+	 *   and (if applicable) SS undefined.
+	 *
+	 * Fortunately, x86-32 doesn't read the high bits on POP or IRET,
+	 * so we can just treat all of the segment registers as 16-bit
+	 * values.
+	 */
 	unsigned long bx;
 	unsigned long cx;
 	unsigned long dx;
@@ -16,16 +30,22 @@ struct pt_regs {
 	unsigned long di;
 	unsigned long bp;
 	unsigned long ax;
-	unsigned long ds;
-	unsigned long es;
-	unsigned long fs;
-	unsigned long gs;
+	unsigned short ds;
+	unsigned short __dsh;
+	unsigned short es;
+	unsigned short __esh;
+	unsigned short fs;
+	unsigned short __fsh;
+	unsigned short gs;
+	unsigned short __gsh;
 	unsigned long orig_ax;
 	unsigned long ip;
-	unsigned long cs;
+	unsigned short cs;
+	unsigned short __csh;
 	unsigned long flags;
 	unsigned long sp;
-	unsigned long ss;
+	unsigned short ss;
+	unsigned short __ssh;
 };
 
 #else /* __i386__ */
@@ -176,6 +196,17 @@ static inline unsigned long regs_get_register(struct pt_regs *regs,
 	if (offset == offsetof(struct pt_regs, sp) &&
 	    regs->cs == __KERNEL_CS)
 		return kernel_stack_pointer(regs);
+
+	/* The selector fields are 16-bit. */
+	if (offset == offsetof(struct pt_regs, cs) ||
+	    offset == offsetof(struct pt_regs, ss) ||
+	    offset == offsetof(struct pt_regs, ds) ||
+	    offset == offsetof(struct pt_regs, es) ||
+	    offset == offsetof(struct pt_regs, fs) ||
+	    offset == offsetof(struct pt_regs, gs)) {
+		return *(u16 *)((unsigned long)regs + offset);
+
+	}
 #endif
 	return *(unsigned long *)((unsigned long)regs + offset);
 }
diff --git a/arch/x86/include/asm/realmode.h b/arch/x86/include/asm/realmode.h
index 230e1903acf0..90d91520c13a 100644
--- a/arch/x86/include/asm/realmode.h
+++ b/arch/x86/include/asm/realmode.h
@@ -1,6 +1,15 @@
 #ifndef _ARCH_X86_REALMODE_H
 #define _ARCH_X86_REALMODE_H
 
+/*
+ * Flag bit definitions for use with the flags field of the trampoline header
+ * in the CONFIG_X86_64 variant.
+ */
+#define TH_FLAGS_SME_ACTIVE_BIT		0
+#define TH_FLAGS_SME_ACTIVE		BIT(TH_FLAGS_SME_ACTIVE_BIT)
+
+#ifndef __ASSEMBLY__
+
 #include <linux/types.h>
 #include <asm/io.h>
 
@@ -38,6 +47,7 @@ struct trampoline_header {
 	u64 start;
 	u64 efer;
 	u32 cr4;
+	u32 flags;
 #endif
 };
 
@@ -69,4 +79,6 @@ static inline size_t real_mode_size_needed(void)
 void set_real_mode_mem(phys_addr_t mem, size_t size);
 void reserve_real_mode(void);
 
+#endif /* __ASSEMBLY__ */
+
 #endif /* _ARCH_X86_REALMODE_H */
diff --git a/arch/x86/include/asm/refcount.h b/arch/x86/include/asm/refcount.h
new file mode 100644
index 000000000000..ff871210b9f2
--- /dev/null
+++ b/arch/x86/include/asm/refcount.h
@@ -0,0 +1,109 @@
+#ifndef __ASM_X86_REFCOUNT_H
+#define __ASM_X86_REFCOUNT_H
+/*
+ * x86-specific implementation of refcount_t. Based on PAX_REFCOUNT from
+ * PaX/grsecurity.
+ */
+#include <linux/refcount.h>
+
+/*
+ * This is the first portion of the refcount error handling, which lives in
+ * .text.unlikely, and is jumped to from the CPU flag check (in the
+ * following macros). This saves the refcount value location into CX for
+ * the exception handler to use (in mm/extable.c), and then triggers the
+ * central refcount exception. The fixup address for the exception points
+ * back to the regular execution flow in .text.
+ */
+#define _REFCOUNT_EXCEPTION				\
+	".pushsection .text.unlikely\n"			\
+	"111:\tlea %[counter], %%" _ASM_CX "\n"		\
+	"112:\t" ASM_UD0 "\n"				\
+	ASM_UNREACHABLE					\
+	".popsection\n"					\
+	"113:\n"					\
+	_ASM_EXTABLE_REFCOUNT(112b, 113b)
+
+/* Trigger refcount exception if refcount result is negative. */
+#define REFCOUNT_CHECK_LT_ZERO				\
+	"js 111f\n\t"					\
+	_REFCOUNT_EXCEPTION
+
+/* Trigger refcount exception if refcount result is zero or negative. */
+#define REFCOUNT_CHECK_LE_ZERO				\
+	"jz 111f\n\t"					\
+	REFCOUNT_CHECK_LT_ZERO
+
+/* Trigger refcount exception unconditionally. */
+#define REFCOUNT_ERROR					\
+	"jmp 111f\n\t"					\
+	_REFCOUNT_EXCEPTION
+
+static __always_inline void refcount_add(unsigned int i, refcount_t *r)
+{
+	asm volatile(LOCK_PREFIX "addl %1,%0\n\t"
+		REFCOUNT_CHECK_LT_ZERO
+		: [counter] "+m" (r->refs.counter)
+		: "ir" (i)
+		: "cc", "cx");
+}
+
+static __always_inline void refcount_inc(refcount_t *r)
+{
+	asm volatile(LOCK_PREFIX "incl %0\n\t"
+		REFCOUNT_CHECK_LT_ZERO
+		: [counter] "+m" (r->refs.counter)
+		: : "cc", "cx");
+}
+
+static __always_inline void refcount_dec(refcount_t *r)
+{
+	asm volatile(LOCK_PREFIX "decl %0\n\t"
+		REFCOUNT_CHECK_LE_ZERO
+		: [counter] "+m" (r->refs.counter)
+		: : "cc", "cx");
+}
+
+static __always_inline __must_check
+bool refcount_sub_and_test(unsigned int i, refcount_t *r)
+{
+	GEN_BINARY_SUFFIXED_RMWcc(LOCK_PREFIX "subl", REFCOUNT_CHECK_LT_ZERO,
+				  r->refs.counter, "er", i, "%0", e);
+}
+
+static __always_inline __must_check bool refcount_dec_and_test(refcount_t *r)
+{
+	GEN_UNARY_SUFFIXED_RMWcc(LOCK_PREFIX "decl", REFCOUNT_CHECK_LT_ZERO,
+				 r->refs.counter, "%0", e);
+}
+
+static __always_inline __must_check
+bool refcount_add_not_zero(unsigned int i, refcount_t *r)
+{
+	int c, result;
+
+	c = atomic_read(&(r->refs));
+	do {
+		if (unlikely(c == 0))
+			return false;
+
+		result = c + i;
+
+		/* Did we try to increment from/to an undesirable state? */
+		if (unlikely(c < 0 || c == INT_MAX || result < c)) {
+			asm volatile(REFCOUNT_ERROR
+				     : : [counter] "m" (r->refs.counter)
+				     : "cc", "cx");
+			break;
+		}
+
+	} while (!atomic_try_cmpxchg(&(r->refs), &c, result));
+
+	return c != 0;
+}
+
+static __always_inline __must_check bool refcount_inc_not_zero(refcount_t *r)
+{
+	return refcount_add_not_zero(1, r);
+}
+
+#endif
diff --git a/arch/x86/include/asm/rmwcc.h b/arch/x86/include/asm/rmwcc.h
index 661dd305694a..045f99211a99 100644
--- a/arch/x86/include/asm/rmwcc.h
+++ b/arch/x86/include/asm/rmwcc.h
@@ -1,45 +1,56 @@
 #ifndef _ASM_X86_RMWcc
 #define _ASM_X86_RMWcc
 
+#define __CLOBBERS_MEM		"memory"
+#define __CLOBBERS_MEM_CC_CX	"memory", "cc", "cx"
+
 #if !defined(__GCC_ASM_FLAG_OUTPUTS__) && defined(CC_HAVE_ASM_GOTO)
 
 /* Use asm goto */
 
-#define __GEN_RMWcc(fullop, var, cc, ...)				\
+#define __GEN_RMWcc(fullop, var, cc, clobbers, ...)			\
 do {									\
 	asm_volatile_goto (fullop "; j" #cc " %l[cc_label]"		\
-			: : "m" (var), ## __VA_ARGS__ 			\
-			: "memory" : cc_label);				\
+			: : [counter] "m" (var), ## __VA_ARGS__		\
+			: clobbers : cc_label);				\
 	return 0;							\
 cc_label:								\
 	return 1;							\
 } while (0)
 
-#define GEN_UNARY_RMWcc(op, var, arg0, cc) 				\
-	__GEN_RMWcc(op " " arg0, var, cc)
+#define __BINARY_RMWcc_ARG	" %1, "
 
-#define GEN_BINARY_RMWcc(op, var, vcon, val, arg0, cc)			\
-	__GEN_RMWcc(op " %1, " arg0, var, cc, vcon (val))
 
 #else /* defined(__GCC_ASM_FLAG_OUTPUTS__) || !defined(CC_HAVE_ASM_GOTO) */
 
 /* Use flags output or a set instruction */
 
-#define __GEN_RMWcc(fullop, var, cc, ...)				\
+#define __GEN_RMWcc(fullop, var, cc, clobbers, ...)			\
 do {									\
 	bool c;								\
 	asm volatile (fullop ";" CC_SET(cc)				\
-			: "+m" (var), CC_OUT(cc) (c)			\
-			: __VA_ARGS__ : "memory");			\
+			: [counter] "+m" (var), CC_OUT(cc) (c)		\
+			: __VA_ARGS__ : clobbers);			\
 	return c;							\
 } while (0)
 
+#define __BINARY_RMWcc_ARG	" %2, "
+
+#endif /* defined(__GCC_ASM_FLAG_OUTPUTS__) || !defined(CC_HAVE_ASM_GOTO) */
+
 #define GEN_UNARY_RMWcc(op, var, arg0, cc)				\
-	__GEN_RMWcc(op " " arg0, var, cc)
+	__GEN_RMWcc(op " " arg0, var, cc, __CLOBBERS_MEM)
+
+#define GEN_UNARY_SUFFIXED_RMWcc(op, suffix, var, arg0, cc)		\
+	__GEN_RMWcc(op " " arg0 "\n\t" suffix, var, cc,			\
+		    __CLOBBERS_MEM_CC_CX)
 
 #define GEN_BINARY_RMWcc(op, var, vcon, val, arg0, cc)			\
-	__GEN_RMWcc(op " %2, " arg0, var, cc, vcon (val))
+	__GEN_RMWcc(op __BINARY_RMWcc_ARG arg0, var, cc,		\
+		    __CLOBBERS_MEM, vcon (val))
 
-#endif /* defined(__GCC_ASM_FLAG_OUTPUTS__) || !defined(CC_HAVE_ASM_GOTO) */
+#define GEN_BINARY_SUFFIXED_RMWcc(op, suffix, var, vcon, val, arg0, cc)	\
+	__GEN_RMWcc(op __BINARY_RMWcc_ARG arg0 "\n\t" suffix, var, cc,	\
+		    __CLOBBERS_MEM_CC_CX, vcon (val))
 
 #endif /* _ASM_X86_RMWcc */
diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h
index 1549caa098f0..066aaf813141 100644
--- a/arch/x86/include/asm/segment.h
+++ b/arch/x86/include/asm/segment.h
@@ -238,9 +238,7 @@
 #ifndef __ASSEMBLY__
 
 extern const char early_idt_handler_array[NUM_EXCEPTION_VECTORS][EARLY_IDT_HANDLER_SIZE];
-#ifdef CONFIG_TRACING
-# define trace_early_idt_handler_array early_idt_handler_array
-#endif
+extern void early_ignore_irq(void);
 
 /*
  * Load a segment. Fall back on loading the zero segment if something goes
diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h
index eaec6c364e42..cd71273ec49d 100644
--- a/arch/x86/include/asm/set_memory.h
+++ b/arch/x86/include/asm/set_memory.h
@@ -11,6 +11,7 @@
  * Executability : eXeutable, NoteXecutable
  * Read/Write    : ReadOnly, ReadWrite
  * Presence      : NotPresent
+ * Encryption    : Encrypted, Decrypted
  *
  * Within a category, the attributes are mutually exclusive.
  *
@@ -42,6 +43,8 @@ int set_memory_wt(unsigned long addr, int numpages);
 int set_memory_wb(unsigned long addr, int numpages);
 int set_memory_np(unsigned long addr, int numpages);
 int set_memory_4k(unsigned long addr, int numpages);
+int set_memory_encrypted(unsigned long addr, int numpages);
+int set_memory_decrypted(unsigned long addr, int numpages);
 
 int set_memory_array_uc(unsigned long *addr, int addrinarray);
 int set_memory_array_wc(unsigned long *addr, int addrinarray);
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index e4585a393965..a65cf544686a 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -39,6 +39,7 @@ static inline void vsmp_init(void) { }
 #endif
 
 void setup_bios_corruption_check(void);
+void early_platform_quirks(void);
 
 extern unsigned long saved_video_mode;
 
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index e00e1bd6e7b3..5161da1a0fa0 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -98,6 +98,7 @@ struct thread_info {
 #define TIF_SYSCALL_TRACEPOINT	28	/* syscall tracepoint instrumentation */
 #define TIF_ADDR32		29	/* 32-bit address space on 64 bits */
 #define TIF_X32			30	/* 32-bit native x86-64 binary */
+#define TIF_FSCHECK		31	/* Check FS is USER_DS on return */
 
 #define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
 #define _TIF_NOTIFY_RESUME	(1 << TIF_NOTIFY_RESUME)
@@ -122,6 +123,7 @@ struct thread_info {
 #define _TIF_SYSCALL_TRACEPOINT	(1 << TIF_SYSCALL_TRACEPOINT)
 #define _TIF_ADDR32		(1 << TIF_ADDR32)
 #define _TIF_X32		(1 << TIF_X32)
+#define _TIF_FSCHECK		(1 << TIF_FSCHECK)
 
 /*
  * work to do in syscall_trace_enter().  Also includes TIF_NOHZ for
@@ -137,7 +139,8 @@ struct thread_info {
 	(_TIF_SYSCALL_TRACE | _TIF_NOTIFY_RESUME | _TIF_SIGPENDING |	\
 	 _TIF_NEED_RESCHED | _TIF_SINGLESTEP | _TIF_SYSCALL_EMU |	\
 	 _TIF_SYSCALL_AUDIT | _TIF_USER_RETURN_NOTIFY | _TIF_UPROBE |	\
-	 _TIF_PATCH_PENDING | _TIF_NOHZ | _TIF_SYSCALL_TRACEPOINT)
+	 _TIF_PATCH_PENDING | _TIF_NOHZ | _TIF_SYSCALL_TRACEPOINT |	\
+	 _TIF_FSCHECK)
 
 /* flags to check in __switch_to() */
 #define _TIF_WORK_CTXSW							\
diff --git a/arch/x86/include/asm/tlb.h b/arch/x86/include/asm/tlb.h
index c7797307fc2b..79a4ca6a9606 100644
--- a/arch/x86/include/asm/tlb.h
+++ b/arch/x86/include/asm/tlb.h
@@ -15,4 +15,18 @@
 
 #include <asm-generic/tlb.h>
 
+/*
+ * While x86 architecture in general requires an IPI to perform TLB
+ * shootdown, enablement code for several hypervisors overrides
+ * .flush_tlb_others hook in pv_mmu_ops and implements it by issuing
+ * a hypercall. To keep software pagetable walkers safe in this case we
+ * switch to RCU based table free (HAVE_RCU_TABLE_FREE). See the comment
+ * below 'ifdef CONFIG_HAVE_RCU_TABLE_FREE' in include/asm-generic/tlb.h
+ * for more details.
+ */
+static inline void __tlb_remove_table(void *table)
+{
+	free_page_and_swap_cache(table);
+}
+
 #endif /* _ASM_X86_TLB_H */
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 50ea3482e1d1..4893abf7f74f 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -57,6 +57,23 @@ static inline void invpcid_flush_all_nonglobals(void)
 	__invpcid(0, 0, INVPCID_TYPE_ALL_NON_GLOBAL);
 }
 
+static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
+{
+	u64 new_tlb_gen;
+
+	/*
+	 * Bump the generation count.  This also serves as a full barrier
+	 * that synchronizes with switch_mm(): callers are required to order
+	 * their read of mm_cpumask after their writes to the paging
+	 * structures.
+	 */
+	smp_mb__before_atomic();
+	new_tlb_gen = atomic64_inc_return(&mm->context.tlb_gen);
+	smp_mb__after_atomic();
+
+	return new_tlb_gen;
+}
+
 #ifdef CONFIG_PARAVIRT
 #include <asm/paravirt.h>
 #else
@@ -65,6 +82,17 @@ static inline void invpcid_flush_all_nonglobals(void)
 #define __flush_tlb_single(addr) __native_flush_tlb_single(addr)
 #endif
 
+/*
+ * 6 because 6 should be plenty and struct tlb_state will fit in
+ * two cache lines.
+ */
+#define TLB_NR_DYN_ASIDS 6
+
+struct tlb_context {
+	u64 ctx_id;
+	u64 tlb_gen;
+};
+
 struct tlb_state {
 	/*
 	 * cpu_tlbstate.loaded_mm should match CR3 whenever interrupts
@@ -73,13 +101,35 @@ struct tlb_state {
 	 * mode even if we've already switched back to swapper_pg_dir.
 	 */
 	struct mm_struct *loaded_mm;
-	int state;
+	u16 loaded_mm_asid;
+	u16 next_asid;
 
 	/*
 	 * Access to this CR4 shadow and to H/W CR4 is protected by
 	 * disabling interrupts when modifying either one.
 	 */
 	unsigned long cr4;
+
+	/*
+	 * This is a list of all contexts that might exist in the TLB.
+	 * There is one per ASID that we use, and the ASID (what the
+	 * CPU calls PCID) is the index into ctxts.
+	 *
+	 * For each context, ctx_id indicates which mm the TLB's user
+	 * entries came from.  As an invariant, the TLB will never
+	 * contain entries that are out-of-date as when that mm reached
+	 * the tlb_gen in the list.
+	 *
+	 * To be clear, this means that it's legal for the TLB code to
+	 * flush the TLB without updating tlb_gen.  This can happen
+	 * (for now, at least) due to paravirt remote flushes.
+	 *
+	 * NB: context 0 is a bit special, since it's also used by
+	 * various bits of init code.  This is fine -- code that
+	 * isn't aware of PCID will end up harmlessly flushing
+	 * context 0.
+	 */
+	struct tlb_context ctxs[TLB_NR_DYN_ASIDS];
 };
 DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate);
 
@@ -148,6 +198,8 @@ static inline void cr4_set_bits_and_update_boot(unsigned long mask)
 	cr4_set_bits(mask);
 }
 
+extern void initialize_tlbstate_and_flush(void);
+
 static inline void __native_flush_tlb(void)
 {
 	/*
@@ -207,6 +259,14 @@ static inline void __flush_tlb_all(void)
 		__flush_tlb_global();
 	else
 		__flush_tlb();
+
+	/*
+	 * Note: if we somehow had PCID but not PGE, then this wouldn't work --
+	 * we'd end up flushing kernel translations for the current ASID but
+	 * we might fail to flush kernel translations for other cached ASIDs.
+	 *
+	 * To avoid this issue, we force PCID off if PGE is off.
+	 */
 }
 
 static inline void __flush_tlb_one(unsigned long addr)
@@ -231,9 +291,26 @@ static inline void __flush_tlb_one(unsigned long addr)
  * and page-granular flushes are available only on i486 and up.
  */
 struct flush_tlb_info {
-	struct mm_struct *mm;
-	unsigned long start;
-	unsigned long end;
+	/*
+	 * We support several kinds of flushes.
+	 *
+	 * - Fully flush a single mm.  .mm will be set, .end will be
+	 *   TLB_FLUSH_ALL, and .new_tlb_gen will be the tlb_gen to
+	 *   which the IPI sender is trying to catch us up.
+	 *
+	 * - Partially flush a single mm.  .mm will be set, .start and
+	 *   .end will indicate the range, and .new_tlb_gen will be set
+	 *   such that the changes between generation .new_tlb_gen-1 and
+	 *   .new_tlb_gen are entirely contained in the indicated range.
+	 *
+	 * - Fully flush all mms whose tlb_gens have been updated.  .mm
+	 *   will be NULL, .end will be TLB_FLUSH_ALL, and .new_tlb_gen
+	 *   will be zero.
+	 */
+	struct mm_struct	*mm;
+	unsigned long		start;
+	unsigned long		end;
+	u64			new_tlb_gen;
 };
 
 #define local_flush_tlb() __flush_tlb()
@@ -256,12 +333,10 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long a)
 void native_flush_tlb_others(const struct cpumask *cpumask,
 			     const struct flush_tlb_info *info);
 
-#define TLBSTATE_OK	1
-#define TLBSTATE_LAZY	2
-
 static inline void arch_tlbbatch_add_mm(struct arch_tlbflush_unmap_batch *batch,
 					struct mm_struct *mm)
 {
+	inc_mm_tlb_gen(mm);
 	cpumask_or(&batch->cpumask, &batch->cpumask, mm_cpumask(mm));
 }
 
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 6358a85e2270..c1d2a9892352 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -75,12 +75,6 @@ static inline const struct cpumask *cpumask_of_node(int node)
 
 extern void setup_node_to_cpumask_map(void);
 
-/*
- * Returns the number of the node containing Node 'node'. This
- * architecture is flat, so it is a pretty simple function!
- */
-#define parent_node(node) (node)
-
 #define pcibus_to_node(bus) __pcibus_to_node(bus)
 
 extern int __node_distance(int, int);
diff --git a/arch/x86/include/asm/trace/common.h b/arch/x86/include/asm/trace/common.h
new file mode 100644
index 000000000000..57c8da027d99
--- /dev/null
+++ b/arch/x86/include/asm/trace/common.h
@@ -0,0 +1,16 @@
+#ifndef _ASM_TRACE_COMMON_H
+#define _ASM_TRACE_COMMON_H
+
+#ifdef CONFIG_TRACING
+DECLARE_STATIC_KEY_FALSE(trace_pagefault_key);
+#define trace_pagefault_enabled()			\
+	static_branch_unlikely(&trace_pagefault_key)
+DECLARE_STATIC_KEY_FALSE(trace_resched_ipi_key);
+#define trace_resched_ipi_enabled()			\
+	static_branch_unlikely(&trace_resched_ipi_key)
+#else
+static inline bool trace_pagefault_enabled(void) { return false; }
+static inline bool trace_resched_ipi_enabled(void) { return false; }
+#endif
+
+#endif
diff --git a/arch/x86/include/asm/trace/exceptions.h b/arch/x86/include/asm/trace/exceptions.h
index 2422b14c50a7..5665bf205b8d 100644
--- a/arch/x86/include/asm/trace/exceptions.h
+++ b/arch/x86/include/asm/trace/exceptions.h
@@ -5,9 +5,10 @@
 #define _TRACE_PAGE_FAULT_H
 
 #include <linux/tracepoint.h>
+#include <asm/trace/common.h>
 
-extern int trace_irq_vector_regfunc(void);
-extern void trace_irq_vector_unregfunc(void);
+extern int trace_pagefault_reg(void);
+extern void trace_pagefault_unreg(void);
 
 DECLARE_EVENT_CLASS(x86_exceptions,
 
@@ -37,8 +38,7 @@ DEFINE_EVENT_FN(x86_exceptions, name,				\
 	TP_PROTO(unsigned long address,	struct pt_regs *regs,	\
 		 unsigned long error_code),			\
 	TP_ARGS(address, regs, error_code),			\
-	trace_irq_vector_regfunc,				\
-	trace_irq_vector_unregfunc);
+	trace_pagefault_reg, trace_pagefault_unreg);
 
 DEFINE_PAGE_FAULT_EVENT(page_fault_user);
 DEFINE_PAGE_FAULT_EVENT(page_fault_kernel);
diff --git a/arch/x86/include/asm/trace/hyperv.h b/arch/x86/include/asm/trace/hyperv.h
new file mode 100644
index 000000000000..4253bca99989
--- /dev/null
+++ b/arch/x86/include/asm/trace/hyperv.h
@@ -0,0 +1,40 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM hyperv
+
+#if !defined(_TRACE_HYPERV_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_HYPERV_H
+
+#include <linux/tracepoint.h>
+
+#if IS_ENABLED(CONFIG_HYPERV)
+
+TRACE_EVENT(hyperv_mmu_flush_tlb_others,
+	    TP_PROTO(const struct cpumask *cpus,
+		     const struct flush_tlb_info *info),
+	    TP_ARGS(cpus, info),
+	    TP_STRUCT__entry(
+		    __field(unsigned int, ncpus)
+		    __field(struct mm_struct *, mm)
+		    __field(unsigned long, addr)
+		    __field(unsigned long, end)
+		    ),
+	    TP_fast_assign(__entry->ncpus = cpumask_weight(cpus);
+			   __entry->mm = info->mm;
+			   __entry->addr = info->start;
+			   __entry->end = info->end;
+		    ),
+	    TP_printk("ncpus %d mm %p addr %lx, end %lx",
+		      __entry->ncpus, __entry->mm,
+		      __entry->addr, __entry->end)
+	);
+
+#endif /* CONFIG_HYPERV */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH asm/trace/
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE hyperv
+#endif /* _TRACE_HYPERV_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/arch/x86/include/asm/trace/irq_vectors.h b/arch/x86/include/asm/trace/irq_vectors.h
index 32dd6a9e343c..1599d394c8c1 100644
--- a/arch/x86/include/asm/trace/irq_vectors.h
+++ b/arch/x86/include/asm/trace/irq_vectors.h
@@ -5,9 +5,12 @@
 #define _TRACE_IRQ_VECTORS_H
 
 #include <linux/tracepoint.h>
+#include <asm/trace/common.h>
 
-extern int trace_irq_vector_regfunc(void);
-extern void trace_irq_vector_unregfunc(void);
+#ifdef CONFIG_X86_LOCAL_APIC
+
+extern int trace_resched_ipi_reg(void);
+extern void trace_resched_ipi_unreg(void);
 
 DECLARE_EVENT_CLASS(x86_irq_vector,
 
@@ -28,15 +31,22 @@ DECLARE_EVENT_CLASS(x86_irq_vector,
 #define DEFINE_IRQ_VECTOR_EVENT(name)		\
 DEFINE_EVENT_FN(x86_irq_vector, name##_entry,	\
 	TP_PROTO(int vector),			\
+	TP_ARGS(vector), NULL, NULL);		\
+DEFINE_EVENT_FN(x86_irq_vector, name##_exit,	\
+	TP_PROTO(int vector),			\
+	TP_ARGS(vector), NULL, NULL);
+
+#define DEFINE_RESCHED_IPI_EVENT(name)		\
+DEFINE_EVENT_FN(x86_irq_vector, name##_entry,	\
+	TP_PROTO(int vector),			\
 	TP_ARGS(vector),			\
-	trace_irq_vector_regfunc,		\
-	trace_irq_vector_unregfunc);		\
+	trace_resched_ipi_reg,			\
+	trace_resched_ipi_unreg);		\
 DEFINE_EVENT_FN(x86_irq_vector, name##_exit,	\
 	TP_PROTO(int vector),			\
 	TP_ARGS(vector),			\
-	trace_irq_vector_regfunc,		\
-	trace_irq_vector_unregfunc);
-
+	trace_resched_ipi_reg,			\
+	trace_resched_ipi_unreg);
 
 /*
  * local_timer - called when entering/exiting a local timer interrupt
@@ -45,11 +55,6 @@ DEFINE_EVENT_FN(x86_irq_vector, name##_exit,	\
 DEFINE_IRQ_VECTOR_EVENT(local_timer);
 
 /*
- * reschedule - called when entering/exiting a reschedule vector handler
- */
-DEFINE_IRQ_VECTOR_EVENT(reschedule);
-
-/*
  * spurious_apic - called when entering/exiting a spurious apic vector handler
  */
 DEFINE_IRQ_VECTOR_EVENT(spurious_apic);
@@ -65,6 +70,7 @@ DEFINE_IRQ_VECTOR_EVENT(error_apic);
  */
 DEFINE_IRQ_VECTOR_EVENT(x86_platform_ipi);
 
+#ifdef CONFIG_IRQ_WORK
 /*
  * irq_work - called when entering/exiting a irq work interrupt
  * vector handler
@@ -81,6 +87,18 @@ DEFINE_IRQ_VECTOR_EVENT(irq_work);
  *  4) goto 1
  */
 TRACE_EVENT_PERF_PERM(irq_work_exit, is_sampling_event(p_event) ? -EPERM : 0);
+#endif
+
+/*
+ * The ifdef is required because that tracepoint macro hell emits tracepoint
+ * code in files which include this header even if the tracepoint is not
+ * enabled. Brilliant stuff that.
+ */
+#ifdef CONFIG_SMP
+/*
+ * reschedule - called when entering/exiting a reschedule vector handler
+ */
+DEFINE_RESCHED_IPI_EVENT(reschedule);
 
 /*
  * call_function - called when entering/exiting a call function interrupt
@@ -93,24 +111,33 @@ DEFINE_IRQ_VECTOR_EVENT(call_function);
  * single interrupt vector handler
  */
 DEFINE_IRQ_VECTOR_EVENT(call_function_single);
+#endif
 
+#ifdef CONFIG_X86_MCE_THRESHOLD
 /*
  * threshold_apic - called when entering/exiting a threshold apic interrupt
  * vector handler
  */
 DEFINE_IRQ_VECTOR_EVENT(threshold_apic);
+#endif
 
+#ifdef CONFIG_X86_MCE_AMD
 /*
  * deferred_error_apic - called when entering/exiting a deferred apic interrupt
  * vector handler
  */
 DEFINE_IRQ_VECTOR_EVENT(deferred_error_apic);
+#endif
 
+#ifdef CONFIG_X86_THERMAL_VECTOR
 /*
  * thermal_apic - called when entering/exiting a thermal apic interrupt
  * vector handler
  */
 DEFINE_IRQ_VECTOR_EVENT(thermal_apic);
+#endif
+
+#endif /* CONFIG_X86_LOCAL_APIC */
 
 #undef TRACE_INCLUDE_PATH
 #define TRACE_INCLUDE_PATH .
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index 01fd0a7f48cd..5545f6459bf5 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -13,9 +13,6 @@ asmlinkage void divide_error(void);
 asmlinkage void debug(void);
 asmlinkage void nmi(void);
 asmlinkage void int3(void);
-asmlinkage void xen_debug(void);
-asmlinkage void xen_int3(void);
-asmlinkage void xen_stack_segment(void);
 asmlinkage void overflow(void);
 asmlinkage void bounds(void);
 asmlinkage void invalid_op(void);
@@ -38,22 +35,29 @@ asmlinkage void machine_check(void);
 #endif /* CONFIG_X86_MCE */
 asmlinkage void simd_coprocessor_error(void);
 
-#ifdef CONFIG_TRACING
-asmlinkage void trace_page_fault(void);
-#define trace_stack_segment stack_segment
-#define trace_divide_error divide_error
-#define trace_bounds bounds
-#define trace_invalid_op invalid_op
-#define trace_device_not_available device_not_available
-#define trace_coprocessor_segment_overrun coprocessor_segment_overrun
-#define trace_invalid_TSS invalid_TSS
-#define trace_segment_not_present segment_not_present
-#define trace_general_protection general_protection
-#define trace_spurious_interrupt_bug spurious_interrupt_bug
-#define trace_coprocessor_error coprocessor_error
-#define trace_alignment_check alignment_check
-#define trace_simd_coprocessor_error simd_coprocessor_error
-#define trace_async_page_fault async_page_fault
+#if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV)
+asmlinkage void xen_divide_error(void);
+asmlinkage void xen_xendebug(void);
+asmlinkage void xen_xenint3(void);
+asmlinkage void xen_nmi(void);
+asmlinkage void xen_overflow(void);
+asmlinkage void xen_bounds(void);
+asmlinkage void xen_invalid_op(void);
+asmlinkage void xen_device_not_available(void);
+asmlinkage void xen_double_fault(void);
+asmlinkage void xen_coprocessor_segment_overrun(void);
+asmlinkage void xen_invalid_TSS(void);
+asmlinkage void xen_segment_not_present(void);
+asmlinkage void xen_stack_segment(void);
+asmlinkage void xen_general_protection(void);
+asmlinkage void xen_page_fault(void);
+asmlinkage void xen_spurious_interrupt_bug(void);
+asmlinkage void xen_coprocessor_error(void);
+asmlinkage void xen_alignment_check(void);
+#ifdef CONFIG_X86_MCE
+asmlinkage void xen_machine_check(void);
+#endif /* CONFIG_X86_MCE */
+asmlinkage void xen_simd_coprocessor_error(void);
 #endif
 
 dotraplinkage void do_divide_error(struct pt_regs *, long);
@@ -74,14 +78,6 @@ asmlinkage struct pt_regs *sync_regs(struct pt_regs *);
 #endif
 dotraplinkage void do_general_protection(struct pt_regs *, long);
 dotraplinkage void do_page_fault(struct pt_regs *, unsigned long);
-#ifdef CONFIG_TRACING
-dotraplinkage void trace_do_page_fault(struct pt_regs *, unsigned long);
-#else
-static inline void trace_do_page_fault(struct pt_regs *regs, unsigned long error)
-{
-	do_page_fault(regs, error);
-}
-#endif
 dotraplinkage void do_spurious_interrupt_bug(struct pt_regs *, long);
 dotraplinkage void do_coprocessor_error(struct pt_regs *, long);
 dotraplinkage void do_alignment_check(struct pt_regs *, long);
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 30269dafec47..184eb9894dae 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -26,7 +26,12 @@
 
 #define get_ds()	(KERNEL_DS)
 #define get_fs()	(current->thread.addr_limit)
-#define set_fs(x)	(current->thread.addr_limit = (x))
+static inline void set_fs(mm_segment_t fs)
+{
+	current->thread.addr_limit = fs;
+	/* On user-mode return, check fs is correct */
+	set_thread_flag(TIF_FSCHECK);
+}
 
 #define segment_eq(a, b)	((a).seg == (b).seg)
 
diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h
index e6676495b125..e9f793e2df7a 100644
--- a/arch/x86/include/asm/unwind.h
+++ b/arch/x86/include/asm/unwind.h
@@ -12,11 +12,14 @@ struct unwind_state {
 	struct task_struct *task;
 	int graph_idx;
 	bool error;
-#ifdef CONFIG_FRAME_POINTER
+#if defined(CONFIG_ORC_UNWINDER)
+	bool signal, full_regs;
+	unsigned long sp, bp, ip;
+	struct pt_regs *regs;
+#elif defined(CONFIG_FRAME_POINTER_UNWINDER)
 	bool got_irq;
-	unsigned long *bp, *orig_sp;
+	unsigned long *bp, *orig_sp, ip;
 	struct pt_regs *regs;
-	unsigned long ip;
 #else
 	unsigned long *sp;
 #endif
@@ -24,41 +27,30 @@ struct unwind_state {
 
 void __unwind_start(struct unwind_state *state, struct task_struct *task,
 		    struct pt_regs *regs, unsigned long *first_frame);
-
 bool unwind_next_frame(struct unwind_state *state);
-
 unsigned long unwind_get_return_address(struct unwind_state *state);
+unsigned long *unwind_get_return_address_ptr(struct unwind_state *state);
 
 static inline bool unwind_done(struct unwind_state *state)
 {
 	return state->stack_info.type == STACK_TYPE_UNKNOWN;
 }
 
-static inline
-void unwind_start(struct unwind_state *state, struct task_struct *task,
-		  struct pt_regs *regs, unsigned long *first_frame)
-{
-	first_frame = first_frame ? : get_stack_pointer(task, regs);
-
-	__unwind_start(state, task, regs, first_frame);
-}
-
 static inline bool unwind_error(struct unwind_state *state)
 {
 	return state->error;
 }
 
-#ifdef CONFIG_FRAME_POINTER
-
 static inline
-unsigned long *unwind_get_return_address_ptr(struct unwind_state *state)
+void unwind_start(struct unwind_state *state, struct task_struct *task,
+		  struct pt_regs *regs, unsigned long *first_frame)
 {
-	if (unwind_done(state))
-		return NULL;
+	first_frame = first_frame ? : get_stack_pointer(task, regs);
 
-	return state->regs ? &state->regs->ip : state->bp + 1;
+	__unwind_start(state, task, regs, first_frame);
 }
 
+#if defined(CONFIG_ORC_UNWINDER) || defined(CONFIG_FRAME_POINTER_UNWINDER)
 static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
 {
 	if (unwind_done(state))
@@ -66,20 +58,46 @@ static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
 
 	return state->regs;
 }
-
-#else /* !CONFIG_FRAME_POINTER */
-
-static inline
-unsigned long *unwind_get_return_address_ptr(struct unwind_state *state)
+#else
+static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
 {
 	return NULL;
 }
+#endif
 
-static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
+#ifdef CONFIG_ORC_UNWINDER
+void unwind_init(void);
+void unwind_module_init(struct module *mod, void *orc_ip, size_t orc_ip_size,
+			void *orc, size_t orc_size);
+#else
+static inline void unwind_init(void) {}
+static inline
+void unwind_module_init(struct module *mod, void *orc_ip, size_t orc_ip_size,
+			void *orc, size_t orc_size) {}
+#endif
+
+/*
+ * This disables KASAN checking when reading a value from another task's stack,
+ * since the other task could be running on another CPU and could have poisoned
+ * the stack in the meantime.
+ */
+#define READ_ONCE_TASK_STACK(task, x)			\
+({							\
+	unsigned long val;				\
+	if (task == current)				\
+		val = READ_ONCE(x);			\
+	else						\
+		val = READ_ONCE_NOCHECK(x);		\
+	val;						\
+})
+
+static inline bool task_on_another_cpu(struct task_struct *task)
 {
-	return NULL;
+#ifdef CONFIG_SMP
+	return task != current && task->on_cpu;
+#else
+	return false;
+#endif
 }
 
-#endif /* CONFIG_FRAME_POINTER */
-
 #endif /* _ASM_X86_UNWIND_H */
diff --git a/arch/x86/include/asm/unwind_hints.h b/arch/x86/include/asm/unwind_hints.h
new file mode 100644
index 000000000000..bae46fc6b9de
--- /dev/null
+++ b/arch/x86/include/asm/unwind_hints.h
@@ -0,0 +1,105 @@
+#ifndef _ASM_X86_UNWIND_HINTS_H
+#define _ASM_X86_UNWIND_HINTS_H
+
+#include "orc_types.h"
+
+#ifdef __ASSEMBLY__
+
+/*
+ * In asm, there are two kinds of code: normal C-type callable functions and
+ * the rest.  The normal callable functions can be called by other code, and
+ * don't do anything unusual with the stack.  Such normal callable functions
+ * are annotated with the ENTRY/ENDPROC macros.  Most asm code falls in this
+ * category.  In this case, no special debugging annotations are needed because
+ * objtool can automatically generate the ORC data for the ORC unwinder to read
+ * at runtime.
+ *
+ * Anything which doesn't fall into the above category, such as syscall and
+ * interrupt handlers, tends to not be called directly by other functions, and
+ * often does unusual non-C-function-type things with the stack pointer.  Such
+ * code needs to be annotated such that objtool can understand it.  The
+ * following CFI hint macros are for this type of code.
+ *
+ * These macros provide hints to objtool about the state of the stack at each
+ * instruction.  Objtool starts from the hints and follows the code flow,
+ * making automatic CFI adjustments when it sees pushes and pops, filling out
+ * the debuginfo as necessary.  It will also warn if it sees any
+ * inconsistencies.
+ */
+.macro UNWIND_HINT sp_reg=ORC_REG_SP sp_offset=0 type=ORC_TYPE_CALL
+#ifdef CONFIG_STACK_VALIDATION
+.Lunwind_hint_ip_\@:
+	.pushsection .discard.unwind_hints
+		/* struct unwind_hint */
+		.long .Lunwind_hint_ip_\@ - .
+		.short \sp_offset
+		.byte \sp_reg
+		.byte \type
+	.popsection
+#endif
+.endm
+
+.macro UNWIND_HINT_EMPTY
+	UNWIND_HINT sp_reg=ORC_REG_UNDEFINED
+.endm
+
+.macro UNWIND_HINT_REGS base=%rsp offset=0 indirect=0 extra=1 iret=0
+	.if \base == %rsp
+		.if \indirect
+			.set sp_reg, ORC_REG_SP_INDIRECT
+		.else
+			.set sp_reg, ORC_REG_SP
+		.endif
+	.elseif \base == %rbp
+		.set sp_reg, ORC_REG_BP
+	.elseif \base == %rdi
+		.set sp_reg, ORC_REG_DI
+	.elseif \base == %rdx
+		.set sp_reg, ORC_REG_DX
+	.elseif \base == %r10
+		.set sp_reg, ORC_REG_R10
+	.else
+		.error "UNWIND_HINT_REGS: bad base register"
+	.endif
+
+	.set sp_offset, \offset
+
+	.if \iret
+		.set type, ORC_TYPE_REGS_IRET
+	.elseif \extra == 0
+		.set type, ORC_TYPE_REGS_IRET
+		.set sp_offset, \offset + (16*8)
+	.else
+		.set type, ORC_TYPE_REGS
+	.endif
+
+	UNWIND_HINT sp_reg=sp_reg sp_offset=sp_offset type=type
+.endm
+
+.macro UNWIND_HINT_IRET_REGS base=%rsp offset=0
+	UNWIND_HINT_REGS base=\base offset=\offset iret=1
+.endm
+
+.macro UNWIND_HINT_FUNC sp_offset=8
+	UNWIND_HINT sp_offset=\sp_offset
+.endm
+
+#else /* !__ASSEMBLY__ */
+
+#define UNWIND_HINT(sp_reg, sp_offset, type)			\
+	"987: \n\t"						\
+	".pushsection .discard.unwind_hints\n\t"		\
+	/* struct unwind_hint */				\
+	".long 987b - .\n\t"					\
+	".short " __stringify(sp_offset) "\n\t"		\
+	".byte " __stringify(sp_reg) "\n\t"			\
+	".byte " __stringify(type) "\n\t"			\
+	".popsection\n\t"
+
+#define UNWIND_HINT_SAVE UNWIND_HINT(0, 0, UNWIND_HINT_TYPE_SAVE)
+
+#define UNWIND_HINT_RESTORE UNWIND_HINT(0, 0, UNWIND_HINT_TYPE_RESTORE)
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_X86_UNWIND_HINTS_H */
diff --git a/arch/x86/include/asm/vga.h b/arch/x86/include/asm/vga.h
index c4b9dc2f67c5..9f42beefc67a 100644
--- a/arch/x86/include/asm/vga.h
+++ b/arch/x86/include/asm/vga.h
@@ -7,12 +7,24 @@
 #ifndef _ASM_X86_VGA_H
 #define _ASM_X86_VGA_H
 
+#include <asm/set_memory.h>
+
 /*
  *	On the PC, we can just recalculate addresses and then
  *	access the videoram directly without any black magic.
+ *	To support memory encryption however, we need to access
+ *	the videoram as decrypted memory.
  */
 
-#define VGA_MAP_MEM(x, s) (unsigned long)phys_to_virt(x)
+#define VGA_MAP_MEM(x, s)					\
+({								\
+	unsigned long start = (unsigned long)phys_to_virt(x);	\
+								\
+	if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT))			\
+		set_memory_decrypted(start, (s) >> PAGE_SHIFT);	\
+								\
+	start;							\
+})
 
 #define vga_readb(x) (*(x))
 #define vga_writeb(x, y) (*(y) = (x))
diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index 11071fcd630e..9606688caa4b 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -552,6 +552,8 @@ static inline void
 MULTI_update_descriptor(struct multicall_entry *mcl, u64 maddr,
 			struct desc_struct desc)
 {
+	u32 *p = (u32 *) &desc;
+
 	mcl->op = __HYPERVISOR_update_descriptor;
 	if (sizeof(maddr) == sizeof(long)) {
 		mcl->args[0] = maddr;
@@ -559,8 +561,8 @@ MULTI_update_descriptor(struct multicall_entry *mcl, u64 maddr,
 	} else {
 		mcl->args[0] = maddr;
 		mcl->args[1] = maddr >> 32;
-		mcl->args[2] = desc.a;
-		mcl->args[3] = desc.b;
+		mcl->args[2] = *p++;
+		mcl->args[3] = *p;
 	}
 
 	trace_xen_mc_entry(mcl, sizeof(maddr) == sizeof(long) ? 2 : 4);
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index 8417ef7c3885..07b6531813c4 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -158,9 +158,6 @@ static inline unsigned long mfn_to_pfn_no_overrides(unsigned long mfn)
 	unsigned long pfn;
 	int ret;
 
-	if (xen_feature(XENFEAT_auto_translated_physmap))
-		return mfn;
-
 	if (unlikely(mfn >= machine_to_phys_nr))
 		return ~0;
 
@@ -317,8 +314,6 @@ static inline pte_t __pte_ma(pteval_t x)
 #define p4d_val_ma(x)	((x).p4d)
 #endif
 
-void xen_set_domain_pte(pte_t *ptep, pte_t pteval, unsigned domid);
-
 xmaddr_t arbitrary_virt_to_machine(void *address);
 unsigned long arbitrary_virt_to_mfn(void *vaddr);
 void make_lowmem_page_readonly(void *vaddr);
diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h
index ddef37b16af2..66b8f93333d1 100644
--- a/arch/x86/include/uapi/asm/bootparam.h
+++ b/arch/x86/include/uapi/asm/bootparam.h
@@ -201,7 +201,7 @@ struct boot_params {
  *
  * @X86_SUBARCH_PC: Should be used if the hardware is enumerable using standard
  *	PC mechanisms (PCI, ACPI) and doesn't need a special boot flow.
- * @X86_SUBARCH_LGUEST: Used for x86 hypervisor demo, lguest
+ * @X86_SUBARCH_LGUEST: Used for x86 hypervisor demo, lguest, deprecated
  * @X86_SUBARCH_XEN: Used for Xen guest types which follow the PV boot path,
  * 	which start at asm startup_xen() entry point and later jump to the C
  * 	xen_start_kernel() entry point. Both domU and dom0 type of guests are
diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
index 127ddadee1a5..7032f4d8dff3 100644
--- a/arch/x86/include/uapi/asm/hyperv.h
+++ b/arch/x86/include/uapi/asm/hyperv.h
@@ -149,6 +149,9 @@
  */
 #define HV_X64_DEPRECATING_AEOI_RECOMMENDED	(1 << 9)
 
+/* Recommend using the newer ExProcessorMasks interface */
+#define HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED	(1 << 11)
+
 /*
  * HV_VP_SET available
  */
@@ -242,7 +245,11 @@
 		(~((1ull << HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT) - 1))
 
 /* Declare the various hypercall operations. */
+#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE	0x0002
+#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST	0x0003
 #define HVCALL_NOTIFY_LONG_SPIN_WAIT		0x0008
+#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX  0x0013
+#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX   0x0014
 #define HVCALL_POST_MESSAGE			0x005c
 #define HVCALL_SIGNAL_EVENT			0x005d
 
@@ -259,6 +266,16 @@
 #define HV_PROCESSOR_POWER_STATE_C2		2
 #define HV_PROCESSOR_POWER_STATE_C3		3
 
+#define HV_FLUSH_ALL_PROCESSORS			BIT(0)
+#define HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES	BIT(1)
+#define HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY	BIT(2)
+#define HV_FLUSH_USE_EXTENDED_RANGE_FORMAT	BIT(3)
+
+enum HV_GENERIC_SET_FORMAT {
+	HV_GENERIC_SET_SPARCE_4K,
+	HV_GENERIC_SET_ALL,
+};
+
 /* hypercall status code */
 #define HV_STATUS_SUCCESS			0
 #define HV_STATUS_INVALID_HYPERCALL_CODE	2
diff --git a/arch/x86/include/uapi/asm/mman.h b/arch/x86/include/uapi/asm/mman.h
index 39bca7fac087..3be08f07695c 100644
--- a/arch/x86/include/uapi/asm/mman.h
+++ b/arch/x86/include/uapi/asm/mman.h
@@ -3,9 +3,6 @@
 
 #define MAP_32BIT	0x40		/* only give out 32bit addresses */
 
-#define MAP_HUGE_2MB    (21 << MAP_HUGE_SHIFT)
-#define MAP_HUGE_1GB    (30 << MAP_HUGE_SHIFT)
-
 #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
 /*
  * Take the 4 protection key bits out of the vma->vm_flags
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index a01892bdd61a..fd0a7895b63f 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -42,7 +42,7 @@ CFLAGS_irq.o := -I$(src)/../include/asm/trace
 
 obj-y			:= process_$(BITS).o signal.o
 obj-$(CONFIG_COMPAT)	+= signal_compat.o
-obj-y			+= traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
+obj-y			+= traps.o idt.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
 obj-y			+= time.o ioport.o dumpstack.o nmi.o
 obj-$(CONFIG_MODIFY_LDT_SYSCALL)	+= ldt.o
 obj-y			+= setup.o x86_init.o i8259.o irqinit.o jump_label.o
@@ -111,6 +111,7 @@ obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= paravirt-spinlocks.o
 obj-$(CONFIG_PARAVIRT_CLOCK)	+= pvclock.o
 obj-$(CONFIG_X86_PMEM_LEGACY_DEVICE) += pmem.o
 
+obj-$(CONFIG_EISA)		+= eisa.o
 obj-$(CONFIG_PCSPKR_PLATFORM)	+= pcspeaker.o
 
 obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) += check.o
@@ -126,11 +127,9 @@ obj-$(CONFIG_PERF_EVENTS)		+= perf_regs.o
 obj-$(CONFIG_TRACING)			+= tracepoint.o
 obj-$(CONFIG_SCHED_MC_PRIO)		+= itmt.o
 
-ifdef CONFIG_FRAME_POINTER
-obj-y					+= unwind_frame.o
-else
-obj-y					+= unwind_guess.o
-endif
+obj-$(CONFIG_ORC_UNWINDER)		+= unwind_orc.o
+obj-$(CONFIG_FRAME_POINTER_UNWINDER)	+= unwind_frame.o
+obj-$(CONFIG_GUESS_UNWINDER)		+= unwind_guess.o
 
 ###
 # 64 bit specific files
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 7491e73d9253..f8ae286c1502 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -115,24 +115,24 @@ static u32 isa_irq_to_gsi[NR_IRQS_LEGACY] __read_mostly = {
 #define	ACPI_INVALID_GSI		INT_MIN
 
 /*
- * This is just a simple wrapper around early_ioremap(),
+ * This is just a simple wrapper around early_memremap(),
  * with sanity checks for phys == 0 and size == 0.
  */
-char *__init __acpi_map_table(unsigned long phys, unsigned long size)
+void __init __iomem *__acpi_map_table(unsigned long phys, unsigned long size)
 {
 
 	if (!phys || !size)
 		return NULL;
 
-	return early_ioremap(phys, size);
+	return early_memremap(phys, size);
 }
 
-void __init __acpi_unmap_table(char *map, unsigned long size)
+void __init __acpi_unmap_table(void __iomem *map, unsigned long size)
 {
 	if (!map || !size)
 		return;
 
-	early_iounmap(map, size);
+	early_memunmap(map, size);
 }
 
 #ifdef CONFIG_X86_LOCAL_APIC
@@ -199,8 +199,10 @@ static int __init
 acpi_parse_x2apic(struct acpi_subtable_header *header, const unsigned long end)
 {
 	struct acpi_madt_local_x2apic *processor = NULL;
+#ifdef CONFIG_X86_X2APIC
 	int apic_id;
 	u8 enabled;
+#endif
 
 	processor = (struct acpi_madt_local_x2apic *)header;
 
@@ -209,9 +211,10 @@ acpi_parse_x2apic(struct acpi_subtable_header *header, const unsigned long end)
 
 	acpi_table_print_madt_entry(header);
 
+#ifdef CONFIG_X86_X2APIC
 	apic_id = processor->local_apic_id;
 	enabled = processor->lapic_flags & ACPI_MADT_ENABLED;
-#ifdef CONFIG_X86_X2APIC
+
 	/*
 	 * We need to register disabled CPU as well to permit
 	 * counting disabled CPUs. This allows us to size
@@ -1083,7 +1086,7 @@ static void __init mp_config_acpi_legacy_irqs(void)
 	mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA;
 #endif
 	set_bit(MP_ISA_BUS, mp_bus_not_pci);
-	pr_debug("Bus #%d is ISA\n", MP_ISA_BUS);
+	pr_debug("Bus #%d is ISA (nIRQs: %d)\n", MP_ISA_BUS, nr_legacy_irqs());
 
 	/*
 	 * Use the default configuration for the IRQs 0-15.  Unless
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 32e14d137416..3344d3382e91 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -742,7 +742,16 @@ static void *bp_int3_handler, *bp_int3_addr;
 
 int poke_int3_handler(struct pt_regs *regs)
 {
-	/* bp_patching_in_progress */
+	/*
+	 * Having observed our INT3 instruction, we now must observe
+	 * bp_patching_in_progress.
+	 *
+	 * 	in_progress = TRUE		INT3
+	 * 	WMB				RMB
+	 * 	write INT3			if (in_progress)
+	 *
+	 * Idem for bp_int3_handler.
+	 */
 	smp_rmb();
 
 	if (likely(!bp_patching_in_progress))
@@ -788,9 +797,8 @@ void *text_poke_bp(void *addr, const void *opcode, size_t len, void *handler)
 	bp_int3_addr = (u8 *)addr + sizeof(int3);
 	bp_patching_in_progress = true;
 	/*
-	 * Corresponding read barrier in int3 notifier for
-	 * making sure the in_progress flags is correctly ordered wrt.
-	 * patching
+	 * Corresponding read barrier in int3 notifier for making sure the
+	 * in_progress and handler are correctly ordered wrt. patching.
 	 */
 	smp_wmb();
 
@@ -815,9 +823,11 @@ void *text_poke_bp(void *addr, const void *opcode, size_t len, void *handler)
 	text_poke(addr, opcode, sizeof(int3));
 
 	on_each_cpu(do_sync_core, NULL, 1);
-
+	/*
+	 * sync_core() implies an smp_mb() and orders this store against
+	 * the writing of the new instruction.
+	 */
 	bp_patching_in_progress = false;
-	smp_wmb();
 
 	return addr;
 }
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 98b3dd8cf2bf..7834f73efbf1 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -177,8 +177,6 @@ static int disable_apic_timer __initdata;
 int local_apic_timer_c2_ok;
 EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);
 
-int first_system_vector = FIRST_SYSTEM_VECTOR;
-
 /*
  * Debug level, exported for io_apic.c
  */
@@ -599,9 +597,13 @@ static const struct x86_cpu_id deadline_match[] = {
 
 static void apic_check_deadline_errata(void)
 {
-	const struct x86_cpu_id *m = x86_match_cpu(deadline_match);
+	const struct x86_cpu_id *m;
 	u32 rev;
 
+	if (!boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
+		return;
+
+	m = x86_match_cpu(deadline_match);
 	if (!m)
 		return;
 
@@ -990,8 +992,7 @@ void setup_secondary_APIC_clock(void)
  */
 static void local_apic_timer_interrupt(void)
 {
-	int cpu = smp_processor_id();
-	struct clock_event_device *evt = &per_cpu(lapic_events, cpu);
+	struct clock_event_device *evt = this_cpu_ptr(&lapic_events);
 
 	/*
 	 * Normally we should not be here till LAPIC has been initialized but
@@ -1005,7 +1006,8 @@ static void local_apic_timer_interrupt(void)
 	 * spurious.
 	 */
 	if (!evt->event_handler) {
-		pr_warning("Spurious LAPIC timer interrupt on cpu %d\n", cpu);
+		pr_warning("Spurious LAPIC timer interrupt on cpu %d\n",
+			   smp_processor_id());
 		/* Switch it off */
 		lapic_timer_shutdown(evt);
 		return;
@@ -1040,25 +1042,6 @@ __visible void __irq_entry smp_apic_timer_interrupt(struct pt_regs *regs)
 	 * interrupt lock, which is the WrongThing (tm) to do.
 	 */
 	entering_ack_irq();
-	local_apic_timer_interrupt();
-	exiting_irq();
-
-	set_irq_regs(old_regs);
-}
-
-__visible void __irq_entry smp_trace_apic_timer_interrupt(struct pt_regs *regs)
-{
-	struct pt_regs *old_regs = set_irq_regs(regs);
-
-	/*
-	 * NOTE! We'd better ACK the irq immediately,
-	 * because timer handling can be slow.
-	 *
-	 * update_process_times() expects us to have done irq_enter().
-	 * Besides, if we don't timer interrupts ignore the global
-	 * interrupt lock, which is the WrongThing (tm) to do.
-	 */
-	entering_ack_irq();
 	trace_local_timer_entry(LOCAL_TIMER_VECTOR);
 	local_apic_timer_interrupt();
 	trace_local_timer_exit(LOCAL_TIMER_VECTOR);
@@ -1920,10 +1903,14 @@ void __init register_lapic_address(unsigned long address)
 /*
  * This interrupt should _never_ happen with our APIC/SMP architecture
  */
-static void __smp_spurious_interrupt(u8 vector)
+__visible void __irq_entry smp_spurious_interrupt(struct pt_regs *regs)
 {
+	u8 vector = ~regs->orig_ax;
 	u32 v;
 
+	entering_irq();
+	trace_spurious_apic_entry(vector);
+
 	/*
 	 * Check if this really is a spurious interrupt and ACK it
 	 * if it is a vectored one.  Just in case...
@@ -1938,22 +1925,7 @@ static void __smp_spurious_interrupt(u8 vector)
 	/* see sw-dev-man vol 3, chapter 7.4.13.5 */
 	pr_info("spurious APIC interrupt through vector %02x on CPU#%d, "
 		"should never happen.\n", vector, smp_processor_id());
-}
 
-__visible void __irq_entry smp_spurious_interrupt(struct pt_regs *regs)
-{
-	entering_irq();
-	__smp_spurious_interrupt(~regs->orig_ax);
-	exiting_irq();
-}
-
-__visible void __irq_entry smp_trace_spurious_interrupt(struct pt_regs *regs)
-{
-	u8 vector = ~regs->orig_ax;
-
-	entering_irq();
-	trace_spurious_apic_entry(vector);
-	__smp_spurious_interrupt(vector);
 	trace_spurious_apic_exit(vector);
 	exiting_irq();
 }
@@ -1961,10 +1933,8 @@ __visible void __irq_entry smp_trace_spurious_interrupt(struct pt_regs *regs)
 /*
  * This interrupt should never happen with our APIC/SMP architecture
  */
-static void __smp_error_interrupt(struct pt_regs *regs)
+__visible void __irq_entry smp_error_interrupt(struct pt_regs *regs)
 {
-	u32 v;
-	u32 i = 0;
 	static const char * const error_interrupt_reason[] = {
 		"Send CS error",		/* APIC Error Bit 0 */
 		"Receive CS error",		/* APIC Error Bit 1 */
@@ -1975,6 +1945,10 @@ static void __smp_error_interrupt(struct pt_regs *regs)
 		"Received illegal vector",	/* APIC Error Bit 6 */
 		"Illegal register address",	/* APIC Error Bit 7 */
 	};
+	u32 v, i = 0;
+
+	entering_irq();
+	trace_error_apic_entry(ERROR_APIC_VECTOR);
 
 	/* First tickle the hardware, only then report what went on. -- REW */
 	if (lapic_get_maxlvt() > 3)	/* Due to the Pentium erratum 3AP. */
@@ -1996,20 +1970,6 @@ static void __smp_error_interrupt(struct pt_regs *regs)
 
 	apic_printk(APIC_DEBUG, KERN_CONT "\n");
 
-}
-
-__visible void __irq_entry smp_error_interrupt(struct pt_regs *regs)
-{
-	entering_irq();
-	__smp_error_interrupt(regs);
-	exiting_irq();
-}
-
-__visible void __irq_entry smp_trace_error_interrupt(struct pt_regs *regs)
-{
-	entering_irq();
-	trace_error_apic_entry(ERROR_APIC_VECTOR);
-	__smp_error_interrupt(regs);
 	trace_error_apic_exit(ERROR_APIC_VECTOR);
 	exiting_irq();
 }
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 237e9c2341c7..70e48aa6af98 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1243,7 +1243,7 @@ static void io_apic_print_entries(unsigned int apic, unsigned int nr_entries)
 			 entry.vector, entry.irr, entry.delivery_status);
 		if (ir_entry->format)
 			printk(KERN_DEBUG "%s, remapped, I(%04X),  Z(%X)\n",
-			       buf, (ir_entry->index << 15) | ir_entry->index,
+			       buf, (ir_entry->index2 << 15) | ir_entry->index,
 			       ir_entry->zero);
 		else
 			printk(KERN_DEBUG "%s, %s, D(%02X), M(%1d)\n",
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index b3af457ed667..88c214e75a6b 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -166,7 +166,7 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d,
 		offset = current_offset;
 next:
 		vector += 16;
-		if (vector >= first_system_vector) {
+		if (vector >= FIRST_SYSTEM_VECTOR) {
 			offset = (offset + 1) % 16;
 			vector = FIRST_EXTERNAL_VECTOR + offset;
 		}
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index 880aa093268d..710edab9e644 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -4,9 +4,6 @@
 
 #include <asm/ucontext.h>
 
-#include <linux/lguest.h>
-#include "../../../drivers/lguest/lg.h"
-
 #define __SYSCALL_I386(nr, sym, qual) [nr] = 1,
 static char syscalls[] = {
 #include <asm/syscalls_32.h>
@@ -62,23 +59,6 @@ void foo(void)
 	OFFSET(stack_canary_offset, stack_canary, canary);
 #endif
 
-#if defined(CONFIG_LGUEST) || defined(CONFIG_LGUEST_GUEST) || defined(CONFIG_LGUEST_MODULE)
-	BLANK();
-	OFFSET(LGUEST_DATA_irq_enabled, lguest_data, irq_enabled);
-	OFFSET(LGUEST_DATA_irq_pending, lguest_data, irq_pending);
-
-	BLANK();
-	OFFSET(LGUEST_PAGES_host_gdt_desc, lguest_pages, state.host_gdt_desc);
-	OFFSET(LGUEST_PAGES_host_idt_desc, lguest_pages, state.host_idt_desc);
-	OFFSET(LGUEST_PAGES_host_cr3, lguest_pages, state.host_cr3);
-	OFFSET(LGUEST_PAGES_host_sp, lguest_pages, state.host_sp);
-	OFFSET(LGUEST_PAGES_guest_gdt_desc, lguest_pages,state.guest_gdt_desc);
-	OFFSET(LGUEST_PAGES_guest_idt_desc, lguest_pages,state.guest_idt_desc);
-	OFFSET(LGUEST_PAGES_guest_gdt, lguest_pages, state.guest_gdt);
-	OFFSET(LGUEST_PAGES_regs_trapnum, lguest_pages, regs.trapnum);
-	OFFSET(LGUEST_PAGES_regs_errcode, lguest_pages, regs.errcode);
-	OFFSET(LGUEST_PAGES_regs, lguest_pages, regs);
-#endif
 	BLANK();
 	DEFINE(__NR_syscall_max, sizeof(syscalls) - 1);
 	DEFINE(NR_syscalls, sizeof(syscalls));
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index 99332f550c48..cf42206926af 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -20,7 +20,6 @@ static char syscalls_ia32[] = {
 int main(void)
 {
 #ifdef CONFIG_PARAVIRT
-	OFFSET(PV_IRQ_adjust_exception_frame, pv_irq_ops, adjust_exception_frame);
 	OFFSET(PV_CPU_usergs_sysret64, pv_cpu_ops, usergs_sysret64);
 	OFFSET(PV_CPU_swapgs, pv_cpu_ops, swapgs);
 	BLANK();
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index cdf82492b770..e17942c131c8 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -33,7 +33,7 @@ obj-$(CONFIG_CPU_SUP_CENTAUR)		+= centaur.o
 obj-$(CONFIG_CPU_SUP_TRANSMETA_32)	+= transmeta.o
 obj-$(CONFIG_CPU_SUP_UMC_32)		+= umc.o
 
-obj-$(CONFIG_INTEL_RDT_A)	+= intel_rdt.o intel_rdt_rdtgroup.o intel_rdt_schemata.o
+obj-$(CONFIG_INTEL_RDT)	+= intel_rdt.o intel_rdt_rdtgroup.o intel_rdt_monitor.o intel_rdt_ctrlmondata.o
 
 obj-$(CONFIG_X86_MCE)			+= mcheck/
 obj-$(CONFIG_MTRR)			+= mtrr/
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 3b9e220621f8..9862e2cd6d93 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -297,13 +297,29 @@ static int nearby_node(int apicid)
 }
 #endif
 
+#ifdef CONFIG_SMP
+/*
+ * Fix up cpu_core_id for pre-F17h systems to be in the
+ * [0 .. cores_per_node - 1] range. Not really needed but
+ * kept so as not to break existing setups.
+ */
+static void legacy_fixup_core_id(struct cpuinfo_x86 *c)
+{
+	u32 cus_per_node;
+
+	if (c->x86 >= 0x17)
+		return;
+
+	cus_per_node = c->x86_max_cores / nodes_per_socket;
+	c->cpu_core_id %= cus_per_node;
+}
+
 /*
  * Fixup core topology information for
  * (1) AMD multi-node processors
  *     Assumption: Number of cores in each internal node is the same.
  * (2) AMD processors supporting compute units
  */
-#ifdef CONFIG_SMP
 static void amd_get_topology(struct cpuinfo_x86 *c)
 {
 	u8 node_id;
@@ -354,15 +370,9 @@ static void amd_get_topology(struct cpuinfo_x86 *c)
 	} else
 		return;
 
-	/* fixup multi-node processor information */
 	if (nodes_per_socket > 1) {
-		u32 cus_per_node;
-
 		set_cpu_cap(c, X86_FEATURE_AMD_DCM);
-		cus_per_node = c->x86_max_cores / nodes_per_socket;
-
-		/* core id has to be in the [0 .. cores_per_node - 1] range */
-		c->cpu_core_id %= cus_per_node;
+		legacy_fixup_core_id(c);
 	}
 }
 #endif
@@ -548,8 +558,12 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
 
 static void early_init_amd(struct cpuinfo_x86 *c)
 {
+	u32 dummy;
+
 	early_init_amd_mc(c);
 
+	rdmsr_safe(MSR_AMD64_PATCH_LEVEL, &c->microcode, &dummy);
+
 	/*
 	 * c->x86_power is 8000_0007 edx. Bit 8 is TSC runs at constant rate
 	 * with P/T states and does not stop in deep C-states
@@ -612,6 +626,27 @@ static void early_init_amd(struct cpuinfo_x86 *c)
 	 */
 	if (cpu_has_amd_erratum(c, amd_erratum_400))
 		set_cpu_bug(c, X86_BUG_AMD_E400);
+
+	/*
+	 * BIOS support is required for SME. If BIOS has enabled SME then
+	 * adjust x86_phys_bits by the SME physical address space reduction
+	 * value. If BIOS has not enabled SME then don't advertise the
+	 * feature (set in scattered.c). Also, since the SME support requires
+	 * long mode, don't advertise the feature under CONFIG_X86_32.
+	 */
+	if (cpu_has(c, X86_FEATURE_SME)) {
+		u64 msr;
+
+		/* Check if SME is enabled */
+		rdmsrl(MSR_K8_SYSCFG, msr);
+		if (msr & MSR_K8_SYSCFG_MEM_ENCRYPT) {
+			c->x86_phys_bits -= (cpuid_ebx(0x8000001f) >> 6) & 0x3f;
+			if (IS_ENABLED(CONFIG_X86_32))
+				clear_cpu_cap(c, X86_FEATURE_SME);
+		} else {
+			clear_cpu_cap(c, X86_FEATURE_SME);
+		}
+	}
 }
 
 static void init_amd_k8(struct cpuinfo_x86 *c)
@@ -730,8 +765,6 @@ static void init_amd_bd(struct cpuinfo_x86 *c)
 
 static void init_amd(struct cpuinfo_x86 *c)
 {
-	u32 dummy;
-
 	early_init_amd(c);
 
 	/*
@@ -793,8 +826,6 @@ static void init_amd(struct cpuinfo_x86 *c)
 	if (c->x86 > 0x11)
 		set_cpu_cap(c, X86_FEATURE_ARAT);
 
-	rdmsr_safe(MSR_AMD64_PATCH_LEVEL, &c->microcode, &dummy);
-
 	/* 3DNow or LM implies PREFETCHW */
 	if (!cpu_has(c, X86_FEATURE_3DNOWPREFETCH))
 		if (cpu_has(c, X86_FEATURE_3DNOW) || cpu_has(c, X86_FEATURE_LM))
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 0af86d9242da..db684880d74a 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -21,6 +21,14 @@
 
 void __init check_bugs(void)
 {
+#ifdef CONFIG_X86_32
+	/*
+	 * Regardless of whether PCID is enumerated, the SDM says
+	 * that it can't be enabled in 32-bit mode.
+	 */
+	setup_clear_cpu_cap(X86_FEATURE_PCID);
+#endif
+
 	identify_boot_cpu();
 
 	if (!IS_ENABLED(CONFIG_SMP)) {
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index c8b39870f33e..fb1d3358a4af 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -168,6 +168,24 @@ static int __init x86_mpx_setup(char *s)
 }
 __setup("nompx", x86_mpx_setup);
 
+#ifdef CONFIG_X86_64
+static int __init x86_pcid_setup(char *s)
+{
+	/* require an exact match without trailing characters */
+	if (strlen(s))
+		return 0;
+
+	/* do not emit a message if the feature is not present */
+	if (!boot_cpu_has(X86_FEATURE_PCID))
+		return 1;
+
+	setup_clear_cpu_cap(X86_FEATURE_PCID);
+	pr_info("nopcid: PCID feature disabled\n");
+	return 1;
+}
+__setup("nopcid", x86_pcid_setup);
+#endif
+
 static int __init x86_noinvpcid_setup(char *s)
 {
 	/* noinvpcid doesn't accept parameters */
@@ -311,6 +329,38 @@ static __always_inline void setup_smap(struct cpuinfo_x86 *c)
 	}
 }
 
+static void setup_pcid(struct cpuinfo_x86 *c)
+{
+	if (cpu_has(c, X86_FEATURE_PCID)) {
+		if (cpu_has(c, X86_FEATURE_PGE)) {
+			/*
+			 * We'd like to use cr4_set_bits_and_update_boot(),
+			 * but we can't.  CR4.PCIDE is special and can only
+			 * be set in long mode, and the early CPU init code
+			 * doesn't know this and would try to restore CR4.PCIDE
+			 * prior to entering long mode.
+			 *
+			 * Instead, we rely on the fact that hotplug, resume,
+			 * etc all fully restore CR4 before they write anything
+			 * that could have nonzero PCID bits to CR3.  CR4.PCIDE
+			 * has no effect on the page tables themselves, so we
+			 * don't need it to be restored early.
+			 */
+			cr4_set_bits(X86_CR4_PCIDE);
+		} else {
+			/*
+			 * flush_tlb_all(), as currently implemented, won't
+			 * work if PCID is on but PGE is not.  Since that
+			 * combination doesn't exist on real hardware, there's
+			 * no reason to try to fully support it, but it's
+			 * polite to avoid corrupting data if we're on
+			 * an improperly configured VM.
+			 */
+			clear_cpu_cap(c, X86_FEATURE_PCID);
+		}
+	}
+}
+
 /*
  * Protection Keys are not available in 32-bit mode.
  */
@@ -1125,6 +1175,9 @@ static void identify_cpu(struct cpuinfo_x86 *c)
 	setup_smep(c);
 	setup_smap(c);
 
+	/* Set up PCID */
+	setup_pcid(c);
+
 	/*
 	 * The vendor-specific functions might have changed features.
 	 * Now we do "generic changes."
@@ -1289,15 +1342,6 @@ static __init int setup_disablecpuid(char *arg)
 __setup("clearcpuid=", setup_disablecpuid);
 
 #ifdef CONFIG_X86_64
-struct desc_ptr idt_descr __ro_after_init = {
-	.size = NR_VECTORS * 16 - 1,
-	.address = (unsigned long) idt_table,
-};
-const struct desc_ptr debug_idt_descr = {
-	.size = NR_VECTORS * 16 - 1,
-	.address = (unsigned long) debug_idt_table,
-};
-
 DEFINE_PER_CPU_FIRST(union irq_stack_union,
 		     irq_stack_union) __aligned(PAGE_SIZE) __visible;
 
@@ -1552,6 +1596,7 @@ void cpu_init(void)
 	mmgrab(&init_mm);
 	me->active_mm = &init_mm;
 	BUG_ON(me->mm);
+	initialize_tlbstate_and_flush();
 	enter_lazy_tlb(&init_mm, me);
 
 	load_sp0(t, &current->thread);
@@ -1606,6 +1651,7 @@ void cpu_init(void)
 	mmgrab(&init_mm);
 	curr->active_mm = &init_mm;
 	BUG_ON(curr->mm);
+	initialize_tlbstate_and_flush();
 	enter_lazy_tlb(&init_mm, curr);
 
 	load_sp0(t, thread);
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index c55fb2cb2acc..24f749324c0f 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -811,7 +811,24 @@ static int __cache_amd_cpumap_setup(unsigned int cpu, int index,
 	struct cacheinfo *this_leaf;
 	int i, sibling;
 
-	if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
+	/*
+	 * For L3, always use the pre-calculated cpu_llc_shared_mask
+	 * to derive shared_cpu_map.
+	 */
+	if (index == 3) {
+		for_each_cpu(i, cpu_llc_shared_mask(cpu)) {
+			this_cpu_ci = get_cpu_cacheinfo(i);
+			if (!this_cpu_ci->info_list)
+				continue;
+			this_leaf = this_cpu_ci->info_list + index;
+			for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) {
+				if (!cpu_online(sibling))
+					continue;
+				cpumask_set_cpu(sibling,
+						&this_leaf->shared_cpu_map);
+			}
+		}
+	} else if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
 		unsigned int apicid, nshared, first, last;
 
 		this_leaf = this_cpu_ci->info_list + index;
@@ -839,19 +856,6 @@ static int __cache_amd_cpumap_setup(unsigned int cpu, int index,
 						&this_leaf->shared_cpu_map);
 			}
 		}
-	} else if (index == 3) {
-		for_each_cpu(i, cpu_llc_shared_mask(cpu)) {
-			this_cpu_ci = get_cpu_cacheinfo(i);
-			if (!this_cpu_ci->info_list)
-				continue;
-			this_leaf = this_cpu_ci->info_list + index;
-			for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) {
-				if (!cpu_online(sibling))
-					continue;
-				cpumask_set_cpu(sibling,
-						&this_leaf->shared_cpu_map);
-			}
-		}
 	} else
 		return 0;
 
diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c
index 5b366462f579..cd5fc61ba450 100644
--- a/arch/x86/kernel/cpu/intel_rdt.c
+++ b/arch/x86/kernel/cpu/intel_rdt.c
@@ -30,7 +30,8 @@
 #include <linux/cpuhotplug.h>
 
 #include <asm/intel-family.h>
-#include <asm/intel_rdt.h>
+#include <asm/intel_rdt_sched.h>
+#include "intel_rdt.h"
 
 #define MAX_MBA_BW	100u
 #define MBA_IS_LINEAR	0x4
@@ -38,7 +39,13 @@
 /* Mutex to protect rdtgroup access. */
 DEFINE_MUTEX(rdtgroup_mutex);
 
-DEFINE_PER_CPU_READ_MOSTLY(int, cpu_closid);
+/*
+ * The cached intel_pqr_state is strictly per CPU and can never be
+ * updated from a remote CPU. Functions which modify the state
+ * are called with interrupts disabled and no preemption, which
+ * is sufficient for the protection.
+ */
+DEFINE_PER_CPU(struct intel_pqr_state, pqr_state);
 
 /*
  * Used to store the max resource name width and max resource data width
@@ -46,6 +53,12 @@ DEFINE_PER_CPU_READ_MOSTLY(int, cpu_closid);
  */
 int max_name_width, max_data_width;
 
+/*
+ * Global boolean for rdt_alloc which is true if any
+ * resource allocation is enabled.
+ */
+bool rdt_alloc_capable;
+
 static void
 mba_wrmsr(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r);
 static void
@@ -54,7 +67,9 @@ cat_wrmsr(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r);
 #define domain_init(id) LIST_HEAD_INIT(rdt_resources_all[id].domains)
 
 struct rdt_resource rdt_resources_all[] = {
+	[RDT_RESOURCE_L3] =
 	{
+		.rid			= RDT_RESOURCE_L3,
 		.name			= "L3",
 		.domains		= domain_init(RDT_RESOURCE_L3),
 		.msr_base		= IA32_L3_CBM_BASE,
@@ -67,8 +82,11 @@ struct rdt_resource rdt_resources_all[] = {
 		},
 		.parse_ctrlval		= parse_cbm,
 		.format_str		= "%d=%0*x",
+		.fflags			= RFTYPE_RES_CACHE,
 	},
+	[RDT_RESOURCE_L3DATA] =
 	{
+		.rid			= RDT_RESOURCE_L3DATA,
 		.name			= "L3DATA",
 		.domains		= domain_init(RDT_RESOURCE_L3DATA),
 		.msr_base		= IA32_L3_CBM_BASE,
@@ -81,8 +99,11 @@ struct rdt_resource rdt_resources_all[] = {
 		},
 		.parse_ctrlval		= parse_cbm,
 		.format_str		= "%d=%0*x",
+		.fflags			= RFTYPE_RES_CACHE,
 	},
+	[RDT_RESOURCE_L3CODE] =
 	{
+		.rid			= RDT_RESOURCE_L3CODE,
 		.name			= "L3CODE",
 		.domains		= domain_init(RDT_RESOURCE_L3CODE),
 		.msr_base		= IA32_L3_CBM_BASE,
@@ -95,8 +116,11 @@ struct rdt_resource rdt_resources_all[] = {
 		},
 		.parse_ctrlval		= parse_cbm,
 		.format_str		= "%d=%0*x",
+		.fflags			= RFTYPE_RES_CACHE,
 	},
+	[RDT_RESOURCE_L2] =
 	{
+		.rid			= RDT_RESOURCE_L2,
 		.name			= "L2",
 		.domains		= domain_init(RDT_RESOURCE_L2),
 		.msr_base		= IA32_L2_CBM_BASE,
@@ -109,8 +133,11 @@ struct rdt_resource rdt_resources_all[] = {
 		},
 		.parse_ctrlval		= parse_cbm,
 		.format_str		= "%d=%0*x",
+		.fflags			= RFTYPE_RES_CACHE,
 	},
+	[RDT_RESOURCE_MBA] =
 	{
+		.rid			= RDT_RESOURCE_MBA,
 		.name			= "MB",
 		.domains		= domain_init(RDT_RESOURCE_MBA),
 		.msr_base		= IA32_MBA_THRTL_BASE,
@@ -118,6 +145,7 @@ struct rdt_resource rdt_resources_all[] = {
 		.cache_level		= 3,
 		.parse_ctrlval		= parse_bw,
 		.format_str		= "%d=%*d",
+		.fflags			= RFTYPE_RES_MB,
 	},
 };
 
@@ -144,33 +172,28 @@ static unsigned int cbm_idx(struct rdt_resource *r, unsigned int closid)
  * is always 20 on hsw server parts. The minimum cache bitmask length
  * allowed for HSW server is always 2 bits. Hardcode all of them.
  */
-static inline bool cache_alloc_hsw_probe(void)
+static inline void cache_alloc_hsw_probe(void)
 {
-	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
-	    boot_cpu_data.x86 == 6 &&
-	    boot_cpu_data.x86_model == INTEL_FAM6_HASWELL_X) {
-		struct rdt_resource *r  = &rdt_resources_all[RDT_RESOURCE_L3];
-		u32 l, h, max_cbm = BIT_MASK(20) - 1;
-
-		if (wrmsr_safe(IA32_L3_CBM_BASE, max_cbm, 0))
-			return false;
-		rdmsr(IA32_L3_CBM_BASE, l, h);
+	struct rdt_resource *r  = &rdt_resources_all[RDT_RESOURCE_L3];
+	u32 l, h, max_cbm = BIT_MASK(20) - 1;
 
-		/* If all the bits were set in MSR, return success */
-		if (l != max_cbm)
-			return false;
+	if (wrmsr_safe(IA32_L3_CBM_BASE, max_cbm, 0))
+		return;
+	rdmsr(IA32_L3_CBM_BASE, l, h);
 
-		r->num_closid = 4;
-		r->default_ctrl = max_cbm;
-		r->cache.cbm_len = 20;
-		r->cache.min_cbm_bits = 2;
-		r->capable = true;
-		r->enabled = true;
+	/* If all the bits were set in MSR, return success */
+	if (l != max_cbm)
+		return;
 
-		return true;
-	}
+	r->num_closid = 4;
+	r->default_ctrl = max_cbm;
+	r->cache.cbm_len = 20;
+	r->cache.shareable_bits = 0xc0000;
+	r->cache.min_cbm_bits = 2;
+	r->alloc_capable = true;
+	r->alloc_enabled = true;
 
-	return false;
+	rdt_alloc_capable = true;
 }
 
 /*
@@ -213,15 +236,14 @@ static bool rdt_get_mem_config(struct rdt_resource *r)
 			return false;
 	}
 	r->data_width = 3;
-	rdt_get_mba_infofile(r);
 
-	r->capable = true;
-	r->enabled = true;
+	r->alloc_capable = true;
+	r->alloc_enabled = true;
 
 	return true;
 }
 
-static void rdt_get_cache_config(int idx, struct rdt_resource *r)
+static void rdt_get_cache_alloc_cfg(int idx, struct rdt_resource *r)
 {
 	union cpuid_0x10_1_eax eax;
 	union cpuid_0x10_x_edx edx;
@@ -231,10 +253,10 @@ static void rdt_get_cache_config(int idx, struct rdt_resource *r)
 	r->num_closid = edx.split.cos_max + 1;
 	r->cache.cbm_len = eax.split.cbm_len + 1;
 	r->default_ctrl = BIT_MASK(eax.split.cbm_len + 1) - 1;
+	r->cache.shareable_bits = ebx & r->default_ctrl;
 	r->data_width = (r->cache.cbm_len + 3) / 4;
-	rdt_get_cache_infofile(r);
-	r->capable = true;
-	r->enabled = true;
+	r->alloc_capable = true;
+	r->alloc_enabled = true;
 }
 
 static void rdt_get_cdp_l3_config(int type)
@@ -246,12 +268,12 @@ static void rdt_get_cdp_l3_config(int type)
 	r->cache.cbm_len = r_l3->cache.cbm_len;
 	r->default_ctrl = r_l3->default_ctrl;
 	r->data_width = (r->cache.cbm_len + 3) / 4;
-	r->capable = true;
+	r->alloc_capable = true;
 	/*
 	 * By default, CDP is disabled. CDP can be enabled by mount parameter
 	 * "cdp" during resctrl file system mount time.
 	 */
-	r->enabled = false;
+	r->alloc_enabled = false;
 }
 
 static int get_cache_id(int cpu, int level)
@@ -300,6 +322,19 @@ cat_wrmsr(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r)
 		wrmsrl(r->msr_base + cbm_idx(r, i), d->ctrl_val[i]);
 }
 
+struct rdt_domain *get_domain_from_cpu(int cpu, struct rdt_resource *r)
+{
+	struct rdt_domain *d;
+
+	list_for_each_entry(d, &r->domains, list) {
+		/* Find the domain that contains this CPU */
+		if (cpumask_test_cpu(cpu, &d->cpu_mask))
+			return d;
+	}
+
+	return NULL;
+}
+
 void rdt_ctrl_update(void *arg)
 {
 	struct msr_param *m = arg;
@@ -307,12 +342,10 @@ void rdt_ctrl_update(void *arg)
 	int cpu = smp_processor_id();
 	struct rdt_domain *d;
 
-	list_for_each_entry(d, &r->domains, list) {
-		/* Find the domain that contains this CPU */
-		if (cpumask_test_cpu(cpu, &d->cpu_mask)) {
-			r->msr_update(d, m, r);
-			return;
-		}
+	d = get_domain_from_cpu(cpu, r);
+	if (d) {
+		r->msr_update(d, m, r);
+		return;
 	}
 	pr_warn_once("cpu %d not found in any domain for resource %s\n",
 		     cpu, r->name);
@@ -326,8 +359,8 @@ void rdt_ctrl_update(void *arg)
  * caller, return the first domain whose id is bigger than the input id.
  * The domain list is sorted by id in ascending order.
  */
-static struct rdt_domain *rdt_find_domain(struct rdt_resource *r, int id,
-					  struct list_head **pos)
+struct rdt_domain *rdt_find_domain(struct rdt_resource *r, int id,
+				   struct list_head **pos)
 {
 	struct rdt_domain *d;
 	struct list_head *l;
@@ -377,6 +410,44 @@ static int domain_setup_ctrlval(struct rdt_resource *r, struct rdt_domain *d)
 	return 0;
 }
 
+static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_domain *d)
+{
+	size_t tsize;
+
+	if (is_llc_occupancy_enabled()) {
+		d->rmid_busy_llc = kcalloc(BITS_TO_LONGS(r->num_rmid),
+					   sizeof(unsigned long),
+					   GFP_KERNEL);
+		if (!d->rmid_busy_llc)
+			return -ENOMEM;
+		INIT_DELAYED_WORK(&d->cqm_limbo, cqm_handle_limbo);
+	}
+	if (is_mbm_total_enabled()) {
+		tsize = sizeof(*d->mbm_total);
+		d->mbm_total = kcalloc(r->num_rmid, tsize, GFP_KERNEL);
+		if (!d->mbm_total) {
+			kfree(d->rmid_busy_llc);
+			return -ENOMEM;
+		}
+	}
+	if (is_mbm_local_enabled()) {
+		tsize = sizeof(*d->mbm_local);
+		d->mbm_local = kcalloc(r->num_rmid, tsize, GFP_KERNEL);
+		if (!d->mbm_local) {
+			kfree(d->rmid_busy_llc);
+			kfree(d->mbm_total);
+			return -ENOMEM;
+		}
+	}
+
+	if (is_mbm_enabled()) {
+		INIT_DELAYED_WORK(&d->mbm_over, mbm_handle_overflow);
+		mbm_setup_overflow_handler(d, MBM_OVERFLOW_INTERVAL);
+	}
+
+	return 0;
+}
+
 /*
  * domain_add_cpu - Add a cpu to a resource's domain list.
  *
@@ -412,14 +483,26 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r)
 		return;
 
 	d->id = id;
+	cpumask_set_cpu(cpu, &d->cpu_mask);
 
-	if (domain_setup_ctrlval(r, d)) {
+	if (r->alloc_capable && domain_setup_ctrlval(r, d)) {
+		kfree(d);
+		return;
+	}
+
+	if (r->mon_capable && domain_setup_mon_state(r, d)) {
 		kfree(d);
 		return;
 	}
 
-	cpumask_set_cpu(cpu, &d->cpu_mask);
 	list_add_tail(&d->list, add_pos);
+
+	/*
+	 * If resctrl is mounted, add
+	 * per domain monitor data directories.
+	 */
+	if (static_branch_unlikely(&rdt_mon_enable_key))
+		mkdir_mondata_subdir_allrdtgrp(r, d);
 }
 
 static void domain_remove_cpu(int cpu, struct rdt_resource *r)
@@ -435,19 +518,58 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r)
 
 	cpumask_clear_cpu(cpu, &d->cpu_mask);
 	if (cpumask_empty(&d->cpu_mask)) {
+		/*
+		 * If resctrl is mounted, remove all the
+		 * per domain monitor data directories.
+		 */
+		if (static_branch_unlikely(&rdt_mon_enable_key))
+			rmdir_mondata_subdir_allrdtgrp(r, d->id);
 		kfree(d->ctrl_val);
+		kfree(d->rmid_busy_llc);
+		kfree(d->mbm_total);
+		kfree(d->mbm_local);
 		list_del(&d->list);
+		if (is_mbm_enabled())
+			cancel_delayed_work(&d->mbm_over);
+		if (is_llc_occupancy_enabled() &&  has_busy_rmid(r, d)) {
+			/*
+			 * When a package is going down, forcefully
+			 * decrement rmid->ebusy. There is no way to know
+			 * that the L3 was flushed and hence may lead to
+			 * incorrect counts in rare scenarios, but leaving
+			 * the RMID as busy creates RMID leaks if the
+			 * package never comes back.
+			 */
+			__check_limbo(d, true);
+			cancel_delayed_work(&d->cqm_limbo);
+		}
+
 		kfree(d);
+		return;
+	}
+
+	if (r == &rdt_resources_all[RDT_RESOURCE_L3]) {
+		if (is_mbm_enabled() && cpu == d->mbm_work_cpu) {
+			cancel_delayed_work(&d->mbm_over);
+			mbm_setup_overflow_handler(d, 0);
+		}
+		if (is_llc_occupancy_enabled() && cpu == d->cqm_work_cpu &&
+		    has_busy_rmid(r, d)) {
+			cancel_delayed_work(&d->cqm_limbo);
+			cqm_setup_limbo_handler(d, 0);
+		}
 	}
 }
 
-static void clear_closid(int cpu)
+static void clear_closid_rmid(int cpu)
 {
 	struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
 
-	per_cpu(cpu_closid, cpu) = 0;
-	state->closid = 0;
-	wrmsr(MSR_IA32_PQR_ASSOC, state->rmid, 0);
+	state->default_closid = 0;
+	state->default_rmid = 0;
+	state->cur_closid = 0;
+	state->cur_rmid = 0;
+	wrmsr(IA32_PQR_ASSOC, 0, 0);
 }
 
 static int intel_rdt_online_cpu(unsigned int cpu)
@@ -459,12 +581,23 @@ static int intel_rdt_online_cpu(unsigned int cpu)
 		domain_add_cpu(cpu, r);
 	/* The cpu is set in default rdtgroup after online. */
 	cpumask_set_cpu(cpu, &rdtgroup_default.cpu_mask);
-	clear_closid(cpu);
+	clear_closid_rmid(cpu);
 	mutex_unlock(&rdtgroup_mutex);
 
 	return 0;
 }
 
+static void clear_childcpus(struct rdtgroup *r, unsigned int cpu)
+{
+	struct rdtgroup *cr;
+
+	list_for_each_entry(cr, &r->mon.crdtgrp_list, mon.crdtgrp_list) {
+		if (cpumask_test_and_clear_cpu(cpu, &cr->cpu_mask)) {
+			break;
+		}
+	}
+}
+
 static int intel_rdt_offline_cpu(unsigned int cpu)
 {
 	struct rdtgroup *rdtgrp;
@@ -474,10 +607,12 @@ static int intel_rdt_offline_cpu(unsigned int cpu)
 	for_each_capable_rdt_resource(r)
 		domain_remove_cpu(cpu, r);
 	list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) {
-		if (cpumask_test_and_clear_cpu(cpu, &rdtgrp->cpu_mask))
+		if (cpumask_test_and_clear_cpu(cpu, &rdtgrp->cpu_mask)) {
+			clear_childcpus(rdtgrp, cpu);
 			break;
+		}
 	}
-	clear_closid(cpu);
+	clear_closid_rmid(cpu);
 	mutex_unlock(&rdtgroup_mutex);
 
 	return 0;
@@ -492,7 +627,7 @@ static __init void rdt_init_padding(void)
 	struct rdt_resource *r;
 	int cl;
 
-	for_each_capable_rdt_resource(r) {
+	for_each_alloc_capable_rdt_resource(r) {
 		cl = strlen(r->name);
 		if (cl > max_name_width)
 			max_name_width = cl;
@@ -502,38 +637,153 @@ static __init void rdt_init_padding(void)
 	}
 }
 
-static __init bool get_rdt_resources(void)
+enum {
+	RDT_FLAG_CMT,
+	RDT_FLAG_MBM_TOTAL,
+	RDT_FLAG_MBM_LOCAL,
+	RDT_FLAG_L3_CAT,
+	RDT_FLAG_L3_CDP,
+	RDT_FLAG_L2_CAT,
+	RDT_FLAG_MBA,
+};
+
+#define RDT_OPT(idx, n, f)	\
+[idx] = {			\
+	.name = n,		\
+	.flag = f		\
+}
+
+struct rdt_options {
+	char	*name;
+	int	flag;
+	bool	force_off, force_on;
+};
+
+static struct rdt_options rdt_options[]  __initdata = {
+	RDT_OPT(RDT_FLAG_CMT,	    "cmt",	X86_FEATURE_CQM_OCCUP_LLC),
+	RDT_OPT(RDT_FLAG_MBM_TOTAL, "mbmtotal", X86_FEATURE_CQM_MBM_TOTAL),
+	RDT_OPT(RDT_FLAG_MBM_LOCAL, "mbmlocal", X86_FEATURE_CQM_MBM_LOCAL),
+	RDT_OPT(RDT_FLAG_L3_CAT,    "l3cat",	X86_FEATURE_CAT_L3),
+	RDT_OPT(RDT_FLAG_L3_CDP,    "l3cdp",	X86_FEATURE_CDP_L3),
+	RDT_OPT(RDT_FLAG_L2_CAT,    "l2cat",	X86_FEATURE_CAT_L2),
+	RDT_OPT(RDT_FLAG_MBA,	    "mba",	X86_FEATURE_MBA),
+};
+#define NUM_RDT_OPTIONS ARRAY_SIZE(rdt_options)
+
+static int __init set_rdt_options(char *str)
+{
+	struct rdt_options *o;
+	bool force_off;
+	char *tok;
+
+	if (*str == '=')
+		str++;
+	while ((tok = strsep(&str, ",")) != NULL) {
+		force_off = *tok == '!';
+		if (force_off)
+			tok++;
+		for (o = rdt_options; o < &rdt_options[NUM_RDT_OPTIONS]; o++) {
+			if (strcmp(tok, o->name) == 0) {
+				if (force_off)
+					o->force_off = true;
+				else
+					o->force_on = true;
+				break;
+			}
+		}
+	}
+	return 1;
+}
+__setup("rdt", set_rdt_options);
+
+static bool __init rdt_cpu_has(int flag)
+{
+	bool ret = boot_cpu_has(flag);
+	struct rdt_options *o;
+
+	if (!ret)
+		return ret;
+
+	for (o = rdt_options; o < &rdt_options[NUM_RDT_OPTIONS]; o++) {
+		if (flag == o->flag) {
+			if (o->force_off)
+				ret = false;
+			if (o->force_on)
+				ret = true;
+			break;
+		}
+	}
+	return ret;
+}
+
+static __init bool get_rdt_alloc_resources(void)
 {
 	bool ret = false;
 
-	if (cache_alloc_hsw_probe())
+	if (rdt_alloc_capable)
 		return true;
 
 	if (!boot_cpu_has(X86_FEATURE_RDT_A))
 		return false;
 
-	if (boot_cpu_has(X86_FEATURE_CAT_L3)) {
-		rdt_get_cache_config(1, &rdt_resources_all[RDT_RESOURCE_L3]);
-		if (boot_cpu_has(X86_FEATURE_CDP_L3)) {
+	if (rdt_cpu_has(X86_FEATURE_CAT_L3)) {
+		rdt_get_cache_alloc_cfg(1, &rdt_resources_all[RDT_RESOURCE_L3]);
+		if (rdt_cpu_has(X86_FEATURE_CDP_L3)) {
 			rdt_get_cdp_l3_config(RDT_RESOURCE_L3DATA);
 			rdt_get_cdp_l3_config(RDT_RESOURCE_L3CODE);
 		}
 		ret = true;
 	}
-	if (boot_cpu_has(X86_FEATURE_CAT_L2)) {
+	if (rdt_cpu_has(X86_FEATURE_CAT_L2)) {
 		/* CPUID 0x10.2 fields are same format at 0x10.1 */
-		rdt_get_cache_config(2, &rdt_resources_all[RDT_RESOURCE_L2]);
+		rdt_get_cache_alloc_cfg(2, &rdt_resources_all[RDT_RESOURCE_L2]);
 		ret = true;
 	}
 
-	if (boot_cpu_has(X86_FEATURE_MBA)) {
+	if (rdt_cpu_has(X86_FEATURE_MBA)) {
 		if (rdt_get_mem_config(&rdt_resources_all[RDT_RESOURCE_MBA]))
 			ret = true;
 	}
-
 	return ret;
 }
 
+static __init bool get_rdt_mon_resources(void)
+{
+	if (rdt_cpu_has(X86_FEATURE_CQM_OCCUP_LLC))
+		rdt_mon_features |= (1 << QOS_L3_OCCUP_EVENT_ID);
+	if (rdt_cpu_has(X86_FEATURE_CQM_MBM_TOTAL))
+		rdt_mon_features |= (1 << QOS_L3_MBM_TOTAL_EVENT_ID);
+	if (rdt_cpu_has(X86_FEATURE_CQM_MBM_LOCAL))
+		rdt_mon_features |= (1 << QOS_L3_MBM_LOCAL_EVENT_ID);
+
+	if (!rdt_mon_features)
+		return false;
+
+	return !rdt_get_mon_l3_config(&rdt_resources_all[RDT_RESOURCE_L3]);
+}
+
+static __init void rdt_quirks(void)
+{
+	switch (boot_cpu_data.x86_model) {
+	case INTEL_FAM6_HASWELL_X:
+		if (!rdt_options[RDT_FLAG_L3_CAT].force_off)
+			cache_alloc_hsw_probe();
+		break;
+	case INTEL_FAM6_SKYLAKE_X:
+		if (boot_cpu_data.x86_mask <= 4)
+			set_rdt_options("!cmt,!mbmtotal,!mbmlocal,!l3cat");
+	}
+}
+
+static __init bool get_rdt_resources(void)
+{
+	rdt_quirks();
+	rdt_alloc_capable = get_rdt_alloc_resources();
+	rdt_mon_capable = get_rdt_mon_resources();
+
+	return (rdt_mon_capable || rdt_alloc_capable);
+}
+
 static int __init intel_rdt_late_init(void)
 {
 	struct rdt_resource *r;
@@ -556,9 +806,12 @@ static int __init intel_rdt_late_init(void)
 		return ret;
 	}
 
-	for_each_capable_rdt_resource(r)
+	for_each_alloc_capable_rdt_resource(r)
 		pr_info("Intel RDT %s allocation detected\n", r->name);
 
+	for_each_mon_capable_rdt_resource(r)
+		pr_info("Intel RDT %s monitoring detected\n", r->name);
+
 	return 0;
 }
 
diff --git a/arch/x86/kernel/cpu/intel_rdt.h b/arch/x86/kernel/cpu/intel_rdt.h
new file mode 100644
index 000000000000..ebaddaeef023
--- /dev/null
+++ b/arch/x86/kernel/cpu/intel_rdt.h
@@ -0,0 +1,440 @@
+#ifndef _ASM_X86_INTEL_RDT_H
+#define _ASM_X86_INTEL_RDT_H
+
+#include <linux/sched.h>
+#include <linux/kernfs.h>
+#include <linux/jump_label.h>
+
+#define IA32_L3_QOS_CFG		0xc81
+#define IA32_L3_CBM_BASE	0xc90
+#define IA32_L2_CBM_BASE	0xd10
+#define IA32_MBA_THRTL_BASE	0xd50
+
+#define L3_QOS_CDP_ENABLE	0x01ULL
+
+/*
+ * Event IDs are used to program IA32_QM_EVTSEL before reading event
+ * counter from IA32_QM_CTR
+ */
+#define QOS_L3_OCCUP_EVENT_ID		0x01
+#define QOS_L3_MBM_TOTAL_EVENT_ID	0x02
+#define QOS_L3_MBM_LOCAL_EVENT_ID	0x03
+
+#define CQM_LIMBOCHECK_INTERVAL	1000
+
+#define MBM_CNTR_WIDTH			24
+#define MBM_OVERFLOW_INTERVAL		1000
+
+#define RMID_VAL_ERROR			BIT_ULL(63)
+#define RMID_VAL_UNAVAIL		BIT_ULL(62)
+
+DECLARE_STATIC_KEY_FALSE(rdt_enable_key);
+
+/**
+ * struct mon_evt - Entry in the event list of a resource
+ * @evtid:		event id
+ * @name:		name of the event
+ */
+struct mon_evt {
+	u32			evtid;
+	char			*name;
+	struct list_head	list;
+};
+
+/**
+ * struct mon_data_bits - Monitoring details for each event file
+ * @rid:               Resource id associated with the event file.
+ * @evtid:             Event id associated with the event file
+ * @domid:             The domain to which the event file belongs
+ */
+union mon_data_bits {
+	void *priv;
+	struct {
+		unsigned int rid	: 10;
+		unsigned int evtid	: 8;
+		unsigned int domid	: 14;
+	} u;
+};
+
+struct rmid_read {
+	struct rdtgroup		*rgrp;
+	struct rdt_domain	*d;
+	int			evtid;
+	bool			first;
+	u64			val;
+};
+
+extern unsigned int intel_cqm_threshold;
+extern bool rdt_alloc_capable;
+extern bool rdt_mon_capable;
+extern unsigned int rdt_mon_features;
+
+enum rdt_group_type {
+	RDTCTRL_GROUP = 0,
+	RDTMON_GROUP,
+	RDT_NUM_GROUP,
+};
+
+/**
+ * struct mongroup - store mon group's data in resctrl fs.
+ * @mon_data_kn		kernlfs node for the mon_data directory
+ * @parent:			parent rdtgrp
+ * @crdtgrp_list:		child rdtgroup node list
+ * @rmid:			rmid for this rdtgroup
+ */
+struct mongroup {
+	struct kernfs_node	*mon_data_kn;
+	struct rdtgroup		*parent;
+	struct list_head	crdtgrp_list;
+	u32			rmid;
+};
+
+/**
+ * struct rdtgroup - store rdtgroup's data in resctrl file system.
+ * @kn:				kernfs node
+ * @rdtgroup_list:		linked list for all rdtgroups
+ * @closid:			closid for this rdtgroup
+ * @cpu_mask:			CPUs assigned to this rdtgroup
+ * @flags:			status bits
+ * @waitcount:			how many cpus expect to find this
+ *				group when they acquire rdtgroup_mutex
+ * @type:			indicates type of this rdtgroup - either
+ *				monitor only or ctrl_mon group
+ * @mon:			mongroup related data
+ */
+struct rdtgroup {
+	struct kernfs_node	*kn;
+	struct list_head	rdtgroup_list;
+	u32			closid;
+	struct cpumask		cpu_mask;
+	int			flags;
+	atomic_t		waitcount;
+	enum rdt_group_type	type;
+	struct mongroup		mon;
+};
+
+/* rdtgroup.flags */
+#define	RDT_DELETED		1
+
+/* rftype.flags */
+#define RFTYPE_FLAGS_CPUS_LIST	1
+
+/*
+ * Define the file type flags for base and info directories.
+ */
+#define RFTYPE_INFO			BIT(0)
+#define RFTYPE_BASE			BIT(1)
+#define RF_CTRLSHIFT			4
+#define RF_MONSHIFT			5
+#define RFTYPE_CTRL			BIT(RF_CTRLSHIFT)
+#define RFTYPE_MON			BIT(RF_MONSHIFT)
+#define RFTYPE_RES_CACHE		BIT(8)
+#define RFTYPE_RES_MB			BIT(9)
+#define RF_CTRL_INFO			(RFTYPE_INFO | RFTYPE_CTRL)
+#define RF_MON_INFO			(RFTYPE_INFO | RFTYPE_MON)
+#define RF_CTRL_BASE			(RFTYPE_BASE | RFTYPE_CTRL)
+
+/* List of all resource groups */
+extern struct list_head rdt_all_groups;
+
+extern int max_name_width, max_data_width;
+
+int __init rdtgroup_init(void);
+
+/**
+ * struct rftype - describe each file in the resctrl file system
+ * @name:	File name
+ * @mode:	Access mode
+ * @kf_ops:	File operations
+ * @flags:	File specific RFTYPE_FLAGS_* flags
+ * @fflags:	File specific RF_* or RFTYPE_* flags
+ * @seq_show:	Show content of the file
+ * @write:	Write to the file
+ */
+struct rftype {
+	char			*name;
+	umode_t			mode;
+	struct kernfs_ops	*kf_ops;
+	unsigned long		flags;
+	unsigned long		fflags;
+
+	int (*seq_show)(struct kernfs_open_file *of,
+			struct seq_file *sf, void *v);
+	/*
+	 * write() is the generic write callback which maps directly to
+	 * kernfs write operation and overrides all other operations.
+	 * Maximum write size is determined by ->max_write_len.
+	 */
+	ssize_t (*write)(struct kernfs_open_file *of,
+			 char *buf, size_t nbytes, loff_t off);
+};
+
+/**
+ * struct mbm_state - status for each MBM counter in each domain
+ * @chunks:	Total data moved (multiply by rdt_group.mon_scale to get bytes)
+ * @prev_msr	Value of IA32_QM_CTR for this RMID last time we read it
+ */
+struct mbm_state {
+	u64	chunks;
+	u64	prev_msr;
+};
+
+/**
+ * struct rdt_domain - group of cpus sharing an RDT resource
+ * @list:	all instances of this resource
+ * @id:		unique id for this instance
+ * @cpu_mask:	which cpus share this resource
+ * @rmid_busy_llc:
+ *		bitmap of which limbo RMIDs are above threshold
+ * @mbm_total:	saved state for MBM total bandwidth
+ * @mbm_local:	saved state for MBM local bandwidth
+ * @mbm_over:	worker to periodically read MBM h/w counters
+ * @cqm_limbo:	worker to periodically read CQM h/w counters
+ * @mbm_work_cpu:
+ *		worker cpu for MBM h/w counters
+ * @cqm_work_cpu:
+ *		worker cpu for CQM h/w counters
+ * @ctrl_val:	array of cache or mem ctrl values (indexed by CLOSID)
+ * @new_ctrl:	new ctrl value to be loaded
+ * @have_new_ctrl: did user provide new_ctrl for this domain
+ */
+struct rdt_domain {
+	struct list_head	list;
+	int			id;
+	struct cpumask		cpu_mask;
+	unsigned long		*rmid_busy_llc;
+	struct mbm_state	*mbm_total;
+	struct mbm_state	*mbm_local;
+	struct delayed_work	mbm_over;
+	struct delayed_work	cqm_limbo;
+	int			mbm_work_cpu;
+	int			cqm_work_cpu;
+	u32			*ctrl_val;
+	u32			new_ctrl;
+	bool			have_new_ctrl;
+};
+
+/**
+ * struct msr_param - set a range of MSRs from a domain
+ * @res:       The resource to use
+ * @low:       Beginning index from base MSR
+ * @high:      End index
+ */
+struct msr_param {
+	struct rdt_resource	*res;
+	int			low;
+	int			high;
+};
+
+/**
+ * struct rdt_cache - Cache allocation related data
+ * @cbm_len:		Length of the cache bit mask
+ * @min_cbm_bits:	Minimum number of consecutive bits to be set
+ * @cbm_idx_mult:	Multiplier of CBM index
+ * @cbm_idx_offset:	Offset of CBM index. CBM index is computed by:
+ *			closid * cbm_idx_multi + cbm_idx_offset
+ *			in a cache bit mask
+ * @shareable_bits:	Bitmask of shareable resource with other
+ *			executing entities
+ */
+struct rdt_cache {
+	unsigned int	cbm_len;
+	unsigned int	min_cbm_bits;
+	unsigned int	cbm_idx_mult;
+	unsigned int	cbm_idx_offset;
+	unsigned int	shareable_bits;
+};
+
+/**
+ * struct rdt_membw - Memory bandwidth allocation related data
+ * @max_delay:		Max throttle delay. Delay is the hardware
+ *			representation for memory bandwidth.
+ * @min_bw:		Minimum memory bandwidth percentage user can request
+ * @bw_gran:		Granularity at which the memory bandwidth is allocated
+ * @delay_linear:	True if memory B/W delay is in linear scale
+ * @mb_map:		Mapping of memory B/W percentage to memory B/W delay
+ */
+struct rdt_membw {
+	u32		max_delay;
+	u32		min_bw;
+	u32		bw_gran;
+	u32		delay_linear;
+	u32		*mb_map;
+};
+
+static inline bool is_llc_occupancy_enabled(void)
+{
+	return (rdt_mon_features & (1 << QOS_L3_OCCUP_EVENT_ID));
+}
+
+static inline bool is_mbm_total_enabled(void)
+{
+	return (rdt_mon_features & (1 << QOS_L3_MBM_TOTAL_EVENT_ID));
+}
+
+static inline bool is_mbm_local_enabled(void)
+{
+	return (rdt_mon_features & (1 << QOS_L3_MBM_LOCAL_EVENT_ID));
+}
+
+static inline bool is_mbm_enabled(void)
+{
+	return (is_mbm_total_enabled() || is_mbm_local_enabled());
+}
+
+static inline bool is_mbm_event(int e)
+{
+	return (e >= QOS_L3_MBM_TOTAL_EVENT_ID &&
+		e <= QOS_L3_MBM_LOCAL_EVENT_ID);
+}
+
+/**
+ * struct rdt_resource - attributes of an RDT resource
+ * @rid:		The index of the resource
+ * @alloc_enabled:	Is allocation enabled on this machine
+ * @mon_enabled:		Is monitoring enabled for this feature
+ * @alloc_capable:	Is allocation available on this machine
+ * @mon_capable:		Is monitor feature available on this machine
+ * @name:		Name to use in "schemata" file
+ * @num_closid:		Number of CLOSIDs available
+ * @cache_level:	Which cache level defines scope of this resource
+ * @default_ctrl:	Specifies default cache cbm or memory B/W percent.
+ * @msr_base:		Base MSR address for CBMs
+ * @msr_update:		Function pointer to update QOS MSRs
+ * @data_width:		Character width of data when displaying
+ * @domains:		All domains for this resource
+ * @cache:		Cache allocation related data
+ * @format_str:		Per resource format string to show domain value
+ * @parse_ctrlval:	Per resource function pointer to parse control values
+ * @evt_list:			List of monitoring events
+ * @num_rmid:			Number of RMIDs available
+ * @mon_scale:			cqm counter * mon_scale = occupancy in bytes
+ * @fflags:			flags to choose base and info files
+ */
+struct rdt_resource {
+	int			rid;
+	bool			alloc_enabled;
+	bool			mon_enabled;
+	bool			alloc_capable;
+	bool			mon_capable;
+	char			*name;
+	int			num_closid;
+	int			cache_level;
+	u32			default_ctrl;
+	unsigned int		msr_base;
+	void (*msr_update)	(struct rdt_domain *d, struct msr_param *m,
+				 struct rdt_resource *r);
+	int			data_width;
+	struct list_head	domains;
+	struct rdt_cache	cache;
+	struct rdt_membw	membw;
+	const char		*format_str;
+	int (*parse_ctrlval)	(char *buf, struct rdt_resource *r,
+				 struct rdt_domain *d);
+	struct list_head	evt_list;
+	int			num_rmid;
+	unsigned int		mon_scale;
+	unsigned long		fflags;
+};
+
+int parse_cbm(char *buf, struct rdt_resource *r, struct rdt_domain *d);
+int parse_bw(char *buf, struct rdt_resource *r,  struct rdt_domain *d);
+
+extern struct mutex rdtgroup_mutex;
+
+extern struct rdt_resource rdt_resources_all[];
+extern struct rdtgroup rdtgroup_default;
+DECLARE_STATIC_KEY_FALSE(rdt_alloc_enable_key);
+
+int __init rdtgroup_init(void);
+
+enum {
+	RDT_RESOURCE_L3,
+	RDT_RESOURCE_L3DATA,
+	RDT_RESOURCE_L3CODE,
+	RDT_RESOURCE_L2,
+	RDT_RESOURCE_MBA,
+
+	/* Must be the last */
+	RDT_NUM_RESOURCES,
+};
+
+#define for_each_capable_rdt_resource(r)				      \
+	for (r = rdt_resources_all; r < rdt_resources_all + RDT_NUM_RESOURCES;\
+	     r++)							      \
+		if (r->alloc_capable || r->mon_capable)
+
+#define for_each_alloc_capable_rdt_resource(r)				      \
+	for (r = rdt_resources_all; r < rdt_resources_all + RDT_NUM_RESOURCES;\
+	     r++)							      \
+		if (r->alloc_capable)
+
+#define for_each_mon_capable_rdt_resource(r)				      \
+	for (r = rdt_resources_all; r < rdt_resources_all + RDT_NUM_RESOURCES;\
+	     r++)							      \
+		if (r->mon_capable)
+
+#define for_each_alloc_enabled_rdt_resource(r)				      \
+	for (r = rdt_resources_all; r < rdt_resources_all + RDT_NUM_RESOURCES;\
+	     r++)							      \
+		if (r->alloc_enabled)
+
+#define for_each_mon_enabled_rdt_resource(r)				      \
+	for (r = rdt_resources_all; r < rdt_resources_all + RDT_NUM_RESOURCES;\
+	     r++)							      \
+		if (r->mon_enabled)
+
+/* CPUID.(EAX=10H, ECX=ResID=1).EAX */
+union cpuid_0x10_1_eax {
+	struct {
+		unsigned int cbm_len:5;
+	} split;
+	unsigned int full;
+};
+
+/* CPUID.(EAX=10H, ECX=ResID=3).EAX */
+union cpuid_0x10_3_eax {
+	struct {
+		unsigned int max_delay:12;
+	} split;
+	unsigned int full;
+};
+
+/* CPUID.(EAX=10H, ECX=ResID).EDX */
+union cpuid_0x10_x_edx {
+	struct {
+		unsigned int cos_max:16;
+	} split;
+	unsigned int full;
+};
+
+void rdt_ctrl_update(void *arg);
+struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn);
+void rdtgroup_kn_unlock(struct kernfs_node *kn);
+struct rdt_domain *rdt_find_domain(struct rdt_resource *r, int id,
+				   struct list_head **pos);
+ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
+				char *buf, size_t nbytes, loff_t off);
+int rdtgroup_schemata_show(struct kernfs_open_file *of,
+			   struct seq_file *s, void *v);
+struct rdt_domain *get_domain_from_cpu(int cpu, struct rdt_resource *r);
+int alloc_rmid(void);
+void free_rmid(u32 rmid);
+int rdt_get_mon_l3_config(struct rdt_resource *r);
+void mon_event_count(void *info);
+int rdtgroup_mondata_show(struct seq_file *m, void *arg);
+void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
+				    unsigned int dom_id);
+void mkdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
+				    struct rdt_domain *d);
+void mon_event_read(struct rmid_read *rr, struct rdt_domain *d,
+		    struct rdtgroup *rdtgrp, int evtid, int first);
+void mbm_setup_overflow_handler(struct rdt_domain *dom,
+				unsigned long delay_ms);
+void mbm_handle_overflow(struct work_struct *work);
+void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms);
+void cqm_handle_limbo(struct work_struct *work);
+bool has_busy_rmid(struct rdt_resource *r, struct rdt_domain *d);
+void __check_limbo(struct rdt_domain *d, bool force_free);
+
+#endif /* _ASM_X86_INTEL_RDT_H */
diff --git a/arch/x86/kernel/cpu/intel_rdt_schemata.c b/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c
index 406d7a6532f9..f6ea94f8954a 100644
--- a/arch/x86/kernel/cpu/intel_rdt_schemata.c
+++ b/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c
@@ -26,7 +26,7 @@
 #include <linux/kernfs.h>
 #include <linux/seq_file.h>
 #include <linux/slab.h>
-#include <asm/intel_rdt.h>
+#include "intel_rdt.h"
 
 /*
  * Check whether MBA bandwidth percentage value is correct. The value is
@@ -192,7 +192,7 @@ static int rdtgroup_parse_resource(char *resname, char *tok, int closid)
 {
 	struct rdt_resource *r;
 
-	for_each_enabled_rdt_resource(r) {
+	for_each_alloc_enabled_rdt_resource(r) {
 		if (!strcmp(resname, r->name) && closid < r->num_closid)
 			return parse_line(tok, r);
 	}
@@ -221,7 +221,7 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
 
 	closid = rdtgrp->closid;
 
-	for_each_enabled_rdt_resource(r) {
+	for_each_alloc_enabled_rdt_resource(r) {
 		list_for_each_entry(dom, &r->domains, list)
 			dom->have_new_ctrl = false;
 	}
@@ -237,7 +237,7 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
 			goto out;
 	}
 
-	for_each_enabled_rdt_resource(r) {
+	for_each_alloc_enabled_rdt_resource(r) {
 		ret = update_domains(r, closid);
 		if (ret)
 			goto out;
@@ -269,12 +269,13 @@ int rdtgroup_schemata_show(struct kernfs_open_file *of,
 {
 	struct rdtgroup *rdtgrp;
 	struct rdt_resource *r;
-	int closid, ret = 0;
+	int ret = 0;
+	u32 closid;
 
 	rdtgrp = rdtgroup_kn_lock_live(of->kn);
 	if (rdtgrp) {
 		closid = rdtgrp->closid;
-		for_each_enabled_rdt_resource(r) {
+		for_each_alloc_enabled_rdt_resource(r) {
 			if (closid < r->num_closid)
 				show_doms(s, r, closid);
 		}
@@ -284,3 +285,57 @@ int rdtgroup_schemata_show(struct kernfs_open_file *of,
 	rdtgroup_kn_unlock(of->kn);
 	return ret;
 }
+
+void mon_event_read(struct rmid_read *rr, struct rdt_domain *d,
+		    struct rdtgroup *rdtgrp, int evtid, int first)
+{
+	/*
+	 * setup the parameters to send to the IPI to read the data.
+	 */
+	rr->rgrp = rdtgrp;
+	rr->evtid = evtid;
+	rr->d = d;
+	rr->val = 0;
+	rr->first = first;
+
+	smp_call_function_any(&d->cpu_mask, mon_event_count, rr, 1);
+}
+
+int rdtgroup_mondata_show(struct seq_file *m, void *arg)
+{
+	struct kernfs_open_file *of = m->private;
+	u32 resid, evtid, domid;
+	struct rdtgroup *rdtgrp;
+	struct rdt_resource *r;
+	union mon_data_bits md;
+	struct rdt_domain *d;
+	struct rmid_read rr;
+	int ret = 0;
+
+	rdtgrp = rdtgroup_kn_lock_live(of->kn);
+
+	md.priv = of->kn->priv;
+	resid = md.u.rid;
+	domid = md.u.domid;
+	evtid = md.u.evtid;
+
+	r = &rdt_resources_all[resid];
+	d = rdt_find_domain(r, domid, NULL);
+	if (!d) {
+		ret = -ENOENT;
+		goto out;
+	}
+
+	mon_event_read(&rr, d, rdtgrp, evtid, false);
+
+	if (rr.val & RMID_VAL_ERROR)
+		seq_puts(m, "Error\n");
+	else if (rr.val & RMID_VAL_UNAVAIL)
+		seq_puts(m, "Unavailable\n");
+	else
+		seq_printf(m, "%llu\n", rr.val * r->mon_scale);
+
+out:
+	rdtgroup_kn_unlock(of->kn);
+	return ret;
+}
diff --git a/arch/x86/kernel/cpu/intel_rdt_monitor.c b/arch/x86/kernel/cpu/intel_rdt_monitor.c
new file mode 100644
index 000000000000..30827510094b
--- /dev/null
+++ b/arch/x86/kernel/cpu/intel_rdt_monitor.c
@@ -0,0 +1,499 @@
+/*
+ * Resource Director Technology(RDT)
+ * - Monitoring code
+ *
+ * Copyright (C) 2017 Intel Corporation
+ *
+ * Author:
+ *    Vikas Shivappa <vikas.shivappa@intel.com>
+ *
+ * This replaces the cqm.c based on perf but we reuse a lot of
+ * code and datastructures originally from Peter Zijlstra and Matt Fleming.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * More information about RDT be found in the Intel (R) x86 Architecture
+ * Software Developer Manual June 2016, volume 3, section 17.17.
+ */
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <asm/cpu_device_id.h>
+#include "intel_rdt.h"
+
+#define MSR_IA32_QM_CTR		0x0c8e
+#define MSR_IA32_QM_EVTSEL		0x0c8d
+
+struct rmid_entry {
+	u32				rmid;
+	int				busy;
+	struct list_head		list;
+};
+
+/**
+ * @rmid_free_lru    A least recently used list of free RMIDs
+ *     These RMIDs are guaranteed to have an occupancy less than the
+ *     threshold occupancy
+ */
+static LIST_HEAD(rmid_free_lru);
+
+/**
+ * @rmid_limbo_count     count of currently unused but (potentially)
+ *     dirty RMIDs.
+ *     This counts RMIDs that no one is currently using but that
+ *     may have a occupancy value > intel_cqm_threshold. User can change
+ *     the threshold occupancy value.
+ */
+unsigned int rmid_limbo_count;
+
+/**
+ * @rmid_entry - The entry in the limbo and free lists.
+ */
+static struct rmid_entry	*rmid_ptrs;
+
+/*
+ * Global boolean for rdt_monitor which is true if any
+ * resource monitoring is enabled.
+ */
+bool rdt_mon_capable;
+
+/*
+ * Global to indicate which monitoring events are enabled.
+ */
+unsigned int rdt_mon_features;
+
+/*
+ * This is the threshold cache occupancy at which we will consider an
+ * RMID available for re-allocation.
+ */
+unsigned int intel_cqm_threshold;
+
+static inline struct rmid_entry *__rmid_entry(u32 rmid)
+{
+	struct rmid_entry *entry;
+
+	entry = &rmid_ptrs[rmid];
+	WARN_ON(entry->rmid != rmid);
+
+	return entry;
+}
+
+static u64 __rmid_read(u32 rmid, u32 eventid)
+{
+	u64 val;
+
+	/*
+	 * As per the SDM, when IA32_QM_EVTSEL.EvtID (bits 7:0) is configured
+	 * with a valid event code for supported resource type and the bits
+	 * IA32_QM_EVTSEL.RMID (bits 41:32) are configured with valid RMID,
+	 * IA32_QM_CTR.data (bits 61:0) reports the monitored data.
+	 * IA32_QM_CTR.Error (bit 63) and IA32_QM_CTR.Unavailable (bit 62)
+	 * are error bits.
+	 */
+	wrmsr(MSR_IA32_QM_EVTSEL, eventid, rmid);
+	rdmsrl(MSR_IA32_QM_CTR, val);
+
+	return val;
+}
+
+static bool rmid_dirty(struct rmid_entry *entry)
+{
+	u64 val = __rmid_read(entry->rmid, QOS_L3_OCCUP_EVENT_ID);
+
+	return val >= intel_cqm_threshold;
+}
+
+/*
+ * Check the RMIDs that are marked as busy for this domain. If the
+ * reported LLC occupancy is below the threshold clear the busy bit and
+ * decrement the count. If the busy count gets to zero on an RMID, we
+ * free the RMID
+ */
+void __check_limbo(struct rdt_domain *d, bool force_free)
+{
+	struct rmid_entry *entry;
+	struct rdt_resource *r;
+	u32 crmid = 1, nrmid;
+
+	r = &rdt_resources_all[RDT_RESOURCE_L3];
+
+	/*
+	 * Skip RMID 0 and start from RMID 1 and check all the RMIDs that
+	 * are marked as busy for occupancy < threshold. If the occupancy
+	 * is less than the threshold decrement the busy counter of the
+	 * RMID and move it to the free list when the counter reaches 0.
+	 */
+	for (;;) {
+		nrmid = find_next_bit(d->rmid_busy_llc, r->num_rmid, crmid);
+		if (nrmid >= r->num_rmid)
+			break;
+
+		entry = __rmid_entry(nrmid);
+		if (force_free || !rmid_dirty(entry)) {
+			clear_bit(entry->rmid, d->rmid_busy_llc);
+			if (!--entry->busy) {
+				rmid_limbo_count--;
+				list_add_tail(&entry->list, &rmid_free_lru);
+			}
+		}
+		crmid = nrmid + 1;
+	}
+}
+
+bool has_busy_rmid(struct rdt_resource *r, struct rdt_domain *d)
+{
+	return find_first_bit(d->rmid_busy_llc, r->num_rmid) != r->num_rmid;
+}
+
+/*
+ * As of now the RMIDs allocation is global.
+ * However we keep track of which packages the RMIDs
+ * are used to optimize the limbo list management.
+ */
+int alloc_rmid(void)
+{
+	struct rmid_entry *entry;
+
+	lockdep_assert_held(&rdtgroup_mutex);
+
+	if (list_empty(&rmid_free_lru))
+		return rmid_limbo_count ? -EBUSY : -ENOSPC;
+
+	entry = list_first_entry(&rmid_free_lru,
+				 struct rmid_entry, list);
+	list_del(&entry->list);
+
+	return entry->rmid;
+}
+
+static void add_rmid_to_limbo(struct rmid_entry *entry)
+{
+	struct rdt_resource *r;
+	struct rdt_domain *d;
+	int cpu;
+	u64 val;
+
+	r = &rdt_resources_all[RDT_RESOURCE_L3];
+
+	entry->busy = 0;
+	cpu = get_cpu();
+	list_for_each_entry(d, &r->domains, list) {
+		if (cpumask_test_cpu(cpu, &d->cpu_mask)) {
+			val = __rmid_read(entry->rmid, QOS_L3_OCCUP_EVENT_ID);
+			if (val <= intel_cqm_threshold)
+				continue;
+		}
+
+		/*
+		 * For the first limbo RMID in the domain,
+		 * setup up the limbo worker.
+		 */
+		if (!has_busy_rmid(r, d))
+			cqm_setup_limbo_handler(d, CQM_LIMBOCHECK_INTERVAL);
+		set_bit(entry->rmid, d->rmid_busy_llc);
+		entry->busy++;
+	}
+	put_cpu();
+
+	if (entry->busy)
+		rmid_limbo_count++;
+	else
+		list_add_tail(&entry->list, &rmid_free_lru);
+}
+
+void free_rmid(u32 rmid)
+{
+	struct rmid_entry *entry;
+
+	if (!rmid)
+		return;
+
+	lockdep_assert_held(&rdtgroup_mutex);
+
+	entry = __rmid_entry(rmid);
+
+	if (is_llc_occupancy_enabled())
+		add_rmid_to_limbo(entry);
+	else
+		list_add_tail(&entry->list, &rmid_free_lru);
+}
+
+static int __mon_event_count(u32 rmid, struct rmid_read *rr)
+{
+	u64 chunks, shift, tval;
+	struct mbm_state *m;
+
+	tval = __rmid_read(rmid, rr->evtid);
+	if (tval & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL)) {
+		rr->val = tval;
+		return -EINVAL;
+	}
+	switch (rr->evtid) {
+	case QOS_L3_OCCUP_EVENT_ID:
+		rr->val += tval;
+		return 0;
+	case QOS_L3_MBM_TOTAL_EVENT_ID:
+		m = &rr->d->mbm_total[rmid];
+		break;
+	case QOS_L3_MBM_LOCAL_EVENT_ID:
+		m = &rr->d->mbm_local[rmid];
+		break;
+	default:
+		/*
+		 * Code would never reach here because
+		 * an invalid event id would fail the __rmid_read.
+		 */
+		return -EINVAL;
+	}
+
+	if (rr->first) {
+		m->prev_msr = tval;
+		m->chunks = 0;
+		return 0;
+	}
+
+	shift = 64 - MBM_CNTR_WIDTH;
+	chunks = (tval << shift) - (m->prev_msr << shift);
+	chunks >>= shift;
+	m->chunks += chunks;
+	m->prev_msr = tval;
+
+	rr->val += m->chunks;
+	return 0;
+}
+
+/*
+ * This is called via IPI to read the CQM/MBM counters
+ * on a domain.
+ */
+void mon_event_count(void *info)
+{
+	struct rdtgroup *rdtgrp, *entry;
+	struct rmid_read *rr = info;
+	struct list_head *head;
+
+	rdtgrp = rr->rgrp;
+
+	if (__mon_event_count(rdtgrp->mon.rmid, rr))
+		return;
+
+	/*
+	 * For Ctrl groups read data from child monitor groups.
+	 */
+	head = &rdtgrp->mon.crdtgrp_list;
+
+	if (rdtgrp->type == RDTCTRL_GROUP) {
+		list_for_each_entry(entry, head, mon.crdtgrp_list) {
+			if (__mon_event_count(entry->mon.rmid, rr))
+				return;
+		}
+	}
+}
+
+static void mbm_update(struct rdt_domain *d, int rmid)
+{
+	struct rmid_read rr;
+
+	rr.first = false;
+	rr.d = d;
+
+	/*
+	 * This is protected from concurrent reads from user
+	 * as both the user and we hold the global mutex.
+	 */
+	if (is_mbm_total_enabled()) {
+		rr.evtid = QOS_L3_MBM_TOTAL_EVENT_ID;
+		__mon_event_count(rmid, &rr);
+	}
+	if (is_mbm_local_enabled()) {
+		rr.evtid = QOS_L3_MBM_LOCAL_EVENT_ID;
+		__mon_event_count(rmid, &rr);
+	}
+}
+
+/*
+ * Handler to scan the limbo list and move the RMIDs
+ * to free list whose occupancy < threshold_occupancy.
+ */
+void cqm_handle_limbo(struct work_struct *work)
+{
+	unsigned long delay = msecs_to_jiffies(CQM_LIMBOCHECK_INTERVAL);
+	int cpu = smp_processor_id();
+	struct rdt_resource *r;
+	struct rdt_domain *d;
+
+	mutex_lock(&rdtgroup_mutex);
+
+	r = &rdt_resources_all[RDT_RESOURCE_L3];
+	d = get_domain_from_cpu(cpu, r);
+
+	if (!d) {
+		pr_warn_once("Failure to get domain for limbo worker\n");
+		goto out_unlock;
+	}
+
+	__check_limbo(d, false);
+
+	if (has_busy_rmid(r, d))
+		schedule_delayed_work_on(cpu, &d->cqm_limbo, delay);
+
+out_unlock:
+	mutex_unlock(&rdtgroup_mutex);
+}
+
+void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms)
+{
+	unsigned long delay = msecs_to_jiffies(delay_ms);
+	struct rdt_resource *r;
+	int cpu;
+
+	r = &rdt_resources_all[RDT_RESOURCE_L3];
+
+	cpu = cpumask_any(&dom->cpu_mask);
+	dom->cqm_work_cpu = cpu;
+
+	schedule_delayed_work_on(cpu, &dom->cqm_limbo, delay);
+}
+
+void mbm_handle_overflow(struct work_struct *work)
+{
+	unsigned long delay = msecs_to_jiffies(MBM_OVERFLOW_INTERVAL);
+	struct rdtgroup *prgrp, *crgrp;
+	int cpu = smp_processor_id();
+	struct list_head *head;
+	struct rdt_domain *d;
+
+	mutex_lock(&rdtgroup_mutex);
+
+	if (!static_branch_likely(&rdt_enable_key))
+		goto out_unlock;
+
+	d = get_domain_from_cpu(cpu, &rdt_resources_all[RDT_RESOURCE_L3]);
+	if (!d)
+		goto out_unlock;
+
+	list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
+		mbm_update(d, prgrp->mon.rmid);
+
+		head = &prgrp->mon.crdtgrp_list;
+		list_for_each_entry(crgrp, head, mon.crdtgrp_list)
+			mbm_update(d, crgrp->mon.rmid);
+	}
+
+	schedule_delayed_work_on(cpu, &d->mbm_over, delay);
+
+out_unlock:
+	mutex_unlock(&rdtgroup_mutex);
+}
+
+void mbm_setup_overflow_handler(struct rdt_domain *dom, unsigned long delay_ms)
+{
+	unsigned long delay = msecs_to_jiffies(delay_ms);
+	int cpu;
+
+	if (!static_branch_likely(&rdt_enable_key))
+		return;
+	cpu = cpumask_any(&dom->cpu_mask);
+	dom->mbm_work_cpu = cpu;
+	schedule_delayed_work_on(cpu, &dom->mbm_over, delay);
+}
+
+static int dom_data_init(struct rdt_resource *r)
+{
+	struct rmid_entry *entry = NULL;
+	int i, nr_rmids;
+
+	nr_rmids = r->num_rmid;
+	rmid_ptrs = kcalloc(nr_rmids, sizeof(struct rmid_entry), GFP_KERNEL);
+	if (!rmid_ptrs)
+		return -ENOMEM;
+
+	for (i = 0; i < nr_rmids; i++) {
+		entry = &rmid_ptrs[i];
+		INIT_LIST_HEAD(&entry->list);
+
+		entry->rmid = i;
+		list_add_tail(&entry->list, &rmid_free_lru);
+	}
+
+	/*
+	 * RMID 0 is special and is always allocated. It's used for all
+	 * tasks that are not monitored.
+	 */
+	entry = __rmid_entry(0);
+	list_del(&entry->list);
+
+	return 0;
+}
+
+static struct mon_evt llc_occupancy_event = {
+	.name		= "llc_occupancy",
+	.evtid		= QOS_L3_OCCUP_EVENT_ID,
+};
+
+static struct mon_evt mbm_total_event = {
+	.name		= "mbm_total_bytes",
+	.evtid		= QOS_L3_MBM_TOTAL_EVENT_ID,
+};
+
+static struct mon_evt mbm_local_event = {
+	.name		= "mbm_local_bytes",
+	.evtid		= QOS_L3_MBM_LOCAL_EVENT_ID,
+};
+
+/*
+ * Initialize the event list for the resource.
+ *
+ * Note that MBM events are also part of RDT_RESOURCE_L3 resource
+ * because as per the SDM the total and local memory bandwidth
+ * are enumerated as part of L3 monitoring.
+ */
+static void l3_mon_evt_init(struct rdt_resource *r)
+{
+	INIT_LIST_HEAD(&r->evt_list);
+
+	if (is_llc_occupancy_enabled())
+		list_add_tail(&llc_occupancy_event.list, &r->evt_list);
+	if (is_mbm_total_enabled())
+		list_add_tail(&mbm_total_event.list, &r->evt_list);
+	if (is_mbm_local_enabled())
+		list_add_tail(&mbm_local_event.list, &r->evt_list);
+}
+
+int rdt_get_mon_l3_config(struct rdt_resource *r)
+{
+	int ret;
+
+	r->mon_scale = boot_cpu_data.x86_cache_occ_scale;
+	r->num_rmid = boot_cpu_data.x86_cache_max_rmid + 1;
+
+	/*
+	 * A reasonable upper limit on the max threshold is the number
+	 * of lines tagged per RMID if all RMIDs have the same number of
+	 * lines tagged in the LLC.
+	 *
+	 * For a 35MB LLC and 56 RMIDs, this is ~1.8% of the LLC.
+	 */
+	intel_cqm_threshold = boot_cpu_data.x86_cache_size * 1024 / r->num_rmid;
+
+	/* h/w works in units of "boot_cpu_data.x86_cache_occ_scale" */
+	intel_cqm_threshold /= r->mon_scale;
+
+	ret = dom_data_init(r);
+	if (ret)
+		return ret;
+
+	l3_mon_evt_init(r);
+
+	r->mon_capable = true;
+	r->mon_enabled = true;
+
+	return 0;
+}
diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
index 9257bd9dc664..a869d4a073c5 100644
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -32,17 +32,25 @@
 
 #include <uapi/linux/magic.h>
 
-#include <asm/intel_rdt.h>
-#include <asm/intel_rdt_common.h>
+#include <asm/intel_rdt_sched.h>
+#include "intel_rdt.h"
 
 DEFINE_STATIC_KEY_FALSE(rdt_enable_key);
-struct kernfs_root *rdt_root;
+DEFINE_STATIC_KEY_FALSE(rdt_mon_enable_key);
+DEFINE_STATIC_KEY_FALSE(rdt_alloc_enable_key);
+static struct kernfs_root *rdt_root;
 struct rdtgroup rdtgroup_default;
 LIST_HEAD(rdt_all_groups);
 
 /* Kernel fs node for "info" directory under root */
 static struct kernfs_node *kn_info;
 
+/* Kernel fs node for "mon_groups" directory under root */
+static struct kernfs_node *kn_mongrp;
+
+/* Kernel fs node for "mon_data" directory under root */
+static struct kernfs_node *kn_mondata;
+
 /*
  * Trivial allocator for CLOSIDs. Since h/w only supports a small number,
  * we can keep a bitmap of free CLOSIDs in a single integer.
@@ -66,7 +74,7 @@ static void closid_init(void)
 	int rdt_min_closid = 32;
 
 	/* Compute rdt_min_closid across all resources */
-	for_each_enabled_rdt_resource(r)
+	for_each_alloc_enabled_rdt_resource(r)
 		rdt_min_closid = min(rdt_min_closid, r->num_closid);
 
 	closid_free_map = BIT_MASK(rdt_min_closid) - 1;
@@ -75,9 +83,9 @@ static void closid_init(void)
 	closid_free_map &= ~1;
 }
 
-int closid_alloc(void)
+static int closid_alloc(void)
 {
-	int closid = ffs(closid_free_map);
+	u32 closid = ffs(closid_free_map);
 
 	if (closid == 0)
 		return -ENOSPC;
@@ -125,28 +133,6 @@ static int rdtgroup_add_file(struct kernfs_node *parent_kn, struct rftype *rft)
 	return 0;
 }
 
-static int rdtgroup_add_files(struct kernfs_node *kn, struct rftype *rfts,
-			      int len)
-{
-	struct rftype *rft;
-	int ret;
-
-	lockdep_assert_held(&rdtgroup_mutex);
-
-	for (rft = rfts; rft < rfts + len; rft++) {
-		ret = rdtgroup_add_file(kn, rft);
-		if (ret)
-			goto error;
-	}
-
-	return 0;
-error:
-	pr_warn("Failed to add %s, err=%d\n", rft->name, ret);
-	while (--rft >= rfts)
-		kernfs_remove_by_name(kn, rft->name);
-	return ret;
-}
-
 static int rdtgroup_seqfile_show(struct seq_file *m, void *arg)
 {
 	struct kernfs_open_file *of = m->private;
@@ -174,6 +160,11 @@ static struct kernfs_ops rdtgroup_kf_single_ops = {
 	.seq_show		= rdtgroup_seqfile_show,
 };
 
+static struct kernfs_ops kf_mondata_ops = {
+	.atomic_write_len	= PAGE_SIZE,
+	.seq_show		= rdtgroup_mondata_show,
+};
+
 static bool is_cpu_list(struct kernfs_open_file *of)
 {
 	struct rftype *rft = of->kn->priv;
@@ -203,13 +194,18 @@ static int rdtgroup_cpus_show(struct kernfs_open_file *of,
 /*
  * This is safe against intel_rdt_sched_in() called from __switch_to()
  * because __switch_to() is executed with interrupts disabled. A local call
- * from rdt_update_closid() is proteced against __switch_to() because
+ * from update_closid_rmid() is proteced against __switch_to() because
  * preemption is disabled.
  */
-static void rdt_update_cpu_closid(void *closid)
+static void update_cpu_closid_rmid(void *info)
 {
-	if (closid)
-		this_cpu_write(cpu_closid, *(int *)closid);
+	struct rdtgroup *r = info;
+
+	if (r) {
+		this_cpu_write(pqr_state.default_closid, r->closid);
+		this_cpu_write(pqr_state.default_rmid, r->mon.rmid);
+	}
+
 	/*
 	 * We cannot unconditionally write the MSR because the current
 	 * executing task might have its own closid selected. Just reuse
@@ -221,28 +217,128 @@ static void rdt_update_cpu_closid(void *closid)
 /*
  * Update the PGR_ASSOC MSR on all cpus in @cpu_mask,
  *
- * Per task closids must have been set up before calling this function.
- *
- * The per cpu closids are updated with the smp function call, when @closid
- * is not NULL. If @closid is NULL then all affected percpu closids must
- * have been set up before calling this function.
+ * Per task closids/rmids must have been set up before calling this function.
  */
 static void
-rdt_update_closid(const struct cpumask *cpu_mask, int *closid)
+update_closid_rmid(const struct cpumask *cpu_mask, struct rdtgroup *r)
 {
 	int cpu = get_cpu();
 
 	if (cpumask_test_cpu(cpu, cpu_mask))
-		rdt_update_cpu_closid(closid);
-	smp_call_function_many(cpu_mask, rdt_update_cpu_closid, closid, 1);
+		update_cpu_closid_rmid(r);
+	smp_call_function_many(cpu_mask, update_cpu_closid_rmid, r, 1);
 	put_cpu();
 }
 
+static int cpus_mon_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask,
+			  cpumask_var_t tmpmask)
+{
+	struct rdtgroup *prgrp = rdtgrp->mon.parent, *crgrp;
+	struct list_head *head;
+
+	/* Check whether cpus belong to parent ctrl group */
+	cpumask_andnot(tmpmask, newmask, &prgrp->cpu_mask);
+	if (cpumask_weight(tmpmask))
+		return -EINVAL;
+
+	/* Check whether cpus are dropped from this group */
+	cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask);
+	if (cpumask_weight(tmpmask)) {
+		/* Give any dropped cpus to parent rdtgroup */
+		cpumask_or(&prgrp->cpu_mask, &prgrp->cpu_mask, tmpmask);
+		update_closid_rmid(tmpmask, prgrp);
+	}
+
+	/*
+	 * If we added cpus, remove them from previous group that owned them
+	 * and update per-cpu rmid
+	 */
+	cpumask_andnot(tmpmask, newmask, &rdtgrp->cpu_mask);
+	if (cpumask_weight(tmpmask)) {
+		head = &prgrp->mon.crdtgrp_list;
+		list_for_each_entry(crgrp, head, mon.crdtgrp_list) {
+			if (crgrp == rdtgrp)
+				continue;
+			cpumask_andnot(&crgrp->cpu_mask, &crgrp->cpu_mask,
+				       tmpmask);
+		}
+		update_closid_rmid(tmpmask, rdtgrp);
+	}
+
+	/* Done pushing/pulling - update this group with new mask */
+	cpumask_copy(&rdtgrp->cpu_mask, newmask);
+
+	return 0;
+}
+
+static void cpumask_rdtgrp_clear(struct rdtgroup *r, struct cpumask *m)
+{
+	struct rdtgroup *crgrp;
+
+	cpumask_andnot(&r->cpu_mask, &r->cpu_mask, m);
+	/* update the child mon group masks as well*/
+	list_for_each_entry(crgrp, &r->mon.crdtgrp_list, mon.crdtgrp_list)
+		cpumask_and(&crgrp->cpu_mask, &r->cpu_mask, &crgrp->cpu_mask);
+}
+
+static int cpus_ctrl_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask,
+			   cpumask_var_t tmpmask, cpumask_var_t tmpmask1)
+{
+	struct rdtgroup *r, *crgrp;
+	struct list_head *head;
+
+	/* Check whether cpus are dropped from this group */
+	cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask);
+	if (cpumask_weight(tmpmask)) {
+		/* Can't drop from default group */
+		if (rdtgrp == &rdtgroup_default)
+			return -EINVAL;
+
+		/* Give any dropped cpus to rdtgroup_default */
+		cpumask_or(&rdtgroup_default.cpu_mask,
+			   &rdtgroup_default.cpu_mask, tmpmask);
+		update_closid_rmid(tmpmask, &rdtgroup_default);
+	}
+
+	/*
+	 * If we added cpus, remove them from previous group and
+	 * the prev group's child groups that owned them
+	 * and update per-cpu closid/rmid.
+	 */
+	cpumask_andnot(tmpmask, newmask, &rdtgrp->cpu_mask);
+	if (cpumask_weight(tmpmask)) {
+		list_for_each_entry(r, &rdt_all_groups, rdtgroup_list) {
+			if (r == rdtgrp)
+				continue;
+			cpumask_and(tmpmask1, &r->cpu_mask, tmpmask);
+			if (cpumask_weight(tmpmask1))
+				cpumask_rdtgrp_clear(r, tmpmask1);
+		}
+		update_closid_rmid(tmpmask, rdtgrp);
+	}
+
+	/* Done pushing/pulling - update this group with new mask */
+	cpumask_copy(&rdtgrp->cpu_mask, newmask);
+
+	/*
+	 * Clear child mon group masks since there is a new parent mask
+	 * now and update the rmid for the cpus the child lost.
+	 */
+	head = &rdtgrp->mon.crdtgrp_list;
+	list_for_each_entry(crgrp, head, mon.crdtgrp_list) {
+		cpumask_and(tmpmask, &rdtgrp->cpu_mask, &crgrp->cpu_mask);
+		update_closid_rmid(tmpmask, rdtgrp);
+		cpumask_clear(&crgrp->cpu_mask);
+	}
+
+	return 0;
+}
+
 static ssize_t rdtgroup_cpus_write(struct kernfs_open_file *of,
 				   char *buf, size_t nbytes, loff_t off)
 {
-	cpumask_var_t tmpmask, newmask;
-	struct rdtgroup *rdtgrp, *r;
+	cpumask_var_t tmpmask, newmask, tmpmask1;
+	struct rdtgroup *rdtgrp;
 	int ret;
 
 	if (!buf)
@@ -254,6 +350,11 @@ static ssize_t rdtgroup_cpus_write(struct kernfs_open_file *of,
 		free_cpumask_var(tmpmask);
 		return -ENOMEM;
 	}
+	if (!zalloc_cpumask_var(&tmpmask1, GFP_KERNEL)) {
+		free_cpumask_var(tmpmask);
+		free_cpumask_var(newmask);
+		return -ENOMEM;
+	}
 
 	rdtgrp = rdtgroup_kn_lock_live(of->kn);
 	if (!rdtgrp) {
@@ -276,41 +377,18 @@ static ssize_t rdtgroup_cpus_write(struct kernfs_open_file *of,
 		goto unlock;
 	}
 
-	/* Check whether cpus are dropped from this group */
-	cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask);
-	if (cpumask_weight(tmpmask)) {
-		/* Can't drop from default group */
-		if (rdtgrp == &rdtgroup_default) {
-			ret = -EINVAL;
-			goto unlock;
-		}
-		/* Give any dropped cpus to rdtgroup_default */
-		cpumask_or(&rdtgroup_default.cpu_mask,
-			   &rdtgroup_default.cpu_mask, tmpmask);
-		rdt_update_closid(tmpmask, &rdtgroup_default.closid);
-	}
-
-	/*
-	 * If we added cpus, remove them from previous group that owned them
-	 * and update per-cpu closid
-	 */
-	cpumask_andnot(tmpmask, newmask, &rdtgrp->cpu_mask);
-	if (cpumask_weight(tmpmask)) {
-		list_for_each_entry(r, &rdt_all_groups, rdtgroup_list) {
-			if (r == rdtgrp)
-				continue;
-			cpumask_andnot(&r->cpu_mask, &r->cpu_mask, tmpmask);
-		}
-		rdt_update_closid(tmpmask, &rdtgrp->closid);
-	}
-
-	/* Done pushing/pulling - update this group with new mask */
-	cpumask_copy(&rdtgrp->cpu_mask, newmask);
+	if (rdtgrp->type == RDTCTRL_GROUP)
+		ret = cpus_ctrl_write(rdtgrp, newmask, tmpmask, tmpmask1);
+	else if (rdtgrp->type == RDTMON_GROUP)
+		ret = cpus_mon_write(rdtgrp, newmask, tmpmask);
+	else
+		ret = -EINVAL;
 
 unlock:
 	rdtgroup_kn_unlock(of->kn);
 	free_cpumask_var(tmpmask);
 	free_cpumask_var(newmask);
+	free_cpumask_var(tmpmask1);
 
 	return ret ?: nbytes;
 }
@@ -336,6 +414,7 @@ static void move_myself(struct callback_head *head)
 	if (atomic_dec_and_test(&rdtgrp->waitcount) &&
 	    (rdtgrp->flags & RDT_DELETED)) {
 		current->closid = 0;
+		current->rmid = 0;
 		kfree(rdtgrp);
 	}
 
@@ -374,7 +453,20 @@ static int __rdtgroup_move_task(struct task_struct *tsk,
 		atomic_dec(&rdtgrp->waitcount);
 		kfree(callback);
 	} else {
-		tsk->closid = rdtgrp->closid;
+		/*
+		 * For ctrl_mon groups move both closid and rmid.
+		 * For monitor groups, can move the tasks only from
+		 * their parent CTRL group.
+		 */
+		if (rdtgrp->type == RDTCTRL_GROUP) {
+			tsk->closid = rdtgrp->closid;
+			tsk->rmid = rdtgrp->mon.rmid;
+		} else if (rdtgrp->type == RDTMON_GROUP) {
+			if (rdtgrp->mon.parent->closid == tsk->closid)
+				tsk->rmid = rdtgrp->mon.rmid;
+			else
+				ret = -EINVAL;
+		}
 	}
 	return ret;
 }
@@ -454,7 +546,8 @@ static void show_rdt_tasks(struct rdtgroup *r, struct seq_file *s)
 
 	rcu_read_lock();
 	for_each_process_thread(p, t) {
-		if (t->closid == r->closid)
+		if ((r->type == RDTCTRL_GROUP && t->closid == r->closid) ||
+		    (r->type == RDTMON_GROUP && t->rmid == r->mon.rmid))
 			seq_printf(s, "%d\n", t->pid);
 	}
 	rcu_read_unlock();
@@ -476,39 +569,6 @@ static int rdtgroup_tasks_show(struct kernfs_open_file *of,
 	return ret;
 }
 
-/* Files in each rdtgroup */
-static struct rftype rdtgroup_base_files[] = {
-	{
-		.name		= "cpus",
-		.mode		= 0644,
-		.kf_ops		= &rdtgroup_kf_single_ops,
-		.write		= rdtgroup_cpus_write,
-		.seq_show	= rdtgroup_cpus_show,
-	},
-	{
-		.name		= "cpus_list",
-		.mode		= 0644,
-		.kf_ops		= &rdtgroup_kf_single_ops,
-		.write		= rdtgroup_cpus_write,
-		.seq_show	= rdtgroup_cpus_show,
-		.flags		= RFTYPE_FLAGS_CPUS_LIST,
-	},
-	{
-		.name		= "tasks",
-		.mode		= 0644,
-		.kf_ops		= &rdtgroup_kf_single_ops,
-		.write		= rdtgroup_tasks_write,
-		.seq_show	= rdtgroup_tasks_show,
-	},
-	{
-		.name		= "schemata",
-		.mode		= 0644,
-		.kf_ops		= &rdtgroup_kf_single_ops,
-		.write		= rdtgroup_schemata_write,
-		.seq_show	= rdtgroup_schemata_show,
-	},
-};
-
 static int rdt_num_closids_show(struct kernfs_open_file *of,
 				struct seq_file *seq, void *v)
 {
@@ -536,6 +596,15 @@ static int rdt_min_cbm_bits_show(struct kernfs_open_file *of,
 	return 0;
 }
 
+static int rdt_shareable_bits_show(struct kernfs_open_file *of,
+				   struct seq_file *seq, void *v)
+{
+	struct rdt_resource *r = of->kn->parent->priv;
+
+	seq_printf(seq, "%x\n", r->cache.shareable_bits);
+	return 0;
+}
+
 static int rdt_min_bw_show(struct kernfs_open_file *of,
 			     struct seq_file *seq, void *v)
 {
@@ -545,6 +614,28 @@ static int rdt_min_bw_show(struct kernfs_open_file *of,
 	return 0;
 }
 
+static int rdt_num_rmids_show(struct kernfs_open_file *of,
+			      struct seq_file *seq, void *v)
+{
+	struct rdt_resource *r = of->kn->parent->priv;
+
+	seq_printf(seq, "%d\n", r->num_rmid);
+
+	return 0;
+}
+
+static int rdt_mon_features_show(struct kernfs_open_file *of,
+				 struct seq_file *seq, void *v)
+{
+	struct rdt_resource *r = of->kn->parent->priv;
+	struct mon_evt *mevt;
+
+	list_for_each_entry(mevt, &r->evt_list, list)
+		seq_printf(seq, "%s\n", mevt->name);
+
+	return 0;
+}
+
 static int rdt_bw_gran_show(struct kernfs_open_file *of,
 			     struct seq_file *seq, void *v)
 {
@@ -563,74 +654,200 @@ static int rdt_delay_linear_show(struct kernfs_open_file *of,
 	return 0;
 }
 
+static int max_threshold_occ_show(struct kernfs_open_file *of,
+				  struct seq_file *seq, void *v)
+{
+	struct rdt_resource *r = of->kn->parent->priv;
+
+	seq_printf(seq, "%u\n", intel_cqm_threshold * r->mon_scale);
+
+	return 0;
+}
+
+static ssize_t max_threshold_occ_write(struct kernfs_open_file *of,
+				       char *buf, size_t nbytes, loff_t off)
+{
+	struct rdt_resource *r = of->kn->parent->priv;
+	unsigned int bytes;
+	int ret;
+
+	ret = kstrtouint(buf, 0, &bytes);
+	if (ret)
+		return ret;
+
+	if (bytes > (boot_cpu_data.x86_cache_size * 1024))
+		return -EINVAL;
+
+	intel_cqm_threshold = bytes / r->mon_scale;
+
+	return nbytes;
+}
+
 /* rdtgroup information files for one cache resource. */
-static struct rftype res_cache_info_files[] = {
+static struct rftype res_common_files[] = {
 	{
 		.name		= "num_closids",
 		.mode		= 0444,
 		.kf_ops		= &rdtgroup_kf_single_ops,
 		.seq_show	= rdt_num_closids_show,
+		.fflags		= RF_CTRL_INFO,
+	},
+	{
+		.name		= "mon_features",
+		.mode		= 0444,
+		.kf_ops		= &rdtgroup_kf_single_ops,
+		.seq_show	= rdt_mon_features_show,
+		.fflags		= RF_MON_INFO,
+	},
+	{
+		.name		= "num_rmids",
+		.mode		= 0444,
+		.kf_ops		= &rdtgroup_kf_single_ops,
+		.seq_show	= rdt_num_rmids_show,
+		.fflags		= RF_MON_INFO,
 	},
 	{
 		.name		= "cbm_mask",
 		.mode		= 0444,
 		.kf_ops		= &rdtgroup_kf_single_ops,
 		.seq_show	= rdt_default_ctrl_show,
+		.fflags		= RF_CTRL_INFO | RFTYPE_RES_CACHE,
 	},
 	{
 		.name		= "min_cbm_bits",
 		.mode		= 0444,
 		.kf_ops		= &rdtgroup_kf_single_ops,
 		.seq_show	= rdt_min_cbm_bits_show,
+		.fflags		= RF_CTRL_INFO | RFTYPE_RES_CACHE,
 	},
-};
-
-/* rdtgroup information files for memory bandwidth. */
-static struct rftype res_mba_info_files[] = {
 	{
-		.name		= "num_closids",
+		.name		= "shareable_bits",
 		.mode		= 0444,
 		.kf_ops		= &rdtgroup_kf_single_ops,
-		.seq_show	= rdt_num_closids_show,
+		.seq_show	= rdt_shareable_bits_show,
+		.fflags		= RF_CTRL_INFO | RFTYPE_RES_CACHE,
 	},
 	{
 		.name		= "min_bandwidth",
 		.mode		= 0444,
 		.kf_ops		= &rdtgroup_kf_single_ops,
 		.seq_show	= rdt_min_bw_show,
+		.fflags		= RF_CTRL_INFO | RFTYPE_RES_MB,
 	},
 	{
 		.name		= "bandwidth_gran",
 		.mode		= 0444,
 		.kf_ops		= &rdtgroup_kf_single_ops,
 		.seq_show	= rdt_bw_gran_show,
+		.fflags		= RF_CTRL_INFO | RFTYPE_RES_MB,
 	},
 	{
 		.name		= "delay_linear",
 		.mode		= 0444,
 		.kf_ops		= &rdtgroup_kf_single_ops,
 		.seq_show	= rdt_delay_linear_show,
+		.fflags		= RF_CTRL_INFO | RFTYPE_RES_MB,
+	},
+	{
+		.name		= "max_threshold_occupancy",
+		.mode		= 0644,
+		.kf_ops		= &rdtgroup_kf_single_ops,
+		.write		= max_threshold_occ_write,
+		.seq_show	= max_threshold_occ_show,
+		.fflags		= RF_MON_INFO | RFTYPE_RES_CACHE,
+	},
+	{
+		.name		= "cpus",
+		.mode		= 0644,
+		.kf_ops		= &rdtgroup_kf_single_ops,
+		.write		= rdtgroup_cpus_write,
+		.seq_show	= rdtgroup_cpus_show,
+		.fflags		= RFTYPE_BASE,
+	},
+	{
+		.name		= "cpus_list",
+		.mode		= 0644,
+		.kf_ops		= &rdtgroup_kf_single_ops,
+		.write		= rdtgroup_cpus_write,
+		.seq_show	= rdtgroup_cpus_show,
+		.flags		= RFTYPE_FLAGS_CPUS_LIST,
+		.fflags		= RFTYPE_BASE,
+	},
+	{
+		.name		= "tasks",
+		.mode		= 0644,
+		.kf_ops		= &rdtgroup_kf_single_ops,
+		.write		= rdtgroup_tasks_write,
+		.seq_show	= rdtgroup_tasks_show,
+		.fflags		= RFTYPE_BASE,
+	},
+	{
+		.name		= "schemata",
+		.mode		= 0644,
+		.kf_ops		= &rdtgroup_kf_single_ops,
+		.write		= rdtgroup_schemata_write,
+		.seq_show	= rdtgroup_schemata_show,
+		.fflags		= RF_CTRL_BASE,
 	},
 };
 
-void rdt_get_mba_infofile(struct rdt_resource *r)
+static int rdtgroup_add_files(struct kernfs_node *kn, unsigned long fflags)
 {
-	r->info_files = res_mba_info_files;
-	r->nr_info_files = ARRAY_SIZE(res_mba_info_files);
+	struct rftype *rfts, *rft;
+	int ret, len;
+
+	rfts = res_common_files;
+	len = ARRAY_SIZE(res_common_files);
+
+	lockdep_assert_held(&rdtgroup_mutex);
+
+	for (rft = rfts; rft < rfts + len; rft++) {
+		if ((fflags & rft->fflags) == rft->fflags) {
+			ret = rdtgroup_add_file(kn, rft);
+			if (ret)
+				goto error;
+		}
+	}
+
+	return 0;
+error:
+	pr_warn("Failed to add %s, err=%d\n", rft->name, ret);
+	while (--rft >= rfts) {
+		if ((fflags & rft->fflags) == rft->fflags)
+			kernfs_remove_by_name(kn, rft->name);
+	}
+	return ret;
 }
 
-void rdt_get_cache_infofile(struct rdt_resource *r)
+static int rdtgroup_mkdir_info_resdir(struct rdt_resource *r, char *name,
+				      unsigned long fflags)
 {
-	r->info_files = res_cache_info_files;
-	r->nr_info_files = ARRAY_SIZE(res_cache_info_files);
+	struct kernfs_node *kn_subdir;
+	int ret;
+
+	kn_subdir = kernfs_create_dir(kn_info, name,
+				      kn_info->mode, r);
+	if (IS_ERR(kn_subdir))
+		return PTR_ERR(kn_subdir);
+
+	kernfs_get(kn_subdir);
+	ret = rdtgroup_kn_set_ugid(kn_subdir);
+	if (ret)
+		return ret;
+
+	ret = rdtgroup_add_files(kn_subdir, fflags);
+	if (!ret)
+		kernfs_activate(kn_subdir);
+
+	return ret;
 }
 
 static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn)
 {
-	struct kernfs_node *kn_subdir;
-	struct rftype *res_info_files;
 	struct rdt_resource *r;
-	int ret, len;
+	unsigned long fflags;
+	char name[32];
+	int ret;
 
 	/* create the directory */
 	kn_info = kernfs_create_dir(parent_kn, "info", parent_kn->mode, NULL);
@@ -638,25 +855,19 @@ static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn)
 		return PTR_ERR(kn_info);
 	kernfs_get(kn_info);
 
-	for_each_enabled_rdt_resource(r) {
-		kn_subdir = kernfs_create_dir(kn_info, r->name,
-					      kn_info->mode, r);
-		if (IS_ERR(kn_subdir)) {
-			ret = PTR_ERR(kn_subdir);
-			goto out_destroy;
-		}
-		kernfs_get(kn_subdir);
-		ret = rdtgroup_kn_set_ugid(kn_subdir);
+	for_each_alloc_enabled_rdt_resource(r) {
+		fflags =  r->fflags | RF_CTRL_INFO;
+		ret = rdtgroup_mkdir_info_resdir(r, r->name, fflags);
 		if (ret)
 			goto out_destroy;
+	}
 
-		res_info_files = r->info_files;
-		len = r->nr_info_files;
-
-		ret = rdtgroup_add_files(kn_subdir, res_info_files, len);
+	for_each_mon_enabled_rdt_resource(r) {
+		fflags =  r->fflags | RF_MON_INFO;
+		sprintf(name, "%s_MON", r->name);
+		ret = rdtgroup_mkdir_info_resdir(r, name, fflags);
 		if (ret)
 			goto out_destroy;
-		kernfs_activate(kn_subdir);
 	}
 
 	/*
@@ -678,6 +889,39 @@ out_destroy:
 	return ret;
 }
 
+static int
+mongroup_create_dir(struct kernfs_node *parent_kn, struct rdtgroup *prgrp,
+		    char *name, struct kernfs_node **dest_kn)
+{
+	struct kernfs_node *kn;
+	int ret;
+
+	/* create the directory */
+	kn = kernfs_create_dir(parent_kn, name, parent_kn->mode, prgrp);
+	if (IS_ERR(kn))
+		return PTR_ERR(kn);
+
+	if (dest_kn)
+		*dest_kn = kn;
+
+	/*
+	 * This extra ref will be put in kernfs_remove() and guarantees
+	 * that @rdtgrp->kn is always accessible.
+	 */
+	kernfs_get(kn);
+
+	ret = rdtgroup_kn_set_ugid(kn);
+	if (ret)
+		goto out_destroy;
+
+	kernfs_activate(kn);
+
+	return 0;
+
+out_destroy:
+	kernfs_remove(kn);
+	return ret;
+}
 static void l3_qos_cfg_update(void *arg)
 {
 	bool *enable = arg;
@@ -718,14 +962,15 @@ static int cdp_enable(void)
 	struct rdt_resource *r_l3 = &rdt_resources_all[RDT_RESOURCE_L3];
 	int ret;
 
-	if (!r_l3->capable || !r_l3data->capable || !r_l3code->capable)
+	if (!r_l3->alloc_capable || !r_l3data->alloc_capable ||
+	    !r_l3code->alloc_capable)
 		return -EINVAL;
 
 	ret = set_l3_qos_cfg(r_l3, true);
 	if (!ret) {
-		r_l3->enabled = false;
-		r_l3data->enabled = true;
-		r_l3code->enabled = true;
+		r_l3->alloc_enabled = false;
+		r_l3data->alloc_enabled = true;
+		r_l3code->alloc_enabled = true;
 	}
 	return ret;
 }
@@ -734,11 +979,11 @@ static void cdp_disable(void)
 {
 	struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3];
 
-	r->enabled = r->capable;
+	r->alloc_enabled = r->alloc_capable;
 
-	if (rdt_resources_all[RDT_RESOURCE_L3DATA].enabled) {
-		rdt_resources_all[RDT_RESOURCE_L3DATA].enabled = false;
-		rdt_resources_all[RDT_RESOURCE_L3CODE].enabled = false;
+	if (rdt_resources_all[RDT_RESOURCE_L3DATA].alloc_enabled) {
+		rdt_resources_all[RDT_RESOURCE_L3DATA].alloc_enabled = false;
+		rdt_resources_all[RDT_RESOURCE_L3CODE].alloc_enabled = false;
 		set_l3_qos_cfg(r, false);
 	}
 }
@@ -823,10 +1068,16 @@ void rdtgroup_kn_unlock(struct kernfs_node *kn)
 	}
 }
 
+static int mkdir_mondata_all(struct kernfs_node *parent_kn,
+			     struct rdtgroup *prgrp,
+			     struct kernfs_node **mon_data_kn);
+
 static struct dentry *rdt_mount(struct file_system_type *fs_type,
 				int flags, const char *unused_dev_name,
 				void *data)
 {
+	struct rdt_domain *dom;
+	struct rdt_resource *r;
 	struct dentry *dentry;
 	int ret;
 
@@ -853,15 +1104,54 @@ static struct dentry *rdt_mount(struct file_system_type *fs_type,
 		goto out_cdp;
 	}
 
+	if (rdt_mon_capable) {
+		ret = mongroup_create_dir(rdtgroup_default.kn,
+					  NULL, "mon_groups",
+					  &kn_mongrp);
+		if (ret) {
+			dentry = ERR_PTR(ret);
+			goto out_info;
+		}
+		kernfs_get(kn_mongrp);
+
+		ret = mkdir_mondata_all(rdtgroup_default.kn,
+					&rdtgroup_default, &kn_mondata);
+		if (ret) {
+			dentry = ERR_PTR(ret);
+			goto out_mongrp;
+		}
+		kernfs_get(kn_mondata);
+		rdtgroup_default.mon.mon_data_kn = kn_mondata;
+	}
+
 	dentry = kernfs_mount(fs_type, flags, rdt_root,
 			      RDTGROUP_SUPER_MAGIC, NULL);
 	if (IS_ERR(dentry))
-		goto out_destroy;
+		goto out_mondata;
+
+	if (rdt_alloc_capable)
+		static_branch_enable(&rdt_alloc_enable_key);
+	if (rdt_mon_capable)
+		static_branch_enable(&rdt_mon_enable_key);
+
+	if (rdt_alloc_capable || rdt_mon_capable)
+		static_branch_enable(&rdt_enable_key);
+
+	if (is_mbm_enabled()) {
+		r = &rdt_resources_all[RDT_RESOURCE_L3];
+		list_for_each_entry(dom, &r->domains, list)
+			mbm_setup_overflow_handler(dom, MBM_OVERFLOW_INTERVAL);
+	}
 
-	static_branch_enable(&rdt_enable_key);
 	goto out;
 
-out_destroy:
+out_mondata:
+	if (rdt_mon_capable)
+		kernfs_remove(kn_mondata);
+out_mongrp:
+	if (rdt_mon_capable)
+		kernfs_remove(kn_mongrp);
+out_info:
 	kernfs_remove(kn_info);
 out_cdp:
 	cdp_disable();
@@ -909,6 +1199,18 @@ static int reset_all_ctrls(struct rdt_resource *r)
 	return 0;
 }
 
+static bool is_closid_match(struct task_struct *t, struct rdtgroup *r)
+{
+	return (rdt_alloc_capable &&
+		(r->type == RDTCTRL_GROUP) && (t->closid == r->closid));
+}
+
+static bool is_rmid_match(struct task_struct *t, struct rdtgroup *r)
+{
+	return (rdt_mon_capable &&
+		(r->type == RDTMON_GROUP) && (t->rmid == r->mon.rmid));
+}
+
 /*
  * Move tasks from one to the other group. If @from is NULL, then all tasks
  * in the systems are moved unconditionally (used for teardown).
@@ -924,8 +1226,11 @@ static void rdt_move_group_tasks(struct rdtgroup *from, struct rdtgroup *to,
 
 	read_lock(&tasklist_lock);
 	for_each_process_thread(p, t) {
-		if (!from || t->closid == from->closid) {
+		if (!from || is_closid_match(t, from) ||
+		    is_rmid_match(t, from)) {
 			t->closid = to->closid;
+			t->rmid = to->mon.rmid;
+
 #ifdef CONFIG_SMP
 			/*
 			 * This is safe on x86 w/o barriers as the ordering
@@ -944,6 +1249,19 @@ static void rdt_move_group_tasks(struct rdtgroup *from, struct rdtgroup *to,
 	read_unlock(&tasklist_lock);
 }
 
+static void free_all_child_rdtgrp(struct rdtgroup *rdtgrp)
+{
+	struct rdtgroup *sentry, *stmp;
+	struct list_head *head;
+
+	head = &rdtgrp->mon.crdtgrp_list;
+	list_for_each_entry_safe(sentry, stmp, head, mon.crdtgrp_list) {
+		free_rmid(sentry->mon.rmid);
+		list_del(&sentry->mon.crdtgrp_list);
+		kfree(sentry);
+	}
+}
+
 /*
  * Forcibly remove all of subdirectories under root.
  */
@@ -955,6 +1273,9 @@ static void rmdir_all_sub(void)
 	rdt_move_group_tasks(NULL, &rdtgroup_default, NULL);
 
 	list_for_each_entry_safe(rdtgrp, tmp, &rdt_all_groups, rdtgroup_list) {
+		/* Free any child rmids */
+		free_all_child_rdtgrp(rdtgrp);
+
 		/* Remove each rdtgroup other than root */
 		if (rdtgrp == &rdtgroup_default)
 			continue;
@@ -967,16 +1288,20 @@ static void rmdir_all_sub(void)
 		cpumask_or(&rdtgroup_default.cpu_mask,
 			   &rdtgroup_default.cpu_mask, &rdtgrp->cpu_mask);
 
+		free_rmid(rdtgrp->mon.rmid);
+
 		kernfs_remove(rdtgrp->kn);
 		list_del(&rdtgrp->rdtgroup_list);
 		kfree(rdtgrp);
 	}
 	/* Notify online CPUs to update per cpu storage and PQR_ASSOC MSR */
 	get_online_cpus();
-	rdt_update_closid(cpu_online_mask, &rdtgroup_default.closid);
+	update_closid_rmid(cpu_online_mask, &rdtgroup_default);
 	put_online_cpus();
 
 	kernfs_remove(kn_info);
+	kernfs_remove(kn_mongrp);
+	kernfs_remove(kn_mondata);
 }
 
 static void rdt_kill_sb(struct super_block *sb)
@@ -986,10 +1311,12 @@ static void rdt_kill_sb(struct super_block *sb)
 	mutex_lock(&rdtgroup_mutex);
 
 	/*Put everything back to default values. */
-	for_each_enabled_rdt_resource(r)
+	for_each_alloc_enabled_rdt_resource(r)
 		reset_all_ctrls(r);
 	cdp_disable();
 	rmdir_all_sub();
+	static_branch_disable(&rdt_alloc_enable_key);
+	static_branch_disable(&rdt_mon_enable_key);
 	static_branch_disable(&rdt_enable_key);
 	kernfs_kill_sb(sb);
 	mutex_unlock(&rdtgroup_mutex);
@@ -1001,46 +1328,223 @@ static struct file_system_type rdt_fs_type = {
 	.kill_sb = rdt_kill_sb,
 };
 
-static int rdtgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
-			  umode_t mode)
+static int mon_addfile(struct kernfs_node *parent_kn, const char *name,
+		       void *priv)
 {
-	struct rdtgroup *parent, *rdtgrp;
 	struct kernfs_node *kn;
-	int ret, closid;
+	int ret = 0;
 
-	/* Only allow mkdir in the root directory */
-	if (parent_kn != rdtgroup_default.kn)
-		return -EPERM;
+	kn = __kernfs_create_file(parent_kn, name, 0444, 0,
+				  &kf_mondata_ops, priv, NULL, NULL);
+	if (IS_ERR(kn))
+		return PTR_ERR(kn);
 
-	/* Do not accept '\n' to avoid unparsable situation. */
-	if (strchr(name, '\n'))
-		return -EINVAL;
+	ret = rdtgroup_kn_set_ugid(kn);
+	if (ret) {
+		kernfs_remove(kn);
+		return ret;
+	}
 
-	parent = rdtgroup_kn_lock_live(parent_kn);
-	if (!parent) {
-		ret = -ENODEV;
-		goto out_unlock;
+	return ret;
+}
+
+/*
+ * Remove all subdirectories of mon_data of ctrl_mon groups
+ * and monitor groups with given domain id.
+ */
+void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r, unsigned int dom_id)
+{
+	struct rdtgroup *prgrp, *crgrp;
+	char name[32];
+
+	if (!r->mon_enabled)
+		return;
+
+	list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
+		sprintf(name, "mon_%s_%02d", r->name, dom_id);
+		kernfs_remove_by_name(prgrp->mon.mon_data_kn, name);
+
+		list_for_each_entry(crgrp, &prgrp->mon.crdtgrp_list, mon.crdtgrp_list)
+			kernfs_remove_by_name(crgrp->mon.mon_data_kn, name);
 	}
+}
 
-	ret = closid_alloc();
-	if (ret < 0)
+static int mkdir_mondata_subdir(struct kernfs_node *parent_kn,
+				struct rdt_domain *d,
+				struct rdt_resource *r, struct rdtgroup *prgrp)
+{
+	union mon_data_bits priv;
+	struct kernfs_node *kn;
+	struct mon_evt *mevt;
+	struct rmid_read rr;
+	char name[32];
+	int ret;
+
+	sprintf(name, "mon_%s_%02d", r->name, d->id);
+	/* create the directory */
+	kn = kernfs_create_dir(parent_kn, name, parent_kn->mode, prgrp);
+	if (IS_ERR(kn))
+		return PTR_ERR(kn);
+
+	/*
+	 * This extra ref will be put in kernfs_remove() and guarantees
+	 * that kn is always accessible.
+	 */
+	kernfs_get(kn);
+	ret = rdtgroup_kn_set_ugid(kn);
+	if (ret)
+		goto out_destroy;
+
+	if (WARN_ON(list_empty(&r->evt_list))) {
+		ret = -EPERM;
+		goto out_destroy;
+	}
+
+	priv.u.rid = r->rid;
+	priv.u.domid = d->id;
+	list_for_each_entry(mevt, &r->evt_list, list) {
+		priv.u.evtid = mevt->evtid;
+		ret = mon_addfile(kn, mevt->name, priv.priv);
+		if (ret)
+			goto out_destroy;
+
+		if (is_mbm_event(mevt->evtid))
+			mon_event_read(&rr, d, prgrp, mevt->evtid, true);
+	}
+	kernfs_activate(kn);
+	return 0;
+
+out_destroy:
+	kernfs_remove(kn);
+	return ret;
+}
+
+/*
+ * Add all subdirectories of mon_data for "ctrl_mon" groups
+ * and "monitor" groups with given domain id.
+ */
+void mkdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
+				    struct rdt_domain *d)
+{
+	struct kernfs_node *parent_kn;
+	struct rdtgroup *prgrp, *crgrp;
+	struct list_head *head;
+
+	if (!r->mon_enabled)
+		return;
+
+	list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
+		parent_kn = prgrp->mon.mon_data_kn;
+		mkdir_mondata_subdir(parent_kn, d, r, prgrp);
+
+		head = &prgrp->mon.crdtgrp_list;
+		list_for_each_entry(crgrp, head, mon.crdtgrp_list) {
+			parent_kn = crgrp->mon.mon_data_kn;
+			mkdir_mondata_subdir(parent_kn, d, r, crgrp);
+		}
+	}
+}
+
+static int mkdir_mondata_subdir_alldom(struct kernfs_node *parent_kn,
+				       struct rdt_resource *r,
+				       struct rdtgroup *prgrp)
+{
+	struct rdt_domain *dom;
+	int ret;
+
+	list_for_each_entry(dom, &r->domains, list) {
+		ret = mkdir_mondata_subdir(parent_kn, dom, r, prgrp);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * This creates a directory mon_data which contains the monitored data.
+ *
+ * mon_data has one directory for each domain whic are named
+ * in the format mon_<domain_name>_<domain_id>. For ex: A mon_data
+ * with L3 domain looks as below:
+ * ./mon_data:
+ * mon_L3_00
+ * mon_L3_01
+ * mon_L3_02
+ * ...
+ *
+ * Each domain directory has one file per event:
+ * ./mon_L3_00/:
+ * llc_occupancy
+ *
+ */
+static int mkdir_mondata_all(struct kernfs_node *parent_kn,
+			     struct rdtgroup *prgrp,
+			     struct kernfs_node **dest_kn)
+{
+	struct rdt_resource *r;
+	struct kernfs_node *kn;
+	int ret;
+
+	/*
+	 * Create the mon_data directory first.
+	 */
+	ret = mongroup_create_dir(parent_kn, NULL, "mon_data", &kn);
+	if (ret)
+		return ret;
+
+	if (dest_kn)
+		*dest_kn = kn;
+
+	/*
+	 * Create the subdirectories for each domain. Note that all events
+	 * in a domain like L3 are grouped into a resource whose domain is L3
+	 */
+	for_each_mon_enabled_rdt_resource(r) {
+		ret = mkdir_mondata_subdir_alldom(kn, r, prgrp);
+		if (ret)
+			goto out_destroy;
+	}
+
+	return 0;
+
+out_destroy:
+	kernfs_remove(kn);
+	return ret;
+}
+
+static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
+			     struct kernfs_node *prgrp_kn,
+			     const char *name, umode_t mode,
+			     enum rdt_group_type rtype, struct rdtgroup **r)
+{
+	struct rdtgroup *prdtgrp, *rdtgrp;
+	struct kernfs_node *kn;
+	uint files = 0;
+	int ret;
+
+	prdtgrp = rdtgroup_kn_lock_live(prgrp_kn);
+	if (!prdtgrp) {
+		ret = -ENODEV;
 		goto out_unlock;
-	closid = ret;
+	}
 
 	/* allocate the rdtgroup. */
 	rdtgrp = kzalloc(sizeof(*rdtgrp), GFP_KERNEL);
 	if (!rdtgrp) {
 		ret = -ENOSPC;
-		goto out_closid_free;
+		goto out_unlock;
 	}
-	rdtgrp->closid = closid;
-	list_add(&rdtgrp->rdtgroup_list, &rdt_all_groups);
+	*r = rdtgrp;
+	rdtgrp->mon.parent = prdtgrp;
+	rdtgrp->type = rtype;
+	INIT_LIST_HEAD(&rdtgrp->mon.crdtgrp_list);
 
 	/* kernfs creates the directory for rdtgrp */
-	kn = kernfs_create_dir(parent->kn, name, mode, rdtgrp);
+	kn = kernfs_create_dir(parent_kn, name, mode, rdtgrp);
 	if (IS_ERR(kn)) {
 		ret = PTR_ERR(kn);
-		goto out_cancel_ref;
+		goto out_free_rgrp;
 	}
 	rdtgrp->kn = kn;
 
@@ -1056,43 +1560,211 @@ static int rdtgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
 	if (ret)
 		goto out_destroy;
 
-	ret = rdtgroup_add_files(kn, rdtgroup_base_files,
-				 ARRAY_SIZE(rdtgroup_base_files));
+	files = RFTYPE_BASE | RFTYPE_CTRL;
+	files = RFTYPE_BASE | BIT(RF_CTRLSHIFT + rtype);
+	ret = rdtgroup_add_files(kn, files);
 	if (ret)
 		goto out_destroy;
 
+	if (rdt_mon_capable) {
+		ret = alloc_rmid();
+		if (ret < 0)
+			goto out_destroy;
+		rdtgrp->mon.rmid = ret;
+
+		ret = mkdir_mondata_all(kn, rdtgrp, &rdtgrp->mon.mon_data_kn);
+		if (ret)
+			goto out_idfree;
+	}
 	kernfs_activate(kn);
 
-	ret = 0;
-	goto out_unlock;
+	/*
+	 * The caller unlocks the prgrp_kn upon success.
+	 */
+	return 0;
 
+out_idfree:
+	free_rmid(rdtgrp->mon.rmid);
 out_destroy:
 	kernfs_remove(rdtgrp->kn);
-out_cancel_ref:
-	list_del(&rdtgrp->rdtgroup_list);
+out_free_rgrp:
 	kfree(rdtgrp);
-out_closid_free:
-	closid_free(closid);
 out_unlock:
-	rdtgroup_kn_unlock(parent_kn);
+	rdtgroup_kn_unlock(prgrp_kn);
 	return ret;
 }
 
-static int rdtgroup_rmdir(struct kernfs_node *kn)
+static void mkdir_rdt_prepare_clean(struct rdtgroup *rgrp)
+{
+	kernfs_remove(rgrp->kn);
+	free_rmid(rgrp->mon.rmid);
+	kfree(rgrp);
+}
+
+/*
+ * Create a monitor group under "mon_groups" directory of a control
+ * and monitor group(ctrl_mon). This is a resource group
+ * to monitor a subset of tasks and cpus in its parent ctrl_mon group.
+ */
+static int rdtgroup_mkdir_mon(struct kernfs_node *parent_kn,
+			      struct kernfs_node *prgrp_kn,
+			      const char *name,
+			      umode_t mode)
+{
+	struct rdtgroup *rdtgrp, *prgrp;
+	int ret;
+
+	ret = mkdir_rdt_prepare(parent_kn, prgrp_kn, name, mode, RDTMON_GROUP,
+				&rdtgrp);
+	if (ret)
+		return ret;
+
+	prgrp = rdtgrp->mon.parent;
+	rdtgrp->closid = prgrp->closid;
+
+	/*
+	 * Add the rdtgrp to the list of rdtgrps the parent
+	 * ctrl_mon group has to track.
+	 */
+	list_add_tail(&rdtgrp->mon.crdtgrp_list, &prgrp->mon.crdtgrp_list);
+
+	rdtgroup_kn_unlock(prgrp_kn);
+	return ret;
+}
+
+/*
+ * These are rdtgroups created under the root directory. Can be used
+ * to allocate and monitor resources.
+ */
+static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn,
+				   struct kernfs_node *prgrp_kn,
+				   const char *name, umode_t mode)
 {
-	int ret, cpu, closid = rdtgroup_default.closid;
 	struct rdtgroup *rdtgrp;
-	cpumask_var_t tmpmask;
+	struct kernfs_node *kn;
+	u32 closid;
+	int ret;
 
-	if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
-		return -ENOMEM;
+	ret = mkdir_rdt_prepare(parent_kn, prgrp_kn, name, mode, RDTCTRL_GROUP,
+				&rdtgrp);
+	if (ret)
+		return ret;
 
-	rdtgrp = rdtgroup_kn_lock_live(kn);
-	if (!rdtgrp) {
-		ret = -EPERM;
-		goto out;
+	kn = rdtgrp->kn;
+	ret = closid_alloc();
+	if (ret < 0)
+		goto out_common_fail;
+	closid = ret;
+
+	rdtgrp->closid = closid;
+	list_add(&rdtgrp->rdtgroup_list, &rdt_all_groups);
+
+	if (rdt_mon_capable) {
+		/*
+		 * Create an empty mon_groups directory to hold the subset
+		 * of tasks and cpus to monitor.
+		 */
+		ret = mongroup_create_dir(kn, NULL, "mon_groups", NULL);
+		if (ret)
+			goto out_id_free;
 	}
 
+	goto out_unlock;
+
+out_id_free:
+	closid_free(closid);
+	list_del(&rdtgrp->rdtgroup_list);
+out_common_fail:
+	mkdir_rdt_prepare_clean(rdtgrp);
+out_unlock:
+	rdtgroup_kn_unlock(prgrp_kn);
+	return ret;
+}
+
+/*
+ * We allow creating mon groups only with in a directory called "mon_groups"
+ * which is present in every ctrl_mon group. Check if this is a valid
+ * "mon_groups" directory.
+ *
+ * 1. The directory should be named "mon_groups".
+ * 2. The mon group itself should "not" be named "mon_groups".
+ *   This makes sure "mon_groups" directory always has a ctrl_mon group
+ *   as parent.
+ */
+static bool is_mon_groups(struct kernfs_node *kn, const char *name)
+{
+	return (!strcmp(kn->name, "mon_groups") &&
+		strcmp(name, "mon_groups"));
+}
+
+static int rdtgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
+			  umode_t mode)
+{
+	/* Do not accept '\n' to avoid unparsable situation. */
+	if (strchr(name, '\n'))
+		return -EINVAL;
+
+	/*
+	 * If the parent directory is the root directory and RDT
+	 * allocation is supported, add a control and monitoring
+	 * subdirectory
+	 */
+	if (rdt_alloc_capable && parent_kn == rdtgroup_default.kn)
+		return rdtgroup_mkdir_ctrl_mon(parent_kn, parent_kn, name, mode);
+
+	/*
+	 * If RDT monitoring is supported and the parent directory is a valid
+	 * "mon_groups" directory, add a monitoring subdirectory.
+	 */
+	if (rdt_mon_capable && is_mon_groups(parent_kn, name))
+		return rdtgroup_mkdir_mon(parent_kn, parent_kn->parent, name, mode);
+
+	return -EPERM;
+}
+
+static int rdtgroup_rmdir_mon(struct kernfs_node *kn, struct rdtgroup *rdtgrp,
+			      cpumask_var_t tmpmask)
+{
+	struct rdtgroup *prdtgrp = rdtgrp->mon.parent;
+	int cpu;
+
+	/* Give any tasks back to the parent group */
+	rdt_move_group_tasks(rdtgrp, prdtgrp, tmpmask);
+
+	/* Update per cpu rmid of the moved CPUs first */
+	for_each_cpu(cpu, &rdtgrp->cpu_mask)
+		per_cpu(pqr_state.default_rmid, cpu) = prdtgrp->mon.rmid;
+	/*
+	 * Update the MSR on moved CPUs and CPUs which have moved
+	 * task running on them.
+	 */
+	cpumask_or(tmpmask, tmpmask, &rdtgrp->cpu_mask);
+	update_closid_rmid(tmpmask, NULL);
+
+	rdtgrp->flags = RDT_DELETED;
+	free_rmid(rdtgrp->mon.rmid);
+
+	/*
+	 * Remove the rdtgrp from the parent ctrl_mon group's list
+	 */
+	WARN_ON(list_empty(&prdtgrp->mon.crdtgrp_list));
+	list_del(&rdtgrp->mon.crdtgrp_list);
+
+	/*
+	 * one extra hold on this, will drop when we kfree(rdtgrp)
+	 * in rdtgroup_kn_unlock()
+	 */
+	kernfs_get(kn);
+	kernfs_remove(rdtgrp->kn);
+
+	return 0;
+}
+
+static int rdtgroup_rmdir_ctrl(struct kernfs_node *kn, struct rdtgroup *rdtgrp,
+			       cpumask_var_t tmpmask)
+{
+	int cpu;
+
 	/* Give any tasks back to the default group */
 	rdt_move_group_tasks(rdtgrp, &rdtgroup_default, tmpmask);
 
@@ -1100,18 +1772,28 @@ static int rdtgroup_rmdir(struct kernfs_node *kn)
 	cpumask_or(&rdtgroup_default.cpu_mask,
 		   &rdtgroup_default.cpu_mask, &rdtgrp->cpu_mask);
 
-	/* Update per cpu closid of the moved CPUs first */
-	for_each_cpu(cpu, &rdtgrp->cpu_mask)
-		per_cpu(cpu_closid, cpu) = closid;
+	/* Update per cpu closid and rmid of the moved CPUs first */
+	for_each_cpu(cpu, &rdtgrp->cpu_mask) {
+		per_cpu(pqr_state.default_closid, cpu) = rdtgroup_default.closid;
+		per_cpu(pqr_state.default_rmid, cpu) = rdtgroup_default.mon.rmid;
+	}
+
 	/*
 	 * Update the MSR on moved CPUs and CPUs which have moved
 	 * task running on them.
 	 */
 	cpumask_or(tmpmask, tmpmask, &rdtgrp->cpu_mask);
-	rdt_update_closid(tmpmask, NULL);
+	update_closid_rmid(tmpmask, NULL);
 
 	rdtgrp->flags = RDT_DELETED;
 	closid_free(rdtgrp->closid);
+	free_rmid(rdtgrp->mon.rmid);
+
+	/*
+	 * Free all the child monitor group rmids.
+	 */
+	free_all_child_rdtgrp(rdtgrp);
+
 	list_del(&rdtgrp->rdtgroup_list);
 
 	/*
@@ -1120,7 +1802,41 @@ static int rdtgroup_rmdir(struct kernfs_node *kn)
 	 */
 	kernfs_get(kn);
 	kernfs_remove(rdtgrp->kn);
-	ret = 0;
+
+	return 0;
+}
+
+static int rdtgroup_rmdir(struct kernfs_node *kn)
+{
+	struct kernfs_node *parent_kn = kn->parent;
+	struct rdtgroup *rdtgrp;
+	cpumask_var_t tmpmask;
+	int ret = 0;
+
+	if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
+		return -ENOMEM;
+
+	rdtgrp = rdtgroup_kn_lock_live(kn);
+	if (!rdtgrp) {
+		ret = -EPERM;
+		goto out;
+	}
+
+	/*
+	 * If the rdtgroup is a ctrl_mon group and parent directory
+	 * is the root directory, remove the ctrl_mon group.
+	 *
+	 * If the rdtgroup is a mon group and parent directory
+	 * is a valid "mon_groups" directory, remove the mon group.
+	 */
+	if (rdtgrp->type == RDTCTRL_GROUP && parent_kn == rdtgroup_default.kn)
+		ret = rdtgroup_rmdir_ctrl(kn, rdtgrp, tmpmask);
+	else if (rdtgrp->type == RDTMON_GROUP &&
+		 is_mon_groups(parent_kn, kn->name))
+		ret = rdtgroup_rmdir_mon(kn, rdtgrp, tmpmask);
+	else
+		ret = -EPERM;
+
 out:
 	rdtgroup_kn_unlock(kn);
 	free_cpumask_var(tmpmask);
@@ -1129,7 +1845,7 @@ out:
 
 static int rdtgroup_show_options(struct seq_file *seq, struct kernfs_root *kf)
 {
-	if (rdt_resources_all[RDT_RESOURCE_L3DATA].enabled)
+	if (rdt_resources_all[RDT_RESOURCE_L3DATA].alloc_enabled)
 		seq_puts(seq, ",cdp");
 	return 0;
 }
@@ -1153,10 +1869,13 @@ static int __init rdtgroup_setup_root(void)
 	mutex_lock(&rdtgroup_mutex);
 
 	rdtgroup_default.closid = 0;
+	rdtgroup_default.mon.rmid = 0;
+	rdtgroup_default.type = RDTCTRL_GROUP;
+	INIT_LIST_HEAD(&rdtgroup_default.mon.crdtgrp_list);
+
 	list_add(&rdtgroup_default.rdtgroup_list, &rdt_all_groups);
 
-	ret = rdtgroup_add_files(rdt_root->kn, rdtgroup_base_files,
-				 ARRAY_SIZE(rdtgroup_base_files));
+	ret = rdtgroup_add_files(rdt_root->kn, RF_CTRL_BASE);
 	if (ret) {
 		kernfs_destroy_root(rdt_root);
 		goto out;
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 6dde0497efc7..3b413065c613 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -51,6 +51,7 @@
 #include <asm/mce.h>
 #include <asm/msr.h>
 #include <asm/reboot.h>
+#include <asm/set_memory.h>
 
 #include "mce-internal.h"
 
@@ -1051,6 +1052,48 @@ static int do_memory_failure(struct mce *m)
 	return ret;
 }
 
+#if defined(arch_unmap_kpfn) && defined(CONFIG_MEMORY_FAILURE)
+
+void arch_unmap_kpfn(unsigned long pfn)
+{
+	unsigned long decoy_addr;
+
+	/*
+	 * Unmap this page from the kernel 1:1 mappings to make sure
+	 * we don't log more errors because of speculative access to
+	 * the page.
+	 * We would like to just call:
+	 *	set_memory_np((unsigned long)pfn_to_kaddr(pfn), 1);
+	 * but doing that would radically increase the odds of a
+	 * speculative access to the posion page because we'd have
+	 * the virtual address of the kernel 1:1 mapping sitting
+	 * around in registers.
+	 * Instead we get tricky.  We create a non-canonical address
+	 * that looks just like the one we want, but has bit 63 flipped.
+	 * This relies on set_memory_np() not checking whether we passed
+	 * a legal address.
+	 */
+
+/*
+ * Build time check to see if we have a spare virtual bit. Don't want
+ * to leave this until run time because most developers don't have a
+ * system that can exercise this code path. This will only become a
+ * problem if/when we move beyond 5-level page tables.
+ *
+ * Hard code "9" here because cpp doesn't grok ilog2(PTRS_PER_PGD)
+ */
+#if PGDIR_SHIFT + 9 < 63
+	decoy_addr = (pfn << PAGE_SHIFT) + (PAGE_OFFSET ^ BIT(63));
+#else
+#error "no unused virtual bit available"
+#endif
+
+	if (set_memory_np(decoy_addr, 1))
+		pr_warn("Could not invalidate pfn=0x%lx from 1:1 map\n", pfn);
+
+}
+#endif
+
 /*
  * The actual machine check handler. This only handles real
  * exceptions when something got corrupted coming in through int 18.
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 9e314bcf67cc..40e28ed77fbf 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -201,8 +201,8 @@ static void smca_configure(unsigned int bank, unsigned int cpu)
 		wrmsr(smca_config, low, high);
 	}
 
-	/* Collect bank_info using CPU 0 for now. */
-	if (cpu)
+	/* Return early if this bank was already initialized. */
+	if (smca_banks[bank].hwid)
 		return;
 
 	if (rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_IPID(bank), &low, &high)) {
@@ -216,11 +216,6 @@ static void smca_configure(unsigned int bank, unsigned int cpu)
 	for (i = 0; i < ARRAY_SIZE(smca_hwid_mcatypes); i++) {
 		s_hwid = &smca_hwid_mcatypes[i];
 		if (hwid_mcatype == s_hwid->hwid_mcatype) {
-
-			WARN(smca_banks[bank].hwid,
-			     "Bank %s already initialized!\n",
-			     smca_get_name(s_hwid->bank_type));
-
 			smca_banks[bank].hwid = s_hwid;
 			smca_banks[bank].id = low;
 			smca_banks[bank].sysfs_id = s_hwid->count++;
@@ -776,24 +771,12 @@ static void __log_error(unsigned int bank, u64 status, u64 addr, u64 misc)
 	mce_log(&m);
 }
 
-static inline void __smp_deferred_error_interrupt(void)
-{
-	inc_irq_stat(irq_deferred_error_count);
-	deferred_error_int_vector();
-}
-
 asmlinkage __visible void __irq_entry smp_deferred_error_interrupt(void)
 {
 	entering_irq();
-	__smp_deferred_error_interrupt();
-	exiting_ack_irq();
-}
-
-asmlinkage __visible void __irq_entry smp_trace_deferred_error_interrupt(void)
-{
-	entering_irq();
 	trace_deferred_error_apic_entry(DEFERRED_ERROR_VECTOR);
-	__smp_deferred_error_interrupt();
+	inc_irq_stat(irq_deferred_error_count);
+	deferred_error_int_vector();
 	trace_deferred_error_apic_exit(DEFERRED_ERROR_VECTOR);
 	exiting_ack_irq();
 }
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index f7370abd33c6..2da67b70ba98 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -390,26 +390,12 @@ static void unexpected_thermal_interrupt(void)
 
 static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt;
 
-static inline void __smp_thermal_interrupt(void)
-{
-	inc_irq_stat(irq_thermal_count);
-	smp_thermal_vector();
-}
-
-asmlinkage __visible void __irq_entry
-smp_thermal_interrupt(struct pt_regs *regs)
-{
-	entering_irq();
-	__smp_thermal_interrupt();
-	exiting_ack_irq();
-}
-
-asmlinkage __visible void __irq_entry
-smp_trace_thermal_interrupt(struct pt_regs *regs)
+asmlinkage __visible void __irq_entry smp_thermal_interrupt(struct pt_regs *r)
 {
 	entering_irq();
 	trace_thermal_apic_entry(THERMAL_APIC_VECTOR);
-	__smp_thermal_interrupt();
+	inc_irq_stat(irq_thermal_count);
+	smp_thermal_vector();
 	trace_thermal_apic_exit(THERMAL_APIC_VECTOR);
 	exiting_ack_irq();
 }
diff --git a/arch/x86/kernel/cpu/mcheck/threshold.c b/arch/x86/kernel/cpu/mcheck/threshold.c
index bb0e75eed10a..5e7249e42f8f 100644
--- a/arch/x86/kernel/cpu/mcheck/threshold.c
+++ b/arch/x86/kernel/cpu/mcheck/threshold.c
@@ -17,24 +17,12 @@ static void default_threshold_interrupt(void)
 
 void (*mce_threshold_vector)(void) = default_threshold_interrupt;
 
-static inline void __smp_threshold_interrupt(void)
-{
-	inc_irq_stat(irq_threshold_count);
-	mce_threshold_vector();
-}
-
 asmlinkage __visible void __irq_entry smp_threshold_interrupt(void)
 {
 	entering_irq();
-	__smp_threshold_interrupt();
-	exiting_ack_irq();
-}
-
-asmlinkage __visible void __irq_entry smp_trace_threshold_interrupt(void)
-{
-	entering_irq();
 	trace_threshold_apic_entry(THRESHOLD_APIC_VECTOR);
-	__smp_threshold_interrupt();
+	inc_irq_stat(irq_threshold_count);
+	mce_threshold_vector();
 	trace_threshold_apic_exit(THRESHOLD_APIC_VECTOR);
 	exiting_ack_irq();
 }
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index 21b185793c80..c6daec4bdba5 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -400,9 +400,12 @@ static void update_cache(struct ucode_patch *new_patch)
 
 	list_for_each_entry(p, &microcode_cache, plist) {
 		if (p->equiv_cpu == new_patch->equiv_cpu) {
-			if (p->patch_id >= new_patch->patch_id)
+			if (p->patch_id >= new_patch->patch_id) {
 				/* we already have the latest patch */
+				kfree(new_patch->data);
+				kfree(new_patch);
 				return;
+			}
 
 			list_replace(&p->plist, &new_patch->plist);
 			kfree(p->data);
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index 59edbe9d4ccb..8f7a9bbad514 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -146,18 +146,18 @@ static bool microcode_matches(struct microcode_header_intel *mc_header,
 	return false;
 }
 
-static struct ucode_patch *__alloc_microcode_buf(void *data, unsigned int size)
+static struct ucode_patch *memdup_patch(void *data, unsigned int size)
 {
 	struct ucode_patch *p;
 
 	p = kzalloc(sizeof(struct ucode_patch), GFP_KERNEL);
 	if (!p)
-		return ERR_PTR(-ENOMEM);
+		return NULL;
 
 	p->data = kmemdup(data, size, GFP_KERNEL);
 	if (!p->data) {
 		kfree(p);
-		return ERR_PTR(-ENOMEM);
+		return NULL;
 	}
 
 	return p;
@@ -183,8 +183,8 @@ static void save_microcode_patch(void *data, unsigned int size)
 			if (mc_hdr->rev <= mc_saved_hdr->rev)
 				continue;
 
-			p = __alloc_microcode_buf(data, size);
-			if (IS_ERR(p))
+			p = memdup_patch(data, size);
+			if (!p)
 				pr_err("Error allocating buffer %p\n", data);
 			else
 				list_replace(&iter->plist, &p->plist);
@@ -196,24 +196,25 @@ static void save_microcode_patch(void *data, unsigned int size)
 	 * newly found.
 	 */
 	if (!prev_found) {
-		p = __alloc_microcode_buf(data, size);
-		if (IS_ERR(p))
+		p = memdup_patch(data, size);
+		if (!p)
 			pr_err("Error allocating buffer for %p\n", data);
 		else
 			list_add_tail(&p->plist, &microcode_cache);
 	}
 
+	if (!p)
+		return;
+
 	/*
 	 * Save for early loading. On 32-bit, that needs to be a physical
 	 * address as the APs are running from physical addresses, before
 	 * paging has been enabled.
 	 */
-	if (p) {
-		if (IS_ENABLED(CONFIG_X86_32))
-			intel_ucode_patch = (struct microcode_intel *)__pa_nodebug(p->data);
-		else
-			intel_ucode_patch = p->data;
-	}
+	if (IS_ENABLED(CONFIG_X86_32))
+		intel_ucode_patch = (struct microcode_intel *)__pa_nodebug(p->data);
+	else
+		intel_ucode_patch = p->data;
 }
 
 static int microcode_sanity_check(void *mc, int print_err)
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 70e717fccdd6..3b3f713e15e5 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -59,13 +59,8 @@ void hyperv_vector_handler(struct pt_regs *regs)
 void hv_setup_vmbus_irq(void (*handler)(void))
 {
 	vmbus_handler = handler;
-	/*
-	 * Setup the IDT for hypervisor callback. Prevent reallocation
-	 * at module reload.
-	 */
-	if (!test_bit(HYPERVISOR_CALLBACK_VECTOR, used_vectors))
-		alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR,
-				hyperv_callback_vector);
+	/* Setup the IDT for hypervisor callback */
+	alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, hyperv_callback_vector);
 }
 
 void hv_remove_vmbus_irq(void)
@@ -184,9 +179,15 @@ static void __init ms_hyperv_init_platform(void)
 	ms_hyperv.misc_features = cpuid_edx(HYPERV_CPUID_FEATURES);
 	ms_hyperv.hints    = cpuid_eax(HYPERV_CPUID_ENLIGHTMENT_INFO);
 
-	pr_info("HyperV: features 0x%x, hints 0x%x\n",
+	pr_info("Hyper-V: features 0x%x, hints 0x%x\n",
 		ms_hyperv.features, ms_hyperv.hints);
 
+	ms_hyperv.max_vp_index = cpuid_eax(HVCPUID_IMPLEMENTATION_LIMITS);
+	ms_hyperv.max_lp_index = cpuid_ebx(HVCPUID_IMPLEMENTATION_LIMITS);
+
+	pr_debug("Hyper-V: max %u virtual processors, %u logical processors\n",
+		 ms_hyperv.max_vp_index, ms_hyperv.max_lp_index);
+
 	/*
 	 * Extract host information.
 	 */
@@ -219,7 +220,7 @@ static void __init ms_hyperv_init_platform(void)
 		rdmsrl(HV_X64_MSR_APIC_FREQUENCY, hv_lapic_frequency);
 		hv_lapic_frequency = div_u64(hv_lapic_frequency, HZ);
 		lapic_timer_frequency = hv_lapic_frequency;
-		pr_info("HyperV: LAPIC Timer Frequency: %#x\n",
+		pr_info("Hyper-V: LAPIC Timer Frequency: %#x\n",
 			lapic_timer_frequency);
 	}
 
@@ -249,11 +250,12 @@ static void __init ms_hyperv_init_platform(void)
 	 * Setup the hook to get control post apic initialization.
 	 */
 	x86_platform.apic_post_init = hyperv_init;
+	hyperv_setup_mmu_ops();
 #endif
 }
 
 const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = {
-	.name			= "Microsoft HyperV",
+	.name			= "Microsoft Hyper-V",
 	.detect			= ms_hyperv_platform,
 	.init_platform		= ms_hyperv_init_platform,
 };
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 23c23508c012..05459ad3db46 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -31,6 +31,7 @@ static const struct cpuid_bit cpuid_bits[] = {
 	{ X86_FEATURE_HW_PSTATE,	CPUID_EDX,  7, 0x80000007, 0 },
 	{ X86_FEATURE_CPB,		CPUID_EDX,  9, 0x80000007, 0 },
 	{ X86_FEATURE_PROC_FEEDBACK,    CPUID_EDX, 11, 0x80000007, 0 },
+	{ X86_FEATURE_SME,		CPUID_EAX,  0, 0x8000001f, 0 },
 	{ 0, 0, 0, 0, 0 }
 };
 
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index dbce3cca94cb..f13b4c00a5de 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -94,6 +94,9 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
 		if (stack_name)
 			printk("%s <%s>\n", log_lvl, stack_name);
 
+		if (regs && on_stack(&stack_info, regs, sizeof(*regs)))
+			__show_regs(regs, 0);
+
 		/*
 		 * Scan the stack, printing any text addresses we find.  At the
 		 * same time, follow proper stack frames with the unwinder.
@@ -118,10 +121,8 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
 			 * Don't print regs->ip again if it was already printed
 			 * by __show_regs() below.
 			 */
-			if (regs && stack == &regs->ip) {
-				unwind_next_frame(&state);
-				continue;
-			}
+			if (regs && stack == &regs->ip)
+				goto next;
 
 			if (stack == ret_addr_p)
 				reliable = 1;
@@ -144,6 +145,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
 			if (!reliable)
 				continue;
 
+next:
 			/*
 			 * Get the next frame from the unwinder.  No need to
 			 * check for an error: if anything goes wrong, the rest
@@ -153,7 +155,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
 
 			/* if the frame has entry regs, print them */
 			regs = unwind_get_entry_regs(&state);
-			if (regs)
+			if (regs && on_stack(&stack_info, regs, sizeof(*regs)))
 				__show_regs(regs, 0);
 		}
 
@@ -265,7 +267,7 @@ int __die(const char *str, struct pt_regs *regs, long err)
 #ifdef CONFIG_X86_32
 	if (user_mode(regs)) {
 		sp = regs->sp;
-		ss = regs->ss & 0xffff;
+		ss = regs->ss;
 	} else {
 		sp = kernel_stack_pointer(regs);
 		savesegment(ss, ss);
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index e5f0b40e66d2..4f0481474903 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -37,7 +37,7 @@ static bool in_hardirq_stack(unsigned long *stack, struct stack_info *info)
 	 * This is a software stack, so 'end' can be a valid stack pointer.
 	 * It just means the stack is empty.
 	 */
-	if (stack < begin || stack > end)
+	if (stack <= begin || stack > end)
 		return false;
 
 	info->type	= STACK_TYPE_IRQ;
@@ -62,7 +62,7 @@ static bool in_softirq_stack(unsigned long *stack, struct stack_info *info)
 	 * This is a software stack, so 'end' can be a valid stack pointer.
 	 * It just means the stack is empty.
 	 */
-	if (stack < begin || stack > end)
+	if (stack <= begin || stack > end)
 		return false;
 
 	info->type	= STACK_TYPE_SOFTIRQ;
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 3e1471d57487..225af4184f06 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -55,7 +55,7 @@ static bool in_exception_stack(unsigned long *stack, struct stack_info *info)
 		begin = end - (exception_stack_sizes[k] / sizeof(long));
 		regs  = (struct pt_regs *)end - 1;
 
-		if (stack < begin || stack >= end)
+		if (stack <= begin || stack >= end)
 			continue;
 
 		info->type	= STACK_TYPE_EXCEPTION + k;
@@ -78,7 +78,7 @@ static bool in_irq_stack(unsigned long *stack, struct stack_info *info)
 	 * This is a software stack, so 'end' can be a valid stack pointer.
 	 * It just means the stack is empty.
 	 */
-	if (stack < begin || stack > end)
+	if (stack <= begin || stack > end)
 		return false;
 
 	info->type	= STACK_TYPE_IRQ;
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 532da61d605c..71c11ad5643e 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -96,7 +96,8 @@ EXPORT_SYMBOL_GPL(e820__mapped_any);
  * Note: this function only works correctly once the E820 table is sorted and
  * not-overlapping (at least for the range specified), which is the case normally.
  */
-bool __init e820__mapped_all(u64 start, u64 end, enum e820_type type)
+static struct e820_entry *__e820__mapped_all(u64 start, u64 end,
+					     enum e820_type type)
 {
 	int i;
 
@@ -122,9 +123,28 @@ bool __init e820__mapped_all(u64 start, u64 end, enum e820_type type)
 		 * coverage of the desired range exists:
 		 */
 		if (start >= end)
-			return 1;
+			return entry;
 	}
-	return 0;
+
+	return NULL;
+}
+
+/*
+ * This function checks if the entire range <start,end> is mapped with type.
+ */
+bool __init e820__mapped_all(u64 start, u64 end, enum e820_type type)
+{
+	return __e820__mapped_all(start, end, type);
+}
+
+/*
+ * This function returns the type associated with the range <start,end>.
+ */
+int e820__get_entry_type(u64 start, u64 end)
+{
+	struct e820_entry *entry = __e820__mapped_all(start, end, 0);
+
+	return entry ? entry->type : -EINVAL;
 }
 
 /*
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index d907c3d8633f..927abeaf63e2 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -12,10 +12,10 @@
 #include <linux/pci.h>
 #include <linux/acpi.h>
 #include <linux/delay.h>
-#include <linux/dmi.h>
 #include <linux/pci_ids.h>
 #include <linux/bcma/bcma.h>
 #include <linux/bcma/bcma_regs.h>
+#include <linux/platform_data/x86/apple.h>
 #include <drm/i915_drm.h>
 #include <asm/pci-direct.h>
 #include <asm/dma.h>
@@ -527,6 +527,7 @@ static const struct pci_device_id intel_early_ids[] __initconst = {
 	INTEL_BXT_IDS(&gen9_early_ops),
 	INTEL_KBL_IDS(&gen9_early_ops),
 	INTEL_GLK_IDS(&gen9_early_ops),
+	INTEL_CNL_IDS(&gen9_early_ops),
 };
 
 static void __init
@@ -593,7 +594,7 @@ static void __init apple_airport_reset(int bus, int slot, int func)
 	u64 addr;
 	int i;
 
-	if (!dmi_match(DMI_SYS_VENDOR, "Apple Inc."))
+	if (!x86_apple_machine)
 		return;
 
 	/* Card may have been put into PCI_D3hot by grub quirk */
diff --git a/arch/x86/kernel/eisa.c b/arch/x86/kernel/eisa.c
new file mode 100644
index 000000000000..f260e452e4f8
--- /dev/null
+++ b/arch/x86/kernel/eisa.c
@@ -0,0 +1,19 @@
+/*
+ * EISA specific code
+ *
+ * This file is licensed under the GPL V2
+ */
+#include <linux/ioport.h>
+#include <linux/eisa.h>
+#include <linux/io.h>
+
+static __init int eisa_bus_probe(void)
+{
+	void __iomem *p = ioremap(0x0FFFD9, 4);
+
+	if (readl(p) == 'E' + ('I'<<8) + ('S'<<16) + ('A'<<24))
+		EISA_bus = 1;
+	iounmap(p);
+	return 0;
+}
+subsys_initcall(eisa_bus_probe);
diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c
index 6b91e2eb8d3f..9c4e7ba6870c 100644
--- a/arch/x86/kernel/espfix_64.c
+++ b/arch/x86/kernel/espfix_64.c
@@ -195,7 +195,7 @@ void init_espfix_ap(int cpu)
 
 	pte_p = pte_offset_kernel(&pmd, addr);
 	stack_page = page_address(alloc_pages_node(node, GFP_KERNEL, 0));
-	pte = __pte(__pa(stack_page) | (__PAGE_KERNEL_RO & ptemask));
+	pte = __pte(__pa(stack_page) | ((__PAGE_KERNEL_RO | _PAGE_ENC) & ptemask));
 	for (n = 0; n < ESPFIX_PTE_CLONES; n++)
 		set_pte(&pte_p[n*PTE_STRIDE], pte);
 
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index 538ec012b371..cf2ce063f65a 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -10,6 +10,7 @@
 #include <linux/mm.h>
 #include <linux/memblock.h>
 
+#include <asm/desc.h>
 #include <asm/setup.h>
 #include <asm/sections.h>
 #include <asm/e820/api.h>
@@ -30,6 +31,9 @@ static void __init i386_default_early_setup(void)
 asmlinkage __visible void __init i386_start_kernel(void)
 {
 	cr4_init_shadow();
+
+	idt_setup_early_handler();
+
 	sanitize_boot_params(&boot_params);
 
 	x86_early_init_platform_quirks();
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 9ba79543d9ee..bab4fa579450 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -14,6 +14,7 @@
 #include <linux/start_kernel.h>
 #include <linux/io.h>
 #include <linux/memblock.h>
+#include <linux/mem_encrypt.h>
 
 #include <asm/processor.h>
 #include <asm/proto.h>
@@ -33,7 +34,6 @@
 /*
  * Manage page tables very early on.
  */
-extern pgd_t early_top_pgt[PTRS_PER_PGD];
 extern pmd_t early_dynamic_pgts[EARLY_DYNAMIC_PAGE_TABLES][PTRS_PER_PMD];
 static unsigned int __initdata next_early_pgt;
 pmdval_t early_pmd_flags = __PAGE_KERNEL_LARGE & ~(_PAGE_GLOBAL | _PAGE_NX);
@@ -45,9 +45,11 @@ static void __head *fixup_pointer(void *ptr, unsigned long physaddr)
 	return ptr - (void *)_text + (void *)physaddr;
 }
 
-void __head __startup_64(unsigned long physaddr)
+unsigned long __head __startup_64(unsigned long physaddr,
+				  struct boot_params *bp)
 {
 	unsigned long load_delta, *p;
+	unsigned long pgtable_flags;
 	pgdval_t *pgd;
 	p4dval_t *p4d;
 	pudval_t *pud;
@@ -69,6 +71,12 @@ void __head __startup_64(unsigned long physaddr)
 	if (load_delta & ~PMD_PAGE_MASK)
 		for (;;);
 
+	/* Activate Secure Memory Encryption (SME) if supported and enabled */
+	sme_enable(bp);
+
+	/* Include the SME encryption mask in the fixup value */
+	load_delta += sme_get_me_mask();
+
 	/* Fixup the physical addresses in the page table */
 
 	pgd = fixup_pointer(&early_top_pgt, physaddr);
@@ -92,31 +100,35 @@ void __head __startup_64(unsigned long physaddr)
 	 * creates a bunch of nonsense entries but that is fine --
 	 * it avoids problems around wraparound.
 	 */
+
 	next_pgt_ptr = fixup_pointer(&next_early_pgt, physaddr);
 	pud = fixup_pointer(early_dynamic_pgts[(*next_pgt_ptr)++], physaddr);
 	pmd = fixup_pointer(early_dynamic_pgts[(*next_pgt_ptr)++], physaddr);
 
+	pgtable_flags = _KERNPG_TABLE_NOENC + sme_get_me_mask();
+
 	if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
 		p4d = fixup_pointer(early_dynamic_pgts[next_early_pgt++], physaddr);
 
 		i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD;
-		pgd[i + 0] = (pgdval_t)p4d + _KERNPG_TABLE;
-		pgd[i + 1] = (pgdval_t)p4d + _KERNPG_TABLE;
+		pgd[i + 0] = (pgdval_t)p4d + pgtable_flags;
+		pgd[i + 1] = (pgdval_t)p4d + pgtable_flags;
 
 		i = (physaddr >> P4D_SHIFT) % PTRS_PER_P4D;
-		p4d[i + 0] = (pgdval_t)pud + _KERNPG_TABLE;
-		p4d[i + 1] = (pgdval_t)pud + _KERNPG_TABLE;
+		p4d[i + 0] = (pgdval_t)pud + pgtable_flags;
+		p4d[i + 1] = (pgdval_t)pud + pgtable_flags;
 	} else {
 		i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD;
-		pgd[i + 0] = (pgdval_t)pud + _KERNPG_TABLE;
-		pgd[i + 1] = (pgdval_t)pud + _KERNPG_TABLE;
+		pgd[i + 0] = (pgdval_t)pud + pgtable_flags;
+		pgd[i + 1] = (pgdval_t)pud + pgtable_flags;
 	}
 
 	i = (physaddr >> PUD_SHIFT) % PTRS_PER_PUD;
-	pud[i + 0] = (pudval_t)pmd + _KERNPG_TABLE;
-	pud[i + 1] = (pudval_t)pmd + _KERNPG_TABLE;
+	pud[i + 0] = (pudval_t)pmd + pgtable_flags;
+	pud[i + 1] = (pudval_t)pmd + pgtable_flags;
 
 	pmd_entry = __PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL;
+	pmd_entry += sme_get_me_mask();
 	pmd_entry +=  physaddr;
 
 	for (i = 0; i < DIV_ROUND_UP(_end - _text, PMD_SIZE); i++) {
@@ -137,9 +149,30 @@ void __head __startup_64(unsigned long physaddr)
 			pmd[i] += load_delta;
 	}
 
-	/* Fixup phys_base */
+	/*
+	 * Fixup phys_base - remove the memory encryption mask to obtain
+	 * the true physical address.
+	 */
 	p = fixup_pointer(&phys_base, physaddr);
-	*p += load_delta;
+	*p += load_delta - sme_get_me_mask();
+
+	/* Encrypt the kernel (if SME is active) */
+	sme_encrypt_kernel();
+
+	/*
+	 * Return the SME encryption mask (if SME is active) to be used as a
+	 * modifier for the initial pgdir entry programmed into CR3.
+	 */
+	return sme_get_me_mask();
+}
+
+unsigned long __startup_secondary_64(void)
+{
+	/*
+	 * Return the SME encryption mask (if SME is active) to be used as a
+	 * modifier for the initial pgdir entry programmed into CR3.
+	 */
+	return sme_get_me_mask();
 }
 
 /* Wipe all early page tables except for the kernel symbol map */
@@ -147,17 +180,17 @@ static void __init reset_early_page_tables(void)
 {
 	memset(early_top_pgt, 0, sizeof(pgd_t)*(PTRS_PER_PGD-1));
 	next_early_pgt = 0;
-	write_cr3(__pa_nodebug(early_top_pgt));
+	write_cr3(__sme_pa_nodebug(early_top_pgt));
 }
 
 /* Create a new PMD entry */
-int __init early_make_pgtable(unsigned long address)
+int __init __early_make_pgtable(unsigned long address, pmdval_t pmd)
 {
 	unsigned long physaddr = address - __PAGE_OFFSET;
 	pgdval_t pgd, *pgd_p;
 	p4dval_t p4d, *p4d_p;
 	pudval_t pud, *pud_p;
-	pmdval_t pmd, *pmd_p;
+	pmdval_t *pmd_p;
 
 	/* Invalid address or early pgt is done ?  */
 	if (physaddr >= MAXMEM || read_cr3_pa() != __pa_nodebug(early_top_pgt))
@@ -216,12 +249,21 @@ again:
 		memset(pmd_p, 0, sizeof(*pmd_p) * PTRS_PER_PMD);
 		*pud_p = (pudval_t)pmd_p - __START_KERNEL_map + phys_base + _KERNPG_TABLE;
 	}
-	pmd = (physaddr & PMD_MASK) + early_pmd_flags;
 	pmd_p[pmd_index(address)] = pmd;
 
 	return 0;
 }
 
+int __init early_make_pgtable(unsigned long address)
+{
+	unsigned long physaddr = address - __PAGE_OFFSET;
+	pmdval_t pmd;
+
+	pmd = (physaddr & PMD_MASK) + early_pmd_flags;
+
+	return __early_make_pgtable(address, pmd);
+}
+
 /* Don't add a printk in there. printk relies on the PDA which is not initialized 
    yet. */
 static void __init clear_bss(void)
@@ -244,6 +286,12 @@ static void __init copy_bootdata(char *real_mode_data)
 	char * command_line;
 	unsigned long cmd_line_ptr;
 
+	/*
+	 * If SME is active, this will create decrypted mappings of the
+	 * boot data in advance of the copy operations.
+	 */
+	sme_map_bootdata(real_mode_data);
+
 	memcpy(&boot_params, real_mode_data, sizeof boot_params);
 	sanitize_boot_params(&boot_params);
 	cmd_line_ptr = get_cmd_line_ptr();
@@ -251,12 +299,18 @@ static void __init copy_bootdata(char *real_mode_data)
 		command_line = __va(cmd_line_ptr);
 		memcpy(boot_command_line, command_line, COMMAND_LINE_SIZE);
 	}
+
+	/*
+	 * The old boot data is no longer needed and won't be reserved,
+	 * freeing up that memory for use by the system. If SME is active,
+	 * we need to remove the mappings that were created so that the
+	 * memory doesn't remain mapped as decrypted.
+	 */
+	sme_unmap_bootdata(real_mode_data);
 }
 
 asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data)
 {
-	int i;
-
 	/*
 	 * Build-time sanity checks on the kernel image and module
 	 * area mappings. (these are purely build-time and produce no code)
@@ -280,11 +334,16 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data)
 
 	clear_page(init_top_pgt);
 
+	/*
+	 * SME support may update early_pmd_flags to include the memory
+	 * encryption mask, so it needs to be called before anything
+	 * that may generate a page fault.
+	 */
+	sme_early_init();
+
 	kasan_early_init();
 
-	for (i = 0; i < NUM_EXCEPTION_VECTORS; i++)
-		set_intr_gate(i, early_idt_handler_array[i]);
-	load_idt((const struct desc_ptr *)&idt_descr);
+	idt_setup_early_handler();
 
 	copy_bootdata(__va(real_mode_data));
 
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 1f85ee8f9439..9ed3074d0d27 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -155,7 +155,6 @@ ENTRY(startup_32)
 	jmp *%eax
 
 .Lbad_subarch:
-WEAK(lguest_entry)
 WEAK(xen_entry)
 	/* Unknown implementation; there's really
 	   nothing we can do at this point. */
@@ -165,7 +164,6 @@ WEAK(xen_entry)
 
 subarch_entries:
 	.long .Ldefault_entry		/* normal x86/PC */
-	.long lguest_entry		/* lguest hypervisor */
 	.long xen_entry			/* Xen hypervisor */
 	.long .Ldefault_entry		/* Moorestown MID */
 num_subarch_entries = (. - subarch_entries) / 4
@@ -347,7 +345,6 @@ ENTRY(startup_32_smp)
 	movl %eax,%cr0
 
 	lgdt early_gdt_descr
-	lidt idt_descr
 	ljmp $(__KERNEL_CS),$1f
 1:	movl $(__KERNEL_DS),%eax	# reload all the segment registers
 	movl %eax,%ss			# after changing gdt.
@@ -380,37 +377,6 @@ ENDPROC(startup_32_smp)
  */
 __INIT
 setup_once:
-	/*
-	 * Set up a idt with 256 interrupt gates that push zero if there
-	 * is no error code and then jump to early_idt_handler_common.
-	 * It doesn't actually load the idt - that needs to be done on
-	 * each CPU. Interrupts are enabled elsewhere, when we can be
-	 * relatively sure everything is ok.
-	 */
-
-	movl $idt_table,%edi
-	movl $early_idt_handler_array,%eax
-	movl $NUM_EXCEPTION_VECTORS,%ecx
-1:
-	movl %eax,(%edi)
-	movl %eax,4(%edi)
-	/* interrupt gate, dpl=0, present */
-	movl $(0x8E000000 + __KERNEL_CS),2(%edi)
-	addl $EARLY_IDT_HANDLER_SIZE,%eax
-	addl $8,%edi
-	loop 1b
-
-	movl $256 - NUM_EXCEPTION_VECTORS,%ecx
-	movl $ignore_int,%edx
-	movl $(__KERNEL_CS << 16),%eax
-	movw %dx,%ax		/* selector = 0x0010 = cs */
-	movw $0x8E00,%dx	/* interrupt gate - dpl=0, present */
-2:
-	movl %eax,(%edi)
-	movl %edx,4(%edi)
-	addl $8,%edi
-	loop 2b
-
 #ifdef CONFIG_CC_STACKPROTECTOR
 	/*
 	 * Configure the stack canary. The linker can't handle this by
@@ -457,12 +423,9 @@ early_idt_handler_common:
 	/* The vector number is in pt_regs->gs */
 
 	cld
-	pushl	%fs		/* pt_regs->fs */
-	movw	$0, 2(%esp)	/* clear high bits (some CPUs leave garbage) */
-	pushl	%es		/* pt_regs->es */
-	movw	$0, 2(%esp)	/* clear high bits (some CPUs leave garbage) */
-	pushl	%ds		/* pt_regs->ds */
-	movw	$0, 2(%esp)	/* clear high bits (some CPUs leave garbage) */
+	pushl	%fs		/* pt_regs->fs (__fsh varies by model) */
+	pushl	%es		/* pt_regs->es (__esh varies by model) */
+	pushl	%ds		/* pt_regs->ds (__dsh varies by model) */
 	pushl	%eax		/* pt_regs->ax */
 	pushl	%ebp		/* pt_regs->bp */
 	pushl	%edi		/* pt_regs->di */
@@ -479,9 +442,8 @@ early_idt_handler_common:
 	/* Load the vector number into EDX */
 	movl	PT_GS(%esp), %edx
 
-	/* Load GS into pt_regs->gs and clear high bits */
+	/* Load GS into pt_regs->gs (and maybe clobber __gsh) */
 	movw	%gs, PT_GS(%esp)
-	movw	$0, PT_GS+2(%esp)
 
 	movl	%esp, %eax	/* args are pt_regs (EAX), trapnr (EDX) */
 	call	early_fixup_exception
@@ -493,18 +455,17 @@ early_idt_handler_common:
 	popl	%edi		/* pt_regs->di */
 	popl	%ebp		/* pt_regs->bp */
 	popl	%eax		/* pt_regs->ax */
-	popl	%ds		/* pt_regs->ds */
-	popl	%es		/* pt_regs->es */
-	popl	%fs		/* pt_regs->fs */
-	popl	%gs		/* pt_regs->gs */
+	popl	%ds		/* pt_regs->ds (always ignores __dsh) */
+	popl	%es		/* pt_regs->es (always ignores __esh) */
+	popl	%fs		/* pt_regs->fs (always ignores __fsh) */
+	popl	%gs		/* pt_regs->gs (always ignores __gsh) */
 	decl	%ss:early_recursion_flag
 	addl	$4, %esp	/* pop pt_regs->orig_ax */
 	iret
 ENDPROC(early_idt_handler_common)
 
 /* This is the default interrupt "handler" :-) */
-	ALIGN
-ignore_int:
+ENTRY(early_ignore_irq)
 	cld
 #ifdef CONFIG_PRINTK
 	pushl %eax
@@ -539,7 +500,8 @@ ignore_int:
 hlt_loop:
 	hlt
 	jmp hlt_loop
-ENDPROC(ignore_int)
+ENDPROC(early_ignore_irq)
+
 __INITDATA
 	.align 4
 GLOBAL(early_recursion_flag)
@@ -628,7 +590,6 @@ int_msg:
 
 	.data
 .globl boot_gdt_descr
-.globl idt_descr
 
 	ALIGN
 # early boot GDT descriptor (must use 1:1 address mapping)
@@ -637,11 +598,6 @@ boot_gdt_descr:
 	.word __BOOT_DS+7
 	.long boot_gdt - __PAGE_OFFSET
 
-	.word 0				# 32-bit align idt_desc.address
-idt_descr:
-	.word IDT_ENTRIES*8-1		# idt contains 256 entries
-	.long idt_table
-
 # boot GDT descriptor (later on used by CPU#0):
 	.word 0				# 32 bit align gdt_desc.address
 ENTRY(early_gdt_descr)
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 6225550883df..513cbb012ecc 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -73,12 +73,19 @@ startup_64:
 	/* Sanitize CPU configuration */
 	call verify_cpu
 
+	/*
+	 * Perform pagetable fixups. Additionally, if SME is active, encrypt
+	 * the kernel and retrieve the modifier (SME encryption mask if SME
+	 * is active) to be added to the initial pgdir entry that will be
+	 * programmed into CR3.
+	 */
 	leaq	_text(%rip), %rdi
 	pushq	%rsi
 	call	__startup_64
 	popq	%rsi
 
-	movq	$(early_top_pgt - __START_KERNEL_map), %rax
+	/* Form the CR3 value being sure to include the CR3 modifier */
+	addq	$(early_top_pgt - __START_KERNEL_map), %rax
 	jmp 1f
 ENTRY(secondary_startup_64)
 	/*
@@ -98,7 +105,16 @@ ENTRY(secondary_startup_64)
 	/* Sanitize CPU configuration */
 	call verify_cpu
 
-	movq	$(init_top_pgt - __START_KERNEL_map), %rax
+	/*
+	 * Retrieve the modifier (SME encryption mask if SME is active) to be
+	 * added to the initial pgdir entry that will be programmed into CR3.
+	 */
+	pushq	%rsi
+	call	__startup_secondary_64
+	popq	%rsi
+
+	/* Form the CR3 value being sure to include the CR3 modifier */
+	addq	$(init_top_pgt - __START_KERNEL_map), %rax
 1:
 
 	/* Enable PAE mode, PGE and LA57 */
@@ -335,9 +351,9 @@ GLOBAL(name)
 NEXT_PAGE(early_top_pgt)
 	.fill	511,8,0
 #ifdef CONFIG_X86_5LEVEL
-	.quad	level4_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE
+	.quad	level4_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
 #else
-	.quad	level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE
+	.quad	level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
 #endif
 
 NEXT_PAGE(early_dynamic_pgts)
@@ -350,15 +366,15 @@ NEXT_PAGE(init_top_pgt)
 	.fill	512,8,0
 #else
 NEXT_PAGE(init_top_pgt)
-	.quad   level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
+	.quad   level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
 	.org    init_top_pgt + PGD_PAGE_OFFSET*8, 0
-	.quad   level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
+	.quad   level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
 	.org    init_top_pgt + PGD_START_KERNEL*8, 0
 	/* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
-	.quad   level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE
+	.quad   level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
 
 NEXT_PAGE(level3_ident_pgt)
-	.quad	level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
+	.quad	level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
 	.fill	511, 8, 0
 NEXT_PAGE(level2_ident_pgt)
 	/* Since I easily can, map the first 1G.
@@ -370,14 +386,14 @@ NEXT_PAGE(level2_ident_pgt)
 #ifdef CONFIG_X86_5LEVEL
 NEXT_PAGE(level4_kernel_pgt)
 	.fill	511,8,0
-	.quad	level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE
+	.quad	level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
 #endif
 
 NEXT_PAGE(level3_kernel_pgt)
 	.fill	L3_START_KERNEL,8,0
 	/* (2^48-(2*1024*1024*1024)-((2^39)*511))/(2^30) = 510 */
-	.quad	level2_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE
-	.quad	level2_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE
+	.quad	level2_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
+	.quad	level2_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
 
 NEXT_PAGE(level2_kernel_pgt)
 	/*
@@ -395,7 +411,7 @@ NEXT_PAGE(level2_kernel_pgt)
 
 NEXT_PAGE(level2_fixmap_pgt)
 	.fill	506,8,0
-	.quad	level1_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE
+	.quad	level1_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
 	/* 8MB reserved for vsyscalls + a 2MB hole = 4 + 1 entries */
 	.fill	5,8,0
 
diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c
new file mode 100644
index 000000000000..6107ee1cb8d5
--- /dev/null
+++ b/arch/x86/kernel/idt.c
@@ -0,0 +1,371 @@
+/*
+ * Interrupt descriptor table related code
+ *
+ * This file is licensed under the GPL V2
+ */
+#include <linux/interrupt.h>
+
+#include <asm/traps.h>
+#include <asm/proto.h>
+#include <asm/desc.h>
+
+struct idt_data {
+	unsigned int	vector;
+	unsigned int	segment;
+	struct idt_bits	bits;
+	const void	*addr;
+};
+
+#define DPL0		0x0
+#define DPL3		0x3
+
+#define DEFAULT_STACK	0
+
+#define G(_vector, _addr, _ist, _type, _dpl, _segment)	\
+	{						\
+		.vector		= _vector,		\
+		.bits.ist	= _ist,			\
+		.bits.type	= _type,		\
+		.bits.dpl	= _dpl,			\
+		.bits.p		= 1,			\
+		.addr		= _addr,		\
+		.segment	= _segment,		\
+	}
+
+/* Interrupt gate */
+#define INTG(_vector, _addr)				\
+	G(_vector, _addr, DEFAULT_STACK, GATE_INTERRUPT, DPL0, __KERNEL_CS)
+
+/* System interrupt gate */
+#define SYSG(_vector, _addr)				\
+	G(_vector, _addr, DEFAULT_STACK, GATE_INTERRUPT, DPL3, __KERNEL_CS)
+
+/* Interrupt gate with interrupt stack */
+#define ISTG(_vector, _addr, _ist)			\
+	G(_vector, _addr, _ist, GATE_INTERRUPT, DPL0, __KERNEL_CS)
+
+/* System interrupt gate with interrupt stack */
+#define SISTG(_vector, _addr, _ist)			\
+	G(_vector, _addr, _ist, GATE_INTERRUPT, DPL3, __KERNEL_CS)
+
+/* Task gate */
+#define TSKG(_vector, _gdt)				\
+	G(_vector, NULL, DEFAULT_STACK, GATE_TASK, DPL0, _gdt << 3)
+
+/*
+ * Early traps running on the DEFAULT_STACK because the other interrupt
+ * stacks work only after cpu_init().
+ */
+static const __initdata struct idt_data early_idts[] = {
+	INTG(X86_TRAP_DB,		debug),
+	SYSG(X86_TRAP_BP,		int3),
+#ifdef CONFIG_X86_32
+	INTG(X86_TRAP_PF,		page_fault),
+#endif
+};
+
+/*
+ * The default IDT entries which are set up in trap_init() before
+ * cpu_init() is invoked. Interrupt stacks cannot be used at that point and
+ * the traps which use them are reinitialized with IST after cpu_init() has
+ * set up TSS.
+ */
+static const __initdata struct idt_data def_idts[] = {
+	INTG(X86_TRAP_DE,		divide_error),
+	INTG(X86_TRAP_NMI,		nmi),
+	INTG(X86_TRAP_BR,		bounds),
+	INTG(X86_TRAP_UD,		invalid_op),
+	INTG(X86_TRAP_NM,		device_not_available),
+	INTG(X86_TRAP_OLD_MF,		coprocessor_segment_overrun),
+	INTG(X86_TRAP_TS,		invalid_TSS),
+	INTG(X86_TRAP_NP,		segment_not_present),
+	INTG(X86_TRAP_SS,		stack_segment),
+	INTG(X86_TRAP_GP,		general_protection),
+	INTG(X86_TRAP_SPURIOUS,		spurious_interrupt_bug),
+	INTG(X86_TRAP_MF,		coprocessor_error),
+	INTG(X86_TRAP_AC,		alignment_check),
+	INTG(X86_TRAP_XF,		simd_coprocessor_error),
+
+#ifdef CONFIG_X86_32
+	TSKG(X86_TRAP_DF,		GDT_ENTRY_DOUBLEFAULT_TSS),
+#else
+	INTG(X86_TRAP_DF,		double_fault),
+#endif
+	INTG(X86_TRAP_DB,		debug),
+	INTG(X86_TRAP_NMI,		nmi),
+	INTG(X86_TRAP_BP,		int3),
+
+#ifdef CONFIG_X86_MCE
+	INTG(X86_TRAP_MC,		&machine_check),
+#endif
+
+	SYSG(X86_TRAP_OF,		overflow),
+#if defined(CONFIG_IA32_EMULATION)
+	SYSG(IA32_SYSCALL_VECTOR,	entry_INT80_compat),
+#elif defined(CONFIG_X86_32)
+	SYSG(IA32_SYSCALL_VECTOR,	entry_INT80_32),
+#endif
+};
+
+/*
+ * The APIC and SMP idt entries
+ */
+static const __initdata struct idt_data apic_idts[] = {
+#ifdef CONFIG_SMP
+	INTG(RESCHEDULE_VECTOR,		reschedule_interrupt),
+	INTG(CALL_FUNCTION_VECTOR,	call_function_interrupt),
+	INTG(CALL_FUNCTION_SINGLE_VECTOR, call_function_single_interrupt),
+	INTG(IRQ_MOVE_CLEANUP_VECTOR,	irq_move_cleanup_interrupt),
+	INTG(REBOOT_VECTOR,		reboot_interrupt),
+#endif
+
+#ifdef CONFIG_X86_THERMAL_VECTOR
+	INTG(THERMAL_APIC_VECTOR,	thermal_interrupt),
+#endif
+
+#ifdef CONFIG_X86_MCE_THRESHOLD
+	INTG(THRESHOLD_APIC_VECTOR,	threshold_interrupt),
+#endif
+
+#ifdef CONFIG_X86_MCE_AMD
+	INTG(DEFERRED_ERROR_VECTOR,	deferred_error_interrupt),
+#endif
+
+#ifdef CONFIG_X86_LOCAL_APIC
+	INTG(LOCAL_TIMER_VECTOR,	apic_timer_interrupt),
+	INTG(X86_PLATFORM_IPI_VECTOR,	x86_platform_ipi),
+# ifdef CONFIG_HAVE_KVM
+	INTG(POSTED_INTR_VECTOR,	kvm_posted_intr_ipi),
+	INTG(POSTED_INTR_WAKEUP_VECTOR, kvm_posted_intr_wakeup_ipi),
+	INTG(POSTED_INTR_NESTED_VECTOR, kvm_posted_intr_nested_ipi),
+# endif
+# ifdef CONFIG_IRQ_WORK
+	INTG(IRQ_WORK_VECTOR,		irq_work_interrupt),
+# endif
+	INTG(SPURIOUS_APIC_VECTOR,	spurious_interrupt),
+	INTG(ERROR_APIC_VECTOR,		error_interrupt),
+#endif
+};
+
+#ifdef CONFIG_X86_64
+/*
+ * Early traps running on the DEFAULT_STACK because the other interrupt
+ * stacks work only after cpu_init().
+ */
+static const __initdata struct idt_data early_pf_idts[] = {
+	INTG(X86_TRAP_PF,		page_fault),
+};
+
+/*
+ * Override for the debug_idt. Same as the default, but with interrupt
+ * stack set to DEFAULT_STACK (0). Required for NMI trap handling.
+ */
+static const __initdata struct idt_data dbg_idts[] = {
+	INTG(X86_TRAP_DB,	debug),
+	INTG(X86_TRAP_BP,	int3),
+};
+#endif
+
+/* Must be page-aligned because the real IDT is used in a fixmap. */
+gate_desc idt_table[IDT_ENTRIES] __page_aligned_bss;
+
+struct desc_ptr idt_descr __ro_after_init = {
+	.size		= (IDT_ENTRIES * 2 * sizeof(unsigned long)) - 1,
+	.address	= (unsigned long) idt_table,
+};
+
+#ifdef CONFIG_X86_64
+/* No need to be aligned, but done to keep all IDTs defined the same way. */
+gate_desc debug_idt_table[IDT_ENTRIES] __page_aligned_bss;
+
+/*
+ * The exceptions which use Interrupt stacks. They are setup after
+ * cpu_init() when the TSS has been initialized.
+ */
+static const __initdata struct idt_data ist_idts[] = {
+	ISTG(X86_TRAP_DB,	debug,		DEBUG_STACK),
+	ISTG(X86_TRAP_NMI,	nmi,		NMI_STACK),
+	SISTG(X86_TRAP_BP,	int3,		DEBUG_STACK),
+	ISTG(X86_TRAP_DF,	double_fault,	DOUBLEFAULT_STACK),
+#ifdef CONFIG_X86_MCE
+	ISTG(X86_TRAP_MC,	&machine_check,	MCE_STACK),
+#endif
+};
+
+/*
+ * Override for the debug_idt. Same as the default, but with interrupt
+ * stack set to DEFAULT_STACK (0). Required for NMI trap handling.
+ */
+const struct desc_ptr debug_idt_descr = {
+	.size		= IDT_ENTRIES * 16 - 1,
+	.address	= (unsigned long) debug_idt_table,
+};
+#endif
+
+static inline void idt_init_desc(gate_desc *gate, const struct idt_data *d)
+{
+	unsigned long addr = (unsigned long) d->addr;
+
+	gate->offset_low	= (u16) addr;
+	gate->segment		= (u16) d->segment;
+	gate->bits		= d->bits;
+	gate->offset_middle	= (u16) (addr >> 16);
+#ifdef CONFIG_X86_64
+	gate->offset_high	= (u32) (addr >> 32);
+	gate->reserved		= 0;
+#endif
+}
+
+static void
+idt_setup_from_table(gate_desc *idt, const struct idt_data *t, int size, bool sys)
+{
+	gate_desc desc;
+
+	for (; size > 0; t++, size--) {
+		idt_init_desc(&desc, t);
+		write_idt_entry(idt, t->vector, &desc);
+		if (sys)
+			set_bit(t->vector, used_vectors);
+	}
+}
+
+static void set_intr_gate(unsigned int n, const void *addr)
+{
+	struct idt_data data;
+
+	BUG_ON(n > 0xFF);
+
+	memset(&data, 0, sizeof(data));
+	data.vector	= n;
+	data.addr	= addr;
+	data.segment	= __KERNEL_CS;
+	data.bits.type	= GATE_INTERRUPT;
+	data.bits.p	= 1;
+
+	idt_setup_from_table(idt_table, &data, 1, false);
+}
+
+/**
+ * idt_setup_early_traps - Initialize the idt table with early traps
+ *
+ * On X8664 these traps do not use interrupt stacks as they can't work
+ * before cpu_init() is invoked and sets up TSS. The IST variants are
+ * installed after that.
+ */
+void __init idt_setup_early_traps(void)
+{
+	idt_setup_from_table(idt_table, early_idts, ARRAY_SIZE(early_idts),
+			     true);
+	load_idt(&idt_descr);
+}
+
+/**
+ * idt_setup_traps - Initialize the idt table with default traps
+ */
+void __init idt_setup_traps(void)
+{
+	idt_setup_from_table(idt_table, def_idts, ARRAY_SIZE(def_idts), true);
+}
+
+#ifdef CONFIG_X86_64
+/**
+ * idt_setup_early_pf - Initialize the idt table with early pagefault handler
+ *
+ * On X8664 this does not use interrupt stacks as they can't work before
+ * cpu_init() is invoked and sets up TSS. The IST variant is installed
+ * after that.
+ *
+ * FIXME: Why is 32bit and 64bit installing the PF handler at different
+ * places in the early setup code?
+ */
+void __init idt_setup_early_pf(void)
+{
+	idt_setup_from_table(idt_table, early_pf_idts,
+			     ARRAY_SIZE(early_pf_idts), true);
+}
+
+/**
+ * idt_setup_ist_traps - Initialize the idt table with traps using IST
+ */
+void __init idt_setup_ist_traps(void)
+{
+	idt_setup_from_table(idt_table, ist_idts, ARRAY_SIZE(ist_idts), true);
+}
+
+/**
+ * idt_setup_debugidt_traps - Initialize the debug idt table with debug traps
+ */
+void __init idt_setup_debugidt_traps(void)
+{
+	memcpy(&debug_idt_table, &idt_table, IDT_ENTRIES * 16);
+
+	idt_setup_from_table(debug_idt_table, dbg_idts, ARRAY_SIZE(dbg_idts), false);
+}
+#endif
+
+/**
+ * idt_setup_apic_and_irq_gates - Setup APIC/SMP and normal interrupt gates
+ */
+void __init idt_setup_apic_and_irq_gates(void)
+{
+	int i = FIRST_EXTERNAL_VECTOR;
+	void *entry;
+
+	idt_setup_from_table(idt_table, apic_idts, ARRAY_SIZE(apic_idts), true);
+
+	for_each_clear_bit_from(i, used_vectors, FIRST_SYSTEM_VECTOR) {
+		entry = irq_entries_start + 8 * (i - FIRST_EXTERNAL_VECTOR);
+		set_intr_gate(i, entry);
+	}
+
+	for_each_clear_bit_from(i, used_vectors, NR_VECTORS) {
+#ifdef CONFIG_X86_LOCAL_APIC
+		set_bit(i, used_vectors);
+		set_intr_gate(i, spurious_interrupt);
+#else
+		entry = irq_entries_start + 8 * (i - FIRST_EXTERNAL_VECTOR);
+		set_intr_gate(i, entry);
+#endif
+	}
+}
+
+/**
+ * idt_setup_early_handler - Initializes the idt table with early handlers
+ */
+void __init idt_setup_early_handler(void)
+{
+	int i;
+
+	for (i = 0; i < NUM_EXCEPTION_VECTORS; i++)
+		set_intr_gate(i, early_idt_handler_array[i]);
+#ifdef CONFIG_X86_32
+	for ( ; i < NR_VECTORS; i++)
+		set_intr_gate(i, early_ignore_irq);
+#endif
+	load_idt(&idt_descr);
+}
+
+/**
+ * idt_invalidate - Invalidate interrupt descriptor table
+ * @addr:	The virtual address of the 'invalid' IDT
+ */
+void idt_invalidate(void *addr)
+{
+	struct desc_ptr idt = { .address = (unsigned long) addr, .size = 0 };
+
+	load_idt(&idt);
+}
+
+void __init update_intr_gate(unsigned int n, const void *addr)
+{
+	if (WARN_ON_ONCE(!test_bit(n, used_vectors)))
+		return;
+	set_intr_gate(n, addr);
+}
+
+void alloc_intr_gate(unsigned int n, const void *addr)
+{
+	BUG_ON(n < FIRST_SYSTEM_VECTOR);
+	if (!test_and_set_bit(n, used_vectors))
+		set_intr_gate(n, addr);
+}
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 4ed0aba8dbc8..52089c043160 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -29,9 +29,6 @@ EXPORT_PER_CPU_SYMBOL(irq_regs);
 
 atomic_t irq_err_count;
 
-/* Function pointer for generic interrupt vector handling */
-void (*x86_platform_ipi_callback)(void) = NULL;
-
 /*
  * 'what should we do if we get a hw irq event on an illegal vector'.
  * each architecture has to answer this themselves.
@@ -87,13 +84,13 @@ int arch_show_interrupts(struct seq_file *p, int prec)
 	for_each_online_cpu(j)
 		seq_printf(p, "%10u ", irq_stats(j)->icr_read_retry_count);
 	seq_puts(p, "  APIC ICR read retries\n");
-#endif
 	if (x86_platform_ipi_callback) {
 		seq_printf(p, "%*s: ", prec, "PLT");
 		for_each_online_cpu(j)
 			seq_printf(p, "%10u ", irq_stats(j)->x86_platform_ipis);
 		seq_puts(p, "  Platform interrupts\n");
 	}
+#endif
 #ifdef CONFIG_SMP
 	seq_printf(p, "%*s: ", prec, "RES");
 	for_each_online_cpu(j)
@@ -183,9 +180,9 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
 	sum += irq_stats(cpu)->apic_perf_irqs;
 	sum += irq_stats(cpu)->apic_irq_work_irqs;
 	sum += irq_stats(cpu)->icr_read_retry_count;
-#endif
 	if (x86_platform_ipi_callback)
 		sum += irq_stats(cpu)->x86_platform_ipis;
+#endif
 #ifdef CONFIG_SMP
 	sum += irq_stats(cpu)->irq_resched_count;
 	sum += irq_stats(cpu)->irq_call_count;
@@ -259,26 +256,26 @@ __visible unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
 	return 1;
 }
 
+#ifdef CONFIG_X86_LOCAL_APIC
+/* Function pointer for generic interrupt vector handling */
+void (*x86_platform_ipi_callback)(void) = NULL;
 /*
  * Handler for X86_PLATFORM_IPI_VECTOR.
  */
-void __smp_x86_platform_ipi(void)
-{
-	inc_irq_stat(x86_platform_ipis);
-
-	if (x86_platform_ipi_callback)
-		x86_platform_ipi_callback();
-}
-
 __visible void __irq_entry smp_x86_platform_ipi(struct pt_regs *regs)
 {
 	struct pt_regs *old_regs = set_irq_regs(regs);
 
 	entering_ack_irq();
-	__smp_x86_platform_ipi();
+	trace_x86_platform_ipi_entry(X86_PLATFORM_IPI_VECTOR);
+	inc_irq_stat(x86_platform_ipis);
+	if (x86_platform_ipi_callback)
+		x86_platform_ipi_callback();
+	trace_x86_platform_ipi_exit(X86_PLATFORM_IPI_VECTOR);
 	exiting_irq();
 	set_irq_regs(old_regs);
 }
+#endif
 
 #ifdef CONFIG_HAVE_KVM
 static void dummy_handler(void) {}
@@ -334,19 +331,6 @@ __visible void smp_kvm_posted_intr_nested_ipi(struct pt_regs *regs)
 }
 #endif
 
-__visible void __irq_entry smp_trace_x86_platform_ipi(struct pt_regs *regs)
-{
-	struct pt_regs *old_regs = set_irq_regs(regs);
-
-	entering_ack_irq();
-	trace_x86_platform_ipi_entry(X86_PLATFORM_IPI_VECTOR);
-	__smp_x86_platform_ipi();
-	trace_x86_platform_ipi_exit(X86_PLATFORM_IPI_VECTOR);
-	exiting_irq();
-	set_irq_regs(old_regs);
-}
-
-EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq);
 
 #ifdef CONFIG_HOTPLUG_CPU
 
@@ -431,7 +415,7 @@ int check_irq_vectors_for_cpu_disable(void)
 		 * this w/o holding vector_lock.
 		 */
 		for (vector = FIRST_EXTERNAL_VECTOR;
-		     vector < first_system_vector; vector++) {
+		     vector < FIRST_SYSTEM_VECTOR; vector++) {
 			if (!test_bit(vector, used_vectors) &&
 			    IS_ERR_OR_NULL(per_cpu(vector_irq, cpu)[vector])) {
 				if (++count == this_count)
diff --git a/arch/x86/kernel/irq_work.c b/arch/x86/kernel/irq_work.c
index 275487872be2..70dee056f92b 100644
--- a/arch/x86/kernel/irq_work.c
+++ b/arch/x86/kernel/irq_work.c
@@ -11,35 +11,23 @@
 #include <asm/trace/irq_vectors.h>
 #include <linux/interrupt.h>
 
-static inline void __smp_irq_work_interrupt(void)
-{
-	inc_irq_stat(apic_irq_work_irqs);
-	irq_work_run();
-}
-
+#ifdef CONFIG_X86_LOCAL_APIC
 __visible void __irq_entry smp_irq_work_interrupt(struct pt_regs *regs)
 {
 	ipi_entering_ack_irq();
-	__smp_irq_work_interrupt();
-	exiting_irq();
-}
-
-__visible void __irq_entry smp_trace_irq_work_interrupt(struct pt_regs *regs)
-{
-	ipi_entering_ack_irq();
 	trace_irq_work_entry(IRQ_WORK_VECTOR);
-	__smp_irq_work_interrupt();
+	inc_irq_stat(apic_irq_work_irqs);
+	irq_work_run();
 	trace_irq_work_exit(IRQ_WORK_VECTOR);
 	exiting_irq();
 }
 
 void arch_irq_work_raise(void)
 {
-#ifdef CONFIG_X86_LOCAL_APIC
 	if (!arch_irq_work_has_interrupt())
 		return;
 
 	apic->send_IPI_self(IRQ_WORK_VECTOR);
 	apic_wait_icr_idle();
-#endif
 }
+#endif
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index c7fd18526c3e..1add9e08e83e 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -55,18 +55,6 @@ DEFINE_PER_CPU(vector_irq_t, vector_irq) = {
 	[0 ... NR_VECTORS - 1] = VECTOR_UNUSED,
 };
 
-int vector_used_by_percpu_irq(unsigned int vector)
-{
-	int cpu;
-
-	for_each_online_cpu(cpu) {
-		if (!IS_ERR_OR_NULL(per_cpu(vector_irq, cpu)[vector]))
-			return 1;
-	}
-
-	return 0;
-}
-
 void __init init_ISA_irqs(void)
 {
 	struct irq_chip *chip = legacy_pic->chip;
@@ -99,100 +87,12 @@ void __init init_IRQ(void)
 	x86_init.irqs.intr_init();
 }
 
-static void __init smp_intr_init(void)
-{
-#ifdef CONFIG_SMP
-	/*
-	 * The reschedule interrupt is a CPU-to-CPU reschedule-helper
-	 * IPI, driven by wakeup.
-	 */
-	alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
-
-	/* IPI for generic function call */
-	alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
-
-	/* IPI for generic single function call */
-	alloc_intr_gate(CALL_FUNCTION_SINGLE_VECTOR,
-			call_function_single_interrupt);
-
-	/* Low priority IPI to cleanup after moving an irq */
-	set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
-	set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors);
-
-	/* IPI used for rebooting/stopping */
-	alloc_intr_gate(REBOOT_VECTOR, reboot_interrupt);
-#endif /* CONFIG_SMP */
-}
-
-static void __init apic_intr_init(void)
-{
-	smp_intr_init();
-
-#ifdef CONFIG_X86_THERMAL_VECTOR
-	alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
-#endif
-#ifdef CONFIG_X86_MCE_THRESHOLD
-	alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt);
-#endif
-
-#ifdef CONFIG_X86_MCE_AMD
-	alloc_intr_gate(DEFERRED_ERROR_VECTOR, deferred_error_interrupt);
-#endif
-
-#ifdef CONFIG_X86_LOCAL_APIC
-	/* self generated IPI for local APIC timer */
-	alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
-
-	/* IPI for X86 platform specific use */
-	alloc_intr_gate(X86_PLATFORM_IPI_VECTOR, x86_platform_ipi);
-#ifdef CONFIG_HAVE_KVM
-	/* IPI for KVM to deliver posted interrupt */
-	alloc_intr_gate(POSTED_INTR_VECTOR, kvm_posted_intr_ipi);
-	/* IPI for KVM to deliver interrupt to wake up tasks */
-	alloc_intr_gate(POSTED_INTR_WAKEUP_VECTOR, kvm_posted_intr_wakeup_ipi);
-	/* IPI for KVM to deliver nested posted interrupt */
-	alloc_intr_gate(POSTED_INTR_NESTED_VECTOR, kvm_posted_intr_nested_ipi);
-#endif
-
-	/* IPI vectors for APIC spurious and error interrupts */
-	alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
-	alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
-
-	/* IRQ work interrupts: */
-# ifdef CONFIG_IRQ_WORK
-	alloc_intr_gate(IRQ_WORK_VECTOR, irq_work_interrupt);
-# endif
-
-#endif
-}
-
 void __init native_init_IRQ(void)
 {
-	int i;
-
 	/* Execute any quirks before the call gates are initialised: */
 	x86_init.irqs.pre_vector_init();
 
-	apic_intr_init();
-
-	/*
-	 * Cover the whole vector space, no vector can escape
-	 * us. (some of these will be overridden and become
-	 * 'special' SMP interrupts)
-	 */
-	i = FIRST_EXTERNAL_VECTOR;
-#ifndef CONFIG_X86_LOCAL_APIC
-#define first_system_vector NR_VECTORS
-#endif
-	for_each_clear_bit_from(i, used_vectors, first_system_vector) {
-		/* IA32_SYSCALL_VECTOR could be used in trap_init already. */
-		set_intr_gate(i, irq_entries_start +
-				8 * (i - FIRST_EXTERNAL_VECTOR));
-	}
-#ifdef CONFIG_X86_LOCAL_APIC
-	for_each_clear_bit_from(i, used_vectors, NR_VECTORS)
-		set_intr_gate(i, spurious_interrupt);
-#endif
+	idt_setup_apic_and_irq_gates();
 
 	if (!acpi_ioapic && !of_ioapic && nr_legacy_irqs())
 		setup_irq(2, &irq2);
diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c
index 38b64587b31b..fd6f8fbbe6f2 100644
--- a/arch/x86/kernel/kdebugfs.c
+++ b/arch/x86/kernel/kdebugfs.c
@@ -33,7 +33,6 @@ static ssize_t setup_data_read(struct file *file, char __user *user_buf,
 	struct setup_data_node *node = file->private_data;
 	unsigned long remain;
 	loff_t pos = *ppos;
-	struct page *pg;
 	void *p;
 	u64 pa;
 
@@ -47,18 +46,13 @@ static ssize_t setup_data_read(struct file *file, char __user *user_buf,
 		count = node->len - pos;
 
 	pa = node->paddr + sizeof(struct setup_data) + pos;
-	pg = pfn_to_page((pa + count - 1) >> PAGE_SHIFT);
-	if (PageHighMem(pg)) {
-		p = ioremap_cache(pa, count);
-		if (!p)
-			return -ENXIO;
-	} else
-		p = __va(pa);
+	p = memremap(pa, count, MEMREMAP_WB);
+	if (!p)
+		return -ENOMEM;
 
 	remain = copy_to_user(user_buf, p, count);
 
-	if (PageHighMem(pg))
-		iounmap(p);
+	memunmap(p);
 
 	if (remain)
 		return -EFAULT;
@@ -109,7 +103,6 @@ static int __init create_setup_data_nodes(struct dentry *parent)
 	struct setup_data *data;
 	int error;
 	struct dentry *d;
-	struct page *pg;
 	u64 pa_data;
 	int no = 0;
 
@@ -126,16 +119,12 @@ static int __init create_setup_data_nodes(struct dentry *parent)
 			goto err_dir;
 		}
 
-		pg = pfn_to_page((pa_data+sizeof(*data)-1) >> PAGE_SHIFT);
-		if (PageHighMem(pg)) {
-			data = ioremap_cache(pa_data, sizeof(*data));
-			if (!data) {
-				kfree(node);
-				error = -ENXIO;
-				goto err_dir;
-			}
-		} else
-			data = __va(pa_data);
+		data = memremap(pa_data, sizeof(*data), MEMREMAP_WB);
+		if (!data) {
+			kfree(node);
+			error = -ENOMEM;
+			goto err_dir;
+		}
 
 		node->paddr = pa_data;
 		node->type = data->type;
@@ -143,8 +132,7 @@ static int __init create_setup_data_nodes(struct dentry *parent)
 		error = create_setup_data_node(d, no, node);
 		pa_data = data->next;
 
-		if (PageHighMem(pg))
-			iounmap(data);
+		memunmap(data);
 		if (error)
 			goto err_dir;
 		no++;
diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c
index 69ea0bc1cfa3..4f98aad38237 100644
--- a/arch/x86/kernel/kprobes/opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -39,6 +39,7 @@
 #include <asm/insn.h>
 #include <asm/debugreg.h>
 #include <asm/set_memory.h>
+#include <asm/sections.h>
 
 #include "common.h"
 
@@ -251,10 +252,12 @@ static int can_optimize(unsigned long paddr)
 
 	/*
 	 * Do not optimize in the entry code due to the unstable
-	 * stack handling.
+	 * stack handling and registers setup.
 	 */
-	if ((paddr >= (unsigned long)__entry_text_start) &&
-	    (paddr <  (unsigned long)__entry_text_end))
+	if (((paddr >= (unsigned long)__entry_text_start) &&
+	     (paddr <  (unsigned long)__entry_text_end)) ||
+	    ((paddr >= (unsigned long)__irqentry_text_start) &&
+	     (paddr <  (unsigned long)__irqentry_text_end)))
 		return 0;
 
 	/* Check there is enough space for a relative jump. */
diff --git a/arch/x86/kernel/ksysfs.c b/arch/x86/kernel/ksysfs.c
index 06e1ff5562c0..4b0592ca9e47 100644
--- a/arch/x86/kernel/ksysfs.c
+++ b/arch/x86/kernel/ksysfs.c
@@ -16,8 +16,8 @@
 #include <linux/stat.h>
 #include <linux/slab.h>
 #include <linux/mm.h>
+#include <linux/io.h>
 
-#include <asm/io.h>
 #include <asm/setup.h>
 
 static ssize_t version_show(struct kobject *kobj,
@@ -79,12 +79,12 @@ static int get_setup_data_paddr(int nr, u64 *paddr)
 			*paddr = pa_data;
 			return 0;
 		}
-		data = ioremap_cache(pa_data, sizeof(*data));
+		data = memremap(pa_data, sizeof(*data), MEMREMAP_WB);
 		if (!data)
 			return -ENOMEM;
 
 		pa_data = data->next;
-		iounmap(data);
+		memunmap(data);
 		i++;
 	}
 	return -EINVAL;
@@ -97,17 +97,17 @@ static int __init get_setup_data_size(int nr, size_t *size)
 	u64 pa_data = boot_params.hdr.setup_data;
 
 	while (pa_data) {
-		data = ioremap_cache(pa_data, sizeof(*data));
+		data = memremap(pa_data, sizeof(*data), MEMREMAP_WB);
 		if (!data)
 			return -ENOMEM;
 		if (nr == i) {
 			*size = data->len;
-			iounmap(data);
+			memunmap(data);
 			return 0;
 		}
 
 		pa_data = data->next;
-		iounmap(data);
+		memunmap(data);
 		i++;
 	}
 	return -EINVAL;
@@ -127,12 +127,12 @@ static ssize_t type_show(struct kobject *kobj,
 	ret = get_setup_data_paddr(nr, &paddr);
 	if (ret)
 		return ret;
-	data = ioremap_cache(paddr, sizeof(*data));
+	data = memremap(paddr, sizeof(*data), MEMREMAP_WB);
 	if (!data)
 		return -ENOMEM;
 
 	ret = sprintf(buf, "0x%x\n", data->type);
-	iounmap(data);
+	memunmap(data);
 	return ret;
 }
 
@@ -154,7 +154,7 @@ static ssize_t setup_data_data_read(struct file *fp,
 	ret = get_setup_data_paddr(nr, &paddr);
 	if (ret)
 		return ret;
-	data = ioremap_cache(paddr, sizeof(*data));
+	data = memremap(paddr, sizeof(*data), MEMREMAP_WB);
 	if (!data)
 		return -ENOMEM;
 
@@ -170,15 +170,15 @@ static ssize_t setup_data_data_read(struct file *fp,
 		goto out;
 
 	ret = count;
-	p = ioremap_cache(paddr + sizeof(*data), data->len);
+	p = memremap(paddr + sizeof(*data), data->len, MEMREMAP_WB);
 	if (!p) {
 		ret = -ENOMEM;
 		goto out;
 	}
 	memcpy(buf, p + off, count);
-	iounmap(p);
+	memunmap(p);
 out:
-	iounmap(data);
+	memunmap(data);
 	return ret;
 }
 
@@ -250,13 +250,13 @@ static int __init get_setup_data_total_num(u64 pa_data, int *nr)
 	*nr = 0;
 	while (pa_data) {
 		*nr += 1;
-		data = ioremap_cache(pa_data, sizeof(*data));
+		data = memremap(pa_data, sizeof(*data), MEMREMAP_WB);
 		if (!data) {
 			ret = -ENOMEM;
 			goto out;
 		}
 		pa_data = data->next;
-		iounmap(data);
+		memunmap(data);
 	}
 
 out:
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index d04e30e3c0ff..874827b0d7ca 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -263,7 +263,7 @@ do_async_page_fault(struct pt_regs *regs, unsigned long error_code)
 
 	switch (kvm_read_and_reset_pf_reason()) {
 	default:
-		trace_do_page_fault(regs, error_code);
+		do_page_fault(regs, error_code);
 		break;
 	case KVM_PV_REASON_PAGE_NOT_PRESENT:
 		/* page is swapped out by the host. */
@@ -455,7 +455,7 @@ static int kvm_cpu_down_prepare(unsigned int cpu)
 
 static void __init kvm_apf_trap_init(void)
 {
-	set_intr_gate(14, async_page_fault);
+	update_intr_gate(X86_TRAP_PF, async_page_fault);
 }
 
 void __init kvm_guest_init(void)
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index a870910c8565..f0e64db18ac8 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -21,6 +21,25 @@
 #include <asm/mmu_context.h>
 #include <asm/syscalls.h>
 
+static void refresh_ldt_segments(void)
+{
+#ifdef CONFIG_X86_64
+	unsigned short sel;
+
+	/*
+	 * Make sure that the cached DS and ES descriptors match the updated
+	 * LDT.
+	 */
+	savesegment(ds, sel);
+	if ((sel & SEGMENT_TI_MASK) == SEGMENT_LDT)
+		loadsegment(ds, sel);
+
+	savesegment(es, sel);
+	if ((sel & SEGMENT_TI_MASK) == SEGMENT_LDT)
+		loadsegment(es, sel);
+#endif
+}
+
 /* context.lock is held for us, so we don't need any locking. */
 static void flush_ldt(void *__mm)
 {
@@ -32,6 +51,8 @@ static void flush_ldt(void *__mm)
 
 	pc = &mm->context;
 	set_ldt(pc->ldt->entries, pc->ldt->nr_entries);
+
+	refresh_ldt_segments();
 }
 
 /* The caller must call finalize_ldt_struct on the result. LDT starts zeroed. */
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index 8c53c5d7a1bc..00bc751c861c 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -26,18 +26,6 @@
 #include <asm/set_memory.h>
 #include <asm/debugreg.h>
 
-static void set_idt(void *newidt, __u16 limit)
-{
-	struct desc_ptr curidt;
-
-	/* ia32 supports unaliged loads & stores */
-	curidt.size    = limit;
-	curidt.address = (unsigned long)newidt;
-
-	load_idt(&curidt);
-}
-
-
 static void set_gdt(void *newgdt, __u16 limit)
 {
 	struct desc_ptr curgdt;
@@ -245,7 +233,7 @@ void machine_kexec(struct kimage *image)
 	 * If you want to load them you must set up your own idt & gdt.
 	 */
 	set_gdt(phys_to_virt(0), 0);
-	set_idt(phys_to_virt(0), 0);
+	idt_invalidate(phys_to_virt(0));
 
 	/* now call it */
 	image->start = relocate_kernel_ptr((unsigned long)image->head,
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index cb0a30473c23..1f790cf9d38f 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -87,7 +87,7 @@ static int init_transition_pgtable(struct kimage *image, pgd_t *pgd)
 		set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE));
 	}
 	pte = pte_offset_kernel(pmd, vaddr);
-	set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL_EXEC));
+	set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL_EXEC_NOENC));
 	return 0;
 err:
 	free_transition_pgtable(image);
@@ -115,6 +115,7 @@ static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
 		.alloc_pgt_page	= alloc_pgt_page,
 		.context	= image,
 		.page_flag	= __PAGE_KERNEL_LARGE_EXEC,
+		.kernpg_flag	= _KERNPG_TABLE_NOENC,
 	};
 	unsigned long mstart, mend;
 	pgd_t *level4p;
@@ -334,7 +335,8 @@ void machine_kexec(struct kimage *image)
 	image->start = relocate_kernel((unsigned long)image->head,
 				       (unsigned long)page_list,
 				       image->start,
-				       image->preserve_context);
+				       image->preserve_context,
+				       sme_active());
 
 #ifdef CONFIG_KEXEC_JUMP
 	if (image->preserve_context)
@@ -602,3 +604,22 @@ void arch_kexec_unprotect_crashkres(void)
 {
 	kexec_mark_crashkres(false);
 }
+
+int arch_kexec_post_alloc_pages(void *vaddr, unsigned int pages, gfp_t gfp)
+{
+	/*
+	 * If SME is active we need to be sure that kexec pages are
+	 * not encrypted because when we boot to the new kernel the
+	 * pages won't be accessed encrypted (initially).
+	 */
+	return set_memory_decrypted((unsigned long)vaddr, pages);
+}
+
+void arch_kexec_pre_free_pages(void *vaddr, unsigned int pages)
+{
+	/*
+	 * If SME is active we need to reset the pages back to being
+	 * an encrypted mapping before freeing them.
+	 */
+	set_memory_encrypted((unsigned long)vaddr, pages);
+}
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index f67bd3205df7..62e7d70aadd5 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -35,6 +35,7 @@
 #include <asm/page.h>
 #include <asm/pgtable.h>
 #include <asm/setup.h>
+#include <asm/unwind.h>
 
 #if 0
 #define DEBUGP(fmt, ...)				\
@@ -213,7 +214,7 @@ int module_finalize(const Elf_Ehdr *hdr,
 		    struct module *me)
 {
 	const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL,
-		*para = NULL;
+		*para = NULL, *orc = NULL, *orc_ip = NULL;
 	char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
 
 	for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
@@ -225,6 +226,10 @@ int module_finalize(const Elf_Ehdr *hdr,
 			locks = s;
 		if (!strcmp(".parainstructions", secstrings + s->sh_name))
 			para = s;
+		if (!strcmp(".orc_unwind", secstrings + s->sh_name))
+			orc = s;
+		if (!strcmp(".orc_unwind_ip", secstrings + s->sh_name))
+			orc_ip = s;
 	}
 
 	if (alt) {
@@ -248,6 +253,10 @@ int module_finalize(const Elf_Ehdr *hdr,
 	/* make jump label nops */
 	jump_label_apply_nops(me);
 
+	if (orc && orc_ip)
+		unwind_module_init(me, (void *)orc_ip->sh_addr, orc_ip->sh_size,
+				   (void *)orc->sh_addr, orc->sh_size);
+
 	return 0;
 }
 
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 0d904d759ff1..5cbb3177ed17 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -429,16 +429,16 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type)
 	}
 }
 
-static struct mpf_intel *mpf_found;
+static unsigned long mpf_base;
 
 static unsigned long __init get_mpc_size(unsigned long physptr)
 {
 	struct mpc_table *mpc;
 	unsigned long size;
 
-	mpc = early_ioremap(physptr, PAGE_SIZE);
+	mpc = early_memremap(physptr, PAGE_SIZE);
 	size = mpc->length;
-	early_iounmap(mpc, PAGE_SIZE);
+	early_memunmap(mpc, PAGE_SIZE);
 	apic_printk(APIC_VERBOSE, "  mpc: %lx-%lx\n", physptr, physptr + size);
 
 	return size;
@@ -450,7 +450,8 @@ static int __init check_physptr(struct mpf_intel *mpf, unsigned int early)
 	unsigned long size;
 
 	size = get_mpc_size(mpf->physptr);
-	mpc = early_ioremap(mpf->physptr, size);
+	mpc = early_memremap(mpf->physptr, size);
+
 	/*
 	 * Read the physical hardware table.  Anything here will
 	 * override the defaults.
@@ -461,10 +462,10 @@ static int __init check_physptr(struct mpf_intel *mpf, unsigned int early)
 #endif
 		pr_err("BIOS bug, MP table errors detected!...\n");
 		pr_cont("... disabling SMP support. (tell your hw vendor)\n");
-		early_iounmap(mpc, size);
+		early_memunmap(mpc, size);
 		return -1;
 	}
-	early_iounmap(mpc, size);
+	early_memunmap(mpc, size);
 
 	if (early)
 		return -1;
@@ -497,12 +498,12 @@ static int __init check_physptr(struct mpf_intel *mpf, unsigned int early)
  */
 void __init default_get_smp_config(unsigned int early)
 {
-	struct mpf_intel *mpf = mpf_found;
+	struct mpf_intel *mpf;
 
 	if (!smp_found_config)
 		return;
 
-	if (!mpf)
+	if (!mpf_base)
 		return;
 
 	if (acpi_lapic && early)
@@ -515,6 +516,12 @@ void __init default_get_smp_config(unsigned int early)
 	if (acpi_lapic && acpi_ioapic)
 		return;
 
+	mpf = early_memremap(mpf_base, sizeof(*mpf));
+	if (!mpf) {
+		pr_err("MPTABLE: error mapping MP table\n");
+		return;
+	}
+
 	pr_info("Intel MultiProcessor Specification v1.%d\n",
 		mpf->specification);
 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32)
@@ -529,7 +536,7 @@ void __init default_get_smp_config(unsigned int early)
 	/*
 	 * Now see if we need to read further.
 	 */
-	if (mpf->feature1 != 0) {
+	if (mpf->feature1) {
 		if (early) {
 			/*
 			 * local APIC has default address
@@ -542,8 +549,10 @@ void __init default_get_smp_config(unsigned int early)
 		construct_default_ISA_mptable(mpf->feature1);
 
 	} else if (mpf->physptr) {
-		if (check_physptr(mpf, early))
+		if (check_physptr(mpf, early)) {
+			early_memunmap(mpf, sizeof(*mpf));
 			return;
+		}
 	} else
 		BUG();
 
@@ -552,6 +561,8 @@ void __init default_get_smp_config(unsigned int early)
 	/*
 	 * Only use the first configuration found.
 	 */
+
+	early_memunmap(mpf, sizeof(*mpf));
 }
 
 static void __init smp_reserve_memory(struct mpf_intel *mpf)
@@ -561,15 +572,16 @@ static void __init smp_reserve_memory(struct mpf_intel *mpf)
 
 static int __init smp_scan_config(unsigned long base, unsigned long length)
 {
-	unsigned int *bp = phys_to_virt(base);
+	unsigned int *bp;
 	struct mpf_intel *mpf;
-	unsigned long mem;
+	int ret = 0;
 
 	apic_printk(APIC_VERBOSE, "Scan for SMP in [mem %#010lx-%#010lx]\n",
 		    base, base + length - 1);
 	BUILD_BUG_ON(sizeof(*mpf) != 16);
 
 	while (length > 0) {
+		bp = early_memremap(base, length);
 		mpf = (struct mpf_intel *)bp;
 		if ((*bp == SMP_MAGIC_IDENT) &&
 		    (mpf->length == 1) &&
@@ -579,24 +591,26 @@ static int __init smp_scan_config(unsigned long base, unsigned long length)
 #ifdef CONFIG_X86_LOCAL_APIC
 			smp_found_config = 1;
 #endif
-			mpf_found = mpf;
+			mpf_base = base;
 
-			pr_info("found SMP MP-table at [mem %#010llx-%#010llx] mapped at [%p]\n",
-				(unsigned long long) virt_to_phys(mpf),
-				(unsigned long long) virt_to_phys(mpf) +
-				sizeof(*mpf) - 1, mpf);
+			pr_info("found SMP MP-table at [mem %#010lx-%#010lx] mapped at [%p]\n",
+				base, base + sizeof(*mpf) - 1, mpf);
 
-			mem = virt_to_phys(mpf);
-			memblock_reserve(mem, sizeof(*mpf));
+			memblock_reserve(base, sizeof(*mpf));
 			if (mpf->physptr)
 				smp_reserve_memory(mpf);
 
-			return 1;
+			ret = 1;
 		}
-		bp += 4;
+		early_memunmap(bp, length);
+
+		if (ret)
+			break;
+
+		base += 16;
 		length -= 16;
 	}
-	return 0;
+	return ret;
 }
 
 void __init default_find_smp_config(void)
@@ -838,29 +852,40 @@ static int __init update_mp_table(void)
 	char oem[10];
 	struct mpf_intel *mpf;
 	struct mpc_table *mpc, *mpc_new;
+	unsigned long size;
 
 	if (!enable_update_mptable)
 		return 0;
 
-	mpf = mpf_found;
-	if (!mpf)
+	if (!mpf_base)
 		return 0;
 
+	mpf = early_memremap(mpf_base, sizeof(*mpf));
+	if (!mpf) {
+		pr_err("MPTABLE: mpf early_memremap() failed\n");
+		return 0;
+	}
+
 	/*
 	 * Now see if we need to go further.
 	 */
-	if (mpf->feature1 != 0)
-		return 0;
+	if (mpf->feature1)
+		goto do_unmap_mpf;
 
 	if (!mpf->physptr)
-		return 0;
+		goto do_unmap_mpf;
 
-	mpc = phys_to_virt(mpf->physptr);
+	size = get_mpc_size(mpf->physptr);
+	mpc = early_memremap(mpf->physptr, size);
+	if (!mpc) {
+		pr_err("MPTABLE: mpc early_memremap() failed\n");
+		goto do_unmap_mpf;
+	}
 
 	if (!smp_check_mpc(mpc, oem, str))
-		return 0;
+		goto do_unmap_mpc;
 
-	pr_info("mpf: %llx\n", (u64)virt_to_phys(mpf));
+	pr_info("mpf: %llx\n", (u64)mpf_base);
 	pr_info("physptr: %x\n", mpf->physptr);
 
 	if (mpc_new_phys && mpc->length > mpc_new_length) {
@@ -878,21 +903,32 @@ static int __init update_mp_table(void)
 		new = mpf_checksum((unsigned char *)mpc, mpc->length);
 		if (old == new) {
 			pr_info("mpc is readonly, please try alloc_mptable instead\n");
-			return 0;
+			goto do_unmap_mpc;
 		}
 		pr_info("use in-position replacing\n");
 	} else {
+		mpc_new = early_memremap(mpc_new_phys, mpc_new_length);
+		if (!mpc_new) {
+			pr_err("MPTABLE: new mpc early_memremap() failed\n");
+			goto do_unmap_mpc;
+		}
 		mpf->physptr = mpc_new_phys;
-		mpc_new = phys_to_virt(mpc_new_phys);
 		memcpy(mpc_new, mpc, mpc->length);
+		early_memunmap(mpc, size);
 		mpc = mpc_new;
+		size = mpc_new_length;
 		/* check if we can modify that */
 		if (mpc_new_phys - mpf->physptr) {
 			struct mpf_intel *mpf_new;
 			/* steal 16 bytes from [0, 1k) */
+			mpf_new = early_memremap(0x400 - 16, sizeof(*mpf_new));
+			if (!mpf_new) {
+				pr_err("MPTABLE: new mpf early_memremap() failed\n");
+				goto do_unmap_mpc;
+			}
 			pr_info("mpf new: %x\n", 0x400 - 16);
-			mpf_new = phys_to_virt(0x400 - 16);
 			memcpy(mpf_new, mpf, 16);
+			early_memunmap(mpf, sizeof(*mpf));
 			mpf = mpf_new;
 			mpf->physptr = mpc_new_phys;
 		}
@@ -909,6 +945,12 @@ static int __init update_mp_table(void)
 	 */
 	replace_intsrc_all(mpc, mpc_new_phys, mpc_new_length);
 
+do_unmap_mpc:
+	early_memunmap(mpc, size);
+
+do_unmap_mpf:
+	early_memunmap(mpf, sizeof(*mpf));
+
 	return 0;
 }
 
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 446c8aa09b9b..35aafc95e4b8 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -39,26 +39,26 @@
 #include <trace/events/nmi.h>
 
 struct nmi_desc {
-	spinlock_t lock;
+	raw_spinlock_t lock;
 	struct list_head head;
 };
 
 static struct nmi_desc nmi_desc[NMI_MAX] = 
 {
 	{
-		.lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[0].lock),
+		.lock = __RAW_SPIN_LOCK_UNLOCKED(&nmi_desc[0].lock),
 		.head = LIST_HEAD_INIT(nmi_desc[0].head),
 	},
 	{
-		.lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[1].lock),
+		.lock = __RAW_SPIN_LOCK_UNLOCKED(&nmi_desc[1].lock),
 		.head = LIST_HEAD_INIT(nmi_desc[1].head),
 	},
 	{
-		.lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[2].lock),
+		.lock = __RAW_SPIN_LOCK_UNLOCKED(&nmi_desc[2].lock),
 		.head = LIST_HEAD_INIT(nmi_desc[2].head),
 	},
 	{
-		.lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[3].lock),
+		.lock = __RAW_SPIN_LOCK_UNLOCKED(&nmi_desc[3].lock),
 		.head = LIST_HEAD_INIT(nmi_desc[3].head),
 	},
 
@@ -163,7 +163,7 @@ int __register_nmi_handler(unsigned int type, struct nmiaction *action)
 
 	init_irq_work(&action->irq_work, nmi_max_handler);
 
-	spin_lock_irqsave(&desc->lock, flags);
+	raw_spin_lock_irqsave(&desc->lock, flags);
 
 	/*
 	 * Indicate if there are multiple registrations on the
@@ -181,7 +181,7 @@ int __register_nmi_handler(unsigned int type, struct nmiaction *action)
 	else
 		list_add_tail_rcu(&action->list, &desc->head);
 	
-	spin_unlock_irqrestore(&desc->lock, flags);
+	raw_spin_unlock_irqrestore(&desc->lock, flags);
 	return 0;
 }
 EXPORT_SYMBOL(__register_nmi_handler);
@@ -192,7 +192,7 @@ void unregister_nmi_handler(unsigned int type, const char *name)
 	struct nmiaction *n;
 	unsigned long flags;
 
-	spin_lock_irqsave(&desc->lock, flags);
+	raw_spin_lock_irqsave(&desc->lock, flags);
 
 	list_for_each_entry_rcu(n, &desc->head, list) {
 		/*
@@ -207,7 +207,7 @@ void unregister_nmi_handler(unsigned int type, const char *name)
 		}
 	}
 
-	spin_unlock_irqrestore(&desc->lock, flags);
+	raw_spin_unlock_irqrestore(&desc->lock, flags);
 	synchronize_rcu();
 }
 EXPORT_SYMBOL_GPL(unregister_nmi_handler);
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index bc0a849589bb..a14df9eecfed 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -319,9 +319,6 @@ __visible struct pv_irq_ops pv_irq_ops = {
 	.irq_enable = __PV_IS_CALLEE_SAVE(native_irq_enable),
 	.safe_halt = native_safe_halt,
 	.halt = native_halt,
-#ifdef CONFIG_X86_64
-	.adjust_exception_frame = paravirt_nop,
-#endif
 };
 
 __visible struct pv_cpu_ops pv_cpu_ops = {
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 5e16d3f29594..0accc2404b92 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -93,9 +93,12 @@ again:
 	if (gfpflags_allow_blocking(flag)) {
 		page = dma_alloc_from_contiguous(dev, count, get_order(size),
 						 flag);
-		if (page && page_to_phys(page) + size > dma_mask) {
-			dma_release_from_contiguous(dev, page, count);
-			page = NULL;
+		if (page) {
+			addr = phys_to_dma(dev, page_to_phys(page));
+			if (addr + size > dma_mask) {
+				dma_release_from_contiguous(dev, page, count);
+				page = NULL;
+			}
 		}
 	}
 	/* fallback */
@@ -104,7 +107,7 @@ again:
 	if (!page)
 		return NULL;
 
-	addr = page_to_phys(page);
+	addr = phys_to_dma(dev, page_to_phys(page));
 	if (addr + size > dma_mask) {
 		__free_pages(page, get_order(size));
 
diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c
index a6d404087fe3..4fc3cb60ea11 100644
--- a/arch/x86/kernel/pci-nommu.c
+++ b/arch/x86/kernel/pci-nommu.c
@@ -32,7 +32,7 @@ static dma_addr_t nommu_map_page(struct device *dev, struct page *page,
 				 enum dma_data_direction dir,
 				 unsigned long attrs)
 {
-	dma_addr_t bus = page_to_phys(page) + offset;
+	dma_addr_t bus = phys_to_dma(dev, page_to_phys(page)) + offset;
 	WARN_ON(size == 0);
 	if (!check_addr("map_single", dev, bus, size))
 		return NOMMU_MAPPING_ERROR;
diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c
index 1e23577e17cf..677077510e30 100644
--- a/arch/x86/kernel/pci-swiotlb.c
+++ b/arch/x86/kernel/pci-swiotlb.c
@@ -6,12 +6,14 @@
 #include <linux/swiotlb.h>
 #include <linux/bootmem.h>
 #include <linux/dma-mapping.h>
+#include <linux/mem_encrypt.h>
 
 #include <asm/iommu.h>
 #include <asm/swiotlb.h>
 #include <asm/dma.h>
 #include <asm/xen/swiotlb-xen.h>
 #include <asm/iommu_table.h>
+
 int swiotlb __read_mostly;
 
 void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
@@ -79,8 +81,8 @@ IOMMU_INIT_FINISH(pci_swiotlb_detect_override,
 		  pci_swiotlb_late_init);
 
 /*
- * if 4GB or more detected (and iommu=off not set) return 1
- * and set swiotlb to 1.
+ * If 4GB or more detected (and iommu=off not set) or if SME is active
+ * then set swiotlb to 1 and return 1.
  */
 int __init pci_swiotlb_detect_4gb(void)
 {
@@ -89,6 +91,15 @@ int __init pci_swiotlb_detect_4gb(void)
 	if (!no_iommu && max_possible_pfn > MAX_DMA32_PFN)
 		swiotlb = 1;
 #endif
+
+	/*
+	 * If SME is active then swiotlb will be set to 1 so that bounce
+	 * buffers are allocated and used for devices that do not support
+	 * the addressing range required for the encryption mask.
+	 */
+	if (sme_active())
+		swiotlb = 1;
+
 	return swiotlb;
 }
 IOMMU_INIT(pci_swiotlb_detect_4gb,
diff --git a/arch/x86/kernel/platform-quirks.c b/arch/x86/kernel/platform-quirks.c
index 91271122f0df..502a77d0adb0 100644
--- a/arch/x86/kernel/platform-quirks.c
+++ b/arch/x86/kernel/platform-quirks.c
@@ -16,7 +16,6 @@ void __init x86_early_init_platform_quirks(void)
 		x86_platform.legacy.reserve_bios_regions = 1;
 		break;
 	case X86_SUBARCH_XEN:
-	case X86_SUBARCH_LGUEST:
 		x86_platform.legacy.devices.pnpbios = 0;
 		x86_platform.legacy.rtc = 0;
 		break;
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 3ca198080ea9..bd6b85fac666 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -355,6 +355,7 @@ bool xen_set_default_idle(void)
 	return ret;
 }
 #endif
+
 void stop_this_cpu(void *dummy)
 {
 	local_irq_disable();
@@ -365,8 +366,20 @@ void stop_this_cpu(void *dummy)
 	disable_local_APIC();
 	mcheck_cpu_clear(this_cpu_ptr(&cpu_info));
 
-	for (;;)
-		halt();
+	for (;;) {
+		/*
+		 * Use wbinvd followed by hlt to stop the processor. This
+		 * provides support for kexec on a processor that supports
+		 * SME. With kexec, going from SME inactive to SME active
+		 * requires clearing cache entries so that addresses without
+		 * the encryption bit set don't corrupt the same physical
+		 * address that has the encryption bit set when caches are
+		 * flushed. To achieve this a wbinvd is performed followed by
+		 * a hlt. Even if the processor is not in the kexec/SME
+		 * scenario this only adds a wbinvd to a halting processor.
+		 */
+		asm volatile("wbinvd; hlt" : : : "memory");
+	}
 }
 
 /*
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index c6d6dc5f8bb2..11966251cd42 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -56,7 +56,7 @@
 #include <asm/debugreg.h>
 #include <asm/switch_to.h>
 #include <asm/vm86.h>
-#include <asm/intel_rdt.h>
+#include <asm/intel_rdt_sched.h>
 #include <asm/proto.h>
 
 void __show_regs(struct pt_regs *regs, int all)
@@ -68,7 +68,7 @@ void __show_regs(struct pt_regs *regs, int all)
 
 	if (user_mode(regs)) {
 		sp = regs->sp;
-		ss = regs->ss & 0xffff;
+		ss = regs->ss;
 		gs = get_user_gs(regs);
 	} else {
 		sp = kernel_stack_pointer(regs);
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index c3169be4c596..302e7b2572d1 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -52,7 +52,7 @@
 #include <asm/switch_to.h>
 #include <asm/xen/hypervisor.h>
 #include <asm/vdso.h>
-#include <asm/intel_rdt.h>
+#include <asm/intel_rdt_sched.h>
 #include <asm/unistd.h>
 #ifdef CONFIG_IA32_EMULATION
 /* Not included via unistd.h */
@@ -69,8 +69,7 @@ void __show_regs(struct pt_regs *regs, int all)
 	unsigned int fsindex, gsindex;
 	unsigned int ds, cs, es;
 
-	printk(KERN_DEFAULT "RIP: %04lx:%pS\n", regs->cs & 0xffff,
-		(void *)regs->ip);
+	printk(KERN_DEFAULT "RIP: %04lx:%pS\n", regs->cs, (void *)regs->ip);
 	printk(KERN_DEFAULT "RSP: %04lx:%016lx EFLAGS: %08lx", regs->ss,
 		regs->sp, regs->flags);
 	if (regs->orig_ax != -1)
@@ -149,6 +148,123 @@ void release_thread(struct task_struct *dead_task)
 	}
 }
 
+enum which_selector {
+	FS,
+	GS
+};
+
+/*
+ * Saves the FS or GS base for an outgoing thread if FSGSBASE extensions are
+ * not available.  The goal is to be reasonably fast on non-FSGSBASE systems.
+ * It's forcibly inlined because it'll generate better code and this function
+ * is hot.
+ */
+static __always_inline void save_base_legacy(struct task_struct *prev_p,
+					     unsigned short selector,
+					     enum which_selector which)
+{
+	if (likely(selector == 0)) {
+		/*
+		 * On Intel (without X86_BUG_NULL_SEG), the segment base could
+		 * be the pre-existing saved base or it could be zero.  On AMD
+		 * (with X86_BUG_NULL_SEG), the segment base could be almost
+		 * anything.
+		 *
+		 * This branch is very hot (it's hit twice on almost every
+		 * context switch between 64-bit programs), and avoiding
+		 * the RDMSR helps a lot, so we just assume that whatever
+		 * value is already saved is correct.  This matches historical
+		 * Linux behavior, so it won't break existing applications.
+		 *
+		 * To avoid leaking state, on non-X86_BUG_NULL_SEG CPUs, if we
+		 * report that the base is zero, it needs to actually be zero:
+		 * see the corresponding logic in load_seg_legacy.
+		 */
+	} else {
+		/*
+		 * If the selector is 1, 2, or 3, then the base is zero on
+		 * !X86_BUG_NULL_SEG CPUs and could be anything on
+		 * X86_BUG_NULL_SEG CPUs.  In the latter case, Linux
+		 * has never attempted to preserve the base across context
+		 * switches.
+		 *
+		 * If selector > 3, then it refers to a real segment, and
+		 * saving the base isn't necessary.
+		 */
+		if (which == FS)
+			prev_p->thread.fsbase = 0;
+		else
+			prev_p->thread.gsbase = 0;
+	}
+}
+
+static __always_inline void save_fsgs(struct task_struct *task)
+{
+	savesegment(fs, task->thread.fsindex);
+	savesegment(gs, task->thread.gsindex);
+	save_base_legacy(task, task->thread.fsindex, FS);
+	save_base_legacy(task, task->thread.gsindex, GS);
+}
+
+static __always_inline void loadseg(enum which_selector which,
+				    unsigned short sel)
+{
+	if (which == FS)
+		loadsegment(fs, sel);
+	else
+		load_gs_index(sel);
+}
+
+static __always_inline void load_seg_legacy(unsigned short prev_index,
+					    unsigned long prev_base,
+					    unsigned short next_index,
+					    unsigned long next_base,
+					    enum which_selector which)
+{
+	if (likely(next_index <= 3)) {
+		/*
+		 * The next task is using 64-bit TLS, is not using this
+		 * segment at all, or is having fun with arcane CPU features.
+		 */
+		if (next_base == 0) {
+			/*
+			 * Nasty case: on AMD CPUs, we need to forcibly zero
+			 * the base.
+			 */
+			if (static_cpu_has_bug(X86_BUG_NULL_SEG)) {
+				loadseg(which, __USER_DS);
+				loadseg(which, next_index);
+			} else {
+				/*
+				 * We could try to exhaustively detect cases
+				 * under which we can skip the segment load,
+				 * but there's really only one case that matters
+				 * for performance: if both the previous and
+				 * next states are fully zeroed, we can skip
+				 * the load.
+				 *
+				 * (This assumes that prev_base == 0 has no
+				 * false positives.  This is the case on
+				 * Intel-style CPUs.)
+				 */
+				if (likely(prev_index | next_index | prev_base))
+					loadseg(which, next_index);
+			}
+		} else {
+			if (prev_index != next_index)
+				loadseg(which, next_index);
+			wrmsrl(which == FS ? MSR_FS_BASE : MSR_KERNEL_GS_BASE,
+			       next_base);
+		}
+	} else {
+		/*
+		 * The next task is using a real segment.  Loading the selector
+		 * is sufficient.
+		 */
+		loadseg(which, next_index);
+	}
+}
+
 int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
 		unsigned long arg, struct task_struct *p, unsigned long tls)
 {
@@ -229,10 +345,19 @@ start_thread_common(struct pt_regs *regs, unsigned long new_ip,
 		    unsigned long new_sp,
 		    unsigned int _cs, unsigned int _ss, unsigned int _ds)
 {
+	WARN_ON_ONCE(regs != current_pt_regs());
+
+	if (static_cpu_has(X86_BUG_NULL_SEG)) {
+		/* Loading zero below won't clear the base. */
+		loadsegment(fs, __USER_DS);
+		load_gs_index(__USER_DS);
+	}
+
 	loadsegment(fs, 0);
 	loadsegment(es, _ds);
 	loadsegment(ds, _ds);
 	load_gs_index(0);
+
 	regs->ip		= new_ip;
 	regs->sp		= new_sp;
 	regs->cs		= _cs;
@@ -277,7 +402,9 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	struct fpu *next_fpu = &next->fpu;
 	int cpu = smp_processor_id();
 	struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
-	unsigned prev_fsindex, prev_gsindex;
+
+	WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) &&
+		     this_cpu_read(irq_count) != -1);
 
 	switch_fpu_prepare(prev_fpu, cpu);
 
@@ -286,8 +413,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	 *
 	 * (e.g. xen_load_tls())
 	 */
-	savesegment(fs, prev_fsindex);
-	savesegment(gs, prev_gsindex);
+	save_fsgs(prev_p);
 
 	/*
 	 * Load TLS before restoring any segments so that segment loads
@@ -326,108 +452,10 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	if (unlikely(next->ds | prev->ds))
 		loadsegment(ds, next->ds);
 
-	/*
-	 * Switch FS and GS.
-	 *
-	 * These are even more complicated than DS and ES: they have
-	 * 64-bit bases are that controlled by arch_prctl.  The bases
-	 * don't necessarily match the selectors, as user code can do
-	 * any number of things to cause them to be inconsistent.
-	 *
-	 * We don't promise to preserve the bases if the selectors are
-	 * nonzero.  We also don't promise to preserve the base if the
-	 * selector is zero and the base doesn't match whatever was
-	 * most recently passed to ARCH_SET_FS/GS.  (If/when the
-	 * FSGSBASE instructions are enabled, we'll need to offer
-	 * stronger guarantees.)
-	 *
-	 * As an invariant,
-	 * (fsbase != 0 && fsindex != 0) || (gsbase != 0 && gsindex != 0) is
-	 * impossible.
-	 */
-	if (next->fsindex) {
-		/* Loading a nonzero value into FS sets the index and base. */
-		loadsegment(fs, next->fsindex);
-	} else {
-		if (next->fsbase) {
-			/* Next index is zero but next base is nonzero. */
-			if (prev_fsindex)
-				loadsegment(fs, 0);
-			wrmsrl(MSR_FS_BASE, next->fsbase);
-		} else {
-			/* Next base and index are both zero. */
-			if (static_cpu_has_bug(X86_BUG_NULL_SEG)) {
-				/*
-				 * We don't know the previous base and can't
-				 * find out without RDMSR.  Forcibly clear it.
-				 */
-				loadsegment(fs, __USER_DS);
-				loadsegment(fs, 0);
-			} else {
-				/*
-				 * If the previous index is zero and ARCH_SET_FS
-				 * didn't change the base, then the base is
-				 * also zero and we don't need to do anything.
-				 */
-				if (prev->fsbase || prev_fsindex)
-					loadsegment(fs, 0);
-			}
-		}
-	}
-	/*
-	 * Save the old state and preserve the invariant.
-	 * NB: if prev_fsindex == 0, then we can't reliably learn the base
-	 * without RDMSR because Intel user code can zero it without telling
-	 * us and AMD user code can program any 32-bit value without telling
-	 * us.
-	 */
-	if (prev_fsindex)
-		prev->fsbase = 0;
-	prev->fsindex = prev_fsindex;
-
-	if (next->gsindex) {
-		/* Loading a nonzero value into GS sets the index and base. */
-		load_gs_index(next->gsindex);
-	} else {
-		if (next->gsbase) {
-			/* Next index is zero but next base is nonzero. */
-			if (prev_gsindex)
-				load_gs_index(0);
-			wrmsrl(MSR_KERNEL_GS_BASE, next->gsbase);
-		} else {
-			/* Next base and index are both zero. */
-			if (static_cpu_has_bug(X86_BUG_NULL_SEG)) {
-				/*
-				 * We don't know the previous base and can't
-				 * find out without RDMSR.  Forcibly clear it.
-				 *
-				 * This contains a pointless SWAPGS pair.
-				 * Fixing it would involve an explicit check
-				 * for Xen or a new pvop.
-				 */
-				load_gs_index(__USER_DS);
-				load_gs_index(0);
-			} else {
-				/*
-				 * If the previous index is zero and ARCH_SET_GS
-				 * didn't change the base, then the base is
-				 * also zero and we don't need to do anything.
-				 */
-				if (prev->gsbase || prev_gsindex)
-					load_gs_index(0);
-			}
-		}
-	}
-	/*
-	 * Save the old state and preserve the invariant.
-	 * NB: if prev_gsindex == 0, then we can't reliably learn the base
-	 * without RDMSR because Intel user code can zero it without telling
-	 * us and AMD user code can program any 32-bit value without telling
-	 * us.
-	 */
-	if (prev_gsindex)
-		prev->gsbase = 0;
-	prev->gsindex = prev_gsindex;
+	load_seg_legacy(prev->fsindex, prev->fsbase,
+			next->fsindex, next->fsbase, FS);
+	load_seg_legacy(prev->gsindex, prev->gsbase,
+			next->gsindex, next->gsbase, GS);
 
 	switch_fpu_finish(next_fpu, cpu);
 
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index 0bee04d41bed..eaa591cfd98b 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -1,6 +1,7 @@
 /*
  * This file contains work-arounds for x86 and x86_64 platform bugs.
  */
+#include <linux/dmi.h>
 #include <linux/pci.h>
 #include <linux/irq.h>
 
@@ -656,3 +657,12 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6fc0, quirk_intel_brickland_xeon_
 DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2083, quirk_intel_purley_xeon_ras_cap);
 #endif
 #endif
+
+bool x86_apple_machine;
+EXPORT_SYMBOL(x86_apple_machine);
+
+void __init early_platform_quirks(void)
+{
+	x86_apple_machine = dmi_match(DMI_SYS_VENDOR, "Apple Inc.") ||
+			    dmi_match(DMI_SYS_VENDOR, "Apple Computer, Inc.");
+}
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index a56bf6051f4e..54984b142641 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -38,8 +38,6 @@
 void (*pm_power_off)(void);
 EXPORT_SYMBOL(pm_power_off);
 
-static const struct desc_ptr no_idt = {};
-
 /*
  * This is set if we need to go through the 'emergency' path.
  * When machine_emergency_restart() is called, we may be on
@@ -638,7 +636,7 @@ static void native_machine_emergency_restart(void)
 			break;
 
 		case BOOT_TRIPLE:
-			load_idt(&no_idt);
+			idt_invalidate(NULL);
 			__asm__ __volatile__("int3");
 
 			/* We're probably dead after this, but... */
diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S
index 98111b38ebfd..307d3bac5f04 100644
--- a/arch/x86/kernel/relocate_kernel_64.S
+++ b/arch/x86/kernel/relocate_kernel_64.S
@@ -47,6 +47,7 @@ relocate_kernel:
 	 * %rsi page_list
 	 * %rdx start address
 	 * %rcx preserve_context
+	 * %r8  sme_active
 	 */
 
 	/* Save the CPU context, used for jumping back */
@@ -71,6 +72,9 @@ relocate_kernel:
 	pushq $0
 	popfq
 
+	/* Save SME active flag */
+	movq	%r8, %r12
+
 	/*
 	 * get physical address of control page now
 	 * this is impossible after page table switch
@@ -132,6 +136,16 @@ identity_mapped:
 	/* Flush the TLB (needed?) */
 	movq	%r9, %cr3
 
+	/*
+	 * If SME is active, there could be old encrypted cache line
+	 * entries that will conflict with the now unencrypted memory
+	 * used by kexec. Flush the caches before copying the kernel.
+	 */
+	testq	%r12, %r12
+	jz 1f
+	wbinvd
+1:
+
 	movq	%rcx, %r11
 	call	swap_pages
 
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 3486d0498800..d84afb0a322d 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -69,6 +69,7 @@
 #include <linux/crash_dump.h>
 #include <linux/tboot.h>
 #include <linux/jiffies.h>
+#include <linux/mem_encrypt.h>
 
 #include <linux/usb/xhci-dbgp.h>
 #include <video/edid.h>
@@ -115,6 +116,7 @@
 #include <asm/microcode.h>
 #include <asm/mmu_context.h>
 #include <asm/kaslr.h>
+#include <asm/unwind.h>
 
 /*
  * max_low_pfn_mapped: highest direct mapped pfn under 4GB
@@ -374,6 +376,14 @@ static void __init reserve_initrd(void)
 	    !ramdisk_image || !ramdisk_size)
 		return;		/* No initrd provided by bootloader */
 
+	/*
+	 * If SME is active, this memory will be marked encrypted by the
+	 * kernel when it is accessed (including relocation). However, the
+	 * ramdisk image was loaded decrypted by the bootloader, so make
+	 * sure that it is encrypted before accessing it.
+	 */
+	sme_early_encrypt(ramdisk_image, ramdisk_end - ramdisk_image);
+
 	initrd_start = 0;
 
 	mapped_size = memblock_mem_size(max_pfn_mapped);
@@ -890,7 +900,7 @@ void __init setup_arch(char **cmdline_p)
 	 */
 	olpc_ofw_detect();
 
-	early_trap_init();
+	idt_setup_early_traps();
 	early_cpu_init();
 	early_ioremap_init();
 
@@ -1161,7 +1171,7 @@ void __init setup_arch(char **cmdline_p)
 
 	init_mem_mapping();
 
-	early_trap_pf_init();
+	idt_setup_early_pf();
 
 	/*
 	 * Update mmu_cr4_features (and, indirectly, trampoline_cr4_features)
@@ -1206,6 +1216,8 @@ void __init setup_arch(char **cmdline_p)
 
 	io_delay_init();
 
+	early_platform_quirks();
+
 	/*
 	 * Parse the ACPI tables for possible boot-time SMP configuration.
 	 */
@@ -1310,6 +1322,8 @@ void __init setup_arch(char **cmdline_p)
 	if (efi_enabled(EFI_BOOT))
 		efi_apply_memmap_quirks();
 #endif
+
+	unwind_init();
 }
 
 #ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 10edd1e69a68..6e8fcb6f7e1e 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -155,13 +155,10 @@ static void __init pcpup_populate_pte(unsigned long addr)
 static inline void setup_percpu_segment(int cpu)
 {
 #ifdef CONFIG_X86_32
-	struct desc_struct gdt;
+	struct desc_struct d = GDT_ENTRY_INIT(0x8092, per_cpu_offset(cpu),
+					      0xFFFFF);
 
-	pack_descriptor(&gdt, per_cpu_offset(cpu), 0xFFFFF,
-			0x2 | DESCTYPE_S, 0x8);
-	gdt.s = 1;
-	write_gdt_entry(get_cpu_gdt_rw(cpu),
-			GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S);
+	write_gdt_entry(get_cpu_gdt_rw(cpu), GDT_ENTRY_PERCPU, &d, DESCTYPE_S);
 #endif
 }
 
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index cc30a74e4adb..e04442345fc0 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -256,7 +256,7 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
 			sp = current->sas_ss_sp + current->sas_ss_size;
 	} else if (IS_ENABLED(CONFIG_X86_32) &&
 		   !onsigstack &&
-		   (regs->ss & 0xffff) != __USER_DS &&
+		   regs->ss != __USER_DS &&
 		   !(ka->sa.sa_flags & SA_RESTORER) &&
 		   ka->sa.sa_restorer) {
 		/* This is the legacy signal stack switching. */
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index d798c0da451c..5c574dff4c1a 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -254,84 +254,45 @@ finish:
 }
 
 /*
- * Reschedule call back.
+ * Reschedule call back. KVM uses this interrupt to force a cpu out of
+ * guest mode
  */
-static inline void __smp_reschedule_interrupt(void)
-{
-	inc_irq_stat(irq_resched_count);
-	scheduler_ipi();
-}
-
 __visible void __irq_entry smp_reschedule_interrupt(struct pt_regs *regs)
 {
 	ack_APIC_irq();
-	__smp_reschedule_interrupt();
-	/*
-	 * KVM uses this interrupt to force a cpu out of guest mode
-	 */
-}
-
-__visible void __irq_entry smp_trace_reschedule_interrupt(struct pt_regs *regs)
-{
-	/*
-	 * Need to call irq_enter() before calling the trace point.
-	 * __smp_reschedule_interrupt() calls irq_enter/exit() too (in
-	 * scheduler_ipi(). This is OK, since those functions are allowed
-	 * to nest.
-	 */
-	ipi_entering_ack_irq();
-	trace_reschedule_entry(RESCHEDULE_VECTOR);
-	__smp_reschedule_interrupt();
-	trace_reschedule_exit(RESCHEDULE_VECTOR);
-	exiting_irq();
-	/*
-	 * KVM uses this interrupt to force a cpu out of guest mode
-	 */
-}
+	inc_irq_stat(irq_resched_count);
 
-static inline void __smp_call_function_interrupt(void)
-{
-	generic_smp_call_function_interrupt();
-	inc_irq_stat(irq_call_count);
+	if (trace_resched_ipi_enabled()) {
+		/*
+		 * scheduler_ipi() might call irq_enter() as well, but
+		 * nested calls are fine.
+		 */
+		irq_enter();
+		trace_reschedule_entry(RESCHEDULE_VECTOR);
+		scheduler_ipi();
+		trace_reschedule_exit(RESCHEDULE_VECTOR);
+		irq_exit();
+		return;
+	}
+	scheduler_ipi();
 }
 
 __visible void __irq_entry smp_call_function_interrupt(struct pt_regs *regs)
 {
 	ipi_entering_ack_irq();
-	__smp_call_function_interrupt();
-	exiting_irq();
-}
-
-__visible void __irq_entry
-smp_trace_call_function_interrupt(struct pt_regs *regs)
-{
-	ipi_entering_ack_irq();
 	trace_call_function_entry(CALL_FUNCTION_VECTOR);
-	__smp_call_function_interrupt();
-	trace_call_function_exit(CALL_FUNCTION_VECTOR);
-	exiting_irq();
-}
-
-static inline void __smp_call_function_single_interrupt(void)
-{
-	generic_smp_call_function_single_interrupt();
 	inc_irq_stat(irq_call_count);
-}
-
-__visible void __irq_entry
-smp_call_function_single_interrupt(struct pt_regs *regs)
-{
-	ipi_entering_ack_irq();
-	__smp_call_function_single_interrupt();
+	generic_smp_call_function_interrupt();
+	trace_call_function_exit(CALL_FUNCTION_VECTOR);
 	exiting_irq();
 }
 
-__visible void __irq_entry
-smp_trace_call_function_single_interrupt(struct pt_regs *regs)
+__visible void __irq_entry smp_call_function_single_interrupt(struct pt_regs *r)
 {
 	ipi_entering_ack_irq();
 	trace_call_function_single_entry(CALL_FUNCTION_SINGLE_VECTOR);
-	__smp_call_function_single_interrupt();
+	inc_irq_stat(irq_call_count);
+	generic_smp_call_function_single_interrupt();
 	trace_call_function_single_exit(CALL_FUNCTION_SINGLE_VECTOR);
 	exiting_irq();
 }
diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c
index 5f25cfbd952e..5ee663836c08 100644
--- a/arch/x86/kernel/step.c
+++ b/arch/x86/kernel/step.c
@@ -13,7 +13,7 @@ unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *re
 	unsigned long addr, seg;
 
 	addr = regs->ip;
-	seg = regs->cs & 0xffff;
+	seg = regs->cs;
 	if (v8086_mode(regs)) {
 		addr = (addr & 0xffff) + (seg << 4);
 		return addr;
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index 213ddf3e937d..73e4d28112f8 100644
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -21,6 +21,7 @@
 #include <asm/compat.h>
 #include <asm/ia32.h>
 #include <asm/syscalls.h>
+#include <asm/mpx.h>
 
 /*
  * Align a virtual address to avoid aliasing in the I$ on AMD F15h.
@@ -100,8 +101,8 @@ out:
 	return error;
 }
 
-static void find_start_end(unsigned long flags, unsigned long *begin,
-			   unsigned long *end)
+static void find_start_end(unsigned long addr, unsigned long flags,
+		unsigned long *begin, unsigned long *end)
 {
 	if (!in_compat_syscall() && (flags & MAP_32BIT)) {
 		/* This is usually used needed to map code in small
@@ -120,7 +121,10 @@ static void find_start_end(unsigned long flags, unsigned long *begin,
 	}
 
 	*begin	= get_mmap_base(1);
-	*end	= in_compat_syscall() ? tasksize_32bit() : tasksize_64bit();
+	if (in_compat_syscall())
+		*end = task_size_32bit();
+	else
+		*end = task_size_64bit(addr > DEFAULT_MAP_WINDOW);
 }
 
 unsigned long
@@ -132,10 +136,14 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
 	struct vm_unmapped_area_info info;
 	unsigned long begin, end;
 
+	addr = mpx_unmapped_area_check(addr, len, flags);
+	if (IS_ERR_VALUE(addr))
+		return addr;
+
 	if (flags & MAP_FIXED)
 		return addr;
 
-	find_start_end(flags, &begin, &end);
+	find_start_end(addr, flags, &begin, &end);
 
 	if (len > end)
 		return -ENOMEM;
@@ -171,6 +179,10 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 	unsigned long addr = addr0;
 	struct vm_unmapped_area_info info;
 
+	addr = mpx_unmapped_area_check(addr, len, flags);
+	if (IS_ERR_VALUE(addr))
+		return addr;
+
 	/* requested length too big for entire address space */
 	if (len > TASK_SIZE)
 		return -ENOMEM;
@@ -195,6 +207,16 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 	info.length = len;
 	info.low_limit = PAGE_SIZE;
 	info.high_limit = get_mmap_base(0);
+
+	/*
+	 * If hint address is above DEFAULT_MAP_WINDOW, look for unmapped area
+	 * in the full address space.
+	 *
+	 * !in_compat_syscall() check to avoid high addresses for x32.
+	 */
+	if (addr > DEFAULT_MAP_WINDOW && !in_compat_syscall())
+		info.high_limit += TASK_SIZE_MAX - DEFAULT_MAP_WINDOW;
+
 	info.align_mask = 0;
 	info.align_offset = pgoff << PAGE_SHIFT;
 	if (filp) {
diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c
index dcd699baea1b..a106b9719c58 100644
--- a/arch/x86/kernel/tls.c
+++ b/arch/x86/kernel/tls.c
@@ -93,7 +93,7 @@ static void set_tls_desc(struct task_struct *p, int idx,
 
 	while (n-- > 0) {
 		if (LDT_empty(info) || LDT_zero(info)) {
-			desc->a = desc->b = 0;
+			memset(desc, 0, sizeof(*desc));
 		} else {
 			fill_ldt(desc, info);
 
diff --git a/arch/x86/kernel/tracepoint.c b/arch/x86/kernel/tracepoint.c
index 15515132bf0d..c6636d1f60b9 100644
--- a/arch/x86/kernel/tracepoint.c
+++ b/arch/x86/kernel/tracepoint.c
@@ -4,57 +4,38 @@
  * Copyright (C) 2013 Seiji Aguchi <seiji.aguchi@hds.com>
  *
  */
-#include <asm/hw_irq.h>
-#include <asm/desc.h>
+#include <linux/jump_label.h>
 #include <linux/atomic.h>
 
-atomic_t trace_idt_ctr = ATOMIC_INIT(0);
-struct desc_ptr trace_idt_descr = { NR_VECTORS * 16 - 1,
-				(unsigned long) trace_idt_table };
-
-/* No need to be aligned, but done to keep all IDTs defined the same way. */
-gate_desc trace_idt_table[NR_VECTORS] __page_aligned_bss;
+#include <asm/hw_irq.h>
+#include <asm/desc.h>
 
-static int trace_irq_vector_refcount;
-static DEFINE_MUTEX(irq_vector_mutex);
+DEFINE_STATIC_KEY_FALSE(trace_pagefault_key);
 
-static void set_trace_idt_ctr(int val)
+int trace_pagefault_reg(void)
 {
-	atomic_set(&trace_idt_ctr, val);
-	/* Ensure the trace_idt_ctr is set before sending IPI */
-	wmb();
+	static_branch_inc(&trace_pagefault_key);
+	return 0;
 }
 
-static void switch_idt(void *arg)
+void trace_pagefault_unreg(void)
 {
-	unsigned long flags;
-
-	local_irq_save(flags);
-	load_current_idt();
-	local_irq_restore(flags);
+	static_branch_dec(&trace_pagefault_key);
 }
 
-int trace_irq_vector_regfunc(void)
+#ifdef CONFIG_SMP
+
+DEFINE_STATIC_KEY_FALSE(trace_resched_ipi_key);
+
+int trace_resched_ipi_reg(void)
 {
-	mutex_lock(&irq_vector_mutex);
-	if (!trace_irq_vector_refcount) {
-		set_trace_idt_ctr(1);
-		smp_call_function(switch_idt, NULL, 0);
-		switch_idt(NULL);
-	}
-	trace_irq_vector_refcount++;
-	mutex_unlock(&irq_vector_mutex);
+	static_branch_inc(&trace_resched_ipi_key);
 	return 0;
 }
 
-void trace_irq_vector_unregfunc(void)
+void trace_resched_ipi_unreg(void)
 {
-	mutex_lock(&irq_vector_mutex);
-	trace_irq_vector_refcount--;
-	if (!trace_irq_vector_refcount) {
-		set_trace_idt_ctr(0);
-		smp_call_function(switch_idt, NULL, 0);
-		switch_idt(NULL);
-	}
-	mutex_unlock(&irq_vector_mutex);
+	static_branch_dec(&trace_resched_ipi_key);
 }
+
+#endif
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index bf54309b85da..34ea3651362e 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -38,11 +38,6 @@
 #include <linux/smp.h>
 #include <linux/io.h>
 
-#ifdef CONFIG_EISA
-#include <linux/ioport.h>
-#include <linux/eisa.h>
-#endif
-
 #if defined(CONFIG_EDAC)
 #include <linux/edac.h>
 #endif
@@ -70,20 +65,13 @@
 #include <asm/x86_init.h>
 #include <asm/pgalloc.h>
 #include <asm/proto.h>
-
-/* No need to be aligned, but done to keep all IDTs defined the same way. */
-gate_desc debug_idt_table[NR_VECTORS] __page_aligned_bss;
 #else
 #include <asm/processor-flags.h>
 #include <asm/setup.h>
 #include <asm/proto.h>
 #endif
 
-/* Must be page-aligned because the real IDT is used in a fixmap. */
-gate_desc idt_table[NR_VECTORS] __page_aligned_bss;
-
 DECLARE_BITMAP(used_vectors, NR_VECTORS);
-EXPORT_SYMBOL_GPL(used_vectors);
 
 static inline void cond_local_irq_enable(struct pt_regs *regs)
 {
@@ -935,87 +923,9 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
 }
 #endif
 
-/* Set of traps needed for early debugging. */
-void __init early_trap_init(void)
-{
-	/*
-	 * Don't use IST to set DEBUG_STACK as it doesn't work until TSS
-	 * is ready in cpu_init() <-- trap_init(). Before trap_init(),
-	 * CPU runs at ring 0 so it is impossible to hit an invalid
-	 * stack.  Using the original stack works well enough at this
-	 * early stage. DEBUG_STACK will be equipped after cpu_init() in
-	 * trap_init().
-	 *
-	 * We don't need to set trace_idt_table like set_intr_gate(),
-	 * since we don't have trace_debug and it will be reset to
-	 * 'debug' in trap_init() by set_intr_gate_ist().
-	 */
-	set_intr_gate_notrace(X86_TRAP_DB, debug);
-	/* int3 can be called from all */
-	set_system_intr_gate(X86_TRAP_BP, &int3);
-#ifdef CONFIG_X86_32
-	set_intr_gate(X86_TRAP_PF, page_fault);
-#endif
-	load_idt(&idt_descr);
-}
-
-void __init early_trap_pf_init(void)
-{
-#ifdef CONFIG_X86_64
-	set_intr_gate(X86_TRAP_PF, page_fault);
-#endif
-}
-
 void __init trap_init(void)
 {
-	int i;
-
-#ifdef CONFIG_EISA
-	void __iomem *p = early_ioremap(0x0FFFD9, 4);
-
-	if (readl(p) == 'E' + ('I'<<8) + ('S'<<16) + ('A'<<24))
-		EISA_bus = 1;
-	early_iounmap(p, 4);
-#endif
-
-	set_intr_gate(X86_TRAP_DE, divide_error);
-	set_intr_gate_ist(X86_TRAP_NMI, &nmi, NMI_STACK);
-	/* int4 can be called from all */
-	set_system_intr_gate(X86_TRAP_OF, &overflow);
-	set_intr_gate(X86_TRAP_BR, bounds);
-	set_intr_gate(X86_TRAP_UD, invalid_op);
-	set_intr_gate(X86_TRAP_NM, device_not_available);
-#ifdef CONFIG_X86_32
-	set_task_gate(X86_TRAP_DF, GDT_ENTRY_DOUBLEFAULT_TSS);
-#else
-	set_intr_gate_ist(X86_TRAP_DF, &double_fault, DOUBLEFAULT_STACK);
-#endif
-	set_intr_gate(X86_TRAP_OLD_MF, coprocessor_segment_overrun);
-	set_intr_gate(X86_TRAP_TS, invalid_TSS);
-	set_intr_gate(X86_TRAP_NP, segment_not_present);
-	set_intr_gate(X86_TRAP_SS, stack_segment);
-	set_intr_gate(X86_TRAP_GP, general_protection);
-	set_intr_gate(X86_TRAP_SPURIOUS, spurious_interrupt_bug);
-	set_intr_gate(X86_TRAP_MF, coprocessor_error);
-	set_intr_gate(X86_TRAP_AC, alignment_check);
-#ifdef CONFIG_X86_MCE
-	set_intr_gate_ist(X86_TRAP_MC, &machine_check, MCE_STACK);
-#endif
-	set_intr_gate(X86_TRAP_XF, simd_coprocessor_error);
-
-	/* Reserve all the builtin and the syscall vector: */
-	for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++)
-		set_bit(i, used_vectors);
-
-#ifdef CONFIG_IA32_EMULATION
-	set_system_intr_gate(IA32_SYSCALL_VECTOR, entry_INT80_compat);
-	set_bit(IA32_SYSCALL_VECTOR, used_vectors);
-#endif
-
-#ifdef CONFIG_X86_32
-	set_system_intr_gate(IA32_SYSCALL_VECTOR, entry_INT80_32);
-	set_bit(IA32_SYSCALL_VECTOR, used_vectors);
-#endif
+	idt_setup_traps();
 
 	/*
 	 * Set the IDT descriptor to a fixed read-only location, so that the
@@ -1030,20 +940,9 @@ void __init trap_init(void)
 	 */
 	cpu_init();
 
-	/*
-	 * X86_TRAP_DB and X86_TRAP_BP have been set
-	 * in early_trap_init(). However, ITS works only after
-	 * cpu_init() loads TSS. See comments in early_trap_init().
-	 */
-	set_intr_gate_ist(X86_TRAP_DB, &debug, DEBUG_STACK);
-	/* int3 can be called from all */
-	set_system_intr_gate_ist(X86_TRAP_BP, &int3, DEBUG_STACK);
+	idt_setup_ist_traps();
 
 	x86_init.irqs.trap_init();
 
-#ifdef CONFIG_X86_64
-	memcpy(&debug_idt_table, &idt_table, IDT_ENTRIES * 16);
-	set_nmi_gate(X86_TRAP_DB, &debug);
-	set_nmi_gate(X86_TRAP_BP, &int3);
-#endif
+	idt_setup_debugidt_traps();
 }
diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c
index b9389d72b2f7..d145a0b1f529 100644
--- a/arch/x86/kernel/unwind_frame.c
+++ b/arch/x86/kernel/unwind_frame.c
@@ -10,20 +10,22 @@
 
 #define FRAME_HEADER_SIZE (sizeof(long) * 2)
 
-/*
- * This disables KASAN checking when reading a value from another task's stack,
- * since the other task could be running on another CPU and could have poisoned
- * the stack in the meantime.
- */
-#define READ_ONCE_TASK_STACK(task, x)			\
-({							\
-	unsigned long val;				\
-	if (task == current)				\
-		val = READ_ONCE(x);			\
-	else						\
-		val = READ_ONCE_NOCHECK(x);		\
-	val;						\
-})
+unsigned long unwind_get_return_address(struct unwind_state *state)
+{
+	if (unwind_done(state))
+		return 0;
+
+	return __kernel_text_address(state->ip) ? state->ip : 0;
+}
+EXPORT_SYMBOL_GPL(unwind_get_return_address);
+
+unsigned long *unwind_get_return_address_ptr(struct unwind_state *state)
+{
+	if (unwind_done(state))
+		return NULL;
+
+	return state->regs ? &state->regs->ip : state->bp + 1;
+}
 
 static void unwind_dump(struct unwind_state *state)
 {
@@ -66,15 +68,6 @@ static void unwind_dump(struct unwind_state *state)
 	}
 }
 
-unsigned long unwind_get_return_address(struct unwind_state *state)
-{
-	if (unwind_done(state))
-		return 0;
-
-	return __kernel_text_address(state->ip) ? state->ip : 0;
-}
-EXPORT_SYMBOL_GPL(unwind_get_return_address);
-
 static size_t regs_size(struct pt_regs *regs)
 {
 	/* x86_32 regs from kernel mode are two words shorter: */
@@ -91,10 +84,8 @@ static bool in_entry_code(unsigned long ip)
 	if (addr >= __entry_text_start && addr < __entry_text_end)
 		return true;
 
-#if defined(CONFIG_FUNCTION_GRAPH_TRACER) || defined(CONFIG_KASAN)
 	if (addr >= __irqentry_text_start && addr < __irqentry_text_end)
 		return true;
-#endif
 
 	return false;
 }
diff --git a/arch/x86/kernel/unwind_guess.c b/arch/x86/kernel/unwind_guess.c
index 039f36738e49..4f0e17b90463 100644
--- a/arch/x86/kernel/unwind_guess.c
+++ b/arch/x86/kernel/unwind_guess.c
@@ -19,6 +19,11 @@ unsigned long unwind_get_return_address(struct unwind_state *state)
 }
 EXPORT_SYMBOL_GPL(unwind_get_return_address);
 
+unsigned long *unwind_get_return_address_ptr(struct unwind_state *state)
+{
+	return NULL;
+}
+
 bool unwind_next_frame(struct unwind_state *state)
 {
 	struct stack_info *info = &state->stack_info;
diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
new file mode 100644
index 000000000000..570b70d3f604
--- /dev/null
+++ b/arch/x86/kernel/unwind_orc.c
@@ -0,0 +1,582 @@
+#include <linux/module.h>
+#include <linux/sort.h>
+#include <asm/ptrace.h>
+#include <asm/stacktrace.h>
+#include <asm/unwind.h>
+#include <asm/orc_types.h>
+#include <asm/orc_lookup.h>
+#include <asm/sections.h>
+
+#define orc_warn(fmt, ...) \
+	printk_deferred_once(KERN_WARNING pr_fmt("WARNING: " fmt), ##__VA_ARGS__)
+
+extern int __start_orc_unwind_ip[];
+extern int __stop_orc_unwind_ip[];
+extern struct orc_entry __start_orc_unwind[];
+extern struct orc_entry __stop_orc_unwind[];
+
+static DEFINE_MUTEX(sort_mutex);
+int *cur_orc_ip_table = __start_orc_unwind_ip;
+struct orc_entry *cur_orc_table = __start_orc_unwind;
+
+unsigned int lookup_num_blocks;
+bool orc_init;
+
+static inline unsigned long orc_ip(const int *ip)
+{
+	return (unsigned long)ip + *ip;
+}
+
+static struct orc_entry *__orc_find(int *ip_table, struct orc_entry *u_table,
+				    unsigned int num_entries, unsigned long ip)
+{
+	int *first = ip_table;
+	int *last = ip_table + num_entries - 1;
+	int *mid = first, *found = first;
+
+	if (!num_entries)
+		return NULL;
+
+	/*
+	 * Do a binary range search to find the rightmost duplicate of a given
+	 * starting address.  Some entries are section terminators which are
+	 * "weak" entries for ensuring there are no gaps.  They should be
+	 * ignored when they conflict with a real entry.
+	 */
+	while (first <= last) {
+		mid = first + ((last - first) / 2);
+
+		if (orc_ip(mid) <= ip) {
+			found = mid;
+			first = mid + 1;
+		} else
+			last = mid - 1;
+	}
+
+	return u_table + (found - ip_table);
+}
+
+#ifdef CONFIG_MODULES
+static struct orc_entry *orc_module_find(unsigned long ip)
+{
+	struct module *mod;
+
+	mod = __module_address(ip);
+	if (!mod || !mod->arch.orc_unwind || !mod->arch.orc_unwind_ip)
+		return NULL;
+	return __orc_find(mod->arch.orc_unwind_ip, mod->arch.orc_unwind,
+			  mod->arch.num_orcs, ip);
+}
+#else
+static struct orc_entry *orc_module_find(unsigned long ip)
+{
+	return NULL;
+}
+#endif
+
+static struct orc_entry *orc_find(unsigned long ip)
+{
+	if (!orc_init)
+		return NULL;
+
+	/* For non-init vmlinux addresses, use the fast lookup table: */
+	if (ip >= LOOKUP_START_IP && ip < LOOKUP_STOP_IP) {
+		unsigned int idx, start, stop;
+
+		idx = (ip - LOOKUP_START_IP) / LOOKUP_BLOCK_SIZE;
+
+		if (unlikely((idx >= lookup_num_blocks-1))) {
+			orc_warn("WARNING: bad lookup idx: idx=%u num=%u ip=%lx\n",
+				 idx, lookup_num_blocks, ip);
+			return NULL;
+		}
+
+		start = orc_lookup[idx];
+		stop = orc_lookup[idx + 1] + 1;
+
+		if (unlikely((__start_orc_unwind + start >= __stop_orc_unwind) ||
+			     (__start_orc_unwind + stop > __stop_orc_unwind))) {
+			orc_warn("WARNING: bad lookup value: idx=%u num=%u start=%u stop=%u ip=%lx\n",
+				 idx, lookup_num_blocks, start, stop, ip);
+			return NULL;
+		}
+
+		return __orc_find(__start_orc_unwind_ip + start,
+				  __start_orc_unwind + start, stop - start, ip);
+	}
+
+	/* vmlinux .init slow lookup: */
+	if (ip >= (unsigned long)_sinittext && ip < (unsigned long)_einittext)
+		return __orc_find(__start_orc_unwind_ip, __start_orc_unwind,
+				  __stop_orc_unwind_ip - __start_orc_unwind_ip, ip);
+
+	/* Module lookup: */
+	return orc_module_find(ip);
+}
+
+static void orc_sort_swap(void *_a, void *_b, int size)
+{
+	struct orc_entry *orc_a, *orc_b;
+	struct orc_entry orc_tmp;
+	int *a = _a, *b = _b, tmp;
+	int delta = _b - _a;
+
+	/* Swap the .orc_unwind_ip entries: */
+	tmp = *a;
+	*a = *b + delta;
+	*b = tmp - delta;
+
+	/* Swap the corresponding .orc_unwind entries: */
+	orc_a = cur_orc_table + (a - cur_orc_ip_table);
+	orc_b = cur_orc_table + (b - cur_orc_ip_table);
+	orc_tmp = *orc_a;
+	*orc_a = *orc_b;
+	*orc_b = orc_tmp;
+}
+
+static int orc_sort_cmp(const void *_a, const void *_b)
+{
+	struct orc_entry *orc_a;
+	const int *a = _a, *b = _b;
+	unsigned long a_val = orc_ip(a);
+	unsigned long b_val = orc_ip(b);
+
+	if (a_val > b_val)
+		return 1;
+	if (a_val < b_val)
+		return -1;
+
+	/*
+	 * The "weak" section terminator entries need to always be on the left
+	 * to ensure the lookup code skips them in favor of real entries.
+	 * These terminator entries exist to handle any gaps created by
+	 * whitelisted .o files which didn't get objtool generation.
+	 */
+	orc_a = cur_orc_table + (a - cur_orc_ip_table);
+	return orc_a->sp_reg == ORC_REG_UNDEFINED ? -1 : 1;
+}
+
+#ifdef CONFIG_MODULES
+void unwind_module_init(struct module *mod, void *_orc_ip, size_t orc_ip_size,
+			void *_orc, size_t orc_size)
+{
+	int *orc_ip = _orc_ip;
+	struct orc_entry *orc = _orc;
+	unsigned int num_entries = orc_ip_size / sizeof(int);
+
+	WARN_ON_ONCE(orc_ip_size % sizeof(int) != 0 ||
+		     orc_size % sizeof(*orc) != 0 ||
+		     num_entries != orc_size / sizeof(*orc));
+
+	/*
+	 * The 'cur_orc_*' globals allow the orc_sort_swap() callback to
+	 * associate an .orc_unwind_ip table entry with its corresponding
+	 * .orc_unwind entry so they can both be swapped.
+	 */
+	mutex_lock(&sort_mutex);
+	cur_orc_ip_table = orc_ip;
+	cur_orc_table = orc;
+	sort(orc_ip, num_entries, sizeof(int), orc_sort_cmp, orc_sort_swap);
+	mutex_unlock(&sort_mutex);
+
+	mod->arch.orc_unwind_ip = orc_ip;
+	mod->arch.orc_unwind = orc;
+	mod->arch.num_orcs = num_entries;
+}
+#endif
+
+void __init unwind_init(void)
+{
+	size_t orc_ip_size = (void *)__stop_orc_unwind_ip - (void *)__start_orc_unwind_ip;
+	size_t orc_size = (void *)__stop_orc_unwind - (void *)__start_orc_unwind;
+	size_t num_entries = orc_ip_size / sizeof(int);
+	struct orc_entry *orc;
+	int i;
+
+	if (!num_entries || orc_ip_size % sizeof(int) != 0 ||
+	    orc_size % sizeof(struct orc_entry) != 0 ||
+	    num_entries != orc_size / sizeof(struct orc_entry)) {
+		orc_warn("WARNING: Bad or missing .orc_unwind table.  Disabling unwinder.\n");
+		return;
+	}
+
+	/* Sort the .orc_unwind and .orc_unwind_ip tables: */
+	sort(__start_orc_unwind_ip, num_entries, sizeof(int), orc_sort_cmp,
+	     orc_sort_swap);
+
+	/* Initialize the fast lookup table: */
+	lookup_num_blocks = orc_lookup_end - orc_lookup;
+	for (i = 0; i < lookup_num_blocks-1; i++) {
+		orc = __orc_find(__start_orc_unwind_ip, __start_orc_unwind,
+				 num_entries,
+				 LOOKUP_START_IP + (LOOKUP_BLOCK_SIZE * i));
+		if (!orc) {
+			orc_warn("WARNING: Corrupt .orc_unwind table.  Disabling unwinder.\n");
+			return;
+		}
+
+		orc_lookup[i] = orc - __start_orc_unwind;
+	}
+
+	/* Initialize the ending block: */
+	orc = __orc_find(__start_orc_unwind_ip, __start_orc_unwind, num_entries,
+			 LOOKUP_STOP_IP);
+	if (!orc) {
+		orc_warn("WARNING: Corrupt .orc_unwind table.  Disabling unwinder.\n");
+		return;
+	}
+	orc_lookup[lookup_num_blocks-1] = orc - __start_orc_unwind;
+
+	orc_init = true;
+}
+
+unsigned long unwind_get_return_address(struct unwind_state *state)
+{
+	if (unwind_done(state))
+		return 0;
+
+	return __kernel_text_address(state->ip) ? state->ip : 0;
+}
+EXPORT_SYMBOL_GPL(unwind_get_return_address);
+
+unsigned long *unwind_get_return_address_ptr(struct unwind_state *state)
+{
+	if (unwind_done(state))
+		return NULL;
+
+	if (state->regs)
+		return &state->regs->ip;
+
+	if (state->sp)
+		return (unsigned long *)state->sp - 1;
+
+	return NULL;
+}
+
+static bool stack_access_ok(struct unwind_state *state, unsigned long addr,
+			    size_t len)
+{
+	struct stack_info *info = &state->stack_info;
+
+	/*
+	 * If the address isn't on the current stack, switch to the next one.
+	 *
+	 * We may have to traverse multiple stacks to deal with the possibility
+	 * that info->next_sp could point to an empty stack and the address
+	 * could be on a subsequent stack.
+	 */
+	while (!on_stack(info, (void *)addr, len))
+		if (get_stack_info(info->next_sp, state->task, info,
+				   &state->stack_mask))
+			return false;
+
+	return true;
+}
+
+static bool deref_stack_reg(struct unwind_state *state, unsigned long addr,
+			    unsigned long *val)
+{
+	if (!stack_access_ok(state, addr, sizeof(long)))
+		return false;
+
+	*val = READ_ONCE_TASK_STACK(state->task, *(unsigned long *)addr);
+	return true;
+}
+
+#define REGS_SIZE (sizeof(struct pt_regs))
+#define SP_OFFSET (offsetof(struct pt_regs, sp))
+#define IRET_REGS_SIZE (REGS_SIZE - offsetof(struct pt_regs, ip))
+#define IRET_SP_OFFSET (SP_OFFSET - offsetof(struct pt_regs, ip))
+
+static bool deref_stack_regs(struct unwind_state *state, unsigned long addr,
+			     unsigned long *ip, unsigned long *sp, bool full)
+{
+	size_t regs_size = full ? REGS_SIZE : IRET_REGS_SIZE;
+	size_t sp_offset = full ? SP_OFFSET : IRET_SP_OFFSET;
+	struct pt_regs *regs = (struct pt_regs *)(addr + regs_size - REGS_SIZE);
+
+	if (IS_ENABLED(CONFIG_X86_64)) {
+		if (!stack_access_ok(state, addr, regs_size))
+			return false;
+
+		*ip = regs->ip;
+		*sp = regs->sp;
+
+		return true;
+	}
+
+	if (!stack_access_ok(state, addr, sp_offset))
+		return false;
+
+	*ip = regs->ip;
+
+	if (user_mode(regs)) {
+		if (!stack_access_ok(state, addr + sp_offset,
+				     REGS_SIZE - SP_OFFSET))
+			return false;
+
+		*sp = regs->sp;
+	} else
+		*sp = (unsigned long)&regs->sp;
+
+	return true;
+}
+
+bool unwind_next_frame(struct unwind_state *state)
+{
+	unsigned long ip_p, sp, orig_ip, prev_sp = state->sp;
+	enum stack_type prev_type = state->stack_info.type;
+	struct orc_entry *orc;
+	struct pt_regs *ptregs;
+	bool indirect = false;
+
+	if (unwind_done(state))
+		return false;
+
+	/* Don't let modules unload while we're reading their ORC data. */
+	preempt_disable();
+
+	/* Have we reached the end? */
+	if (state->regs && user_mode(state->regs))
+		goto done;
+
+	/*
+	 * Find the orc_entry associated with the text address.
+	 *
+	 * Decrement call return addresses by one so they work for sibling
+	 * calls and calls to noreturn functions.
+	 */
+	orc = orc_find(state->signal ? state->ip : state->ip - 1);
+	if (!orc || orc->sp_reg == ORC_REG_UNDEFINED)
+		goto done;
+	orig_ip = state->ip;
+
+	/* Find the previous frame's stack: */
+	switch (orc->sp_reg) {
+	case ORC_REG_SP:
+		sp = state->sp + orc->sp_offset;
+		break;
+
+	case ORC_REG_BP:
+		sp = state->bp + orc->sp_offset;
+		break;
+
+	case ORC_REG_SP_INDIRECT:
+		sp = state->sp + orc->sp_offset;
+		indirect = true;
+		break;
+
+	case ORC_REG_BP_INDIRECT:
+		sp = state->bp + orc->sp_offset;
+		indirect = true;
+		break;
+
+	case ORC_REG_R10:
+		if (!state->regs || !state->full_regs) {
+			orc_warn("missing regs for base reg R10 at ip %p\n",
+				 (void *)state->ip);
+			goto done;
+		}
+		sp = state->regs->r10;
+		break;
+
+	case ORC_REG_R13:
+		if (!state->regs || !state->full_regs) {
+			orc_warn("missing regs for base reg R13 at ip %p\n",
+				 (void *)state->ip);
+			goto done;
+		}
+		sp = state->regs->r13;
+		break;
+
+	case ORC_REG_DI:
+		if (!state->regs || !state->full_regs) {
+			orc_warn("missing regs for base reg DI at ip %p\n",
+				 (void *)state->ip);
+			goto done;
+		}
+		sp = state->regs->di;
+		break;
+
+	case ORC_REG_DX:
+		if (!state->regs || !state->full_regs) {
+			orc_warn("missing regs for base reg DX at ip %p\n",
+				 (void *)state->ip);
+			goto done;
+		}
+		sp = state->regs->dx;
+		break;
+
+	default:
+		orc_warn("unknown SP base reg %d for ip %p\n",
+			 orc->sp_reg, (void *)state->ip);
+		goto done;
+	}
+
+	if (indirect) {
+		if (!deref_stack_reg(state, sp, &sp))
+			goto done;
+	}
+
+	/* Find IP, SP and possibly regs: */
+	switch (orc->type) {
+	case ORC_TYPE_CALL:
+		ip_p = sp - sizeof(long);
+
+		if (!deref_stack_reg(state, ip_p, &state->ip))
+			goto done;
+
+		state->ip = ftrace_graph_ret_addr(state->task, &state->graph_idx,
+						  state->ip, (void *)ip_p);
+
+		state->sp = sp;
+		state->regs = NULL;
+		state->signal = false;
+		break;
+
+	case ORC_TYPE_REGS:
+		if (!deref_stack_regs(state, sp, &state->ip, &state->sp, true)) {
+			orc_warn("can't dereference registers at %p for ip %p\n",
+				 (void *)sp, (void *)orig_ip);
+			goto done;
+		}
+
+		state->regs = (struct pt_regs *)sp;
+		state->full_regs = true;
+		state->signal = true;
+		break;
+
+	case ORC_TYPE_REGS_IRET:
+		if (!deref_stack_regs(state, sp, &state->ip, &state->sp, false)) {
+			orc_warn("can't dereference iret registers at %p for ip %p\n",
+				 (void *)sp, (void *)orig_ip);
+			goto done;
+		}
+
+		ptregs = container_of((void *)sp, struct pt_regs, ip);
+		if ((unsigned long)ptregs >= prev_sp &&
+		    on_stack(&state->stack_info, ptregs, REGS_SIZE)) {
+			state->regs = ptregs;
+			state->full_regs = false;
+		} else
+			state->regs = NULL;
+
+		state->signal = true;
+		break;
+
+	default:
+		orc_warn("unknown .orc_unwind entry type %d\n", orc->type);
+		break;
+	}
+
+	/* Find BP: */
+	switch (orc->bp_reg) {
+	case ORC_REG_UNDEFINED:
+		if (state->regs && state->full_regs)
+			state->bp = state->regs->bp;
+		break;
+
+	case ORC_REG_PREV_SP:
+		if (!deref_stack_reg(state, sp + orc->bp_offset, &state->bp))
+			goto done;
+		break;
+
+	case ORC_REG_BP:
+		if (!deref_stack_reg(state, state->bp + orc->bp_offset, &state->bp))
+			goto done;
+		break;
+
+	default:
+		orc_warn("unknown BP base reg %d for ip %p\n",
+			 orc->bp_reg, (void *)orig_ip);
+		goto done;
+	}
+
+	/* Prevent a recursive loop due to bad ORC data: */
+	if (state->stack_info.type == prev_type &&
+	    on_stack(&state->stack_info, (void *)state->sp, sizeof(long)) &&
+	    state->sp <= prev_sp) {
+		orc_warn("stack going in the wrong direction? ip=%p\n",
+			 (void *)orig_ip);
+		goto done;
+	}
+
+	preempt_enable();
+	return true;
+
+done:
+	preempt_enable();
+	state->stack_info.type = STACK_TYPE_UNKNOWN;
+	return false;
+}
+EXPORT_SYMBOL_GPL(unwind_next_frame);
+
+void __unwind_start(struct unwind_state *state, struct task_struct *task,
+		    struct pt_regs *regs, unsigned long *first_frame)
+{
+	memset(state, 0, sizeof(*state));
+	state->task = task;
+
+	/*
+	 * Refuse to unwind the stack of a task while it's executing on another
+	 * CPU.  This check is racy, but that's ok: the unwinder has other
+	 * checks to prevent it from going off the rails.
+	 */
+	if (task_on_another_cpu(task))
+		goto done;
+
+	if (regs) {
+		if (user_mode(regs))
+			goto done;
+
+		state->ip = regs->ip;
+		state->sp = kernel_stack_pointer(regs);
+		state->bp = regs->bp;
+		state->regs = regs;
+		state->full_regs = true;
+		state->signal = true;
+
+	} else if (task == current) {
+		asm volatile("lea (%%rip), %0\n\t"
+			     "mov %%rsp, %1\n\t"
+			     "mov %%rbp, %2\n\t"
+			     : "=r" (state->ip), "=r" (state->sp),
+			       "=r" (state->bp));
+
+	} else {
+		struct inactive_task_frame *frame = (void *)task->thread.sp;
+
+		state->sp = task->thread.sp;
+		state->bp = READ_ONCE_NOCHECK(frame->bp);
+		state->ip = READ_ONCE_NOCHECK(frame->ret_addr);
+	}
+
+	if (get_stack_info((unsigned long *)state->sp, state->task,
+			   &state->stack_info, &state->stack_mask))
+		return;
+
+	/*
+	 * The caller can provide the address of the first frame directly
+	 * (first_frame) or indirectly (regs->sp) to indicate which stack frame
+	 * to start unwinding at.  Skip ahead until we reach it.
+	 */
+
+	/* When starting from regs, skip the regs frame: */
+	if (regs) {
+		unwind_next_frame(state);
+		return;
+	}
+
+	/* Otherwise, skip ahead to the user-specified starting frame: */
+	while (!unwind_done(state) &&
+	       (!on_stack(&state->stack_info, first_frame, sizeof(long)) ||
+			state->sp <= (unsigned long)first_frame))
+		unwind_next_frame(state);
+
+	return;
+
+done:
+	state->stack_info.type = STACK_TYPE_UNKNOWN;
+	return;
+}
+EXPORT_SYMBOL_GPL(__unwind_start);
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index c8a3b61be0aa..f05f00acac89 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -24,6 +24,7 @@
 #include <asm/asm-offsets.h>
 #include <asm/thread_info.h>
 #include <asm/page_types.h>
+#include <asm/orc_lookup.h>
 #include <asm/cache.h>
 #include <asm/boot.h>
 
@@ -148,6 +149,8 @@ SECTIONS
 
 	BUG_TABLE
 
+	ORC_UNWIND_TABLE
+
 	. = ALIGN(PAGE_SIZE);
 	__vvar_page = .;
 
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 2688c7dc5323..3ea624452f93 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -89,6 +89,5 @@ config KVM_MMU_AUDIT
 # OK, it's a little counter-intuitive to do this, but it puts it neatly under
 # the virtualization menu.
 source drivers/vhost/Kconfig
-source drivers/lguest/Kconfig
 
 endif # VIRTUALIZATION
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 9b1dd114956a..04d750813c9d 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -108,7 +108,7 @@ module_param(dbg, bool, 0644);
 	(((address) >> PT32_LEVEL_SHIFT(level)) & ((1 << PT32_LEVEL_BITS) - 1))
 
 
-#define PT64_BASE_ADDR_MASK (((1ULL << 52) - 1) & ~(u64)(PAGE_SIZE-1))
+#define PT64_BASE_ADDR_MASK __sme_clr((((1ULL << 52) - 1) & ~(u64)(PAGE_SIZE-1)))
 #define PT64_DIR_BASE_ADDR_MASK \
 	(PT64_BASE_ADDR_MASK & ~((1ULL << (PAGE_SHIFT + PT64_LEVEL_BITS)) - 1))
 #define PT64_LVL_ADDR_MASK(level) \
@@ -126,7 +126,7 @@ module_param(dbg, bool, 0644);
 					    * PT32_LEVEL_BITS))) - 1))
 
 #define PT64_PERM_MASK (PT_PRESENT_MASK | PT_WRITABLE_MASK | shadow_user_mask \
-			| shadow_x_mask | shadow_nx_mask)
+			| shadow_x_mask | shadow_nx_mask | shadow_me_mask)
 
 #define ACC_EXEC_MASK    1
 #define ACC_WRITE_MASK   PT_WRITABLE_MASK
@@ -186,6 +186,7 @@ static u64 __read_mostly shadow_dirty_mask;
 static u64 __read_mostly shadow_mmio_mask;
 static u64 __read_mostly shadow_mmio_value;
 static u64 __read_mostly shadow_present_mask;
+static u64 __read_mostly shadow_me_mask;
 
 /*
  * SPTEs used by MMUs without A/D bits are marked with shadow_acc_track_value.
@@ -349,7 +350,7 @@ static bool check_mmio_spte(struct kvm_vcpu *vcpu, u64 spte)
  */
 void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
 		u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 p_mask,
-		u64 acc_track_mask)
+		u64 acc_track_mask, u64 me_mask)
 {
 	BUG_ON(!dirty_mask != !accessed_mask);
 	BUG_ON(!accessed_mask && !acc_track_mask);
@@ -362,6 +363,7 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
 	shadow_x_mask = x_mask;
 	shadow_present_mask = p_mask;
 	shadow_acc_track_mask = acc_track_mask;
+	shadow_me_mask = me_mask;
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes);
 
@@ -2433,7 +2435,7 @@ static void link_shadow_page(struct kvm_vcpu *vcpu, u64 *sptep,
 	BUILD_BUG_ON(VMX_EPT_WRITABLE_MASK != PT_WRITABLE_MASK);
 
 	spte = __pa(sp->spt) | shadow_present_mask | PT_WRITABLE_MASK |
-	       shadow_user_mask | shadow_x_mask;
+	       shadow_user_mask | shadow_x_mask | shadow_me_mask;
 
 	if (sp_ad_disabled(sp))
 		spte |= shadow_acc_track_value;
@@ -2745,6 +2747,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 		pte_access &= ~ACC_WRITE_MASK;
 
 	spte |= (u64)pfn << PAGE_SHIFT;
+	spte |= shadow_me_mask;
 
 	if (pte_access & ACC_WRITE_MASK) {
 
@@ -4106,16 +4109,28 @@ void
 reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context)
 {
 	bool uses_nx = context->nx || context->base_role.smep_andnot_wp;
+	struct rsvd_bits_validate *shadow_zero_check;
+	int i;
 
 	/*
 	 * Passing "true" to the last argument is okay; it adds a check
 	 * on bit 8 of the SPTEs which KVM doesn't use anyway.
 	 */
-	__reset_rsvds_bits_mask(vcpu, &context->shadow_zero_check,
+	shadow_zero_check = &context->shadow_zero_check;
+	__reset_rsvds_bits_mask(vcpu, shadow_zero_check,
 				boot_cpu_data.x86_phys_bits,
 				context->shadow_root_level, uses_nx,
 				guest_cpuid_has_gbpages(vcpu), is_pse(vcpu),
 				true);
+
+	if (!shadow_me_mask)
+		return;
+
+	for (i = context->shadow_root_level; --i >= 0;) {
+		shadow_zero_check->rsvd_bits_mask[0][i] &= ~shadow_me_mask;
+		shadow_zero_check->rsvd_bits_mask[1][i] &= ~shadow_me_mask;
+	}
+
 }
 EXPORT_SYMBOL_GPL(reset_shadow_zero_bits_mask);
 
@@ -4133,17 +4148,29 @@ static void
 reset_tdp_shadow_zero_bits_mask(struct kvm_vcpu *vcpu,
 				struct kvm_mmu *context)
 {
+	struct rsvd_bits_validate *shadow_zero_check;
+	int i;
+
+	shadow_zero_check = &context->shadow_zero_check;
+
 	if (boot_cpu_is_amd())
-		__reset_rsvds_bits_mask(vcpu, &context->shadow_zero_check,
+		__reset_rsvds_bits_mask(vcpu, shadow_zero_check,
 					boot_cpu_data.x86_phys_bits,
 					context->shadow_root_level, false,
 					boot_cpu_has(X86_FEATURE_GBPAGES),
 					true, true);
 	else
-		__reset_rsvds_bits_mask_ept(&context->shadow_zero_check,
+		__reset_rsvds_bits_mask_ept(shadow_zero_check,
 					    boot_cpu_data.x86_phys_bits,
 					    false);
 
+	if (!shadow_me_mask)
+		return;
+
+	for (i = context->shadow_root_level; --i >= 0;) {
+		shadow_zero_check->rsvd_bits_mask[0][i] &= ~shadow_me_mask;
+		shadow_zero_check->rsvd_bits_mask[1][i] &= ~shadow_me_mask;
+	}
 }
 
 /*
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index af256b786a70..8dbd8dbc83eb 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1167,9 +1167,9 @@ static void avic_init_vmcb(struct vcpu_svm *svm)
 {
 	struct vmcb *vmcb = svm->vmcb;
 	struct kvm_arch *vm_data = &svm->vcpu.kvm->arch;
-	phys_addr_t bpa = page_to_phys(svm->avic_backing_page);
-	phys_addr_t lpa = page_to_phys(vm_data->avic_logical_id_table_page);
-	phys_addr_t ppa = page_to_phys(vm_data->avic_physical_id_table_page);
+	phys_addr_t bpa = __sme_set(page_to_phys(svm->avic_backing_page));
+	phys_addr_t lpa = __sme_set(page_to_phys(vm_data->avic_logical_id_table_page));
+	phys_addr_t ppa = __sme_set(page_to_phys(vm_data->avic_physical_id_table_page));
 
 	vmcb->control.avic_backing_page = bpa & AVIC_HPA_MASK;
 	vmcb->control.avic_logical_id = lpa & AVIC_HPA_MASK;
@@ -1232,8 +1232,8 @@ static void init_vmcb(struct vcpu_svm *svm)
 		set_intercept(svm, INTERCEPT_MWAIT);
 	}
 
-	control->iopm_base_pa = iopm_base;
-	control->msrpm_base_pa = __pa(svm->msrpm);
+	control->iopm_base_pa = __sme_set(iopm_base);
+	control->msrpm_base_pa = __sme_set(__pa(svm->msrpm));
 	control->int_ctl = V_INTR_MASKING_MASK;
 
 	init_seg(&save->es);
@@ -1377,9 +1377,9 @@ static int avic_init_backing_page(struct kvm_vcpu *vcpu)
 		return -EINVAL;
 
 	new_entry = READ_ONCE(*entry);
-	new_entry = (page_to_phys(svm->avic_backing_page) &
-		     AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK) |
-		     AVIC_PHYSICAL_ID_ENTRY_VALID_MASK;
+	new_entry = __sme_set((page_to_phys(svm->avic_backing_page) &
+			      AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK) |
+			      AVIC_PHYSICAL_ID_ENTRY_VALID_MASK);
 	WRITE_ONCE(*entry, new_entry);
 
 	svm->avic_physical_id_cache = entry;
@@ -1647,7 +1647,7 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
 
 	svm->vmcb = page_address(page);
 	clear_page(svm->vmcb);
-	svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT;
+	svm->vmcb_pa = __sme_set(page_to_pfn(page) << PAGE_SHIFT);
 	svm->asid_generation = 0;
 	init_vmcb(svm);
 
@@ -1675,7 +1675,7 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 
-	__free_page(pfn_to_page(svm->vmcb_pa >> PAGE_SHIFT));
+	__free_page(pfn_to_page(__sme_clr(svm->vmcb_pa) >> PAGE_SHIFT));
 	__free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER);
 	__free_page(virt_to_page(svm->nested.hsave));
 	__free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER);
@@ -2330,7 +2330,7 @@ static u64 nested_svm_get_tdp_pdptr(struct kvm_vcpu *vcpu, int index)
 	u64 pdpte;
 	int ret;
 
-	ret = kvm_vcpu_read_guest_page(vcpu, gpa_to_gfn(cr3), &pdpte,
+	ret = kvm_vcpu_read_guest_page(vcpu, gpa_to_gfn(__sme_clr(cr3)), &pdpte,
 				       offset_in_page(cr3) + index * 8, 8);
 	if (ret)
 		return 0;
@@ -2342,7 +2342,7 @@ static void nested_svm_set_tdp_cr3(struct kvm_vcpu *vcpu,
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 
-	svm->vmcb->control.nested_cr3 = root;
+	svm->vmcb->control.nested_cr3 = __sme_set(root);
 	mark_dirty(svm->vmcb, VMCB_NPT);
 	svm_flush_tlb(vcpu);
 }
@@ -2873,7 +2873,7 @@ static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm)
 		svm->nested.msrpm[p] = svm->msrpm[p] | value;
 	}
 
-	svm->vmcb->control.msrpm_base_pa = __pa(svm->nested.msrpm);
+	svm->vmcb->control.msrpm_base_pa = __sme_set(__pa(svm->nested.msrpm));
 
 	return true;
 }
@@ -4506,7 +4506,7 @@ get_pi_vcpu_info(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e,
 	pr_debug("SVM: %s: use GA mode for irq %u\n", __func__,
 		 irq.vector);
 	*svm = to_svm(vcpu);
-	vcpu_info->pi_desc_addr = page_to_phys((*svm)->avic_backing_page);
+	vcpu_info->pi_desc_addr = __sme_set(page_to_phys((*svm)->avic_backing_page));
 	vcpu_info->vector = irq.vector;
 
 	return 0;
@@ -4557,7 +4557,8 @@ static int svm_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
 			struct amd_iommu_pi_data pi;
 
 			/* Try to enable guest_mode in IRTE */
-			pi.base = page_to_phys(svm->avic_backing_page) & AVIC_HPA_MASK;
+			pi.base = __sme_set(page_to_phys(svm->avic_backing_page) &
+					    AVIC_HPA_MASK);
 			pi.ga_tag = AVIC_GATAG(kvm->arch.avic_vm_id,
 						     svm->vcpu.vcpu_id);
 			pi.is_guest_mode = true;
@@ -5006,7 +5007,7 @@ static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 
-	svm->vmcb->save.cr3 = root;
+	svm->vmcb->save.cr3 = __sme_set(root);
 	mark_dirty(svm->vmcb, VMCB_CR);
 	svm_flush_tlb(vcpu);
 }
@@ -5015,7 +5016,7 @@ static void set_tdp_cr3(struct kvm_vcpu *vcpu, unsigned long root)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 
-	svm->vmcb->control.nested_cr3 = root;
+	svm->vmcb->control.nested_cr3 = __sme_set(root);
 	mark_dirty(svm->vmcb, VMCB_NPT);
 
 	/* Also sync guest cr3 here in case we live migrate */
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index c6ef2940119b..70b90c0810d0 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -6556,7 +6556,7 @@ void vmx_enable_tdp(void)
 		enable_ept_ad_bits ? VMX_EPT_DIRTY_BIT : 0ull,
 		0ull, VMX_EPT_EXECUTABLE_MASK,
 		cpu_has_vmx_ept_execute_only() ? 0ull : VMX_EPT_READABLE_MASK,
-		VMX_EPT_RWX_MASK);
+		VMX_EPT_RWX_MASK, 0ull);
 
 	ept_set_mmio_spte_mask();
 	kvm_enable_tdp();
@@ -8779,7 +8779,7 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
 
 		vector =  exit_intr_info & INTR_INFO_VECTOR_MASK;
 		desc = (gate_desc *)vmx->host_idt_base + vector;
-		entry = gate_offset(*desc);
+		entry = gate_offset(desc);
 		asm volatile(
 #ifdef CONFIG_X86_64
 			"mov %%" _ASM_SP ", %[sp]\n\t"
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 272320eb328c..ef5102f80497 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -54,6 +54,7 @@
 #include <linux/kvm_irqfd.h>
 #include <linux/irqbypass.h>
 #include <linux/sched/stat.h>
+#include <linux/mem_encrypt.h>
 
 #include <trace/events/kvm.h>
 
@@ -6125,7 +6126,7 @@ int kvm_arch_init(void *opaque)
 
 	kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
 			PT_DIRTY_MASK, PT64_NX_MASK, 0,
-			PT_PRESENT_MASK, 0);
+			PT_PRESENT_MASK, 0, sme_me_mask);
 	kvm_timer_init();
 
 	perf_register_guest_info_callbacks(&kvm_guest_cbs);
diff --git a/arch/x86/lguest/Kconfig b/arch/x86/lguest/Kconfig
deleted file mode 100644
index 08f41caada45..000000000000
--- a/arch/x86/lguest/Kconfig
+++ /dev/null
@@ -1,14 +0,0 @@
-config LGUEST_GUEST
-	bool "Lguest guest support"
-	depends on X86_32 && PARAVIRT && PCI
-	select TTY
-	select VIRTUALIZATION
-	select VIRTIO
-	select VIRTIO_CONSOLE
-	help
-	  Lguest is a tiny in-kernel hypervisor.  Selecting this will
-	  allow your kernel to boot under lguest.  This option will increase
-	  your kernel size by about 10k.  If in doubt, say N.
-
-	  If you say Y here, make sure you say Y (or M) to the virtio block
-	  and net drivers which lguest needs.
diff --git a/arch/x86/lguest/Makefile b/arch/x86/lguest/Makefile
deleted file mode 100644
index 8f38d577a2fa..000000000000
--- a/arch/x86/lguest/Makefile
+++ /dev/null
@@ -1,2 +0,0 @@
-obj-y		:= head_32.o boot.o
-CFLAGS_boot.o	:= $(call cc-option, -fno-stack-protector)
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
deleted file mode 100644
index 99472698c931..000000000000
--- a/arch/x86/lguest/boot.c
+++ /dev/null
@@ -1,1558 +0,0 @@
-/*P:010
- * A hypervisor allows multiple Operating Systems to run on a single machine.
- * To quote David Wheeler: "Any problem in computer science can be solved with
- * another layer of indirection."
- *
- * We keep things simple in two ways.  First, we start with a normal Linux
- * kernel and insert a module (lg.ko) which allows us to run other Linux
- * kernels the same way we'd run processes.  We call the first kernel the Host,
- * and the others the Guests.  The program which sets up and configures Guests
- * (such as the example in tools/lguest/lguest.c) is called the Launcher.
- *
- * Secondly, we only run specially modified Guests, not normal kernels: setting
- * CONFIG_LGUEST_GUEST to "y" compiles this file into the kernel so it knows
- * how to be a Guest at boot time.  This means that you can use the same kernel
- * you boot normally (ie. as a Host) as a Guest.
- *
- * These Guests know that they cannot do privileged operations, such as disable
- * interrupts, and that they have to ask the Host to do such things explicitly.
- * This file consists of all the replacements for such low-level native
- * hardware operations: these special Guest versions call the Host.
- *
- * So how does the kernel know it's a Guest?  We'll see that later, but let's
- * just say that we end up here where we replace the native functions various
- * "paravirt" structures with our Guest versions, then boot like normal.
-:*/
-
-/*
- * Copyright (C) 2006, Rusty Russell <rusty@rustcorp.com.au> IBM Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT.  See the GNU General Public License for more
- * details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-#include <linux/kernel.h>
-#include <linux/start_kernel.h>
-#include <linux/string.h>
-#include <linux/console.h>
-#include <linux/screen_info.h>
-#include <linux/irq.h>
-#include <linux/interrupt.h>
-#include <linux/clocksource.h>
-#include <linux/clockchips.h>
-#include <linux/lguest.h>
-#include <linux/lguest_launcher.h>
-#include <linux/virtio_console.h>
-#include <linux/pm.h>
-#include <linux/export.h>
-#include <linux/pci.h>
-#include <linux/virtio_pci.h>
-#include <asm/acpi.h>
-#include <asm/apic.h>
-#include <asm/lguest.h>
-#include <asm/paravirt.h>
-#include <asm/param.h>
-#include <asm/page.h>
-#include <asm/pgtable.h>
-#include <asm/desc.h>
-#include <asm/setup.h>
-#include <asm/e820/api.h>
-#include <asm/mce.h>
-#include <asm/io.h>
-#include <asm/fpu/api.h>
-#include <asm/stackprotector.h>
-#include <asm/reboot.h>		/* for struct machine_ops */
-#include <asm/kvm_para.h>
-#include <asm/pci_x86.h>
-#include <asm/pci-direct.h>
-
-/*G:010
- * Welcome to the Guest!
- *
- * The Guest in our tale is a simple creature: identical to the Host but
- * behaving in simplified but equivalent ways.  In particular, the Guest is the
- * same kernel as the Host (or at least, built from the same source code).
-:*/
-
-struct lguest_data lguest_data = {
-	.hcall_status = { [0 ... LHCALL_RING_SIZE-1] = 0xFF },
-	.noirq_iret = (u32)lguest_noirq_iret,
-	.kernel_address = PAGE_OFFSET,
-	.blocked_interrupts = { 1 }, /* Block timer interrupts */
-	.syscall_vec = IA32_SYSCALL_VECTOR,
-};
-
-/*G:037
- * async_hcall() is pretty simple: I'm quite proud of it really.  We have a
- * ring buffer of stored hypercalls which the Host will run though next time we
- * do a normal hypercall.  Each entry in the ring has 5 slots for the hypercall
- * arguments, and a "hcall_status" word which is 0 if the call is ready to go,
- * and 255 once the Host has finished with it.
- *
- * If we come around to a slot which hasn't been finished, then the table is
- * full and we just make the hypercall directly.  This has the nice side
- * effect of causing the Host to run all the stored calls in the ring buffer
- * which empties it for next time!
- */
-static void async_hcall(unsigned long call, unsigned long arg1,
-			unsigned long arg2, unsigned long arg3,
-			unsigned long arg4)
-{
-	/* Note: This code assumes we're uniprocessor. */
-	static unsigned int next_call;
-	unsigned long flags;
-
-	/*
-	 * Disable interrupts if not already disabled: we don't want an
-	 * interrupt handler making a hypercall while we're already doing
-	 * one!
-	 */
-	local_irq_save(flags);
-	if (lguest_data.hcall_status[next_call] != 0xFF) {
-		/* Table full, so do normal hcall which will flush table. */
-		hcall(call, arg1, arg2, arg3, arg4);
-	} else {
-		lguest_data.hcalls[next_call].arg0 = call;
-		lguest_data.hcalls[next_call].arg1 = arg1;
-		lguest_data.hcalls[next_call].arg2 = arg2;
-		lguest_data.hcalls[next_call].arg3 = arg3;
-		lguest_data.hcalls[next_call].arg4 = arg4;
-		/* Arguments must all be written before we mark it to go */
-		wmb();
-		lguest_data.hcall_status[next_call] = 0;
-		if (++next_call == LHCALL_RING_SIZE)
-			next_call = 0;
-	}
-	local_irq_restore(flags);
-}
-
-/*G:035
- * Notice the lazy_hcall() above, rather than hcall().  This is our first real
- * optimization trick!
- *
- * When lazy_mode is set, it means we're allowed to defer all hypercalls and do
- * them as a batch when lazy_mode is eventually turned off.  Because hypercalls
- * are reasonably expensive, batching them up makes sense.  For example, a
- * large munmap might update dozens of page table entries: that code calls
- * paravirt_enter_lazy_mmu(), does the dozen updates, then calls
- * lguest_leave_lazy_mode().
- *
- * So, when we're in lazy mode, we call async_hcall() to store the call for
- * future processing:
- */
-static void lazy_hcall1(unsigned long call, unsigned long arg1)
-{
-	if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE)
-		hcall(call, arg1, 0, 0, 0);
-	else
-		async_hcall(call, arg1, 0, 0, 0);
-}
-
-/* You can imagine what lazy_hcall2, 3 and 4 look like. :*/
-static void lazy_hcall2(unsigned long call,
-			unsigned long arg1,
-			unsigned long arg2)
-{
-	if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE)
-		hcall(call, arg1, arg2, 0, 0);
-	else
-		async_hcall(call, arg1, arg2, 0, 0);
-}
-
-static void lazy_hcall3(unsigned long call,
-			unsigned long arg1,
-			unsigned long arg2,
-			unsigned long arg3)
-{
-	if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE)
-		hcall(call, arg1, arg2, arg3, 0);
-	else
-		async_hcall(call, arg1, arg2, arg3, 0);
-}
-
-#ifdef CONFIG_X86_PAE
-static void lazy_hcall4(unsigned long call,
-			unsigned long arg1,
-			unsigned long arg2,
-			unsigned long arg3,
-			unsigned long arg4)
-{
-	if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE)
-		hcall(call, arg1, arg2, arg3, arg4);
-	else
-		async_hcall(call, arg1, arg2, arg3, arg4);
-}
-#endif
-
-/*G:036
- * When lazy mode is turned off, we issue the do-nothing hypercall to
- * flush any stored calls, and call the generic helper to reset the
- * per-cpu lazy mode variable.
- */
-static void lguest_leave_lazy_mmu_mode(void)
-{
-	hcall(LHCALL_FLUSH_ASYNC, 0, 0, 0, 0);
-	paravirt_leave_lazy_mmu();
-}
-
-/*
- * We also catch the end of context switch; we enter lazy mode for much of
- * that too, so again we need to flush here.
- *
- * (Technically, this is lazy CPU mode, and normally we're in lazy MMU
- * mode, but unlike Xen, lguest doesn't care about the difference).
- */
-static void lguest_end_context_switch(struct task_struct *next)
-{
-	hcall(LHCALL_FLUSH_ASYNC, 0, 0, 0, 0);
-	paravirt_end_context_switch(next);
-}
-
-/*G:032
- * After that diversion we return to our first native-instruction
- * replacements: four functions for interrupt control.
- *
- * The simplest way of implementing these would be to have "turn interrupts
- * off" and "turn interrupts on" hypercalls.  Unfortunately, this is too slow:
- * these are by far the most commonly called functions of those we override.
- *
- * So instead we keep an "irq_enabled" field inside our "struct lguest_data",
- * which the Guest can update with a single instruction.  The Host knows to
- * check there before it tries to deliver an interrupt.
- */
-
-/*
- * save_flags() is expected to return the processor state (ie. "flags").  The
- * flags word contains all kind of stuff, but in practice Linux only cares
- * about the interrupt flag.  Our "save_flags()" just returns that.
- */
-asmlinkage __visible unsigned long lguest_save_fl(void)
-{
-	return lguest_data.irq_enabled;
-}
-
-/* Interrupts go off... */
-asmlinkage __visible void lguest_irq_disable(void)
-{
-	lguest_data.irq_enabled = 0;
-}
-
-/*
- * Let's pause a moment.  Remember how I said these are called so often?
- * Jeremy Fitzhardinge optimized them so hard early in 2009 that he had to
- * break some rules.  In particular, these functions are assumed to save their
- * own registers if they need to: normal C functions assume they can trash the
- * eax register.  To use normal C functions, we use
- * PV_CALLEE_SAVE_REGS_THUNK(), which pushes %eax onto the stack, calls the
- * C function, then restores it.
- */
-PV_CALLEE_SAVE_REGS_THUNK(lguest_save_fl);
-PV_CALLEE_SAVE_REGS_THUNK(lguest_irq_disable);
-/*:*/
-
-/* These are in head_32.S */
-extern void lg_irq_enable(void);
-extern void lg_restore_fl(unsigned long flags);
-
-/*M:003
- * We could be more efficient in our checking of outstanding interrupts, rather
- * than using a branch.  One way would be to put the "irq_enabled" field in a
- * page by itself, and have the Host write-protect it when an interrupt comes
- * in when irqs are disabled.  There will then be a page fault as soon as
- * interrupts are re-enabled.
- *
- * A better method is to implement soft interrupt disable generally for x86:
- * instead of disabling interrupts, we set a flag.  If an interrupt does come
- * in, we then disable them for real.  This is uncommon, so we could simply use
- * a hypercall for interrupt control and not worry about efficiency.
-:*/
-
-/*G:034
- * The Interrupt Descriptor Table (IDT).
- *
- * The IDT tells the processor what to do when an interrupt comes in.  Each
- * entry in the table is a 64-bit descriptor: this holds the privilege level,
- * address of the handler, and... well, who cares?  The Guest just asks the
- * Host to make the change anyway, because the Host controls the real IDT.
- */
-static void lguest_write_idt_entry(gate_desc *dt,
-				   int entrynum, const gate_desc *g)
-{
-	/*
-	 * The gate_desc structure is 8 bytes long: we hand it to the Host in
-	 * two 32-bit chunks.  The whole 32-bit kernel used to hand descriptors
-	 * around like this; typesafety wasn't a big concern in Linux's early
-	 * years.
-	 */
-	u32 *desc = (u32 *)g;
-	/* Keep the local copy up to date. */
-	native_write_idt_entry(dt, entrynum, g);
-	/* Tell Host about this new entry. */
-	hcall(LHCALL_LOAD_IDT_ENTRY, entrynum, desc[0], desc[1], 0);
-}
-
-/*
- * Changing to a different IDT is very rare: we keep the IDT up-to-date every
- * time it is written, so we can simply loop through all entries and tell the
- * Host about them.
- */
-static void lguest_load_idt(const struct desc_ptr *desc)
-{
-	unsigned int i;
-	struct desc_struct *idt = (void *)desc->address;
-
-	for (i = 0; i < (desc->size+1)/8; i++)
-		hcall(LHCALL_LOAD_IDT_ENTRY, i, idt[i].a, idt[i].b, 0);
-}
-
-/*
- * The Global Descriptor Table.
- *
- * The Intel architecture defines another table, called the Global Descriptor
- * Table (GDT).  You tell the CPU where it is (and its size) using the "lgdt"
- * instruction, and then several other instructions refer to entries in the
- * table.  There are three entries which the Switcher needs, so the Host simply
- * controls the entire thing and the Guest asks it to make changes using the
- * LOAD_GDT hypercall.
- *
- * This is the exactly like the IDT code.
- */
-static void lguest_load_gdt(const struct desc_ptr *desc)
-{
-	unsigned int i;
-	struct desc_struct *gdt = (void *)desc->address;
-
-	for (i = 0; i < (desc->size+1)/8; i++)
-		hcall(LHCALL_LOAD_GDT_ENTRY, i, gdt[i].a, gdt[i].b, 0);
-}
-
-/*
- * For a single GDT entry which changes, we simply change our copy and
- * then tell the host about it.
- */
-static void lguest_write_gdt_entry(struct desc_struct *dt, int entrynum,
-				   const void *desc, int type)
-{
-	native_write_gdt_entry(dt, entrynum, desc, type);
-	/* Tell Host about this new entry. */
-	hcall(LHCALL_LOAD_GDT_ENTRY, entrynum,
-	      dt[entrynum].a, dt[entrynum].b, 0);
-}
-
-/*
- * There are three "thread local storage" GDT entries which change
- * on every context switch (these three entries are how glibc implements
- * __thread variables).  As an optimization, we have a hypercall
- * specifically for this case.
- *
- * Wouldn't it be nicer to have a general LOAD_GDT_ENTRIES hypercall
- * which took a range of entries?
- */
-static void lguest_load_tls(struct thread_struct *t, unsigned int cpu)
-{
-	/*
-	 * There's one problem which normal hardware doesn't have: the Host
-	 * can't handle us removing entries we're currently using.  So we clear
-	 * the GS register here: if it's needed it'll be reloaded anyway.
-	 */
-	lazy_load_gs(0);
-	lazy_hcall2(LHCALL_LOAD_TLS, __pa(&t->tls_array), cpu);
-}
-
-/*G:038
- * That's enough excitement for now, back to ploughing through each of the
- * different pv_ops structures (we're about 1/3 of the way through).
- *
- * This is the Local Descriptor Table, another weird Intel thingy.  Linux only
- * uses this for some strange applications like Wine.  We don't do anything
- * here, so they'll get an informative and friendly Segmentation Fault.
- */
-static void lguest_set_ldt(const void *addr, unsigned entries)
-{
-}
-
-/*
- * This loads a GDT entry into the "Task Register": that entry points to a
- * structure called the Task State Segment.  Some comments scattered though the
- * kernel code indicate that this used for task switching in ages past, along
- * with blood sacrifice and astrology.
- *
- * Now there's nothing interesting in here that we don't get told elsewhere.
- * But the native version uses the "ltr" instruction, which makes the Host
- * complain to the Guest about a Segmentation Fault and it'll oops.  So we
- * override the native version with a do-nothing version.
- */
-static void lguest_load_tr_desc(void)
-{
-}
-
-/*
- * The "cpuid" instruction is a way of querying both the CPU identity
- * (manufacturer, model, etc) and its features.  It was introduced before the
- * Pentium in 1993 and keeps getting extended by both Intel, AMD and others.
- * As you might imagine, after a decade and a half this treatment, it is now a
- * giant ball of hair.  Its entry in the current Intel manual runs to 28 pages.
- *
- * This instruction even it has its own Wikipedia entry.  The Wikipedia entry
- * has been translated into 6 languages.  I am not making this up!
- *
- * We could get funky here and identify ourselves as "GenuineLguest", but
- * instead we just use the real "cpuid" instruction.  Then I pretty much turned
- * off feature bits until the Guest booted.  (Don't say that: you'll damage
- * lguest sales!)  Shut up, inner voice!  (Hey, just pointing out that this is
- * hardly future proof.)  No one's listening!  They don't like you anyway,
- * parenthetic weirdo!
- *
- * Replacing the cpuid so we can turn features off is great for the kernel, but
- * anyone (including userspace) can just use the raw "cpuid" instruction and
- * the Host won't even notice since it isn't privileged.  So we try not to get
- * too worked up about it.
- */
-static void lguest_cpuid(unsigned int *ax, unsigned int *bx,
-			 unsigned int *cx, unsigned int *dx)
-{
-	int function = *ax;
-
-	native_cpuid(ax, bx, cx, dx);
-	switch (function) {
-	/*
-	 * CPUID 0 gives the highest legal CPUID number (and the ID string).
-	 * We futureproof our code a little by sticking to known CPUID values.
-	 */
-	case 0:
-		if (*ax > 5)
-			*ax = 5;
-		break;
-
-	/*
-	 * CPUID 1 is a basic feature request.
-	 *
-	 * CX: we only allow kernel to see SSE3, CMPXCHG16B and SSSE3
-	 * DX: SSE, SSE2, FXSR, MMX, CMOV, CMPXCHG8B, TSC, FPU and PAE.
-	 */
-	case 1:
-		*cx &= 0x00002201;
-		*dx &= 0x07808151;
-		/*
-		 * The Host can do a nice optimization if it knows that the
-		 * kernel mappings (addresses above 0xC0000000 or whatever
-		 * PAGE_OFFSET is set to) haven't changed.  But Linux calls
-		 * flush_tlb_user() for both user and kernel mappings unless
-		 * the Page Global Enable (PGE) feature bit is set.
-		 */
-		*dx |= 0x00002000;
-		/*
-		 * We also lie, and say we're family id 5.  6 or greater
-		 * leads to a rdmsr in early_init_intel which we can't handle.
-		 * Family ID is returned as bits 8-12 in ax.
-		 */
-		*ax &= 0xFFFFF0FF;
-		*ax |= 0x00000500;
-		break;
-
-	/*
-	 * This is used to detect if we're running under KVM.  We might be,
-	 * but that's a Host matter, not us.  So say we're not.
-	 */
-	case KVM_CPUID_SIGNATURE:
-		*bx = *cx = *dx = 0;
-		break;
-
-	/*
-	 * 0x80000000 returns the highest Extended Function, so we futureproof
-	 * like we do above by limiting it to known fields.
-	 */
-	case 0x80000000:
-		if (*ax > 0x80000008)
-			*ax = 0x80000008;
-		break;
-
-	/*
-	 * PAE systems can mark pages as non-executable.  Linux calls this the
-	 * NX bit.  Intel calls it XD (eXecute Disable), AMD EVP (Enhanced
-	 * Virus Protection).  We just switch it off here, since we don't
-	 * support it.
-	 */
-	case 0x80000001:
-		*dx &= ~(1 << 20);
-		break;
-	}
-}
-
-/*
- * Intel has four control registers, imaginatively named cr0, cr2, cr3 and cr4.
- * I assume there's a cr1, but it hasn't bothered us yet, so we'll not bother
- * it.  The Host needs to know when the Guest wants to change them, so we have
- * a whole series of functions like read_cr0() and write_cr0().
- *
- * We start with cr0.  cr0 allows you to turn on and off all kinds of basic
- * features, but the only cr0 bit that Linux ever used at runtime was the
- * horrifically-named Task Switched (TS) bit at bit 3 (ie. 8)
- *
- * What does the TS bit do?  Well, it causes the CPU to trap (interrupt 7) if
- * the floating point unit is used.  Which allows us to restore FPU state
- * lazily after a task switch if we wanted to, but wouldn't a name like
- * "FPUTRAP bit" be a little less cryptic?
- *
- * Fortunately, Linux keeps it simple and doesn't use TS, so we can ignore
- * cr0.
- */
-static void lguest_write_cr0(unsigned long val)
-{
-}
-
-static unsigned long lguest_read_cr0(void)
-{
-	return 0;
-}
-
-/*
- * cr2 is the virtual address of the last page fault, which the Guest only ever
- * reads.  The Host kindly writes this into our "struct lguest_data", so we
- * just read it out of there.
- */
-static unsigned long lguest_read_cr2(void)
-{
-	return lguest_data.cr2;
-}
-
-/* See lguest_set_pte() below. */
-static bool cr3_changed = false;
-static unsigned long current_cr3;
-
-/*
- * cr3 is the current toplevel pagetable page: the principle is the same as
- * cr0.  Keep a local copy, and tell the Host when it changes.
- */
-static void lguest_write_cr3(unsigned long cr3)
-{
-	lazy_hcall1(LHCALL_NEW_PGTABLE, cr3);
-	current_cr3 = cr3;
-
-	/* These two page tables are simple, linear, and used during boot */
-	if (cr3 != __pa_symbol(swapper_pg_dir) &&
-	    cr3 != __pa_symbol(initial_page_table))
-		cr3_changed = true;
-}
-
-static unsigned long lguest_read_cr3(void)
-{
-	return current_cr3;
-}
-
-/* cr4 is used to enable and disable PGE, but we don't care. */
-static unsigned long lguest_read_cr4(void)
-{
-	return 0;
-}
-
-static void lguest_write_cr4(unsigned long val)
-{
-}
-
-/*
- * Page Table Handling.
- *
- * Now would be a good time to take a rest and grab a coffee or similarly
- * relaxing stimulant.  The easy parts are behind us, and the trek gradually
- * winds uphill from here.
- *
- * Quick refresher: memory is divided into "pages" of 4096 bytes each.  The CPU
- * maps virtual addresses to physical addresses using "page tables".  We could
- * use one huge index of 1 million entries: each address is 4 bytes, so that's
- * 1024 pages just to hold the page tables.   But since most virtual addresses
- * are unused, we use a two level index which saves space.  The cr3 register
- * contains the physical address of the top level "page directory" page, which
- * contains physical addresses of up to 1024 second-level pages.  Each of these
- * second level pages contains up to 1024 physical addresses of actual pages,
- * or Page Table Entries (PTEs).
- *
- * Here's a diagram, where arrows indicate physical addresses:
- *
- * cr3 ---> +---------+
- *	    |  	   --------->+---------+
- *	    |	      |	     | PADDR1  |
- *	  Mid-level   |	     | PADDR2  |
- *	  (PMD) page  |	     | 	       |
- *	    |	      |	   Lower-level |
- *	    |	      |	   (PTE) page  |
- *	    |	      |	     |	       |
- *	      ....    	     	 ....
- *
- * So to convert a virtual address to a physical address, we look up the top
- * level, which points us to the second level, which gives us the physical
- * address of that page.  If the top level entry was not present, or the second
- * level entry was not present, then the virtual address is invalid (we
- * say "the page was not mapped").
- *
- * Put another way, a 32-bit virtual address is divided up like so:
- *
- *  1 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
- * |<---- 10 bits ---->|<---- 10 bits ---->|<------ 12 bits ------>|
- *    Index into top     Index into second      Offset within page
- *  page directory page    pagetable page
- *
- * Now, unfortunately, this isn't the whole story: Intel added Physical Address
- * Extension (PAE) to allow 32 bit systems to use 64GB of memory (ie. 36 bits).
- * These are held in 64-bit page table entries, so we can now only fit 512
- * entries in a page, and the neat three-level tree breaks down.
- *
- * The result is a four level page table:
- *
- * cr3 --> [ 4 Upper  ]
- *	   [   Level  ]
- *	   [  Entries ]
- *	   [(PUD Page)]---> +---------+
- *	 		    |  	   --------->+---------+
- *	 		    |	      |	     | PADDR1  |
- *	 		  Mid-level   |	     | PADDR2  |
- *	 		  (PMD) page  |	     | 	       |
- *	 		    |	      |	   Lower-level |
- *	 		    |	      |	   (PTE) page  |
- *	 		    |	      |	     |	       |
- *	 		      ....    	     	 ....
- *
- *
- * And the virtual address is decoded as:
- *
- *         1 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
- *      |<-2->|<--- 9 bits ---->|<---- 9 bits --->|<------ 12 bits ------>|
- * Index into    Index into mid    Index into lower    Offset within page
- * top entries   directory page     pagetable page
- *
- * It's too hard to switch between these two formats at runtime, so Linux only
- * supports one or the other depending on whether CONFIG_X86_PAE is set.  Many
- * distributions turn it on, and not just for people with silly amounts of
- * memory: the larger PTE entries allow room for the NX bit, which lets the
- * kernel disable execution of pages and increase security.
- *
- * This was a problem for lguest, which couldn't run on these distributions;
- * then Matias Zabaljauregui figured it all out and implemented it, and only a
- * handful of puppies were crushed in the process!
- *
- * Back to our point: the kernel spends a lot of time changing both the
- * top-level page directory and lower-level pagetable pages.  The Guest doesn't
- * know physical addresses, so while it maintains these page tables exactly
- * like normal, it also needs to keep the Host informed whenever it makes a
- * change: the Host will create the real page tables based on the Guests'.
- */
-
-/*
- * The Guest calls this after it has set a second-level entry (pte), ie. to map
- * a page into a process' address space.  We tell the Host the toplevel and
- * address this corresponds to.  The Guest uses one pagetable per process, so
- * we need to tell the Host which one we're changing (mm->pgd).
- */
-static void lguest_pte_update(struct mm_struct *mm, unsigned long addr,
-			       pte_t *ptep)
-{
-#ifdef CONFIG_X86_PAE
-	/* PAE needs to hand a 64 bit page table entry, so it uses two args. */
-	lazy_hcall4(LHCALL_SET_PTE, __pa(mm->pgd), addr,
-		    ptep->pte_low, ptep->pte_high);
-#else
-	lazy_hcall3(LHCALL_SET_PTE, __pa(mm->pgd), addr, ptep->pte_low);
-#endif
-}
-
-/* This is the "set and update" combo-meal-deal version. */
-static void lguest_set_pte_at(struct mm_struct *mm, unsigned long addr,
-			      pte_t *ptep, pte_t pteval)
-{
-	native_set_pte(ptep, pteval);
-	lguest_pte_update(mm, addr, ptep);
-}
-
-/*
- * The Guest calls lguest_set_pud to set a top-level entry and lguest_set_pmd
- * to set a middle-level entry when PAE is activated.
- *
- * Again, we set the entry then tell the Host which page we changed,
- * and the index of the entry we changed.
- */
-#ifdef CONFIG_X86_PAE
-static void lguest_set_pud(pud_t *pudp, pud_t pudval)
-{
-	native_set_pud(pudp, pudval);
-
-	/* 32 bytes aligned pdpt address and the index. */
-	lazy_hcall2(LHCALL_SET_PGD, __pa(pudp) & 0xFFFFFFE0,
-		   (__pa(pudp) & 0x1F) / sizeof(pud_t));
-}
-
-static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval)
-{
-	native_set_pmd(pmdp, pmdval);
-	lazy_hcall2(LHCALL_SET_PMD, __pa(pmdp) & PAGE_MASK,
-		   (__pa(pmdp) & (PAGE_SIZE - 1)) / sizeof(pmd_t));
-}
-#else
-
-/* The Guest calls lguest_set_pmd to set a top-level entry when !PAE. */
-static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval)
-{
-	native_set_pmd(pmdp, pmdval);
-	lazy_hcall2(LHCALL_SET_PGD, __pa(pmdp) & PAGE_MASK,
-		   (__pa(pmdp) & (PAGE_SIZE - 1)) / sizeof(pmd_t));
-}
-#endif
-
-/*
- * There are a couple of legacy places where the kernel sets a PTE, but we
- * don't know the top level any more.  This is useless for us, since we don't
- * know which pagetable is changing or what address, so we just tell the Host
- * to forget all of them.  Fortunately, this is very rare.
- *
- * ... except in early boot when the kernel sets up the initial pagetables,
- * which makes booting astonishingly slow: 48 seconds!  So we don't even tell
- * the Host anything changed until we've done the first real page table switch,
- * which brings boot back to 4.3 seconds.
- */
-static void lguest_set_pte(pte_t *ptep, pte_t pteval)
-{
-	native_set_pte(ptep, pteval);
-	if (cr3_changed)
-		lazy_hcall1(LHCALL_FLUSH_TLB, 1);
-}
-
-#ifdef CONFIG_X86_PAE
-/*
- * With 64-bit PTE values, we need to be careful setting them: if we set 32
- * bits at a time, the hardware could see a weird half-set entry.  These
- * versions ensure we update all 64 bits at once.
- */
-static void lguest_set_pte_atomic(pte_t *ptep, pte_t pte)
-{
-	native_set_pte_atomic(ptep, pte);
-	if (cr3_changed)
-		lazy_hcall1(LHCALL_FLUSH_TLB, 1);
-}
-
-static void lguest_pte_clear(struct mm_struct *mm, unsigned long addr,
-			     pte_t *ptep)
-{
-	native_pte_clear(mm, addr, ptep);
-	lguest_pte_update(mm, addr, ptep);
-}
-
-static void lguest_pmd_clear(pmd_t *pmdp)
-{
-	lguest_set_pmd(pmdp, __pmd(0));
-}
-#endif
-
-/*
- * Unfortunately for Lguest, the pv_mmu_ops for page tables were based on
- * native page table operations.  On native hardware you can set a new page
- * table entry whenever you want, but if you want to remove one you have to do
- * a TLB flush (a TLB is a little cache of page table entries kept by the CPU).
- *
- * So the lguest_set_pte_at() and lguest_set_pmd() functions above are only
- * called when a valid entry is written, not when it's removed (ie. marked not
- * present).  Instead, this is where we come when the Guest wants to remove a
- * page table entry: we tell the Host to set that entry to 0 (ie. the present
- * bit is zero).
- */
-static void lguest_flush_tlb_single(unsigned long addr)
-{
-	/* Simply set it to zero: if it was not, it will fault back in. */
-	lazy_hcall3(LHCALL_SET_PTE, current_cr3, addr, 0);
-}
-
-/*
- * This is what happens after the Guest has removed a large number of entries.
- * This tells the Host that any of the page table entries for userspace might
- * have changed, ie. virtual addresses below PAGE_OFFSET.
- */
-static void lguest_flush_tlb_user(void)
-{
-	lazy_hcall1(LHCALL_FLUSH_TLB, 0);
-}
-
-/*
- * This is called when the kernel page tables have changed.  That's not very
- * common (unless the Guest is using highmem, which makes the Guest extremely
- * slow), so it's worth separating this from the user flushing above.
- */
-static void lguest_flush_tlb_kernel(void)
-{
-	lazy_hcall1(LHCALL_FLUSH_TLB, 1);
-}
-
-/*
- * The Unadvanced Programmable Interrupt Controller.
- *
- * This is an attempt to implement the simplest possible interrupt controller.
- * I spent some time looking though routines like set_irq_chip_and_handler,
- * set_irq_chip_and_handler_name, set_irq_chip_data and set_phasers_to_stun and
- * I *think* this is as simple as it gets.
- *
- * We can tell the Host what interrupts we want blocked ready for using the
- * lguest_data.interrupts bitmap, so disabling (aka "masking") them is as
- * simple as setting a bit.  We don't actually "ack" interrupts as such, we
- * just mask and unmask them.  I wonder if we should be cleverer?
- */
-static void disable_lguest_irq(struct irq_data *data)
-{
-	set_bit(data->irq, lguest_data.blocked_interrupts);
-}
-
-static void enable_lguest_irq(struct irq_data *data)
-{
-	clear_bit(data->irq, lguest_data.blocked_interrupts);
-}
-
-/* This structure describes the lguest IRQ controller. */
-static struct irq_chip lguest_irq_controller = {
-	.name		= "lguest",
-	.irq_mask	= disable_lguest_irq,
-	.irq_mask_ack	= disable_lguest_irq,
-	.irq_unmask	= enable_lguest_irq,
-};
-
-/*
- * Interrupt descriptors are allocated as-needed, but low-numbered ones are
- * reserved by the generic x86 code.  So we ignore irq_alloc_desc_at if it
- * tells us the irq is already used: other errors (ie. ENOMEM) we take
- * seriously.
- */
-static int lguest_setup_irq(unsigned int irq)
-{
-	struct irq_desc *desc;
-	int err;
-
-	/* Returns -ve error or vector number. */
-	err = irq_alloc_desc_at(irq, 0);
-	if (err < 0 && err != -EEXIST)
-		return err;
-
-	/*
-	 * Tell the Linux infrastructure that the interrupt is
-	 * controlled by our level-based lguest interrupt controller.
-	 */
-	irq_set_chip_and_handler_name(irq, &lguest_irq_controller,
-				      handle_level_irq, "level");
-
-	/* Some systems map "vectors" to interrupts weirdly.  Not us! */
-	desc = irq_to_desc(irq);
-	__this_cpu_write(vector_irq[FIRST_EXTERNAL_VECTOR + irq], desc);
-	return 0;
-}
-
-static int lguest_enable_irq(struct pci_dev *dev)
-{
-	int err;
-	u8 line = 0;
-
-	/* We literally use the PCI interrupt line as the irq number. */
-	pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &line);
-	err = lguest_setup_irq(line);
-	if (!err)
-		dev->irq = line;
-	return err;
-}
-
-/* We don't do hotplug PCI, so this shouldn't be called. */
-static void lguest_disable_irq(struct pci_dev *dev)
-{
-	WARN_ON(1);
-}
-
-/*
- * This sets up the Interrupt Descriptor Table (IDT) entry for each hardware
- * interrupt (except 128, which is used for system calls).
- */
-static void __init lguest_init_IRQ(void)
-{
-	unsigned int i;
-
-	for (i = FIRST_EXTERNAL_VECTOR; i < FIRST_SYSTEM_VECTOR; i++) {
-		if (i != IA32_SYSCALL_VECTOR)
-			set_intr_gate(i, irq_entries_start +
-					8 * (i - FIRST_EXTERNAL_VECTOR));
-	}
-
-	/*
-	 * This call is required to set up for 4k stacks, where we have
-	 * separate stacks for hard and soft interrupts.
-	 */
-	irq_ctx_init(smp_processor_id());
-}
-
-/*
- * Time.
- *
- * It would be far better for everyone if the Guest had its own clock, but
- * until then the Host gives us the time on every interrupt.
- */
-static void lguest_get_wallclock(struct timespec *now)
-{
-	*now = lguest_data.time;
-}
-
-/*
- * The TSC is an Intel thing called the Time Stamp Counter.  The Host tells us
- * what speed it runs at, or 0 if it's unusable as a reliable clock source.
- * This matches what we want here: if we return 0 from this function, the x86
- * TSC clock will give up and not register itself.
- */
-static unsigned long lguest_tsc_khz(void)
-{
-	return lguest_data.tsc_khz;
-}
-
-/*
- * If we can't use the TSC, the kernel falls back to our lower-priority
- * "lguest_clock", where we read the time value given to us by the Host.
- */
-static u64 lguest_clock_read(struct clocksource *cs)
-{
-	unsigned long sec, nsec;
-
-	/*
-	 * Since the time is in two parts (seconds and nanoseconds), we risk
-	 * reading it just as it's changing from 99 & 0.999999999 to 100 and 0,
-	 * and getting 99 and 0.  As Linux tends to come apart under the stress
-	 * of time travel, we must be careful:
-	 */
-	do {
-		/* First we read the seconds part. */
-		sec = lguest_data.time.tv_sec;
-		/*
-		 * This read memory barrier tells the compiler and the CPU that
-		 * this can't be reordered: we have to complete the above
-		 * before going on.
-		 */
-		rmb();
-		/* Now we read the nanoseconds part. */
-		nsec = lguest_data.time.tv_nsec;
-		/* Make sure we've done that. */
-		rmb();
-		/* Now if the seconds part has changed, try again. */
-	} while (unlikely(lguest_data.time.tv_sec != sec));
-
-	/* Our lguest clock is in real nanoseconds. */
-	return sec*1000000000ULL + nsec;
-}
-
-/* This is the fallback clocksource: lower priority than the TSC clocksource. */
-static struct clocksource lguest_clock = {
-	.name		= "lguest",
-	.rating		= 200,
-	.read		= lguest_clock_read,
-	.mask		= CLOCKSOURCE_MASK(64),
-	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
-};
-
-/*
- * We also need a "struct clock_event_device": Linux asks us to set it to go
- * off some time in the future.  Actually, James Morris figured all this out, I
- * just applied the patch.
- */
-static int lguest_clockevent_set_next_event(unsigned long delta,
-                                           struct clock_event_device *evt)
-{
-	/* FIXME: I don't think this can ever happen, but James tells me he had
-	 * to put this code in.  Maybe we should remove it now.  Anyone? */
-	if (delta < LG_CLOCK_MIN_DELTA) {
-		if (printk_ratelimit())
-			printk(KERN_DEBUG "%s: small delta %lu ns\n",
-			       __func__, delta);
-		return -ETIME;
-	}
-
-	/* Please wake us this far in the future. */
-	hcall(LHCALL_SET_CLOCKEVENT, delta, 0, 0, 0);
-	return 0;
-}
-
-static int lguest_clockevent_shutdown(struct clock_event_device *evt)
-{
-	/* A 0 argument shuts the clock down. */
-	hcall(LHCALL_SET_CLOCKEVENT, 0, 0, 0, 0);
-	return 0;
-}
-
-/* This describes our primitive timer chip. */
-static struct clock_event_device lguest_clockevent = {
-	.name                   = "lguest",
-	.features               = CLOCK_EVT_FEAT_ONESHOT,
-	.set_next_event         = lguest_clockevent_set_next_event,
-	.set_state_shutdown	= lguest_clockevent_shutdown,
-	.rating                 = INT_MAX,
-	.mult                   = 1,
-	.shift                  = 0,
-	.min_delta_ns           = LG_CLOCK_MIN_DELTA,
-	.min_delta_ticks        = LG_CLOCK_MIN_DELTA,
-	.max_delta_ns           = LG_CLOCK_MAX_DELTA,
-	.max_delta_ticks        = LG_CLOCK_MAX_DELTA,
-};
-
-/*
- * This is the Guest timer interrupt handler (hardware interrupt 0).  We just
- * call the clockevent infrastructure and it does whatever needs doing.
- */
-static void lguest_time_irq(struct irq_desc *desc)
-{
-	unsigned long flags;
-
-	/* Don't interrupt us while this is running. */
-	local_irq_save(flags);
-	lguest_clockevent.event_handler(&lguest_clockevent);
-	local_irq_restore(flags);
-}
-
-/*
- * At some point in the boot process, we get asked to set up our timing
- * infrastructure.  The kernel doesn't expect timer interrupts before this, but
- * we cleverly initialized the "blocked_interrupts" field of "struct
- * lguest_data" so that timer interrupts were blocked until now.
- */
-static void lguest_time_init(void)
-{
-	/* Set up the timer interrupt (0) to go to our simple timer routine */
-	if (lguest_setup_irq(0) != 0)
-		panic("Could not set up timer irq");
-	irq_set_handler(0, lguest_time_irq);
-
-	clocksource_register_hz(&lguest_clock, NSEC_PER_SEC);
-
-	/* We can't set cpumask in the initializer: damn C limitations!  Set it
-	 * here and register our timer device. */
-	lguest_clockevent.cpumask = cpumask_of(0);
-	clockevents_register_device(&lguest_clockevent);
-
-	/* Finally, we unblock the timer interrupt. */
-	clear_bit(0, lguest_data.blocked_interrupts);
-}
-
-/*
- * Miscellaneous bits and pieces.
- *
- * Here is an oddball collection of functions which the Guest needs for things
- * to work.  They're pretty simple.
- */
-
-/*
- * The Guest needs to tell the Host what stack it expects traps to use.  For
- * native hardware, this is part of the Task State Segment mentioned above in
- * lguest_load_tr_desc(), but to help hypervisors there's this special call.
- *
- * We tell the Host the segment we want to use (__KERNEL_DS is the kernel data
- * segment), the privilege level (we're privilege level 1, the Host is 0 and
- * will not tolerate us trying to use that), the stack pointer, and the number
- * of pages in the stack.
- */
-static void lguest_load_sp0(struct tss_struct *tss,
-			    struct thread_struct *thread)
-{
-	lazy_hcall3(LHCALL_SET_STACK, __KERNEL_DS | 0x1, thread->sp0,
-		   THREAD_SIZE / PAGE_SIZE);
-	tss->x86_tss.sp0 = thread->sp0;
-}
-
-/* Let's just say, I wouldn't do debugging under a Guest. */
-static unsigned long lguest_get_debugreg(int regno)
-{
-	/* FIXME: Implement */
-	return 0;
-}
-
-static void lguest_set_debugreg(int regno, unsigned long value)
-{
-	/* FIXME: Implement */
-}
-
-/*
- * There are times when the kernel wants to make sure that no memory writes are
- * caught in the cache (that they've all reached real hardware devices).  This
- * doesn't matter for the Guest which has virtual hardware.
- *
- * On the Pentium 4 and above, cpuid() indicates that the Cache Line Flush
- * (clflush) instruction is available and the kernel uses that.  Otherwise, it
- * uses the older "Write Back and Invalidate Cache" (wbinvd) instruction.
- * Unlike clflush, wbinvd can only be run at privilege level 0.  So we can
- * ignore clflush, but replace wbinvd.
- */
-static void lguest_wbinvd(void)
-{
-}
-
-/*
- * If the Guest expects to have an Advanced Programmable Interrupt Controller,
- * we play dumb by ignoring writes and returning 0 for reads.  So it's no
- * longer Programmable nor Controlling anything, and I don't think 8 lines of
- * code qualifies for Advanced.  It will also never interrupt anything.  It
- * does, however, allow us to get through the Linux boot code.
- */
-#ifdef CONFIG_X86_LOCAL_APIC
-static void lguest_apic_write(u32 reg, u32 v)
-{
-}
-
-static u32 lguest_apic_read(u32 reg)
-{
-	return 0;
-}
-
-static u64 lguest_apic_icr_read(void)
-{
-	return 0;
-}
-
-static void lguest_apic_icr_write(u32 low, u32 id)
-{
-	/* Warn to see if there's any stray references */
-	WARN_ON(1);
-}
-
-static void lguest_apic_wait_icr_idle(void)
-{
-	return;
-}
-
-static u32 lguest_apic_safe_wait_icr_idle(void)
-{
-	return 0;
-}
-
-static void set_lguest_basic_apic_ops(void)
-{
-	apic->read = lguest_apic_read;
-	apic->write = lguest_apic_write;
-	apic->icr_read = lguest_apic_icr_read;
-	apic->icr_write = lguest_apic_icr_write;
-	apic->wait_icr_idle = lguest_apic_wait_icr_idle;
-	apic->safe_wait_icr_idle = lguest_apic_safe_wait_icr_idle;
-};
-#endif
-
-/* STOP!  Until an interrupt comes in. */
-static void lguest_safe_halt(void)
-{
-	hcall(LHCALL_HALT, 0, 0, 0, 0);
-}
-
-/*
- * The SHUTDOWN hypercall takes a string to describe what's happening, and
- * an argument which says whether this to restart (reboot) the Guest or not.
- *
- * Note that the Host always prefers that the Guest speak in physical addresses
- * rather than virtual addresses, so we use __pa() here.
- */
-static void lguest_power_off(void)
-{
-	hcall(LHCALL_SHUTDOWN, __pa("Power down"),
-	      LGUEST_SHUTDOWN_POWEROFF, 0, 0);
-}
-
-/*
- * Panicing.
- *
- * Don't.  But if you did, this is what happens.
- */
-static int lguest_panic(struct notifier_block *nb, unsigned long l, void *p)
-{
-	hcall(LHCALL_SHUTDOWN, __pa(p), LGUEST_SHUTDOWN_POWEROFF, 0, 0);
-	/* The hcall won't return, but to keep gcc happy, we're "done". */
-	return NOTIFY_DONE;
-}
-
-static struct notifier_block paniced = {
-	.notifier_call = lguest_panic
-};
-
-/* Setting up memory is fairly easy. */
-static __init char *lguest_memory_setup(void)
-{
-	/*
-	 * The Linux bootloader header contains an "e820" memory map: the
-	 * Launcher populated the first entry with our memory limit.
-	 */
-	e820__range_add(boot_params.e820_table[0].addr,
-			  boot_params.e820_table[0].size,
-			  boot_params.e820_table[0].type);
-
-	/* This string is for the boot messages. */
-	return "LGUEST";
-}
-
-/* Offset within PCI config space of BAR access capability. */
-static int console_cfg_offset = 0;
-static int console_access_cap;
-
-/* Set up so that we access off in bar0 (on bus 0, device 1, function 0) */
-static void set_cfg_window(u32 cfg_offset, u32 off)
-{
-	write_pci_config_byte(0, 1, 0,
-			      cfg_offset + offsetof(struct virtio_pci_cap, bar),
-			      0);
-	write_pci_config(0, 1, 0,
-			 cfg_offset + offsetof(struct virtio_pci_cap, length),
-			 4);
-	write_pci_config(0, 1, 0,
-			 cfg_offset + offsetof(struct virtio_pci_cap, offset),
-			 off);
-}
-
-static void write_bar_via_cfg(u32 cfg_offset, u32 off, u32 val)
-{
-	/*
-	 * We could set this up once, then leave it; nothing else in the *
-	 * kernel should touch these registers.  But if it went wrong, that
-	 * would be a horrible bug to find.
-	 */
-	set_cfg_window(cfg_offset, off);
-	write_pci_config(0, 1, 0,
-			 cfg_offset + sizeof(struct virtio_pci_cap), val);
-}
-
-static void probe_pci_console(void)
-{
-	u8 cap, common_cap = 0, device_cap = 0;
-	u32 device_len;
-
-	/* Avoid recursive printk into here. */
-	console_cfg_offset = -1;
-
-	if (!early_pci_allowed()) {
-		printk(KERN_ERR "lguest: early PCI access not allowed!\n");
-		return;
-	}
-
-	/* We expect a console PCI device at BUS0, slot 1. */
-	if (read_pci_config(0, 1, 0, 0) != 0x10431AF4) {
-		printk(KERN_ERR "lguest: PCI device is %#x!\n",
-		       read_pci_config(0, 1, 0, 0));
-		return;
-	}
-
-	/* Find the capabilities we need (must be in bar0) */
-	cap = read_pci_config_byte(0, 1, 0, PCI_CAPABILITY_LIST);
-	while (cap) {
-		u8 vndr = read_pci_config_byte(0, 1, 0, cap);
-		if (vndr == PCI_CAP_ID_VNDR) {
-			u8 type, bar;
-
-			type = read_pci_config_byte(0, 1, 0,
-			    cap + offsetof(struct virtio_pci_cap, cfg_type));
-			bar = read_pci_config_byte(0, 1, 0,
-			    cap + offsetof(struct virtio_pci_cap, bar));
-
-			switch (type) {
-			case VIRTIO_PCI_CAP_DEVICE_CFG:
-				if (bar == 0)
-					device_cap = cap;
-				break;
-			case VIRTIO_PCI_CAP_PCI_CFG:
-				console_access_cap = cap;
-				break;
-			}
-		}
-		cap = read_pci_config_byte(0, 1, 0, cap + PCI_CAP_LIST_NEXT);
-	}
-	if (!device_cap || !console_access_cap) {
-		printk(KERN_ERR "lguest: No caps (%u/%u/%u) in console!\n",
-		       common_cap, device_cap, console_access_cap);
-		return;
-	}
-
-	/*
-	 * Note that we can't check features, until we've set the DRIVER
-	 * status bit.  We don't want to do that until we have a real driver,
-	 * so we just check that the device-specific config has room for
-	 * emerg_wr.  If it doesn't support VIRTIO_CONSOLE_F_EMERG_WRITE
-	 * it should ignore the access.
-	 */
-	device_len = read_pci_config(0, 1, 0,
-			device_cap + offsetof(struct virtio_pci_cap, length));
-	if (device_len < (offsetof(struct virtio_console_config, emerg_wr)
-			  + sizeof(u32))) {
-		printk(KERN_ERR "lguest: console missing emerg_wr field\n");
-		return;
-	}
-
-	console_cfg_offset = read_pci_config(0, 1, 0,
-			device_cap + offsetof(struct virtio_pci_cap, offset));
-	printk(KERN_INFO "lguest: Console via virtio-pci emerg_wr\n");
-}
-
-/*
- * We will eventually use the virtio console device to produce console output,
- * but before that is set up we use the virtio PCI console's backdoor mmio
- * access and the "emergency" write facility (which is legal even before the
- * device is configured).
- */
-static __init int early_put_chars(u32 vtermno, const char *buf, int count)
-{
-	/* If we couldn't find PCI console, forget it. */
-	if (console_cfg_offset < 0)
-		return count;
-
-	if (unlikely(!console_cfg_offset)) {
-		probe_pci_console();
-		if (console_cfg_offset < 0)
-			return count;
-	}
-
-	write_bar_via_cfg(console_access_cap,
-			  console_cfg_offset
-			  + offsetof(struct virtio_console_config, emerg_wr),
-			  buf[0]);
-	return 1;
-}
-
-/*
- * Rebooting also tells the Host we're finished, but the RESTART flag tells the
- * Launcher to reboot us.
- */
-static void lguest_restart(char *reason)
-{
-	hcall(LHCALL_SHUTDOWN, __pa(reason), LGUEST_SHUTDOWN_RESTART, 0, 0);
-}
-
-/*G:050
- * Patching (Powerfully Placating Performance Pedants)
- *
- * We have already seen that pv_ops structures let us replace simple native
- * instructions with calls to the appropriate back end all throughout the
- * kernel.  This allows the same kernel to run as a Guest and as a native
- * kernel, but it's slow because of all the indirect branches.
- *
- * Remember that David Wheeler quote about "Any problem in computer science can
- * be solved with another layer of indirection"?  The rest of that quote is
- * "... But that usually will create another problem."  This is the first of
- * those problems.
- *
- * Our current solution is to allow the paravirt back end to optionally patch
- * over the indirect calls to replace them with something more efficient.  We
- * patch two of the simplest of the most commonly called functions: disable
- * interrupts and save interrupts.  We usually have 6 or 10 bytes to patch
- * into: the Guest versions of these operations are small enough that we can
- * fit comfortably.
- *
- * First we need assembly templates of each of the patchable Guest operations,
- * and these are in head_32.S.
- */
-
-/*G:060 We construct a table from the assembler templates: */
-static const struct lguest_insns
-{
-	const char *start, *end;
-} lguest_insns[] = {
-	[PARAVIRT_PATCH(pv_irq_ops.irq_disable)] = { lgstart_cli, lgend_cli },
-	[PARAVIRT_PATCH(pv_irq_ops.save_fl)] = { lgstart_pushf, lgend_pushf },
-};
-
-/*
- * Now our patch routine is fairly simple (based on the native one in
- * paravirt.c).  If we have a replacement, we copy it in and return how much of
- * the available space we used.
- */
-static unsigned lguest_patch(u8 type, u16 clobber, void *ibuf,
-			     unsigned long addr, unsigned len)
-{
-	unsigned int insn_len;
-
-	/* Don't do anything special if we don't have a replacement */
-	if (type >= ARRAY_SIZE(lguest_insns) || !lguest_insns[type].start)
-		return paravirt_patch_default(type, clobber, ibuf, addr, len);
-
-	insn_len = lguest_insns[type].end - lguest_insns[type].start;
-
-	/* Similarly if it can't fit (doesn't happen, but let's be thorough). */
-	if (len < insn_len)
-		return paravirt_patch_default(type, clobber, ibuf, addr, len);
-
-	/* Copy in our instructions. */
-	memcpy(ibuf, lguest_insns[type].start, insn_len);
-	return insn_len;
-}
-
-/*G:029
- * Once we get to lguest_init(), we know we're a Guest.  The various
- * pv_ops structures in the kernel provide points for (almost) every routine we
- * have to override to avoid privileged instructions.
- */
-__init void lguest_init(void)
-{
-	/* We're under lguest. */
-	pv_info.name = "lguest";
-	/* We're running at privilege level 1, not 0 as normal. */
-	pv_info.kernel_rpl = 1;
-	/* Everyone except Xen runs with this set. */
-	pv_info.shared_kernel_pmd = 1;
-
-	/*
-	 * We set up all the lguest overrides for sensitive operations.  These
-	 * are detailed with the operations themselves.
-	 */
-
-	/* Interrupt-related operations */
-	pv_irq_ops.save_fl = PV_CALLEE_SAVE(lguest_save_fl);
-	pv_irq_ops.restore_fl = __PV_IS_CALLEE_SAVE(lg_restore_fl);
-	pv_irq_ops.irq_disable = PV_CALLEE_SAVE(lguest_irq_disable);
-	pv_irq_ops.irq_enable = __PV_IS_CALLEE_SAVE(lg_irq_enable);
-	pv_irq_ops.safe_halt = lguest_safe_halt;
-
-	/* Setup operations */
-	pv_init_ops.patch = lguest_patch;
-
-	/* Intercepts of various CPU instructions */
-	pv_cpu_ops.load_gdt = lguest_load_gdt;
-	pv_cpu_ops.cpuid = lguest_cpuid;
-	pv_cpu_ops.load_idt = lguest_load_idt;
-	pv_cpu_ops.iret = lguest_iret;
-	pv_cpu_ops.load_sp0 = lguest_load_sp0;
-	pv_cpu_ops.load_tr_desc = lguest_load_tr_desc;
-	pv_cpu_ops.set_ldt = lguest_set_ldt;
-	pv_cpu_ops.load_tls = lguest_load_tls;
-	pv_cpu_ops.get_debugreg = lguest_get_debugreg;
-	pv_cpu_ops.set_debugreg = lguest_set_debugreg;
-	pv_cpu_ops.read_cr0 = lguest_read_cr0;
-	pv_cpu_ops.write_cr0 = lguest_write_cr0;
-	pv_cpu_ops.read_cr4 = lguest_read_cr4;
-	pv_cpu_ops.write_cr4 = lguest_write_cr4;
-	pv_cpu_ops.write_gdt_entry = lguest_write_gdt_entry;
-	pv_cpu_ops.write_idt_entry = lguest_write_idt_entry;
-	pv_cpu_ops.wbinvd = lguest_wbinvd;
-	pv_cpu_ops.start_context_switch = paravirt_start_context_switch;
-	pv_cpu_ops.end_context_switch = lguest_end_context_switch;
-
-	/* Pagetable management */
-	pv_mmu_ops.write_cr3 = lguest_write_cr3;
-	pv_mmu_ops.flush_tlb_user = lguest_flush_tlb_user;
-	pv_mmu_ops.flush_tlb_single = lguest_flush_tlb_single;
-	pv_mmu_ops.flush_tlb_kernel = lguest_flush_tlb_kernel;
-	pv_mmu_ops.set_pte = lguest_set_pte;
-	pv_mmu_ops.set_pte_at = lguest_set_pte_at;
-	pv_mmu_ops.set_pmd = lguest_set_pmd;
-#ifdef CONFIG_X86_PAE
-	pv_mmu_ops.set_pte_atomic = lguest_set_pte_atomic;
-	pv_mmu_ops.pte_clear = lguest_pte_clear;
-	pv_mmu_ops.pmd_clear = lguest_pmd_clear;
-	pv_mmu_ops.set_pud = lguest_set_pud;
-#endif
-	pv_mmu_ops.read_cr2 = lguest_read_cr2;
-	pv_mmu_ops.read_cr3 = lguest_read_cr3;
-	pv_mmu_ops.lazy_mode.enter = paravirt_enter_lazy_mmu;
-	pv_mmu_ops.lazy_mode.leave = lguest_leave_lazy_mmu_mode;
-	pv_mmu_ops.lazy_mode.flush = paravirt_flush_lazy_mmu;
-	pv_mmu_ops.pte_update = lguest_pte_update;
-
-#ifdef CONFIG_X86_LOCAL_APIC
-	/* APIC read/write intercepts */
-	set_lguest_basic_apic_ops();
-#endif
-
-	x86_init.resources.memory_setup = lguest_memory_setup;
-	x86_init.irqs.intr_init = lguest_init_IRQ;
-	x86_init.timers.timer_init = lguest_time_init;
-	x86_platform.calibrate_tsc = lguest_tsc_khz;
-	x86_platform.get_wallclock =  lguest_get_wallclock;
-
-	/*
-	 * Now is a good time to look at the implementations of these functions
-	 * before returning to the rest of lguest_init().
-	 */
-
-	/*G:070
-	 * Now we've seen all the paravirt_ops, we return to
-	 * lguest_init() where the rest of the fairly chaotic boot setup
-	 * occurs.
-	 */
-
-	/*
-	 * The stack protector is a weird thing where gcc places a canary
-	 * value on the stack and then checks it on return.  This file is
-	 * compiled with -fno-stack-protector it, so we got this far without
-	 * problems.  The value of the canary is kept at offset 20 from the
-	 * %gs register, so we need to set that up before calling C functions
-	 * in other files.
-	 */
-	setup_stack_canary_segment(0);
-
-	/*
-	 * We could just call load_stack_canary_segment(), but we might as well
-	 * call switch_to_new_gdt() which loads the whole table and sets up the
-	 * per-cpu segment descriptor register %fs as well.
-	 */
-	switch_to_new_gdt(0);
-
-	/*
-	 * The Host<->Guest Switcher lives at the top of our address space, and
-	 * the Host told us how big it is when we made LGUEST_INIT hypercall:
-	 * it put the answer in lguest_data.reserve_mem
-	 */
-	reserve_top_address(lguest_data.reserve_mem);
-
-	/* Hook in our special panic hypercall code. */
-	atomic_notifier_chain_register(&panic_notifier_list, &paniced);
-
-	/*
-	 * This is messy CPU setup stuff which the native boot code does before
-	 * start_kernel, so we have to do, too:
-	 */
-	cpu_detect(&new_cpu_data);
-	/* head.S usually sets up the first capability word, so do it here. */
-	new_cpu_data.x86_capability[CPUID_1_EDX] = cpuid_edx(1);
-
-	/* Math is always hard! */
-	set_cpu_cap(&new_cpu_data, X86_FEATURE_FPU);
-
-	/* We don't have features.  We have puppies!  Puppies! */
-#ifdef CONFIG_X86_MCE
-	mca_cfg.disabled = true;
-#endif
-#ifdef CONFIG_ACPI
-	acpi_disabled = 1;
-#endif
-
-	/*
-	 * We set the preferred console to "hvc".  This is the "hypervisor
-	 * virtual console" driver written by the PowerPC people, which we also
-	 * adapted for lguest's use.
-	 */
-	add_preferred_console("hvc", 0, NULL);
-
-	/* Register our very early console. */
-	virtio_cons_early_init(early_put_chars);
-
-	/* Don't let ACPI try to control our PCI interrupts. */
-	disable_acpi();
-
-	/* We control them ourselves, by overriding these two hooks. */
-	pcibios_enable_irq = lguest_enable_irq;
-	pcibios_disable_irq = lguest_disable_irq;
-
-	/*
-	 * Last of all, we set the power management poweroff hook to point to
-	 * the Guest routine to power off, and the reboot hook to our restart
-	 * routine.
-	 */
-	pm_power_off = lguest_power_off;
-	machine_ops.restart = lguest_restart;
-
-	/*
-	 * Now we're set up, call i386_start_kernel() in head32.c and we proceed
-	 * to boot as normal.  It never returns.
-	 */
-	i386_start_kernel();
-}
-/*
- * This marks the end of stage II of our journey, The Guest.
- *
- * It is now time for us to explore the layer of virtual drivers and complete
- * our understanding of the Guest in "make Drivers".
- */
diff --git a/arch/x86/lguest/head_32.S b/arch/x86/lguest/head_32.S
deleted file mode 100644
index d5ae63f5ec5d..000000000000
--- a/arch/x86/lguest/head_32.S
+++ /dev/null
@@ -1,192 +0,0 @@
-#include <linux/linkage.h>
-#include <linux/lguest.h>
-#include <asm/lguest_hcall.h>
-#include <asm/asm-offsets.h>
-#include <asm/thread_info.h>
-#include <asm/processor-flags.h>
-
-/*G:020
-
- * Our story starts with the bzImage: booting starts at startup_32 in
- * arch/x86/boot/compressed/head_32.S.  This merely uncompresses the real
- * kernel in place and then jumps into it: startup_32 in
- * arch/x86/kernel/head_32.S.  Both routines expects a boot header in the %esi
- * register, which is created by the bootloader (the Launcher in our case).
- *
- * The startup_32 function does very little: it clears the uninitialized global
- * C variables which we expect to be zero (ie. BSS) and then copies the boot
- * header and kernel command line somewhere safe, and populates some initial
- * page tables.  Finally it checks the 'hardware_subarch' field.  This was
- * introduced in 2.6.24 for lguest and Xen: if it's set to '1' (lguest's
- * assigned number), then it calls us here.
- *
- * WARNING: be very careful here!  We're running at addresses equal to physical
- * addresses (around 0), not above PAGE_OFFSET as most code expects
- * (eg. 0xC0000000).  Jumps are relative, so they're OK, but we can't touch any
- * data without remembering to subtract __PAGE_OFFSET!
- *
- * The .section line puts this code in .init.text so it will be discarded after
- * boot.
- */
-.section .init.text, "ax", @progbits
-ENTRY(lguest_entry)
-	/*
-	 * We make the "initialization" hypercall now to tell the Host where
-	 * our lguest_data struct is.
-	 */
-	movl $LHCALL_LGUEST_INIT, %eax
-	movl $lguest_data - __PAGE_OFFSET, %ebx
-	int $LGUEST_TRAP_ENTRY
-
-	/* Now turn our pagetables on; setup by arch/x86/kernel/head_32.S. */
-	movl $LHCALL_NEW_PGTABLE, %eax
-	movl $(initial_page_table - __PAGE_OFFSET), %ebx
-	int $LGUEST_TRAP_ENTRY
-
-	/* Set up the initial stack so we can run C code. */
-	movl $(init_thread_union+THREAD_SIZE),%esp
-
-	/* Jumps are relative: we're running __PAGE_OFFSET too low. */
-	jmp lguest_init+__PAGE_OFFSET
-
-/*G:055
- * We create a macro which puts the assembler code between lgstart_ and lgend_
- * markers.  These templates are put in the .text section: they can't be
- * discarded after boot as we may need to patch modules, too.
- */
-.text
-#define LGUEST_PATCH(name, insns...)			\
-	lgstart_##name:	insns; lgend_##name:;		\
-	.globl lgstart_##name; .globl lgend_##name
-
-LGUEST_PATCH(cli, movl $0, lguest_data+LGUEST_DATA_irq_enabled)
-LGUEST_PATCH(pushf, movl lguest_data+LGUEST_DATA_irq_enabled, %eax)
-
-/*G:033
- * But using those wrappers is inefficient (we'll see why that doesn't matter
- * for save_fl and irq_disable later).  If we write our routines carefully in
- * assembler, we can avoid clobbering any registers and avoid jumping through
- * the wrapper functions.
- *
- * I skipped over our first piece of assembler, but this one is worth studying
- * in a bit more detail so I'll describe in easy stages.  First, the routine to
- * enable interrupts:
- */
-ENTRY(lg_irq_enable)
-	/*
-	 * The reverse of irq_disable, this sets lguest_data.irq_enabled to
-	 * X86_EFLAGS_IF (ie. "Interrupts enabled").
-	 */
-	movl $X86_EFLAGS_IF, lguest_data+LGUEST_DATA_irq_enabled
-	/*
-	 * But now we need to check if the Host wants to know: there might have
-	 * been interrupts waiting to be delivered, in which case it will have
-	 * set lguest_data.irq_pending to X86_EFLAGS_IF.  If it's not zero, we
-	 * jump to send_interrupts, otherwise we're done.
-	 */
-	cmpl $0, lguest_data+LGUEST_DATA_irq_pending
-	jnz send_interrupts
-	/*
-	 * One cool thing about x86 is that you can do many things without using
-	 * a register.  In this case, the normal path hasn't needed to save or
-	 * restore any registers at all!
-	 */
-	ret
-send_interrupts:
-	/*
-	 * OK, now we need a register: eax is used for the hypercall number,
-	 * which is LHCALL_SEND_INTERRUPTS.
-	 *
-	 * We used not to bother with this pending detection at all, which was
-	 * much simpler.  Sooner or later the Host would realize it had to
-	 * send us an interrupt.  But that turns out to make performance 7
-	 * times worse on a simple tcp benchmark.  So now we do this the hard
-	 * way.
-	 */
-	pushl %eax
-	movl $LHCALL_SEND_INTERRUPTS, %eax
-	/* This is the actual hypercall trap. */
-	int  $LGUEST_TRAP_ENTRY
-	/* Put eax back the way we found it. */
-	popl %eax
-	ret
-
-/*
- * Finally, the "popf" or "restore flags" routine.  The %eax register holds the
- * flags (in practice, either X86_EFLAGS_IF or 0): if it's X86_EFLAGS_IF we're
- * enabling interrupts again, if it's 0 we're leaving them off.
- */
-ENTRY(lg_restore_fl)
-	/* This is just "lguest_data.irq_enabled = flags;" */
-	movl %eax, lguest_data+LGUEST_DATA_irq_enabled
-	/*
-	 * Now, if the %eax value has enabled interrupts and
-	 * lguest_data.irq_pending is set, we want to tell the Host so it can
-	 * deliver any outstanding interrupts.  Fortunately, both values will
-	 * be X86_EFLAGS_IF (ie. 512) in that case, and the "testl"
-	 * instruction will AND them together for us.  If both are set, we
-	 * jump to send_interrupts.
-	 */
-	testl lguest_data+LGUEST_DATA_irq_pending, %eax
-	jnz send_interrupts
-	/* Again, the normal path has used no extra registers.  Clever, huh? */
-	ret
-/*:*/
-
-/* These demark the EIP where host should never deliver interrupts. */
-.global lguest_noirq_iret
-
-/*M:004
- * When the Host reflects a trap or injects an interrupt into the Guest, it
- * sets the eflags interrupt bit on the stack based on lguest_data.irq_enabled,
- * so the Guest iret logic does the right thing when restoring it.  However,
- * when the Host sets the Guest up for direct traps, such as system calls, the
- * processor is the one to push eflags onto the stack, and the interrupt bit
- * will be 1 (in reality, interrupts are always enabled in the Guest).
- *
- * This turns out to be harmless: the only trap which should happen under Linux
- * with interrupts disabled is Page Fault (due to our lazy mapping of vmalloc
- * regions), which has to be reflected through the Host anyway.  If another
- * trap *does* go off when interrupts are disabled, the Guest will panic, and
- * we'll never get to this iret!
-:*/
-
-/*G:045
- * There is one final paravirt_op that the Guest implements, and glancing at it
- * you can see why I left it to last.  It's *cool*!  It's in *assembler*!
- *
- * The "iret" instruction is used to return from an interrupt or trap.  The
- * stack looks like this:
- *   old address
- *   old code segment & privilege level
- *   old processor flags ("eflags")
- *
- * The "iret" instruction pops those values off the stack and restores them all
- * at once.  The only problem is that eflags includes the Interrupt Flag which
- * the Guest can't change: the CPU will simply ignore it when we do an "iret".
- * So we have to copy eflags from the stack to lguest_data.irq_enabled before
- * we do the "iret".
- *
- * There are two problems with this: firstly, we can't clobber any registers
- * and secondly, the whole thing needs to be atomic.  The first problem
- * is solved by using "push memory"/"pop memory" instruction pair for copying.
- *
- * The second is harder: copying eflags to lguest_data.irq_enabled will turn
- * interrupts on before we're finished, so we could be interrupted before we
- * return to userspace or wherever.  Our solution to this is to tell the
- * Host that it is *never* to interrupt us there, even if interrupts seem to be
- * enabled. (It's not necessary to protect pop instruction, since
- * data gets updated only after it completes, so we only need to protect
- * one instruction, iret).
- */
-ENTRY(lguest_iret)
-	pushl	2*4(%esp)
-	/*
-	 * Note the %ss: segment prefix here.  Normal data accesses use the
-	 * "ds" segment, but that will have already been restored for whatever
-	 * we're returning to (such as userspace): we can't trust it.  The %ss:
-	 * prefix makes sure we use the stack segment, which is still valid.
-	 */
-	popl	%ss:lguest_data+LGUEST_DATA_irq_enabled
-lguest_noirq_iret:
-	iret
diff --git a/arch/x86/lib/cmdline.c b/arch/x86/lib/cmdline.c
index 5cc78bf57232..3261abb21ef4 100644
--- a/arch/x86/lib/cmdline.c
+++ b/arch/x86/lib/cmdline.c
@@ -104,7 +104,112 @@ __cmdline_find_option_bool(const char *cmdline, int max_cmdline_size,
 	return 0;	/* Buffer overrun */
 }
 
+/*
+ * Find a non-boolean option (i.e. option=argument). In accordance with
+ * standard Linux practice, if this option is repeated, this returns the
+ * last instance on the command line.
+ *
+ * @cmdline: the cmdline string
+ * @max_cmdline_size: the maximum size of cmdline
+ * @option: option string to look for
+ * @buffer: memory buffer to return the option argument
+ * @bufsize: size of the supplied memory buffer
+ *
+ * Returns the length of the argument (regardless of if it was
+ * truncated to fit in the buffer), or -1 on not found.
+ */
+static int
+__cmdline_find_option(const char *cmdline, int max_cmdline_size,
+		      const char *option, char *buffer, int bufsize)
+{
+	char c;
+	int pos = 0, len = -1;
+	const char *opptr = NULL;
+	char *bufptr = buffer;
+	enum {
+		st_wordstart = 0,	/* Start of word/after whitespace */
+		st_wordcmp,	/* Comparing this word */
+		st_wordskip,	/* Miscompare, skip */
+		st_bufcpy,	/* Copying this to buffer */
+	} state = st_wordstart;
+
+	if (!cmdline)
+		return -1;      /* No command line */
+
+	/*
+	 * This 'pos' check ensures we do not overrun
+	 * a non-NULL-terminated 'cmdline'
+	 */
+	while (pos++ < max_cmdline_size) {
+		c = *(char *)cmdline++;
+		if (!c)
+			break;
+
+		switch (state) {
+		case st_wordstart:
+			if (myisspace(c))
+				break;
+
+			state = st_wordcmp;
+			opptr = option;
+			/* fall through */
+
+		case st_wordcmp:
+			if ((c == '=') && !*opptr) {
+				/*
+				 * We matched all the way to the end of the
+				 * option we were looking for, prepare to
+				 * copy the argument.
+				 */
+				len = 0;
+				bufptr = buffer;
+				state = st_bufcpy;
+				break;
+			} else if (c == *opptr++) {
+				/*
+				 * We are currently matching, so continue
+				 * to the next character on the cmdline.
+				 */
+				break;
+			}
+			state = st_wordskip;
+			/* fall through */
+
+		case st_wordskip:
+			if (myisspace(c))
+				state = st_wordstart;
+			break;
+
+		case st_bufcpy:
+			if (myisspace(c)) {
+				state = st_wordstart;
+			} else {
+				/*
+				 * Increment len, but don't overrun the
+				 * supplied buffer and leave room for the
+				 * NULL terminator.
+				 */
+				if (++len < bufsize)
+					*bufptr++ = c;
+			}
+			break;
+		}
+	}
+
+	if (bufsize)
+		*bufptr = '\0';
+
+	return len;
+}
+
 int cmdline_find_option_bool(const char *cmdline, const char *option)
 {
 	return __cmdline_find_option_bool(cmdline, COMMAND_LINE_SIZE, option);
 }
+
+int cmdline_find_option(const char *cmdline, const char *option, char *buffer,
+			int bufsize)
+{
+	return __cmdline_find_option(cmdline, COMMAND_LINE_SIZE, option,
+				     buffer, bufsize);
+}
diff --git a/arch/x86/math-emu/div_Xsig.S b/arch/x86/math-emu/div_Xsig.S
index f77ba3058b31..066996dba6a2 100644
--- a/arch/x86/math-emu/div_Xsig.S
+++ b/arch/x86/math-emu/div_Xsig.S
@@ -363,3 +363,4 @@ L_bugged_2:
 	pop	%ebx
 	jmp	L_exit
 #endif /* PARANOID */ 
+ENDPROC(div_Xsig)
diff --git a/arch/x86/math-emu/div_small.S b/arch/x86/math-emu/div_small.S
index 47099628fa4c..2c71527bd917 100644
--- a/arch/x86/math-emu/div_small.S
+++ b/arch/x86/math-emu/div_small.S
@@ -44,4 +44,4 @@ ENTRY(FPU_div_small)
 
 	leave
 	ret
-
+ENDPROC(FPU_div_small)
diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c
index 0203baefb5c0..d4a7df2205b8 100644
--- a/arch/x86/math-emu/fpu_entry.c
+++ b/arch/x86/math-emu/fpu_entry.c
@@ -147,7 +147,7 @@ void math_emulate(struct math_emu_info *info)
 		}
 
 		code_descriptor = FPU_get_ldt_descriptor(FPU_CS);
-		if (SEG_D_SIZE(code_descriptor)) {
+		if (code_descriptor.d) {
 			/* The above test may be wrong, the book is not clear */
 			/* Segmented 32 bit protected mode */
 			addr_modes.default_mode = SEG32;
@@ -155,11 +155,10 @@ void math_emulate(struct math_emu_info *info)
 			/* 16 bit protected mode */
 			addr_modes.default_mode = PM16;
 		}
-		FPU_EIP += code_base = SEG_BASE_ADDR(code_descriptor);
-		code_limit = code_base
-		    + (SEG_LIMIT(code_descriptor) +
-		       1) * SEG_GRANULARITY(code_descriptor)
-		    - 1;
+		FPU_EIP += code_base = seg_get_base(&code_descriptor);
+		code_limit = seg_get_limit(&code_descriptor) + 1;
+		code_limit *= seg_get_granularity(&code_descriptor);
+		code_limit += code_base - 1;
 		if (code_limit < code_base)
 			code_limit = 0xffffffff;
 	}
diff --git a/arch/x86/math-emu/fpu_system.h b/arch/x86/math-emu/fpu_system.h
index a179254a5122..699f329f1d40 100644
--- a/arch/x86/math-emu/fpu_system.h
+++ b/arch/x86/math-emu/fpu_system.h
@@ -34,17 +34,43 @@ static inline struct desc_struct FPU_get_ldt_descriptor(unsigned seg)
 	return ret;
 }
 
-#define SEG_D_SIZE(x)		((x).b & (3 << 21))
-#define SEG_G_BIT(x)		((x).b & (1 << 23))
-#define SEG_GRANULARITY(x)	(((x).b & (1 << 23)) ? 4096 : 1)
-#define SEG_286_MODE(x)		((x).b & ( 0xff000000 | 0xf0000 | (1 << 23)))
-#define SEG_BASE_ADDR(s)	(((s).b & 0xff000000) \
-				 | (((s).b & 0xff) << 16) | ((s).a >> 16))
-#define SEG_LIMIT(s)		(((s).b & 0xff0000) | ((s).a & 0xffff))
-#define SEG_EXECUTE_ONLY(s)	(((s).b & ((1 << 11) | (1 << 9))) == (1 << 11))
-#define SEG_WRITE_PERM(s)	(((s).b & ((1 << 11) | (1 << 9))) == (1 << 9))
-#define SEG_EXPAND_DOWN(s)	(((s).b & ((1 << 11) | (1 << 10))) \
-				 == (1 << 10))
+#define SEG_TYPE_WRITABLE	(1U << 1)
+#define SEG_TYPE_EXPANDS_DOWN	(1U << 2)
+#define SEG_TYPE_EXECUTE	(1U << 3)
+#define SEG_TYPE_EXPAND_MASK	(SEG_TYPE_EXPANDS_DOWN | SEG_TYPE_EXECUTE)
+#define SEG_TYPE_EXECUTE_MASK	(SEG_TYPE_WRITABLE | SEG_TYPE_EXECUTE)
+
+static inline unsigned long seg_get_base(struct desc_struct *d)
+{
+	unsigned long base = (unsigned long)d->base2 << 24;
+
+	return base | ((unsigned long)d->base1 << 16) | d->base0;
+}
+
+static inline unsigned long seg_get_limit(struct desc_struct *d)
+{
+	return ((unsigned long)d->limit1 << 16) | d->limit0;
+}
+
+static inline unsigned long seg_get_granularity(struct desc_struct *d)
+{
+	return d->g ? 4096 : 1;
+}
+
+static inline bool seg_expands_down(struct desc_struct *d)
+{
+	return (d->type & SEG_TYPE_EXPAND_MASK) == SEG_TYPE_EXPANDS_DOWN;
+}
+
+static inline bool seg_execute_only(struct desc_struct *d)
+{
+	return (d->type & SEG_TYPE_EXECUTE_MASK) == SEG_TYPE_EXECUTE;
+}
+
+static inline bool seg_writable(struct desc_struct *d)
+{
+	return (d->type & SEG_TYPE_EXECUTE_MASK) == SEG_TYPE_WRITABLE;
+}
 
 #define I387			(&current->thread.fpu.state)
 #define FPU_info		(I387->soft.info)
diff --git a/arch/x86/math-emu/get_address.c b/arch/x86/math-emu/get_address.c
index b8ef9f9d2ffc..c48967c6a0e2 100644
--- a/arch/x86/math-emu/get_address.c
+++ b/arch/x86/math-emu/get_address.c
@@ -159,17 +159,18 @@ static long pm_address(u_char FPU_modrm, u_char segment,
 	}
 
 	descriptor = FPU_get_ldt_descriptor(addr->selector);
-	base_address = SEG_BASE_ADDR(descriptor);
+	base_address = seg_get_base(&descriptor);
 	address = base_address + offset;
-	limit = base_address
-	    + (SEG_LIMIT(descriptor) + 1) * SEG_GRANULARITY(descriptor) - 1;
+	limit = seg_get_limit(&descriptor) + 1;
+	limit *= seg_get_granularity(&descriptor);
+	limit += base_address - 1;
 	if (limit < base_address)
 		limit = 0xffffffff;
 
-	if (SEG_EXPAND_DOWN(descriptor)) {
-		if (SEG_G_BIT(descriptor))
+	if (seg_expands_down(&descriptor)) {
+		if (descriptor.g) {
 			seg_top = 0xffffffff;
-		else {
+		} else {
 			seg_top = base_address + (1 << 20);
 			if (seg_top < base_address)
 				seg_top = 0xffffffff;
@@ -182,8 +183,8 @@ static long pm_address(u_char FPU_modrm, u_char segment,
 		    (address > limit) || (address < base_address) ? 0 :
 		    ((limit - address) >= 254 ? 255 : limit - address + 1);
 	}
-	if (SEG_EXECUTE_ONLY(descriptor) ||
-	    (!SEG_WRITE_PERM(descriptor) && (FPU_modrm & FPU_WRITE_BIT))) {
+	if (seg_execute_only(&descriptor) ||
+	    (!seg_writable(&descriptor) && (FPU_modrm & FPU_WRITE_BIT))) {
 		access_limit = 0;
 	}
 	return address;
diff --git a/arch/x86/math-emu/mul_Xsig.S b/arch/x86/math-emu/mul_Xsig.S
index 717785a53eb4..22e0631bb85a 100644
--- a/arch/x86/math-emu/mul_Xsig.S
+++ b/arch/x86/math-emu/mul_Xsig.S
@@ -62,6 +62,7 @@ ENTRY(mul32_Xsig)
 	popl %esi
 	leave
 	ret
+ENDPROC(mul32_Xsig)
 
 
 ENTRY(mul64_Xsig)
@@ -114,6 +115,7 @@ ENTRY(mul64_Xsig)
 	popl %esi
 	leave
 	ret
+ENDPROC(mul64_Xsig)
 
 
 
@@ -173,4 +175,4 @@ ENTRY(mul_Xsig_Xsig)
 	popl %esi
 	leave
 	ret
-
+ENDPROC(mul_Xsig_Xsig)
diff --git a/arch/x86/math-emu/polynom_Xsig.S b/arch/x86/math-emu/polynom_Xsig.S
index 17315c89ff3d..a9aaf414135d 100644
--- a/arch/x86/math-emu/polynom_Xsig.S
+++ b/arch/x86/math-emu/polynom_Xsig.S
@@ -133,3 +133,4 @@ L_accum_done:
 	popl	%esi
 	leave
 	ret
+ENDPROC(polynomial_Xsig)
diff --git a/arch/x86/math-emu/reg_norm.S b/arch/x86/math-emu/reg_norm.S
index 8b6352efceef..53ac1a343c69 100644
--- a/arch/x86/math-emu/reg_norm.S
+++ b/arch/x86/math-emu/reg_norm.S
@@ -94,6 +94,7 @@ L_overflow:
 	call	arith_overflow
 	pop	%ebx
 	jmp	L_exit
+ENDPROC(FPU_normalize)
 
 
 
@@ -145,3 +146,4 @@ L_exit_nuo_zero:
 	popl	%ebx
 	leave
 	ret
+ENDPROC(FPU_normalize_nuo)
diff --git a/arch/x86/math-emu/reg_round.S b/arch/x86/math-emu/reg_round.S
index d1d4e48b4f67..41af5b208d88 100644
--- a/arch/x86/math-emu/reg_round.S
+++ b/arch/x86/math-emu/reg_round.S
@@ -706,3 +706,5 @@ L_exception_exit:
 	mov	$-1,%eax
 	jmp	fpu_reg_round_special_exit
 #endif /* PARANOID */ 
+
+ENDPROC(FPU_round)
diff --git a/arch/x86/math-emu/reg_u_add.S b/arch/x86/math-emu/reg_u_add.S
index 47c4c2434d85..3b1bc5e9b2f6 100644
--- a/arch/x86/math-emu/reg_u_add.S
+++ b/arch/x86/math-emu/reg_u_add.S
@@ -165,3 +165,4 @@ L_exit:
 	leave
 	ret
 #endif /* PARANOID */
+ENDPROC(FPU_u_add)
diff --git a/arch/x86/math-emu/reg_u_div.S b/arch/x86/math-emu/reg_u_div.S
index cc00654b6f9a..796eb5ab921b 100644
--- a/arch/x86/math-emu/reg_u_div.S
+++ b/arch/x86/math-emu/reg_u_div.S
@@ -469,3 +469,5 @@ L_exit:
 	leave
 	ret
 #endif /* PARANOID */ 
+
+ENDPROC(FPU_u_div)
diff --git a/arch/x86/math-emu/reg_u_mul.S b/arch/x86/math-emu/reg_u_mul.S
index 973f12af97df..6196f68cf3c1 100644
--- a/arch/x86/math-emu/reg_u_mul.S
+++ b/arch/x86/math-emu/reg_u_mul.S
@@ -146,3 +146,4 @@ L_exit:
 	ret
 #endif /* PARANOID */ 
 
+ENDPROC(FPU_u_mul)
diff --git a/arch/x86/math-emu/reg_u_sub.S b/arch/x86/math-emu/reg_u_sub.S
index 1b6c24801d22..d115b900919a 100644
--- a/arch/x86/math-emu/reg_u_sub.S
+++ b/arch/x86/math-emu/reg_u_sub.S
@@ -270,3 +270,4 @@ L_exit:
 	popl	%esi
 	leave
 	ret
+ENDPROC(FPU_u_sub)
diff --git a/arch/x86/math-emu/round_Xsig.S b/arch/x86/math-emu/round_Xsig.S
index bbe0e87718e4..87c99749a495 100644
--- a/arch/x86/math-emu/round_Xsig.S
+++ b/arch/x86/math-emu/round_Xsig.S
@@ -78,7 +78,7 @@ L_exit:
 	popl	%ebx
 	leave
 	ret
-
+ENDPROC(round_Xsig)
 
 
 
@@ -138,4 +138,4 @@ L_n_exit:
 	popl	%ebx
 	leave
 	ret
-
+ENDPROC(norm_Xsig)
diff --git a/arch/x86/math-emu/shr_Xsig.S b/arch/x86/math-emu/shr_Xsig.S
index 31cdd118e918..c8552edeec75 100644
--- a/arch/x86/math-emu/shr_Xsig.S
+++ b/arch/x86/math-emu/shr_Xsig.S
@@ -85,3 +85,4 @@ L_more_than_95:
 	popl	%esi
 	leave
 	ret
+ENDPROC(shr_Xsig)
diff --git a/arch/x86/math-emu/wm_shrx.S b/arch/x86/math-emu/wm_shrx.S
index 518428317985..340dd6897f85 100644
--- a/arch/x86/math-emu/wm_shrx.S
+++ b/arch/x86/math-emu/wm_shrx.S
@@ -92,6 +92,7 @@ L_more_than_95:
 	popl	%esi
 	leave
 	ret
+ENDPROC(FPU_shrx)
 
 
 /*---------------------------------------------------------------------------+
@@ -202,3 +203,4 @@ Ls_more_than_95:
 	popl	%esi
 	leave
 	ret
+ENDPROC(FPU_shrxs)
diff --git a/arch/x86/math-emu/wm_sqrt.S b/arch/x86/math-emu/wm_sqrt.S
index d258f59564e1..695afae38fdf 100644
--- a/arch/x86/math-emu/wm_sqrt.S
+++ b/arch/x86/math-emu/wm_sqrt.S
@@ -468,3 +468,4 @@ sqrt_more_prec_large:
 /* Our estimate is too large */
 	movl	$0x7fffff00,%eax
 	jmp	sqrt_round_result
+ENDPROC(wm_sqrt)
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 0fbdcb64f9f8..72bf8c01c6e3 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -39,3 +39,5 @@ obj-$(CONFIG_X86_INTEL_MPX)	+= mpx.o
 obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) += pkeys.o
 obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o
 
+obj-$(CONFIG_AMD_MEM_ENCRYPT)	+= mem_encrypt.o
+obj-$(CONFIG_AMD_MEM_ENCRYPT)	+= mem_encrypt_boot.o
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index 0470826d2bdc..5e3ac6fe6c9e 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -13,12 +13,12 @@
  */
 
 #include <linux/debugfs.h>
+#include <linux/kasan.h>
 #include <linux/mm.h>
 #include <linux/init.h>
 #include <linux/sched.h>
 #include <linux/seq_file.h>
 
-#include <asm/kasan.h>
 #include <asm/pgtable.h>
 
 /*
@@ -138,7 +138,7 @@ static void printk_prot(struct seq_file *m, pgprot_t prot, int level, bool dmsg)
 {
 	pgprotval_t pr = pgprot_val(prot);
 	static const char * const level_name[] =
-		{ "cr3", "pgd", "pud", "pmd", "pte" };
+		{ "cr3", "pgd", "p4d", "pud", "pmd", "pte" };
 
 	if (!pgprot_val(prot)) {
 		/* Not present */
@@ -162,12 +162,12 @@ static void printk_prot(struct seq_file *m, pgprot_t prot, int level, bool dmsg)
 			pt_dump_cont_printf(m, dmsg, "    ");
 
 		/* Bit 7 has a different meaning on level 3 vs 4 */
-		if (level <= 3 && pr & _PAGE_PSE)
+		if (level <= 4 && pr & _PAGE_PSE)
 			pt_dump_cont_printf(m, dmsg, "PSE ");
 		else
 			pt_dump_cont_printf(m, dmsg, "    ");
-		if ((level == 4 && pr & _PAGE_PAT) ||
-		    ((level == 3 || level == 2) && pr & _PAGE_PAT_LARGE))
+		if ((level == 5 && pr & _PAGE_PAT) ||
+		    ((level == 4 || level == 3) && pr & _PAGE_PAT_LARGE))
 			pt_dump_cont_printf(m, dmsg, "PAT ");
 		else
 			pt_dump_cont_printf(m, dmsg, "    ");
@@ -188,11 +188,12 @@ static void printk_prot(struct seq_file *m, pgprot_t prot, int level, bool dmsg)
  */
 static unsigned long normalize_addr(unsigned long u)
 {
-#ifdef CONFIG_X86_64
-	return (signed long)(u << 16) >> 16;
-#else
-	return u;
-#endif
+	int shift;
+	if (!IS_ENABLED(CONFIG_X86_64))
+		return u;
+
+	shift = 64 - (__VIRTUAL_MASK_SHIFT + 1);
+	return (signed long)(u << shift) >> shift;
 }
 
 /*
@@ -297,32 +298,62 @@ static void walk_pte_level(struct seq_file *m, struct pg_state *st, pmd_t addr,
 	for (i = 0; i < PTRS_PER_PTE; i++) {
 		prot = pte_flags(*start);
 		st->current_address = normalize_addr(P + i * PTE_LEVEL_MULT);
-		note_page(m, st, __pgprot(prot), 4);
+		note_page(m, st, __pgprot(prot), 5);
 		start++;
 	}
 }
+#ifdef CONFIG_KASAN
+
+/*
+ * This is an optimization for KASAN=y case. Since all kasan page tables
+ * eventually point to the kasan_zero_page we could call note_page()
+ * right away without walking through lower level page tables. This saves
+ * us dozens of seconds (minutes for 5-level config) while checking for
+ * W+X mapping or reading kernel_page_tables debugfs file.
+ */
+static inline bool kasan_page_table(struct seq_file *m, struct pg_state *st,
+				void *pt)
+{
+	if (__pa(pt) == __pa(kasan_zero_pmd) ||
+#ifdef CONFIG_X86_5LEVEL
+	    __pa(pt) == __pa(kasan_zero_p4d) ||
+#endif
+	    __pa(pt) == __pa(kasan_zero_pud)) {
+		pgprotval_t prot = pte_flags(kasan_zero_pte[0]);
+		note_page(m, st, __pgprot(prot), 5);
+		return true;
+	}
+	return false;
+}
+#else
+static inline bool kasan_page_table(struct seq_file *m, struct pg_state *st,
+				void *pt)
+{
+	return false;
+}
+#endif
 
 #if PTRS_PER_PMD > 1
 
 static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pud_t addr, unsigned long P)
 {
 	int i;
-	pmd_t *start;
+	pmd_t *start, *pmd_start;
 	pgprotval_t prot;
 
-	start = (pmd_t *)pud_page_vaddr(addr);
+	pmd_start = start = (pmd_t *)pud_page_vaddr(addr);
 	for (i = 0; i < PTRS_PER_PMD; i++) {
 		st->current_address = normalize_addr(P + i * PMD_LEVEL_MULT);
 		if (!pmd_none(*start)) {
 			if (pmd_large(*start) || !pmd_present(*start)) {
 				prot = pmd_flags(*start);
-				note_page(m, st, __pgprot(prot), 3);
-			} else {
+				note_page(m, st, __pgprot(prot), 4);
+			} else if (!kasan_page_table(m, st, pmd_start)) {
 				walk_pte_level(m, st, *start,
 					       P + i * PMD_LEVEL_MULT);
 			}
 		} else
-			note_page(m, st, __pgprot(0), 3);
+			note_page(m, st, __pgprot(0), 4);
 		start++;
 	}
 }
@@ -335,39 +366,27 @@ static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pud_t addr,
 
 #if PTRS_PER_PUD > 1
 
-/*
- * This is an optimization for CONFIG_DEBUG_WX=y + CONFIG_KASAN=y
- * KASAN fills page tables with the same values. Since there is no
- * point in checking page table more than once we just skip repeated
- * entries. This saves us dozens of seconds during boot.
- */
-static bool pud_already_checked(pud_t *prev_pud, pud_t *pud, bool checkwx)
-{
-	return checkwx && prev_pud && (pud_val(*prev_pud) == pud_val(*pud));
-}
-
 static void walk_pud_level(struct seq_file *m, struct pg_state *st, p4d_t addr, unsigned long P)
 {
 	int i;
-	pud_t *start;
+	pud_t *start, *pud_start;
 	pgprotval_t prot;
 	pud_t *prev_pud = NULL;
 
-	start = (pud_t *)p4d_page_vaddr(addr);
+	pud_start = start = (pud_t *)p4d_page_vaddr(addr);
 
 	for (i = 0; i < PTRS_PER_PUD; i++) {
 		st->current_address = normalize_addr(P + i * PUD_LEVEL_MULT);
-		if (!pud_none(*start) &&
-		    !pud_already_checked(prev_pud, start, st->check_wx)) {
+		if (!pud_none(*start)) {
 			if (pud_large(*start) || !pud_present(*start)) {
 				prot = pud_flags(*start);
-				note_page(m, st, __pgprot(prot), 2);
-			} else {
+				note_page(m, st, __pgprot(prot), 3);
+			} else if (!kasan_page_table(m, st, pud_start)) {
 				walk_pmd_level(m, st, *start,
 					       P + i * PUD_LEVEL_MULT);
 			}
 		} else
-			note_page(m, st, __pgprot(0), 2);
+			note_page(m, st, __pgprot(0), 3);
 
 		prev_pud = start;
 		start++;
@@ -385,10 +404,10 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st, p4d_t addr,
 static void walk_p4d_level(struct seq_file *m, struct pg_state *st, pgd_t addr, unsigned long P)
 {
 	int i;
-	p4d_t *start;
+	p4d_t *start, *p4d_start;
 	pgprotval_t prot;
 
-	start = (p4d_t *)pgd_page_vaddr(addr);
+	p4d_start = start = (p4d_t *)pgd_page_vaddr(addr);
 
 	for (i = 0; i < PTRS_PER_P4D; i++) {
 		st->current_address = normalize_addr(P + i * P4D_LEVEL_MULT);
@@ -396,7 +415,7 @@ static void walk_p4d_level(struct seq_file *m, struct pg_state *st, pgd_t addr,
 			if (p4d_large(*start) || !p4d_present(*start)) {
 				prot = p4d_flags(*start);
 				note_page(m, st, __pgprot(prot), 2);
-			} else {
+			} else if (!kasan_page_table(m, st, p4d_start)) {
 				walk_pud_level(m, st, *start,
 					       P + i * P4D_LEVEL_MULT);
 			}
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index 0ea8afcb929c..c076f710de4c 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -36,6 +36,48 @@ bool ex_handler_fault(const struct exception_table_entry *fixup,
 }
 EXPORT_SYMBOL_GPL(ex_handler_fault);
 
+/*
+ * Handler for UD0 exception following a failed test against the
+ * result of a refcount inc/dec/add/sub.
+ */
+bool ex_handler_refcount(const struct exception_table_entry *fixup,
+			 struct pt_regs *regs, int trapnr)
+{
+	/* First unconditionally saturate the refcount. */
+	*(int *)regs->cx = INT_MIN / 2;
+
+	/*
+	 * Strictly speaking, this reports the fixup destination, not
+	 * the fault location, and not the actually overflowing
+	 * instruction, which is the instruction before the "js", but
+	 * since that instruction could be a variety of lengths, just
+	 * report the location after the overflow, which should be close
+	 * enough for finding the overflow, as it's at least back in
+	 * the function, having returned from .text.unlikely.
+	 */
+	regs->ip = ex_fixup_addr(fixup);
+
+	/*
+	 * This function has been called because either a negative refcount
+	 * value was seen by any of the refcount functions, or a zero
+	 * refcount value was seen by refcount_dec().
+	 *
+	 * If we crossed from INT_MAX to INT_MIN, OF (Overflow Flag: result
+	 * wrapped around) will be set. Additionally, seeing the refcount
+	 * reach 0 will set ZF (Zero Flag: result was zero). In each of
+	 * these cases we want a report, since it's a boundary condition.
+	 *
+	 */
+	if (regs->flags & (X86_EFLAGS_OF | X86_EFLAGS_ZF)) {
+		bool zero = regs->flags & X86_EFLAGS_ZF;
+
+		refcount_error_report(regs, zero ? "hit zero" : "overflow");
+	}
+
+	return true;
+}
+EXPORT_SYMBOL_GPL(ex_handler_refcount);
+
 bool ex_handler_ext(const struct exception_table_entry *fixup,
 		   struct pt_regs *regs, int trapnr)
 {
@@ -142,7 +184,7 @@ void __init early_fixup_exception(struct pt_regs *regs, int trapnr)
 	 * undefined.  I'm not sure which CPUs do this, but at least
 	 * the 486 DX works this way.
 	 */
-	if ((regs->cs & 0xFFFF) != __KERNEL_CS)
+	if (regs->cs != __KERNEL_CS)
 		goto fail;
 
 	/*
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 2a1fa10c6a98..b836a7274e12 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -396,14 +396,18 @@ static void dump_pagetable(unsigned long address)
 	pte_t *pte;
 
 #ifdef CONFIG_X86_PAE
-	printk("*pdpt = %016Lx ", pgd_val(*pgd));
+	pr_info("*pdpt = %016Lx ", pgd_val(*pgd));
 	if (!low_pfn(pgd_val(*pgd) >> PAGE_SHIFT) || !pgd_present(*pgd))
 		goto out;
+#define pr_pde pr_cont
+#else
+#define pr_pde pr_info
 #endif
 	p4d = p4d_offset(pgd, address);
 	pud = pud_offset(p4d, address);
 	pmd = pmd_offset(pud, address);
-	printk(KERN_CONT "*pde = %0*Lx ", sizeof(*pmd) * 2, (u64)pmd_val(*pmd));
+	pr_pde("*pde = %0*Lx ", sizeof(*pmd) * 2, (u64)pmd_val(*pmd));
+#undef pr_pde
 
 	/*
 	 * We must not directly access the pte in the highpte
@@ -415,9 +419,9 @@ static void dump_pagetable(unsigned long address)
 		goto out;
 
 	pte = pte_offset_kernel(pmd, address);
-	printk("*pte = %0*Lx ", sizeof(*pte) * 2, (u64)pte_val(*pte));
+	pr_cont("*pte = %0*Lx ", sizeof(*pte) * 2, (u64)pte_val(*pte));
 out:
-	printk("\n");
+	pr_cont("\n");
 }
 
 #else /* CONFIG_X86_64: */
@@ -565,7 +569,7 @@ static void dump_pagetable(unsigned long address)
 	if (bad_address(pgd))
 		goto bad;
 
-	printk("PGD %lx ", pgd_val(*pgd));
+	pr_info("PGD %lx ", pgd_val(*pgd));
 
 	if (!pgd_present(*pgd))
 		goto out;
@@ -574,7 +578,7 @@ static void dump_pagetable(unsigned long address)
 	if (bad_address(p4d))
 		goto bad;
 
-	printk("P4D %lx ", p4d_val(*p4d));
+	pr_cont("P4D %lx ", p4d_val(*p4d));
 	if (!p4d_present(*p4d) || p4d_large(*p4d))
 		goto out;
 
@@ -582,7 +586,7 @@ static void dump_pagetable(unsigned long address)
 	if (bad_address(pud))
 		goto bad;
 
-	printk("PUD %lx ", pud_val(*pud));
+	pr_cont("PUD %lx ", pud_val(*pud));
 	if (!pud_present(*pud) || pud_large(*pud))
 		goto out;
 
@@ -590,7 +594,7 @@ static void dump_pagetable(unsigned long address)
 	if (bad_address(pmd))
 		goto bad;
 
-	printk("PMD %lx ", pmd_val(*pmd));
+	pr_cont("PMD %lx ", pmd_val(*pmd));
 	if (!pmd_present(*pmd) || pmd_large(*pmd))
 		goto out;
 
@@ -598,12 +602,12 @@ static void dump_pagetable(unsigned long address)
 	if (bad_address(pte))
 		goto bad;
 
-	printk("PTE %lx", pte_val(*pte));
+	pr_cont("PTE %lx", pte_val(*pte));
 out:
-	printk("\n");
+	pr_cont("\n");
 	return;
 bad:
-	printk("BAD\n");
+	pr_info("BAD\n");
 }
 
 #endif /* CONFIG_X86_64 */
@@ -1254,10 +1258,6 @@ static inline bool smap_violation(int error_code, struct pt_regs *regs)
  * This routine handles page faults.  It determines the address,
  * and the problem, and then passes it off to one of the appropriate
  * routines.
- *
- * This function must have noinline because both callers
- * {,trace_}do_page_fault() have notrace on. Having this an actual function
- * guarantees there's a function trace entry.
  */
 static noinline void
 __do_page_fault(struct pt_regs *regs, unsigned long error_code,
@@ -1490,27 +1490,6 @@ good_area:
 }
 NOKPROBE_SYMBOL(__do_page_fault);
 
-dotraplinkage void notrace
-do_page_fault(struct pt_regs *regs, unsigned long error_code)
-{
-	unsigned long address = read_cr2(); /* Get the faulting address */
-	enum ctx_state prev_state;
-
-	/*
-	 * We must have this function tagged with __kprobes, notrace and call
-	 * read_cr2() before calling anything else. To avoid calling any kind
-	 * of tracing machinery before we've observed the CR2 value.
-	 *
-	 * exception_{enter,exit}() contain all sorts of tracepoints.
-	 */
-
-	prev_state = exception_enter();
-	__do_page_fault(regs, error_code, address);
-	exception_exit(prev_state);
-}
-NOKPROBE_SYMBOL(do_page_fault);
-
-#ifdef CONFIG_TRACING
 static nokprobe_inline void
 trace_page_fault_entries(unsigned long address, struct pt_regs *regs,
 			 unsigned long error_code)
@@ -1521,22 +1500,24 @@ trace_page_fault_entries(unsigned long address, struct pt_regs *regs,
 		trace_page_fault_kernel(address, regs, error_code);
 }
 
+/*
+ * We must have this function blacklisted from kprobes, tagged with notrace
+ * and call read_cr2() before calling anything else. To avoid calling any
+ * kind of tracing machinery before we've observed the CR2 value.
+ *
+ * exception_{enter,exit}() contains all sorts of tracepoints.
+ */
 dotraplinkage void notrace
-trace_do_page_fault(struct pt_regs *regs, unsigned long error_code)
+do_page_fault(struct pt_regs *regs, unsigned long error_code)
 {
-	/*
-	 * The exception_enter and tracepoint processing could
-	 * trigger another page faults (user space callchain
-	 * reading) and destroy the original cr2 value, so read
-	 * the faulting address now.
-	 */
-	unsigned long address = read_cr2();
+	unsigned long address = read_cr2(); /* Get the faulting address */
 	enum ctx_state prev_state;
 
 	prev_state = exception_enter();
-	trace_page_fault_entries(address, regs, error_code);
+	if (trace_pagefault_enabled())
+		trace_page_fault_entries(address, regs, error_code);
+
 	__do_page_fault(regs, error_code, address);
 	exception_exit(prev_state);
 }
-NOKPROBE_SYMBOL(trace_do_page_fault);
-#endif /* CONFIG_TRACING */
+NOKPROBE_SYMBOL(do_page_fault);
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index 2824607df108..6d06cf33e3de 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -18,6 +18,7 @@
 #include <asm/tlbflush.h>
 #include <asm/pgalloc.h>
 #include <asm/elf.h>
+#include <asm/mpx.h>
 
 #if 0	/* This is just for testing */
 struct page *
@@ -85,25 +86,38 @@ static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file,
 	info.flags = 0;
 	info.length = len;
 	info.low_limit = get_mmap_base(1);
+
+	/*
+	 * If hint address is above DEFAULT_MAP_WINDOW, look for unmapped area
+	 * in the full address space.
+	 */
 	info.high_limit = in_compat_syscall() ?
-		tasksize_32bit() : tasksize_64bit();
+		task_size_32bit() : task_size_64bit(addr > DEFAULT_MAP_WINDOW);
+
 	info.align_mask = PAGE_MASK & ~huge_page_mask(h);
 	info.align_offset = 0;
 	return vm_unmapped_area(&info);
 }
 
 static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file,
-		unsigned long addr0, unsigned long len,
+		unsigned long addr, unsigned long len,
 		unsigned long pgoff, unsigned long flags)
 {
 	struct hstate *h = hstate_file(file);
 	struct vm_unmapped_area_info info;
-	unsigned long addr;
 
 	info.flags = VM_UNMAPPED_AREA_TOPDOWN;
 	info.length = len;
 	info.low_limit = PAGE_SIZE;
 	info.high_limit = get_mmap_base(0);
+
+	/*
+	 * If hint address is above DEFAULT_MAP_WINDOW, look for unmapped area
+	 * in the full address space.
+	 */
+	if (addr > DEFAULT_MAP_WINDOW && !in_compat_syscall())
+		info.high_limit += TASK_SIZE_MAX - DEFAULT_MAP_WINDOW;
+
 	info.align_mask = PAGE_MASK & ~huge_page_mask(h);
 	info.align_offset = 0;
 	addr = vm_unmapped_area(&info);
@@ -118,7 +132,7 @@ static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file,
 		VM_BUG_ON(addr != -ENOMEM);
 		info.flags = 0;
 		info.low_limit = TASK_UNMAPPED_BASE;
-		info.high_limit = TASK_SIZE;
+		info.high_limit = TASK_SIZE_LOW;
 		addr = vm_unmapped_area(&info);
 	}
 
@@ -135,6 +149,11 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 
 	if (len & ~huge_page_mask(h))
 		return -EINVAL;
+
+	addr = mpx_unmapped_area_check(addr, len, flags);
+	if (IS_ERR_VALUE(addr))
+		return addr;
+
 	if (len > TASK_SIZE)
 		return -ENOMEM;
 
diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c
index adab1595f4bd..31cea988fa36 100644
--- a/arch/x86/mm/ident_map.c
+++ b/arch/x86/mm/ident_map.c
@@ -51,7 +51,7 @@ static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page,
 		if (!pmd)
 			return -ENOMEM;
 		ident_pmd_init(info, pmd, addr, next);
-		set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
+		set_pud(pud, __pud(__pa(pmd) | info->kernpg_flag));
 	}
 
 	return 0;
@@ -79,7 +79,7 @@ static int ident_p4d_init(struct x86_mapping_info *info, p4d_t *p4d_page,
 		if (!pud)
 			return -ENOMEM;
 		ident_pud_init(info, pud, addr, next);
-		set_p4d(p4d, __p4d(__pa(pud) | _KERNPG_TABLE));
+		set_p4d(p4d, __p4d(__pa(pud) | info->kernpg_flag));
 	}
 
 	return 0;
@@ -93,6 +93,10 @@ int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
 	unsigned long next;
 	int result;
 
+	/* Set the default pagetable flags if not supplied */
+	if (!info->kernpg_flag)
+		info->kernpg_flag = _KERNPG_TABLE;
+
 	for (; addr < end; addr = next) {
 		pgd_t *pgd = pgd_page + pgd_index(addr);
 		p4d_t *p4d;
@@ -116,14 +120,14 @@ int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
 		if (result)
 			return result;
 		if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
-			set_pgd(pgd, __pgd(__pa(p4d) | _KERNPG_TABLE));
+			set_pgd(pgd, __pgd(__pa(p4d) | info->kernpg_flag));
 		} else {
 			/*
 			 * With p4d folded, pgd is equal to p4d.
 			 * The pgd entry has to point to the pud page table in this case.
 			 */
 			pud_t *pud = pud_offset(p4d, 0);
-			set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE));
+			set_pgd(pgd, __pgd(__pa(pud) | info->kernpg_flag));
 		}
 	}
 
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index bf3f1065d6ad..7777ccc0e9f9 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -815,7 +815,7 @@ void __init zone_sizes_init(void)
 
 DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = {
 	.loaded_mm = &init_mm,
-	.state = 0,
+	.next_asid = 1,
 	.cr4 = ~0UL,	/* fail hard if we screw up cr4 shadow initialization */
 };
 EXPORT_SYMBOL_GPL(cpu_tlbstate);
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 4c1b5fd0c7ad..34f0e1847dd6 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -13,6 +13,8 @@
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
 #include <linux/mmiotrace.h>
+#include <linux/mem_encrypt.h>
+#include <linux/efi.h>
 
 #include <asm/set_memory.h>
 #include <asm/e820/api.h>
@@ -21,6 +23,7 @@
 #include <asm/tlbflush.h>
 #include <asm/pgalloc.h>
 #include <asm/pat.h>
+#include <asm/setup.h>
 
 #include "physaddr.h"
 
@@ -106,12 +109,6 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,
 	}
 
 	/*
-	 * Don't remap the low PCI/ISA area, it's always mapped..
-	 */
-	if (is_ISA_range(phys_addr, last_addr))
-		return (__force void __iomem *)phys_to_virt(phys_addr);
-
-	/*
 	 * Don't allow anybody to remap normal RAM that we're using..
 	 */
 	pfn      = phys_addr >> PAGE_SHIFT;
@@ -340,13 +337,17 @@ void iounmap(volatile void __iomem *addr)
 		return;
 
 	/*
-	 * __ioremap special-cases the PCI/ISA range by not instantiating a
-	 * vm_area and by simply returning an address into the kernel mapping
-	 * of ISA space.   So handle that here.
+	 * The PCI/ISA range special-casing was removed from __ioremap()
+	 * so this check, in theory, can be removed. However, there are
+	 * cases where iounmap() is called for addresses not obtained via
+	 * ioremap() (vga16fb for example). Add a warning so that these
+	 * cases can be caught and fixed.
 	 */
 	if ((void __force *)addr >= phys_to_virt(ISA_START_ADDRESS) &&
-	    (void __force *)addr < phys_to_virt(ISA_END_ADDRESS))
+	    (void __force *)addr < phys_to_virt(ISA_END_ADDRESS)) {
+		WARN(1, "iounmap() called for ISA range not obtained using ioremap()\n");
 		return;
+	}
 
 	addr = (volatile void __iomem *)
 		(PAGE_MASK & (unsigned long __force)addr);
@@ -399,12 +400,10 @@ void *xlate_dev_mem_ptr(phys_addr_t phys)
 	unsigned long offset = phys & ~PAGE_MASK;
 	void *vaddr;
 
-	/* If page is RAM, we can use __va. Otherwise ioremap and unmap. */
-	if (page_is_ram(start >> PAGE_SHIFT))
-		return __va(phys);
+	/* memremap() maps if RAM, otherwise falls back to ioremap() */
+	vaddr = memremap(start, PAGE_SIZE, MEMREMAP_WB);
 
-	vaddr = ioremap_cache(start, PAGE_SIZE);
-	/* Only add the offset on success and return NULL if the ioremap() failed: */
+	/* Only add the offset on success and return NULL if memremap() failed */
 	if (vaddr)
 		vaddr += offset;
 
@@ -413,11 +412,263 @@ void *xlate_dev_mem_ptr(phys_addr_t phys)
 
 void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr)
 {
-	if (page_is_ram(phys >> PAGE_SHIFT))
-		return;
+	memunmap((void *)((unsigned long)addr & PAGE_MASK));
+}
+
+/*
+ * Examine the physical address to determine if it is an area of memory
+ * that should be mapped decrypted.  If the memory is not part of the
+ * kernel usable area it was accessed and created decrypted, so these
+ * areas should be mapped decrypted. And since the encryption key can
+ * change across reboots, persistent memory should also be mapped
+ * decrypted.
+ */
+static bool memremap_should_map_decrypted(resource_size_t phys_addr,
+					  unsigned long size)
+{
+	int is_pmem;
+
+	/*
+	 * Check if the address is part of a persistent memory region.
+	 * This check covers areas added by E820, EFI and ACPI.
+	 */
+	is_pmem = region_intersects(phys_addr, size, IORESOURCE_MEM,
+				    IORES_DESC_PERSISTENT_MEMORY);
+	if (is_pmem != REGION_DISJOINT)
+		return true;
+
+	/*
+	 * Check if the non-volatile attribute is set for an EFI
+	 * reserved area.
+	 */
+	if (efi_enabled(EFI_BOOT)) {
+		switch (efi_mem_type(phys_addr)) {
+		case EFI_RESERVED_TYPE:
+			if (efi_mem_attributes(phys_addr) & EFI_MEMORY_NV)
+				return true;
+			break;
+		default:
+			break;
+		}
+	}
+
+	/* Check if the address is outside kernel usable area */
+	switch (e820__get_entry_type(phys_addr, phys_addr + size - 1)) {
+	case E820_TYPE_RESERVED:
+	case E820_TYPE_ACPI:
+	case E820_TYPE_NVS:
+	case E820_TYPE_UNUSABLE:
+	case E820_TYPE_PRAM:
+		return true;
+	default:
+		break;
+	}
+
+	return false;
+}
+
+/*
+ * Examine the physical address to determine if it is EFI data. Check
+ * it against the boot params structure and EFI tables and memory types.
+ */
+static bool memremap_is_efi_data(resource_size_t phys_addr,
+				 unsigned long size)
+{
+	u64 paddr;
+
+	/* Check if the address is part of EFI boot/runtime data */
+	if (!efi_enabled(EFI_BOOT))
+		return false;
+
+	paddr = boot_params.efi_info.efi_memmap_hi;
+	paddr <<= 32;
+	paddr |= boot_params.efi_info.efi_memmap;
+	if (phys_addr == paddr)
+		return true;
+
+	paddr = boot_params.efi_info.efi_systab_hi;
+	paddr <<= 32;
+	paddr |= boot_params.efi_info.efi_systab;
+	if (phys_addr == paddr)
+		return true;
+
+	if (efi_is_table_address(phys_addr))
+		return true;
+
+	switch (efi_mem_type(phys_addr)) {
+	case EFI_BOOT_SERVICES_DATA:
+	case EFI_RUNTIME_SERVICES_DATA:
+		return true;
+	default:
+		break;
+	}
+
+	return false;
+}
+
+/*
+ * Examine the physical address to determine if it is boot data by checking
+ * it against the boot params setup_data chain.
+ */
+static bool memremap_is_setup_data(resource_size_t phys_addr,
+				   unsigned long size)
+{
+	struct setup_data *data;
+	u64 paddr, paddr_next;
+
+	paddr = boot_params.hdr.setup_data;
+	while (paddr) {
+		unsigned int len;
+
+		if (phys_addr == paddr)
+			return true;
+
+		data = memremap(paddr, sizeof(*data),
+				MEMREMAP_WB | MEMREMAP_DEC);
+
+		paddr_next = data->next;
+		len = data->len;
+
+		memunmap(data);
+
+		if ((phys_addr > paddr) && (phys_addr < (paddr + len)))
+			return true;
+
+		paddr = paddr_next;
+	}
+
+	return false;
+}
+
+/*
+ * Examine the physical address to determine if it is boot data by checking
+ * it against the boot params setup_data chain (early boot version).
+ */
+static bool __init early_memremap_is_setup_data(resource_size_t phys_addr,
+						unsigned long size)
+{
+	struct setup_data *data;
+	u64 paddr, paddr_next;
+
+	paddr = boot_params.hdr.setup_data;
+	while (paddr) {
+		unsigned int len;
+
+		if (phys_addr == paddr)
+			return true;
+
+		data = early_memremap_decrypted(paddr, sizeof(*data));
+
+		paddr_next = data->next;
+		len = data->len;
+
+		early_memunmap(data, sizeof(*data));
+
+		if ((phys_addr > paddr) && (phys_addr < (paddr + len)))
+			return true;
+
+		paddr = paddr_next;
+	}
+
+	return false;
+}
+
+/*
+ * Architecture function to determine if RAM remap is allowed. By default, a
+ * RAM remap will map the data as encrypted. Determine if a RAM remap should
+ * not be done so that the data will be mapped decrypted.
+ */
+bool arch_memremap_can_ram_remap(resource_size_t phys_addr, unsigned long size,
+				 unsigned long flags)
+{
+	if (!sme_active())
+		return true;
+
+	if (flags & MEMREMAP_ENC)
+		return true;
+
+	if (flags & MEMREMAP_DEC)
+		return false;
+
+	if (memremap_is_setup_data(phys_addr, size) ||
+	    memremap_is_efi_data(phys_addr, size) ||
+	    memremap_should_map_decrypted(phys_addr, size))
+		return false;
+
+	return true;
+}
+
+/*
+ * Architecture override of __weak function to adjust the protection attributes
+ * used when remapping memory. By default, early_memremap() will map the data
+ * as encrypted. Determine if an encrypted mapping should not be done and set
+ * the appropriate protection attributes.
+ */
+pgprot_t __init early_memremap_pgprot_adjust(resource_size_t phys_addr,
+					     unsigned long size,
+					     pgprot_t prot)
+{
+	if (!sme_active())
+		return prot;
+
+	if (early_memremap_is_setup_data(phys_addr, size) ||
+	    memremap_is_efi_data(phys_addr, size) ||
+	    memremap_should_map_decrypted(phys_addr, size))
+		prot = pgprot_decrypted(prot);
+	else
+		prot = pgprot_encrypted(prot);
+
+	return prot;
+}
+
+bool phys_mem_access_encrypted(unsigned long phys_addr, unsigned long size)
+{
+	return arch_memremap_can_ram_remap(phys_addr, size, 0);
+}
+
+#ifdef CONFIG_ARCH_USE_MEMREMAP_PROT
+/* Remap memory with encryption */
+void __init *early_memremap_encrypted(resource_size_t phys_addr,
+				      unsigned long size)
+{
+	return early_memremap_prot(phys_addr, size, __PAGE_KERNEL_ENC);
+}
+
+/*
+ * Remap memory with encryption and write-protected - cannot be called
+ * before pat_init() is called
+ */
+void __init *early_memremap_encrypted_wp(resource_size_t phys_addr,
+					 unsigned long size)
+{
+	/* Be sure the write-protect PAT entry is set for write-protect */
+	if (__pte2cachemode_tbl[_PAGE_CACHE_MODE_WP] != _PAGE_CACHE_MODE_WP)
+		return NULL;
+
+	return early_memremap_prot(phys_addr, size, __PAGE_KERNEL_ENC_WP);
+}
+
+/* Remap memory without encryption */
+void __init *early_memremap_decrypted(resource_size_t phys_addr,
+				      unsigned long size)
+{
+	return early_memremap_prot(phys_addr, size, __PAGE_KERNEL_NOENC);
+}
+
+/*
+ * Remap memory without encryption and write-protected - cannot be called
+ * before pat_init() is called
+ */
+void __init *early_memremap_decrypted_wp(resource_size_t phys_addr,
+					 unsigned long size)
+{
+	/* Be sure the write-protect PAT entry is set for write-protect */
+	if (__pte2cachemode_tbl[_PAGE_CACHE_MODE_WP] != _PAGE_CACHE_MODE_WP)
+		return NULL;
 
-	iounmap((void __iomem *)((unsigned long)addr & PAGE_MASK));
+	return early_memremap_prot(phys_addr, size, __PAGE_KERNEL_NOENC_WP);
 }
+#endif	/* CONFIG_ARCH_USE_MEMREMAP_PROT */
 
 static pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)] __page_aligned_bss;
 
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index 02c9d7553409..bc84b73684b7 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -11,8 +11,8 @@
 #include <asm/e820/types.h>
 #include <asm/tlbflush.h>
 #include <asm/sections.h>
+#include <asm/pgtable.h>
 
-extern pgd_t early_top_pgt[PTRS_PER_PGD];
 extern struct range pfn_mapped[E820_MAX_ENTRIES];
 
 static int __init map_range(struct range *range)
@@ -87,7 +87,7 @@ static struct notifier_block kasan_die_notifier = {
 void __init kasan_early_init(void)
 {
 	int i;
-	pteval_t pte_val = __pa_nodebug(kasan_zero_page) | __PAGE_KERNEL;
+	pteval_t pte_val = __pa_nodebug(kasan_zero_page) | __PAGE_KERNEL | _PAGE_ENC;
 	pmdval_t pmd_val = __pa_nodebug(kasan_zero_pte) | _KERNPG_TABLE;
 	pudval_t pud_val = __pa_nodebug(kasan_zero_pmd) | _KERNPG_TABLE;
 	p4dval_t p4d_val = __pa_nodebug(kasan_zero_pud) | _KERNPG_TABLE;
@@ -153,7 +153,7 @@ void __init kasan_init(void)
 	 */
 	memset(kasan_zero_page, 0, PAGE_SIZE);
 	for (i = 0; i < PTRS_PER_PTE; i++) {
-		pte_t pte = __pte(__pa(kasan_zero_page) | __PAGE_KERNEL_RO);
+		pte_t pte = __pte(__pa(kasan_zero_page) | __PAGE_KERNEL_RO | _PAGE_ENC);
 		set_pte(&kasan_zero_pte[i], pte);
 	}
 	/* Flush TLBs again to be sure that write protection applied. */
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
new file mode 100644
index 000000000000..0fbd09269757
--- /dev/null
+++ b/arch/x86/mm/mem_encrypt.c
@@ -0,0 +1,593 @@
+/*
+ * AMD Memory Encryption Support
+ *
+ * Copyright (C) 2016 Advanced Micro Devices, Inc.
+ *
+ * Author: Tom Lendacky <thomas.lendacky@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/dma-mapping.h>
+#include <linux/swiotlb.h>
+#include <linux/mem_encrypt.h>
+
+#include <asm/tlbflush.h>
+#include <asm/fixmap.h>
+#include <asm/setup.h>
+#include <asm/bootparam.h>
+#include <asm/set_memory.h>
+#include <asm/cacheflush.h>
+#include <asm/sections.h>
+#include <asm/processor-flags.h>
+#include <asm/msr.h>
+#include <asm/cmdline.h>
+
+static char sme_cmdline_arg[] __initdata = "mem_encrypt";
+static char sme_cmdline_on[]  __initdata = "on";
+static char sme_cmdline_off[] __initdata = "off";
+
+/*
+ * Since SME related variables are set early in the boot process they must
+ * reside in the .data section so as not to be zeroed out when the .bss
+ * section is later cleared.
+ */
+unsigned long sme_me_mask __section(.data) = 0;
+EXPORT_SYMBOL_GPL(sme_me_mask);
+
+/* Buffer used for early in-place encryption by BSP, no locking needed */
+static char sme_early_buffer[PAGE_SIZE] __aligned(PAGE_SIZE);
+
+/*
+ * This routine does not change the underlying encryption setting of the
+ * page(s) that map this memory. It assumes that eventually the memory is
+ * meant to be accessed as either encrypted or decrypted but the contents
+ * are currently not in the desired state.
+ *
+ * This routine follows the steps outlined in the AMD64 Architecture
+ * Programmer's Manual Volume 2, Section 7.10.8 Encrypt-in-Place.
+ */
+static void __init __sme_early_enc_dec(resource_size_t paddr,
+				       unsigned long size, bool enc)
+{
+	void *src, *dst;
+	size_t len;
+
+	if (!sme_me_mask)
+		return;
+
+	local_flush_tlb();
+	wbinvd();
+
+	/*
+	 * There are limited number of early mapping slots, so map (at most)
+	 * one page at time.
+	 */
+	while (size) {
+		len = min_t(size_t, sizeof(sme_early_buffer), size);
+
+		/*
+		 * Create mappings for the current and desired format of
+		 * the memory. Use a write-protected mapping for the source.
+		 */
+		src = enc ? early_memremap_decrypted_wp(paddr, len) :
+			    early_memremap_encrypted_wp(paddr, len);
+
+		dst = enc ? early_memremap_encrypted(paddr, len) :
+			    early_memremap_decrypted(paddr, len);
+
+		/*
+		 * If a mapping can't be obtained to perform the operation,
+		 * then eventual access of that area in the desired mode
+		 * will cause a crash.
+		 */
+		BUG_ON(!src || !dst);
+
+		/*
+		 * Use a temporary buffer, of cache-line multiple size, to
+		 * avoid data corruption as documented in the APM.
+		 */
+		memcpy(sme_early_buffer, src, len);
+		memcpy(dst, sme_early_buffer, len);
+
+		early_memunmap(dst, len);
+		early_memunmap(src, len);
+
+		paddr += len;
+		size -= len;
+	}
+}
+
+void __init sme_early_encrypt(resource_size_t paddr, unsigned long size)
+{
+	__sme_early_enc_dec(paddr, size, true);
+}
+
+void __init sme_early_decrypt(resource_size_t paddr, unsigned long size)
+{
+	__sme_early_enc_dec(paddr, size, false);
+}
+
+static void __init __sme_early_map_unmap_mem(void *vaddr, unsigned long size,
+					     bool map)
+{
+	unsigned long paddr = (unsigned long)vaddr - __PAGE_OFFSET;
+	pmdval_t pmd_flags, pmd;
+
+	/* Use early_pmd_flags but remove the encryption mask */
+	pmd_flags = __sme_clr(early_pmd_flags);
+
+	do {
+		pmd = map ? (paddr & PMD_MASK) + pmd_flags : 0;
+		__early_make_pgtable((unsigned long)vaddr, pmd);
+
+		vaddr += PMD_SIZE;
+		paddr += PMD_SIZE;
+		size = (size <= PMD_SIZE) ? 0 : size - PMD_SIZE;
+	} while (size);
+
+	__native_flush_tlb();
+}
+
+void __init sme_unmap_bootdata(char *real_mode_data)
+{
+	struct boot_params *boot_data;
+	unsigned long cmdline_paddr;
+
+	if (!sme_active())
+		return;
+
+	/* Get the command line address before unmapping the real_mode_data */
+	boot_data = (struct boot_params *)real_mode_data;
+	cmdline_paddr = boot_data->hdr.cmd_line_ptr | ((u64)boot_data->ext_cmd_line_ptr << 32);
+
+	__sme_early_map_unmap_mem(real_mode_data, sizeof(boot_params), false);
+
+	if (!cmdline_paddr)
+		return;
+
+	__sme_early_map_unmap_mem(__va(cmdline_paddr), COMMAND_LINE_SIZE, false);
+}
+
+void __init sme_map_bootdata(char *real_mode_data)
+{
+	struct boot_params *boot_data;
+	unsigned long cmdline_paddr;
+
+	if (!sme_active())
+		return;
+
+	__sme_early_map_unmap_mem(real_mode_data, sizeof(boot_params), true);
+
+	/* Get the command line address after mapping the real_mode_data */
+	boot_data = (struct boot_params *)real_mode_data;
+	cmdline_paddr = boot_data->hdr.cmd_line_ptr | ((u64)boot_data->ext_cmd_line_ptr << 32);
+
+	if (!cmdline_paddr)
+		return;
+
+	__sme_early_map_unmap_mem(__va(cmdline_paddr), COMMAND_LINE_SIZE, true);
+}
+
+void __init sme_early_init(void)
+{
+	unsigned int i;
+
+	if (!sme_me_mask)
+		return;
+
+	early_pmd_flags = __sme_set(early_pmd_flags);
+
+	__supported_pte_mask = __sme_set(__supported_pte_mask);
+
+	/* Update the protection map with memory encryption mask */
+	for (i = 0; i < ARRAY_SIZE(protection_map); i++)
+		protection_map[i] = pgprot_encrypted(protection_map[i]);
+}
+
+/* Architecture __weak replacement functions */
+void __init mem_encrypt_init(void)
+{
+	if (!sme_me_mask)
+		return;
+
+	/* Call into SWIOTLB to update the SWIOTLB DMA buffers */
+	swiotlb_update_mem_attributes();
+
+	pr_info("AMD Secure Memory Encryption (SME) active\n");
+}
+
+void swiotlb_set_mem_attributes(void *vaddr, unsigned long size)
+{
+	WARN(PAGE_ALIGN(size) != size,
+	     "size is not page-aligned (%#lx)\n", size);
+
+	/* Make the SWIOTLB buffer area decrypted */
+	set_memory_decrypted((unsigned long)vaddr, size >> PAGE_SHIFT);
+}
+
+static void __init sme_clear_pgd(pgd_t *pgd_base, unsigned long start,
+				 unsigned long end)
+{
+	unsigned long pgd_start, pgd_end, pgd_size;
+	pgd_t *pgd_p;
+
+	pgd_start = start & PGDIR_MASK;
+	pgd_end = end & PGDIR_MASK;
+
+	pgd_size = (((pgd_end - pgd_start) / PGDIR_SIZE) + 1);
+	pgd_size *= sizeof(pgd_t);
+
+	pgd_p = pgd_base + pgd_index(start);
+
+	memset(pgd_p, 0, pgd_size);
+}
+
+#define PGD_FLAGS	_KERNPG_TABLE_NOENC
+#define P4D_FLAGS	_KERNPG_TABLE_NOENC
+#define PUD_FLAGS	_KERNPG_TABLE_NOENC
+#define PMD_FLAGS	(__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL)
+
+static void __init *sme_populate_pgd(pgd_t *pgd_base, void *pgtable_area,
+				     unsigned long vaddr, pmdval_t pmd_val)
+{
+	pgd_t *pgd_p;
+	p4d_t *p4d_p;
+	pud_t *pud_p;
+	pmd_t *pmd_p;
+
+	pgd_p = pgd_base + pgd_index(vaddr);
+	if (native_pgd_val(*pgd_p)) {
+		if (IS_ENABLED(CONFIG_X86_5LEVEL))
+			p4d_p = (p4d_t *)(native_pgd_val(*pgd_p) & ~PTE_FLAGS_MASK);
+		else
+			pud_p = (pud_t *)(native_pgd_val(*pgd_p) & ~PTE_FLAGS_MASK);
+	} else {
+		pgd_t pgd;
+
+		if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
+			p4d_p = pgtable_area;
+			memset(p4d_p, 0, sizeof(*p4d_p) * PTRS_PER_P4D);
+			pgtable_area += sizeof(*p4d_p) * PTRS_PER_P4D;
+
+			pgd = native_make_pgd((pgdval_t)p4d_p + PGD_FLAGS);
+		} else {
+			pud_p = pgtable_area;
+			memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD);
+			pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD;
+
+			pgd = native_make_pgd((pgdval_t)pud_p + PGD_FLAGS);
+		}
+		native_set_pgd(pgd_p, pgd);
+	}
+
+	if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
+		p4d_p += p4d_index(vaddr);
+		if (native_p4d_val(*p4d_p)) {
+			pud_p = (pud_t *)(native_p4d_val(*p4d_p) & ~PTE_FLAGS_MASK);
+		} else {
+			p4d_t p4d;
+
+			pud_p = pgtable_area;
+			memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD);
+			pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD;
+
+			p4d = native_make_p4d((pudval_t)pud_p + P4D_FLAGS);
+			native_set_p4d(p4d_p, p4d);
+		}
+	}
+
+	pud_p += pud_index(vaddr);
+	if (native_pud_val(*pud_p)) {
+		if (native_pud_val(*pud_p) & _PAGE_PSE)
+			goto out;
+
+		pmd_p = (pmd_t *)(native_pud_val(*pud_p) & ~PTE_FLAGS_MASK);
+	} else {
+		pud_t pud;
+
+		pmd_p = pgtable_area;
+		memset(pmd_p, 0, sizeof(*pmd_p) * PTRS_PER_PMD);
+		pgtable_area += sizeof(*pmd_p) * PTRS_PER_PMD;
+
+		pud = native_make_pud((pmdval_t)pmd_p + PUD_FLAGS);
+		native_set_pud(pud_p, pud);
+	}
+
+	pmd_p += pmd_index(vaddr);
+	if (!native_pmd_val(*pmd_p) || !(native_pmd_val(*pmd_p) & _PAGE_PSE))
+		native_set_pmd(pmd_p, native_make_pmd(pmd_val));
+
+out:
+	return pgtable_area;
+}
+
+static unsigned long __init sme_pgtable_calc(unsigned long len)
+{
+	unsigned long p4d_size, pud_size, pmd_size;
+	unsigned long total;
+
+	/*
+	 * Perform a relatively simplistic calculation of the pagetable
+	 * entries that are needed. That mappings will be covered by 2MB
+	 * PMD entries so we can conservatively calculate the required
+	 * number of P4D, PUD and PMD structures needed to perform the
+	 * mappings. Incrementing the count for each covers the case where
+	 * the addresses cross entries.
+	 */
+	if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
+		p4d_size = (ALIGN(len, PGDIR_SIZE) / PGDIR_SIZE) + 1;
+		p4d_size *= sizeof(p4d_t) * PTRS_PER_P4D;
+		pud_size = (ALIGN(len, P4D_SIZE) / P4D_SIZE) + 1;
+		pud_size *= sizeof(pud_t) * PTRS_PER_PUD;
+	} else {
+		p4d_size = 0;
+		pud_size = (ALIGN(len, PGDIR_SIZE) / PGDIR_SIZE) + 1;
+		pud_size *= sizeof(pud_t) * PTRS_PER_PUD;
+	}
+	pmd_size = (ALIGN(len, PUD_SIZE) / PUD_SIZE) + 1;
+	pmd_size *= sizeof(pmd_t) * PTRS_PER_PMD;
+
+	total = p4d_size + pud_size + pmd_size;
+
+	/*
+	 * Now calculate the added pagetable structures needed to populate
+	 * the new pagetables.
+	 */
+	if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
+		p4d_size = ALIGN(total, PGDIR_SIZE) / PGDIR_SIZE;
+		p4d_size *= sizeof(p4d_t) * PTRS_PER_P4D;
+		pud_size = ALIGN(total, P4D_SIZE) / P4D_SIZE;
+		pud_size *= sizeof(pud_t) * PTRS_PER_PUD;
+	} else {
+		p4d_size = 0;
+		pud_size = ALIGN(total, PGDIR_SIZE) / PGDIR_SIZE;
+		pud_size *= sizeof(pud_t) * PTRS_PER_PUD;
+	}
+	pmd_size = ALIGN(total, PUD_SIZE) / PUD_SIZE;
+	pmd_size *= sizeof(pmd_t) * PTRS_PER_PMD;
+
+	total += p4d_size + pud_size + pmd_size;
+
+	return total;
+}
+
+void __init sme_encrypt_kernel(void)
+{
+	unsigned long workarea_start, workarea_end, workarea_len;
+	unsigned long execute_start, execute_end, execute_len;
+	unsigned long kernel_start, kernel_end, kernel_len;
+	unsigned long pgtable_area_len;
+	unsigned long paddr, pmd_flags;
+	unsigned long decrypted_base;
+	void *pgtable_area;
+	pgd_t *pgd;
+
+	if (!sme_active())
+		return;
+
+	/*
+	 * Prepare for encrypting the kernel by building new pagetables with
+	 * the necessary attributes needed to encrypt the kernel in place.
+	 *
+	 *   One range of virtual addresses will map the memory occupied
+	 *   by the kernel as encrypted.
+	 *
+	 *   Another range of virtual addresses will map the memory occupied
+	 *   by the kernel as decrypted and write-protected.
+	 *
+	 *     The use of write-protect attribute will prevent any of the
+	 *     memory from being cached.
+	 */
+
+	/* Physical addresses gives us the identity mapped virtual addresses */
+	kernel_start = __pa_symbol(_text);
+	kernel_end = ALIGN(__pa_symbol(_end), PMD_PAGE_SIZE);
+	kernel_len = kernel_end - kernel_start;
+
+	/* Set the encryption workarea to be immediately after the kernel */
+	workarea_start = kernel_end;
+
+	/*
+	 * Calculate required number of workarea bytes needed:
+	 *   executable encryption area size:
+	 *     stack page (PAGE_SIZE)
+	 *     encryption routine page (PAGE_SIZE)
+	 *     intermediate copy buffer (PMD_PAGE_SIZE)
+	 *   pagetable structures for the encryption of the kernel
+	 *   pagetable structures for workarea (in case not currently mapped)
+	 */
+	execute_start = workarea_start;
+	execute_end = execute_start + (PAGE_SIZE * 2) + PMD_PAGE_SIZE;
+	execute_len = execute_end - execute_start;
+
+	/*
+	 * One PGD for both encrypted and decrypted mappings and a set of
+	 * PUDs and PMDs for each of the encrypted and decrypted mappings.
+	 */
+	pgtable_area_len = sizeof(pgd_t) * PTRS_PER_PGD;
+	pgtable_area_len += sme_pgtable_calc(execute_end - kernel_start) * 2;
+
+	/* PUDs and PMDs needed in the current pagetables for the workarea */
+	pgtable_area_len += sme_pgtable_calc(execute_len + pgtable_area_len);
+
+	/*
+	 * The total workarea includes the executable encryption area and
+	 * the pagetable area.
+	 */
+	workarea_len = execute_len + pgtable_area_len;
+	workarea_end = workarea_start + workarea_len;
+
+	/*
+	 * Set the address to the start of where newly created pagetable
+	 * structures (PGDs, PUDs and PMDs) will be allocated. New pagetable
+	 * structures are created when the workarea is added to the current
+	 * pagetables and when the new encrypted and decrypted kernel
+	 * mappings are populated.
+	 */
+	pgtable_area = (void *)execute_end;
+
+	/*
+	 * Make sure the current pagetable structure has entries for
+	 * addressing the workarea.
+	 */
+	pgd = (pgd_t *)native_read_cr3_pa();
+	paddr = workarea_start;
+	while (paddr < workarea_end) {
+		pgtable_area = sme_populate_pgd(pgd, pgtable_area,
+						paddr,
+						paddr + PMD_FLAGS);
+
+		paddr += PMD_PAGE_SIZE;
+	}
+
+	/* Flush the TLB - no globals so cr3 is enough */
+	native_write_cr3(__native_read_cr3());
+
+	/*
+	 * A new pagetable structure is being built to allow for the kernel
+	 * to be encrypted. It starts with an empty PGD that will then be
+	 * populated with new PUDs and PMDs as the encrypted and decrypted
+	 * kernel mappings are created.
+	 */
+	pgd = pgtable_area;
+	memset(pgd, 0, sizeof(*pgd) * PTRS_PER_PGD);
+	pgtable_area += sizeof(*pgd) * PTRS_PER_PGD;
+
+	/* Add encrypted kernel (identity) mappings */
+	pmd_flags = PMD_FLAGS | _PAGE_ENC;
+	paddr = kernel_start;
+	while (paddr < kernel_end) {
+		pgtable_area = sme_populate_pgd(pgd, pgtable_area,
+						paddr,
+						paddr + pmd_flags);
+
+		paddr += PMD_PAGE_SIZE;
+	}
+
+	/*
+	 * A different PGD index/entry must be used to get different
+	 * pagetable entries for the decrypted mapping. Choose the next
+	 * PGD index and convert it to a virtual address to be used as
+	 * the base of the mapping.
+	 */
+	decrypted_base = (pgd_index(workarea_end) + 1) & (PTRS_PER_PGD - 1);
+	decrypted_base <<= PGDIR_SHIFT;
+
+	/* Add decrypted, write-protected kernel (non-identity) mappings */
+	pmd_flags = (PMD_FLAGS & ~_PAGE_CACHE_MASK) | (_PAGE_PAT | _PAGE_PWT);
+	paddr = kernel_start;
+	while (paddr < kernel_end) {
+		pgtable_area = sme_populate_pgd(pgd, pgtable_area,
+						paddr + decrypted_base,
+						paddr + pmd_flags);
+
+		paddr += PMD_PAGE_SIZE;
+	}
+
+	/* Add decrypted workarea mappings to both kernel mappings */
+	paddr = workarea_start;
+	while (paddr < workarea_end) {
+		pgtable_area = sme_populate_pgd(pgd, pgtable_area,
+						paddr,
+						paddr + PMD_FLAGS);
+
+		pgtable_area = sme_populate_pgd(pgd, pgtable_area,
+						paddr + decrypted_base,
+						paddr + PMD_FLAGS);
+
+		paddr += PMD_PAGE_SIZE;
+	}
+
+	/* Perform the encryption */
+	sme_encrypt_execute(kernel_start, kernel_start + decrypted_base,
+			    kernel_len, workarea_start, (unsigned long)pgd);
+
+	/*
+	 * At this point we are running encrypted.  Remove the mappings for
+	 * the decrypted areas - all that is needed for this is to remove
+	 * the PGD entry/entries.
+	 */
+	sme_clear_pgd(pgd, kernel_start + decrypted_base,
+		      kernel_end + decrypted_base);
+
+	sme_clear_pgd(pgd, workarea_start + decrypted_base,
+		      workarea_end + decrypted_base);
+
+	/* Flush the TLB - no globals so cr3 is enough */
+	native_write_cr3(__native_read_cr3());
+}
+
+void __init __nostackprotector sme_enable(struct boot_params *bp)
+{
+	const char *cmdline_ptr, *cmdline_arg, *cmdline_on, *cmdline_off;
+	unsigned int eax, ebx, ecx, edx;
+	bool active_by_default;
+	unsigned long me_mask;
+	char buffer[16];
+	u64 msr;
+
+	/* Check for the SME support leaf */
+	eax = 0x80000000;
+	ecx = 0;
+	native_cpuid(&eax, &ebx, &ecx, &edx);
+	if (eax < 0x8000001f)
+		return;
+
+	/*
+	 * Check for the SME feature:
+	 *   CPUID Fn8000_001F[EAX] - Bit 0
+	 *     Secure Memory Encryption support
+	 *   CPUID Fn8000_001F[EBX] - Bits 5:0
+	 *     Pagetable bit position used to indicate encryption
+	 */
+	eax = 0x8000001f;
+	ecx = 0;
+	native_cpuid(&eax, &ebx, &ecx, &edx);
+	if (!(eax & 1))
+		return;
+
+	me_mask = 1UL << (ebx & 0x3f);
+
+	/* Check if SME is enabled */
+	msr = __rdmsr(MSR_K8_SYSCFG);
+	if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT))
+		return;
+
+	/*
+	 * Fixups have not been applied to phys_base yet and we're running
+	 * identity mapped, so we must obtain the address to the SME command
+	 * line argument data using rip-relative addressing.
+	 */
+	asm ("lea sme_cmdline_arg(%%rip), %0"
+	     : "=r" (cmdline_arg)
+	     : "p" (sme_cmdline_arg));
+	asm ("lea sme_cmdline_on(%%rip), %0"
+	     : "=r" (cmdline_on)
+	     : "p" (sme_cmdline_on));
+	asm ("lea sme_cmdline_off(%%rip), %0"
+	     : "=r" (cmdline_off)
+	     : "p" (sme_cmdline_off));
+
+	if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT))
+		active_by_default = true;
+	else
+		active_by_default = false;
+
+	cmdline_ptr = (const char *)((u64)bp->hdr.cmd_line_ptr |
+				     ((u64)bp->ext_cmd_line_ptr << 32));
+
+	cmdline_find_option(cmdline_ptr, cmdline_arg, buffer, sizeof(buffer));
+
+	if (!strncmp(buffer, cmdline_on, sizeof(buffer)))
+		sme_me_mask = me_mask;
+	else if (!strncmp(buffer, cmdline_off, sizeof(buffer)))
+		sme_me_mask = 0;
+	else
+		sme_me_mask = active_by_default ? me_mask : 0;
+}
diff --git a/arch/x86/mm/mem_encrypt_boot.S b/arch/x86/mm/mem_encrypt_boot.S
new file mode 100644
index 000000000000..730e6d541df1
--- /dev/null
+++ b/arch/x86/mm/mem_encrypt_boot.S
@@ -0,0 +1,149 @@
+/*
+ * AMD Memory Encryption Support
+ *
+ * Copyright (C) 2016 Advanced Micro Devices, Inc.
+ *
+ * Author: Tom Lendacky <thomas.lendacky@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <asm/pgtable.h>
+#include <asm/page.h>
+#include <asm/processor-flags.h>
+#include <asm/msr-index.h>
+
+	.text
+	.code64
+ENTRY(sme_encrypt_execute)
+
+	/*
+	 * Entry parameters:
+	 *   RDI - virtual address for the encrypted kernel mapping
+	 *   RSI - virtual address for the decrypted kernel mapping
+	 *   RDX - length of kernel
+	 *   RCX - virtual address of the encryption workarea, including:
+	 *     - stack page (PAGE_SIZE)
+	 *     - encryption routine page (PAGE_SIZE)
+	 *     - intermediate copy buffer (PMD_PAGE_SIZE)
+	 *    R8 - physcial address of the pagetables to use for encryption
+	 */
+
+	push	%rbp
+	movq	%rsp, %rbp		/* RBP now has original stack pointer */
+
+	/* Set up a one page stack in the non-encrypted memory area */
+	movq	%rcx, %rax		/* Workarea stack page */
+	leaq	PAGE_SIZE(%rax), %rsp	/* Set new stack pointer */
+	addq	$PAGE_SIZE, %rax	/* Workarea encryption routine */
+
+	push	%r12
+	movq	%rdi, %r10		/* Encrypted kernel */
+	movq	%rsi, %r11		/* Decrypted kernel */
+	movq	%rdx, %r12		/* Kernel length */
+
+	/* Copy encryption routine into the workarea */
+	movq	%rax, %rdi				/* Workarea encryption routine */
+	leaq	__enc_copy(%rip), %rsi			/* Encryption routine */
+	movq	$(.L__enc_copy_end - __enc_copy), %rcx	/* Encryption routine length */
+	rep	movsb
+
+	/* Setup registers for call */
+	movq	%r10, %rdi		/* Encrypted kernel */
+	movq	%r11, %rsi		/* Decrypted kernel */
+	movq	%r8, %rdx		/* Pagetables used for encryption */
+	movq	%r12, %rcx		/* Kernel length */
+	movq	%rax, %r8		/* Workarea encryption routine */
+	addq	$PAGE_SIZE, %r8		/* Workarea intermediate copy buffer */
+
+	call	*%rax			/* Call the encryption routine */
+
+	pop	%r12
+
+	movq	%rbp, %rsp		/* Restore original stack pointer */
+	pop	%rbp
+
+	ret
+ENDPROC(sme_encrypt_execute)
+
+ENTRY(__enc_copy)
+/*
+ * Routine used to encrypt kernel.
+ *   This routine must be run outside of the kernel proper since
+ *   the kernel will be encrypted during the process. So this
+ *   routine is defined here and then copied to an area outside
+ *   of the kernel where it will remain and run decrypted
+ *   during execution.
+ *
+ *   On entry the registers must be:
+ *     RDI - virtual address for the encrypted kernel mapping
+ *     RSI - virtual address for the decrypted kernel mapping
+ *     RDX - address of the pagetables to use for encryption
+ *     RCX - length of kernel
+ *      R8 - intermediate copy buffer
+ *
+ *     RAX - points to this routine
+ *
+ * The kernel will be encrypted by copying from the non-encrypted
+ * kernel space to an intermediate buffer and then copying from the
+ * intermediate buffer back to the encrypted kernel space. The physical
+ * addresses of the two kernel space mappings are the same which
+ * results in the kernel being encrypted "in place".
+ */
+	/* Enable the new page tables */
+	mov	%rdx, %cr3
+
+	/* Flush any global TLBs */
+	mov	%cr4, %rdx
+	andq	$~X86_CR4_PGE, %rdx
+	mov	%rdx, %cr4
+	orq	$X86_CR4_PGE, %rdx
+	mov	%rdx, %cr4
+
+	/* Set the PAT register PA5 entry to write-protect */
+	push	%rcx
+	movl	$MSR_IA32_CR_PAT, %ecx
+	rdmsr
+	push	%rdx			/* Save original PAT value */
+	andl	$0xffff00ff, %edx	/* Clear PA5 */
+	orl	$0x00000500, %edx	/* Set PA5 to WP */
+	wrmsr
+	pop	%rdx			/* RDX contains original PAT value */
+	pop	%rcx
+
+	movq	%rcx, %r9		/* Save kernel length */
+	movq	%rdi, %r10		/* Save encrypted kernel address */
+	movq	%rsi, %r11		/* Save decrypted kernel address */
+
+	wbinvd				/* Invalidate any cache entries */
+
+	/* Copy/encrypt 2MB at a time */
+1:
+	movq	%r11, %rsi		/* Source - decrypted kernel */
+	movq	%r8, %rdi		/* Dest   - intermediate copy buffer */
+	movq	$PMD_PAGE_SIZE, %rcx	/* 2MB length */
+	rep	movsb
+
+	movq	%r8, %rsi		/* Source - intermediate copy buffer */
+	movq	%r10, %rdi		/* Dest   - encrypted kernel */
+	movq	$PMD_PAGE_SIZE, %rcx	/* 2MB length */
+	rep	movsb
+
+	addq	$PMD_PAGE_SIZE, %r11
+	addq	$PMD_PAGE_SIZE, %r10
+	subq	$PMD_PAGE_SIZE, %r9	/* Kernel length decrement */
+	jnz	1b			/* Kernel length not zero? */
+
+	/* Restore PAT register */
+	push	%rdx			/* Save original PAT value */
+	movl	$MSR_IA32_CR_PAT, %ecx
+	rdmsr
+	pop	%rdx			/* Restore original PAT value */
+	wrmsr
+
+	ret
+.L__enc_copy_end:
+ENDPROC(__enc_copy)
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index a88cfbfbd078..a99679826846 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -37,21 +37,21 @@ struct va_alignment __read_mostly va_align = {
 	.flags = -1,
 };
 
-unsigned long tasksize_32bit(void)
+unsigned long task_size_32bit(void)
 {
 	return IA32_PAGE_OFFSET;
 }
 
-unsigned long tasksize_64bit(void)
+unsigned long task_size_64bit(int full_addr_space)
 {
-	return TASK_SIZE_MAX;
+	return full_addr_space ? TASK_SIZE_MAX : DEFAULT_MAP_WINDOW;
 }
 
 static unsigned long stack_maxrandom_size(unsigned long task_size)
 {
 	unsigned long max = 0;
 	if (current->flags & PF_RANDOMIZE) {
-		max = (-1UL) & __STACK_RND_MASK(task_size == tasksize_32bit());
+		max = (-1UL) & __STACK_RND_MASK(task_size == task_size_32bit());
 		max <<= PAGE_SHIFT;
 	}
 
@@ -141,7 +141,7 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
 		mm->get_unmapped_area = arch_get_unmapped_area_topdown;
 
 	arch_pick_mmap_base(&mm->mmap_base, &mm->mmap_legacy_base,
-			arch_rnd(mmap64_rnd_bits), tasksize_64bit());
+			arch_rnd(mmap64_rnd_bits), task_size_64bit(0));
 
 #ifdef CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES
 	/*
@@ -151,7 +151,7 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
 	 * mmap_base, the compat syscall uses mmap_compat_base.
 	 */
 	arch_pick_mmap_base(&mm->mmap_compat_base, &mm->mmap_compat_legacy_base,
-			arch_rnd(mmap32_rnd_bits), tasksize_32bit());
+			arch_rnd(mmap32_rnd_bits), task_size_32bit());
 #endif
 }
 
diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c
index 1c34b767c84c..9ceaa955d2ba 100644
--- a/arch/x86/mm/mpx.c
+++ b/arch/x86/mm/mpx.c
@@ -355,10 +355,19 @@ int mpx_enable_management(void)
 	 */
 	bd_base = mpx_get_bounds_dir();
 	down_write(&mm->mmap_sem);
+
+	/* MPX doesn't support addresses above 47 bits yet. */
+	if (find_vma(mm, DEFAULT_MAP_WINDOW)) {
+		pr_warn_once("%s (%d): MPX cannot handle addresses "
+				"above 47-bits. Disabling.",
+				current->comm, current->pid);
+		ret = -ENXIO;
+		goto out;
+	}
 	mm->context.bd_addr = bd_base;
 	if (mm->context.bd_addr == MPX_INVALID_BOUNDS_DIR)
 		ret = -ENXIO;
-
+out:
 	up_write(&mm->mmap_sem);
 	return ret;
 }
@@ -1030,3 +1039,25 @@ void mpx_notify_unmap(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (ret)
 		force_sig(SIGSEGV, current);
 }
+
+/* MPX cannot handle addresses above 47 bits yet. */
+unsigned long mpx_unmapped_area_check(unsigned long addr, unsigned long len,
+		unsigned long flags)
+{
+	if (!kernel_managing_mpx_tables(current->mm))
+		return addr;
+	if (addr + len <= DEFAULT_MAP_WINDOW)
+		return addr;
+	if (flags & MAP_FIXED)
+		return -ENOMEM;
+
+	/*
+	 * Requested len is larger than the whole area we're allowed to map in.
+	 * Resetting hinting address wouldn't do much good -- fail early.
+	 */
+	if (len > DEFAULT_MAP_WINDOW)
+		return -ENOMEM;
+
+	/* Look for unmap area within DEFAULT_MAP_WINDOW */
+	return 0;
+}
diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c
index a8f90ce3dedf..d805162e6045 100644
--- a/arch/x86/mm/numa_emulation.c
+++ b/arch/x86/mm/numa_emulation.c
@@ -75,13 +75,15 @@ static int __init emu_setup_memblk(struct numa_meminfo *ei,
 
 /*
  * Sets up nr_nodes fake nodes interleaved over physical nodes ranging from addr
- * to max_addr.  The return value is the number of nodes allocated.
+ * to max_addr.
+ *
+ * Returns zero on success or negative on error.
  */
 static int __init split_nodes_interleave(struct numa_meminfo *ei,
 					 struct numa_meminfo *pi,
 					 u64 addr, u64 max_addr, int nr_nodes)
 {
-	nodemask_t physnode_mask = NODE_MASK_NONE;
+	nodemask_t physnode_mask = numa_nodes_parsed;
 	u64 size;
 	int big;
 	int nid = 0;
@@ -116,9 +118,6 @@ static int __init split_nodes_interleave(struct numa_meminfo *ei,
 		return -1;
 	}
 
-	for (i = 0; i < pi->nr_blks; i++)
-		node_set(pi->blk[i].nid, physnode_mask);
-
 	/*
 	 * Continue to fill physical nodes with fake nodes until there is no
 	 * memory left on any of them.
@@ -200,13 +199,15 @@ static u64 __init find_end_of_node(u64 start, u64 max_addr, u64 size)
 
 /*
  * Sets up fake nodes of `size' interleaved over physical nodes ranging from
- * `addr' to `max_addr'.  The return value is the number of nodes allocated.
+ * `addr' to `max_addr'.
+ *
+ * Returns zero on success or negative on error.
  */
 static int __init split_nodes_size_interleave(struct numa_meminfo *ei,
 					      struct numa_meminfo *pi,
 					      u64 addr, u64 max_addr, u64 size)
 {
-	nodemask_t physnode_mask = NODE_MASK_NONE;
+	nodemask_t physnode_mask = numa_nodes_parsed;
 	u64 min_size;
 	int nid = 0;
 	int i, ret;
@@ -231,9 +232,6 @@ static int __init split_nodes_size_interleave(struct numa_meminfo *ei,
 	}
 	size &= FAKE_NODE_MIN_HASH_MASK;
 
-	for (i = 0; i < pi->nr_blks; i++)
-		node_set(pi->blk[i].nid, physnode_mask);
-
 	/*
 	 * Fill physical nodes with fake nodes of size until there is no memory
 	 * left on any of them.
@@ -280,6 +278,22 @@ static int __init split_nodes_size_interleave(struct numa_meminfo *ei,
 	return 0;
 }
 
+int __init setup_emu2phys_nid(int *dfl_phys_nid)
+{
+	int i, max_emu_nid = 0;
+
+	*dfl_phys_nid = NUMA_NO_NODE;
+	for (i = 0; i < ARRAY_SIZE(emu_nid_to_phys); i++) {
+		if (emu_nid_to_phys[i] != NUMA_NO_NODE) {
+			max_emu_nid = i;
+			if (*dfl_phys_nid == NUMA_NO_NODE)
+				*dfl_phys_nid = emu_nid_to_phys[i];
+		}
+	}
+
+	return max_emu_nid;
+}
+
 /**
  * numa_emulation - Emulate NUMA nodes
  * @numa_meminfo: NUMA configuration to massage
@@ -376,23 +390,18 @@ void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt)
 	 * Determine the max emulated nid and the default phys nid to use
 	 * for unmapped nodes.
 	 */
-	max_emu_nid = 0;
-	dfl_phys_nid = NUMA_NO_NODE;
-	for (i = 0; i < ARRAY_SIZE(emu_nid_to_phys); i++) {
-		if (emu_nid_to_phys[i] != NUMA_NO_NODE) {
-			max_emu_nid = i;
-			if (dfl_phys_nid == NUMA_NO_NODE)
-				dfl_phys_nid = emu_nid_to_phys[i];
-		}
-	}
-	if (dfl_phys_nid == NUMA_NO_NODE) {
-		pr_warning("NUMA: Warning: can't determine default physical node, disabling emulation\n");
-		goto no_emu;
-	}
+	max_emu_nid = setup_emu2phys_nid(&dfl_phys_nid);
 
 	/* commit */
 	*numa_meminfo = ei;
 
+	/* Make sure numa_nodes_parsed only contains emulated nodes */
+	nodes_clear(numa_nodes_parsed);
+	for (i = 0; i < ARRAY_SIZE(ei.blk); i++)
+		if (ei.blk[i].start != ei.blk[i].end &&
+		    ei.blk[i].nid != NUMA_NO_NODE)
+			node_set(ei.blk[i].nid, numa_nodes_parsed);
+
 	/*
 	 * Transform __apicid_to_node table to use emulated nids by
 	 * reverse-mapping phys_nid.  The maps should always exist but fall
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 757b0bcdf712..dfb7d657cf43 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -1775,6 +1775,70 @@ int set_memory_4k(unsigned long addr, int numpages)
 					__pgprot(0), 1, 0, NULL);
 }
 
+static int __set_memory_enc_dec(unsigned long addr, int numpages, bool enc)
+{
+	struct cpa_data cpa;
+	unsigned long start;
+	int ret;
+
+	/* Nothing to do if the SME is not active */
+	if (!sme_active())
+		return 0;
+
+	/* Should not be working on unaligned addresses */
+	if (WARN_ONCE(addr & ~PAGE_MASK, "misaligned address: %#lx\n", addr))
+		addr &= PAGE_MASK;
+
+	start = addr;
+
+	memset(&cpa, 0, sizeof(cpa));
+	cpa.vaddr = &addr;
+	cpa.numpages = numpages;
+	cpa.mask_set = enc ? __pgprot(_PAGE_ENC) : __pgprot(0);
+	cpa.mask_clr = enc ? __pgprot(0) : __pgprot(_PAGE_ENC);
+	cpa.pgd = init_mm.pgd;
+
+	/* Must avoid aliasing mappings in the highmem code */
+	kmap_flush_unused();
+	vm_unmap_aliases();
+
+	/*
+	 * Before changing the encryption attribute, we need to flush caches.
+	 */
+	if (static_cpu_has(X86_FEATURE_CLFLUSH))
+		cpa_flush_range(start, numpages, 1);
+	else
+		cpa_flush_all(1);
+
+	ret = __change_page_attr_set_clr(&cpa, 1);
+
+	/*
+	 * After changing the encryption attribute, we need to flush TLBs
+	 * again in case any speculative TLB caching occurred (but no need
+	 * to flush caches again).  We could just use cpa_flush_all(), but
+	 * in case TLB flushing gets optimized in the cpa_flush_range()
+	 * path use the same logic as above.
+	 */
+	if (static_cpu_has(X86_FEATURE_CLFLUSH))
+		cpa_flush_range(start, numpages, 0);
+	else
+		cpa_flush_all(0);
+
+	return ret;
+}
+
+int set_memory_encrypted(unsigned long addr, int numpages)
+{
+	return __set_memory_enc_dec(addr, numpages, true);
+}
+EXPORT_SYMBOL_GPL(set_memory_encrypted);
+
+int set_memory_decrypted(unsigned long addr, int numpages)
+{
+	return __set_memory_enc_dec(addr, numpages, false);
+}
+EXPORT_SYMBOL_GPL(set_memory_decrypted);
+
 int set_pages_uc(struct page *page, int numpages)
 {
 	unsigned long addr = (unsigned long)page_address(page);
@@ -2020,6 +2084,9 @@ int kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address,
 	if (!(page_flags & _PAGE_RW))
 		cpa.mask_clr = __pgprot(_PAGE_RW);
 
+	if (!(page_flags & _PAGE_ENC))
+		cpa.mask_clr = pgprot_encrypted(cpa.mask_clr);
+
 	cpa.mask_set = __pgprot(_PAGE_PRESENT | page_flags);
 
 	retval = __change_page_attr_set_clr(&cpa, 0);
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 45979502f64b..fe7d57a8fb60 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -293,7 +293,7 @@ void init_cache_modes(void)
  * pat_init - Initialize PAT MSR and PAT table
  *
  * This function initializes PAT MSR and PAT table with an OS-defined value
- * to enable additional cache attributes, WC and WT.
+ * to enable additional cache attributes, WC, WT and WP.
  *
  * This function must be called on all CPUs using the specific sequence of
  * operations defined in Intel SDM. mtrr_rendezvous_handler() provides this
@@ -352,7 +352,7 @@ void pat_init(void)
 		 *      010    2    UC-: _PAGE_CACHE_MODE_UC_MINUS
 		 *      011    3    UC : _PAGE_CACHE_MODE_UC
 		 *      100    4    WB : Reserved
-		 *      101    5    WC : Reserved
+		 *      101    5    WP : _PAGE_CACHE_MODE_WP
 		 *      110    6    UC-: Reserved
 		 *      111    7    WT : _PAGE_CACHE_MODE_WT
 		 *
@@ -360,7 +360,7 @@ void pat_init(void)
 		 * corresponding types in the presence of PAT errata.
 		 */
 		pat = PAT(0, WB) | PAT(1, WC) | PAT(2, UC_MINUS) | PAT(3, UC) |
-		      PAT(4, WB) | PAT(5, WC) | PAT(6, UC_MINUS) | PAT(7, WT);
+		      PAT(4, WB) | PAT(5, WP) | PAT(6, UC_MINUS) | PAT(7, WT);
 	}
 
 	if (!boot_cpu_done) {
@@ -744,6 +744,9 @@ EXPORT_SYMBOL(arch_io_free_memtype_wc);
 pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
 				unsigned long size, pgprot_t vma_prot)
 {
+	if (!phys_mem_access_encrypted(pfn << PAGE_SHIFT, size))
+		vma_prot = pgprot_decrypted(vma_prot);
+
 	return vma_prot;
 }
 
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 508a708eb9a6..218834a3e9ad 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -56,7 +56,7 @@ void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
 {
 	pgtable_page_dtor(pte);
 	paravirt_release_pte(page_to_pfn(pte));
-	tlb_remove_page(tlb, pte);
+	tlb_remove_table(tlb, pte);
 }
 
 #if CONFIG_PGTABLE_LEVELS > 2
@@ -72,21 +72,21 @@ void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
 	tlb->need_flush_all = 1;
 #endif
 	pgtable_pmd_page_dtor(page);
-	tlb_remove_page(tlb, page);
+	tlb_remove_table(tlb, page);
 }
 
 #if CONFIG_PGTABLE_LEVELS > 3
 void ___pud_free_tlb(struct mmu_gather *tlb, pud_t *pud)
 {
 	paravirt_release_pud(__pa(pud) >> PAGE_SHIFT);
-	tlb_remove_page(tlb, virt_to_page(pud));
+	tlb_remove_table(tlb, virt_to_page(pud));
 }
 
 #if CONFIG_PGTABLE_LEVELS > 4
 void ___p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d)
 {
 	paravirt_release_p4d(__pa(p4d) >> PAGE_SHIFT);
-	tlb_remove_page(tlb, virt_to_page(p4d));
+	tlb_remove_table(tlb, virt_to_page(p4d));
 }
 #endif	/* CONFIG_PGTABLE_LEVELS > 4 */
 #endif	/* CONFIG_PGTABLE_LEVELS > 3 */
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 014d07a80053..dbbcfd59726a 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -28,6 +28,42 @@
  *	Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi
  */
 
+atomic64_t last_mm_ctx_id = ATOMIC64_INIT(1);
+
+static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen,
+			    u16 *new_asid, bool *need_flush)
+{
+	u16 asid;
+
+	if (!static_cpu_has(X86_FEATURE_PCID)) {
+		*new_asid = 0;
+		*need_flush = true;
+		return;
+	}
+
+	for (asid = 0; asid < TLB_NR_DYN_ASIDS; asid++) {
+		if (this_cpu_read(cpu_tlbstate.ctxs[asid].ctx_id) !=
+		    next->context.ctx_id)
+			continue;
+
+		*new_asid = asid;
+		*need_flush = (this_cpu_read(cpu_tlbstate.ctxs[asid].tlb_gen) <
+			       next_tlb_gen);
+		return;
+	}
+
+	/*
+	 * We don't currently own an ASID slot on this CPU.
+	 * Allocate a slot.
+	 */
+	*new_asid = this_cpu_add_return(cpu_tlbstate.next_asid, 1) - 1;
+	if (*new_asid >= TLB_NR_DYN_ASIDS) {
+		*new_asid = 0;
+		this_cpu_write(cpu_tlbstate.next_asid, 1);
+	}
+	*need_flush = true;
+}
+
 void leave_mm(int cpu)
 {
 	struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
@@ -43,12 +79,11 @@ void leave_mm(int cpu)
 	if (loaded_mm == &init_mm)
 		return;
 
-	if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
-		BUG();
+	/* Warn if we're not lazy. */
+	WARN_ON(cpumask_test_cpu(smp_processor_id(), mm_cpumask(loaded_mm)));
 
 	switch_mm(NULL, &init_mm, NULL);
 }
-EXPORT_SYMBOL_GPL(leave_mm);
 
 void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 	       struct task_struct *tsk)
@@ -63,115 +98,263 @@ void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 			struct task_struct *tsk)
 {
-	unsigned cpu = smp_processor_id();
 	struct mm_struct *real_prev = this_cpu_read(cpu_tlbstate.loaded_mm);
+	u16 prev_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
+	unsigned cpu = smp_processor_id();
+	u64 next_tlb_gen;
 
 	/*
-	 * NB: The scheduler will call us with prev == next when
-	 * switching from lazy TLB mode to normal mode if active_mm
-	 * isn't changing.  When this happens, there is no guarantee
-	 * that CR3 (and hence cpu_tlbstate.loaded_mm) matches next.
+	 * NB: The scheduler will call us with prev == next when switching
+	 * from lazy TLB mode to normal mode if active_mm isn't changing.
+	 * When this happens, we don't assume that CR3 (and hence
+	 * cpu_tlbstate.loaded_mm) matches next.
 	 *
 	 * NB: leave_mm() calls us with prev == NULL and tsk == NULL.
 	 */
 
-	this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
+	/* We don't want flush_tlb_func_* to run concurrently with us. */
+	if (IS_ENABLED(CONFIG_PROVE_LOCKING))
+		WARN_ON_ONCE(!irqs_disabled());
+
+	/*
+	 * Verify that CR3 is what we think it is.  This will catch
+	 * hypothetical buggy code that directly switches to swapper_pg_dir
+	 * without going through leave_mm() / switch_mm_irqs_off() or that
+	 * does something like write_cr3(read_cr3_pa()).
+	 */
+	VM_BUG_ON(__read_cr3() != (__sme_pa(real_prev->pgd) | prev_asid));
 
 	if (real_prev == next) {
+		VM_BUG_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) !=
+			  next->context.ctx_id);
+
+		if (cpumask_test_cpu(cpu, mm_cpumask(next))) {
+			/*
+			 * There's nothing to do: we weren't lazy, and we
+			 * aren't changing our mm.  We don't need to flush
+			 * anything, nor do we need to update CR3, CR4, or
+			 * LDTR.
+			 */
+			return;
+		}
+
+		/* Resume remote flushes and then read tlb_gen. */
+		cpumask_set_cpu(cpu, mm_cpumask(next));
+		next_tlb_gen = atomic64_read(&next->context.tlb_gen);
+
+		if (this_cpu_read(cpu_tlbstate.ctxs[prev_asid].tlb_gen) <
+		    next_tlb_gen) {
+			/*
+			 * Ideally, we'd have a flush_tlb() variant that
+			 * takes the known CR3 value as input.  This would
+			 * be faster on Xen PV and on hypothetical CPUs
+			 * on which INVPCID is fast.
+			 */
+			this_cpu_write(cpu_tlbstate.ctxs[prev_asid].tlb_gen,
+				       next_tlb_gen);
+			write_cr3(__sme_pa(next->pgd) | prev_asid);
+			trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH,
+					TLB_FLUSH_ALL);
+		}
+
 		/*
-		 * There's nothing to do: we always keep the per-mm control
-		 * regs in sync with cpu_tlbstate.loaded_mm.  Just
-		 * sanity-check mm_cpumask.
+		 * We just exited lazy mode, which means that CR4 and/or LDTR
+		 * may be stale.  (Changes to the required CR4 and LDTR states
+		 * are not reflected in tlb_gen.)
 		 */
-		if (WARN_ON_ONCE(!cpumask_test_cpu(cpu, mm_cpumask(next))))
-			cpumask_set_cpu(cpu, mm_cpumask(next));
-		return;
-	}
+	} else {
+		u16 new_asid;
+		bool need_flush;
+
+		if (IS_ENABLED(CONFIG_VMAP_STACK)) {
+			/*
+			 * If our current stack is in vmalloc space and isn't
+			 * mapped in the new pgd, we'll double-fault.  Forcibly
+			 * map it.
+			 */
+			unsigned int index = pgd_index(current_stack_pointer());
+			pgd_t *pgd = next->pgd + index;
+
+			if (unlikely(pgd_none(*pgd)))
+				set_pgd(pgd, init_mm.pgd[index]);
+		}
+
+		/* Stop remote flushes for the previous mm */
+		if (cpumask_test_cpu(cpu, mm_cpumask(real_prev)))
+			cpumask_clear_cpu(cpu, mm_cpumask(real_prev));
+
+		VM_WARN_ON_ONCE(cpumask_test_cpu(cpu, mm_cpumask(next)));
 
-	if (IS_ENABLED(CONFIG_VMAP_STACK)) {
 		/*
-		 * If our current stack is in vmalloc space and isn't
-		 * mapped in the new pgd, we'll double-fault.  Forcibly
-		 * map it.
+		 * Start remote flushes and then read tlb_gen.
 		 */
-		unsigned int stack_pgd_index = pgd_index(current_stack_pointer());
-
-		pgd_t *pgd = next->pgd + stack_pgd_index;
+		cpumask_set_cpu(cpu, mm_cpumask(next));
+		next_tlb_gen = atomic64_read(&next->context.tlb_gen);
+
+		choose_new_asid(next, next_tlb_gen, &new_asid, &need_flush);
+
+		if (need_flush) {
+			this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
+			this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
+			write_cr3(__sme_pa(next->pgd) | new_asid);
+			trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH,
+					TLB_FLUSH_ALL);
+		} else {
+			/* The new ASID is already up to date. */
+			write_cr3(__sme_pa(next->pgd) | new_asid | CR3_NOFLUSH);
+			trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, 0);
+		}
 
-		if (unlikely(pgd_none(*pgd)))
-			set_pgd(pgd, init_mm.pgd[stack_pgd_index]);
+		this_cpu_write(cpu_tlbstate.loaded_mm, next);
+		this_cpu_write(cpu_tlbstate.loaded_mm_asid, new_asid);
 	}
 
-	this_cpu_write(cpu_tlbstate.loaded_mm, next);
+	load_mm_cr4(next);
+	switch_ldt(real_prev, next);
+}
 
-	WARN_ON_ONCE(cpumask_test_cpu(cpu, mm_cpumask(next)));
-	cpumask_set_cpu(cpu, mm_cpumask(next));
+/*
+ * Call this when reinitializing a CPU.  It fixes the following potential
+ * problems:
+ *
+ * - The ASID changed from what cpu_tlbstate thinks it is (most likely
+ *   because the CPU was taken down and came back up with CR3's PCID
+ *   bits clear.  CPU hotplug can do this.
+ *
+ * - The TLB contains junk in slots corresponding to inactive ASIDs.
+ *
+ * - The CPU went so far out to lunch that it may have missed a TLB
+ *   flush.
+ */
+void initialize_tlbstate_and_flush(void)
+{
+	int i;
+	struct mm_struct *mm = this_cpu_read(cpu_tlbstate.loaded_mm);
+	u64 tlb_gen = atomic64_read(&init_mm.context.tlb_gen);
+	unsigned long cr3 = __read_cr3();
 
-	/*
-	 * Re-load page tables.
-	 *
-	 * This logic has an ordering constraint:
-	 *
-	 *  CPU 0: Write to a PTE for 'next'
-	 *  CPU 0: load bit 1 in mm_cpumask.  if nonzero, send IPI.
-	 *  CPU 1: set bit 1 in next's mm_cpumask
-	 *  CPU 1: load from the PTE that CPU 0 writes (implicit)
-	 *
-	 * We need to prevent an outcome in which CPU 1 observes
-	 * the new PTE value and CPU 0 observes bit 1 clear in
-	 * mm_cpumask.  (If that occurs, then the IPI will never
-	 * be sent, and CPU 0's TLB will contain a stale entry.)
-	 *
-	 * The bad outcome can occur if either CPU's load is
-	 * reordered before that CPU's store, so both CPUs must
-	 * execute full barriers to prevent this from happening.
-	 *
-	 * Thus, switch_mm needs a full barrier between the
-	 * store to mm_cpumask and any operation that could load
-	 * from next->pgd.  TLB fills are special and can happen
-	 * due to instruction fetches or for no reason at all,
-	 * and neither LOCK nor MFENCE orders them.
-	 * Fortunately, load_cr3() is serializing and gives the
-	 * ordering guarantee we need.
-	 */
-	load_cr3(next->pgd);
+	/* Assert that CR3 already references the right mm. */
+	WARN_ON((cr3 & CR3_ADDR_MASK) != __pa(mm->pgd));
 
 	/*
-	 * This gets called via leave_mm() in the idle path where RCU
-	 * functions differently.  Tracing normally uses RCU, so we have to
-	 * call the tracepoint specially here.
+	 * Assert that CR4.PCIDE is set if needed.  (CR4.PCIDE initialization
+	 * doesn't work like other CR4 bits because it can only be set from
+	 * long mode.)
 	 */
-	trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
+	WARN_ON(boot_cpu_has(X86_CR4_PCIDE) &&
+		!(cr4_read_shadow() & X86_CR4_PCIDE));
 
-	/* Stop flush ipis for the previous mm */
-	WARN_ON_ONCE(!cpumask_test_cpu(cpu, mm_cpumask(real_prev)) &&
-		     real_prev != &init_mm);
-	cpumask_clear_cpu(cpu, mm_cpumask(real_prev));
+	/* Force ASID 0 and force a TLB flush. */
+	write_cr3(cr3 & ~CR3_PCID_MASK);
 
-	/* Load per-mm CR4 and LDTR state */
-	load_mm_cr4(next);
-	switch_ldt(real_prev, next);
+	/* Reinitialize tlbstate. */
+	this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0);
+	this_cpu_write(cpu_tlbstate.next_asid, 1);
+	this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, mm->context.ctx_id);
+	this_cpu_write(cpu_tlbstate.ctxs[0].tlb_gen, tlb_gen);
+
+	for (i = 1; i < TLB_NR_DYN_ASIDS; i++)
+		this_cpu_write(cpu_tlbstate.ctxs[i].ctx_id, 0);
 }
 
+/*
+ * flush_tlb_func_common()'s memory ordering requirement is that any
+ * TLB fills that happen after we flush the TLB are ordered after we
+ * read active_mm's tlb_gen.  We don't need any explicit barriers
+ * because all x86 flush operations are serializing and the
+ * atomic64_read operation won't be reordered by the compiler.
+ */
 static void flush_tlb_func_common(const struct flush_tlb_info *f,
 				  bool local, enum tlb_flush_reason reason)
 {
+	/*
+	 * We have three different tlb_gen values in here.  They are:
+	 *
+	 * - mm_tlb_gen:     the latest generation.
+	 * - local_tlb_gen:  the generation that this CPU has already caught
+	 *                   up to.
+	 * - f->new_tlb_gen: the generation that the requester of the flush
+	 *                   wants us to catch up to.
+	 */
+	struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
+	u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
+	u64 mm_tlb_gen = atomic64_read(&loaded_mm->context.tlb_gen);
+	u64 local_tlb_gen = this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].tlb_gen);
+
 	/* This code cannot presently handle being reentered. */
 	VM_WARN_ON(!irqs_disabled());
 
-	if (this_cpu_read(cpu_tlbstate.state) != TLBSTATE_OK) {
-		leave_mm(smp_processor_id());
+	VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].ctx_id) !=
+		   loaded_mm->context.ctx_id);
+
+	if (!cpumask_test_cpu(smp_processor_id(), mm_cpumask(loaded_mm))) {
+		/*
+		 * We're in lazy mode -- don't flush.  We can get here on
+		 * remote flushes due to races and on local flushes if a
+		 * kernel thread coincidentally flushes the mm it's lazily
+		 * still using.
+		 */
 		return;
 	}
 
-	if (f->end == TLB_FLUSH_ALL) {
-		local_flush_tlb();
-		if (local)
-			count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
-		trace_tlb_flush(reason, TLB_FLUSH_ALL);
-	} else {
+	if (unlikely(local_tlb_gen == mm_tlb_gen)) {
+		/*
+		 * There's nothing to do: we're already up to date.  This can
+		 * happen if two concurrent flushes happen -- the first flush to
+		 * be handled can catch us all the way up, leaving no work for
+		 * the second flush.
+		 */
+		trace_tlb_flush(reason, 0);
+		return;
+	}
+
+	WARN_ON_ONCE(local_tlb_gen > mm_tlb_gen);
+	WARN_ON_ONCE(f->new_tlb_gen > mm_tlb_gen);
+
+	/*
+	 * If we get to this point, we know that our TLB is out of date.
+	 * This does not strictly imply that we need to flush (it's
+	 * possible that f->new_tlb_gen <= local_tlb_gen), but we're
+	 * going to need to flush in the very near future, so we might
+	 * as well get it over with.
+	 *
+	 * The only question is whether to do a full or partial flush.
+	 *
+	 * We do a partial flush if requested and two extra conditions
+	 * are met:
+	 *
+	 * 1. f->new_tlb_gen == local_tlb_gen + 1.  We have an invariant that
+	 *    we've always done all needed flushes to catch up to
+	 *    local_tlb_gen.  If, for example, local_tlb_gen == 2 and
+	 *    f->new_tlb_gen == 3, then we know that the flush needed to bring
+	 *    us up to date for tlb_gen 3 is the partial flush we're
+	 *    processing.
+	 *
+	 *    As an example of why this check is needed, suppose that there
+	 *    are two concurrent flushes.  The first is a full flush that
+	 *    changes context.tlb_gen from 1 to 2.  The second is a partial
+	 *    flush that changes context.tlb_gen from 2 to 3.  If they get
+	 *    processed on this CPU in reverse order, we'll see
+	 *     local_tlb_gen == 1, mm_tlb_gen == 3, and end != TLB_FLUSH_ALL.
+	 *    If we were to use __flush_tlb_single() and set local_tlb_gen to
+	 *    3, we'd be break the invariant: we'd update local_tlb_gen above
+	 *    1 without the full flush that's needed for tlb_gen 2.
+	 *
+	 * 2. f->new_tlb_gen == mm_tlb_gen.  This is purely an optimiation.
+	 *    Partial TLB flushes are not all that much cheaper than full TLB
+	 *    flushes, so it seems unlikely that it would be a performance win
+	 *    to do a partial flush if that won't bring our TLB fully up to
+	 *    date.  By doing a full flush instead, we can increase
+	 *    local_tlb_gen all the way to mm_tlb_gen and we can probably
+	 *    avoid another flush in the very near future.
+	 */
+	if (f->end != TLB_FLUSH_ALL &&
+	    f->new_tlb_gen == local_tlb_gen + 1 &&
+	    f->new_tlb_gen == mm_tlb_gen) {
+		/* Partial flush */
 		unsigned long addr;
 		unsigned long nr_pages = (f->end - f->start) >> PAGE_SHIFT;
+
 		addr = f->start;
 		while (addr < f->end) {
 			__flush_tlb_single(addr);
@@ -180,7 +363,16 @@ static void flush_tlb_func_common(const struct flush_tlb_info *f,
 		if (local)
 			count_vm_tlb_events(NR_TLB_LOCAL_FLUSH_ONE, nr_pages);
 		trace_tlb_flush(reason, nr_pages);
+	} else {
+		/* Full flush. */
+		local_flush_tlb();
+		if (local)
+			count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
+		trace_tlb_flush(reason, TLB_FLUSH_ALL);
 	}
+
+	/* Both paths above update our state to mm_tlb_gen. */
+	this_cpu_write(cpu_tlbstate.ctxs[loaded_mm_asid].tlb_gen, mm_tlb_gen);
 }
 
 static void flush_tlb_func_local(void *info, enum tlb_flush_reason reason)
@@ -214,6 +406,21 @@ void native_flush_tlb_others(const struct cpumask *cpumask,
 				(info->end - info->start) >> PAGE_SHIFT);
 
 	if (is_uv_system()) {
+		/*
+		 * This whole special case is confused.  UV has a "Broadcast
+		 * Assist Unit", which seems to be a fancy way to send IPIs.
+		 * Back when x86 used an explicit TLB flush IPI, UV was
+		 * optimized to use its own mechanism.  These days, x86 uses
+		 * smp_call_function_many(), but UV still uses a manual IPI,
+		 * and that IPI's action is out of date -- it does a manual
+		 * flush instead of calling flush_tlb_func_remote().  This
+		 * means that the percpu tlb_gen variables won't be updated
+		 * and we'll do pointless flushes on future context switches.
+		 *
+		 * Rather than hooking native_flush_tlb_others() here, I think
+		 * that UV should be updated so that smp_call_function_many(),
+		 * etc, are optimal on UV.
+		 */
 		unsigned int cpu;
 
 		cpu = smp_processor_id();
@@ -250,8 +457,8 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
 
 	cpu = get_cpu();
 
-	/* Synchronize with switch_mm. */
-	smp_mb();
+	/* This is also a barrier that synchronizes with switch_mm(). */
+	info.new_tlb_gen = inc_mm_tlb_gen(mm);
 
 	/* Should we flush just the requested range? */
 	if ((end != TLB_FLUSH_ALL) &&
@@ -273,6 +480,7 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
 
 	if (cpumask_any_but(mm_cpumask(mm), cpu) < nr_cpu_ids)
 		flush_tlb_others(mm_cpumask(mm), &info);
+
 	put_cpu();
 }
 
@@ -281,8 +489,6 @@ static void do_flush_tlb_all(void *info)
 {
 	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
 	__flush_tlb_all();
-	if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY)
-		leave_mm(smp_processor_id());
 }
 
 void flush_tlb_all(void)
@@ -335,6 +541,7 @@ void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
 
 	if (cpumask_any_but(&batch->cpumask, cpu) < nr_cpu_ids)
 		flush_tlb_others(&batch->cpumask, &info);
+
 	cpumask_clear(&batch->cpumask);
 
 	put_cpu();
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index e1324f280e06..8c9573660d51 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -94,7 +94,9 @@ static int bpf_size_to_x86_bytes(int bpf_size)
 #define X86_JNE 0x75
 #define X86_JBE 0x76
 #define X86_JA  0x77
+#define X86_JL  0x7C
 #define X86_JGE 0x7D
+#define X86_JLE 0x7E
 #define X86_JG  0x7F
 
 static void bpf_flush_icache(void *start, void *end)
@@ -285,7 +287,7 @@ static void emit_bpf_tail_call(u8 **pprog)
 	EMIT4(0x48, 0x8B, 0x46,                   /* mov rax, qword ptr [rsi + 16] */
 	      offsetof(struct bpf_array, map.max_entries));
 	EMIT3(0x48, 0x39, 0xD0);                  /* cmp rax, rdx */
-#define OFFSET1 47 /* number of bytes to jump */
+#define OFFSET1 43 /* number of bytes to jump */
 	EMIT2(X86_JBE, OFFSET1);                  /* jbe out */
 	label1 = cnt;
 
@@ -294,21 +296,20 @@ static void emit_bpf_tail_call(u8 **pprog)
 	 */
 	EMIT2_off32(0x8B, 0x85, 36);              /* mov eax, dword ptr [rbp + 36] */
 	EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT);     /* cmp eax, MAX_TAIL_CALL_CNT */
-#define OFFSET2 36
+#define OFFSET2 32
 	EMIT2(X86_JA, OFFSET2);                   /* ja out */
 	label2 = cnt;
 	EMIT3(0x83, 0xC0, 0x01);                  /* add eax, 1 */
 	EMIT2_off32(0x89, 0x85, 36);              /* mov dword ptr [rbp + 36], eax */
 
 	/* prog = array->ptrs[index]; */
-	EMIT4_off32(0x48, 0x8D, 0x84, 0xD6,       /* lea rax, [rsi + rdx * 8 + offsetof(...)] */
+	EMIT4_off32(0x48, 0x8B, 0x84, 0xD6,       /* mov rax, [rsi + rdx * 8 + offsetof(...)] */
 		    offsetof(struct bpf_array, ptrs));
-	EMIT3(0x48, 0x8B, 0x00);                  /* mov rax, qword ptr [rax] */
 
 	/* if (prog == NULL)
 	 *   goto out;
 	 */
-	EMIT4(0x48, 0x83, 0xF8, 0x00);            /* cmp rax, 0 */
+	EMIT3(0x48, 0x85, 0xC0);		  /* test rax,rax */
 #define OFFSET3 10
 	EMIT2(X86_JE, OFFSET3);                   /* je out */
 	label3 = cnt;
@@ -888,9 +889,13 @@ xadd:			if (is_imm8(insn->off))
 		case BPF_JMP | BPF_JEQ | BPF_X:
 		case BPF_JMP | BPF_JNE | BPF_X:
 		case BPF_JMP | BPF_JGT | BPF_X:
+		case BPF_JMP | BPF_JLT | BPF_X:
 		case BPF_JMP | BPF_JGE | BPF_X:
+		case BPF_JMP | BPF_JLE | BPF_X:
 		case BPF_JMP | BPF_JSGT | BPF_X:
+		case BPF_JMP | BPF_JSLT | BPF_X:
 		case BPF_JMP | BPF_JSGE | BPF_X:
+		case BPF_JMP | BPF_JSLE | BPF_X:
 			/* cmp dst_reg, src_reg */
 			EMIT3(add_2mod(0x48, dst_reg, src_reg), 0x39,
 			      add_2reg(0xC0, dst_reg, src_reg));
@@ -911,9 +916,13 @@ xadd:			if (is_imm8(insn->off))
 		case BPF_JMP | BPF_JEQ | BPF_K:
 		case BPF_JMP | BPF_JNE | BPF_K:
 		case BPF_JMP | BPF_JGT | BPF_K:
+		case BPF_JMP | BPF_JLT | BPF_K:
 		case BPF_JMP | BPF_JGE | BPF_K:
+		case BPF_JMP | BPF_JLE | BPF_K:
 		case BPF_JMP | BPF_JSGT | BPF_K:
+		case BPF_JMP | BPF_JSLT | BPF_K:
 		case BPF_JMP | BPF_JSGE | BPF_K:
+		case BPF_JMP | BPF_JSLE | BPF_K:
 			/* cmp dst_reg, imm8/32 */
 			EMIT1(add_1mod(0x48, dst_reg));
 
@@ -935,18 +944,34 @@ emit_cond_jmp:		/* convert BPF opcode to x86 */
 				/* GT is unsigned '>', JA in x86 */
 				jmp_cond = X86_JA;
 				break;
+			case BPF_JLT:
+				/* LT is unsigned '<', JB in x86 */
+				jmp_cond = X86_JB;
+				break;
 			case BPF_JGE:
 				/* GE is unsigned '>=', JAE in x86 */
 				jmp_cond = X86_JAE;
 				break;
+			case BPF_JLE:
+				/* LE is unsigned '<=', JBE in x86 */
+				jmp_cond = X86_JBE;
+				break;
 			case BPF_JSGT:
 				/* signed '>', GT in x86 */
 				jmp_cond = X86_JG;
 				break;
+			case BPF_JSLT:
+				/* signed '<', LT in x86 */
+				jmp_cond = X86_JL;
+				break;
 			case BPF_JSGE:
 				/* signed '>=', GE in x86 */
 				jmp_cond = X86_JGE;
 				break;
+			case BPF_JSLE:
+				/* signed '<=', LE in x86 */
+				jmp_cond = X86_JLE;
+				break;
 			default: /* to silence gcc warning */
 				return -EFAULT;
 			}
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index dbe2132b0ed4..7a5350d08cef 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -674,7 +674,7 @@ int pcibios_add_device(struct pci_dev *dev)
 
 	pa_data = boot_params.hdr.setup_data;
 	while (pa_data) {
-		data = ioremap(pa_data, sizeof(*rom));
+		data = memremap(pa_data, sizeof(*rom), MEMREMAP_WB);
 		if (!data)
 			return -ENOMEM;
 
@@ -693,7 +693,7 @@ int pcibios_add_device(struct pci_dev *dev)
 			}
 		}
 		pa_data = data->next;
-		iounmap(data);
+		memunmap(data);
 	}
 	set_dma_domain_ops(dev);
 	set_dev_domain_options(dev);
diff --git a/arch/x86/pci/intel_mid_pci.c b/arch/x86/pci/intel_mid_pci.c
index 5a18aedcb341..b901ece278dd 100644
--- a/arch/x86/pci/intel_mid_pci.c
+++ b/arch/x86/pci/intel_mid_pci.c
@@ -215,16 +215,23 @@ static int intel_mid_pci_irq_enable(struct pci_dev *dev)
 	struct irq_alloc_info info;
 	int polarity;
 	int ret;
+	u8 gsi;
 
 	if (dev->irq_managed && dev->irq > 0)
 		return 0;
 
+	ret = pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &gsi);
+	if (ret < 0) {
+		dev_warn(&dev->dev, "Failed to read interrupt line: %d\n", ret);
+		return ret;
+	}
+
 	switch (intel_mid_identify_cpu()) {
 	case INTEL_MID_CPU_CHIP_TANGIER:
 		polarity = IOAPIC_POL_HIGH;
 
 		/* Special treatment for IRQ0 */
-		if (dev->irq == 0) {
+		if (gsi == 0) {
 			/*
 			 * Skip HS UART common registers device since it has
 			 * IRQ0 assigned and not used by the kernel.
@@ -253,10 +260,11 @@ static int intel_mid_pci_irq_enable(struct pci_dev *dev)
 	 * MRST only have IOAPIC, the PCI irq lines are 1:1 mapped to
 	 * IOAPIC RTE entries, so we just enable RTE for the device.
 	 */
-	ret = mp_map_gsi_to_irq(dev->irq, IOAPIC_MAP_ALLOC, &info);
+	ret = mp_map_gsi_to_irq(gsi, IOAPIC_MAP_ALLOC, &info);
 	if (ret < 0)
 		return ret;
 
+	dev->irq = ret;
 	dev->irq_managed = 1;
 
 	return 0;
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index f084d8718ac4..928b6dceeca0 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -1032,25 +1032,6 @@ void __init efi_enter_virtual_mode(void)
 	efi_dump_pagetable();
 }
 
-/*
- * Convenience functions to obtain memory types and attributes
- */
-u32 efi_mem_type(unsigned long phys_addr)
-{
-	efi_memory_desc_t *md;
-
-	if (!efi_enabled(EFI_MEMMAP))
-		return 0;
-
-	for_each_efi_memory_desc(md) {
-		if ((md->phys_addr <= phys_addr) &&
-		    (phys_addr < (md->phys_addr +
-				  (md->num_pages << EFI_PAGE_SHIFT))))
-			return md->type;
-	}
-	return 0;
-}
-
 static int __init arch_parse_efi_cmdline(char *str)
 {
 	if (!str) {
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index 9bf72f5bfedb..12e83888e5b9 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -327,7 +327,7 @@ virt_to_phys_or_null_size(void *va, unsigned long size)
 
 int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
 {
-	unsigned long pfn, text;
+	unsigned long pfn, text, pf;
 	struct page *page;
 	unsigned npages;
 	pgd_t *pgd;
@@ -335,7 +335,12 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
 	if (efi_enabled(EFI_OLD_MEMMAP))
 		return 0;
 
-	efi_scratch.efi_pgt = (pgd_t *)__pa(efi_pgd);
+	/*
+	 * Since the PGD is encrypted, set the encryption mask so that when
+	 * this value is loaded into cr3 the PGD will be decrypted during
+	 * the pagetable walk.
+	 */
+	efi_scratch.efi_pgt = (pgd_t *)__sme_pa(efi_pgd);
 	pgd = efi_pgd;
 
 	/*
@@ -345,7 +350,8 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
 	 * phys_efi_set_virtual_address_map().
 	 */
 	pfn = pa_memmap >> PAGE_SHIFT;
-	if (kernel_map_pages_in_pgd(pgd, pfn, pa_memmap, num_pages, _PAGE_NX | _PAGE_RW)) {
+	pf = _PAGE_NX | _PAGE_RW | _PAGE_ENC;
+	if (kernel_map_pages_in_pgd(pgd, pfn, pa_memmap, num_pages, pf)) {
 		pr_err("Error ident-mapping new memmap (0x%lx)!\n", pa_memmap);
 		return 1;
 	}
@@ -388,7 +394,8 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
 	text = __pa(_text);
 	pfn = text >> PAGE_SHIFT;
 
-	if (kernel_map_pages_in_pgd(pgd, pfn, text, npages, _PAGE_RW)) {
+	pf = _PAGE_RW | _PAGE_ENC;
+	if (kernel_map_pages_in_pgd(pgd, pfn, text, npages, pf)) {
 		pr_err("Failed to map kernel text 1:1\n");
 		return 1;
 	}
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_bt.c b/arch/x86/platform/intel-mid/device_libs/platform_bt.c
index 5a0483e7bf66..dc036e511f48 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_bt.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_bt.c
@@ -60,7 +60,7 @@ static int __init tng_bt_sfi_setup(struct bt_sfi_data *ddata)
 	return 0;
 }
 
-static struct bt_sfi_data tng_bt_sfi_data __initdata = {
+static const struct bt_sfi_data tng_bt_sfi_data __initdata = {
 	.setup	= tng_bt_sfi_setup,
 };
 
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c
index 9e304e2ea4f5..4f5fa65a1011 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c
@@ -30,13 +30,13 @@ static int tangier_probe(struct platform_device *pdev)
 {
 	struct irq_alloc_info info;
 	struct intel_mid_wdt_pdata *pdata = pdev->dev.platform_data;
-	int gsi, irq;
+	int gsi = TANGIER_EXT_TIMER0_MSI;
+	int irq;
 
 	if (!pdata)
 		return -EINVAL;
 
 	/* IOAPIC builds identity mapping between GSI and IRQ on MID */
-	gsi = pdata->irq;
 	ioapic_set_alloc_attr(&info, cpu_to_node(0), 1, 0);
 	irq = mp_map_gsi_to_irq(gsi, IOAPIC_MAP_ALLOC, &info);
 	if (irq < 0) {
@@ -44,11 +44,11 @@ static int tangier_probe(struct platform_device *pdev)
 		return irq;
 	}
 
+	pdata->irq = irq;
 	return 0;
 }
 
 static struct intel_mid_wdt_pdata tangier_pdata = {
-	.irq = TANGIER_EXT_TIMER0_MSI,
 	.probe = tangier_probe,
 };
 
diff --git a/arch/x86/platform/intel-mid/intel-mid.c b/arch/x86/platform/intel-mid/intel-mid.c
index 12a272582cdc..86676cec99a1 100644
--- a/arch/x86/platform/intel-mid/intel-mid.c
+++ b/arch/x86/platform/intel-mid/intel-mid.c
@@ -183,6 +183,7 @@ void __init x86_intel_mid_early_setup(void)
 
 	x86_init.timers.timer_init = intel_mid_time_init;
 	x86_init.timers.setup_percpu_clockev = x86_init_noop;
+	x86_init.timers.wallclock_init = intel_mid_rtc_init;
 
 	x86_init.irqs.pre_vector_init = x86_init_noop;
 
@@ -191,7 +192,6 @@ void __init x86_intel_mid_early_setup(void)
 	x86_cpuinit.setup_percpu_clockev = apbt_setup_secondary_clock;
 
 	x86_platform.calibrate_tsc = intel_mid_calibrate_tsc;
-	x86_init.timers.wallclock_init = intel_mid_rtc_init;
 	x86_platform.get_nmi_reason = intel_mid_get_nmi_reason;
 
 	x86_init.pci.init = intel_mid_pci_init;
diff --git a/arch/x86/platform/intel-mid/pwr.c b/arch/x86/platform/intel-mid/pwr.c
index ef03852ea6e8..49ec5b94c71f 100644
--- a/arch/x86/platform/intel-mid/pwr.c
+++ b/arch/x86/platform/intel-mid/pwr.c
@@ -444,7 +444,7 @@ static int mid_set_initial_state(struct mid_pwr *pwr, const u32 *states)
 static int pnw_set_initial_state(struct mid_pwr *pwr)
 {
 	/* On Penwell SRAM must stay powered on */
-	const u32 states[] = {
+	static const u32 states[] = {
 		0xf00fffff,		/* PM_SSC(0) */
 		0xffffffff,		/* PM_SSC(1) */
 		0xffffffff,		/* PM_SSC(2) */
@@ -455,7 +455,7 @@ static int pnw_set_initial_state(struct mid_pwr *pwr)
 
 static int tng_set_initial_state(struct mid_pwr *pwr)
 {
-	const u32 states[] = {
+	static const u32 states[] = {
 		0xffffffff,		/* PM_SSC(0) */
 		0xffffffff,		/* PM_SSC(1) */
 		0xffffffff,		/* PM_SSC(2) */
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index 78459a6d455a..4d68d59f457d 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -181,6 +181,7 @@ static void fix_processor_context(void)
 #endif
 	load_TR_desc();				/* This does ltr */
 	load_mm_ldt(current->active_mm);	/* This does lldt */
+	initialize_tlbstate_and_flush();
 
 	fpu__resume_cpu();
 
diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c
index cd4be19c36dc..1f71980fc5e0 100644
--- a/arch/x86/realmode/init.c
+++ b/arch/x86/realmode/init.c
@@ -1,6 +1,7 @@
 #include <linux/io.h>
 #include <linux/slab.h>
 #include <linux/memblock.h>
+#include <linux/mem_encrypt.h>
 
 #include <asm/set_memory.h>
 #include <asm/pgtable.h>
@@ -59,6 +60,13 @@ static void __init setup_real_mode(void)
 
 	base = (unsigned char *)real_mode_header;
 
+	/*
+	 * If SME is active, the trampoline area will need to be in
+	 * decrypted memory in order to bring up other processors
+	 * successfully.
+	 */
+	set_memory_decrypted((unsigned long)base, size >> PAGE_SHIFT);
+
 	memcpy(base, real_mode_blob, size);
 
 	phys_base = __pa(base);
@@ -100,6 +108,10 @@ static void __init setup_real_mode(void)
 	trampoline_cr4_features = &trampoline_header->cr4;
 	*trampoline_cr4_features = mmu_cr4_features;
 
+	trampoline_header->flags = 0;
+	if (sme_active())
+		trampoline_header->flags |= TH_FLAGS_SME_ACTIVE;
+
 	trampoline_pgd = (u64 *) __va(real_mode_header->trampoline_pgd);
 	trampoline_pgd[0] = trampoline_pgd_entry.pgd;
 	trampoline_pgd[511] = init_top_pgt[511].pgd;
diff --git a/arch/x86/realmode/rm/trampoline_64.S b/arch/x86/realmode/rm/trampoline_64.S
index dac7b20d2f9d..614fd7064d0a 100644
--- a/arch/x86/realmode/rm/trampoline_64.S
+++ b/arch/x86/realmode/rm/trampoline_64.S
@@ -30,6 +30,7 @@
 #include <asm/msr.h>
 #include <asm/segment.h>
 #include <asm/processor-flags.h>
+#include <asm/realmode.h>
 #include "realmode.h"
 
 	.text
@@ -92,6 +93,28 @@ ENTRY(startup_32)
 	movl	%edx, %fs
 	movl	%edx, %gs
 
+	/*
+	 * Check for memory encryption support. This is a safety net in
+	 * case BIOS hasn't done the necessary step of setting the bit in
+	 * the MSR for this AP. If SME is active and we've gotten this far
+	 * then it is safe for us to set the MSR bit and continue. If we
+	 * don't we'll eventually crash trying to execute encrypted
+	 * instructions.
+	 */
+	bt	$TH_FLAGS_SME_ACTIVE_BIT, pa_tr_flags
+	jnc	.Ldone
+	movl	$MSR_K8_SYSCFG, %ecx
+	rdmsr
+	bts	$MSR_K8_SYSCFG_MEM_ENCRYPT_BIT, %eax
+	jc	.Ldone
+
+	/*
+	 * Memory encryption is enabled but the SME enable bit for this
+	 * CPU has has not been set.  It is safe to set it, so do so.
+	 */
+	wrmsr
+.Ldone:
+
 	movl	pa_tr_cr4, %eax
 	movl	%eax, %cr4		# Enable PAE mode
 
@@ -147,6 +170,7 @@ GLOBAL(trampoline_header)
 	tr_start:		.space	8
 	GLOBAL(tr_efer)		.space	8
 	GLOBAL(tr_cr4)		.space	4
+	GLOBAL(tr_flags)	.space	4
 END(trampoline_header)
 
 #include "trampoline_common.S"
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index 027987638e98..1ecd419811a2 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -17,6 +17,9 @@ config XEN_PV
 	bool "Xen PV guest support"
 	default y
 	depends on XEN
+	# XEN_PV is not ready to work with 5-level paging.
+	# Changes to hypervisor are also required.
+	depends on !X86_5LEVEL
 	select XEN_HAVE_PVMMU
 	select XEN_HAVE_VPMU
 	help
@@ -75,4 +78,6 @@ config XEN_DEBUG_FS
 config XEN_PVH
 	bool "Support for running as a PVH guest"
 	depends on XEN && XEN_PVHVM && ACPI
+	# Pre-built page tables are not ready to handle 5-level paging.
+	depends on !X86_5LEVEL
 	def_bool n
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 811e4ddb3f37..ae2a2e2d6362 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -263,6 +263,13 @@ static void __init xen_init_capabilities(void)
 	setup_clear_cpu_cap(X86_FEATURE_MTRR);
 	setup_clear_cpu_cap(X86_FEATURE_ACC);
 	setup_clear_cpu_cap(X86_FEATURE_X2APIC);
+	setup_clear_cpu_cap(X86_FEATURE_SME);
+
+	/*
+	 * Xen PV would need some work to support PCID: CR3 handling as well
+	 * as xen_flush_tlb_others() would need updating.
+	 */
+	setup_clear_cpu_cap(X86_FEATURE_PCID);
 
 	if (!xen_initial_domain())
 		setup_clear_cpu_cap(X86_FEATURE_ACPI);
@@ -494,7 +501,7 @@ static void __init xen_load_gdt_boot(const struct desc_ptr *dtr)
 static inline bool desc_equal(const struct desc_struct *d1,
 			      const struct desc_struct *d2)
 {
-	return d1->a == d2->a && d1->b == d2->b;
+	return !memcmp(d1, d2, sizeof(*d1));
 }
 
 static void load_TLS_descriptor(struct thread_struct *t,
@@ -579,59 +586,91 @@ static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum,
 	preempt_enable();
 }
 
+#ifdef CONFIG_X86_64
+struct trap_array_entry {
+	void (*orig)(void);
+	void (*xen)(void);
+	bool ist_okay;
+};
+
+static struct trap_array_entry trap_array[] = {
+	{ debug,                       xen_xendebug,                    true },
+	{ int3,                        xen_xenint3,                     true },
+	{ double_fault,                xen_double_fault,                true },
+#ifdef CONFIG_X86_MCE
+	{ machine_check,               xen_machine_check,               true },
+#endif
+	{ nmi,                         xen_nmi,                         true },
+	{ overflow,                    xen_overflow,                    false },
+#ifdef CONFIG_IA32_EMULATION
+	{ entry_INT80_compat,          xen_entry_INT80_compat,          false },
+#endif
+	{ page_fault,                  xen_page_fault,                  false },
+	{ divide_error,                xen_divide_error,                false },
+	{ bounds,                      xen_bounds,                      false },
+	{ invalid_op,                  xen_invalid_op,                  false },
+	{ device_not_available,        xen_device_not_available,        false },
+	{ coprocessor_segment_overrun, xen_coprocessor_segment_overrun, false },
+	{ invalid_TSS,                 xen_invalid_TSS,                 false },
+	{ segment_not_present,         xen_segment_not_present,         false },
+	{ stack_segment,               xen_stack_segment,               false },
+	{ general_protection,          xen_general_protection,          false },
+	{ spurious_interrupt_bug,      xen_spurious_interrupt_bug,      false },
+	{ coprocessor_error,           xen_coprocessor_error,           false },
+	{ alignment_check,             xen_alignment_check,             false },
+	{ simd_coprocessor_error,      xen_simd_coprocessor_error,      false },
+};
+
+static bool get_trap_addr(void **addr, unsigned int ist)
+{
+	unsigned int nr;
+	bool ist_okay = false;
+
+	/*
+	 * Replace trap handler addresses by Xen specific ones.
+	 * Check for known traps using IST and whitelist them.
+	 * The debugger ones are the only ones we care about.
+	 * Xen will handle faults like double_fault, * so we should never see
+	 * them.  Warn if there's an unexpected IST-using fault handler.
+	 */
+	for (nr = 0; nr < ARRAY_SIZE(trap_array); nr++) {
+		struct trap_array_entry *entry = trap_array + nr;
+
+		if (*addr == entry->orig) {
+			*addr = entry->xen;
+			ist_okay = entry->ist_okay;
+			break;
+		}
+	}
+
+	if (WARN_ON(ist != 0 && !ist_okay))
+		return false;
+
+	return true;
+}
+#endif
+
 static int cvt_gate_to_trap(int vector, const gate_desc *val,
 			    struct trap_info *info)
 {
 	unsigned long addr;
 
-	if (val->type != GATE_TRAP && val->type != GATE_INTERRUPT)
+	if (val->bits.type != GATE_TRAP && val->bits.type != GATE_INTERRUPT)
 		return 0;
 
 	info->vector = vector;
 
-	addr = gate_offset(*val);
+	addr = gate_offset(val);
 #ifdef CONFIG_X86_64
-	/*
-	 * Look for known traps using IST, and substitute them
-	 * appropriately.  The debugger ones are the only ones we care
-	 * about.  Xen will handle faults like double_fault,
-	 * so we should never see them.  Warn if
-	 * there's an unexpected IST-using fault handler.
-	 */
-	if (addr == (unsigned long)debug)
-		addr = (unsigned long)xen_debug;
-	else if (addr == (unsigned long)int3)
-		addr = (unsigned long)xen_int3;
-	else if (addr == (unsigned long)stack_segment)
-		addr = (unsigned long)xen_stack_segment;
-	else if (addr == (unsigned long)double_fault) {
-		/* Don't need to handle these */
+	if (!get_trap_addr((void **)&addr, val->bits.ist))
 		return 0;
-#ifdef CONFIG_X86_MCE
-	} else if (addr == (unsigned long)machine_check) {
-		/*
-		 * when xen hypervisor inject vMCE to guest,
-		 * use native mce handler to handle it
-		 */
-		;
-#endif
-	} else if (addr == (unsigned long)nmi)
-		/*
-		 * Use the native version as well.
-		 */
-		;
-	else {
-		/* Some other trap using IST? */
-		if (WARN_ON(val->ist != 0))
-			return 0;
-	}
 #endif	/* CONFIG_X86_64 */
 	info->address = addr;
 
-	info->cs = gate_segment(*val);
-	info->flags = val->dpl;
+	info->cs = gate_segment(val);
+	info->flags = val->bits.dpl;
 	/* interrupt gates clear IF */
-	if (val->type == GATE_INTERRUPT)
+	if (val->bits.type == GATE_INTERRUPT)
 		info->flags |= 1 << 2;
 
 	return 1;
@@ -981,59 +1020,6 @@ void __ref xen_setup_vcpu_info_placement(void)
 	}
 }
 
-static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf,
-			  unsigned long addr, unsigned len)
-{
-	char *start, *end, *reloc;
-	unsigned ret;
-
-	start = end = reloc = NULL;
-
-#define SITE(op, x)							\
-	case PARAVIRT_PATCH(op.x):					\
-	if (xen_have_vcpu_info_placement) {				\
-		start = (char *)xen_##x##_direct;			\
-		end = xen_##x##_direct_end;				\
-		reloc = xen_##x##_direct_reloc;				\
-	}								\
-	goto patch_site
-
-	switch (type) {
-		SITE(pv_irq_ops, irq_enable);
-		SITE(pv_irq_ops, irq_disable);
-		SITE(pv_irq_ops, save_fl);
-		SITE(pv_irq_ops, restore_fl);
-#undef SITE
-
-	patch_site:
-		if (start == NULL || (end-start) > len)
-			goto default_patch;
-
-		ret = paravirt_patch_insns(insnbuf, len, start, end);
-
-		/* Note: because reloc is assigned from something that
-		   appears to be an array, gcc assumes it's non-null,
-		   but doesn't know its relationship with start and
-		   end. */
-		if (reloc > start && reloc < end) {
-			int reloc_off = reloc - start;
-			long *relocp = (long *)(insnbuf + reloc_off);
-			long delta = start - (char *)addr;
-
-			*relocp += delta;
-		}
-		break;
-
-	default_patch:
-	default:
-		ret = paravirt_patch_default(type, clobbers, insnbuf,
-					     addr, len);
-		break;
-	}
-
-	return ret;
-}
-
 static const struct pv_info xen_info __initconst = {
 	.shared_kernel_pmd = 0,
 
@@ -1043,10 +1029,6 @@ static const struct pv_info xen_info __initconst = {
 	.name = "Xen",
 };
 
-static const struct pv_init_ops xen_init_ops __initconst = {
-	.patch = xen_patch,
-};
-
 static const struct pv_cpu_ops xen_cpu_ops __initconst = {
 	.cpuid = xen_cpuid,
 
@@ -1244,7 +1226,7 @@ asmlinkage __visible void __init xen_start_kernel(void)
 
 	/* Install Xen paravirt ops */
 	pv_info = xen_info;
-	pv_init_ops = xen_init_ops;
+	pv_init_ops.patch = paravirt_patch_default;
 	pv_cpu_ops = xen_cpu_ops;
 
 	x86_platform.get_nmi_reason = xen_get_nmi_reason;
diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c
index 33e92955e09d..d4eff5676cfa 100644
--- a/arch/x86/xen/irq.c
+++ b/arch/x86/xen/irq.c
@@ -123,9 +123,6 @@ static const struct pv_irq_ops xen_irq_ops __initconst = {
 
 	.safe_halt = xen_safe_halt,
 	.halt = xen_halt,
-#ifdef CONFIG_X86_64
-	.adjust_exception_frame = xen_adjust_exception_frame,
-#endif
 };
 
 void __init xen_init_irq_ops(void)
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 3be06f3caf3c..3e15345abfe7 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -84,7 +84,7 @@ static int remap_area_mfn_pte_fn(pte_t *ptep, pgtable_t token,
 	else
 		rmd->mfn++;
 
-	rmd->mmu_update->ptr = virt_to_machine(ptep).maddr;
+	rmd->mmu_update->ptr = virt_to_machine(ptep).maddr | MMU_NORMAL_PT_UPDATE;
 	rmd->mmu_update->val = pte_val_ma(pte);
 	rmd->mmu_update++;
 
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index cab28cf2cffb..6b983b300666 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -162,26 +162,6 @@ static bool xen_page_pinned(void *ptr)
 	return PagePinned(page);
 }
 
-void xen_set_domain_pte(pte_t *ptep, pte_t pteval, unsigned domid)
-{
-	struct multicall_space mcs;
-	struct mmu_update *u;
-
-	trace_xen_mmu_set_domain_pte(ptep, pteval, domid);
-
-	mcs = xen_mc_entry(sizeof(*u));
-	u = mcs.args;
-
-	/* ptep might be kmapped when using 32-bit HIGHPTE */
-	u->ptr = virt_to_machine(ptep).maddr;
-	u->val = pte_val_ma(pteval);
-
-	MULTI_mmu_update(mcs.mc, mcs.args, 1, NULL, domid);
-
-	xen_mc_issue(PARAVIRT_LAZY_MMU);
-}
-EXPORT_SYMBOL_GPL(xen_set_domain_pte);
-
 static void xen_extend_mmu_update(const struct mmu_update *update)
 {
 	struct multicall_space mcs;
@@ -1005,14 +985,12 @@ static void xen_drop_mm_ref(struct mm_struct *mm)
 	/* Get the "official" set of cpus referring to our pagetable. */
 	if (!alloc_cpumask_var(&mask, GFP_ATOMIC)) {
 		for_each_online_cpu(cpu) {
-			if (!cpumask_test_cpu(cpu, mm_cpumask(mm))
-			    && per_cpu(xen_current_cr3, cpu) != __pa(mm->pgd))
+			if (per_cpu(xen_current_cr3, cpu) != __pa(mm->pgd))
 				continue;
 			smp_call_function_single(cpu, drop_mm_ref_this_cpu, mm, 1);
 		}
 		return;
 	}
-	cpumask_copy(mask, mm_cpumask(mm));
 
 	/*
 	 * It's possible that a vcpu may have a stale reference to our
@@ -1021,6 +999,7 @@ static void xen_drop_mm_ref(struct mm_struct *mm)
 	 * look at its actual current cr3 value, and force it to flush
 	 * if needed.
 	 */
+	cpumask_clear(mask);
 	for_each_online_cpu(cpu) {
 		if (per_cpu(xen_current_cr3, cpu) == __pa(mm->pgd))
 			cpumask_set_cpu(cpu, mask);
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 276da636dd39..6083ba462f35 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -212,8 +212,7 @@ void __ref xen_build_mfn_list_list(void)
 	unsigned int level, topidx, mididx;
 	unsigned long *mid_mfn_p;
 
-	if (xen_feature(XENFEAT_auto_translated_physmap) ||
-	    xen_start_info->flags & SIF_VIRT_P2M_4TOOLS)
+	if (xen_start_info->flags & SIF_VIRT_P2M_4TOOLS)
 		return;
 
 	/* Pre-initialize p2m_top_mfn to be completely missing */
@@ -269,9 +268,6 @@ void __ref xen_build_mfn_list_list(void)
 
 void xen_setup_mfn_list_list(void)
 {
-	if (xen_feature(XENFEAT_auto_translated_physmap))
-		return;
-
 	BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info);
 
 	if (xen_start_info->flags & SIF_VIRT_P2M_4TOOLS)
@@ -291,9 +287,6 @@ void __init xen_build_dynamic_phys_to_machine(void)
 {
 	unsigned long pfn;
 
-	 if (xen_feature(XENFEAT_auto_translated_physmap))
-		return;
-
 	xen_p2m_addr = (unsigned long *)xen_start_info->mfn_list;
 	xen_p2m_size = ALIGN(xen_start_info->nr_pages, P2M_PER_PAGE);
 
@@ -540,9 +533,6 @@ int xen_alloc_p2m_entry(unsigned long pfn)
 	unsigned long addr = (unsigned long)(xen_p2m_addr + pfn);
 	unsigned long p2m_pfn;
 
-	if (xen_feature(XENFEAT_auto_translated_physmap))
-		return 0;
-
 	ptep = lookup_address(addr, &level);
 	BUG_ON(!ptep || level != PG_LEVEL_4K);
 	pte_pg = (pte_t *)((unsigned long)ptep & ~(PAGE_SIZE - 1));
@@ -640,9 +630,6 @@ unsigned long __init set_phys_range_identity(unsigned long pfn_s,
 	if (unlikely(pfn_s >= xen_p2m_size))
 		return 0;
 
-	if (unlikely(xen_feature(XENFEAT_auto_translated_physmap)))
-		return pfn_e - pfn_s;
-
 	if (pfn_s > pfn_e)
 		return 0;
 
@@ -660,10 +647,6 @@ bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
 	pte_t *ptep;
 	unsigned int level;
 
-	/* don't track P2M changes in autotranslate guests */
-	if (unlikely(xen_feature(XENFEAT_auto_translated_physmap)))
-		return true;
-
 	if (unlikely(pfn >= xen_p2m_size)) {
 		BUG_ON(mfn != INVALID_P2M_ENTRY);
 		return true;
@@ -711,9 +694,6 @@ int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops,
 	int i, ret = 0;
 	pte_t *pte;
 
-	if (xen_feature(XENFEAT_auto_translated_physmap))
-		return 0;
-
 	if (kmap_ops) {
 		ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
 						kmap_ops, count);
@@ -756,9 +736,6 @@ int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops,
 {
 	int i, ret = 0;
 
-	if (xen_feature(XENFEAT_auto_translated_physmap))
-		return 0;
-
 	for (i = 0; i < count; i++) {
 		unsigned long mfn = __pfn_to_mfn(page_to_pfn(pages[i]));
 		unsigned long pfn = page_to_pfn(pages[i]);
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index c81046323ebc..ac55c02f98e9 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -340,8 +340,6 @@ static void __init xen_do_set_identity_and_remap_chunk(
 
 	WARN_ON(size == 0);
 
-	BUG_ON(xen_feature(XENFEAT_auto_translated_physmap));
-
 	mfn_save = virt_to_mfn(buf);
 
 	for (ident_pfn_iter = start_pfn, remap_pfn_iter = remap_pfn;
@@ -1024,8 +1022,7 @@ void __init xen_pvmmu_arch_setup(void)
 void __init xen_arch_setup(void)
 {
 	xen_panic_handler_init();
-	if (!xen_feature(XENFEAT_auto_translated_physmap))
-		xen_pvmmu_arch_setup();
+	xen_pvmmu_arch_setup();
 
 #ifdef CONFIG_ACPI
 	if (!(xen_start_info->flags & SIF_INITDOMAIN)) {
diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S
index eff224df813f..dcd31fa39b5d 100644
--- a/arch/x86/xen/xen-asm.S
+++ b/arch/x86/xen/xen-asm.S
@@ -1,14 +1,8 @@
 /*
- * Asm versions of Xen pv-ops, suitable for either direct use or
- * inlining.  The inline versions are the same as the direct-use
- * versions, with the pre- and post-amble chopped off.
- *
- * This code is encoded for size rather than absolute efficiency, with
- * a view to being able to inline as much as possible.
+ * Asm versions of Xen pv-ops, suitable for direct use.
  *
  * We only bother with direct forms (ie, vcpu in percpu data) of the
- * operations here; the indirect forms are better handled in C, since
- * they're generally too large to inline anyway.
+ * operations here; the indirect forms are better handled in C.
  */
 
 #include <asm/asm-offsets.h>
@@ -16,7 +10,7 @@
 #include <asm/processor-flags.h>
 #include <asm/frame.h>
 
-#include "xen-asm.h"
+#include <linux/linkage.h>
 
 /*
  * Enable events.  This clears the event mask and tests the pending
@@ -38,13 +32,11 @@ ENTRY(xen_irq_enable_direct)
 	testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending
 	jz 1f
 
-2:	call check_events
+	call check_events
 1:
-ENDPATCH(xen_irq_enable_direct)
 	FRAME_END
 	ret
 	ENDPROC(xen_irq_enable_direct)
-	RELOC(xen_irq_enable_direct, 2b+1)
 
 
 /*
@@ -53,10 +45,8 @@ ENDPATCH(xen_irq_enable_direct)
  */
 ENTRY(xen_irq_disable_direct)
 	movb $1, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
-ENDPATCH(xen_irq_disable_direct)
 	ret
-	ENDPROC(xen_irq_disable_direct)
-	RELOC(xen_irq_disable_direct, 0)
+ENDPROC(xen_irq_disable_direct)
 
 /*
  * (xen_)save_fl is used to get the current interrupt enable status.
@@ -71,10 +61,8 @@ ENTRY(xen_save_fl_direct)
 	testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
 	setz %ah
 	addb %ah, %ah
-ENDPATCH(xen_save_fl_direct)
 	ret
 	ENDPROC(xen_save_fl_direct)
-	RELOC(xen_save_fl_direct, 0)
 
 
 /*
@@ -101,13 +89,11 @@ ENTRY(xen_restore_fl_direct)
 	/* check for unmasked and pending */
 	cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending
 	jnz 1f
-2:	call check_events
+	call check_events
 1:
-ENDPATCH(xen_restore_fl_direct)
 	FRAME_END
 	ret
 	ENDPROC(xen_restore_fl_direct)
-	RELOC(xen_restore_fl_direct, 2b+1)
 
 
 /*
diff --git a/arch/x86/xen/xen-asm.h b/arch/x86/xen/xen-asm.h
deleted file mode 100644
index 465276467a47..000000000000
--- a/arch/x86/xen/xen-asm.h
+++ /dev/null
@@ -1,12 +0,0 @@
-#ifndef _XEN_XEN_ASM_H
-#define _XEN_XEN_ASM_H
-
-#include <linux/linkage.h>
-
-#define RELOC(x, v)	.globl x##_reloc; x##_reloc=v
-#define ENDPATCH(x)	.globl x##_end; x##_end=.
-
-/* Pseudo-flag used for virtual NMI, which we don't implement yet */
-#define XEN_EFLAGS_NMI	0x80000000
-
-#endif
diff --git a/arch/x86/xen/xen-asm_32.S b/arch/x86/xen/xen-asm_32.S
index feb6d40a0860..1200e262a116 100644
--- a/arch/x86/xen/xen-asm_32.S
+++ b/arch/x86/xen/xen-asm_32.S
@@ -1,14 +1,8 @@
 /*
- * Asm versions of Xen pv-ops, suitable for either direct use or
- * inlining.  The inline versions are the same as the direct-use
- * versions, with the pre- and post-amble chopped off.
- *
- * This code is encoded for size rather than absolute efficiency, with
- * a view to being able to inline as much as possible.
+ * Asm versions of Xen pv-ops, suitable for direct use.
  *
  * We only bother with direct forms (ie, vcpu in pda) of the
- * operations here; the indirect forms are better handled in C, since
- * they're generally too large to inline anyway.
+ * operations here; the indirect forms are better handled in C.
  */
 
 #include <asm/thread_info.h>
@@ -18,21 +12,10 @@
 
 #include <xen/interface/xen.h>
 
-#include "xen-asm.h"
+#include <linux/linkage.h>
 
-/*
- * Force an event check by making a hypercall, but preserve regs
- * before making the call.
- */
-check_events:
-	push %eax
-	push %ecx
-	push %edx
-	call xen_force_evtchn_callback
-	pop %edx
-	pop %ecx
-	pop %eax
-	ret
+/* Pseudo-flag used for virtual NMI, which we don't implement yet */
+#define XEN_EFLAGS_NMI  0x80000000
 
 /*
  * This is run where a normal iret would be run, with the same stack setup:
diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
index c3df43141e70..dae2cc33afb5 100644
--- a/arch/x86/xen/xen-asm_64.S
+++ b/arch/x86/xen/xen-asm_64.S
@@ -1,14 +1,8 @@
 /*
- * Asm versions of Xen pv-ops, suitable for either direct use or
- * inlining.  The inline versions are the same as the direct-use
- * versions, with the pre- and post-amble chopped off.
- *
- * This code is encoded for size rather than absolute efficiency, with
- * a view to being able to inline as much as possible.
+ * Asm versions of Xen pv-ops, suitable for direct use.
  *
  * We only bother with direct forms (ie, vcpu in pda) of the
- * operations here; the indirect forms are better handled in C, since
- * they're generally too large to inline anyway.
+ * operations here; the indirect forms are better handled in C.
  */
 
 #include <asm/errno.h>
@@ -20,13 +14,44 @@
 
 #include <xen/interface/xen.h>
 
-#include "xen-asm.h"
+#include <linux/linkage.h>
+
+.macro xen_pv_trap name
+ENTRY(xen_\name)
+	pop %rcx
+	pop %r11
+	jmp  \name
+END(xen_\name)
+.endm
 
-ENTRY(xen_adjust_exception_frame)
-	mov 8+0(%rsp), %rcx
-	mov 8+8(%rsp), %r11
-	ret $16
-ENDPROC(xen_adjust_exception_frame)
+xen_pv_trap divide_error
+xen_pv_trap debug
+xen_pv_trap xendebug
+xen_pv_trap int3
+xen_pv_trap xenint3
+xen_pv_trap nmi
+xen_pv_trap overflow
+xen_pv_trap bounds
+xen_pv_trap invalid_op
+xen_pv_trap device_not_available
+xen_pv_trap double_fault
+xen_pv_trap coprocessor_segment_overrun
+xen_pv_trap invalid_TSS
+xen_pv_trap segment_not_present
+xen_pv_trap stack_segment
+xen_pv_trap general_protection
+xen_pv_trap page_fault
+xen_pv_trap spurious_interrupt_bug
+xen_pv_trap coprocessor_error
+xen_pv_trap alignment_check
+#ifdef CONFIG_X86_MCE
+xen_pv_trap machine_check
+#endif /* CONFIG_X86_MCE */
+xen_pv_trap simd_coprocessor_error
+#ifdef CONFIG_IA32_EMULATION
+xen_pv_trap entry_INT80_compat
+#endif
+xen_pv_trap hypervisor_callback
 
 hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32
 /*
@@ -46,9 +71,7 @@ hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32
  */
 ENTRY(xen_iret)
 	pushq $0
-1:	jmp hypercall_iret
-ENDPATCH(xen_iret)
-RELOC(xen_iret, 1b+1)
+	jmp hypercall_iret
 
 ENTRY(xen_sysret64)
 	/*
@@ -65,9 +88,7 @@ ENTRY(xen_sysret64)
 	pushq %rcx
 
 	pushq $VGCF_in_syscall
-1:	jmp hypercall_iret
-ENDPATCH(xen_sysret64)
-RELOC(xen_sysret64, 1b+1)
+	jmp hypercall_iret
 
 /*
  * Xen handles syscall callbacks much like ordinary exceptions, which
@@ -82,34 +103,47 @@ RELOC(xen_sysret64, 1b+1)
  *	rip
  *	r11
  * rsp->rcx
- *
- * In all the entrypoints, we undo all that to make it look like a
- * CPU-generated syscall/sysenter and jump to the normal entrypoint.
  */
 
-.macro undo_xen_syscall
-	mov 0*8(%rsp), %rcx
-	mov 1*8(%rsp), %r11
-	mov 5*8(%rsp), %rsp
-.endm
-
 /* Normal 64-bit system call target */
 ENTRY(xen_syscall_target)
-	undo_xen_syscall
-	jmp entry_SYSCALL_64_after_swapgs
+	popq %rcx
+	popq %r11
+
+	/*
+	 * Neither Xen nor the kernel really knows what the old SS and
+	 * CS were.  The kernel expects __USER_DS and __USER_CS, so
+	 * report those values even though Xen will guess its own values.
+	 */
+	movq $__USER_DS, 4*8(%rsp)
+	movq $__USER_CS, 1*8(%rsp)
+
+	jmp entry_SYSCALL_64_after_hwframe
 ENDPROC(xen_syscall_target)
 
 #ifdef CONFIG_IA32_EMULATION
 
 /* 32-bit compat syscall target */
 ENTRY(xen_syscall32_target)
-	undo_xen_syscall
-	jmp entry_SYSCALL_compat
+	popq %rcx
+	popq %r11
+
+	/*
+	 * Neither Xen nor the kernel really knows what the old SS and
+	 * CS were.  The kernel expects __USER32_DS and __USER32_CS, so
+	 * report those values even though Xen will guess its own values.
+	 */
+	movq $__USER32_DS, 4*8(%rsp)
+	movq $__USER32_CS, 1*8(%rsp)
+
+	jmp entry_SYSCALL_compat_after_hwframe
 ENDPROC(xen_syscall32_target)
 
 /* 32-bit compat sysenter target */
 ENTRY(xen_sysenter_target)
-	undo_xen_syscall
+	mov 0*8(%rsp), %rcx
+	mov 1*8(%rsp), %r11
+	mov 5*8(%rsp), %rsp
 	jmp entry_SYSENTER_compat
 ENDPROC(xen_sysenter_target)
 
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index 72a8e6adebe6..a7525e95d53f 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -58,7 +58,7 @@ ENTRY(hypercall_page)
 #else
 	ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE,      _ASM_PTR __START_KERNEL_map)
 	/* Map the p2m table to a 512GB-aligned user address. */
-	ELFNOTE(Xen, XEN_ELFNOTE_INIT_P2M,       .quad PGDIR_SIZE)
+	ELFNOTE(Xen, XEN_ELFNOTE_INIT_P2M,       .quad (PUD_SIZE * PTRS_PER_PUD))
 #endif
 #ifdef CONFIG_XEN_PV
 	ELFNOTE(Xen, XEN_ELFNOTE_ENTRY,          _ASM_PTR startup_xen)
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 0d5004477db6..c8a6d224f7ed 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -129,23 +129,15 @@ static inline void __init xen_efi_init(void)
 }
 #endif
 
-/* Declare an asm function, along with symbols needed to make it
-   inlineable */
-#define DECL_ASM(ret, name, ...)		\
-	__visible ret name(__VA_ARGS__);	\
-	extern char name##_end[] __visible;	\
-	extern char name##_reloc[] __visible
-
-DECL_ASM(void, xen_irq_enable_direct, void);
-DECL_ASM(void, xen_irq_disable_direct, void);
-DECL_ASM(unsigned long, xen_save_fl_direct, void);
-DECL_ASM(void, xen_restore_fl_direct, unsigned long);
+__visible void xen_irq_enable_direct(void);
+__visible void xen_irq_disable_direct(void);
+__visible unsigned long xen_save_fl_direct(void);
+__visible void xen_restore_fl_direct(unsigned long);
 
 /* These are not functions, and cannot be called normally */
 __visible void xen_iret(void);
 __visible void xen_sysret32(void);
 __visible void xen_sysret64(void);
-__visible void xen_adjust_exception_frame(void);
 
 extern int xen_panic_handler_init(void);
 
diff --git a/arch/xtensa/include/asm/futex.h b/arch/xtensa/include/asm/futex.h
index b39531babec0..eaaf1ebcc7a4 100644
--- a/arch/xtensa/include/asm/futex.h
+++ b/arch/xtensa/include/asm/futex.h
@@ -44,18 +44,10 @@
 	: "r" (uaddr), "I" (-EFAULT), "r" (oparg)	\
 	: "memory")
 
-static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
+static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
+		u32 __user *uaddr)
 {
-	int op = (encoded_op >> 28) & 7;
-	int cmp = (encoded_op >> 24) & 15;
-	int oparg = (encoded_op << 8) >> 20;
-	int cmparg = (encoded_op << 20) >> 20;
 	int oldval = 0, ret;
-	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
-		oparg = 1 << oparg;
-
-	if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
-		return -EFAULT;
 
 #if !XCHAL_HAVE_S32C1I
 	return -ENOSYS;
@@ -89,19 +81,10 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
 
 	pagefault_enable();
 
-	if (ret)
-		return ret;
+	if (!ret)
+		*oval = oldval;
 
-	switch (cmp) {
-	case FUTEX_OP_CMP_EQ: return (oldval == cmparg);
-	case FUTEX_OP_CMP_NE: return (oldval != cmparg);
-	case FUTEX_OP_CMP_LT: return (oldval < cmparg);
-	case FUTEX_OP_CMP_GE: return (oldval >= cmparg);
-	case FUTEX_OP_CMP_LE: return (oldval <= cmparg);
-	case FUTEX_OP_CMP_GT: return (oldval > cmparg);
-	}
-
-	return -ENOSYS;
+	return ret;
 }
 
 static inline int
diff --git a/arch/xtensa/include/asm/spinlock.h b/arch/xtensa/include/asm/spinlock.h
index a36221cf6363..3bb49681ee24 100644
--- a/arch/xtensa/include/asm/spinlock.h
+++ b/arch/xtensa/include/asm/spinlock.h
@@ -33,11 +33,6 @@
 
 #define arch_spin_is_locked(x) ((x)->slock != 0)
 
-static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
-{
-	smp_cond_load_acquire(&lock->slock, !VAL);
-}
-
 #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
 
 static inline void arch_spin_lock(arch_spinlock_t *lock)
diff --git a/arch/xtensa/include/uapi/asm/mman.h b/arch/xtensa/include/uapi/asm/mman.h
index 24365b30aae9..b15b278aa314 100644
--- a/arch/xtensa/include/uapi/asm/mman.h
+++ b/arch/xtensa/include/uapi/asm/mman.h
@@ -103,20 +103,12 @@
 					   overrides the coredump filter bits */
 #define MADV_DODUMP	17		/* Clear the MADV_NODUMP flag */
 
+#define MADV_WIPEONFORK 18		/* Zero memory on fork, child only */
+#define MADV_KEEPONFORK 19		/* Undo MADV_WIPEONFORK */
+
 /* compatibility flags */
 #define MAP_FILE	0
 
-/*
- * When MAP_HUGETLB is set bits [26:31] encode the log2 of the huge page size.
- * This gives us 6 bits, which is enough until someone invents 128 bit address
- * spaces.
- *
- * Assume these are all power of twos.
- * When 0 use the default page size.
- */
-#define MAP_HUGE_SHIFT	26
-#define MAP_HUGE_MASK	0x3f
-
 #define PKEY_DISABLE_ACCESS	0x1
 #define PKEY_DISABLE_WRITE	0x2
 #define PKEY_ACCESS_MASK	(PKEY_DISABLE_ACCESS |\
diff --git a/arch/xtensa/include/uapi/asm/socket.h b/arch/xtensa/include/uapi/asm/socket.h
index 3eed2761c149..220059999e74 100644
--- a/arch/xtensa/include/uapi/asm/socket.h
+++ b/arch/xtensa/include/uapi/asm/socket.h
@@ -113,4 +113,6 @@
 
 #define SO_PEERGROUPS		59
 
+#define SO_ZEROCOPY		60
+
 #endif	/* _XTENSA_SOCKET_H */
diff --git a/arch/xtensa/kernel/setup.c b/arch/xtensa/kernel/setup.c
index 33bfa5270d95..08175df7a69e 100644
--- a/arch/xtensa/kernel/setup.c
+++ b/arch/xtensa/kernel/setup.c
@@ -273,8 +273,8 @@ void __init init_arch(bp_tag_t *bp_start)
  * Initialize system. Setup memory and reserve regions.
  */
 
-extern char _end;
-extern char _stext;
+extern char _end[];
+extern char _stext[];
 extern char _WindowVectors_text_start;
 extern char _WindowVectors_text_end;
 extern char _DebugInterruptVector_literal_start;
@@ -333,7 +333,7 @@ void __init setup_arch(char **cmdline_p)
 	}
 #endif
 
-	mem_reserve(__pa(&_stext), __pa(&_end));
+	mem_reserve(__pa(_stext), __pa(_end));
 
 #ifdef CONFIG_VECTORS_OFFSET
 	mem_reserve(__pa(&_WindowVectors_text_start),