Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux

Pull s390 updates from Martin Schwidefsky: - Add the CPU id for the new z13s machine - Add a s390 specific XOR template for RAID-5 checksumming based on the XC instruction. Remove all other alternatives, XC is always faster - The merge of our four different stack tracers into a single one - Tidy up the code related to page tables, several large inline functions are now out-of-line. Bloat-o-meter reports ~11K text size reduction - A binary interface for the priviledged CLP instruction to retrieve the hardware view of the installed PCI functions - Improvements for the dasd format code - Bug fixes and cleanups * 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux: (31 commits) s390/pci: enforce fmb page boundary rule s390: fix floating pointer register corruption (again) s390/cpumf: add missing lpp magic initialization s390: Fix misspellings in comments s390/mm: split arch/s390/mm/pgtable.c s390/mm: uninline pmdp_xxx functions from pgtable.h s390/mm: uninline ptep_xxx functions from pgtable.h s390/pci: add ioctl interface for CLP s390: Use pr_warn instead of pr_warning s390/dasd: remove casts to dasd_*_private s390/dasd: Refactor dasd format functions s390/dasd: Simplify code in format logic s390/dasd: Improve dasd format code s390/percpu: remove this_cpu_cmpxchg_double_4 s390/cpumf: Improve guest detection heuristics s390/fault: merge report_user_fault implementations s390/dis: use correct escape sequence for '%' character s390/kvm: simplify set_guest_storage_key s390/oprofile: add z13/z13s model numbers s390: add z13s model number to z13 elf platform ...
author: Linus Torvalds <torvalds@linux-foundation.org> 2016-03-16 20:47:45 +0300
committer: Linus Torvalds <torvalds@linux-foundation.org> 2016-03-16 20:47:45 +0300
commit: 72aafdf01d826ad9e6fbd52bf689be937679f5d6 (patch)
tree: 2ee440ea4621aabee06b02ca5733ffb9593466d4 /arch/s390
parent: 1c8e85b17ac0707c7732081e94cadc5f89986e5f (diff)
parent: 80c544ded25ac14d7cc3e555abb8ed2c2da99b84 (diff)
download: linux-72aafdf01d826ad9e6fbd52bf689be937679f5d6.tar.xz
51 files changed, 2358 insertions, 2183 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 3be9c832dec1..683af90efe86 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -254,12 +254,12 @@ config MARCH_ZEC12
 	  older machines.
 
 config MARCH_Z13
-	bool "IBM z13"
+	bool "IBM z13s and z13"
 	select HAVE_MARCH_Z13_FEATURES
 	help
-	  Select this to enable optimizations for IBM z13 (2964 series).
-	  The kernel will be slightly faster but will not work on older
-	  machines.
+	  Select this to enable optimizations for IBM z13s and z13 (2965 and
+	  2964 series). The kernel will be slightly faster but will not work on
+	  older machines.
 
 endchoice
 
diff --git a/arch/s390/include/asm/clp.h b/arch/s390/include/asm/clp.h
index a0e71a501f7c..5687d62fb0cb 100644
--- a/arch/s390/include/asm/clp.h
+++ b/arch/s390/include/asm/clp.h
@@ -4,14 +4,23 @@
 /* CLP common request & response block size */
 #define CLP_BLK_SIZE			PAGE_SIZE
 
+#define CLP_LPS_BASE	0
+#define CLP_LPS_PCI	2
+
 struct clp_req_hdr {
 	u16 len;
 	u16 cmd;
+	u32 fmt		: 4;
+	u32 reserved1	: 28;
+	u64 reserved2;
 } __packed;
 
 struct clp_rsp_hdr {
 	u16 len;
 	u16 rsp;
+	u32 fmt		: 4;
+	u32 reserved1	: 28;
+	u64 reserved2;
 } __packed;
 
 /* CLP Response Codes */
@@ -25,4 +34,22 @@ struct clp_rsp_hdr {
 #define CLP_RC_NODATA			0x0080	/* No data available */
 #define CLP_RC_FC_UNKNOWN		0x0100	/* Function code not recognized */
 
+/* Store logical-processor characteristics request */
+struct clp_req_slpc {
+	struct clp_req_hdr hdr;
+} __packed;
+
+struct clp_rsp_slpc {
+	struct clp_rsp_hdr hdr;
+	u32 reserved2[4];
+	u32 lpif[8];
+	u32 reserved3[8];
+	u32 lpic[8];
+} __packed;
+
+struct clp_req_rsp_slpc {
+	struct clp_req_slpc request;
+	struct clp_rsp_slpc response;
+} __packed;
+
 #endif
diff --git a/arch/s390/include/asm/gmap.h b/arch/s390/include/asm/gmap.h
new file mode 100644
index 000000000000..d054c1b07a3c
--- /dev/null
+++ b/arch/s390/include/asm/gmap.h
@@ -0,0 +1,64 @@
+/*
+ *  KVM guest address space mapping code
+ *
+ *    Copyright IBM Corp. 2007, 2016
+ *    Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#ifndef _ASM_S390_GMAP_H
+#define _ASM_S390_GMAP_H
+
+/**
+ * struct gmap_struct - guest address space
+ * @crst_list: list of all crst tables used in the guest address space
+ * @mm: pointer to the parent mm_struct
+ * @guest_to_host: radix tree with guest to host address translation
+ * @host_to_guest: radix tree with pointer to segment table entries
+ * @guest_table_lock: spinlock to protect all entries in the guest page table
+ * @table: pointer to the page directory
+ * @asce: address space control element for gmap page table
+ * @pfault_enabled: defines if pfaults are applicable for the guest
+ */
+struct gmap {
+	struct list_head list;
+	struct list_head crst_list;
+	struct mm_struct *mm;
+	struct radix_tree_root guest_to_host;
+	struct radix_tree_root host_to_guest;
+	spinlock_t guest_table_lock;
+	unsigned long *table;
+	unsigned long asce;
+	unsigned long asce_end;
+	void *private;
+	bool pfault_enabled;
+};
+
+/**
+ * struct gmap_notifier - notify function block for page invalidation
+ * @notifier_call: address of callback function
+ */
+struct gmap_notifier {
+	struct list_head list;
+	void (*notifier_call)(struct gmap *gmap, unsigned long gaddr);
+};
+
+struct gmap *gmap_alloc(struct mm_struct *mm, unsigned long limit);
+void gmap_free(struct gmap *gmap);
+void gmap_enable(struct gmap *gmap);
+void gmap_disable(struct gmap *gmap);
+int gmap_map_segment(struct gmap *gmap, unsigned long from,
+		     unsigned long to, unsigned long len);
+int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len);
+unsigned long __gmap_translate(struct gmap *, unsigned long gaddr);
+unsigned long gmap_translate(struct gmap *, unsigned long gaddr);
+int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr);
+int gmap_fault(struct gmap *, unsigned long gaddr, unsigned int fault_flags);
+void gmap_discard(struct gmap *, unsigned long from, unsigned long to);
+void __gmap_zap(struct gmap *, unsigned long gaddr);
+void gmap_unlink(struct mm_struct *, unsigned long *table, unsigned long vmaddr);
+
+void gmap_register_ipte_notifier(struct gmap_notifier *);
+void gmap_unregister_ipte_notifier(struct gmap_notifier *);
+int gmap_ipte_notify(struct gmap *, unsigned long start, unsigned long len);
+
+#endif /* _ASM_S390_GMAP_H */
diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index c873e682b67f..f833082d9167 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -45,7 +45,7 @@ struct zpci_fmb {
 	u64 rpcit_ops;
 	u64 dma_rbytes;
 	u64 dma_wbytes;
-} __packed __aligned(16);
+} __packed __aligned(64);
 
 enum zpci_state {
 	ZPCI_FN_STATE_RESERVED,
@@ -66,7 +66,6 @@ struct s390_domain;
 
 /* Private data per function */
 struct zpci_dev {
-	struct pci_dev	*pdev;
 	struct pci_bus	*bus;
 	struct list_head entry;		/* list of all zpci_devices, needed for hotplug, etc. */
 
@@ -192,7 +191,7 @@ int zpci_fmb_disable_device(struct zpci_dev *);
 /* Debug */
 int zpci_debug_init(void);
 void zpci_debug_exit(void);
-void zpci_debug_init_device(struct zpci_dev *);
+void zpci_debug_init_device(struct zpci_dev *, const char *);
 void zpci_debug_exit_device(struct zpci_dev *);
 void zpci_debug_info(struct zpci_dev *, struct seq_file *);
 
diff --git a/arch/s390/include/asm/pci_clp.h b/arch/s390/include/asm/pci_clp.h
index dd78f92f1cce..e75c64cbcf08 100644
--- a/arch/s390/include/asm/pci_clp.h
+++ b/arch/s390/include/asm/pci_clp.h
@@ -49,9 +49,6 @@ struct clp_fh_list_entry {
 /* List PCI functions request */
 struct clp_req_list_pci {
 	struct clp_req_hdr hdr;
-	u32 fmt			:  4;	/* cmd request block format */
-	u32			: 28;
-	u64 reserved1;
 	u64 resume_token;
 	u64 reserved2;
 } __packed;
@@ -59,9 +56,6 @@ struct clp_req_list_pci {
 /* List PCI functions response */
 struct clp_rsp_list_pci {
 	struct clp_rsp_hdr hdr;
-	u32 fmt			:  4;	/* cmd request block format */
-	u32			: 28;
-	u64 reserved1;
 	u64 resume_token;
 	u32 reserved2;
 	u16 max_fn;
@@ -73,9 +67,6 @@ struct clp_rsp_list_pci {
 /* Query PCI function request */
 struct clp_req_query_pci {
 	struct clp_req_hdr hdr;
-	u32 fmt			:  4;	/* cmd request block format */
-	u32			: 28;
-	u64 reserved1;
 	u32 fh;				/* function handle */
 	u32 reserved2;
 	u64 reserved3;
@@ -84,9 +75,6 @@ struct clp_req_query_pci {
 /* Query PCI function response */
 struct clp_rsp_query_pci {
 	struct clp_rsp_hdr hdr;
-	u32 fmt			:  4;	/* cmd request block format */
-	u32			: 28;
-	u64			: 64;
 	u16 vfn;			/* virtual fn number */
 	u16			:  7;
 	u16 util_str_avail	:  1;	/* utility string available? */
@@ -108,21 +96,15 @@ struct clp_rsp_query_pci {
 /* Query PCI function group request */
 struct clp_req_query_pci_grp {
 	struct clp_req_hdr hdr;
-	u32 fmt			:  4;	/* cmd request block format */
-	u32			: 28;
-	u64 reserved1;
-	u32			: 24;
+	u32 reserved2		: 24;
 	u32 pfgid		:  8;	/* function group id */
-	u32 reserved2;
-	u64 reserved3;
+	u32 reserved3;
+	u64 reserved4;
 } __packed;
 
 /* Query PCI function group response */
 struct clp_rsp_query_pci_grp {
 	struct clp_rsp_hdr hdr;
-	u32 fmt			:  4;	/* cmd request block format */
-	u32			: 28;
-	u64 reserved1;
 	u16			:  4;
 	u16 noi			: 12;	/* number of interrupts */
 	u8 version;
@@ -141,9 +123,6 @@ struct clp_rsp_query_pci_grp {
 /* Set PCI function request */
 struct clp_req_set_pci {
 	struct clp_req_hdr hdr;
-	u32 fmt			:  4;	/* cmd request block format */
-	u32			: 28;
-	u64 reserved1;
 	u32 fh;				/* function handle */
 	u16 reserved2;
 	u8 oc;				/* operation controls */
@@ -154,9 +133,6 @@ struct clp_req_set_pci {
 /* Set PCI function response */
 struct clp_rsp_set_pci {
 	struct clp_rsp_hdr hdr;
-	u32 fmt			:  4;	/* cmd request block format */
-	u32			: 28;
-	u64 reserved1;
 	u32 fh;				/* function handle */
 	u32 reserved3;
 	u64 reserved4;
diff --git a/arch/s390/include/asm/percpu.h b/arch/s390/include/asm/percpu.h
index 6d6556ca24aa..90240dfef76a 100644
--- a/arch/s390/include/asm/percpu.h
+++ b/arch/s390/include/asm/percpu.h
@@ -178,7 +178,6 @@
 	ret__;								\
 })
 
-#define this_cpu_cmpxchg_double_4 arch_this_cpu_cmpxchg_double
 #define this_cpu_cmpxchg_double_8 arch_this_cpu_cmpxchg_double
 
 #include <asm-generic/percpu.h>
diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h
index f897ec73dc8c..1f7ff85c5e4c 100644
--- a/arch/s390/include/asm/perf_event.h
+++ b/arch/s390/include/asm/perf_event.h
@@ -21,7 +21,7 @@
 #define PMU_F_ERR_LSDA			0x0200
 #define PMU_F_ERR_MASK			(PMU_F_ERR_IBE|PMU_F_ERR_LSDA)
 
-/* Perf defintions for PMU event attributes in sysfs */
+/* Perf definitions for PMU event attributes in sysfs */
 extern __init const struct attribute_group **cpumf_cf_event_group(void);
 extern ssize_t cpumf_events_sysfs_show(struct device *dev,
 				       struct device_attribute *attr,
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index d7cc79fb6191..9b3d9b6099f2 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -23,10 +23,6 @@ void page_table_free(struct mm_struct *, unsigned long *);
 void page_table_free_rcu(struct mmu_gather *, unsigned long *, unsigned long);
 extern int page_table_allocate_pgste;
 
-int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
-			  unsigned long key, bool nq);
-unsigned long get_guest_storage_key(struct mm_struct *mm, unsigned long addr);
-
 static inline void clear_table(unsigned long *s, unsigned long val, size_t n)
 {
 	typedef struct { char _[n]; } addrtype;
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 64ead8091248..2f66645587a2 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -298,15 +298,15 @@ static inline int is_module_addr(void *addr)
 
 /*
  * Segment table entry encoding (R = read-only, I = invalid, y = young bit):
- *				dy..R...I...wr
+ *				dy..R...I...rw
  * prot-none, clean, old	00..1...1...00
  * prot-none, clean, young	01..1...1...00
  * prot-none, dirty, old	10..1...1...00
  * prot-none, dirty, young	11..1...1...00
- * read-only, clean, old	00..1...1...01
- * read-only, clean, young	01..1...0...01
- * read-only, dirty, old	10..1...1...01
- * read-only, dirty, young	11..1...0...01
+ * read-only, clean, old	00..1...1...10
+ * read-only, clean, young	01..1...0...10
+ * read-only, dirty, old	10..1...1...10
+ * read-only, dirty, young	11..1...0...10
  * read-write, clean, old	00..1...1...11
  * read-write, clean, young	01..1...0...11
  * read-write, dirty, old	10..0...1...11
@@ -520,15 +520,6 @@ static inline int pmd_bad(pmd_t pmd)
 	return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS) != 0;
 }
 
-#define  __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
-extern int pmdp_set_access_flags(struct vm_area_struct *vma,
-				 unsigned long address, pmd_t *pmdp,
-				 pmd_t entry, int dirty);
-
-#define __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH
-extern int pmdp_clear_flush_young(struct vm_area_struct *vma,
-				  unsigned long address, pmd_t *pmdp);
-
 #define __HAVE_ARCH_PMD_WRITE
 static inline int pmd_write(pmd_t pmd)
 {
@@ -631,208 +622,6 @@ static inline pmd_t pmd_clear_soft_dirty(pmd_t pmd)
 	return pmd;
 }
 
-static inline pgste_t pgste_get_lock(pte_t *ptep)
-{
-	unsigned long new = 0;
-#ifdef CONFIG_PGSTE
-	unsigned long old;
-
-	preempt_disable();
-	asm(
-		"	lg	%0,%2\n"
-		"0:	lgr	%1,%0\n"
-		"	nihh	%0,0xff7f\n"	/* clear PCL bit in old */
-		"	oihh	%1,0x0080\n"	/* set PCL bit in new */
-		"	csg	%0,%1,%2\n"
-		"	jl	0b\n"
-		: "=&d" (old), "=&d" (new), "=Q" (ptep[PTRS_PER_PTE])
-		: "Q" (ptep[PTRS_PER_PTE]) : "cc", "memory");
-#endif
-	return __pgste(new);
-}
-
-static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste)
-{
-#ifdef CONFIG_PGSTE
-	asm(
-		"	nihh	%1,0xff7f\n"	/* clear PCL bit */
-		"	stg	%1,%0\n"
-		: "=Q" (ptep[PTRS_PER_PTE])
-		: "d" (pgste_val(pgste)), "Q" (ptep[PTRS_PER_PTE])
-		: "cc", "memory");
-	preempt_enable();
-#endif
-}
-
-static inline pgste_t pgste_get(pte_t *ptep)
-{
-	unsigned long pgste = 0;
-#ifdef CONFIG_PGSTE
-	pgste = *(unsigned long *)(ptep + PTRS_PER_PTE);
-#endif
-	return __pgste(pgste);
-}
-
-static inline void pgste_set(pte_t *ptep, pgste_t pgste)
-{
-#ifdef CONFIG_PGSTE
-	*(pgste_t *)(ptep + PTRS_PER_PTE) = pgste;
-#endif
-}
-
-static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste,
-				       struct mm_struct *mm)
-{
-#ifdef CONFIG_PGSTE
-	unsigned long address, bits, skey;
-
-	if (!mm_use_skey(mm) || pte_val(*ptep) & _PAGE_INVALID)
-		return pgste;
-	address = pte_val(*ptep) & PAGE_MASK;
-	skey = (unsigned long) page_get_storage_key(address);
-	bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
-	/* Transfer page changed & referenced bit to guest bits in pgste */
-	pgste_val(pgste) |= bits << 48;		/* GR bit & GC bit */
-	/* Copy page access key and fetch protection bit to pgste */
-	pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT);
-	pgste_val(pgste) |= (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
-#endif
-	return pgste;
-
-}
-
-static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry,
-				 struct mm_struct *mm)
-{
-#ifdef CONFIG_PGSTE
-	unsigned long address;
-	unsigned long nkey;
-
-	if (!mm_use_skey(mm) || pte_val(entry) & _PAGE_INVALID)
-		return;
-	VM_BUG_ON(!(pte_val(*ptep) & _PAGE_INVALID));
-	address = pte_val(entry) & PAGE_MASK;
-	/*
-	 * Set page access key and fetch protection bit from pgste.
-	 * The guest C/R information is still in the PGSTE, set real
-	 * key C/R to 0.
-	 */
-	nkey = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56;
-	nkey |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48;
-	page_set_storage_key(address, nkey, 0);
-#endif
-}
-
-static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry)
-{
-	if ((pte_val(entry) & _PAGE_PRESENT) &&
-	    (pte_val(entry) & _PAGE_WRITE) &&
-	    !(pte_val(entry) & _PAGE_INVALID)) {
-		if (!MACHINE_HAS_ESOP) {
-			/*
-			 * Without enhanced suppression-on-protection force
-			 * the dirty bit on for all writable ptes.
-			 */
-			pte_val(entry) |= _PAGE_DIRTY;
-			pte_val(entry) &= ~_PAGE_PROTECT;
-		}
-		if (!(pte_val(entry) & _PAGE_PROTECT))
-			/* This pte allows write access, set user-dirty */
-			pgste_val(pgste) |= PGSTE_UC_BIT;
-	}
-	*ptep = entry;
-	return pgste;
-}
-
-/**
- * struct gmap_struct - guest address space
- * @crst_list: list of all crst tables used in the guest address space
- * @mm: pointer to the parent mm_struct
- * @guest_to_host: radix tree with guest to host address translation
- * @host_to_guest: radix tree with pointer to segment table entries
- * @guest_table_lock: spinlock to protect all entries in the guest page table
- * @table: pointer to the page directory
- * @asce: address space control element for gmap page table
- * @pfault_enabled: defines if pfaults are applicable for the guest
- */
-struct gmap {
-	struct list_head list;
-	struct list_head crst_list;
-	struct mm_struct *mm;
-	struct radix_tree_root guest_to_host;
-	struct radix_tree_root host_to_guest;
-	spinlock_t guest_table_lock;
-	unsigned long *table;
-	unsigned long asce;
-	unsigned long asce_end;
-	void *private;
-	bool pfault_enabled;
-};
-
-/**
- * struct gmap_notifier - notify function block for page invalidation
- * @notifier_call: address of callback function
- */
-struct gmap_notifier {
-	struct list_head list;
-	void (*notifier_call)(struct gmap *gmap, unsigned long gaddr);
-};
-
-struct gmap *gmap_alloc(struct mm_struct *mm, unsigned long limit);
-void gmap_free(struct gmap *gmap);
-void gmap_enable(struct gmap *gmap);
-void gmap_disable(struct gmap *gmap);
-int gmap_map_segment(struct gmap *gmap, unsigned long from,
-		     unsigned long to, unsigned long len);
-int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len);
-unsigned long __gmap_translate(struct gmap *, unsigned long gaddr);
-unsigned long gmap_translate(struct gmap *, unsigned long gaddr);
-int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr);
-int gmap_fault(struct gmap *, unsigned long gaddr, unsigned int fault_flags);
-void gmap_discard(struct gmap *, unsigned long from, unsigned long to);
-void __gmap_zap(struct gmap *, unsigned long gaddr);
-bool gmap_test_and_clear_dirty(unsigned long address, struct gmap *);
-
-
-void gmap_register_ipte_notifier(struct gmap_notifier *);
-void gmap_unregister_ipte_notifier(struct gmap_notifier *);
-int gmap_ipte_notify(struct gmap *, unsigned long start, unsigned long len);
-void gmap_do_ipte_notify(struct mm_struct *, unsigned long addr, pte_t *);
-
-static inline pgste_t pgste_ipte_notify(struct mm_struct *mm,
-					unsigned long addr,
-					pte_t *ptep, pgste_t pgste)
-{
-#ifdef CONFIG_PGSTE
-	if (pgste_val(pgste) & PGSTE_IN_BIT) {
-		pgste_val(pgste) &= ~PGSTE_IN_BIT;
-		gmap_do_ipte_notify(mm, addr, ptep);
-	}
-#endif
-	return pgste;
-}
-
-/*
- * Certain architectures need to do special things when PTEs
- * within a page table are directly modified.  Thus, the following
- * hook is made available.
- */
-static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
-			      pte_t *ptep, pte_t entry)
-{
-	pgste_t pgste;
-
-	if (mm_has_pgste(mm)) {
-		pgste = pgste_get_lock(ptep);
-		pgste_val(pgste) &= ~_PGSTE_GPS_ZERO;
-		pgste_set_key(ptep, pgste, entry, mm);
-		pgste = pgste_set_pte(ptep, pgste, entry);
-		pgste_set_unlock(ptep, pgste);
-	} else {
-		*ptep = entry;
-	}
-}
-
 /*
  * query functions pte_write/pte_dirty/pte_young only work if
  * pte_present() is true. Undefined behaviour if not..
@@ -998,96 +787,30 @@ static inline void __ptep_ipte_range(unsigned long address, int nr, pte_t *ptep)
 	} while (nr != 255);
 }
 
-static inline void ptep_flush_direct(struct mm_struct *mm,
-				     unsigned long address, pte_t *ptep)
-{
-	int active, count;
-
-	if (pte_val(*ptep) & _PAGE_INVALID)
-		return;
-	active = (mm == current->active_mm) ? 1 : 0;
-	count = atomic_add_return(0x10000, &mm->context.attach_count);
-	if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active &&
-	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
-		__ptep_ipte_local(address, ptep);
-	else
-		__ptep_ipte(address, ptep);
-	atomic_sub(0x10000, &mm->context.attach_count);
-}
-
-static inline void ptep_flush_lazy(struct mm_struct *mm,
-				   unsigned long address, pte_t *ptep)
-{
-	int active, count;
-
-	if (pte_val(*ptep) & _PAGE_INVALID)
-		return;
-	active = (mm == current->active_mm) ? 1 : 0;
-	count = atomic_add_return(0x10000, &mm->context.attach_count);
-	if ((count & 0xffff) <= active) {
-		pte_val(*ptep) |= _PAGE_INVALID;
-		mm->context.flush_mm = 1;
-	} else
-		__ptep_ipte(address, ptep);
-	atomic_sub(0x10000, &mm->context.attach_count);
-}
-
 /*
- * Get (and clear) the user dirty bit for a pte.
+ * This is hard to understand. ptep_get_and_clear and ptep_clear_flush
+ * both clear the TLB for the unmapped pte. The reason is that
+ * ptep_get_and_clear is used in common code (e.g. change_pte_range)
+ * to modify an active pte. The sequence is
+ *   1) ptep_get_and_clear
+ *   2) set_pte_at
+ *   3) flush_tlb_range
+ * On s390 the tlb needs to get flushed with the modification of the pte
+ * if the pte is active. The only way how this can be implemented is to
+ * have ptep_get_and_clear do the tlb flush. In exchange flush_tlb_range
+ * is a nop.
  */
-static inline int ptep_test_and_clear_user_dirty(struct mm_struct *mm,
-						 unsigned long addr,
-						 pte_t *ptep)
-{
-	pgste_t pgste;
-	pte_t pte;
-	int dirty;
-
-	if (!mm_has_pgste(mm))
-		return 0;
-	pgste = pgste_get_lock(ptep);
-	dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT);
-	pgste_val(pgste) &= ~PGSTE_UC_BIT;
-	pte = *ptep;
-	if (dirty && (pte_val(pte) & _PAGE_PRESENT)) {
-		pgste = pgste_ipte_notify(mm, addr, ptep, pgste);
-		__ptep_ipte(addr, ptep);
-		if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE))
-			pte_val(pte) |= _PAGE_PROTECT;
-		else
-			pte_val(pte) |= _PAGE_INVALID;
-		*ptep = pte;
-	}
-	pgste_set_unlock(ptep, pgste);
-	return dirty;
-}
+pte_t ptep_xchg_direct(struct mm_struct *, unsigned long, pte_t *, pte_t);
+pte_t ptep_xchg_lazy(struct mm_struct *, unsigned long, pte_t *, pte_t);
 
 #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
 static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
 					    unsigned long addr, pte_t *ptep)
 {
-	pgste_t pgste;
-	pte_t pte, oldpte;
-	int young;
-
-	if (mm_has_pgste(vma->vm_mm)) {
-		pgste = pgste_get_lock(ptep);
-		pgste = pgste_ipte_notify(vma->vm_mm, addr, ptep, pgste);
-	}
-
-	oldpte = pte = *ptep;
-	ptep_flush_direct(vma->vm_mm, addr, ptep);
-	young = pte_young(pte);
-	pte = pte_mkold(pte);
-
-	if (mm_has_pgste(vma->vm_mm)) {
-		pgste = pgste_update_all(&oldpte, pgste, vma->vm_mm);
-		pgste = pgste_set_pte(ptep, pgste, pte);
-		pgste_set_unlock(ptep, pgste);
-	} else
-		*ptep = pte;
+	pte_t pte = *ptep;
 
-	return young;
+	pte = ptep_xchg_direct(vma->vm_mm, addr, ptep, pte_mkold(pte));
+	return pte_young(pte);
 }
 
 #define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
@@ -1097,104 +820,22 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
 	return ptep_test_and_clear_young(vma, address, ptep);
 }
 
-/*
- * This is hard to understand. ptep_get_and_clear and ptep_clear_flush
- * both clear the TLB for the unmapped pte. The reason is that
- * ptep_get_and_clear is used in common code (e.g. change_pte_range)
- * to modify an active pte. The sequence is
- *   1) ptep_get_and_clear
- *   2) set_pte_at
- *   3) flush_tlb_range
- * On s390 the tlb needs to get flushed with the modification of the pte
- * if the pte is active. The only way how this can be implemented is to
- * have ptep_get_and_clear do the tlb flush. In exchange flush_tlb_range
- * is a nop.
- */
 #define __HAVE_ARCH_PTEP_GET_AND_CLEAR
 static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
-				       unsigned long address, pte_t *ptep)
+				       unsigned long addr, pte_t *ptep)
 {
-	pgste_t pgste;
-	pte_t pte;
-
-	if (mm_has_pgste(mm)) {
-		pgste = pgste_get_lock(ptep);
-		pgste = pgste_ipte_notify(mm, address, ptep, pgste);
-	}
-
-	pte = *ptep;
-	ptep_flush_lazy(mm, address, ptep);
-	pte_val(*ptep) = _PAGE_INVALID;
-
-	if (mm_has_pgste(mm)) {
-		pgste = pgste_update_all(&pte, pgste, mm);
-		pgste_set_unlock(ptep, pgste);
-	}
-	return pte;
+	return ptep_xchg_lazy(mm, addr, ptep, __pte(_PAGE_INVALID));
 }
 
 #define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
-static inline pte_t ptep_modify_prot_start(struct mm_struct *mm,
-					   unsigned long address,
-					   pte_t *ptep)
-{
-	pgste_t pgste;
-	pte_t pte;
-
-	if (mm_has_pgste(mm)) {
-		pgste = pgste_get_lock(ptep);
-		pgste_ipte_notify(mm, address, ptep, pgste);
-	}
-
-	pte = *ptep;
-	ptep_flush_lazy(mm, address, ptep);
-
-	if (mm_has_pgste(mm)) {
-		pgste = pgste_update_all(&pte, pgste, mm);
-		pgste_set(ptep, pgste);
-	}
-	return pte;
-}
-
-static inline void ptep_modify_prot_commit(struct mm_struct *mm,
-					   unsigned long address,
-					   pte_t *ptep, pte_t pte)
-{
-	pgste_t pgste;
-
-	if (mm_has_pgste(mm)) {
-		pgste = pgste_get(ptep);
-		pgste_set_key(ptep, pgste, pte, mm);
-		pgste = pgste_set_pte(ptep, pgste, pte);
-		pgste_set_unlock(ptep, pgste);
-	} else
-		*ptep = pte;
-}
+pte_t ptep_modify_prot_start(struct mm_struct *, unsigned long, pte_t *);
+void ptep_modify_prot_commit(struct mm_struct *, unsigned long, pte_t *, pte_t);
 
 #define __HAVE_ARCH_PTEP_CLEAR_FLUSH
 static inline pte_t ptep_clear_flush(struct vm_area_struct *vma,
-				     unsigned long address, pte_t *ptep)
+				     unsigned long addr, pte_t *ptep)
 {
-	pgste_t pgste;
-	pte_t pte;
-
-	if (mm_has_pgste(vma->vm_mm)) {
-		pgste = pgste_get_lock(ptep);
-		pgste = pgste_ipte_notify(vma->vm_mm, address, ptep, pgste);
-	}
-
-	pte = *ptep;
-	ptep_flush_direct(vma->vm_mm, address, ptep);
-	pte_val(*ptep) = _PAGE_INVALID;
-
-	if (mm_has_pgste(vma->vm_mm)) {
-		if ((pgste_val(pgste) & _PGSTE_GPS_USAGE_MASK) ==
-		    _PGSTE_GPS_USAGE_UNUSED)
-			pte_val(pte) |= _PAGE_UNUSED;
-		pgste = pgste_update_all(&pte, pgste, vma->vm_mm);
-		pgste_set_unlock(ptep, pgste);
-	}
-	return pte;
+	return ptep_xchg_direct(vma->vm_mm, addr, ptep, __pte(_PAGE_INVALID));
 }
 
 /*
@@ -1206,80 +847,66 @@ static inline pte_t ptep_clear_flush(struct vm_area_struct *vma,
  */
 #define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
 static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
-					    unsigned long address,
+					    unsigned long addr,
 					    pte_t *ptep, int full)
 {
-	pgste_t pgste;
-	pte_t pte;
-
-	if (!full && mm_has_pgste(mm)) {
-		pgste = pgste_get_lock(ptep);
-		pgste = pgste_ipte_notify(mm, address, ptep, pgste);
-	}
-
-	pte = *ptep;
-	if (!full)
-		ptep_flush_lazy(mm, address, ptep);
-	pte_val(*ptep) = _PAGE_INVALID;
-
-	if (!full && mm_has_pgste(mm)) {
-		pgste = pgste_update_all(&pte, pgste, mm);
-		pgste_set_unlock(ptep, pgste);
+	if (full) {
+		pte_t pte = *ptep;
+		*ptep = __pte(_PAGE_INVALID);
+		return pte;
 	}
-	return pte;
+	return ptep_xchg_lazy(mm, addr, ptep, __pte(_PAGE_INVALID));
 }
 
 #define __HAVE_ARCH_PTEP_SET_WRPROTECT
-static inline pte_t ptep_set_wrprotect(struct mm_struct *mm,
-				       unsigned long address, pte_t *ptep)
+static inline void ptep_set_wrprotect(struct mm_struct *mm,
+				      unsigned long addr, pte_t *ptep)
 {
-	pgste_t pgste;
 	pte_t pte = *ptep;
 
-	if (pte_write(pte)) {
-		if (mm_has_pgste(mm)) {
-			pgste = pgste_get_lock(ptep);
-			pgste = pgste_ipte_notify(mm, address, ptep, pgste);
-		}
-
-		ptep_flush_lazy(mm, address, ptep);
-		pte = pte_wrprotect(pte);
-
-		if (mm_has_pgste(mm)) {
-			pgste = pgste_set_pte(ptep, pgste, pte);
-			pgste_set_unlock(ptep, pgste);
-		} else
-			*ptep = pte;
-	}
-	return pte;
+	if (pte_write(pte))
+		ptep_xchg_lazy(mm, addr, ptep, pte_wrprotect(pte));
 }
 
 #define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
 static inline int ptep_set_access_flags(struct vm_area_struct *vma,
-					unsigned long address, pte_t *ptep,
+					unsigned long addr, pte_t *ptep,
 					pte_t entry, int dirty)
 {
-	pgste_t pgste;
-	pte_t oldpte;
-
-	oldpte = *ptep;
-	if (pte_same(oldpte, entry))
+	if (pte_same(*ptep, entry))
 		return 0;
-	if (mm_has_pgste(vma->vm_mm)) {
-		pgste = pgste_get_lock(ptep);
-		pgste = pgste_ipte_notify(vma->vm_mm, address, ptep, pgste);
-	}
+	ptep_xchg_direct(vma->vm_mm, addr, ptep, entry);
+	return 1;
+}
 
-	ptep_flush_direct(vma->vm_mm, address, ptep);
+/*
+ * Additional functions to handle KVM guest page tables
+ */
+void ptep_set_pte_at(struct mm_struct *mm, unsigned long addr,
+		     pte_t *ptep, pte_t entry);
+void ptep_set_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
+void ptep_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
+void ptep_zap_unused(struct mm_struct *mm, unsigned long addr,
+		     pte_t *ptep , int reset);
+void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
+
+bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long address);
+int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
+			  unsigned char key, bool nq);
+unsigned char get_guest_storage_key(struct mm_struct *mm, unsigned long addr);
 
-	if (mm_has_pgste(vma->vm_mm)) {
-		if (pte_val(oldpte) & _PAGE_INVALID)
-			pgste_set_key(ptep, pgste, entry, vma->vm_mm);
-		pgste = pgste_set_pte(ptep, pgste, entry);
-		pgste_set_unlock(ptep, pgste);
-	} else
+/*
+ * Certain architectures need to do special things when PTEs
+ * within a page table are directly modified.  Thus, the following
+ * hook is made available.
+ */
+static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
+			      pte_t *ptep, pte_t entry)
+{
+	if (mm_has_pgste(mm))
+		ptep_set_pte_at(mm, addr, ptep, entry);
+	else
 		*ptep = entry;
-	return 1;
 }
 
 /*
@@ -1476,54 +1103,51 @@ static inline void __pmdp_idte_local(unsigned long address, pmd_t *pmdp)
 		: "cc" );
 }
 
-static inline void pmdp_flush_direct(struct mm_struct *mm,
-				     unsigned long address, pmd_t *pmdp)
-{
-	int active, count;
+pmd_t pmdp_xchg_direct(struct mm_struct *, unsigned long, pmd_t *, pmd_t);
+pmd_t pmdp_xchg_lazy(struct mm_struct *, unsigned long, pmd_t *, pmd_t);
 
-	if (pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID)
-		return;
-	if (!MACHINE_HAS_IDTE) {
-		__pmdp_csp(pmdp);
-		return;
-	}
-	active = (mm == current->active_mm) ? 1 : 0;
-	count = atomic_add_return(0x10000, &mm->context.attach_count);
-	if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active &&
-	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
-		__pmdp_idte_local(address, pmdp);
-	else
-		__pmdp_idte(address, pmdp);
-	atomic_sub(0x10000, &mm->context.attach_count);
-}
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
 
-static inline void pmdp_flush_lazy(struct mm_struct *mm,
-				   unsigned long address, pmd_t *pmdp)
+#define __HAVE_ARCH_PGTABLE_DEPOSIT
+void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
+				pgtable_t pgtable);
+
+#define __HAVE_ARCH_PGTABLE_WITHDRAW
+pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
+
+#define  __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
+static inline int pmdp_set_access_flags(struct vm_area_struct *vma,
+					unsigned long addr, pmd_t *pmdp,
+					pmd_t entry, int dirty)
 {
-	int active, count;
+	VM_BUG_ON(addr & ~HPAGE_MASK);
 
-	if (pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID)
-		return;
-	active = (mm == current->active_mm) ? 1 : 0;
-	count = atomic_add_return(0x10000, &mm->context.attach_count);
-	if ((count & 0xffff) <= active) {
-		pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID;
-		mm->context.flush_mm = 1;
-	} else if (MACHINE_HAS_IDTE)
-		__pmdp_idte(address, pmdp);
-	else
-		__pmdp_csp(pmdp);
-	atomic_sub(0x10000, &mm->context.attach_count);
+	entry = pmd_mkyoung(entry);
+	if (dirty)
+		entry = pmd_mkdirty(entry);
+	if (pmd_val(*pmdp) == pmd_val(entry))
+		return 0;
+	pmdp_xchg_direct(vma->vm_mm, addr, pmdp, entry);
+	return 1;
 }
 
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
+static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
+					    unsigned long addr, pmd_t *pmdp)
+{
+	pmd_t pmd = *pmdp;
 
-#define __HAVE_ARCH_PGTABLE_DEPOSIT
-extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
-				       pgtable_t pgtable);
+	pmd = pmdp_xchg_direct(vma->vm_mm, addr, pmdp, pmd_mkold(pmd));
+	return pmd_young(pmd);
+}
 
-#define __HAVE_ARCH_PGTABLE_WITHDRAW
-extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
+#define __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH
+static inline int pmdp_clear_flush_young(struct vm_area_struct *vma,
+					 unsigned long addr, pmd_t *pmdp)
+{
+	VM_BUG_ON(addr & ~HPAGE_MASK);
+	return pmdp_test_and_clear_young(vma, addr, pmdp);
+}
 
 static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
 			      pmd_t *pmdp, pmd_t entry)
@@ -1539,66 +1163,48 @@ static inline pmd_t pmd_mkhuge(pmd_t pmd)
 	return pmd;
 }
 
-#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
-static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
-					    unsigned long address, pmd_t *pmdp)
-{
-	pmd_t pmd;
-
-	pmd = *pmdp;
-	pmdp_flush_direct(vma->vm_mm, address, pmdp);
-	*pmdp = pmd_mkold(pmd);
-	return pmd_young(pmd);
-}
-
 #define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
 static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
-					    unsigned long address, pmd_t *pmdp)
+					    unsigned long addr, pmd_t *pmdp)
 {
-	pmd_t pmd = *pmdp;
-
-	pmdp_flush_direct(mm, address, pmdp);
-	pmd_clear(pmdp);
-	return pmd;
+	return pmdp_xchg_direct(mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_INVALID));
 }
 
 #define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR_FULL
 static inline pmd_t pmdp_huge_get_and_clear_full(struct mm_struct *mm,
-						 unsigned long address,
+						 unsigned long addr,
 						 pmd_t *pmdp, int full)
 {
-	pmd_t pmd = *pmdp;
-
-	if (!full)
-		pmdp_flush_lazy(mm, address, pmdp);
-	pmd_clear(pmdp);
-	return pmd;
+	if (full) {
+		pmd_t pmd = *pmdp;
+		*pmdp = __pmd(_SEGMENT_ENTRY_INVALID);
+		return pmd;
+	}
+	return pmdp_xchg_lazy(mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_INVALID));
 }
 
 #define __HAVE_ARCH_PMDP_HUGE_CLEAR_FLUSH
 static inline pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma,
-					  unsigned long address, pmd_t *pmdp)
+					  unsigned long addr, pmd_t *pmdp)
 {
-	return pmdp_huge_get_and_clear(vma->vm_mm, address, pmdp);
+	return pmdp_huge_get_and_clear(vma->vm_mm, addr, pmdp);
 }
 
 #define __HAVE_ARCH_PMDP_INVALIDATE
 static inline void pmdp_invalidate(struct vm_area_struct *vma,
-				   unsigned long address, pmd_t *pmdp)
+				   unsigned long addr, pmd_t *pmdp)
 {
-	pmdp_flush_direct(vma->vm_mm, address, pmdp);
+	pmdp_xchg_direct(vma->vm_mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_INVALID));
 }
 
 #define __HAVE_ARCH_PMDP_SET_WRPROTECT
 static inline void pmdp_set_wrprotect(struct mm_struct *mm,
-				      unsigned long address, pmd_t *pmdp)
+				      unsigned long addr, pmd_t *pmdp)
 {
 	pmd_t pmd = *pmdp;
 
-	if (pmd_write(pmd)) {
-		pmdp_flush_direct(mm, address, pmdp);
-		set_pmd_at(mm, address, pmdp, pmd_wrprotect(pmd));
-	}
+	if (pmd_write(pmd))
+		pmd = pmdp_xchg_lazy(mm, addr, pmdp, pmd_wrprotect(pmd));
 }
 
 static inline pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index 1c4fe129486d..d6fd22ea270d 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -184,6 +184,10 @@ struct task_struct;
 struct mm_struct;
 struct seq_file;
 
+typedef int (*dump_trace_func_t)(void *data, unsigned long address);
+void dump_trace(dump_trace_func_t func, void *data,
+		struct task_struct *task, unsigned long sp);
+
 void show_cacheinfo(struct seq_file *m);
 
 /* Free all resources held by a thread. */
@@ -203,6 +207,14 @@ unsigned long get_wchan(struct task_struct *p);
 /* Has task runtime instrumentation enabled ? */
 #define is_ri_task(tsk) (!!(tsk)->thread.ri_cb)
 
+static inline unsigned long current_stack_pointer(void)
+{
+	unsigned long sp;
+
+	asm volatile("la %0,0(15)" : "=a" (sp));
+	return sp;
+}
+
 static inline unsigned short stap(void)
 {
 	unsigned short cpu_address;
diff --git a/arch/s390/include/asm/rwsem.h b/arch/s390/include/asm/rwsem.h
index 4b43ee7e6776..fead491dfc28 100644
--- a/arch/s390/include/asm/rwsem.h
+++ b/arch/s390/include/asm/rwsem.h
@@ -31,7 +31,7 @@
  * This should be totally fair - if anything is waiting, a process that wants a
  * lock will go to the back of the queue. When the currently active lock is
  * released, if there's a writer at the front of the queue, then that and only
- * that will be woken up; if there's a bunch of consequtive readers at the
+ * that will be woken up; if there's a bunch of consecutive readers at the
  * front, then they'll all be woken up, but no other readers will be.
  */
 
diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h
index 69837225119e..c0f0efbb6ab5 100644
--- a/arch/s390/include/asm/setup.h
+++ b/arch/s390/include/asm/setup.h
@@ -101,6 +101,8 @@ extern void pfault_fini(void);
 #define pfault_fini()		do { } while (0)
 #endif /* CONFIG_PFAULT */
 
+void report_user_fault(struct pt_regs *regs, long signr, int is_mm_fault);
+
 extern void cmma_init(void);
 
 extern void (*_machine_restart)(char *command);
diff --git a/arch/s390/include/asm/xor.h b/arch/s390/include/asm/xor.h
index c82eb12a5b18..c988df744a70 100644
--- a/arch/s390/include/asm/xor.h
+++ b/arch/s390/include/asm/xor.h
@@ -1 +1,20 @@
-#include <asm-generic/xor.h>
+/*
+ * Optimited xor routines
+ *
+ * Copyright IBM Corp. 2016
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+#ifndef _ASM_S390_XOR_H
+#define _ASM_S390_XOR_H
+
+extern struct xor_block_template xor_block_xc;
+
+#undef XOR_TRY_TEMPLATES
+#define XOR_TRY_TEMPLATES				\
+do {							\
+	xor_speed(&xor_block_xc);			\
+} while (0)
+
+#define XOR_SELECT_TEMPLATE(FASTEST)	(&xor_block_xc)
+
+#endif /* _ASM_S390_XOR_H */
diff --git a/arch/s390/include/uapi/asm/clp.h b/arch/s390/include/uapi/asm/clp.h
new file mode 100644
index 000000000000..ab72d9d24373
--- /dev/null
+++ b/arch/s390/include/uapi/asm/clp.h
@@ -0,0 +1,28 @@
+/*
+ * ioctl interface for /dev/clp
+ *
+ * Copyright IBM Corp. 2016
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#ifndef _ASM_CLP_H
+#define _ASM_CLP_H
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+
+struct clp_req {
+	unsigned int c : 1;
+	unsigned int r : 1;
+	unsigned int lps : 6;
+	unsigned int cmd : 8;
+	unsigned int : 16;
+	unsigned int reserved;
+	__u64 data_p;
+};
+
+#define CLP_IOCTL_MAGIC 'c'
+
+#define CLP_SYNC _IOWR(CLP_IOCTL_MAGIC, 0xC1, struct clp_req)
+
+#endif
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index 53bbc9e8b281..1f95cc1faeb7 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -12,6 +12,7 @@
 #include <asm/idle.h>
 #include <asm/vdso.h>
 #include <asm/pgtable.h>
+#include <asm/gmap.h>
 
 /*
  * Make sure that the compiler is new enough. We want a compiler that
diff --git a/arch/s390/kernel/cpcmd.c b/arch/s390/kernel/cpcmd.c
index 7f768914fb4f..7f48e568ac64 100644
--- a/arch/s390/kernel/cpcmd.c
+++ b/arch/s390/kernel/cpcmd.c
@@ -96,8 +96,7 @@ int cpcmd(const char *cmd, char *response, int rlen, int *response_code)
 			(((unsigned long)response + rlen) >> 31)) {
 		lowbuf = kmalloc(rlen, GFP_KERNEL | GFP_DMA);
 		if (!lowbuf) {
-			pr_warning("The cpcmd kernel function failed to "
-				   "allocate a response buffer\n");
+			pr_warn("The cpcmd kernel function failed to allocate a response buffer\n");
 			return -ENOMEM;
 		}
 		spin_lock_irqsave(&cpcmd_lock, flags);
diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c
index c890a5589e59..aa12de72fd47 100644
--- a/arch/s390/kernel/debug.c
+++ b/arch/s390/kernel/debug.c
@@ -699,8 +699,7 @@ debug_info_t *debug_register_mode(const char *name, int pages_per_area,
 	/* Since debugfs currently does not support uid/gid other than root, */
 	/* we do not allow gid/uid != 0 until we get support for that. */
 	if ((uid != 0) || (gid != 0))
-		pr_warning("Root becomes the owner of all s390dbf files "
-			   "in sysfs\n");
+		pr_warn("Root becomes the owner of all s390dbf files in sysfs\n");
 	BUG_ON(!initialized);
 	mutex_lock(&debug_mutex);
 
@@ -1307,8 +1306,7 @@ debug_input_level_fn(debug_info_t * id, struct debug_view *view,
 		new_level = debug_get_uint(str);
 	}
 	if(new_level < 0) {
-		pr_warning("%s is not a valid level for a debug "
-			   "feature\n", str);
+		pr_warn("%s is not a valid level for a debug feature\n", str);
 		rc = -EINVAL;
 	} else {
 		debug_set_level(id, new_level);
diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c
index 62973efd214a..8cb9bfdd3ea8 100644
--- a/arch/s390/kernel/dis.c
+++ b/arch/s390/kernel/dis.c
@@ -1920,23 +1920,16 @@ static int print_insn(char *buffer, unsigned char *code, unsigned long addr)
 			}
 			if (separator)
 				ptr += sprintf(ptr, "%c", separator);
-			/*
-			 * Use four '%' characters below because of the
-			 * following two conversions:
-			 *
-			 *  1) sprintf: %%%%r -> %%r
-			 *  2) printk : %%r   -> %r
-			 */
 			if (operand->flags & OPERAND_GPR)
-				ptr += sprintf(ptr, "%%%%r%i", value);
+				ptr += sprintf(ptr, "%%r%i", value);
 			else if (operand->flags & OPERAND_FPR)
-				ptr += sprintf(ptr, "%%%%f%i", value);
+				ptr += sprintf(ptr, "%%f%i", value);
 			else if (operand->flags & OPERAND_AR)
-				ptr += sprintf(ptr, "%%%%a%i", value);
+				ptr += sprintf(ptr, "%%a%i", value);
 			else if (operand->flags & OPERAND_CR)
-				ptr += sprintf(ptr, "%%%%c%i", value);
+				ptr += sprintf(ptr, "%%c%i", value);
 			else if (operand->flags & OPERAND_VR)
-				ptr += sprintf(ptr, "%%%%v%i", value);
+				ptr += sprintf(ptr, "%%v%i", value);
 			else if (operand->flags & OPERAND_PCREL)
 				ptr += sprintf(ptr, "%lx", (signed int) value
 								      + addr);
diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c
index 02bd02ff648b..2150b0139a0b 100644
--- a/arch/s390/kernel/dumpstack.c
+++ b/arch/s390/kernel/dumpstack.c
@@ -19,28 +19,28 @@
 #include <asm/ipl.h>
 
 /*
- * For show_trace we have tree different stack to consider:
+ * For dump_trace we have tree different stack to consider:
  *   - the panic stack which is used if the kernel stack has overflown
  *   - the asynchronous interrupt stack (cpu related)
  *   - the synchronous kernel stack (process related)
- * The stack trace can start at any of the three stack and can potentially
+ * The stack trace can start at any of the three stacks and can potentially
  * touch all of them. The order is: panic stack, async stack, sync stack.
  */
 static unsigned long
-__show_trace(unsigned long sp, unsigned long low, unsigned long high)
+__dump_trace(dump_trace_func_t func, void *data, unsigned long sp,
+	     unsigned long low, unsigned long high)
 {
 	struct stack_frame *sf;
 	struct pt_regs *regs;
-	unsigned long addr;
 
 	while (1) {
 		if (sp < low || sp > high - sizeof(*sf))
 			return sp;
 		sf = (struct stack_frame *) sp;
-		addr = sf->gprs[8];
-		printk("([<%016lx>] %pSR)\n", addr, (void *)addr);
 		/* Follow the backchain. */
 		while (1) {
+			if (func(data, sf->gprs[8]))
+				return sp;
 			low = sp;
 			sp = sf->back_chain;
 			if (!sp)
@@ -48,46 +48,58 @@ __show_trace(unsigned long sp, unsigned long low, unsigned long high)
 			if (sp <= low || sp > high - sizeof(*sf))
 				return sp;
 			sf = (struct stack_frame *) sp;
-			addr = sf->gprs[8];
-			printk(" [<%016lx>] %pSR\n", addr, (void *)addr);
 		}
 		/* Zero backchain detected, check for interrupt frame. */
 		sp = (unsigned long) (sf + 1);
 		if (sp <= low || sp > high - sizeof(*regs))
 			return sp;
 		regs = (struct pt_regs *) sp;
-		addr = regs->psw.addr;
-		printk(" [<%016lx>] %pSR\n", addr, (void *)addr);
+		if (!user_mode(regs)) {
+			if (func(data, regs->psw.addr))
+				return sp;
+		}
 		low = sp;
 		sp = regs->gprs[15];
 	}
 }
 
-static void show_trace(struct task_struct *task, unsigned long *stack)
+void dump_trace(dump_trace_func_t func, void *data, struct task_struct *task,
+		unsigned long sp)
 {
-	const unsigned long frame_size =
-		STACK_FRAME_OVERHEAD + sizeof(struct pt_regs);
-	register unsigned long __r15 asm ("15");
-	unsigned long sp;
+	unsigned long frame_size;
 
-	sp = (unsigned long) stack;
-	if (!sp)
-		sp = task ? task->thread.ksp : __r15;
-	printk("Call Trace:\n");
+	frame_size = STACK_FRAME_OVERHEAD + sizeof(struct pt_regs);
 #ifdef CONFIG_CHECK_STACK
-	sp = __show_trace(sp,
+	sp = __dump_trace(func, data, sp,
 			  S390_lowcore.panic_stack + frame_size - 4096,
 			  S390_lowcore.panic_stack + frame_size);
 #endif
-	sp = __show_trace(sp,
+	sp = __dump_trace(func, data, sp,
 			  S390_lowcore.async_stack + frame_size - ASYNC_SIZE,
 			  S390_lowcore.async_stack + frame_size);
 	if (task)
-		__show_trace(sp, (unsigned long) task_stack_page(task),
-			     (unsigned long) task_stack_page(task) + THREAD_SIZE);
+		__dump_trace(func, data, sp,
+			     (unsigned long)task_stack_page(task),
+			     (unsigned long)task_stack_page(task) + THREAD_SIZE);
 	else
-		__show_trace(sp, S390_lowcore.thread_info,
+		__dump_trace(func, data, sp,
+			     S390_lowcore.thread_info,
 			     S390_lowcore.thread_info + THREAD_SIZE);
+}
+EXPORT_SYMBOL_GPL(dump_trace);
+
+static int show_address(void *data, unsigned long address)
+{
+	printk("([<%016lx>] %pSR)\n", address, (void *)address);
+	return 0;
+}
+
+static void show_trace(struct task_struct *task, unsigned long sp)
+{
+	if (!sp)
+		sp = task ? task->thread.ksp : current_stack_pointer();
+	printk("Call Trace:\n");
+	dump_trace(show_address, NULL, task, sp);
 	if (!task)
 		task = current;
 	debug_show_held_locks(task);
@@ -95,15 +107,16 @@ static void show_trace(struct task_struct *task, unsigned long *stack)
 
 void show_stack(struct task_struct *task, unsigned long *sp)
 {
-	register unsigned long *__r15 asm ("15");
 	unsigned long *stack;
 	int i;
 
-	if (!sp)
-		stack = task ? (unsigned long *) task->thread.ksp : __r15;
-	else
-		stack = sp;
-
+	stack = sp;
+	if (!stack) {
+		if (!task)
+			stack = (unsigned long *)current_stack_pointer();
+		else
+			stack = (unsigned long *)task->thread.ksp;
+	}
 	for (i = 0; i < 20; i++) {
 		if (((addr_t) stack & (THREAD_SIZE-1)) == 0)
 			break;
@@ -112,7 +125,7 @@ void show_stack(struct task_struct *task, unsigned long *sp)
 		printk("%016lx ", *stack++);
 	}
 	printk("\n");
-	show_trace(task, sp);
+	show_trace(task, (unsigned long)sp);
 }
 
 static void show_last_breaking_event(struct pt_regs *regs)
@@ -121,13 +134,9 @@ static void show_last_breaking_event(struct pt_regs *regs)
 	printk(" [<%016lx>] %pSR\n", regs->args[0], (void *)regs->args[0]);
 }
 
-static inline int mask_bits(struct pt_regs *regs, unsigned long bits)
-{
-	return (regs->psw.mask & bits) / ((~bits + 1) & bits);
-}
-
 void show_registers(struct pt_regs *regs)
 {
+	struct psw_bits *psw = &psw_bits(regs->psw);
 	char *mode;
 
 	mode = user_mode(regs) ? "User" : "Krnl";
@@ -136,13 +145,9 @@ void show_registers(struct pt_regs *regs)
 		printk(" (%pSR)", (void *)regs->psw.addr);
 	printk("\n");
 	printk("           R:%x T:%x IO:%x EX:%x Key:%x M:%x W:%x "
-	       "P:%x AS:%x CC:%x PM:%x", mask_bits(regs, PSW_MASK_PER),
-	       mask_bits(regs, PSW_MASK_DAT), mask_bits(regs, PSW_MASK_IO),
-	       mask_bits(regs, PSW_MASK_EXT), mask_bits(regs, PSW_MASK_KEY),
-	       mask_bits(regs, PSW_MASK_MCHECK), mask_bits(regs, PSW_MASK_WAIT),
-	       mask_bits(regs, PSW_MASK_PSTATE), mask_bits(regs, PSW_MASK_ASC),
-	       mask_bits(regs, PSW_MASK_CC), mask_bits(regs, PSW_MASK_PM));
-	printk(" EA:%x", mask_bits(regs, PSW_MASK_EA | PSW_MASK_BA));
+	       "P:%x AS:%x CC:%x PM:%x", psw->r, psw->t, psw->i, psw->e,
+	       psw->key, psw->m, psw->w, psw->p, psw->as, psw->cc, psw->pm);
+	printk(" RI:%x EA:%x", psw->ri, psw->eaba);
 	printk("\n%s GPRS: %016lx %016lx %016lx %016lx\n", mode,
 	       regs->gprs[0], regs->gprs[1], regs->gprs[2], regs->gprs[3]);
 	printk("           %016lx %016lx %016lx %016lx\n",
@@ -160,7 +165,7 @@ void show_regs(struct pt_regs *regs)
 	show_registers(regs);
 	/* Show stack backtrace if pt_regs is from kernel mode */
 	if (!user_mode(regs))
-		show_trace(NULL, (unsigned long *) regs->gprs[15]);
+		show_trace(NULL, regs->gprs[15]);
 	show_last_breaking_event(regs);
 }
 
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index cd5a191381b9..2d47f9cfcb36 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -186,6 +186,7 @@ ENTRY(__switch_to)
 	stg	%r5,__LC_THREAD_INFO		# store thread info of next
 	stg	%r15,__LC_KERNEL_STACK		# store end of kernel stack
 	lg	%r15,__THREAD_ksp(%r1)		# load kernel stack of next
+	/* c4 is used in guest detection: arch/s390/kernel/perf_cpum_sf.c */
 	lctl	%c4,%c4,__TASK_pid(%r3)		# load pid to control reg. 4
 	mvc	__LC_CURRENT_PID(4,%r0),__TASK_pid(%r3) # store pid of next
 	lmg	%r6,%r15,__SF_GPRS(%r15)	# load gprs of next task
@@ -1199,114 +1200,12 @@ cleanup_critical:
 	.quad	.Lpsw_idle_lpsw
 
 .Lcleanup_save_fpu_regs:
-	TSTMSK	__LC_CPU_FLAGS,_CIF_FPU
-	bor	%r14
-	clg	%r9,BASED(.Lcleanup_save_fpu_regs_done)
-	jhe	5f
-	clg	%r9,BASED(.Lcleanup_save_fpu_regs_fp)
-	jhe	4f
-	clg	%r9,BASED(.Lcleanup_save_fpu_regs_vx_high)
-	jhe	3f
-	clg	%r9,BASED(.Lcleanup_save_fpu_regs_vx_low)
-	jhe	2f
-	clg	%r9,BASED(.Lcleanup_save_fpu_fpc_end)
-	jhe	1f
-	lg	%r2,__LC_CURRENT
-	aghi	%r2,__TASK_thread
-0:	# Store floating-point controls
-	stfpc	__THREAD_FPU_fpc(%r2)
-1:	# Load register save area and check if VX is active
-	lg	%r3,__THREAD_FPU_regs(%r2)
-	TSTMSK	__LC_MACHINE_FLAGS,MACHINE_FLAG_VX
-	jz	4f			  # no VX -> store FP regs
-2:	# Store vector registers (V0-V15)
-	VSTM	%v0,%v15,0,%r3		  # vstm 0,15,0(3)
-3:	# Store vector registers (V16-V31)
-	VSTM	%v16,%v31,256,%r3	  # vstm 16,31,256(3)
-	j	5f			  # -> done, set CIF_FPU flag
-4:	# Store floating-point registers
-	std	0,0(%r3)
-	std	1,8(%r3)
-	std	2,16(%r3)
-	std	3,24(%r3)
-	std	4,32(%r3)
-	std	5,40(%r3)
-	std	6,48(%r3)
-	std	7,56(%r3)
-	std	8,64(%r3)
-	std	9,72(%r3)
-	std	10,80(%r3)
-	std	11,88(%r3)
-	std	12,96(%r3)
-	std	13,104(%r3)
-	std	14,112(%r3)
-	std	15,120(%r3)
-5:	# Set CIF_FPU flag
-	oi	__LC_CPU_FLAGS+7,_CIF_FPU
-	lg	%r9,48(%r11)		# return from save_fpu_regs
+	larl	%r9,save_fpu_regs
 	br	%r14
-.Lcleanup_save_fpu_fpc_end:
-	.quad	.Lsave_fpu_regs_fpc_end
-.Lcleanup_save_fpu_regs_vx_low:
-	.quad	.Lsave_fpu_regs_vx_low
-.Lcleanup_save_fpu_regs_vx_high:
-	.quad	.Lsave_fpu_regs_vx_high
-.Lcleanup_save_fpu_regs_fp:
-	.quad	.Lsave_fpu_regs_fp
-.Lcleanup_save_fpu_regs_done:
-	.quad	.Lsave_fpu_regs_done
 
 .Lcleanup_load_fpu_regs:
-	TSTMSK	__LC_CPU_FLAGS,_CIF_FPU
-	bnor	%r14
-	clg	%r9,BASED(.Lcleanup_load_fpu_regs_done)
-	jhe	1f
-	clg	%r9,BASED(.Lcleanup_load_fpu_regs_fp)
-	jhe	2f
-	clg	%r9,BASED(.Lcleanup_load_fpu_regs_vx_high)
-	jhe	3f
-	clg	%r9,BASED(.Lcleanup_load_fpu_regs_vx)
-	jhe	4f
-	lg	%r4,__LC_CURRENT
-	aghi	%r4,__TASK_thread
-	lfpc	__THREAD_FPU_fpc(%r4)
-	TSTMSK	__LC_MACHINE_FLAGS,MACHINE_FLAG_VX
-	lg	%r4,__THREAD_FPU_regs(%r4)	# %r4 <- reg save area
-	jz	2f				# -> no VX, load FP regs
-4:	# Load V0 ..V15 registers
-	VLM	%v0,%v15,0,%r4
-3:	# Load V16..V31 registers
-	VLM	%v16,%v31,256,%r4
-	j	1f
-2:	# Load floating-point registers
-	ld	0,0(%r4)
-	ld	1,8(%r4)
-	ld	2,16(%r4)
-	ld	3,24(%r4)
-	ld	4,32(%r4)
-	ld	5,40(%r4)
-	ld	6,48(%r4)
-	ld	7,56(%r4)
-	ld	8,64(%r4)
-	ld	9,72(%r4)
-	ld	10,80(%r4)
-	ld	11,88(%r4)
-	ld	12,96(%r4)
-	ld	13,104(%r4)
-	ld	14,112(%r4)
-	ld	15,120(%r4)
-1:	# Clear CIF_FPU bit
-	ni	__LC_CPU_FLAGS+7,255-_CIF_FPU
-	lg	%r9,48(%r11)		# return from load_fpu_regs
+	larl	%r9,load_fpu_regs
 	br	%r14
-.Lcleanup_load_fpu_regs_vx:
-	.quad	.Lload_fpu_regs_vx
-.Lcleanup_load_fpu_regs_vx_high:
-	.quad	.Lload_fpu_regs_vx_high
-.Lcleanup_load_fpu_regs_fp:
-	.quad	.Lload_fpu_regs_fp
-.Lcleanup_load_fpu_regs_done:
-	.quad	.Lload_fpu_regs_done
 
 /*
  * Integer constants
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c
index f41d5208aaf7..c373a1d41d10 100644
--- a/arch/s390/kernel/irq.c
+++ b/arch/s390/kernel/irq.c
@@ -164,8 +164,7 @@ void do_softirq_own_stack(void)
 {
 	unsigned long old, new;
 
-	/* Get current stack pointer. */
-	asm volatile("la %0,0(15)" : "=a" (old));
+	old = current_stack_pointer();
 	/* Check against async. stack address range. */
 	new = S390_lowcore.async_stack;
 	if (((new - old) >> (PAGE_SHIFT + THREAD_ORDER)) != 0) {
diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c
index 929c147e07b4..58bf4572d457 100644
--- a/arch/s390/kernel/perf_cpum_cf.c
+++ b/arch/s390/kernel/perf_cpum_cf.c
@@ -383,7 +383,7 @@ static int __hw_perf_event_init(struct perf_event *event)
 
 	/* Validate the counter that is assigned to this event.
 	 * Because the counter facility can use numerous counters at the
-	 * same time without constraints, it is not necessary to explicity
+	 * same time without constraints, it is not necessary to explicitly
 	 * validate event groups (event->group_leader != event).
 	 */
 	err = validate_event(hwc);
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index 3d8da1e742c2..1a43474df541 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -1022,10 +1022,13 @@ static int perf_push_sample(struct perf_event *event, struct sf_raw_sample *sfr)
 	/*
 	 * A non-zero guest program parameter indicates a guest
 	 * sample.
-	 * Note that some early samples might be misaccounted to
-	 * the host.
+	 * Note that some early samples or samples from guests without
+	 * lpp usage would be misaccounted to the host. We use the asn
+	 * value as a heuristic to detect most of these guest samples.
+	 * If the value differs from the host hpp value, we assume
+	 * it to be a KVM guest.
 	 */
-	if (sfr->basic.gpp)
+	if (sfr->basic.gpp || sfr->basic.prim_asn != (u16) sfr->basic.hpp)
 		sde_regs->in_guest = 1;
 
 	overflow = 0;
diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c
index 0943b11a2f6e..c3e4099b60a5 100644
--- a/arch/s390/kernel/perf_event.c
+++ b/arch/s390/kernel/perf_event.c
@@ -222,67 +222,23 @@ static int __init service_level_perf_register(void)
 }
 arch_initcall(service_level_perf_register);
 
-/* See also arch/s390/kernel/traps.c */
-static unsigned long __store_trace(struct perf_callchain_entry *entry,
-				   unsigned long sp,
-				   unsigned long low, unsigned long high)
+static int __perf_callchain_kernel(void *data, unsigned long address)
 {
-	struct stack_frame *sf;
-	struct pt_regs *regs;
-
-	while (1) {
-		if (sp < low || sp > high - sizeof(*sf))
-			return sp;
-		sf = (struct stack_frame *) sp;
-		perf_callchain_store(entry, sf->gprs[8]);
-		/* Follow the backchain. */
-		while (1) {
-			low = sp;
-			sp = sf->back_chain;
-			if (!sp)
-				break;
-			if (sp <= low || sp > high - sizeof(*sf))
-				return sp;
-			sf = (struct stack_frame *) sp;
-			perf_callchain_store(entry, sf->gprs[8]);
-		}
-		/* Zero backchain detected, check for interrupt frame. */
-		sp = (unsigned long) (sf + 1);
-		if (sp <= low || sp > high - sizeof(*regs))
-			return sp;
-		regs = (struct pt_regs *) sp;
-		perf_callchain_store(entry, sf->gprs[8]);
-		low = sp;
-		sp = regs->gprs[15];
-	}
+	struct perf_callchain_entry *entry = data;
+
+	perf_callchain_store(entry, address);
+	return 0;
 }
 
 void perf_callchain_kernel(struct perf_callchain_entry *entry,
 			   struct pt_regs *regs)
 {
-	unsigned long head, frame_size;
-	struct stack_frame *head_sf;
-
 	if (user_mode(regs))
 		return;
-
-	frame_size = STACK_FRAME_OVERHEAD + sizeof(struct pt_regs);
-	head = regs->gprs[15];
-	head_sf = (struct stack_frame *) head;
-
-	if (!head_sf || !head_sf->back_chain)
-		return;
-
-	head = head_sf->back_chain;
-	head = __store_trace(entry, head,
-			     S390_lowcore.async_stack + frame_size - ASYNC_SIZE,
-			     S390_lowcore.async_stack + frame_size);
-
-	__store_trace(entry, head, S390_lowcore.thread_info,
-		      S390_lowcore.thread_info + THREAD_SIZE);
+	dump_trace(__perf_callchain_kernel, entry, NULL, regs->gprs[15]);
 }
 
-/* Perf defintions for PMU event attributes in sysfs */
+/* Perf definitions for PMU event attributes in sysfs */
 ssize_t cpumf_events_sysfs_show(struct device *dev,
 				struct device_attribute *attr, char *page)
 {
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index cedb0198675f..d3f9688f26b5 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -327,6 +327,7 @@ static void __init setup_lowcore(void)
 		+ PAGE_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
 	lc->current_task = (unsigned long) init_thread_union.thread_info.task;
 	lc->thread_info = (unsigned long) &init_thread_union;
+	lc->lpp = LPP_MAGIC;
 	lc->machine_flags = S390_lowcore.machine_flags;
 	lc->stfl_fac_list = S390_lowcore.stfl_fac_list;
 	memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list,
@@ -779,6 +780,7 @@ static int __init setup_hwcaps(void)
 		strcpy(elf_platform, "zEC12");
 		break;
 	case 0x2964:
+	case 0x2965:
 		strcpy(elf_platform, "z13");
 		break;
 	}
diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c
index 8f64ebd63767..44f84b23d4e5 100644
--- a/arch/s390/kernel/stacktrace.c
+++ b/arch/s390/kernel/stacktrace.c
@@ -10,78 +10,39 @@
 #include <linux/kallsyms.h>
 #include <linux/module.h>
 
-static unsigned long save_context_stack(struct stack_trace *trace,
-					unsigned long sp,
-					unsigned long low,
-					unsigned long high,
-					int savesched)
+static int __save_address(void *data, unsigned long address, int nosched)
 {
-	struct stack_frame *sf;
-	struct pt_regs *regs;
-	unsigned long addr;
+	struct stack_trace *trace = data;
 
-	while(1) {
-		if (sp < low || sp > high)
-			return sp;
-		sf = (struct stack_frame *)sp;
-		while(1) {
-			addr = sf->gprs[8];
-			if (!trace->skip)
-				trace->entries[trace->nr_entries++] = addr;
-			else
-				trace->skip--;
-			if (trace->nr_entries >= trace->max_entries)
-				return sp;
-			low = sp;
-			sp = sf->back_chain;
-			if (!sp)
-				break;
-			if (sp <= low || sp > high - sizeof(*sf))
-				return sp;
-			sf = (struct stack_frame *)sp;
-		}
-		/* Zero backchain detected, check for interrupt frame. */
-		sp = (unsigned long)(sf + 1);
-		if (sp <= low || sp > high - sizeof(*regs))
-			return sp;
-		regs = (struct pt_regs *)sp;
-		addr = regs->psw.addr;
-		if (savesched || !in_sched_functions(addr)) {
-			if (!trace->skip)
-				trace->entries[trace->nr_entries++] = addr;
-			else
-				trace->skip--;
-		}
-		if (trace->nr_entries >= trace->max_entries)
-			return sp;
-		low = sp;
-		sp = regs->gprs[15];
+	if (nosched && in_sched_functions(address))
+		return 0;
+	if (trace->skip > 0) {
+		trace->skip--;
+		return 0;
 	}
+	if (trace->nr_entries < trace->max_entries) {
+		trace->entries[trace->nr_entries++] = address;
+		return 0;
+	}
+	return 1;
 }
 
-static void __save_stack_trace(struct stack_trace *trace, unsigned long sp)
+static int save_address(void *data, unsigned long address)
 {
-	unsigned long new_sp, frame_size;
+	return __save_address(data, address, 0);
+}
 
-	frame_size = STACK_FRAME_OVERHEAD + sizeof(struct pt_regs);
-	new_sp = save_context_stack(trace, sp,
-			S390_lowcore.panic_stack + frame_size - PAGE_SIZE,
-			S390_lowcore.panic_stack + frame_size, 1);
-	new_sp = save_context_stack(trace, new_sp,
-			S390_lowcore.async_stack + frame_size - ASYNC_SIZE,
-			S390_lowcore.async_stack + frame_size, 1);
-	save_context_stack(trace, new_sp,
-			   S390_lowcore.thread_info,
-			   S390_lowcore.thread_info + THREAD_SIZE, 1);
+static int save_address_nosched(void *data, unsigned long address)
+{
+	return __save_address(data, address, 1);
 }
 
 void save_stack_trace(struct stack_trace *trace)
 {
-	register unsigned long r15 asm ("15");
 	unsigned long sp;
 
-	sp = r15;
-	__save_stack_trace(trace, sp);
+	sp = current_stack_pointer();
+	dump_trace(save_address, trace, NULL, sp);
 	if (trace->nr_entries < trace->max_entries)
 		trace->entries[trace->nr_entries++] = ULONG_MAX;
 }
@@ -89,16 +50,12 @@ EXPORT_SYMBOL_GPL(save_stack_trace);
 
 void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
 {
-	unsigned long sp, low, high;
+	unsigned long sp;
 
 	sp = tsk->thread.ksp;
-	if (tsk == current) {
-		/* Get current stack pointer. */
-		asm volatile("la %0,0(15)" : "=a" (sp));
-	}
-	low = (unsigned long) task_stack_page(tsk);
-	high = (unsigned long) task_pt_regs(tsk);
-	save_context_stack(trace, sp, low, high, 0);
+	if (tsk == current)
+		sp = current_stack_pointer();
+	dump_trace(save_address_nosched, trace, tsk, sp);
 	if (trace->nr_entries < trace->max_entries)
 		trace->entries[trace->nr_entries++] = ULONG_MAX;
 }
@@ -109,7 +66,7 @@ void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace)
 	unsigned long sp;
 
 	sp = kernel_stack_pointer(regs);
-	__save_stack_trace(trace, sp);
+	dump_trace(save_address, trace, NULL, sp);
 	if (trace->nr_entries < trace->max_entries)
 		trace->entries[trace->nr_entries++] = ULONG_MAX;
 }
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index 99f84ac31307..c4e5f183f225 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -499,8 +499,7 @@ static void etr_reset(void)
 		if (etr_port0_online && etr_port1_online)
 			set_bit(CLOCK_SYNC_ETR, &clock_sync_flags);
 	} else if (etr_port0_online || etr_port1_online) {
-		pr_warning("The real or virtual hardware system does "
-			   "not provide an ETR interface\n");
+		pr_warn("The real or virtual hardware system does not provide an ETR interface\n");
 		etr_port0_online = etr_port1_online = 0;
 	}
 }
@@ -1464,8 +1463,7 @@ static void __init stp_reset(void)
 	if (rc == 0)
 		set_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags);
 	else if (stp_online) {
-		pr_warning("The real or virtual hardware system does "
-			   "not provide an STP interface\n");
+		pr_warn("The real or virtual hardware system does not provide an STP interface\n");
 		free_page((unsigned long) stp_page);
 		stp_page = NULL;
 		stp_online = 0;
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
index 017eb03daee2..dd97a3e8a34a 100644
--- a/arch/s390/kernel/traps.c
+++ b/arch/s390/kernel/traps.c
@@ -22,8 +22,6 @@
 #include <asm/fpu/api.h>
 #include "entry.h"
 
-int show_unhandled_signals = 1;
-
 static inline void __user *get_trap_ip(struct pt_regs *regs)
 {
 	unsigned long address;
@@ -35,21 +33,6 @@ static inline void __user *get_trap_ip(struct pt_regs *regs)
 	return (void __user *) (address - (regs->int_code >> 16));
 }
 
-static inline void report_user_fault(struct pt_regs *regs, int signr)
-{
-	if ((task_pid_nr(current) > 1) && !show_unhandled_signals)
-		return;
-	if (!unhandled_signal(current, signr))
-		return;
-	if (!printk_ratelimit())
-		return;
-	printk("User process fault: interruption code %04x ilc:%d ",
-	       regs->int_code & 0xffff, regs->int_code >> 17);
-	print_vma_addr("in ", regs->psw.addr);
-	printk("\n");
-	show_regs(regs);
-}
-
 int is_valid_bugaddr(unsigned long addr)
 {
 	return 1;
@@ -65,7 +48,7 @@ void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str)
 		info.si_code = si_code;
 		info.si_addr = get_trap_ip(regs);
 		force_sig_info(si_signo, &info, current);
-		report_user_fault(regs, si_signo);
+		report_user_fault(regs, si_signo, 0);
         } else {
                 const struct exception_table_entry *fixup;
 		fixup = search_exception_tables(regs->psw.addr);
@@ -111,7 +94,7 @@ NOKPROBE_SYMBOL(do_per_trap);
 void default_trap_handler(struct pt_regs *regs)
 {
 	if (user_mode(regs)) {
-		report_user_fault(regs, SIGSEGV);
+		report_user_fault(regs, SIGSEGV, 0);
 		do_exit(SIGSEGV);
 	} else
 		die(regs, "Unknown program exception");
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index 05f7de9869a9..1ea4095b67d7 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -14,6 +14,7 @@
 #include <linux/kvm.h>
 #include <linux/kvm_host.h>
 #include <asm/pgalloc.h>
+#include <asm/gmap.h>
 #include <asm/virtio-ccw.h>
 #include "kvm-s390.h"
 #include "trace.h"
diff --git a/arch/s390/kvm/guestdbg.c b/arch/s390/kvm/guestdbg.c
index d697312ce9ee..e8c6843b9600 100644
--- a/arch/s390/kvm/guestdbg.c
+++ b/arch/s390/kvm/guestdbg.c
@@ -17,7 +17,7 @@
 /*
  * Extends the address range given by *start and *stop to include the address
  * range starting with estart and the length len. Takes care of overflowing
- * intervals and tries to minimize the overall intervall size.
+ * intervals and tries to minimize the overall interval size.
  */
 static void extend_address_range(u64 *start, u64 *stop, u64 estart, int len)
 {
@@ -72,7 +72,7 @@ static void enable_all_hw_bp(struct kvm_vcpu *vcpu)
 		return;
 
 	/*
-	 * If the guest is not interrested in branching events, we can savely
+	 * If the guest is not interested in branching events, we can safely
 	 * limit them to the PER address range.
 	 */
 	if (!(*cr9 & PER_EVENT_BRANCH))
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 704809d91ddd..84efc2ba6a90 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -23,6 +23,7 @@
 #include <asm/uaccess.h>
 #include <asm/sclp.h>
 #include <asm/isc.h>
+#include <asm/gmap.h>
 #include "kvm-s390.h"
 #include "gaccess.h"
 #include "trace-s390.h"
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index e196582fe87d..668c087513e5 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -30,6 +30,7 @@
 #include <asm/lowcore.h>
 #include <asm/etr.h>
 #include <asm/pgtable.h>
+#include <asm/gmap.h>
 #include <asm/nmi.h>
 #include <asm/switch_to.h>
 #include <asm/isc.h>
@@ -281,7 +282,7 @@ static void kvm_s390_sync_dirty_log(struct kvm *kvm,
 	for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
 		address = gfn_to_hva_memslot(memslot, cur_gfn);
 
-		if (gmap_test_and_clear_dirty(address, gmap))
+		if (test_and_clear_guest_dirty(gmap->mm, address))
 			mark_page_dirty(kvm, cur_gfn);
 		if (fatal_signal_pending(current))
 			return;
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index f218ccf016c8..0a1591d3d25d 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -23,6 +23,7 @@
 #include <asm/sysinfo.h>
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
+#include <asm/gmap.h>
 #include <asm/io.h>
 #include <asm/ptrace.h>
 #include <asm/compat.h>
diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile
index 0e8fefe5b0ce..1d1af31e8354 100644
--- a/arch/s390/lib/Makefile
+++ b/arch/s390/lib/Makefile
@@ -3,7 +3,7 @@
 #
 
 lib-y += delay.o string.o uaccess.o find.o
-obj-y += mem.o
+obj-y += mem.o xor.o
 lib-$(CONFIG_SMP) += spinlock.o
 lib-$(CONFIG_KPROBES) += probes.o
 lib-$(CONFIG_UPROBES) += probes.o
diff --git a/arch/s390/lib/xor.c b/arch/s390/lib/xor.c
new file mode 100644
index 000000000000..7d94e3ec34a9
--- /dev/null
+++ b/arch/s390/lib/xor.c
@@ -0,0 +1,134 @@
+/*
+ * Optimized xor_block operation for RAID4/5
+ *
+ * Copyright IBM Corp. 2016
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/raid/xor.h>
+
+static void xor_xc_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+{
+	asm volatile(
+		"	larl	1,2f\n"
+		"	aghi	%0,-1\n"
+		"	jm	3f\n"
+		"	srlg	0,%0,8\n"
+		"	ltgr	0,0\n"
+		"	jz	1f\n"
+		"0:	xc	0(256,%1),0(%2)\n"
+		"	la	%1,256(%1)\n"
+		"	la	%2,256(%2)\n"
+		"	brctg	0,0b\n"
+		"1:	ex	%0,0(1)\n"
+		"	j	3f\n"
+		"2:	xc	0(1,%1),0(%2)\n"
+		"3:\n"
+		: : "d" (bytes), "a" (p1), "a" (p2)
+		: "0", "1", "cc", "memory");
+}
+
+static void xor_xc_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+		     unsigned long *p3)
+{
+	asm volatile(
+		"	larl	1,2f\n"
+		"	aghi	%0,-1\n"
+		"	jm	3f\n"
+		"	srlg	0,%0,8\n"
+		"	ltgr	0,0\n"
+		"	jz	1f\n"
+		"0:	xc	0(256,%1),0(%2)\n"
+		"	xc	0(256,%1),0(%3)\n"
+		"	la	%1,256(%1)\n"
+		"	la	%2,256(%2)\n"
+		"	la	%3,256(%3)\n"
+		"	brctg	0,0b\n"
+		"1:	ex	%0,0(1)\n"
+		"	ex	%0,6(1)\n"
+		"	j	3f\n"
+		"2:	xc	0(1,%1),0(%2)\n"
+		"	xc	0(1,%1),0(%3)\n"
+		"3:\n"
+		: "+d" (bytes), "+a" (p1), "+a" (p2), "+a" (p3)
+		: : "0", "1", "cc", "memory");
+}
+
+static void xor_xc_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+		     unsigned long *p3, unsigned long *p4)
+{
+	asm volatile(
+		"	larl	1,2f\n"
+		"	aghi	%0,-1\n"
+		"	jm	3f\n"
+		"	srlg	0,%0,8\n"
+		"	ltgr	0,0\n"
+		"	jz	1f\n"
+		"0:	xc	0(256,%1),0(%2)\n"
+		"	xc	0(256,%1),0(%3)\n"
+		"	xc	0(256,%1),0(%4)\n"
+		"	la	%1,256(%1)\n"
+		"	la	%2,256(%2)\n"
+		"	la	%3,256(%3)\n"
+		"	la	%4,256(%4)\n"
+		"	brctg	0,0b\n"
+		"1:	ex	%0,0(1)\n"
+		"	ex	%0,6(1)\n"
+		"	ex	%0,12(1)\n"
+		"	j	3f\n"
+		"2:	xc	0(1,%1),0(%2)\n"
+		"	xc	0(1,%1),0(%3)\n"
+		"	xc	0(1,%1),0(%4)\n"
+		"3:\n"
+		: "+d" (bytes), "+a" (p1), "+a" (p2), "+a" (p3), "+a" (p4)
+		: : "0", "1", "cc", "memory");
+}
+
+static void xor_xc_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+		     unsigned long *p3, unsigned long *p4, unsigned long *p5)
+{
+	/* Get around a gcc oddity */
+	register unsigned long *reg7 asm ("7") = p5;
+
+	asm volatile(
+		"	larl	1,2f\n"
+		"	aghi	%0,-1\n"
+		"	jm	3f\n"
+		"	srlg	0,%0,8\n"
+		"	ltgr	0,0\n"
+		"	jz	1f\n"
+		"0:	xc	0(256,%1),0(%2)\n"
+		"	xc	0(256,%1),0(%3)\n"
+		"	xc	0(256,%1),0(%4)\n"
+		"	xc	0(256,%1),0(%5)\n"
+		"	la	%1,256(%1)\n"
+		"	la	%2,256(%2)\n"
+		"	la	%3,256(%3)\n"
+		"	la	%4,256(%4)\n"
+		"	la	%5,256(%5)\n"
+		"	brctg	0,0b\n"
+		"1:	ex	%0,0(1)\n"
+		"	ex	%0,6(1)\n"
+		"	ex	%0,12(1)\n"
+		"	ex	%0,18(1)\n"
+		"	j	3f\n"
+		"2:	xc	0(1,%1),0(%2)\n"
+		"	xc	0(1,%1),0(%3)\n"
+		"	xc	0(1,%1),0(%4)\n"
+		"	xc	0(1,%1),0(%5)\n"
+		"3:\n"
+		: "+d" (bytes), "+a" (p1), "+a" (p2), "+a" (p3), "+a" (p4),
+		  "+a" (reg7)
+		: : "0", "1", "cc", "memory");
+}
+
+struct xor_block_template xor_block_xc = {
+	.name = "xc",
+	.do_2 = xor_xc_2,
+	.do_3 = xor_xc_3,
+	.do_4 = xor_xc_4,
+	.do_5 = xor_xc_5,
+};
+EXPORT_SYMBOL(xor_block_xc);
diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile
index 839592ca265c..2ae54cad2b6a 100644
--- a/arch/s390/mm/Makefile
+++ b/arch/s390/mm/Makefile
@@ -2,9 +2,11 @@
 # Makefile for the linux s390-specific parts of the memory manager.
 #
 
-obj-y		:= init.o fault.o extmem.o mmap.o vmem.o pgtable.o maccess.o
+obj-y		:= init.o fault.o extmem.o mmap.o vmem.o maccess.o
 obj-y		+= page-states.o gup.o extable.o pageattr.o mem_detect.o
+obj-y		+= pgtable.o pgalloc.o
 
 obj-$(CONFIG_CMM)		+= cmm.o
 obj-$(CONFIG_HUGETLB_PAGE)	+= hugetlbpage.o
 obj-$(CONFIG_S390_PTDUMP)	+= dump_pagetables.o
+obj-$(CONFIG_PGSTE)		+= gmap.o
diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c
index a1bf4ad8925d..02042b6b66bf 100644
--- a/arch/s390/mm/extmem.c
+++ b/arch/s390/mm/extmem.c
@@ -265,7 +265,7 @@ query_segment_type (struct dcss_segment *seg)
 		goto out_free;
 	}
 	if (diag_cc > 1) {
-		pr_warning("Querying a DCSS type failed with rc=%ld\n", vmrc);
+		pr_warn("Querying a DCSS type failed with rc=%ld\n", vmrc);
 		rc = dcss_diag_translate_rc (vmrc);
 		goto out_free;
 	}
@@ -457,8 +457,7 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long
 		goto out_resource;
 	}
 	if (diag_cc > 1) {
-		pr_warning("Loading DCSS %s failed with rc=%ld\n", name,
-			   end_addr);
+		pr_warn("Loading DCSS %s failed with rc=%ld\n", name, end_addr);
 		rc = dcss_diag_translate_rc(end_addr);
 		dcss_diag(&purgeseg_scode, seg->dcss_name,
 				&dummy, &dummy);
@@ -574,8 +573,7 @@ segment_modify_shared (char *name, int do_nonshared)
 		goto out_unlock;
 	}
 	if (atomic_read (&seg->ref_count) != 1) {
-		pr_warning("DCSS %s is in use and cannot be reloaded\n",
-			   name);
+		pr_warn("DCSS %s is in use and cannot be reloaded\n", name);
 		rc = -EAGAIN;
 		goto out_unlock;
 	}
@@ -588,8 +586,8 @@ segment_modify_shared (char *name, int do_nonshared)
 			seg->res->flags |= IORESOURCE_READONLY;
 
 	if (request_resource(&iomem_resource, seg->res)) {
-		pr_warning("DCSS %s overlaps with used memory resources "
-			   "and cannot be reloaded\n", name);
+		pr_warn("DCSS %s overlaps with used memory resources and cannot be reloaded\n",
+			name);
 		rc = -EBUSY;
 		kfree(seg->res);
 		goto out_del_mem;
@@ -607,8 +605,8 @@ segment_modify_shared (char *name, int do_nonshared)
 		goto out_del_res;
 	}
 	if (diag_cc > 1) {
-		pr_warning("Reloading DCSS %s failed with rc=%ld\n", name,
-			   end_addr);
+		pr_warn("Reloading DCSS %s failed with rc=%ld\n",
+			name, end_addr);
 		rc = dcss_diag_translate_rc(end_addr);
 		goto out_del_res;
 	}
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 791a4146052c..cce577feab1e 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -32,6 +32,7 @@
 #include <asm/asm-offsets.h>
 #include <asm/diag.h>
 #include <asm/pgtable.h>
+#include <asm/gmap.h>
 #include <asm/irq.h>
 #include <asm/mmu_context.h>
 #include <asm/facility.h>
@@ -183,6 +184,8 @@ static void dump_fault_info(struct pt_regs *regs)
 {
 	unsigned long asce;
 
+	pr_alert("Failing address: %016lx TEID: %016lx\n",
+		 regs->int_parm_long & __FAIL_ADDR_MASK, regs->int_parm_long);
 	pr_alert("Fault in ");
 	switch (regs->int_parm_long & 3) {
 	case 3:
@@ -218,7 +221,9 @@ static void dump_fault_info(struct pt_regs *regs)
 	dump_pagetable(asce, regs->int_parm_long & __FAIL_ADDR_MASK);
 }
 
-static inline void report_user_fault(struct pt_regs *regs, long signr)
+int show_unhandled_signals = 1;
+
+void report_user_fault(struct pt_regs *regs, long signr, int is_mm_fault)
 {
 	if ((task_pid_nr(current) > 1) && !show_unhandled_signals)
 		return;
@@ -230,9 +235,8 @@ static inline void report_user_fault(struct pt_regs *regs, long signr)
 	       regs->int_code & 0xffff, regs->int_code >> 17);
 	print_vma_addr(KERN_CONT "in ", regs->psw.addr);
 	printk(KERN_CONT "\n");
-	printk(KERN_ALERT "failing address: %016lx TEID: %016lx\n",
-	       regs->int_parm_long & __FAIL_ADDR_MASK, regs->int_parm_long);
-	dump_fault_info(regs);
+	if (is_mm_fault)
+		dump_fault_info(regs);
 	show_regs(regs);
 }
 
@@ -244,7 +248,7 @@ static noinline void do_sigsegv(struct pt_regs *regs, int si_code)
 {
 	struct siginfo si;
 
-	report_user_fault(regs, SIGSEGV);
+	report_user_fault(regs, SIGSEGV, 1);
 	si.si_signo = SIGSEGV;
 	si.si_code = si_code;
 	si.si_addr = (void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK);
@@ -272,8 +276,6 @@ static noinline void do_no_context(struct pt_regs *regs)
 	else
 		printk(KERN_ALERT "Unable to handle kernel paging request"
 		       " in virtual user address space\n");
-	printk(KERN_ALERT "failing address: %016lx TEID: %016lx\n",
-	       regs->int_parm_long & __FAIL_ADDR_MASK, regs->int_parm_long);
 	dump_fault_info(regs);
 	die(regs, "Oops");
 	do_exit(SIGKILL);
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
new file mode 100644
index 000000000000..69247b4dcc43
--- /dev/null
+++ b/arch/s390/mm/gmap.c
@@ -0,0 +1,774 @@
+/*
+ *  KVM guest address space mapping code
+ *
+ *    Copyright IBM Corp. 2007, 2016
+ *    Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/smp.h>
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+#include <linux/swapops.h>
+#include <linux/ksm.h>
+#include <linux/mman.h>
+
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/gmap.h>
+#include <asm/tlb.h>
+
+/**
+ * gmap_alloc - allocate a guest address space
+ * @mm: pointer to the parent mm_struct
+ * @limit: maximum size of the gmap address space
+ *
+ * Returns a guest address space structure.
+ */
+struct gmap *gmap_alloc(struct mm_struct *mm, unsigned long limit)
+{
+	struct gmap *gmap;
+	struct page *page;
+	unsigned long *table;
+	unsigned long etype, atype;
+
+	if (limit < (1UL << 31)) {
+		limit = (1UL << 31) - 1;
+		atype = _ASCE_TYPE_SEGMENT;
+		etype = _SEGMENT_ENTRY_EMPTY;
+	} else if (limit < (1UL << 42)) {
+		limit = (1UL << 42) - 1;
+		atype = _ASCE_TYPE_REGION3;
+		etype = _REGION3_ENTRY_EMPTY;
+	} else if (limit < (1UL << 53)) {
+		limit = (1UL << 53) - 1;
+		atype = _ASCE_TYPE_REGION2;
+		etype = _REGION2_ENTRY_EMPTY;
+	} else {
+		limit = -1UL;
+		atype = _ASCE_TYPE_REGION1;
+		etype = _REGION1_ENTRY_EMPTY;
+	}
+	gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL);
+	if (!gmap)
+		goto out;
+	INIT_LIST_HEAD(&gmap->crst_list);
+	INIT_RADIX_TREE(&gmap->guest_to_host, GFP_KERNEL);
+	INIT_RADIX_TREE(&gmap->host_to_guest, GFP_ATOMIC);
+	spin_lock_init(&gmap->guest_table_lock);
+	gmap->mm = mm;
+	page = alloc_pages(GFP_KERNEL, 2);
+	if (!page)
+		goto out_free;
+	page->index = 0;
+	list_add(&page->lru, &gmap->crst_list);
+	table = (unsigned long *) page_to_phys(page);
+	crst_table_init(table, etype);
+	gmap->table = table;
+	gmap->asce = atype | _ASCE_TABLE_LENGTH |
+		_ASCE_USER_BITS | __pa(table);
+	gmap->asce_end = limit;
+	down_write(&mm->mmap_sem);
+	list_add(&gmap->list, &mm->context.gmap_list);
+	up_write(&mm->mmap_sem);
+	return gmap;
+
+out_free:
+	kfree(gmap);
+out:
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(gmap_alloc);
+
+static void gmap_flush_tlb(struct gmap *gmap)
+{
+	if (MACHINE_HAS_IDTE)
+		__tlb_flush_asce(gmap->mm, gmap->asce);
+	else
+		__tlb_flush_global();
+}
+
+static void gmap_radix_tree_free(struct radix_tree_root *root)
+{
+	struct radix_tree_iter iter;
+	unsigned long indices[16];
+	unsigned long index;
+	void **slot;
+	int i, nr;
+
+	/* A radix tree is freed by deleting all of its entries */
+	index = 0;
+	do {
+		nr = 0;
+		radix_tree_for_each_slot(slot, root, &iter, index) {
+			indices[nr] = iter.index;
+			if (++nr == 16)
+				break;
+		}
+		for (i = 0; i < nr; i++) {
+			index = indices[i];
+			radix_tree_delete(root, index);
+		}
+	} while (nr > 0);
+}
+
+/**
+ * gmap_free - free a guest address space
+ * @gmap: pointer to the guest address space structure
+ */
+void gmap_free(struct gmap *gmap)
+{
+	struct page *page, *next;
+
+	/* Flush tlb. */
+	if (MACHINE_HAS_IDTE)
+		__tlb_flush_asce(gmap->mm, gmap->asce);
+	else
+		__tlb_flush_global();
+
+	/* Free all segment & region tables. */
+	list_for_each_entry_safe(page, next, &gmap->crst_list, lru)
+		__free_pages(page, 2);
+	gmap_radix_tree_free(&gmap->guest_to_host);
+	gmap_radix_tree_free(&gmap->host_to_guest);
+	down_write(&gmap->mm->mmap_sem);
+	list_del(&gmap->list);
+	up_write(&gmap->mm->mmap_sem);
+	kfree(gmap);
+}
+EXPORT_SYMBOL_GPL(gmap_free);
+
+/**
+ * gmap_enable - switch primary space to the guest address space
+ * @gmap: pointer to the guest address space structure
+ */
+void gmap_enable(struct gmap *gmap)
+{
+	S390_lowcore.gmap = (unsigned long) gmap;
+}
+EXPORT_SYMBOL_GPL(gmap_enable);
+
+/**
+ * gmap_disable - switch back to the standard primary address space
+ * @gmap: pointer to the guest address space structure
+ */
+void gmap_disable(struct gmap *gmap)
+{
+	S390_lowcore.gmap = 0UL;
+}
+EXPORT_SYMBOL_GPL(gmap_disable);
+
+/*
+ * gmap_alloc_table is assumed to be called with mmap_sem held
+ */
+static int gmap_alloc_table(struct gmap *gmap, unsigned long *table,
+			    unsigned long init, unsigned long gaddr)
+{
+	struct page *page;
+	unsigned long *new;
+
+	/* since we dont free the gmap table until gmap_free we can unlock */
+	page = alloc_pages(GFP_KERNEL, 2);
+	if (!page)
+		return -ENOMEM;
+	new = (unsigned long *) page_to_phys(page);
+	crst_table_init(new, init);
+	spin_lock(&gmap->mm->page_table_lock);
+	if (*table & _REGION_ENTRY_INVALID) {
+		list_add(&page->lru, &gmap->crst_list);
+		*table = (unsigned long) new | _REGION_ENTRY_LENGTH |
+			(*table & _REGION_ENTRY_TYPE_MASK);
+		page->index = gaddr;
+		page = NULL;
+	}
+	spin_unlock(&gmap->mm->page_table_lock);
+	if (page)
+		__free_pages(page, 2);
+	return 0;
+}
+
+/**
+ * __gmap_segment_gaddr - find virtual address from segment pointer
+ * @entry: pointer to a segment table entry in the guest address space
+ *
+ * Returns the virtual address in the guest address space for the segment
+ */
+static unsigned long __gmap_segment_gaddr(unsigned long *entry)
+{
+	struct page *page;
+	unsigned long offset, mask;
+
+	offset = (unsigned long) entry / sizeof(unsigned long);
+	offset = (offset & (PTRS_PER_PMD - 1)) * PMD_SIZE;
+	mask = ~(PTRS_PER_PMD * sizeof(pmd_t) - 1);
+	page = virt_to_page((void *)((unsigned long) entry & mask));
+	return page->index + offset;
+}
+
+/**
+ * __gmap_unlink_by_vmaddr - unlink a single segment via a host address
+ * @gmap: pointer to the guest address space structure
+ * @vmaddr: address in the host process address space
+ *
+ * Returns 1 if a TLB flush is required
+ */
+static int __gmap_unlink_by_vmaddr(struct gmap *gmap, unsigned long vmaddr)
+{
+	unsigned long *entry;
+	int flush = 0;
+
+	spin_lock(&gmap->guest_table_lock);
+	entry = radix_tree_delete(&gmap->host_to_guest, vmaddr >> PMD_SHIFT);
+	if (entry) {
+		flush = (*entry != _SEGMENT_ENTRY_INVALID);
+		*entry = _SEGMENT_ENTRY_INVALID;
+	}
+	spin_unlock(&gmap->guest_table_lock);
+	return flush;
+}
+
+/**
+ * __gmap_unmap_by_gaddr - unmap a single segment via a guest address
+ * @gmap: pointer to the guest address space structure
+ * @gaddr: address in the guest address space
+ *
+ * Returns 1 if a TLB flush is required
+ */
+static int __gmap_unmap_by_gaddr(struct gmap *gmap, unsigned long gaddr)
+{
+	unsigned long vmaddr;
+
+	vmaddr = (unsigned long) radix_tree_delete(&gmap->guest_to_host,
+						   gaddr >> PMD_SHIFT);
+	return vmaddr ? __gmap_unlink_by_vmaddr(gmap, vmaddr) : 0;
+}
+
+/**
+ * gmap_unmap_segment - unmap segment from the guest address space
+ * @gmap: pointer to the guest address space structure
+ * @to: address in the guest address space
+ * @len: length of the memory area to unmap
+ *
+ * Returns 0 if the unmap succeeded, -EINVAL if not.
+ */
+int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len)
+{
+	unsigned long off;
+	int flush;
+
+	if ((to | len) & (PMD_SIZE - 1))
+		return -EINVAL;
+	if (len == 0 || to + len < to)
+		return -EINVAL;
+
+	flush = 0;
+	down_write(&gmap->mm->mmap_sem);
+	for (off = 0; off < len; off += PMD_SIZE)
+		flush |= __gmap_unmap_by_gaddr(gmap, to + off);
+	up_write(&gmap->mm->mmap_sem);
+	if (flush)
+		gmap_flush_tlb(gmap);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(gmap_unmap_segment);
+
+/**
+ * gmap_map_segment - map a segment to the guest address space
+ * @gmap: pointer to the guest address space structure
+ * @from: source address in the parent address space
+ * @to: target address in the guest address space
+ * @len: length of the memory area to map
+ *
+ * Returns 0 if the mmap succeeded, -EINVAL or -ENOMEM if not.
+ */
+int gmap_map_segment(struct gmap *gmap, unsigned long from,
+		     unsigned long to, unsigned long len)
+{
+	unsigned long off;
+	int flush;
+
+	if ((from | to | len) & (PMD_SIZE - 1))
+		return -EINVAL;
+	if (len == 0 || from + len < from || to + len < to ||
+	    from + len > TASK_MAX_SIZE || to + len > gmap->asce_end)
+		return -EINVAL;
+
+	flush = 0;
+	down_write(&gmap->mm->mmap_sem);
+	for (off = 0; off < len; off += PMD_SIZE) {
+		/* Remove old translation */
+		flush |= __gmap_unmap_by_gaddr(gmap, to + off);
+		/* Store new translation */
+		if (radix_tree_insert(&gmap->guest_to_host,
+				      (to + off) >> PMD_SHIFT,
+				      (void *) from + off))
+			break;
+	}
+	up_write(&gmap->mm->mmap_sem);
+	if (flush)
+		gmap_flush_tlb(gmap);
+	if (off >= len)
+		return 0;
+	gmap_unmap_segment(gmap, to, len);
+	return -ENOMEM;
+}
+EXPORT_SYMBOL_GPL(gmap_map_segment);
+
+/**
+ * __gmap_translate - translate a guest address to a user space address
+ * @gmap: pointer to guest mapping meta data structure
+ * @gaddr: guest address
+ *
+ * Returns user space address which corresponds to the guest address or
+ * -EFAULT if no such mapping exists.
+ * This function does not establish potentially missing page table entries.
+ * The mmap_sem of the mm that belongs to the address space must be held
+ * when this function gets called.
+ */
+unsigned long __gmap_translate(struct gmap *gmap, unsigned long gaddr)
+{
+	unsigned long vmaddr;
+
+	vmaddr = (unsigned long)
+		radix_tree_lookup(&gmap->guest_to_host, gaddr >> PMD_SHIFT);
+	return vmaddr ? (vmaddr | (gaddr & ~PMD_MASK)) : -EFAULT;
+}
+EXPORT_SYMBOL_GPL(__gmap_translate);
+
+/**
+ * gmap_translate - translate a guest address to a user space address
+ * @gmap: pointer to guest mapping meta data structure
+ * @gaddr: guest address
+ *
+ * Returns user space address which corresponds to the guest address or
+ * -EFAULT if no such mapping exists.
+ * This function does not establish potentially missing page table entries.
+ */
+unsigned long gmap_translate(struct gmap *gmap, unsigned long gaddr)
+{
+	unsigned long rc;
+
+	down_read(&gmap->mm->mmap_sem);
+	rc = __gmap_translate(gmap, gaddr);
+	up_read(&gmap->mm->mmap_sem);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(gmap_translate);
+
+/**
+ * gmap_unlink - disconnect a page table from the gmap shadow tables
+ * @gmap: pointer to guest mapping meta data structure
+ * @table: pointer to the host page table
+ * @vmaddr: vm address associated with the host page table
+ */
+void gmap_unlink(struct mm_struct *mm, unsigned long *table,
+		 unsigned long vmaddr)
+{
+	struct gmap *gmap;
+	int flush;
+
+	list_for_each_entry(gmap, &mm->context.gmap_list, list) {
+		flush = __gmap_unlink_by_vmaddr(gmap, vmaddr);
+		if (flush)
+			gmap_flush_tlb(gmap);
+	}
+}
+
+/**
+ * gmap_link - set up shadow page tables to connect a host to a guest address
+ * @gmap: pointer to guest mapping meta data structure
+ * @gaddr: guest address
+ * @vmaddr: vm address
+ *
+ * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT
+ * if the vm address is already mapped to a different guest segment.
+ * The mmap_sem of the mm that belongs to the address space must be held
+ * when this function gets called.
+ */
+int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
+{
+	struct mm_struct *mm;
+	unsigned long *table;
+	spinlock_t *ptl;
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	int rc;
+
+	/* Create higher level tables in the gmap page table */
+	table = gmap->table;
+	if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION1) {
+		table += (gaddr >> 53) & 0x7ff;
+		if ((*table & _REGION_ENTRY_INVALID) &&
+		    gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY,
+				     gaddr & 0xffe0000000000000UL))
+			return -ENOMEM;
+		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+	}
+	if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION2) {
+		table += (gaddr >> 42) & 0x7ff;
+		if ((*table & _REGION_ENTRY_INVALID) &&
+		    gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY,
+				     gaddr & 0xfffffc0000000000UL))
+			return -ENOMEM;
+		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+	}
+	if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION3) {
+		table += (gaddr >> 31) & 0x7ff;
+		if ((*table & _REGION_ENTRY_INVALID) &&
+		    gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY,
+				     gaddr & 0xffffffff80000000UL))
+			return -ENOMEM;
+		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+	}
+	table += (gaddr >> 20) & 0x7ff;
+	/* Walk the parent mm page table */
+	mm = gmap->mm;
+	pgd = pgd_offset(mm, vmaddr);
+	VM_BUG_ON(pgd_none(*pgd));
+	pud = pud_offset(pgd, vmaddr);
+	VM_BUG_ON(pud_none(*pud));
+	pmd = pmd_offset(pud, vmaddr);
+	VM_BUG_ON(pmd_none(*pmd));
+	/* large pmds cannot yet be handled */
+	if (pmd_large(*pmd))
+		return -EFAULT;
+	/* Link gmap segment table entry location to page table. */
+	rc = radix_tree_preload(GFP_KERNEL);
+	if (rc)
+		return rc;
+	ptl = pmd_lock(mm, pmd);
+	spin_lock(&gmap->guest_table_lock);
+	if (*table == _SEGMENT_ENTRY_INVALID) {
+		rc = radix_tree_insert(&gmap->host_to_guest,
+				       vmaddr >> PMD_SHIFT, table);
+		if (!rc)
+			*table = pmd_val(*pmd);
+	} else
+		rc = 0;
+	spin_unlock(&gmap->guest_table_lock);
+	spin_unlock(ptl);
+	radix_tree_preload_end();
+	return rc;
+}
+
+/**
+ * gmap_fault - resolve a fault on a guest address
+ * @gmap: pointer to guest mapping meta data structure
+ * @gaddr: guest address
+ * @fault_flags: flags to pass down to handle_mm_fault()
+ *
+ * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT
+ * if the vm address is already mapped to a different guest segment.
+ */
+int gmap_fault(struct gmap *gmap, unsigned long gaddr,
+	       unsigned int fault_flags)
+{
+	unsigned long vmaddr;
+	int rc;
+	bool unlocked;
+
+	down_read(&gmap->mm->mmap_sem);
+
+retry:
+	unlocked = false;
+	vmaddr = __gmap_translate(gmap, gaddr);
+	if (IS_ERR_VALUE(vmaddr)) {
+		rc = vmaddr;
+		goto out_up;
+	}
+	if (fixup_user_fault(current, gmap->mm, vmaddr, fault_flags,
+			     &unlocked)) {
+		rc = -EFAULT;
+		goto out_up;
+	}
+	/*
+	 * In the case that fixup_user_fault unlocked the mmap_sem during
+	 * faultin redo __gmap_translate to not race with a map/unmap_segment.
+	 */
+	if (unlocked)
+		goto retry;
+
+	rc = __gmap_link(gmap, gaddr, vmaddr);
+out_up:
+	up_read(&gmap->mm->mmap_sem);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(gmap_fault);
+
+/*
+ * this function is assumed to be called with mmap_sem held
+ */
+void __gmap_zap(struct gmap *gmap, unsigned long gaddr)
+{
+	unsigned long vmaddr;
+	spinlock_t *ptl;
+	pte_t *ptep;
+
+	/* Find the vm address for the guest address */
+	vmaddr = (unsigned long) radix_tree_lookup(&gmap->guest_to_host,
+						   gaddr >> PMD_SHIFT);
+	if (vmaddr) {
+		vmaddr |= gaddr & ~PMD_MASK;
+		/* Get pointer to the page table entry */
+		ptep = get_locked_pte(gmap->mm, vmaddr, &ptl);
+		if (likely(ptep))
+			ptep_zap_unused(gmap->mm, vmaddr, ptep, 0);
+		pte_unmap_unlock(ptep, ptl);
+	}
+}
+EXPORT_SYMBOL_GPL(__gmap_zap);
+
+void gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to)
+{
+	unsigned long gaddr, vmaddr, size;
+	struct vm_area_struct *vma;
+
+	down_read(&gmap->mm->mmap_sem);
+	for (gaddr = from; gaddr < to;
+	     gaddr = (gaddr + PMD_SIZE) & PMD_MASK) {
+		/* Find the vm address for the guest address */
+		vmaddr = (unsigned long)
+			radix_tree_lookup(&gmap->guest_to_host,
+					  gaddr >> PMD_SHIFT);
+		if (!vmaddr)
+			continue;
+		vmaddr |= gaddr & ~PMD_MASK;
+		/* Find vma in the parent mm */
+		vma = find_vma(gmap->mm, vmaddr);
+		size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK));
+		zap_page_range(vma, vmaddr, size, NULL);
+	}
+	up_read(&gmap->mm->mmap_sem);
+}
+EXPORT_SYMBOL_GPL(gmap_discard);
+
+static LIST_HEAD(gmap_notifier_list);
+static DEFINE_SPINLOCK(gmap_notifier_lock);
+
+/**
+ * gmap_register_ipte_notifier - register a pte invalidation callback
+ * @nb: pointer to the gmap notifier block
+ */
+void gmap_register_ipte_notifier(struct gmap_notifier *nb)
+{
+	spin_lock(&gmap_notifier_lock);
+	list_add(&nb->list, &gmap_notifier_list);
+	spin_unlock(&gmap_notifier_lock);
+}
+EXPORT_SYMBOL_GPL(gmap_register_ipte_notifier);
+
+/**
+ * gmap_unregister_ipte_notifier - remove a pte invalidation callback
+ * @nb: pointer to the gmap notifier block
+ */
+void gmap_unregister_ipte_notifier(struct gmap_notifier *nb)
+{
+	spin_lock(&gmap_notifier_lock);
+	list_del_init(&nb->list);
+	spin_unlock(&gmap_notifier_lock);
+}
+EXPORT_SYMBOL_GPL(gmap_unregister_ipte_notifier);
+
+/**
+ * gmap_ipte_notify - mark a range of ptes for invalidation notification
+ * @gmap: pointer to guest mapping meta data structure
+ * @gaddr: virtual address in the guest address space
+ * @len: size of area
+ *
+ * Returns 0 if for each page in the given range a gmap mapping exists and
+ * the invalidation notification could be set. If the gmap mapping is missing
+ * for one or more pages -EFAULT is returned. If no memory could be allocated
+ * -ENOMEM is returned. This function establishes missing page table entries.
+ */
+int gmap_ipte_notify(struct gmap *gmap, unsigned long gaddr, unsigned long len)
+{
+	unsigned long addr;
+	spinlock_t *ptl;
+	pte_t *ptep;
+	bool unlocked;
+	int rc = 0;
+
+	if ((gaddr & ~PAGE_MASK) || (len & ~PAGE_MASK))
+		return -EINVAL;
+	down_read(&gmap->mm->mmap_sem);
+	while (len) {
+		unlocked = false;
+		/* Convert gmap address and connect the page tables */
+		addr = __gmap_translate(gmap, gaddr);
+		if (IS_ERR_VALUE(addr)) {
+			rc = addr;
+			break;
+		}
+		/* Get the page mapped */
+		if (fixup_user_fault(current, gmap->mm, addr, FAULT_FLAG_WRITE,
+				     &unlocked)) {
+			rc = -EFAULT;
+			break;
+		}
+		/* While trying to map mmap_sem got unlocked. Let us retry */
+		if (unlocked)
+			continue;
+		rc = __gmap_link(gmap, gaddr, addr);
+		if (rc)
+			break;
+		/* Walk the process page table, lock and get pte pointer */
+		ptep = get_locked_pte(gmap->mm, addr, &ptl);
+		VM_BUG_ON(!ptep);
+		/* Set notification bit in the pgste of the pte */
+		if ((pte_val(*ptep) & (_PAGE_INVALID | _PAGE_PROTECT)) == 0) {
+			ptep_set_notify(gmap->mm, addr, ptep);
+			gaddr += PAGE_SIZE;
+			len -= PAGE_SIZE;
+		}
+		pte_unmap_unlock(ptep, ptl);
+	}
+	up_read(&gmap->mm->mmap_sem);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(gmap_ipte_notify);
+
+/**
+ * ptep_notify - call all invalidation callbacks for a specific pte.
+ * @mm: pointer to the process mm_struct
+ * @addr: virtual address in the process address space
+ * @pte: pointer to the page table entry
+ *
+ * This function is assumed to be called with the page table lock held
+ * for the pte to notify.
+ */
+void ptep_notify(struct mm_struct *mm, unsigned long vmaddr, pte_t *pte)
+{
+	unsigned long offset, gaddr;
+	unsigned long *table;
+	struct gmap_notifier *nb;
+	struct gmap *gmap;
+
+	offset = ((unsigned long) pte) & (255 * sizeof(pte_t));
+	offset = offset * (4096 / sizeof(pte_t));
+	spin_lock(&gmap_notifier_lock);
+	list_for_each_entry(gmap, &mm->context.gmap_list, list) {
+		table = radix_tree_lookup(&gmap->host_to_guest,
+					  vmaddr >> PMD_SHIFT);
+		if (!table)
+			continue;
+		gaddr = __gmap_segment_gaddr(table) + offset;
+		list_for_each_entry(nb, &gmap_notifier_list, list)
+			nb->notifier_call(gmap, gaddr);
+	}
+	spin_unlock(&gmap_notifier_lock);
+}
+EXPORT_SYMBOL_GPL(ptep_notify);
+
+static inline void thp_split_mm(struct mm_struct *mm)
+{
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	struct vm_area_struct *vma;
+	unsigned long addr;
+
+	for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) {
+		for (addr = vma->vm_start;
+		     addr < vma->vm_end;
+		     addr += PAGE_SIZE)
+			follow_page(vma, addr, FOLL_SPLIT);
+		vma->vm_flags &= ~VM_HUGEPAGE;
+		vma->vm_flags |= VM_NOHUGEPAGE;
+	}
+	mm->def_flags |= VM_NOHUGEPAGE;
+#endif
+}
+
+/*
+ * switch on pgstes for its userspace process (for kvm)
+ */
+int s390_enable_sie(void)
+{
+	struct mm_struct *mm = current->mm;
+
+	/* Do we have pgstes? if yes, we are done */
+	if (mm_has_pgste(mm))
+		return 0;
+	/* Fail if the page tables are 2K */
+	if (!mm_alloc_pgste(mm))
+		return -EINVAL;
+	down_write(&mm->mmap_sem);
+	mm->context.has_pgste = 1;
+	/* split thp mappings and disable thp for future mappings */
+	thp_split_mm(mm);
+	up_write(&mm->mmap_sem);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(s390_enable_sie);
+
+/*
+ * Enable storage key handling from now on and initialize the storage
+ * keys with the default key.
+ */
+static int __s390_enable_skey(pte_t *pte, unsigned long addr,
+			      unsigned long next, struct mm_walk *walk)
+{
+	/*
+	 * Remove all zero page mappings,
+	 * after establishing a policy to forbid zero page mappings
+	 * following faults for that page will get fresh anonymous pages
+	 */
+	if (is_zero_pfn(pte_pfn(*pte)))
+		ptep_xchg_direct(walk->mm, addr, pte, __pte(_PAGE_INVALID));
+	/* Clear storage key */
+	ptep_zap_key(walk->mm, addr, pte);
+	return 0;
+}
+
+int s390_enable_skey(void)
+{
+	struct mm_walk walk = { .pte_entry = __s390_enable_skey };
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+	int rc = 0;
+
+	down_write(&mm->mmap_sem);
+	if (mm_use_skey(mm))
+		goto out_up;
+
+	mm->context.use_skey = 1;
+	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+		if (ksm_madvise(vma, vma->vm_start, vma->vm_end,
+				MADV_UNMERGEABLE, &vma->vm_flags)) {
+			mm->context.use_skey = 0;
+			rc = -ENOMEM;
+			goto out_up;
+		}
+	}
+	mm->def_flags &= ~VM_MERGEABLE;
+
+	walk.mm = mm;
+	walk_page_range(0, TASK_SIZE, &walk);
+
+out_up:
+	up_write(&mm->mmap_sem);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(s390_enable_skey);
+
+/*
+ * Reset CMMA state, make all pages stable again.
+ */
+static int __s390_reset_cmma(pte_t *pte, unsigned long addr,
+			     unsigned long next, struct mm_walk *walk)
+{
+	ptep_zap_unused(walk->mm, addr, pte, 1);
+	return 0;
+}
+
+void s390_reset_cmma(struct mm_struct *mm)
+{
+	struct mm_walk walk = { .pte_entry = __s390_reset_cmma };
+
+	down_write(&mm->mmap_sem);
+	walk.mm = mm;
+	walk_page_range(0, TASK_SIZE, &walk);
+	up_write(&mm->mmap_sem);
+}
+EXPORT_SYMBOL_GPL(s390_reset_cmma);
diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c
index f81096b6940d..1b5e8983f4f3 100644
--- a/arch/s390/mm/hugetlbpage.c
+++ b/arch/s390/mm/hugetlbpage.c
@@ -105,11 +105,10 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
 			      unsigned long addr, pte_t *ptep)
 {
 	pmd_t *pmdp = (pmd_t *) ptep;
-	pte_t pte = huge_ptep_get(ptep);
+	pmd_t old;
 
-	pmdp_flush_direct(mm, addr, pmdp);
-	pmd_val(*pmdp) = _SEGMENT_ENTRY_EMPTY;
-	return pte;
+	old = pmdp_xchg_direct(mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_EMPTY));
+	return __pmd_to_pte(old);
 }
 
 pte_t *huge_pte_alloc(struct mm_struct *mm,
diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c
index 749c98407b41..f2a5c29a97e9 100644
--- a/arch/s390/mm/pageattr.c
+++ b/arch/s390/mm/pageattr.c
@@ -65,19 +65,17 @@ static pte_t *walk_page_table(unsigned long addr)
 static void change_page_attr(unsigned long addr, int numpages,
 			     pte_t (*set) (pte_t))
 {
-	pte_t *ptep, pte;
+	pte_t *ptep;
 	int i;
 
 	for (i = 0; i < numpages; i++) {
 		ptep = walk_page_table(addr);
 		if (WARN_ON_ONCE(!ptep))
 			break;
-		pte = *ptep;
-		pte = set(pte);
-		__ptep_ipte(addr, ptep);
-		*ptep = pte;
+		*ptep = set(*ptep);
 		addr += PAGE_SIZE;
 	}
+	__tlb_flush_kernel();
 }
 
 int set_memory_ro(unsigned long addr, int numpages)
diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c
new file mode 100644
index 000000000000..f6c3de26cda8
--- /dev/null
+++ b/arch/s390/mm/pgalloc.c
@@ -0,0 +1,360 @@
+/*
+ *  Page table allocation functions
+ *
+ *    Copyright IBM Corp. 2016
+ *    Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#include <linux/mm.h>
+#include <linux/sysctl.h>
+#include <asm/mmu_context.h>
+#include <asm/pgalloc.h>
+#include <asm/gmap.h>
+#include <asm/tlb.h>
+#include <asm/tlbflush.h>
+
+#ifdef CONFIG_PGSTE
+
+static int page_table_allocate_pgste_min = 0;
+static int page_table_allocate_pgste_max = 1;
+int page_table_allocate_pgste = 0;
+EXPORT_SYMBOL(page_table_allocate_pgste);
+
+static struct ctl_table page_table_sysctl[] = {
+	{
+		.procname	= "allocate_pgste",
+		.data		= &page_table_allocate_pgste,
+		.maxlen		= sizeof(int),
+		.mode		= S_IRUGO | S_IWUSR,
+		.proc_handler	= proc_dointvec,
+		.extra1		= &page_table_allocate_pgste_min,
+		.extra2		= &page_table_allocate_pgste_max,
+	},
+	{ }
+};
+
+static struct ctl_table page_table_sysctl_dir[] = {
+	{
+		.procname	= "vm",
+		.maxlen		= 0,
+		.mode		= 0555,
+		.child		= page_table_sysctl,
+	},
+	{ }
+};
+
+static int __init page_table_register_sysctl(void)
+{
+	return register_sysctl_table(page_table_sysctl_dir) ? 0 : -ENOMEM;
+}
+__initcall(page_table_register_sysctl);
+
+#endif /* CONFIG_PGSTE */
+
+unsigned long *crst_table_alloc(struct mm_struct *mm)
+{
+	struct page *page = alloc_pages(GFP_KERNEL, 2);
+
+	if (!page)
+		return NULL;
+	return (unsigned long *) page_to_phys(page);
+}
+
+void crst_table_free(struct mm_struct *mm, unsigned long *table)
+{
+	free_pages((unsigned long) table, 2);
+}
+
+static void __crst_table_upgrade(void *arg)
+{
+	struct mm_struct *mm = arg;
+
+	if (current->active_mm == mm) {
+		clear_user_asce();
+		set_user_asce(mm);
+	}
+	__tlb_flush_local();
+}
+
+int crst_table_upgrade(struct mm_struct *mm, unsigned long limit)
+{
+	unsigned long *table, *pgd;
+	unsigned long entry;
+	int flush;
+
+	BUG_ON(limit > TASK_MAX_SIZE);
+	flush = 0;
+repeat:
+	table = crst_table_alloc(mm);
+	if (!table)
+		return -ENOMEM;
+	spin_lock_bh(&mm->page_table_lock);
+	if (mm->context.asce_limit < limit) {
+		pgd = (unsigned long *) mm->pgd;
+		if (mm->context.asce_limit <= (1UL << 31)) {
+			entry = _REGION3_ENTRY_EMPTY;
+			mm->context.asce_limit = 1UL << 42;
+			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
+						_ASCE_USER_BITS |
+						_ASCE_TYPE_REGION3;
+		} else {
+			entry = _REGION2_ENTRY_EMPTY;
+			mm->context.asce_limit = 1UL << 53;
+			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
+						_ASCE_USER_BITS |
+						_ASCE_TYPE_REGION2;
+		}
+		crst_table_init(table, entry);
+		pgd_populate(mm, (pgd_t *) table, (pud_t *) pgd);
+		mm->pgd = (pgd_t *) table;
+		mm->task_size = mm->context.asce_limit;
+		table = NULL;
+		flush = 1;
+	}
+	spin_unlock_bh(&mm->page_table_lock);
+	if (table)
+		crst_table_free(mm, table);
+	if (mm->context.asce_limit < limit)
+		goto repeat;
+	if (flush)
+		on_each_cpu(__crst_table_upgrade, mm, 0);
+	return 0;
+}
+
+void crst_table_downgrade(struct mm_struct *mm, unsigned long limit)
+{
+	pgd_t *pgd;
+
+	if (current->active_mm == mm) {
+		clear_user_asce();
+		__tlb_flush_mm(mm);
+	}
+	while (mm->context.asce_limit > limit) {
+		pgd = mm->pgd;
+		switch (pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) {
+		case _REGION_ENTRY_TYPE_R2:
+			mm->context.asce_limit = 1UL << 42;
+			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
+						_ASCE_USER_BITS |
+						_ASCE_TYPE_REGION3;
+			break;
+		case _REGION_ENTRY_TYPE_R3:
+			mm->context.asce_limit = 1UL << 31;
+			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
+						_ASCE_USER_BITS |
+						_ASCE_TYPE_SEGMENT;
+			break;
+		default:
+			BUG();
+		}
+		mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN);
+		mm->task_size = mm->context.asce_limit;
+		crst_table_free(mm, (unsigned long *) pgd);
+	}
+	if (current->active_mm == mm)
+		set_user_asce(mm);
+}
+
+static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits)
+{
+	unsigned int old, new;
+
+	do {
+		old = atomic_read(v);
+		new = old ^ bits;
+	} while (atomic_cmpxchg(v, old, new) != old);
+	return new;
+}
+
+/*
+ * page table entry allocation/free routines.
+ */
+unsigned long *page_table_alloc(struct mm_struct *mm)
+{
+	unsigned long *table;
+	struct page *page;
+	unsigned int mask, bit;
+
+	/* Try to get a fragment of a 4K page as a 2K page table */
+	if (!mm_alloc_pgste(mm)) {
+		table = NULL;
+		spin_lock_bh(&mm->context.list_lock);
+		if (!list_empty(&mm->context.pgtable_list)) {
+			page = list_first_entry(&mm->context.pgtable_list,
+						struct page, lru);
+			mask = atomic_read(&page->_mapcount);
+			mask = (mask | (mask >> 4)) & 3;
+			if (mask != 3) {
+				table = (unsigned long *) page_to_phys(page);
+				bit = mask & 1;		/* =1 -> second 2K */
+				if (bit)
+					table += PTRS_PER_PTE;
+				atomic_xor_bits(&page->_mapcount, 1U << bit);
+				list_del(&page->lru);
+			}
+		}
+		spin_unlock_bh(&mm->context.list_lock);
+		if (table)
+			return table;
+	}
+	/* Allocate a fresh page */
+	page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
+	if (!page)
+		return NULL;
+	if (!pgtable_page_ctor(page)) {
+		__free_page(page);
+		return NULL;
+	}
+	/* Initialize page table */
+	table = (unsigned long *) page_to_phys(page);
+	if (mm_alloc_pgste(mm)) {
+		/* Return 4K page table with PGSTEs */
+		atomic_set(&page->_mapcount, 3);
+		clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
+		clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2);
+	} else {
+		/* Return the first 2K fragment of the page */
+		atomic_set(&page->_mapcount, 1);
+		clear_table(table, _PAGE_INVALID, PAGE_SIZE);
+		spin_lock_bh(&mm->context.list_lock);
+		list_add(&page->lru, &mm->context.pgtable_list);
+		spin_unlock_bh(&mm->context.list_lock);
+	}
+	return table;
+}
+
+void page_table_free(struct mm_struct *mm, unsigned long *table)
+{
+	struct page *page;
+	unsigned int bit, mask;
+
+	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
+	if (!mm_alloc_pgste(mm)) {
+		/* Free 2K page table fragment of a 4K page */
+		bit = (__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t));
+		spin_lock_bh(&mm->context.list_lock);
+		mask = atomic_xor_bits(&page->_mapcount, 1U << bit);
+		if (mask & 3)
+			list_add(&page->lru, &mm->context.pgtable_list);
+		else
+			list_del(&page->lru);
+		spin_unlock_bh(&mm->context.list_lock);
+		if (mask != 0)
+			return;
+	}
+
+	pgtable_page_dtor(page);
+	atomic_set(&page->_mapcount, -1);
+	__free_page(page);
+}
+
+void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table,
+			 unsigned long vmaddr)
+{
+	struct mm_struct *mm;
+	struct page *page;
+	unsigned int bit, mask;
+
+	mm = tlb->mm;
+	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
+	if (mm_alloc_pgste(mm)) {
+		gmap_unlink(mm, table, vmaddr);
+		table = (unsigned long *) (__pa(table) | 3);
+		tlb_remove_table(tlb, table);
+		return;
+	}
+	bit = (__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t));
+	spin_lock_bh(&mm->context.list_lock);
+	mask = atomic_xor_bits(&page->_mapcount, 0x11U << bit);
+	if (mask & 3)
+		list_add_tail(&page->lru, &mm->context.pgtable_list);
+	else
+		list_del(&page->lru);
+	spin_unlock_bh(&mm->context.list_lock);
+	table = (unsigned long *) (__pa(table) | (1U << bit));
+	tlb_remove_table(tlb, table);
+}
+
+static void __tlb_remove_table(void *_table)
+{
+	unsigned int mask = (unsigned long) _table & 3;
+	void *table = (void *)((unsigned long) _table ^ mask);
+	struct page *page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
+
+	switch (mask) {
+	case 0:		/* pmd or pud */
+		free_pages((unsigned long) table, 2);
+		break;
+	case 1:		/* lower 2K of a 4K page table */
+	case 2:		/* higher 2K of a 4K page table */
+		if (atomic_xor_bits(&page->_mapcount, mask << 4) != 0)
+			break;
+		/* fallthrough */
+	case 3:		/* 4K page table with pgstes */
+		pgtable_page_dtor(page);
+		atomic_set(&page->_mapcount, -1);
+		__free_page(page);
+		break;
+	}
+}
+
+static void tlb_remove_table_smp_sync(void *arg)
+{
+	/* Simply deliver the interrupt */
+}
+
+static void tlb_remove_table_one(void *table)
+{
+	/*
+	 * This isn't an RCU grace period and hence the page-tables cannot be
+	 * assumed to be actually RCU-freed.
+	 *
+	 * It is however sufficient for software page-table walkers that rely
+	 * on IRQ disabling. See the comment near struct mmu_table_batch.
+	 */
+	smp_call_function(tlb_remove_table_smp_sync, NULL, 1);
+	__tlb_remove_table(table);
+}
+
+static void tlb_remove_table_rcu(struct rcu_head *head)
+{
+	struct mmu_table_batch *batch;
+	int i;
+
+	batch = container_of(head, struct mmu_table_batch, rcu);
+
+	for (i = 0; i < batch->nr; i++)
+		__tlb_remove_table(batch->tables[i]);
+
+	free_page((unsigned long)batch);
+}
+
+void tlb_table_flush(struct mmu_gather *tlb)
+{
+	struct mmu_table_batch **batch = &tlb->batch;
+
+	if (*batch) {
+		call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu);
+		*batch = NULL;
+	}
+}
+
+void tlb_remove_table(struct mmu_gather *tlb, void *table)
+{
+	struct mmu_table_batch **batch = &tlb->batch;
+
+	tlb->mm->context.flush_mm = 1;
+	if (*batch == NULL) {
+		*batch = (struct mmu_table_batch *)
+			__get_free_page(GFP_NOWAIT | __GFP_NOWARN);
+		if (*batch == NULL) {
+			__tlb_flush_mm_lazy(tlb->mm);
+			tlb_remove_table_one(table);
+			return;
+		}
+		(*batch)->nr = 0;
+	}
+	(*batch)->tables[(*batch)->nr++] = table;
+	if ((*batch)->nr == MAX_TABLE_BATCH)
+		tlb_flush_mmu(tlb);
+}
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 5109827883ac..4324b87f9398 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -24,591 +24,397 @@
 #include <asm/tlbflush.h>
 #include <asm/mmu_context.h>
 
-unsigned long *crst_table_alloc(struct mm_struct *mm)
-{
-	struct page *page = alloc_pages(GFP_KERNEL, 2);
-
-	if (!page)
-		return NULL;
-	return (unsigned long *) page_to_phys(page);
+static inline pte_t ptep_flush_direct(struct mm_struct *mm,
+				      unsigned long addr, pte_t *ptep)
+{
+	int active, count;
+	pte_t old;
+
+	old = *ptep;
+	if (unlikely(pte_val(old) & _PAGE_INVALID))
+		return old;
+	active = (mm == current->active_mm) ? 1 : 0;
+	count = atomic_add_return(0x10000, &mm->context.attach_count);
+	if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active &&
+	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
+		__ptep_ipte_local(addr, ptep);
+	else
+		__ptep_ipte(addr, ptep);
+	atomic_sub(0x10000, &mm->context.attach_count);
+	return old;
 }
 
-void crst_table_free(struct mm_struct *mm, unsigned long *table)
+static inline pte_t ptep_flush_lazy(struct mm_struct *mm,
+				    unsigned long addr, pte_t *ptep)
 {
-	free_pages((unsigned long) table, 2);
+	int active, count;
+	pte_t old;
+
+	old = *ptep;
+	if (unlikely(pte_val(old) & _PAGE_INVALID))
+		return old;
+	active = (mm == current->active_mm) ? 1 : 0;
+	count = atomic_add_return(0x10000, &mm->context.attach_count);
+	if ((count & 0xffff) <= active) {
+		pte_val(*ptep) |= _PAGE_INVALID;
+		mm->context.flush_mm = 1;
+	} else
+		__ptep_ipte(addr, ptep);
+	atomic_sub(0x10000, &mm->context.attach_count);
+	return old;
 }
 
-static void __crst_table_upgrade(void *arg)
+static inline pgste_t pgste_get_lock(pte_t *ptep)
 {
-	struct mm_struct *mm = arg;
+	unsigned long new = 0;
+#ifdef CONFIG_PGSTE
+	unsigned long old;
 
-	if (current->active_mm == mm) {
-		clear_user_asce();
-		set_user_asce(mm);
-	}
-	__tlb_flush_local();
+	preempt_disable();
+	asm(
+		"	lg	%0,%2\n"
+		"0:	lgr	%1,%0\n"
+		"	nihh	%0,0xff7f\n"	/* clear PCL bit in old */
+		"	oihh	%1,0x0080\n"	/* set PCL bit in new */
+		"	csg	%0,%1,%2\n"
+		"	jl	0b\n"
+		: "=&d" (old), "=&d" (new), "=Q" (ptep[PTRS_PER_PTE])
+		: "Q" (ptep[PTRS_PER_PTE]) : "cc", "memory");
+#endif
+	return __pgste(new);
 }
 
-int crst_table_upgrade(struct mm_struct *mm, unsigned long limit)
+static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste)
 {
-	unsigned long *table, *pgd;
-	unsigned long entry;
-	int flush;
-
-	BUG_ON(limit > TASK_MAX_SIZE);
-	flush = 0;
-repeat:
-	table = crst_table_alloc(mm);
-	if (!table)
-		return -ENOMEM;
-	spin_lock_bh(&mm->page_table_lock);
-	if (mm->context.asce_limit < limit) {
-		pgd = (unsigned long *) mm->pgd;
-		if (mm->context.asce_limit <= (1UL << 31)) {
-			entry = _REGION3_ENTRY_EMPTY;
-			mm->context.asce_limit = 1UL << 42;
-			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
-						_ASCE_USER_BITS |
-						_ASCE_TYPE_REGION3;
-		} else {
-			entry = _REGION2_ENTRY_EMPTY;
-			mm->context.asce_limit = 1UL << 53;
-			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
-						_ASCE_USER_BITS |
-						_ASCE_TYPE_REGION2;
-		}
-		crst_table_init(table, entry);
-		pgd_populate(mm, (pgd_t *) table, (pud_t *) pgd);
-		mm->pgd = (pgd_t *) table;
-		mm->task_size = mm->context.asce_limit;
-		table = NULL;
-		flush = 1;
-	}
-	spin_unlock_bh(&mm->page_table_lock);
-	if (table)
-		crst_table_free(mm, table);
-	if (mm->context.asce_limit < limit)
-		goto repeat;
-	if (flush)
-		on_each_cpu(__crst_table_upgrade, mm, 0);
-	return 0;
+#ifdef CONFIG_PGSTE
+	asm(
+		"	nihh	%1,0xff7f\n"	/* clear PCL bit */
+		"	stg	%1,%0\n"
+		: "=Q" (ptep[PTRS_PER_PTE])
+		: "d" (pgste_val(pgste)), "Q" (ptep[PTRS_PER_PTE])
+		: "cc", "memory");
+	preempt_enable();
+#endif
 }
 
-void crst_table_downgrade(struct mm_struct *mm, unsigned long limit)
+static inline pgste_t pgste_get(pte_t *ptep)
 {
-	pgd_t *pgd;
-
-	if (current->active_mm == mm) {
-		clear_user_asce();
-		__tlb_flush_mm(mm);
-	}
-	while (mm->context.asce_limit > limit) {
-		pgd = mm->pgd;
-		switch (pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) {
-		case _REGION_ENTRY_TYPE_R2:
-			mm->context.asce_limit = 1UL << 42;
-			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
-						_ASCE_USER_BITS |
-						_ASCE_TYPE_REGION3;
-			break;
-		case _REGION_ENTRY_TYPE_R3:
-			mm->context.asce_limit = 1UL << 31;
-			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
-						_ASCE_USER_BITS |
-						_ASCE_TYPE_SEGMENT;
-			break;
-		default:
-			BUG();
-		}
-		mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN);
-		mm->task_size = mm->context.asce_limit;
-		crst_table_free(mm, (unsigned long *) pgd);
-	}
-	if (current->active_mm == mm)
-		set_user_asce(mm);
+	unsigned long pgste = 0;
+#ifdef CONFIG_PGSTE
+	pgste = *(unsigned long *)(ptep + PTRS_PER_PTE);
+#endif
+	return __pgste(pgste);
 }
 
+static inline void pgste_set(pte_t *ptep, pgste_t pgste)
+{
 #ifdef CONFIG_PGSTE
+	*(pgste_t *)(ptep + PTRS_PER_PTE) = pgste;
+#endif
+}
 
-/**
- * gmap_alloc - allocate a guest address space
- * @mm: pointer to the parent mm_struct
- * @limit: maximum address of the gmap address space
- *
- * Returns a guest address space structure.
- */
-struct gmap *gmap_alloc(struct mm_struct *mm, unsigned long limit)
+static inline pgste_t pgste_update_all(pte_t pte, pgste_t pgste,
+				       struct mm_struct *mm)
 {
-	struct gmap *gmap;
-	struct page *page;
-	unsigned long *table;
-	unsigned long etype, atype;
-
-	if (limit < (1UL << 31)) {
-		limit = (1UL << 31) - 1;
-		atype = _ASCE_TYPE_SEGMENT;
-		etype = _SEGMENT_ENTRY_EMPTY;
-	} else if (limit < (1UL << 42)) {
-		limit = (1UL << 42) - 1;
-		atype = _ASCE_TYPE_REGION3;
-		etype = _REGION3_ENTRY_EMPTY;
-	} else if (limit < (1UL << 53)) {
-		limit = (1UL << 53) - 1;
-		atype = _ASCE_TYPE_REGION2;
-		etype = _REGION2_ENTRY_EMPTY;
-	} else {
-		limit = -1UL;
-		atype = _ASCE_TYPE_REGION1;
-		etype = _REGION1_ENTRY_EMPTY;
-	}
-	gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL);
-	if (!gmap)
-		goto out;
-	INIT_LIST_HEAD(&gmap->crst_list);
-	INIT_RADIX_TREE(&gmap->guest_to_host, GFP_KERNEL);
-	INIT_RADIX_TREE(&gmap->host_to_guest, GFP_ATOMIC);
-	spin_lock_init(&gmap->guest_table_lock);
-	gmap->mm = mm;
-	page = alloc_pages(GFP_KERNEL, 2);
-	if (!page)
-		goto out_free;
-	page->index = 0;
-	list_add(&page->lru, &gmap->crst_list);
-	table = (unsigned long *) page_to_phys(page);
-	crst_table_init(table, etype);
-	gmap->table = table;
-	gmap->asce = atype | _ASCE_TABLE_LENGTH |
-		_ASCE_USER_BITS | __pa(table);
-	gmap->asce_end = limit;
-	down_write(&mm->mmap_sem);
-	list_add(&gmap->list, &mm->context.gmap_list);
-	up_write(&mm->mmap_sem);
-	return gmap;
-
-out_free:
-	kfree(gmap);
-out:
-	return NULL;
+#ifdef CONFIG_PGSTE
+	unsigned long address, bits, skey;
+
+	if (!mm_use_skey(mm) || pte_val(pte) & _PAGE_INVALID)
+		return pgste;
+	address = pte_val(pte) & PAGE_MASK;
+	skey = (unsigned long) page_get_storage_key(address);
+	bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
+	/* Transfer page changed & referenced bit to guest bits in pgste */
+	pgste_val(pgste) |= bits << 48;		/* GR bit & GC bit */
+	/* Copy page access key and fetch protection bit to pgste */
+	pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT);
+	pgste_val(pgste) |= (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
+#endif
+	return pgste;
+
 }
-EXPORT_SYMBOL_GPL(gmap_alloc);
 
-static void gmap_flush_tlb(struct gmap *gmap)
+static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry,
+				 struct mm_struct *mm)
 {
-	if (MACHINE_HAS_IDTE)
-		__tlb_flush_asce(gmap->mm, gmap->asce);
-	else
-		__tlb_flush_global();
+#ifdef CONFIG_PGSTE
+	unsigned long address;
+	unsigned long nkey;
+
+	if (!mm_use_skey(mm) || pte_val(entry) & _PAGE_INVALID)
+		return;
+	VM_BUG_ON(!(pte_val(*ptep) & _PAGE_INVALID));
+	address = pte_val(entry) & PAGE_MASK;
+	/*
+	 * Set page access key and fetch protection bit from pgste.
+	 * The guest C/R information is still in the PGSTE, set real
+	 * key C/R to 0.
+	 */
+	nkey = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56;
+	nkey |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48;
+	page_set_storage_key(address, nkey, 0);
+#endif
 }
 
-static void gmap_radix_tree_free(struct radix_tree_root *root)
+static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry)
 {
-	struct radix_tree_iter iter;
-	unsigned long indices[16];
-	unsigned long index;
-	void **slot;
-	int i, nr;
-
-	/* A radix tree is freed by deleting all of its entries */
-	index = 0;
-	do {
-		nr = 0;
-		radix_tree_for_each_slot(slot, root, &iter, index) {
-			indices[nr] = iter.index;
-			if (++nr == 16)
-				break;
-		}
-		for (i = 0; i < nr; i++) {
-			index = indices[i];
-			radix_tree_delete(root, index);
+#ifdef CONFIG_PGSTE
+	if ((pte_val(entry) & _PAGE_PRESENT) &&
+	    (pte_val(entry) & _PAGE_WRITE) &&
+	    !(pte_val(entry) & _PAGE_INVALID)) {
+		if (!MACHINE_HAS_ESOP) {
+			/*
+			 * Without enhanced suppression-on-protection force
+			 * the dirty bit on for all writable ptes.
+			 */
+			pte_val(entry) |= _PAGE_DIRTY;
+			pte_val(entry) &= ~_PAGE_PROTECT;
 		}
-	} while (nr > 0);
+		if (!(pte_val(entry) & _PAGE_PROTECT))
+			/* This pte allows write access, set user-dirty */
+			pgste_val(pgste) |= PGSTE_UC_BIT;
+	}
+#endif
+	*ptep = entry;
+	return pgste;
 }
 
-/**
- * gmap_free - free a guest address space
- * @gmap: pointer to the guest address space structure
- */
-void gmap_free(struct gmap *gmap)
+static inline pgste_t pgste_ipte_notify(struct mm_struct *mm,
+					unsigned long addr,
+					pte_t *ptep, pgste_t pgste)
 {
-	struct page *page, *next;
-
-	/* Flush tlb. */
-	if (MACHINE_HAS_IDTE)
-		__tlb_flush_asce(gmap->mm, gmap->asce);
-	else
-		__tlb_flush_global();
-
-	/* Free all segment & region tables. */
-	list_for_each_entry_safe(page, next, &gmap->crst_list, lru)
-		__free_pages(page, 2);
-	gmap_radix_tree_free(&gmap->guest_to_host);
-	gmap_radix_tree_free(&gmap->host_to_guest);
-	down_write(&gmap->mm->mmap_sem);
-	list_del(&gmap->list);
-	up_write(&gmap->mm->mmap_sem);
-	kfree(gmap);
+#ifdef CONFIG_PGSTE
+	if (pgste_val(pgste) & PGSTE_IN_BIT) {
+		pgste_val(pgste) &= ~PGSTE_IN_BIT;
+		ptep_notify(mm, addr, ptep);
+	}
+#endif
+	return pgste;
 }
-EXPORT_SYMBOL_GPL(gmap_free);
 
-/**
- * gmap_enable - switch primary space to the guest address space
- * @gmap: pointer to the guest address space structure
- */
-void gmap_enable(struct gmap *gmap)
+static inline pgste_t ptep_xchg_start(struct mm_struct *mm,
+				      unsigned long addr, pte_t *ptep)
 {
-	S390_lowcore.gmap = (unsigned long) gmap;
+	pgste_t pgste = __pgste(0);
+
+	if (mm_has_pgste(mm)) {
+		pgste = pgste_get_lock(ptep);
+		pgste = pgste_ipte_notify(mm, addr, ptep, pgste);
+	}
+	return pgste;
 }
-EXPORT_SYMBOL_GPL(gmap_enable);
 
-/**
- * gmap_disable - switch back to the standard primary address space
- * @gmap: pointer to the guest address space structure
- */
-void gmap_disable(struct gmap *gmap)
+static inline void ptep_xchg_commit(struct mm_struct *mm,
+				    unsigned long addr, pte_t *ptep,
+				    pgste_t pgste, pte_t old, pte_t new)
 {
-	S390_lowcore.gmap = 0UL;
+	if (mm_has_pgste(mm)) {
+		if (pte_val(old) & _PAGE_INVALID)
+			pgste_set_key(ptep, pgste, new, mm);
+		if (pte_val(new) & _PAGE_INVALID) {
+			pgste = pgste_update_all(old, pgste, mm);
+			if ((pgste_val(pgste) & _PGSTE_GPS_USAGE_MASK) ==
+			    _PGSTE_GPS_USAGE_UNUSED)
+				pte_val(old) |= _PAGE_UNUSED;
+		}
+		pgste = pgste_set_pte(ptep, pgste, new);
+		pgste_set_unlock(ptep, pgste);
+	} else {
+		*ptep = new;
+	}
 }
-EXPORT_SYMBOL_GPL(gmap_disable);
 
-/*
- * gmap_alloc_table is assumed to be called with mmap_sem held
- */
-static int gmap_alloc_table(struct gmap *gmap, unsigned long *table,
-			    unsigned long init, unsigned long gaddr)
+pte_t ptep_xchg_direct(struct mm_struct *mm, unsigned long addr,
+		       pte_t *ptep, pte_t new)
 {
-	struct page *page;
-	unsigned long *new;
-
-	/* since we dont free the gmap table until gmap_free we can unlock */
-	page = alloc_pages(GFP_KERNEL, 2);
-	if (!page)
-		return -ENOMEM;
-	new = (unsigned long *) page_to_phys(page);
-	crst_table_init(new, init);
-	spin_lock(&gmap->mm->page_table_lock);
-	if (*table & _REGION_ENTRY_INVALID) {
-		list_add(&page->lru, &gmap->crst_list);
-		*table = (unsigned long) new | _REGION_ENTRY_LENGTH |
-			(*table & _REGION_ENTRY_TYPE_MASK);
-		page->index = gaddr;
-		page = NULL;
-	}
-	spin_unlock(&gmap->mm->page_table_lock);
-	if (page)
-		__free_pages(page, 2);
-	return 0;
+	pgste_t pgste;
+	pte_t old;
+
+	pgste = ptep_xchg_start(mm, addr, ptep);
+	old = ptep_flush_direct(mm, addr, ptep);
+	ptep_xchg_commit(mm, addr, ptep, pgste, old, new);
+	return old;
 }
+EXPORT_SYMBOL(ptep_xchg_direct);
 
-/**
- * __gmap_segment_gaddr - find virtual address from segment pointer
- * @entry: pointer to a segment table entry in the guest address space
- *
- * Returns the virtual address in the guest address space for the segment
- */
-static unsigned long __gmap_segment_gaddr(unsigned long *entry)
+pte_t ptep_xchg_lazy(struct mm_struct *mm, unsigned long addr,
+		     pte_t *ptep, pte_t new)
 {
-	struct page *page;
-	unsigned long offset, mask;
-
-	offset = (unsigned long) entry / sizeof(unsigned long);
-	offset = (offset & (PTRS_PER_PMD - 1)) * PMD_SIZE;
-	mask = ~(PTRS_PER_PMD * sizeof(pmd_t) - 1);
-	page = virt_to_page((void *)((unsigned long) entry & mask));
-	return page->index + offset;
+	pgste_t pgste;
+	pte_t old;
+
+	pgste = ptep_xchg_start(mm, addr, ptep);
+	old = ptep_flush_lazy(mm, addr, ptep);
+	ptep_xchg_commit(mm, addr, ptep, pgste, old, new);
+	return old;
 }
+EXPORT_SYMBOL(ptep_xchg_lazy);
 
-/**
- * __gmap_unlink_by_vmaddr - unlink a single segment via a host address
- * @gmap: pointer to the guest address space structure
- * @vmaddr: address in the host process address space
- *
- * Returns 1 if a TLB flush is required
- */
-static int __gmap_unlink_by_vmaddr(struct gmap *gmap, unsigned long vmaddr)
+pte_t ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr,
+			     pte_t *ptep)
 {
-	unsigned long *entry;
-	int flush = 0;
-
-	spin_lock(&gmap->guest_table_lock);
-	entry = radix_tree_delete(&gmap->host_to_guest, vmaddr >> PMD_SHIFT);
-	if (entry) {
-		flush = (*entry != _SEGMENT_ENTRY_INVALID);
-		*entry = _SEGMENT_ENTRY_INVALID;
+	pgste_t pgste;
+	pte_t old;
+
+	pgste = ptep_xchg_start(mm, addr, ptep);
+	old = ptep_flush_lazy(mm, addr, ptep);
+	if (mm_has_pgste(mm)) {
+		pgste = pgste_update_all(old, pgste, mm);
+		pgste_set(ptep, pgste);
 	}
-	spin_unlock(&gmap->guest_table_lock);
-	return flush;
+	return old;
 }
+EXPORT_SYMBOL(ptep_modify_prot_start);
 
-/**
- * __gmap_unmap_by_gaddr - unmap a single segment via a guest address
- * @gmap: pointer to the guest address space structure
- * @gaddr: address in the guest address space
- *
- * Returns 1 if a TLB flush is required
- */
-static int __gmap_unmap_by_gaddr(struct gmap *gmap, unsigned long gaddr)
+void ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
+			     pte_t *ptep, pte_t pte)
 {
-	unsigned long vmaddr;
+	pgste_t pgste;
 
-	vmaddr = (unsigned long) radix_tree_delete(&gmap->guest_to_host,
-						   gaddr >> PMD_SHIFT);
-	return vmaddr ? __gmap_unlink_by_vmaddr(gmap, vmaddr) : 0;
+	if (mm_has_pgste(mm)) {
+		pgste = pgste_get(ptep);
+		pgste_set_key(ptep, pgste, pte, mm);
+		pgste = pgste_set_pte(ptep, pgste, pte);
+		pgste_set_unlock(ptep, pgste);
+	} else {
+		*ptep = pte;
+	}
 }
+EXPORT_SYMBOL(ptep_modify_prot_commit);
 
-/**
- * gmap_unmap_segment - unmap segment from the guest address space
- * @gmap: pointer to the guest address space structure
- * @to: address in the guest address space
- * @len: length of the memory area to unmap
- *
- * Returns 0 if the unmap succeeded, -EINVAL if not.
- */
-int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len)
+static inline pmd_t pmdp_flush_direct(struct mm_struct *mm,
+				      unsigned long addr, pmd_t *pmdp)
 {
-	unsigned long off;
-	int flush;
-
-	if ((to | len) & (PMD_SIZE - 1))
-		return -EINVAL;
-	if (len == 0 || to + len < to)
-		return -EINVAL;
-
-	flush = 0;
-	down_write(&gmap->mm->mmap_sem);
-	for (off = 0; off < len; off += PMD_SIZE)
-		flush |= __gmap_unmap_by_gaddr(gmap, to + off);
-	up_write(&gmap->mm->mmap_sem);
-	if (flush)
-		gmap_flush_tlb(gmap);
-	return 0;
+	int active, count;
+	pmd_t old;
+
+	old = *pmdp;
+	if (pmd_val(old) & _SEGMENT_ENTRY_INVALID)
+		return old;
+	if (!MACHINE_HAS_IDTE) {
+		__pmdp_csp(pmdp);
+		return old;
+	}
+	active = (mm == current->active_mm) ? 1 : 0;
+	count = atomic_add_return(0x10000, &mm->context.attach_count);
+	if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active &&
+	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
+		__pmdp_idte_local(addr, pmdp);
+	else
+		__pmdp_idte(addr, pmdp);
+	atomic_sub(0x10000, &mm->context.attach_count);
+	return old;
+}
+
+static inline pmd_t pmdp_flush_lazy(struct mm_struct *mm,
+				    unsigned long addr, pmd_t *pmdp)
+{
+	int active, count;
+	pmd_t old;
+
+	old = *pmdp;
+	if (pmd_val(old) & _SEGMENT_ENTRY_INVALID)
+		return old;
+	active = (mm == current->active_mm) ? 1 : 0;
+	count = atomic_add_return(0x10000, &mm->context.attach_count);
+	if ((count & 0xffff) <= active) {
+		pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID;
+		mm->context.flush_mm = 1;
+	} else if (MACHINE_HAS_IDTE)
+		__pmdp_idte(addr, pmdp);
+	else
+		__pmdp_csp(pmdp);
+	atomic_sub(0x10000, &mm->context.attach_count);
+	return old;
 }
-EXPORT_SYMBOL_GPL(gmap_unmap_segment);
-
-/**
- * gmap_mmap_segment - map a segment to the guest address space
- * @gmap: pointer to the guest address space structure
- * @from: source address in the parent address space
- * @to: target address in the guest address space
- * @len: length of the memory area to map
- *
- * Returns 0 if the mmap succeeded, -EINVAL or -ENOMEM if not.
- */
-int gmap_map_segment(struct gmap *gmap, unsigned long from,
-		     unsigned long to, unsigned long len)
+
+pmd_t pmdp_xchg_direct(struct mm_struct *mm, unsigned long addr,
+		       pmd_t *pmdp, pmd_t new)
 {
-	unsigned long off;
-	int flush;
-
-	if ((from | to | len) & (PMD_SIZE - 1))
-		return -EINVAL;
-	if (len == 0 || from + len < from || to + len < to ||
-	    from + len - 1 > TASK_MAX_SIZE || to + len - 1 > gmap->asce_end)
-		return -EINVAL;
-
-	flush = 0;
-	down_write(&gmap->mm->mmap_sem);
-	for (off = 0; off < len; off += PMD_SIZE) {
-		/* Remove old translation */
-		flush |= __gmap_unmap_by_gaddr(gmap, to + off);
-		/* Store new translation */
-		if (radix_tree_insert(&gmap->guest_to_host,
-				      (to + off) >> PMD_SHIFT,
-				      (void *) from + off))
-			break;
-	}
-	up_write(&gmap->mm->mmap_sem);
-	if (flush)
-		gmap_flush_tlb(gmap);
-	if (off >= len)
-		return 0;
-	gmap_unmap_segment(gmap, to, len);
-	return -ENOMEM;
+	pmd_t old;
+
+	old = pmdp_flush_direct(mm, addr, pmdp);
+	*pmdp = new;
+	return old;
 }
-EXPORT_SYMBOL_GPL(gmap_map_segment);
-
-/**
- * __gmap_translate - translate a guest address to a user space address
- * @gmap: pointer to guest mapping meta data structure
- * @gaddr: guest address
- *
- * Returns user space address which corresponds to the guest address or
- * -EFAULT if no such mapping exists.
- * This function does not establish potentially missing page table entries.
- * The mmap_sem of the mm that belongs to the address space must be held
- * when this function gets called.
- */
-unsigned long __gmap_translate(struct gmap *gmap, unsigned long gaddr)
+EXPORT_SYMBOL(pmdp_xchg_direct);
+
+pmd_t pmdp_xchg_lazy(struct mm_struct *mm, unsigned long addr,
+		     pmd_t *pmdp, pmd_t new)
 {
-	unsigned long vmaddr;
+	pmd_t old;
 
-	vmaddr = (unsigned long)
-		radix_tree_lookup(&gmap->guest_to_host, gaddr >> PMD_SHIFT);
-	return vmaddr ? (vmaddr | (gaddr & ~PMD_MASK)) : -EFAULT;
+	old = pmdp_flush_lazy(mm, addr, pmdp);
+	*pmdp = new;
+	return old;
 }
-EXPORT_SYMBOL_GPL(__gmap_translate);
-
-/**
- * gmap_translate - translate a guest address to a user space address
- * @gmap: pointer to guest mapping meta data structure
- * @gaddr: guest address
- *
- * Returns user space address which corresponds to the guest address or
- * -EFAULT if no such mapping exists.
- * This function does not establish potentially missing page table entries.
- */
-unsigned long gmap_translate(struct gmap *gmap, unsigned long gaddr)
+EXPORT_SYMBOL(pmdp_xchg_lazy);
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
+				pgtable_t pgtable)
 {
-	unsigned long rc;
+	struct list_head *lh = (struct list_head *) pgtable;
 
-	down_read(&gmap->mm->mmap_sem);
-	rc = __gmap_translate(gmap, gaddr);
-	up_read(&gmap->mm->mmap_sem);
-	return rc;
+	assert_spin_locked(pmd_lockptr(mm, pmdp));
+
+	/* FIFO */
+	if (!pmd_huge_pte(mm, pmdp))
+		INIT_LIST_HEAD(lh);
+	else
+		list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp));
+	pmd_huge_pte(mm, pmdp) = pgtable;
 }
-EXPORT_SYMBOL_GPL(gmap_translate);
 
-/**
- * gmap_unlink - disconnect a page table from the gmap shadow tables
- * @gmap: pointer to guest mapping meta data structure
- * @table: pointer to the host page table
- * @vmaddr: vm address associated with the host page table
- */
-static void gmap_unlink(struct mm_struct *mm, unsigned long *table,
-			unsigned long vmaddr)
+pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
 {
-	struct gmap *gmap;
-	int flush;
+	struct list_head *lh;
+	pgtable_t pgtable;
+	pte_t *ptep;
+
+	assert_spin_locked(pmd_lockptr(mm, pmdp));
 
-	list_for_each_entry(gmap, &mm->context.gmap_list, list) {
-		flush = __gmap_unlink_by_vmaddr(gmap, vmaddr);
-		if (flush)
-			gmap_flush_tlb(gmap);
+	/* FIFO */
+	pgtable = pmd_huge_pte(mm, pmdp);
+	lh = (struct list_head *) pgtable;
+	if (list_empty(lh))
+		pmd_huge_pte(mm, pmdp) = NULL;
+	else {
+		pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next;
+		list_del(lh);
 	}
+	ptep = (pte_t *) pgtable;
+	pte_val(*ptep) = _PAGE_INVALID;
+	ptep++;
+	pte_val(*ptep) = _PAGE_INVALID;
+	return pgtable;
 }
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
-/**
- * gmap_link - set up shadow page tables to connect a host to a guest address
- * @gmap: pointer to guest mapping meta data structure
- * @gaddr: guest address
- * @vmaddr: vm address
- *
- * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT
- * if the vm address is already mapped to a different guest segment.
- * The mmap_sem of the mm that belongs to the address space must be held
- * when this function gets called.
- */
-int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
+#ifdef CONFIG_PGSTE
+void ptep_set_pte_at(struct mm_struct *mm, unsigned long addr,
+		     pte_t *ptep, pte_t entry)
 {
-	struct mm_struct *mm;
-	unsigned long *table;
-	spinlock_t *ptl;
-	pgd_t *pgd;
-	pud_t *pud;
-	pmd_t *pmd;
-	int rc;
-
-	/* Create higher level tables in the gmap page table */
-	table = gmap->table;
-	if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION1) {
-		table += (gaddr >> 53) & 0x7ff;
-		if ((*table & _REGION_ENTRY_INVALID) &&
-		    gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY,
-				     gaddr & 0xffe0000000000000UL))
-			return -ENOMEM;
-		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
-	}
-	if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION2) {
-		table += (gaddr >> 42) & 0x7ff;
-		if ((*table & _REGION_ENTRY_INVALID) &&
-		    gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY,
-				     gaddr & 0xfffffc0000000000UL))
-			return -ENOMEM;
-		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
-	}
-	if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION3) {
-		table += (gaddr >> 31) & 0x7ff;
-		if ((*table & _REGION_ENTRY_INVALID) &&
-		    gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY,
-				     gaddr & 0xffffffff80000000UL))
-			return -ENOMEM;
-		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
-	}
-	table += (gaddr >> 20) & 0x7ff;
-	/* Walk the parent mm page table */
-	mm = gmap->mm;
-	pgd = pgd_offset(mm, vmaddr);
-	VM_BUG_ON(pgd_none(*pgd));
-	pud = pud_offset(pgd, vmaddr);
-	VM_BUG_ON(pud_none(*pud));
-	pmd = pmd_offset(pud, vmaddr);
-	VM_BUG_ON(pmd_none(*pmd));
-	/* large pmds cannot yet be handled */
-	if (pmd_large(*pmd))
-		return -EFAULT;
-	/* Link gmap segment table entry location to page table. */
-	rc = radix_tree_preload(GFP_KERNEL);
-	if (rc)
-		return rc;
-	ptl = pmd_lock(mm, pmd);
-	spin_lock(&gmap->guest_table_lock);
-	if (*table == _SEGMENT_ENTRY_INVALID) {
-		rc = radix_tree_insert(&gmap->host_to_guest,
-				       vmaddr >> PMD_SHIFT, table);
-		if (!rc)
-			*table = pmd_val(*pmd);
-	} else
-		rc = 0;
-	spin_unlock(&gmap->guest_table_lock);
-	spin_unlock(ptl);
-	radix_tree_preload_end();
-	return rc;
+	pgste_t pgste;
+
+	/* the mm_has_pgste() check is done in set_pte_at() */
+	pgste = pgste_get_lock(ptep);
+	pgste_val(pgste) &= ~_PGSTE_GPS_ZERO;
+	pgste_set_key(ptep, pgste, entry, mm);
+	pgste = pgste_set_pte(ptep, pgste, entry);
+	pgste_set_unlock(ptep, pgste);
 }
 
-/**
- * gmap_fault - resolve a fault on a guest address
- * @gmap: pointer to guest mapping meta data structure
- * @gaddr: guest address
- * @fault_flags: flags to pass down to handle_mm_fault()
- *
- * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT
- * if the vm address is already mapped to a different guest segment.
- */
-int gmap_fault(struct gmap *gmap, unsigned long gaddr,
-	       unsigned int fault_flags)
+void ptep_set_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 {
-	unsigned long vmaddr;
-	int rc;
-	bool unlocked;
-
-	down_read(&gmap->mm->mmap_sem);
-
-retry:
-	unlocked = false;
-	vmaddr = __gmap_translate(gmap, gaddr);
-	if (IS_ERR_VALUE(vmaddr)) {
-		rc = vmaddr;
-		goto out_up;
-	}
-	if (fixup_user_fault(current, gmap->mm, vmaddr, fault_flags,
-			     &unlocked)) {
-		rc = -EFAULT;
-		goto out_up;
-	}
-	/*
-	 * In the case that fixup_user_fault unlocked the mmap_sem during
-	 * faultin redo __gmap_translate to not race with a map/unmap_segment.
-	 */
-	if (unlocked)
-		goto retry;
+	pgste_t pgste;
 
-	rc = __gmap_link(gmap, gaddr, vmaddr);
-out_up:
-	up_read(&gmap->mm->mmap_sem);
-	return rc;
+	pgste = pgste_get_lock(ptep);
+	pgste_val(pgste) |= PGSTE_IN_BIT;
+	pgste_set_unlock(ptep, pgste);
 }
-EXPORT_SYMBOL_GPL(gmap_fault);
 
-static void gmap_zap_swap_entry(swp_entry_t entry, struct mm_struct *mm)
+static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry)
 {
 	if (!non_swap_entry(entry))
 		dec_mm_counter(mm, MM_SWAPENTS);
@@ -620,225 +426,99 @@ static void gmap_zap_swap_entry(swp_entry_t entry, struct mm_struct *mm)
 	free_swap_and_cache(entry);
 }
 
-/*
- * this function is assumed to be called with mmap_sem held
- */
-void __gmap_zap(struct gmap *gmap, unsigned long gaddr)
+void ptep_zap_unused(struct mm_struct *mm, unsigned long addr,
+		     pte_t *ptep, int reset)
 {
-	unsigned long vmaddr, ptev, pgstev;
-	pte_t *ptep, pte;
-	spinlock_t *ptl;
+	unsigned long pgstev;
 	pgste_t pgste;
+	pte_t pte;
 
-	/* Find the vm address for the guest address */
-	vmaddr = (unsigned long) radix_tree_lookup(&gmap->guest_to_host,
-						   gaddr >> PMD_SHIFT);
-	if (!vmaddr)
-		return;
-	vmaddr |= gaddr & ~PMD_MASK;
-	/* Get pointer to the page table entry */
-	ptep = get_locked_pte(gmap->mm, vmaddr, &ptl);
-	if (unlikely(!ptep))
-		return;
-	pte = *ptep;
-	if (!pte_swap(pte))
-		goto out_pte;
 	/* Zap unused and logically-zero pages */
 	pgste = pgste_get_lock(ptep);
 	pgstev = pgste_val(pgste);
-	ptev = pte_val(pte);
-	if (((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED) ||
-	    ((pgstev & _PGSTE_GPS_ZERO) && (ptev & _PAGE_INVALID))) {
-		gmap_zap_swap_entry(pte_to_swp_entry(pte), gmap->mm);
-		pte_clear(gmap->mm, vmaddr, ptep);
-	}
+	pte = *ptep;
+	if (pte_swap(pte) &&
+	    ((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED ||
+	     (pgstev & _PGSTE_GPS_ZERO))) {
+		ptep_zap_swap_entry(mm, pte_to_swp_entry(pte));
+		pte_clear(mm, addr, ptep);
+	}
+	if (reset)
+		pgste_val(pgste) &= ~_PGSTE_GPS_USAGE_MASK;
 	pgste_set_unlock(ptep, pgste);
-out_pte:
-	pte_unmap_unlock(ptep, ptl);
 }
-EXPORT_SYMBOL_GPL(__gmap_zap);
 
-void gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to)
+void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 {
-	unsigned long gaddr, vmaddr, size;
-	struct vm_area_struct *vma;
-
-	down_read(&gmap->mm->mmap_sem);
-	for (gaddr = from; gaddr < to;
-	     gaddr = (gaddr + PMD_SIZE) & PMD_MASK) {
-		/* Find the vm address for the guest address */
-		vmaddr = (unsigned long)
-			radix_tree_lookup(&gmap->guest_to_host,
-					  gaddr >> PMD_SHIFT);
-		if (!vmaddr)
-			continue;
-		vmaddr |= gaddr & ~PMD_MASK;
-		/* Find vma in the parent mm */
-		vma = find_vma(gmap->mm, vmaddr);
-		size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK));
-		zap_page_range(vma, vmaddr, size, NULL);
-	}
-	up_read(&gmap->mm->mmap_sem);
-}
-EXPORT_SYMBOL_GPL(gmap_discard);
-
-static LIST_HEAD(gmap_notifier_list);
-static DEFINE_SPINLOCK(gmap_notifier_lock);
+	unsigned long ptev;
+	pgste_t pgste;
 
-/**
- * gmap_register_ipte_notifier - register a pte invalidation callback
- * @nb: pointer to the gmap notifier block
- */
-void gmap_register_ipte_notifier(struct gmap_notifier *nb)
-{
-	spin_lock(&gmap_notifier_lock);
-	list_add(&nb->list, &gmap_notifier_list);
-	spin_unlock(&gmap_notifier_lock);
+	/* Clear storage key */
+	pgste = pgste_get_lock(ptep);
+	pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT |
+			      PGSTE_GR_BIT | PGSTE_GC_BIT);
+	ptev = pte_val(*ptep);
+	if (!(ptev & _PAGE_INVALID) && (ptev & _PAGE_WRITE))
+		page_set_storage_key(ptev & PAGE_MASK, PAGE_DEFAULT_KEY, 1);
+	pgste_set_unlock(ptep, pgste);
 }
-EXPORT_SYMBOL_GPL(gmap_register_ipte_notifier);
 
-/**
- * gmap_unregister_ipte_notifier - remove a pte invalidation callback
- * @nb: pointer to the gmap notifier block
- */
-void gmap_unregister_ipte_notifier(struct gmap_notifier *nb)
-{
-	spin_lock(&gmap_notifier_lock);
-	list_del_init(&nb->list);
-	spin_unlock(&gmap_notifier_lock);
-}
-EXPORT_SYMBOL_GPL(gmap_unregister_ipte_notifier);
-
-/**
- * gmap_ipte_notify - mark a range of ptes for invalidation notification
- * @gmap: pointer to guest mapping meta data structure
- * @gaddr: virtual address in the guest address space
- * @len: size of area
- *
- * Returns 0 if for each page in the given range a gmap mapping exists and
- * the invalidation notification could be set. If the gmap mapping is missing
- * for one or more pages -EFAULT is returned. If no memory could be allocated
- * -ENOMEM is returned. This function establishes missing page table entries.
+/*
+ * Test and reset if a guest page is dirty
  */
-int gmap_ipte_notify(struct gmap *gmap, unsigned long gaddr, unsigned long len)
+bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr)
 {
-	unsigned long addr;
 	spinlock_t *ptl;
-	pte_t *ptep, entry;
 	pgste_t pgste;
-	bool unlocked;
-	int rc = 0;
-
-	if ((gaddr & ~PAGE_MASK) || (len & ~PAGE_MASK))
-		return -EINVAL;
-	down_read(&gmap->mm->mmap_sem);
-	while (len) {
-		unlocked = false;
-		/* Convert gmap address and connect the page tables */
-		addr = __gmap_translate(gmap, gaddr);
-		if (IS_ERR_VALUE(addr)) {
-			rc = addr;
-			break;
-		}
-		/* Get the page mapped */
-		if (fixup_user_fault(current, gmap->mm, addr, FAULT_FLAG_WRITE,
-				     &unlocked)) {
-			rc = -EFAULT;
-			break;
-		}
-		/* While trying to map mmap_sem got unlocked. Let us retry */
-		if (unlocked)
-			continue;
-		rc = __gmap_link(gmap, gaddr, addr);
-		if (rc)
-			break;
-		/* Walk the process page table, lock and get pte pointer */
-		ptep = get_locked_pte(gmap->mm, addr, &ptl);
-		VM_BUG_ON(!ptep);
-		/* Set notification bit in the pgste of the pte */
-		entry = *ptep;
-		if ((pte_val(entry) & (_PAGE_INVALID | _PAGE_PROTECT)) == 0) {
-			pgste = pgste_get_lock(ptep);
-			pgste_val(pgste) |= PGSTE_IN_BIT;
-			pgste_set_unlock(ptep, pgste);
-			gaddr += PAGE_SIZE;
-			len -= PAGE_SIZE;
-		}
-		pte_unmap_unlock(ptep, ptl);
-	}
-	up_read(&gmap->mm->mmap_sem);
-	return rc;
-}
-EXPORT_SYMBOL_GPL(gmap_ipte_notify);
-
-/**
- * gmap_do_ipte_notify - call all invalidation callbacks for a specific pte.
- * @mm: pointer to the process mm_struct
- * @addr: virtual address in the process address space
- * @pte: pointer to the page table entry
- *
- * This function is assumed to be called with the page table lock held
- * for the pte to notify.
- */
-void gmap_do_ipte_notify(struct mm_struct *mm, unsigned long vmaddr, pte_t *pte)
-{
-	unsigned long offset, gaddr;
-	unsigned long *table;
-	struct gmap_notifier *nb;
-	struct gmap *gmap;
-
-	offset = ((unsigned long) pte) & (255 * sizeof(pte_t));
-	offset = offset * (4096 / sizeof(pte_t));
-	spin_lock(&gmap_notifier_lock);
-	list_for_each_entry(gmap, &mm->context.gmap_list, list) {
-		table = radix_tree_lookup(&gmap->host_to_guest,
-					  vmaddr >> PMD_SHIFT);
-		if (!table)
-			continue;
-		gaddr = __gmap_segment_gaddr(table) + offset;
-		list_for_each_entry(nb, &gmap_notifier_list, list)
-			nb->notifier_call(gmap, gaddr);
+	pte_t *ptep;
+	pte_t pte;
+	bool dirty;
+
+	ptep = get_locked_pte(mm, addr, &ptl);
+	if (unlikely(!ptep))
+		return false;
+
+	pgste = pgste_get_lock(ptep);
+	dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT);
+	pgste_val(pgste) &= ~PGSTE_UC_BIT;
+	pte = *ptep;
+	if (dirty && (pte_val(pte) & _PAGE_PRESENT)) {
+		pgste = pgste_ipte_notify(mm, addr, ptep, pgste);
+		__ptep_ipte(addr, ptep);
+		if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE))
+			pte_val(pte) |= _PAGE_PROTECT;
+		else
+			pte_val(pte) |= _PAGE_INVALID;
+		*ptep = pte;
 	}
-	spin_unlock(&gmap_notifier_lock);
+	pgste_set_unlock(ptep, pgste);
+
+	spin_unlock(ptl);
+	return dirty;
 }
-EXPORT_SYMBOL_GPL(gmap_do_ipte_notify);
+EXPORT_SYMBOL_GPL(test_and_clear_guest_dirty);
 
 int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
-			  unsigned long key, bool nq)
+			  unsigned char key, bool nq)
 {
+	unsigned long keyul;
 	spinlock_t *ptl;
 	pgste_t old, new;
 	pte_t *ptep;
-	bool unlocked;
 
 	down_read(&mm->mmap_sem);
-retry:
-	unlocked = false;
 	ptep = get_locked_pte(mm, addr, &ptl);
 	if (unlikely(!ptep)) {
 		up_read(&mm->mmap_sem);
 		return -EFAULT;
 	}
-	if (!(pte_val(*ptep) & _PAGE_INVALID) &&
-	     (pte_val(*ptep) & _PAGE_PROTECT)) {
-		pte_unmap_unlock(ptep, ptl);
-		/*
-		 * We do not really care about unlocked. We will retry either
-		 * way. But this allows fixup_user_fault to enable userfaultfd.
-		 */
-		if (fixup_user_fault(current, mm, addr, FAULT_FLAG_WRITE,
-				     &unlocked)) {
-			up_read(&mm->mmap_sem);
-			return -EFAULT;
-		}
-		goto retry;
-	}
 
 	new = old = pgste_get_lock(ptep);
 	pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT |
 			    PGSTE_ACC_BITS | PGSTE_FP_BIT);
-	pgste_val(new) |= (key & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48;
-	pgste_val(new) |= (key & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
+	keyul = (unsigned long) key;
+	pgste_val(new) |= (keyul & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48;
+	pgste_val(new) |= (keyul & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
 	if (!(pte_val(*ptep) & _PAGE_INVALID)) {
 		unsigned long address, bits, skey;
 
@@ -863,13 +543,12 @@ retry:
 }
 EXPORT_SYMBOL(set_guest_storage_key);
 
-unsigned long get_guest_storage_key(struct mm_struct *mm, unsigned long addr)
+unsigned char get_guest_storage_key(struct mm_struct *mm, unsigned long addr)
 {
+	unsigned char key;
 	spinlock_t *ptl;
 	pgste_t pgste;
 	pte_t *ptep;
-	uint64_t physaddr;
-	unsigned long key = 0;
 
 	down_read(&mm->mmap_sem);
 	ptep = get_locked_pte(mm, addr, &ptl);
@@ -880,13 +559,12 @@ unsigned long get_guest_storage_key(struct mm_struct *mm, unsigned long addr)
 	pgste = pgste_get_lock(ptep);
 
 	if (pte_val(*ptep) & _PAGE_INVALID) {
-		key |= (pgste_val(pgste) & PGSTE_ACC_BITS) >> 56;
+		key  = (pgste_val(pgste) & PGSTE_ACC_BITS) >> 56;
 		key |= (pgste_val(pgste) & PGSTE_FP_BIT) >> 56;
 		key |= (pgste_val(pgste) & PGSTE_GR_BIT) >> 48;
 		key |= (pgste_val(pgste) & PGSTE_GC_BIT) >> 48;
 	} else {
-		physaddr = pte_val(*ptep) & PAGE_MASK;
-		key = page_get_storage_key(physaddr);
+		key = page_get_storage_key(pte_val(*ptep) & PAGE_MASK);
 
 		/* Reflect guest's logical view, not physical */
 		if (pgste_val(pgste) & PGSTE_GR_BIT)
@@ -901,471 +579,4 @@ unsigned long get_guest_storage_key(struct mm_struct *mm, unsigned long addr)
 	return key;
 }
 EXPORT_SYMBOL(get_guest_storage_key);
-
-static int page_table_allocate_pgste_min = 0;
-static int page_table_allocate_pgste_max = 1;
-int page_table_allocate_pgste = 0;
-EXPORT_SYMBOL(page_table_allocate_pgste);
-
-static struct ctl_table page_table_sysctl[] = {
-	{
-		.procname	= "allocate_pgste",
-		.data		= &page_table_allocate_pgste,
-		.maxlen		= sizeof(int),
-		.mode		= S_IRUGO | S_IWUSR,
-		.proc_handler	= proc_dointvec,
-		.extra1		= &page_table_allocate_pgste_min,
-		.extra2		= &page_table_allocate_pgste_max,
-	},
-	{ }
-};
-
-static struct ctl_table page_table_sysctl_dir[] = {
-	{
-		.procname	= "vm",
-		.maxlen		= 0,
-		.mode		= 0555,
-		.child		= page_table_sysctl,
-	},
-	{ }
-};
-
-static int __init page_table_register_sysctl(void)
-{
-	return register_sysctl_table(page_table_sysctl_dir) ? 0 : -ENOMEM;
-}
-__initcall(page_table_register_sysctl);
-
-#else /* CONFIG_PGSTE */
-
-static inline void gmap_unlink(struct mm_struct *mm, unsigned long *table,
-			unsigned long vmaddr)
-{
-}
-
-#endif /* CONFIG_PGSTE */
-
-static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits)
-{
-	unsigned int old, new;
-
-	do {
-		old = atomic_read(v);
-		new = old ^ bits;
-	} while (atomic_cmpxchg(v, old, new) != old);
-	return new;
-}
-
-/*
- * page table entry allocation/free routines.
- */
-unsigned long *page_table_alloc(struct mm_struct *mm)
-{
-	unsigned long *table;
-	struct page *page;
-	unsigned int mask, bit;
-
-	/* Try to get a fragment of a 4K page as a 2K page table */
-	if (!mm_alloc_pgste(mm)) {
-		table = NULL;
-		spin_lock_bh(&mm->context.list_lock);
-		if (!list_empty(&mm->context.pgtable_list)) {
-			page = list_first_entry(&mm->context.pgtable_list,
-						struct page, lru);
-			mask = atomic_read(&page->_mapcount);
-			mask = (mask | (mask >> 4)) & 3;
-			if (mask != 3) {
-				table = (unsigned long *) page_to_phys(page);
-				bit = mask & 1;		/* =1 -> second 2K */
-				if (bit)
-					table += PTRS_PER_PTE;
-				atomic_xor_bits(&page->_mapcount, 1U << bit);
-				list_del(&page->lru);
-			}
-		}
-		spin_unlock_bh(&mm->context.list_lock);
-		if (table)
-			return table;
-	}
-	/* Allocate a fresh page */
-	page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
-	if (!page)
-		return NULL;
-	if (!pgtable_page_ctor(page)) {
-		__free_page(page);
-		return NULL;
-	}
-	/* Initialize page table */
-	table = (unsigned long *) page_to_phys(page);
-	if (mm_alloc_pgste(mm)) {
-		/* Return 4K page table with PGSTEs */
-		atomic_set(&page->_mapcount, 3);
-		clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
-		clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2);
-	} else {
-		/* Return the first 2K fragment of the page */
-		atomic_set(&page->_mapcount, 1);
-		clear_table(table, _PAGE_INVALID, PAGE_SIZE);
-		spin_lock_bh(&mm->context.list_lock);
-		list_add(&page->lru, &mm->context.pgtable_list);
-		spin_unlock_bh(&mm->context.list_lock);
-	}
-	return table;
-}
-
-void page_table_free(struct mm_struct *mm, unsigned long *table)
-{
-	struct page *page;
-	unsigned int bit, mask;
-
-	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
-	if (!mm_alloc_pgste(mm)) {
-		/* Free 2K page table fragment of a 4K page */
-		bit = (__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t));
-		spin_lock_bh(&mm->context.list_lock);
-		mask = atomic_xor_bits(&page->_mapcount, 1U << bit);
-		if (mask & 3)
-			list_add(&page->lru, &mm->context.pgtable_list);
-		else
-			list_del(&page->lru);
-		spin_unlock_bh(&mm->context.list_lock);
-		if (mask != 0)
-			return;
-	}
-
-	pgtable_page_dtor(page);
-	atomic_set(&page->_mapcount, -1);
-	__free_page(page);
-}
-
-void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table,
-			 unsigned long vmaddr)
-{
-	struct mm_struct *mm;
-	struct page *page;
-	unsigned int bit, mask;
-
-	mm = tlb->mm;
-	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
-	if (mm_alloc_pgste(mm)) {
-		gmap_unlink(mm, table, vmaddr);
-		table = (unsigned long *) (__pa(table) | 3);
-		tlb_remove_table(tlb, table);
-		return;
-	}
-	bit = (__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t));
-	spin_lock_bh(&mm->context.list_lock);
-	mask = atomic_xor_bits(&page->_mapcount, 0x11U << bit);
-	if (mask & 3)
-		list_add_tail(&page->lru, &mm->context.pgtable_list);
-	else
-		list_del(&page->lru);
-	spin_unlock_bh(&mm->context.list_lock);
-	table = (unsigned long *) (__pa(table) | (1U << bit));
-	tlb_remove_table(tlb, table);
-}
-
-static void __tlb_remove_table(void *_table)
-{
-	unsigned int mask = (unsigned long) _table & 3;
-	void *table = (void *)((unsigned long) _table ^ mask);
-	struct page *page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
-
-	switch (mask) {
-	case 0:		/* pmd or pud */
-		free_pages((unsigned long) table, 2);
-		break;
-	case 1:		/* lower 2K of a 4K page table */
-	case 2:		/* higher 2K of a 4K page table */
-		if (atomic_xor_bits(&page->_mapcount, mask << 4) != 0)
-			break;
-		/* fallthrough */
-	case 3:		/* 4K page table with pgstes */
-		pgtable_page_dtor(page);
-		atomic_set(&page->_mapcount, -1);
-		__free_page(page);
-		break;
-	}
-}
-
-static void tlb_remove_table_smp_sync(void *arg)
-{
-	/* Simply deliver the interrupt */
-}
-
-static void tlb_remove_table_one(void *table)
-{
-	/*
-	 * This isn't an RCU grace period and hence the page-tables cannot be
-	 * assumed to be actually RCU-freed.
-	 *
-	 * It is however sufficient for software page-table walkers that rely
-	 * on IRQ disabling. See the comment near struct mmu_table_batch.
-	 */
-	smp_call_function(tlb_remove_table_smp_sync, NULL, 1);
-	__tlb_remove_table(table);
-}
-
-static void tlb_remove_table_rcu(struct rcu_head *head)
-{
-	struct mmu_table_batch *batch;
-	int i;
-
-	batch = container_of(head, struct mmu_table_batch, rcu);
-
-	for (i = 0; i < batch->nr; i++)
-		__tlb_remove_table(batch->tables[i]);
-
-	free_page((unsigned long)batch);
-}
-
-void tlb_table_flush(struct mmu_gather *tlb)
-{
-	struct mmu_table_batch **batch = &tlb->batch;
-
-	if (*batch) {
-		call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu);
-		*batch = NULL;
-	}
-}
-
-void tlb_remove_table(struct mmu_gather *tlb, void *table)
-{
-	struct mmu_table_batch **batch = &tlb->batch;
-
-	tlb->mm->context.flush_mm = 1;
-	if (*batch == NULL) {
-		*batch = (struct mmu_table_batch *)
-			__get_free_page(GFP_NOWAIT | __GFP_NOWARN);
-		if (*batch == NULL) {
-			__tlb_flush_mm_lazy(tlb->mm);
-			tlb_remove_table_one(table);
-			return;
-		}
-		(*batch)->nr = 0;
-	}
-	(*batch)->tables[(*batch)->nr++] = table;
-	if ((*batch)->nr == MAX_TABLE_BATCH)
-		tlb_flush_mmu(tlb);
-}
-
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-static inline void thp_split_vma(struct vm_area_struct *vma)
-{
-	unsigned long addr;
-
-	for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE)
-		follow_page(vma, addr, FOLL_SPLIT);
-}
-
-static inline void thp_split_mm(struct mm_struct *mm)
-{
-	struct vm_area_struct *vma;
-
-	for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) {
-		thp_split_vma(vma);
-		vma->vm_flags &= ~VM_HUGEPAGE;
-		vma->vm_flags |= VM_NOHUGEPAGE;
-	}
-	mm->def_flags |= VM_NOHUGEPAGE;
-}
-#else
-static inline void thp_split_mm(struct mm_struct *mm)
-{
-}
-#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
-
-/*
- * switch on pgstes for its userspace process (for kvm)
- */
-int s390_enable_sie(void)
-{
-	struct mm_struct *mm = current->mm;
-
-	/* Do we have pgstes? if yes, we are done */
-	if (mm_has_pgste(mm))
-		return 0;
-	/* Fail if the page tables are 2K */
-	if (!mm_alloc_pgste(mm))
-		return -EINVAL;
-	down_write(&mm->mmap_sem);
-	mm->context.has_pgste = 1;
-	/* split thp mappings and disable thp for future mappings */
-	thp_split_mm(mm);
-	up_write(&mm->mmap_sem);
-	return 0;
-}
-EXPORT_SYMBOL_GPL(s390_enable_sie);
-
-/*
- * Enable storage key handling from now on and initialize the storage
- * keys with the default key.
- */
-static int __s390_enable_skey(pte_t *pte, unsigned long addr,
-			      unsigned long next, struct mm_walk *walk)
-{
-	unsigned long ptev;
-	pgste_t pgste;
-
-	pgste = pgste_get_lock(pte);
-	/*
-	 * Remove all zero page mappings,
-	 * after establishing a policy to forbid zero page mappings
-	 * following faults for that page will get fresh anonymous pages
-	 */
-	if (is_zero_pfn(pte_pfn(*pte))) {
-		ptep_flush_direct(walk->mm, addr, pte);
-		pte_val(*pte) = _PAGE_INVALID;
-	}
-	/* Clear storage key */
-	pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT |
-			      PGSTE_GR_BIT | PGSTE_GC_BIT);
-	ptev = pte_val(*pte);
-	if (!(ptev & _PAGE_INVALID) && (ptev & _PAGE_WRITE))
-		page_set_storage_key(ptev & PAGE_MASK, PAGE_DEFAULT_KEY, 1);
-	pgste_set_unlock(pte, pgste);
-	return 0;
-}
-
-int s390_enable_skey(void)
-{
-	struct mm_walk walk = { .pte_entry = __s390_enable_skey };
-	struct mm_struct *mm = current->mm;
-	struct vm_area_struct *vma;
-	int rc = 0;
-
-	down_write(&mm->mmap_sem);
-	if (mm_use_skey(mm))
-		goto out_up;
-
-	mm->context.use_skey = 1;
-	for (vma = mm->mmap; vma; vma = vma->vm_next) {
-		if (ksm_madvise(vma, vma->vm_start, vma->vm_end,
-				MADV_UNMERGEABLE, &vma->vm_flags)) {
-			mm->context.use_skey = 0;
-			rc = -ENOMEM;
-			goto out_up;
-		}
-	}
-	mm->def_flags &= ~VM_MERGEABLE;
-
-	walk.mm = mm;
-	walk_page_range(0, TASK_SIZE, &walk);
-
-out_up:
-	up_write(&mm->mmap_sem);
-	return rc;
-}
-EXPORT_SYMBOL_GPL(s390_enable_skey);
-
-/*
- * Reset CMMA state, make all pages stable again.
- */
-static int __s390_reset_cmma(pte_t *pte, unsigned long addr,
-			     unsigned long next, struct mm_walk *walk)
-{
-	pgste_t pgste;
-
-	pgste = pgste_get_lock(pte);
-	pgste_val(pgste) &= ~_PGSTE_GPS_USAGE_MASK;
-	pgste_set_unlock(pte, pgste);
-	return 0;
-}
-
-void s390_reset_cmma(struct mm_struct *mm)
-{
-	struct mm_walk walk = { .pte_entry = __s390_reset_cmma };
-
-	down_write(&mm->mmap_sem);
-	walk.mm = mm;
-	walk_page_range(0, TASK_SIZE, &walk);
-	up_write(&mm->mmap_sem);
-}
-EXPORT_SYMBOL_GPL(s390_reset_cmma);
-
-/*
- * Test and reset if a guest page is dirty
- */
-bool gmap_test_and_clear_dirty(unsigned long address, struct gmap *gmap)
-{
-	pte_t *pte;
-	spinlock_t *ptl;
-	bool dirty = false;
-
-	pte = get_locked_pte(gmap->mm, address, &ptl);
-	if (unlikely(!pte))
-		return false;
-
-	if (ptep_test_and_clear_user_dirty(gmap->mm, address, pte))
-		dirty = true;
-
-	spin_unlock(ptl);
-	return dirty;
-}
-EXPORT_SYMBOL_GPL(gmap_test_and_clear_dirty);
-
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-int pmdp_clear_flush_young(struct vm_area_struct *vma, unsigned long address,
-			   pmd_t *pmdp)
-{
-	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
-	/* No need to flush TLB
-	 * On s390 reference bits are in storage key and never in TLB */
-	return pmdp_test_and_clear_young(vma, address, pmdp);
-}
-
-int pmdp_set_access_flags(struct vm_area_struct *vma,
-			  unsigned long address, pmd_t *pmdp,
-			  pmd_t entry, int dirty)
-{
-	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
-
-	entry = pmd_mkyoung(entry);
-	if (dirty)
-		entry = pmd_mkdirty(entry);
-	if (pmd_same(*pmdp, entry))
-		return 0;
-	pmdp_invalidate(vma, address, pmdp);
-	set_pmd_at(vma->vm_mm, address, pmdp, entry);
-	return 1;
-}
-
-void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
-				pgtable_t pgtable)
-{
-	struct list_head *lh = (struct list_head *) pgtable;
-
-	assert_spin_locked(pmd_lockptr(mm, pmdp));
-
-	/* FIFO */
-	if (!pmd_huge_pte(mm, pmdp))
-		INIT_LIST_HEAD(lh);
-	else
-		list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp));
-	pmd_huge_pte(mm, pmdp) = pgtable;
-}
-
-pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
-{
-	struct list_head *lh;
-	pgtable_t pgtable;
-	pte_t *ptep;
-
-	assert_spin_locked(pmd_lockptr(mm, pmdp));
-
-	/* FIFO */
-	pgtable = pmd_huge_pte(mm, pmdp);
-	lh = (struct list_head *) pgtable;
-	if (list_empty(lh))
-		pmd_huge_pte(mm, pmdp) = NULL;
-	else {
-		pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next;
-		list_del(lh);
-	}
-	ptep = (pte_t *) pgtable;
-	pte_val(*ptep) = _PAGE_INVALID;
-	ptep++;
-	pte_val(*ptep) = _PAGE_INVALID;
-	return pgtable;
-}
-#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+#endif
diff --git a/arch/s390/oprofile/Makefile b/arch/s390/oprofile/Makefile
index 1bd23017191e..496e4a7ee00e 100644
--- a/arch/s390/oprofile/Makefile
+++ b/arch/s390/oprofile/Makefile
@@ -6,5 +6,5 @@ DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \
 		oprofilefs.o oprofile_stats.o  \
 		timer_int.o )
 
-oprofile-y :=	$(DRIVER_OBJS) init.o backtrace.o
+oprofile-y :=	$(DRIVER_OBJS) init.o
 oprofile-y +=	hwsampler.o
diff --git a/arch/s390/oprofile/backtrace.c b/arch/s390/oprofile/backtrace.c
deleted file mode 100644
index 1884e1759529..000000000000
--- a/arch/s390/oprofile/backtrace.c
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- * S390 Version
- *   Copyright IBM Corp. 2005
- *   Author(s): Andreas Krebbel <Andreas.Krebbel@de.ibm.com>
- */
-
-#include <linux/oprofile.h>
-
-#include <asm/processor.h> /* for struct stack_frame */
-
-static unsigned long
-__show_trace(unsigned int *depth, unsigned long sp,
-	     unsigned long low, unsigned long high)
-{
-	struct stack_frame *sf;
-	struct pt_regs *regs;
-
-	while (*depth) {
-		if (sp < low || sp > high - sizeof(*sf))
-			return sp;
-		sf = (struct stack_frame *) sp;
-		(*depth)--;
-		oprofile_add_trace(sf->gprs[8]);
-
-		/* Follow the backchain.  */
-		while (*depth) {
-			low = sp;
-			sp = sf->back_chain;
-			if (!sp)
-				break;
-			if (sp <= low || sp > high - sizeof(*sf))
-				return sp;
-			sf = (struct stack_frame *) sp;
-			(*depth)--;
-			oprofile_add_trace(sf->gprs[8]);
-
-		}
-
-		if (*depth == 0)
-			break;
-
-		/* Zero backchain detected, check for interrupt frame.  */
-		sp = (unsigned long) (sf + 1);
-		if (sp <= low || sp > high - sizeof(*regs))
-			return sp;
-		regs = (struct pt_regs *) sp;
-		(*depth)--;
-		oprofile_add_trace(sf->gprs[8]);
-		low = sp;
-		sp = regs->gprs[15];
-	}
-	return sp;
-}
-
-void s390_backtrace(struct pt_regs * const regs, unsigned int depth)
-{
-	unsigned long head, frame_size;
-	struct stack_frame* head_sf;
-
-	if (user_mode(regs))
-		return;
-
-	frame_size = STACK_FRAME_OVERHEAD + sizeof(struct pt_regs);
-	head = regs->gprs[15];
-	head_sf = (struct stack_frame*)head;
-
-	if (!head_sf->back_chain)
-		return;
-
-	head = head_sf->back_chain;
-
-	head = __show_trace(&depth, head,
-			    S390_lowcore.async_stack + frame_size - ASYNC_SIZE,
-			    S390_lowcore.async_stack + frame_size);
-
-	__show_trace(&depth, head, S390_lowcore.thread_info,
-		     S390_lowcore.thread_info + THREAD_SIZE);
-}
diff --git a/arch/s390/oprofile/init.c b/arch/s390/oprofile/init.c
index 9cfa2ffaa9d6..791935a65800 100644
--- a/arch/s390/oprofile/init.c
+++ b/arch/s390/oprofile/init.c
@@ -20,8 +20,6 @@
 
 #include "../../../drivers/oprofile/oprof.h"
 
-extern void s390_backtrace(struct pt_regs * const regs, unsigned int depth);
-
 #include "hwsampler.h"
 #include "op_counter.h"
 
@@ -456,6 +454,7 @@ static int oprofile_hwsampler_init(struct oprofile_operations *ops)
 		case 0x2097: case 0x2098: ops->cpu_type = "s390/z10"; break;
 		case 0x2817: case 0x2818: ops->cpu_type = "s390/z196"; break;
 		case 0x2827: case 0x2828: ops->cpu_type = "s390/zEC12"; break;
+		case 0x2964: case 0x2965: ops->cpu_type = "s390/z13"; break;
 		default: return -ENODEV;
 		}
 	}
@@ -494,6 +493,24 @@ static void oprofile_hwsampler_exit(void)
 	hwsampler_shutdown();
 }
 
+static int __s390_backtrace(void *data, unsigned long address)
+{
+	unsigned int *depth = data;
+
+	if (*depth == 0)
+		return 1;
+	(*depth)--;
+	oprofile_add_trace(address);
+	return 0;
+}
+
+static void s390_backtrace(struct pt_regs *regs, unsigned int depth)
+{
+	if (user_mode(regs))
+		return;
+	dump_trace(__s390_backtrace, &depth, NULL, regs->gprs[15]);
+}
+
 int __init oprofile_arch_init(struct oprofile_operations *ops)
 {
 	ops->backtrace = s390_backtrace;
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 8f19c8f9d660..9fd59a7cfcd3 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -637,11 +637,9 @@ static void zpci_cleanup_bus_resources(struct zpci_dev *zdev)
 
 int pcibios_add_device(struct pci_dev *pdev)
 {
-	struct zpci_dev *zdev = to_zpci(pdev);
 	struct resource *res;
 	int i;
 
-	zdev->pdev = pdev;
 	pdev->dev.groups = zpci_attr_groups;
 	zpci_map_resources(pdev);
 
@@ -664,8 +662,7 @@ int pcibios_enable_device(struct pci_dev *pdev, int mask)
 {
 	struct zpci_dev *zdev = to_zpci(pdev);
 
-	zdev->pdev = pdev;
-	zpci_debug_init_device(zdev);
+	zpci_debug_init_device(zdev, dev_name(&pdev->dev));
 	zpci_fmb_enable_device(zdev);
 
 	return pci_enable_resources(pdev, mask);
@@ -677,7 +674,6 @@ void pcibios_disable_device(struct pci_dev *pdev)
 
 	zpci_fmb_disable_device(zdev);
 	zpci_debug_exit_device(zdev);
-	zdev->pdev = NULL;
 }
 
 #ifdef CONFIG_HIBERNATE_CALLBACKS
@@ -864,8 +860,11 @@ static inline int barsize(u8 size)
 
 static int zpci_mem_init(void)
 {
+	BUILD_BUG_ON(!is_power_of_2(__alignof__(struct zpci_fmb)) ||
+		     __alignof__(struct zpci_fmb) < sizeof(struct zpci_fmb));
+
 	zdev_fmb_cache = kmem_cache_create("PCI_FMB_cache", sizeof(struct zpci_fmb),
-				16, 0, NULL);
+					   __alignof__(struct zpci_fmb), 0, NULL);
 	if (!zdev_fmb_cache)
 		goto error_fmb;
 
diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c
index d6e411ed8b1f..21591ddb4c1f 100644
--- a/arch/s390/pci/pci_clp.c
+++ b/arch/s390/pci/pci_clp.c
@@ -8,13 +8,19 @@
 #define KMSG_COMPONENT "zpci"
 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
 
+#include <linux/compat.h>
 #include <linux/kernel.h>
+#include <linux/miscdevice.h>
 #include <linux/slab.h>
 #include <linux/err.h>
 #include <linux/delay.h>
 #include <linux/pci.h>
+#include <linux/uaccess.h>
 #include <asm/pci_debug.h>
 #include <asm/pci_clp.h>
+#include <asm/compat.h>
+#include <asm/clp.h>
+#include <uapi/asm/clp.h>
 
 static inline void zpci_err_clp(unsigned int rsp, int rc)
 {
@@ -27,21 +33,43 @@ static inline void zpci_err_clp(unsigned int rsp, int rc)
 }
 
 /*
- * Call Logical Processor
- * Retry logic is handled by the caller.
+ * Call Logical Processor with c=1, lps=0 and command 1
+ * to get the bit mask of installed logical processors
  */
-static inline u8 clp_instr(void *data)
+static inline int clp_get_ilp(unsigned long *ilp)
+{
+	unsigned long mask;
+	int cc = 3;
+
+	asm volatile (
+		"	.insn	rrf,0xb9a00000,%[mask],%[cmd],8,0\n"
+		"0:	ipm	%[cc]\n"
+		"	srl	%[cc],28\n"
+		"1:\n"
+		EX_TABLE(0b, 1b)
+		: [cc] "+d" (cc), [mask] "=d" (mask) : [cmd] "a" (1)
+		: "cc");
+	*ilp = mask;
+	return cc;
+}
+
+/*
+ * Call Logical Processor with c=0, the give constant lps and an lpcb request.
+ */
+static inline int clp_req(void *data, unsigned int lps)
 {
 	struct { u8 _[CLP_BLK_SIZE]; } *req = data;
 	u64 ignored;
-	u8 cc;
+	int cc = 3;
 
 	asm volatile (
-		"	.insn	rrf,0xb9a00000,%[ign],%[req],0x0,0x2\n"
-		"	ipm	%[cc]\n"
+		"	.insn	rrf,0xb9a00000,%[ign],%[req],0,%[lps]\n"
+		"0:	ipm	%[cc]\n"
 		"	srl	%[cc],28\n"
-		: [cc] "=d" (cc), [ign] "=d" (ignored), "+m" (*req)
-		: [req] "a" (req)
+		"1:\n"
+		EX_TABLE(0b, 1b)
+		: [cc] "+d" (cc), [ign] "=d" (ignored), "+m" (*req)
+		: [req] "a" (req), [lps] "i" (lps)
 		: "cc");
 	return cc;
 }
@@ -90,7 +118,7 @@ static int clp_query_pci_fngrp(struct zpci_dev *zdev, u8 pfgid)
 	rrb->response.hdr.len = sizeof(rrb->response);
 	rrb->request.pfgid = pfgid;
 
-	rc = clp_instr(rrb);
+	rc = clp_req(rrb, CLP_LPS_PCI);
 	if (!rc && rrb->response.hdr.rsp == CLP_RC_OK)
 		clp_store_query_pci_fngrp(zdev, &rrb->response);
 	else {
@@ -143,7 +171,7 @@ static int clp_query_pci_fn(struct zpci_dev *zdev, u32 fh)
 	rrb->response.hdr.len = sizeof(rrb->response);
 	rrb->request.fh = fh;
 
-	rc = clp_instr(rrb);
+	rc = clp_req(rrb, CLP_LPS_PCI);
 	if (!rc && rrb->response.hdr.rsp == CLP_RC_OK) {
 		rc = clp_store_query_pci_fn(zdev, &rrb->response);
 		if (rc)
@@ -214,7 +242,7 @@ static int clp_set_pci_fn(u32 *fh, u8 nr_dma_as, u8 command)
 		rrb->request.oc = command;
 		rrb->request.ndas = nr_dma_as;
 
-		rc = clp_instr(rrb);
+		rc = clp_req(rrb, CLP_LPS_PCI);
 		if (rrb->response.hdr.rsp == CLP_RC_SETPCIFN_BUSY) {
 			retries--;
 			if (retries < 0)
@@ -280,7 +308,7 @@ static int clp_list_pci(struct clp_req_rsp_list_pci *rrb,
 		rrb->request.resume_token = resume_token;
 
 		/* Get PCI function handle list */
-		rc = clp_instr(rrb);
+		rc = clp_req(rrb, CLP_LPS_PCI);
 		if (rc || rrb->response.hdr.rsp != CLP_RC_OK) {
 			zpci_err("List PCI FN:\n");
 			zpci_err_clp(rrb->response.hdr.rsp, rc);
@@ -391,3 +419,198 @@ int clp_rescan_pci_devices_simple(void)
 	clp_free_block(rrb);
 	return rc;
 }
+
+static int clp_base_slpc(struct clp_req *req, struct clp_req_rsp_slpc *lpcb)
+{
+	unsigned long limit = PAGE_SIZE - sizeof(lpcb->request);
+
+	if (lpcb->request.hdr.len != sizeof(lpcb->request) ||
+	    lpcb->response.hdr.len > limit)
+		return -EINVAL;
+	return clp_req(lpcb, CLP_LPS_BASE) ? -EOPNOTSUPP : 0;
+}
+
+static int clp_base_command(struct clp_req *req, struct clp_req_hdr *lpcb)
+{
+	switch (lpcb->cmd) {
+	case 0x0001: /* store logical-processor characteristics */
+		return clp_base_slpc(req, (void *) lpcb);
+	default:
+		return -EINVAL;
+	}
+}
+
+static int clp_pci_slpc(struct clp_req *req, struct clp_req_rsp_slpc *lpcb)
+{
+	unsigned long limit = PAGE_SIZE - sizeof(lpcb->request);
+
+	if (lpcb->request.hdr.len != sizeof(lpcb->request) ||
+	    lpcb->response.hdr.len > limit)
+		return -EINVAL;
+	return clp_req(lpcb, CLP_LPS_PCI) ? -EOPNOTSUPP : 0;
+}
+
+static int clp_pci_list(struct clp_req *req, struct clp_req_rsp_list_pci *lpcb)
+{
+	unsigned long limit = PAGE_SIZE - sizeof(lpcb->request);
+
+	if (lpcb->request.hdr.len != sizeof(lpcb->request) ||
+	    lpcb->response.hdr.len > limit)
+		return -EINVAL;
+	if (lpcb->request.reserved2 != 0)
+		return -EINVAL;
+	return clp_req(lpcb, CLP_LPS_PCI) ? -EOPNOTSUPP : 0;
+}
+
+static int clp_pci_query(struct clp_req *req,
+			 struct clp_req_rsp_query_pci *lpcb)
+{
+	unsigned long limit = PAGE_SIZE - sizeof(lpcb->request);
+
+	if (lpcb->request.hdr.len != sizeof(lpcb->request) ||
+	    lpcb->response.hdr.len > limit)
+		return -EINVAL;
+	if (lpcb->request.reserved2 != 0 || lpcb->request.reserved3 != 0)
+		return -EINVAL;
+	return clp_req(lpcb, CLP_LPS_PCI) ? -EOPNOTSUPP : 0;
+}
+
+static int clp_pci_query_grp(struct clp_req *req,
+			     struct clp_req_rsp_query_pci_grp *lpcb)
+{
+	unsigned long limit = PAGE_SIZE - sizeof(lpcb->request);
+
+	if (lpcb->request.hdr.len != sizeof(lpcb->request) ||
+	    lpcb->response.hdr.len > limit)
+		return -EINVAL;
+	if (lpcb->request.reserved2 != 0 || lpcb->request.reserved3 != 0 ||
+	    lpcb->request.reserved4 != 0)
+		return -EINVAL;
+	return clp_req(lpcb, CLP_LPS_PCI) ? -EOPNOTSUPP : 0;
+}
+
+static int clp_pci_command(struct clp_req *req, struct clp_req_hdr *lpcb)
+{
+	switch (lpcb->cmd) {
+	case 0x0001: /* store logical-processor characteristics */
+		return clp_pci_slpc(req, (void *) lpcb);
+	case 0x0002: /* list PCI functions */
+		return clp_pci_list(req, (void *) lpcb);
+	case 0x0003: /* query PCI function */
+		return clp_pci_query(req, (void *) lpcb);
+	case 0x0004: /* query PCI function group */
+		return clp_pci_query_grp(req, (void *) lpcb);
+	default:
+		return -EINVAL;
+	}
+}
+
+static int clp_normal_command(struct clp_req *req)
+{
+	struct clp_req_hdr *lpcb;
+	void __user *uptr;
+	int rc;
+
+	rc = -EINVAL;
+	if (req->lps != 0 && req->lps != 2)
+		goto out;
+
+	rc = -ENOMEM;
+	lpcb = clp_alloc_block(GFP_KERNEL);
+	if (!lpcb)
+		goto out;
+
+	rc = -EFAULT;
+	uptr = (void __force __user *)(unsigned long) req->data_p;
+	if (copy_from_user(lpcb, uptr, PAGE_SIZE) != 0)
+		goto out_free;
+
+	rc = -EINVAL;
+	if (lpcb->fmt != 0 || lpcb->reserved1 != 0 || lpcb->reserved2 != 0)
+		goto out_free;
+
+	switch (req->lps) {
+	case 0:
+		rc = clp_base_command(req, lpcb);
+		break;
+	case 2:
+		rc = clp_pci_command(req, lpcb);
+		break;
+	}
+	if (rc)
+		goto out_free;
+
+	rc = -EFAULT;
+	if (copy_to_user(uptr, lpcb, PAGE_SIZE) != 0)
+		goto out_free;
+
+	rc = 0;
+
+out_free:
+	clp_free_block(lpcb);
+out:
+	return rc;
+}
+
+static int clp_immediate_command(struct clp_req *req)
+{
+	void __user *uptr;
+	unsigned long ilp;
+	int exists;
+
+	if (req->cmd > 1 || clp_get_ilp(&ilp) != 0)
+		return -EINVAL;
+
+	uptr = (void __force __user *)(unsigned long) req->data_p;
+	if (req->cmd == 0) {
+		/* Command code 0: test for a specific processor */
+		exists = test_bit_inv(req->lps, &ilp);
+		return put_user(exists, (int __user *) uptr);
+	}
+	/* Command code 1: return bit mask of installed processors */
+	return put_user(ilp, (unsigned long __user *) uptr);
+}
+
+static long clp_misc_ioctl(struct file *filp, unsigned int cmd,
+			   unsigned long arg)
+{
+	struct clp_req req;
+	void __user *argp;
+
+	if (cmd != CLP_SYNC)
+		return -EINVAL;
+
+	argp = is_compat_task() ? compat_ptr(arg) : (void __user *) arg;
+	if (copy_from_user(&req, argp, sizeof(req)))
+		return -EFAULT;
+	if (req.r != 0)
+		return -EINVAL;
+	return req.c ? clp_immediate_command(&req) : clp_normal_command(&req);
+}
+
+static int clp_misc_release(struct inode *inode, struct file *filp)
+{
+	return 0;
+}
+
+static const struct file_operations clp_misc_fops = {
+	.owner = THIS_MODULE,
+	.open = nonseekable_open,
+	.release = clp_misc_release,
+	.unlocked_ioctl = clp_misc_ioctl,
+	.compat_ioctl = clp_misc_ioctl,
+	.llseek = no_llseek,
+};
+
+static struct miscdevice clp_misc_device = {
+	.minor = MISC_DYNAMIC_MINOR,
+	.name = "clp",
+	.fops = &clp_misc_fops,
+};
+
+static int __init clp_misc_init(void)
+{
+	return misc_register(&clp_misc_device);
+}
+
+device_initcall(clp_misc_init);
diff --git a/arch/s390/pci/pci_debug.c b/arch/s390/pci/pci_debug.c
index 4129b0a5fd78..c555de3d12d6 100644
--- a/arch/s390/pci/pci_debug.c
+++ b/arch/s390/pci/pci_debug.c
@@ -128,10 +128,9 @@ static const struct file_operations debugfs_pci_perf_fops = {
 	.release = single_release,
 };
 
-void zpci_debug_init_device(struct zpci_dev *zdev)
+void zpci_debug_init_device(struct zpci_dev *zdev, const char *name)
 {
-	zdev->debugfs_dev = debugfs_create_dir(dev_name(&zdev->pdev->dev),
-					       debugfs_root);
+	zdev->debugfs_dev = debugfs_create_dir(name, debugfs_root);
 	if (IS_ERR(zdev->debugfs_dev))
 		zdev->debugfs_dev = NULL;
 
diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c
index 4638b93c7632..a06ce8037cec 100644
--- a/arch/s390/pci/pci_dma.c
+++ b/arch/s390/pci/pci_dma.c
@@ -217,27 +217,29 @@ void dma_cleanup_tables(unsigned long *table)
 	dma_free_cpu_table(table);
 }
 
-static unsigned long __dma_alloc_iommu(struct zpci_dev *zdev,
+static unsigned long __dma_alloc_iommu(struct device *dev,
 				       unsigned long start, int size)
 {
+	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
 	unsigned long boundary_size;
 
-	boundary_size = ALIGN(dma_get_seg_boundary(&zdev->pdev->dev) + 1,
+	boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
 			      PAGE_SIZE) >> PAGE_SHIFT;
 	return iommu_area_alloc(zdev->iommu_bitmap, zdev->iommu_pages,
 				start, size, 0, boundary_size, 0);
 }
 
-static unsigned long dma_alloc_iommu(struct zpci_dev *zdev, int size)
+static unsigned long dma_alloc_iommu(struct device *dev, int size)
 {
+	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
 	unsigned long offset, flags;
 	int wrap = 0;
 
 	spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags);
-	offset = __dma_alloc_iommu(zdev, zdev->next_bit, size);
+	offset = __dma_alloc_iommu(dev, zdev->next_bit, size);
 	if (offset == -1) {
 		/* wrap-around */
-		offset = __dma_alloc_iommu(zdev, 0, size);
+		offset = __dma_alloc_iommu(dev, 0, size);
 		wrap = 1;
 	}
 
@@ -251,8 +253,9 @@ static unsigned long dma_alloc_iommu(struct zpci_dev *zdev, int size)
 	return offset;
 }
 
-static void dma_free_iommu(struct zpci_dev *zdev, unsigned long offset, int size)
+static void dma_free_iommu(struct device *dev, unsigned long offset, int size)
 {
+	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
 	unsigned long flags;
 
 	spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags);
@@ -293,7 +296,7 @@ static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page,
 
 	/* This rounds up number of pages based on size and offset */
 	nr_pages = iommu_num_pages(pa, size, PAGE_SIZE);
-	iommu_page_index = dma_alloc_iommu(zdev, nr_pages);
+	iommu_page_index = dma_alloc_iommu(dev, nr_pages);
 	if (iommu_page_index == -1) {
 		ret = -ENOSPC;
 		goto out_err;
@@ -319,7 +322,7 @@ static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page,
 	return dma_addr + (offset & ~PAGE_MASK);
 
 out_free:
-	dma_free_iommu(zdev, iommu_page_index, nr_pages);
+	dma_free_iommu(dev, iommu_page_index, nr_pages);
 out_err:
 	zpci_err("map error:\n");
 	zpci_err_dma(ret, pa);
@@ -346,7 +349,7 @@ static void s390_dma_unmap_pages(struct device *dev, dma_addr_t dma_addr,
 
 	atomic64_add(npages, &zdev->unmapped_pages);
 	iommu_page_index = (dma_addr - zdev->start_dma) >> PAGE_SHIFT;
-	dma_free_iommu(zdev, iommu_page_index, npages);
+	dma_free_iommu(dev, iommu_page_index, npages);
 }
 
 static void *s390_dma_alloc(struct device *dev, size_t size,
diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c
index b0e04751c5d5..fb2a9a560fdc 100644
--- a/arch/s390/pci/pci_event.c
+++ b/arch/s390/pci/pci_event.c
@@ -46,11 +46,14 @@ struct zpci_ccdf_avail {
 static void __zpci_event_error(struct zpci_ccdf_err *ccdf)
 {
 	struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
-	struct pci_dev *pdev = zdev ? zdev->pdev : NULL;
+	struct pci_dev *pdev = NULL;
 
 	zpci_err("error CCDF:\n");
 	zpci_err_hex(ccdf, sizeof(*ccdf));
 
+	if (zdev)
+		pdev = pci_get_slot(zdev->bus, ZPCI_DEVFN);
+
 	pr_err("%s: Event 0x%x reports an error for PCI function 0x%x\n",
 	       pdev ? pci_name(pdev) : "n/a", ccdf->pec, ccdf->fid);
 
@@ -58,6 +61,7 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf)
 		return;
 
 	pdev->error_state = pci_channel_io_perm_failure;
+	pci_dev_put(pdev);
 }
 
 void zpci_event_error(void *data)
@@ -69,9 +73,12 @@ void zpci_event_error(void *data)
 static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
 {
 	struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
-	struct pci_dev *pdev = zdev ? zdev->pdev : NULL;
+	struct pci_dev *pdev = NULL;
 	int ret;
 
+	if (zdev)
+		pdev = pci_get_slot(zdev->bus, ZPCI_DEVFN);
+
 	pr_info("%s: Event 0x%x reconfigured PCI function 0x%x\n",
 		pdev ? pci_name(pdev) : "n/a", ccdf->pec, ccdf->fid);
 	zpci_err("avail CCDF:\n");
@@ -138,6 +145,8 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
 	default:
 		break;
 	}
+	if (pdev)
+		pci_dev_put(pdev);
 }
 
 void zpci_event_availability(void *data)
author	Linus Torvalds <torvalds@linux-foundation.org>	2016-03-16 20:47:45 +0300
committer	Linus Torvalds <torvalds@linux-foundation.org>	2016-03-16 20:47:45 +0300
commit	72aafdf01d826ad9e6fbd52bf689be937679f5d6 (patch)
tree	2ee440ea4621aabee06b02ca5733ffb9593466d4 /arch/s390
parent	1c8e85b17ac0707c7732081e94cadc5f89986e5f (diff)
parent	80c544ded25ac14d7cc3e555abb8ed2c2da99b84 (diff)
download	linux-72aafdf01d826ad9e6fbd52bf689be937679f5d6.tar.xz