115 files changed, 4223 insertions, 561 deletions
diff --git a/Documentation/s390/00-INDEX b/Documentation/s390/00-INDEX
index 9189535f6cd2..317f0378ae01 100644
--- a/Documentation/s390/00-INDEX
+++ b/Documentation/s390/00-INDEX
@@ -22,5 +22,7 @@ qeth.txt
 	- HiperSockets Bridge Port Support.
 s390dbf.txt
 	- information on using the s390 debug feature.
+vfio-ccw.txt
+	  information on the vfio-ccw I/O subchannel driver.
 zfcpdump.txt
 	- information on the s390 SCSI dump tool.
diff --git a/Documentation/s390/vfio-ccw.txt b/Documentation/s390/vfio-ccw.txt
new file mode 100644
index 000000000000..90b3dfead81b
--- /dev/null
+++ b/Documentation/s390/vfio-ccw.txt
@@ -0,0 +1,303 @@
+vfio-ccw: the basic infrastructure
+==================================
+
+Introduction
+------------
+
+Here we describe the vfio support for I/O subchannel devices for
+Linux/s390. Motivation for vfio-ccw is to passthrough subchannels to a
+virtual machine, while vfio is the means.
+
+Different than other hardware architectures, s390 has defined a unified
+I/O access method, which is so called Channel I/O. It has its own access
+patterns:
+- Channel programs run asynchronously on a separate (co)processor.
+- The channel subsystem will access any memory designated by the caller
+  in the channel program directly, i.e. there is no iommu involved.
+Thus when we introduce vfio support for these devices, we realize it
+with a mediated device (mdev) implementation. The vfio mdev will be
+added to an iommu group, so as to make itself able to be managed by the
+vfio framework. And we add read/write callbacks for special vfio I/O
+regions to pass the channel programs from the mdev to its parent device
+(the real I/O subchannel device) to do further address translation and
+to perform I/O instructions.
+
+This document does not intend to explain the s390 I/O architecture in
+every detail. More information/reference could be found here:
+- A good start to know Channel I/O in general:
+  https://en.wikipedia.org/wiki/Channel_I/O
+- s390 architecture:
+  s390 Principles of Operation manual (IBM Form. No. SA22-7832)
+- The existing Qemu code which implements a simple emulated channel
+  subsystem could also be a good reference. It makes it easier to follow
+  the flow.
+  qemu/hw/s390x/css.c
+
+For vfio mediated device framework:
+- Documentation/vfio-mediated-device.txt
+
+Motivation of vfio-ccw
+----------------------
+
+Currently, a guest virtualized via qemu/kvm on s390 only sees
+paravirtualized virtio devices via the "Virtio Over Channel I/O
+(virtio-ccw)" transport. This makes virtio devices discoverable via
+standard operating system algorithms for handling channel devices.
+
+However this is not enough. On s390 for the majority of devices, which
+use the standard Channel I/O based mechanism, we also need to provide
+the functionality of passing through them to a Qemu virtual machine.
+This includes devices that don't have a virtio counterpart (e.g. tape
+drives) or that have specific characteristics which guests want to
+exploit.
+
+For passing a device to a guest, we want to use the same interface as
+everybody else, namely vfio. Thus, we would like to introduce vfio
+support for channel devices. And we would like to name this new vfio
+device "vfio-ccw".
+
+Access patterns of CCW devices
+------------------------------
+
+s390 architecture has implemented a so called channel subsystem, that
+provides a unified view of the devices physically attached to the
+systems. Though the s390 hardware platform knows about a huge variety of
+different peripheral attachments like disk devices (aka. DASDs), tapes,
+communication controllers, etc. They can all be accessed by a well
+defined access method and they are presenting I/O completion a unified
+way: I/O interruptions.
+
+All I/O requires the use of channel command words (CCWs). A CCW is an
+instruction to a specialized I/O channel processor. A channel program is
+a sequence of CCWs which are executed by the I/O channel subsystem.  To
+issue a channel program to the channel subsystem, it is required to
+build an operation request block (ORB), which can be used to point out
+the format of the CCW and other control information to the system. The
+operating system signals the I/O channel subsystem to begin executing
+the channel program with a SSCH (start sub-channel) instruction. The
+central processor is then free to proceed with non-I/O instructions
+until interrupted. The I/O completion result is received by the
+interrupt handler in the form of interrupt response block (IRB).
+
+Back to vfio-ccw, in short:
+- ORBs and channel programs are built in guest kernel (with guest
+  physical addresses).
+- ORBs and channel programs are passed to the host kernel.
+- Host kernel translates the guest physical addresses to real addresses
+  and starts the I/O with issuing a privileged Channel I/O instruction
+  (e.g SSCH).
+- channel programs run asynchronously on a separate processor.
+- I/O completion will be signaled to the host with I/O interruptions.
+  And it will be copied as IRB to user space to pass it back to the
+  guest.
+
+Physical vfio ccw device and its child mdev
+-------------------------------------------
+
+As mentioned above, we realize vfio-ccw with a mdev implementation.
+
+Channel I/O does not have IOMMU hardware support, so the physical
+vfio-ccw device does not have an IOMMU level translation or isolation.
+
+Sub-channel I/O instructions are all privileged instructions, When
+handling the I/O instruction interception, vfio-ccw has the software
+policing and translation how the channel program is programmed before
+it gets sent to hardware.
+
+Within this implementation, we have two drivers for two types of
+devices:
+- The vfio_ccw driver for the physical subchannel device.
+  This is an I/O subchannel driver for the real subchannel device.  It
+  realizes a group of callbacks and registers to the mdev framework as a
+  parent (physical) device. As a consequence, mdev provides vfio_ccw a
+  generic interface (sysfs) to create mdev devices. A vfio mdev could be
+  created by vfio_ccw then and added to the mediated bus. It is the vfio
+  device that added to an IOMMU group and a vfio group.
+  vfio_ccw also provides an I/O region to accept channel program
+  request from user space and store I/O interrupt result for user
+  space to retrieve. To notify user space an I/O completion, it offers
+  an interface to setup an eventfd fd for asynchronous signaling.
+
+- The vfio_mdev driver for the mediated vfio ccw device.
+  This is provided by the mdev framework. It is a vfio device driver for
+  the mdev that created by vfio_ccw.
+  It realize a group of vfio device driver callbacks, adds itself to a
+  vfio group, and registers itself to the mdev framework as a mdev
+  driver.
+  It uses a vfio iommu backend that uses the existing map and unmap
+  ioctls, but rather than programming them into an IOMMU for a device,
+  it simply stores the translations for use by later requests. This
+  means that a device programmed in a VM with guest physical addresses
+  can have the vfio kernel convert that address to process virtual
+  address, pin the page and program the hardware with the host physical
+  address in one step.
+  For a mdev, the vfio iommu backend will not pin the pages during the
+  VFIO_IOMMU_MAP_DMA ioctl. Mdev framework will only maintain a database
+  of the iova<->vaddr mappings in this operation. And they export a
+  vfio_pin_pages and a vfio_unpin_pages interfaces from the vfio iommu
+  backend for the physical devices to pin and unpin pages by demand.
+
+Below is a high Level block diagram.
+
+ +-------------+
+ |             |
+ | +---------+ | mdev_register_driver() +--------------+
+ | |  Mdev   | +<-----------------------+              |
+ | |  bus    | |                        | vfio_mdev.ko |
+ | | driver  | +----------------------->+              |<-> VFIO user
+ | +---------+ |    probe()/remove()    +--------------+    APIs
+ |             |
+ |  MDEV CORE  |
+ |   MODULE    |
+ |   mdev.ko   |
+ | +---------+ | mdev_register_device() +--------------+
+ | |Physical | +<-----------------------+              |
+ | | device  | |                        |  vfio_ccw.ko |<-> subchannel
+ | |interface| +----------------------->+              |     device
+ | +---------+ |       callback         +--------------+
+ +-------------+
+
+The process of how these work together.
+1. vfio_ccw.ko drives the physical I/O subchannel, and registers the
+   physical device (with callbacks) to mdev framework.
+   When vfio_ccw probing the subchannel device, it registers device
+   pointer and callbacks to the mdev framework. Mdev related file nodes
+   under the device node in sysfs would be created for the subchannel
+   device, namely 'mdev_create', 'mdev_destroy' and
+   'mdev_supported_types'.
+2. Create a mediated vfio ccw device.
+   Use the 'mdev_create' sysfs file, we need to manually create one (and
+   only one for our case) mediated device.
+3. vfio_mdev.ko drives the mediated ccw device.
+   vfio_mdev is also the vfio device drvier. It will probe the mdev and
+   add it to an iommu_group and a vfio_group. Then we could pass through
+   the mdev to a guest.
+
+vfio-ccw I/O region
+-------------------
+
+An I/O region is used to accept channel program request from user
+space and store I/O interrupt result for user space to retrieve. The
+defination of the region is:
+
+struct ccw_io_region {
+#define ORB_AREA_SIZE 12
+	__u8	orb_area[ORB_AREA_SIZE];
+#define SCSW_AREA_SIZE 12
+	__u8	scsw_area[SCSW_AREA_SIZE];
+#define IRB_AREA_SIZE 96
+	__u8	irb_area[IRB_AREA_SIZE];
+	__u32	ret_code;
+} __packed;
+
+While starting an I/O request, orb_area should be filled with the
+guest ORB, and scsw_area should be filled with the SCSW of the Virtual
+Subchannel.
+
+irb_area stores the I/O result.
+
+ret_code stores a return code for each access of the region.
+
+vfio-ccw patches overview
+-------------------------
+
+For now, our patches are rebased on the latest mdev implementation.
+vfio-ccw follows what vfio-pci did on the s390 paltform and uses
+vfio-iommu-type1 as the vfio iommu backend. It's a good start to launch
+the code review for vfio-ccw. Note that the implementation is far from
+complete yet; but we'd like to get feedback for the general
+architecture.
+
+* CCW translation APIs
+- Description:
+  These introduce a group of APIs (start with 'cp_') to do CCW
+  translation. The CCWs passed in by a user space program are
+  organized with their guest physical memory addresses. These APIs
+  will copy the CCWs into the kernel space, and assemble a runnable
+  kernel channel program by updating the guest physical addresses with
+  their corresponding host physical addresses.
+- Patches:
+  vfio: ccw: introduce channel program interfaces
+
+* vfio_ccw device driver
+- Description:
+  The following patches utilizes the CCW translation APIs and introduce
+  vfio_ccw, which is the driver for the I/O subchannel devices you want
+  to pass through.
+  vfio_ccw implements the following vfio ioctls:
+    VFIO_DEVICE_GET_INFO
+    VFIO_DEVICE_GET_IRQ_INFO
+    VFIO_DEVICE_GET_REGION_INFO
+    VFIO_DEVICE_RESET
+    VFIO_DEVICE_SET_IRQS
+  This provides an I/O region, so that the user space program can pass a
+  channel program to the kernel, to do further CCW translation before
+  issuing them to a real device.
+  This also provides the SET_IRQ ioctl to setup an event notifier to
+  notify the user space program the I/O completion in an asynchronous
+  way.
+- Patches:
+  vfio: ccw: basic implementation for vfio_ccw driver
+  vfio: ccw: introduce ccw_io_region
+  vfio: ccw: realize VFIO_DEVICE_GET_REGION_INFO ioctl
+  vfio: ccw: realize VFIO_DEVICE_RESET ioctl
+  vfio: ccw: realize VFIO_DEVICE_G(S)ET_IRQ_INFO ioctls
+
+The user of vfio-ccw is not limited to Qemu, while Qemu is definitely a
+good example to get understand how these patches work. Here is a little
+bit more detail how an I/O request triggered by the Qemu guest will be
+handled (without error handling).
+
+Explanation:
+Q1-Q7: Qemu side process.
+K1-K5: Kernel side process.
+
+Q1. Get I/O region info during initialization.
+Q2. Setup event notifier and handler to handle I/O completion.
+
+... ...
+
+Q3. Intercept a ssch instruction.
+Q4. Write the guest channel program and ORB to the I/O region.
+    K1. Copy from guest to kernel.
+    K2. Translate the guest channel program to a host kernel space
+        channel program, which becomes runnable for a real device.
+    K3. With the necessary information contained in the orb passed in
+        by Qemu, issue the ccwchain to the device.
+    K4. Return the ssch CC code.
+Q5. Return the CC code to the guest.
+
+... ...
+
+    K5. Interrupt handler gets the I/O result and write the result to
+        the I/O region.
+    K6. Signal Qemu to retrieve the result.
+Q6. Get the signal and event handler reads out the result from the I/O
+    region.
+Q7. Update the irb for the guest.
+
+Limitations
+-----------
+
+The current vfio-ccw implementation focuses on supporting basic commands
+needed to implement block device functionality (read/write) of DASD/ECKD
+device only. Some commands may need special handling in the future, for
+example, anything related to path grouping.
+
+DASD is a kind of storage device. While ECKD is a data recording format.
+More information for DASD and ECKD could be found here:
+https://en.wikipedia.org/wiki/Direct-access_storage_device
+https://en.wikipedia.org/wiki/Count_key_data
+
+Together with the corresponding work in Qemu, we can bring the passed
+through DASD/ECKD device online in a guest now and use it as a block
+device.
+
+Reference
+---------
+1. ESA/s390 Principles of Operation manual (IBM Form. No. SA22-7832)
+2. ESA/390 Common I/O Device Commands manual (IBM Form. No. SA22-7204)
+3. https://en.wikipedia.org/wiki/Channel_I/O
+4. Documentation/s390/cds.txt
+5. Documentation/vfio.txt
+6. Documentation/vfio-mediated-device.txt
diff --git a/MAINTAINERS b/MAINTAINERS
index 33ecf266570f..5f91365ebc0d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7201,6 +7201,7 @@ S:	Supported
 F:	Documentation/s390/kvm.txt
 F:	arch/s390/include/asm/kvm*
 F:	arch/s390/kvm/
+F:	arch/s390/mm/gmap.c
 
 KERNEL VIRTUAL MACHINE (KVM) FOR ARM
 M:	Christoffer Dall <christoffer.dall@linaro.org>
@@ -10896,6 +10897,16 @@ W:	http://www.ibm.com/developerworks/linux/linux390/
 S:	Supported
 F:	drivers/iommu/s390-iommu.c
 
+S390 VFIO-CCW DRIVER
+M:	Cornelia Huck <cornelia.huck@de.ibm.com>
+M:	Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
+L:	linux-s390@vger.kernel.org
+L:	kvm@vger.kernel.org
+S:	Supported
+F:	drivers/s390/cio/vfio_ccw*
+F:	Documentation/s390/vfio-ccw.txt
+F:	include/uapi/linux/vfio_ccw.h
+
 S3C24XX SD/MMC Driver
 M:	Ben Dooks <ben-linux@fluff.org>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
diff --git a/arch/s390/Kbuild b/arch/s390/Kbuild
index e256592eb66e..eae2c64cf69d 100644
--- a/arch/s390/Kbuild
+++ b/arch/s390/Kbuild
@@ -1,7 +1,7 @@
 obj-y				+= kernel/
 obj-y				+= mm/
 obj-$(CONFIG_KVM)		+= kvm/
-obj-$(CONFIG_CRYPTO_HW)		+= crypto/
+obj-y				+= crypto/
 obj-$(CONFIG_S390_HYPFS_FS)	+= hypfs/
 obj-$(CONFIG_APPLDATA_BASE)	+= appldata/
 obj-y				+= net/
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index b8b143432381..e161fafb495b 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -105,6 +105,7 @@ config S390
 	select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE
 	select ARCH_SAVE_PAGE_KEYS if HIBERNATION
 	select ARCH_SUPPORTS_ATOMIC_RMW
+	select ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT
 	select ARCH_SUPPORTS_NUMA_BALANCING
 	select ARCH_USE_BUILTIN_BSWAP
 	select ARCH_USE_CMPXCHG_LOCKREF
@@ -123,7 +124,6 @@ config S390
 	select GENERIC_TIME_VSYSCALL
 	select HAVE_ALIGNED_STRUCT_PAGE if SLUB
 	select HAVE_ARCH_AUDITSYSCALL
-	select HAVE_ARCH_EARLY_PFN_TO_NID
 	select HAVE_ARCH_JUMP_LABEL
 	select CPU_NO_EFFICIENT_FFS if !HAVE_MARCH_Z9_109_FEATURES
 	select HAVE_ARCH_SECCOMP_FILTER
@@ -506,6 +506,21 @@ source kernel/Kconfig.preempt
 
 source kernel/Kconfig.hz
 
+config ARCH_RANDOM
+	def_bool y
+	prompt "s390 architectural random number generation API"
+	help
+	  Enable the s390 architectural random number generation API
+	  to provide random data for all consumers within the Linux
+	  kernel.
+
+	  When enabled the arch_random_* functions declared in linux/random.h
+	  are implemented. The implementation is based on the s390 CPACF
+	  instruction subfunction TRNG which provides a real true random
+	  number generator.
+
+	  If unsure, say Y.
+
 endmenu
 
 menu "Memory setup"
@@ -536,6 +551,16 @@ config FORCE_MAX_ZONEORDER
 
 source "mm/Kconfig"
 
+config MAX_PHYSMEM_BITS
+	int "Maximum size of supported physical memory in bits (42-53)"
+	range 42 53
+	default "46"
+	help
+	  This option specifies the maximum supported size of physical memory
+	  in bits. Supported is any size between 2^42 (4TB) and 2^53 (8PB).
+	  Increasing the number of bits also increases the kernel image size.
+	  By default 46 bits (64TB) are supported.
+
 config PACK_STACK
 	def_bool y
 	prompt "Pack kernel stack"
@@ -613,7 +638,7 @@ if PCI
 config PCI_NR_FUNCTIONS
 	int "Maximum number of PCI functions (1-4096)"
 	range 1 4096
-	default "64"
+	default "128"
 	help
 	  This allows you to specify the maximum number of PCI functions which
 	  this kernel will support.
@@ -671,6 +696,16 @@ config EADM_SCH
 	  To compile this driver as a module, choose M here: the
 	  module will be called eadm_sch.
 
+config VFIO_CCW
+	def_tristate n
+	prompt "Support for VFIO-CCW subchannels"
+	depends on S390_CCW_IOMMU && VFIO_MDEV
+	help
+	  This driver allows usage of I/O subchannels via VFIO-CCW.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called vfio_ccw.
+
 endmenu
 
 menu "Dump support"
diff --git a/arch/s390/configs/default_defconfig b/arch/s390/configs/default_defconfig
index 4b176fe83da4..a5039fa89314 100644
--- a/arch/s390/configs/default_defconfig
+++ b/arch/s390/configs/default_defconfig
@@ -73,6 +73,7 @@ CONFIG_ZSWAP=y
 CONFIG_ZBUD=m
 CONFIG_ZSMALLOC=m
 CONFIG_ZSMALLOC_STAT=y
+CONFIG_DEFERRED_STRUCT_PAGE_INIT=y
 CONFIG_IDLE_PAGE_TRACKING=y
 CONFIG_PCI=y
 CONFIG_PCI_DEBUG=y
diff --git a/arch/s390/configs/gcov_defconfig b/arch/s390/configs/gcov_defconfig
index 0de46cc397f6..83970b5afb2b 100644
--- a/arch/s390/configs/gcov_defconfig
+++ b/arch/s390/configs/gcov_defconfig
@@ -72,6 +72,7 @@ CONFIG_ZSWAP=y
 CONFIG_ZBUD=m
 CONFIG_ZSMALLOC=m
 CONFIG_ZSMALLOC_STAT=y
+CONFIG_DEFERRED_STRUCT_PAGE_INIT=y
 CONFIG_IDLE_PAGE_TRACKING=y
 CONFIG_PCI=y
 CONFIG_HOTPLUG_PCI=y
diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig
index e167557b434c..fbc6542aaf59 100644
--- a/arch/s390/configs/performance_defconfig
+++ b/arch/s390/configs/performance_defconfig
@@ -70,6 +70,7 @@ CONFIG_ZSWAP=y
 CONFIG_ZBUD=m
 CONFIG_ZSMALLOC=m
 CONFIG_ZSMALLOC_STAT=y
+CONFIG_DEFERRED_STRUCT_PAGE_INIT=y
 CONFIG_IDLE_PAGE_TRACKING=y
 CONFIG_PCI=y
 CONFIG_HOTPLUG_PCI=y
diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig
index 4366a3e3e754..e23d97c13735 100644
--- a/arch/s390/configs/zfcpdump_defconfig
+++ b/arch/s390/configs/zfcpdump_defconfig
@@ -35,7 +35,6 @@ CONFIG_SCSI_ENCLOSURE=y
 CONFIG_SCSI_CONSTANTS=y
 CONFIG_SCSI_LOGGING=y
 CONFIG_SCSI_FC_ATTRS=y
-CONFIG_SCSI_SRP_ATTRS=y
 CONFIG_ZFCP=y
 # CONFIG_INPUT_MOUSEDEV_PSAUX is not set
 # CONFIG_INPUT_KEYBOARD is not set
diff --git a/arch/s390/crypto/Makefile b/arch/s390/crypto/Makefile
index 402c530c6da5..678d9863e3f0 100644
--- a/arch/s390/crypto/Makefile
+++ b/arch/s390/crypto/Makefile
@@ -10,5 +10,6 @@ obj-$(CONFIG_CRYPTO_AES_S390) += aes_s390.o paes_s390.o
 obj-$(CONFIG_S390_PRNG) += prng.o
 obj-$(CONFIG_CRYPTO_GHASH_S390) += ghash_s390.o
 obj-$(CONFIG_CRYPTO_CRC32_S390) += crc32-vx_s390.o
+obj-$(CONFIG_ARCH_RANDOM) += arch_random.o
 
 crc32-vx_s390-y := crc32-vx.o crc32le-vx.o crc32be-vx.o
diff --git a/arch/s390/crypto/arch_random.c b/arch/s390/crypto/arch_random.c
new file mode 100644
index 000000000000..9317b3e645e2
--- /dev/null
+++ b/arch/s390/crypto/arch_random.c
@@ -0,0 +1,31 @@
+/*
+ * s390 arch random implementation.
+ *
+ * Copyright IBM Corp. 2017
+ * Author(s): Harald Freudenberger <freude@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/atomic.h>
+#include <linux/static_key.h>
+#include <asm/cpacf.h>
+
+DEFINE_STATIC_KEY_FALSE(s390_arch_random_available);
+
+atomic64_t s390_arch_random_counter = ATOMIC64_INIT(0);
+EXPORT_SYMBOL(s390_arch_random_counter);
+
+static int __init s390_arch_random_init(void)
+{
+	/* check if subfunction CPACF_PRNO_TRNG is available */
+	if (cpacf_query_func(CPACF_PRNO, CPACF_PRNO_TRNG))
+		static_branch_enable(&s390_arch_random_available);
+
+	return 0;
+}
+arch_initcall(s390_arch_random_init);
diff --git a/arch/s390/crypto/paes_s390.c b/arch/s390/crypto/paes_s390.c
index 716b17238599..a4e903ed7e21 100644
--- a/arch/s390/crypto/paes_s390.c
+++ b/arch/s390/crypto/paes_s390.c
@@ -616,7 +616,7 @@ out_err:
 module_init(paes_s390_init);
 module_exit(paes_s390_fini);
 
-MODULE_ALIAS_CRYPTO("aes-all");
+MODULE_ALIAS_CRYPTO("paes");
 
 MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm with protected keys");
 MODULE_LICENSE("GPL");
diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c
index 5a3ec04a7082..3e47c4a0f18b 100644
--- a/arch/s390/crypto/prng.c
+++ b/arch/s390/crypto/prng.c
@@ -81,7 +81,7 @@ struct prng_ws_s {
 	u64 byte_counter;
 };
 
-struct ppno_ws_s {
+struct prno_ws_s {
 	u32 res;
 	u32 reseed_counter;
 	u64 stream_bytes;
@@ -93,7 +93,7 @@ struct prng_data_s {
 	struct mutex mutex;
 	union {
 		struct prng_ws_s prngws;
-		struct ppno_ws_s ppnows;
+		struct prno_ws_s prnows;
 	};
 	u8 *buf;
 	u32 rest;
@@ -306,12 +306,12 @@ static int __init prng_sha512_selftest(void)
 		0x36, 0x8c, 0x5a, 0x9f, 0x7a, 0x4b, 0x3e, 0xe2 };
 
 	u8 buf[sizeof(random)];
-	struct ppno_ws_s ws;
+	struct prno_ws_s ws;
 
 	memset(&ws, 0, sizeof(ws));
 
 	/* initial seed */
-	cpacf_ppno(CPACF_PPNO_SHA512_DRNG_SEED,
+	cpacf_prno(CPACF_PRNO_SHA512_DRNG_SEED,
 		   &ws, NULL, 0, seed, sizeof(seed));
 
 	/* check working states V and C */
@@ -324,9 +324,9 @@ static int __init prng_sha512_selftest(void)
 	}
 
 	/* generate random bytes */
-	cpacf_ppno(CPACF_PPNO_SHA512_DRNG_GEN,
+	cpacf_prno(CPACF_PRNO_SHA512_DRNG_GEN,
 		   &ws, buf, sizeof(buf), NULL, 0);
-	cpacf_ppno(CPACF_PPNO_SHA512_DRNG_GEN,
+	cpacf_prno(CPACF_PRNO_SHA512_DRNG_GEN,
 		   &ws, buf, sizeof(buf), NULL, 0);
 
 	/* check against expected data */
@@ -374,16 +374,16 @@ static int __init prng_sha512_instantiate(void)
 	/* followed by 16 bytes of unique nonce */
 	get_tod_clock_ext(seed + 64 + 32);
 
-	/* initial seed of the ppno drng */
-	cpacf_ppno(CPACF_PPNO_SHA512_DRNG_SEED,
-		   &prng_data->ppnows, NULL, 0, seed, sizeof(seed));
+	/* initial seed of the prno drng */
+	cpacf_prno(CPACF_PRNO_SHA512_DRNG_SEED,
+		   &prng_data->prnows, NULL, 0, seed, sizeof(seed));
 
 	/* if fips mode is enabled, generate a first block of random
 	   bytes for the FIPS 140-2 Conditional Self Test */
 	if (fips_enabled) {
 		prng_data->prev = prng_data->buf + prng_chunk_size;
-		cpacf_ppno(CPACF_PPNO_SHA512_DRNG_GEN,
-			   &prng_data->ppnows,
+		cpacf_prno(CPACF_PRNO_SHA512_DRNG_GEN,
+			   &prng_data->prnows,
 			   prng_data->prev, prng_chunk_size, NULL, 0);
 	}
 
@@ -412,9 +412,9 @@ static int prng_sha512_reseed(void)
 	if (ret != sizeof(seed))
 		return ret;
 
-	/* do a reseed of the ppno drng with this bytestring */
-	cpacf_ppno(CPACF_PPNO_SHA512_DRNG_SEED,
-		   &prng_data->ppnows, NULL, 0, seed, sizeof(seed));
+	/* do a reseed of the prno drng with this bytestring */
+	cpacf_prno(CPACF_PRNO_SHA512_DRNG_SEED,
+		   &prng_data->prnows, NULL, 0, seed, sizeof(seed));
 
 	return 0;
 }
@@ -425,15 +425,15 @@ static int prng_sha512_generate(u8 *buf, size_t nbytes)
 	int ret;
 
 	/* reseed needed ? */
-	if (prng_data->ppnows.reseed_counter > prng_reseed_limit) {
+	if (prng_data->prnows.reseed_counter > prng_reseed_limit) {
 		ret = prng_sha512_reseed();
 		if (ret)
 			return ret;
 	}
 
-	/* PPNO generate */
-	cpacf_ppno(CPACF_PPNO_SHA512_DRNG_GEN,
-		   &prng_data->ppnows, buf, nbytes, NULL, 0);
+	/* PRNO generate */
+	cpacf_prno(CPACF_PRNO_SHA512_DRNG_GEN,
+		   &prng_data->prnows, buf, nbytes, NULL, 0);
 
 	/* FIPS 140-2 Conditional Self Test */
 	if (fips_enabled) {
@@ -653,7 +653,7 @@ static ssize_t prng_counter_show(struct device *dev,
 	if (mutex_lock_interruptible(&prng_data->mutex))
 		return -ERESTARTSYS;
 	if (prng_mode == PRNG_MODE_SHA512)
-		counter = prng_data->ppnows.stream_bytes;
+		counter = prng_data->prnows.stream_bytes;
 	else
 		counter = prng_data->prngws.byte_counter;
 	mutex_unlock(&prng_data->mutex);
@@ -774,8 +774,8 @@ static int __init prng_init(void)
 
 	/* choose prng mode */
 	if (prng_mode != PRNG_MODE_TDES) {
-		/* check for MSA5 support for PPNO operations */
-		if (!cpacf_query_func(CPACF_PPNO, CPACF_PPNO_SHA512_DRNG_GEN)) {
+		/* check for MSA5 support for PRNO operations */
+		if (!cpacf_query_func(CPACF_PRNO, CPACF_PRNO_SHA512_DRNG_GEN)) {
 			if (prng_mode == PRNG_MODE_SHA512) {
 				pr_err("The prng module cannot "
 				       "start in SHA-512 mode\n");
diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild
index 8aea32fe8bd2..7e3481eb2174 100644
--- a/arch/s390/include/asm/Kbuild
+++ b/arch/s390/include/asm/Kbuild
@@ -1,8 +1,14 @@
 generic-y += asm-offsets.h
 generic-y += clkdev.h
 generic-y += dma-contiguous.h
+generic-y += div64.h
+generic-y += emergency-restart.h
 generic-y += export.h
+generic-y += irq_regs.h
 generic-y += irq_work.h
+generic-y += kmap_types.h
+generic-y += local.h
+generic-y += local64.h
 generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
 generic-y += preempt.h
diff --git a/arch/s390/include/asm/archrandom.h b/arch/s390/include/asm/archrandom.h
new file mode 100644
index 000000000000..6033901a40b2
--- /dev/null
+++ b/arch/s390/include/asm/archrandom.h
@@ -0,0 +1,69 @@
+/*
+ * Kernel interface for the s390 arch_random_* functions
+ *
+ * Copyright IBM Corp. 2017
+ *
+ * Author: Harald Freudenberger <freude@de.ibm.com>
+ *
+ */
+
+#ifndef _ASM_S390_ARCHRANDOM_H
+#define _ASM_S390_ARCHRANDOM_H
+
+#ifdef CONFIG_ARCH_RANDOM
+
+#include <linux/static_key.h>
+#include <linux/atomic.h>
+#include <asm/cpacf.h>
+
+DECLARE_STATIC_KEY_FALSE(s390_arch_random_available);
+extern atomic64_t s390_arch_random_counter;
+
+static void s390_arch_random_generate(u8 *buf, unsigned int nbytes)
+{
+	cpacf_trng(NULL, 0, buf, nbytes);
+	atomic64_add(nbytes, &s390_arch_random_counter);
+}
+
+static inline bool arch_has_random(void)
+{
+	if (static_branch_likely(&s390_arch_random_available))
+		return true;
+	return false;
+}
+
+static inline bool arch_has_random_seed(void)
+{
+	return arch_has_random();
+}
+
+static inline bool arch_get_random_long(unsigned long *v)
+{
+	if (static_branch_likely(&s390_arch_random_available)) {
+		s390_arch_random_generate((u8 *)v, sizeof(*v));
+		return true;
+	}
+	return false;
+}
+
+static inline bool arch_get_random_int(unsigned int *v)
+{
+	if (static_branch_likely(&s390_arch_random_available)) {
+		s390_arch_random_generate((u8 *)v, sizeof(*v));
+		return true;
+	}
+	return false;
+}
+
+static inline bool arch_get_random_seed_long(unsigned long *v)
+{
+	return arch_get_random_long(v);
+}
+
+static inline bool arch_get_random_seed_int(unsigned int *v)
+{
+	return arch_get_random_int(v);
+}
+
+#endif /* CONFIG_ARCH_RANDOM */
+#endif /* _ASM_S390_ARCHRANDOM_H */
diff --git a/arch/s390/include/asm/atomic_ops.h b/arch/s390/include/asm/atomic_ops.h
index ac9e2b939d04..ba6d29412344 100644
--- a/arch/s390/include/asm/atomic_ops.h
+++ b/arch/s390/include/asm/atomic_ops.h
@@ -111,20 +111,22 @@ __ATOMIC64_OPS(__atomic64_xor, "xgr")
 
 static inline int __atomic_cmpxchg(int *ptr, int old, int new)
 {
-	asm volatile(
-		"	cs	%[old],%[new],%[ptr]"
-		: [old] "+d" (old), [ptr] "+Q" (*ptr)
-		: [new] "d" (new) : "cc", "memory");
-	return old;
+	return __sync_val_compare_and_swap(ptr, old, new);
+}
+
+static inline int __atomic_cmpxchg_bool(int *ptr, int old, int new)
+{
+	return __sync_bool_compare_and_swap(ptr, old, new);
 }
 
 static inline long __atomic64_cmpxchg(long *ptr, long old, long new)
 {
-	asm volatile(
-		"	csg	%[old],%[new],%[ptr]"
-		: [old] "+d" (old), [ptr] "+Q" (*ptr)
-		: [new] "d" (new) : "cc", "memory");
-	return old;
+	return __sync_val_compare_and_swap(ptr, old, new);
+}
+
+static inline long __atomic64_cmpxchg_bool(long *ptr, long old, long new)
+{
+	return __sync_bool_compare_and_swap(ptr, old, new);
 }
 
 #endif /* __ARCH_S390_ATOMIC_OPS__  */
diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h
index d92047da5ccb..99902b7b9f0c 100644
--- a/arch/s390/include/asm/bitops.h
+++ b/arch/s390/include/asm/bitops.h
@@ -15,14 +15,6 @@
  * end up numbered:
  *   |63..............0|127............64|191...........128|255...........192|
  *
- * There are a few little-endian macros used mostly for filesystem
- * bitmaps, these work on similar bit array layouts, but byte-oriented:
- *   |7...0|15...8|23...16|31...24|39...32|47...40|55...48|63...56|
- *
- * The main difference is that bit 3-5 in the bit number field needs to be
- * reversed compared to the big-endian bit fields. This can be achieved by
- * XOR with 0x38.
- *
  * We also have special functions which work with an MSB0 encoding.
  * The bits are numbered:
  *   |0..............63|64............127|128...........191|192...........255|
@@ -253,6 +245,11 @@ unsigned long find_first_bit_inv(const unsigned long *addr, unsigned long size);
 unsigned long find_next_bit_inv(const unsigned long *addr, unsigned long size,
 				unsigned long offset);
 
+#define for_each_set_bit_inv(bit, addr, size)				\
+	for ((bit) = find_first_bit_inv((addr), (size));		\
+	     (bit) < (size);						\
+	     (bit) = find_next_bit_inv((addr), (size), (bit) + 1))
+
 static inline void set_bit_inv(unsigned long nr, volatile unsigned long *ptr)
 {
 	return set_bit(nr ^ (BITS_PER_LONG - 1), ptr);
diff --git a/arch/s390/include/asm/cio.h b/arch/s390/include/asm/cio.h
index f7ed88cc066e..7a38ca85190b 100644
--- a/arch/s390/include/asm/cio.h
+++ b/arch/s390/include/asm/cio.h
@@ -33,6 +33,24 @@ struct ccw1 {
 	__u32 cda;
 } __attribute__ ((packed,aligned(8)));
 
+/**
+ * struct ccw0 - channel command word
+ * @cmd_code: command code
+ * @cda: data address
+ * @flags: flags, like IDA addressing, etc.
+ * @reserved: will be ignored
+ * @count: byte count
+ *
+ * The format-0 ccw structure.
+ */
+struct ccw0 {
+	__u8 cmd_code;
+	__u32 cda : 24;
+	__u8  flags;
+	__u8  reserved;
+	__u16 count;
+} __packed __aligned(8);
+
 #define CCW_FLAG_DC		0x80
 #define CCW_FLAG_CC		0x40
 #define CCW_FLAG_SLI		0x20
diff --git a/arch/s390/include/asm/cpacf.h b/arch/s390/include/asm/cpacf.h
index e2dfbf280d12..e06f2556b316 100644
--- a/arch/s390/include/asm/cpacf.h
+++ b/arch/s390/include/asm/cpacf.h
@@ -25,7 +25,8 @@
 #define CPACF_KMO		0xb92b		/* MSA4 */
 #define CPACF_PCC		0xb92c		/* MSA4 */
 #define CPACF_KMCTR		0xb92d		/* MSA4 */
-#define CPACF_PPNO		0xb93c		/* MSA5 */
+#define CPACF_PRNO		0xb93c		/* MSA5 */
+#define CPACF_KMA		0xb929		/* MSA8 */
 
 /*
  * En/decryption modifier bits
@@ -123,12 +124,14 @@
 #define CPACF_PCKMO_ENC_AES_256_KEY	0x14
 
 /*
- * Function codes for the PPNO (PERFORM PSEUDORANDOM NUMBER OPERATION)
+ * Function codes for the PRNO (PERFORM RANDOM NUMBER OPERATION)
  * instruction
  */
-#define CPACF_PPNO_QUERY		0x00
-#define CPACF_PPNO_SHA512_DRNG_GEN	0x03
-#define CPACF_PPNO_SHA512_DRNG_SEED	0x83
+#define CPACF_PRNO_QUERY		0x00
+#define CPACF_PRNO_SHA512_DRNG_GEN	0x03
+#define CPACF_PRNO_SHA512_DRNG_SEED	0x83
+#define CPACF_PRNO_TRNG_Q_R2C_RATIO	0x70
+#define CPACF_PRNO_TRNG			0x72
 
 typedef struct { unsigned char bytes[16]; } cpacf_mask_t;
 
@@ -149,8 +152,8 @@ static inline void __cpacf_query(unsigned int opcode, cpacf_mask_t *mask)
 
 	asm volatile(
 		"	spm 0\n" /* pckmo doesn't change the cc */
-		/* Parameter registers are ignored, but may not be 0 */
-		"0:	.insn	rrf,%[opc] << 16,2,2,2,0\n"
+		/* Parameter regs are ignored, but must be nonzero and unique */
+		"0:	.insn	rrf,%[opc] << 16,2,4,6,0\n"
 		"	brc	1,0b\n"	/* handle partial completion */
 		: "=m" (*mask)
 		: [fc] "d" (r0), [pba] "a" (r1), [opc] "i" (opcode)
@@ -173,7 +176,7 @@ static inline int __cpacf_check_opcode(unsigned int opcode)
 	case CPACF_PCC:
 	case CPACF_KMCTR:
 		return test_facility(77);	/* check for MSA4 */
-	case CPACF_PPNO:
+	case CPACF_PRNO:
 		return test_facility(57);	/* check for MSA5 */
 	default:
 		BUG();
@@ -373,18 +376,18 @@ static inline int cpacf_kmctr(unsigned long func, void *param, u8 *dest,
 }
 
 /**
- * cpacf_ppno() - executes the PPNO (PERFORM PSEUDORANDOM NUMBER OPERATION)
+ * cpacf_prno() - executes the PRNO (PERFORM RANDOM NUMBER OPERATION)
  *		  instruction
- * @func: the function code passed to PPNO; see CPACF_PPNO_xxx defines
+ * @func: the function code passed to PRNO; see CPACF_PRNO_xxx defines
  * @param: address of parameter block; see POP for details on each func
  * @dest: address of destination memory area
  * @dest_len: size of destination memory area in bytes
  * @seed: address of seed data
  * @seed_len: size of seed data in bytes
  */
-static inline void cpacf_ppno(unsigned long func, void *param,
-			      u8 *dest, long dest_len,
-			      const u8 *seed, long seed_len)
+static inline void cpacf_prno(unsigned long func, void *param,
+			      u8 *dest, unsigned long dest_len,
+			      const u8 *seed, unsigned long seed_len)
 {
 	register unsigned long r0 asm("0") = (unsigned long) func;
 	register unsigned long r1 asm("1") = (unsigned long) param;
@@ -398,7 +401,32 @@ static inline void cpacf_ppno(unsigned long func, void *param,
 		"	brc	1,0b\n"	  /* handle partial completion */
 		: [dst] "+a" (r2), [dlen] "+d" (r3)
 		: [fc] "d" (r0), [pba] "a" (r1),
-		  [seed] "a" (r4), [slen] "d" (r5), [opc] "i" (CPACF_PPNO)
+		  [seed] "a" (r4), [slen] "d" (r5), [opc] "i" (CPACF_PRNO)
+		: "cc", "memory");
+}
+
+/**
+ * cpacf_trng() - executes the TRNG subfunction of the PRNO instruction
+ * @ucbuf: buffer for unconditioned data
+ * @ucbuf_len: amount of unconditioned data to fetch in bytes
+ * @cbuf: buffer for conditioned data
+ * @cbuf_len: amount of conditioned data to fetch in bytes
+ */
+static inline void cpacf_trng(u8 *ucbuf, unsigned long ucbuf_len,
+			      u8 *cbuf, unsigned long cbuf_len)
+{
+	register unsigned long r0 asm("0") = (unsigned long) CPACF_PRNO_TRNG;
+	register unsigned long r2 asm("2") = (unsigned long) ucbuf;
+	register unsigned long r3 asm("3") = (unsigned long) ucbuf_len;
+	register unsigned long r4 asm("4") = (unsigned long) cbuf;
+	register unsigned long r5 asm("5") = (unsigned long) cbuf_len;
+
+	asm volatile (
+		"0:	.insn	rre,%[opc] << 16,%[ucbuf],%[cbuf]\n"
+		"	brc	1,0b\n"	  /* handle partial completion */
+		: [ucbuf] "+a" (r2), [ucbuflen] "+d" (r3),
+		  [cbuf] "+a" (r4), [cbuflen] "+d" (r5)
+		: [fc] "d" (r0), [opc] "i" (CPACF_PRNO)
 		: "cc", "memory");
 }
 
diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h
index d1e0707310fd..05480e4cc5ca 100644
--- a/arch/s390/include/asm/cpu_mf.h
+++ b/arch/s390/include/asm/cpu_mf.h
@@ -20,9 +20,11 @@
 #define CPU_MF_INT_SF_PRA	(1 << 29)	/* program request alert */
 #define CPU_MF_INT_SF_SACA	(1 << 23)	/* sampler auth. change alert */
 #define CPU_MF_INT_SF_LSDA	(1 << 22)	/* loss of sample data alert */
+#define CPU_MF_INT_CF_MTDA	(1 << 15)	/* loss of MT ctr. data alert */
 #define CPU_MF_INT_CF_CACA	(1 <<  7)	/* counter auth. change alert */
 #define CPU_MF_INT_CF_LCDA	(1 <<  6)	/* loss of counter data alert */
-#define CPU_MF_INT_CF_MASK	(CPU_MF_INT_CF_CACA|CPU_MF_INT_CF_LCDA)
+#define CPU_MF_INT_CF_MASK	(CPU_MF_INT_CF_MTDA|CPU_MF_INT_CF_CACA| \
+				 CPU_MF_INT_CF_LCDA)
 #define CPU_MF_INT_SF_MASK	(CPU_MF_INT_SF_IAE|CPU_MF_INT_SF_ISE|	\
 				 CPU_MF_INT_SF_PRA|CPU_MF_INT_SF_SACA|	\
 				 CPU_MF_INT_SF_LSDA)
@@ -172,7 +174,7 @@ static inline int lcctl(u64 ctl)
 /* Extract CPU counter */
 static inline int __ecctr(u64 ctr, u64 *content)
 {
-	register u64 _content asm("4") = 0;
+	u64 _content;
 	int cc;
 
 	asm volatile (
diff --git a/arch/s390/include/asm/div64.h b/arch/s390/include/asm/div64.h
deleted file mode 100644
index 6cd978cefb28..000000000000
--- a/arch/s390/include/asm/div64.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/div64.h>
diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h
index 1d48880b3cc1..e8f623041769 100644
--- a/arch/s390/include/asm/elf.h
+++ b/arch/s390/include/asm/elf.h
@@ -105,6 +105,7 @@
 #define HWCAP_S390_VXRS		2048
 #define HWCAP_S390_VXRS_BCD	4096
 #define HWCAP_S390_VXRS_EXT	8192
+#define HWCAP_S390_GS		16384
 
 /* Internal bits, not exposed via elf */
 #define HWCAP_INT_SIE		1UL
diff --git a/arch/s390/include/asm/emergency-restart.h b/arch/s390/include/asm/emergency-restart.h
deleted file mode 100644
index 108d8c48e42e..000000000000
--- a/arch/s390/include/asm/emergency-restart.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ASM_EMERGENCY_RESTART_H
-#define _ASM_EMERGENCY_RESTART_H
-
-#include <asm-generic/emergency-restart.h>
-
-#endif /* _ASM_EMERGENCY_RESTART_H */
diff --git a/arch/s390/include/asm/facility.h b/arch/s390/include/asm/facility.h
index 09b406db7529..cb60d5c5755d 100644
--- a/arch/s390/include/asm/facility.h
+++ b/arch/s390/include/asm/facility.h
@@ -8,14 +8,11 @@
 #define __ASM_FACILITY_H
 
 #include <generated/facilities.h>
-
-#ifndef __ASSEMBLY__
-
 #include <linux/string.h>
 #include <linux/preempt.h>
 #include <asm/lowcore.h>
 
-#define MAX_FACILITY_BIT (256*8)	/* stfle_fac_list has 256 bytes */
+#define MAX_FACILITY_BIT (sizeof(((struct lowcore *)0)->stfle_fac_list) * 8)
 
 static inline int __test_facility(unsigned long nr, void *facilities)
 {
@@ -72,5 +69,4 @@ static inline void stfle(u64 *stfle_fac_list, int size)
 	preempt_enable();
 }
 
-#endif /* __ASSEMBLY__ */
 #endif /* __ASM_FACILITY_H */
diff --git a/arch/s390/include/asm/irq_regs.h b/arch/s390/include/asm/irq_regs.h
deleted file mode 100644
index 3dd9c0b70270..000000000000
--- a/arch/s390/include/asm/irq_regs.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/irq_regs.h>
diff --git a/arch/s390/include/asm/isc.h b/arch/s390/include/asm/isc.h
index 68d7d68300f2..8a0b721a9b8d 100644
--- a/arch/s390/include/asm/isc.h
+++ b/arch/s390/include/asm/isc.h
@@ -16,6 +16,7 @@
 #define CONSOLE_ISC 1			/* console I/O subchannel */
 #define EADM_SCH_ISC 4			/* EADM subchannels */
 #define CHSC_SCH_ISC 7			/* CHSC subchannels */
+#define VFIO_CCW_ISC IO_SCH_ISC		/* VFIO-CCW I/O subchannels */
 /* Adapter interrupts. */
 #define QDIO_AIRQ_ISC IO_SCH_ISC	/* I/O subchannel in qdio mode */
 #define PCI_ISC 2			/* PCI I/O subchannels */
diff --git a/arch/s390/include/asm/kmap_types.h b/arch/s390/include/asm/kmap_types.h
deleted file mode 100644
index 0a88622339ee..000000000000
--- a/arch/s390/include/asm/kmap_types.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ASM_KMAP_TYPES_H
-#define _ASM_KMAP_TYPES_H
-
-#include <asm-generic/kmap_types.h>
-
-#endif
diff --git a/arch/s390/include/asm/local.h b/arch/s390/include/asm/local.h
deleted file mode 100644
index c11c530f74d0..000000000000
--- a/arch/s390/include/asm/local.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/local.h>
diff --git a/arch/s390/include/asm/local64.h b/arch/s390/include/asm/local64.h
deleted file mode 100644
index 36c93b5cc239..000000000000
--- a/arch/s390/include/asm/local64.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/local64.h>
diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index 61261e0e95c0..8a5b082797f8 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -157,8 +157,8 @@ struct lowcore {
 	__u64	stfle_fac_list[32];		/* 0x0f00 */
 	__u8	pad_0x1000[0x11b0-0x1000];	/* 0x1000 */
 
-	/* Pointer to vector register save area */
-	__u64	vector_save_area_addr;		/* 0x11b0 */
+	/* Pointer to the machine check extended save area */
+	__u64	mcesad;				/* 0x11b0 */
 
 	/* 64 bit extparam used for pfault/diag 250: defined by architecture */
 	__u64	ext_params2;			/* 0x11B8 */
@@ -182,10 +182,7 @@ struct lowcore {
 
 	/* Transaction abort diagnostic block */
 	__u8	pgm_tdb[256];			/* 0x1800 */
-	__u8	pad_0x1900[0x1c00-0x1900];	/* 0x1900 */
-
-	/* Software defined save area for vector registers */
-	__u8	vector_save_area[1024];		/* 0x1c00 */
+	__u8	pad_0x1900[0x2000-0x1900];	/* 0x1900 */
 } __packed;
 
 #define S390_lowcore (*((struct lowcore *) 0))
diff --git a/arch/s390/include/asm/mman.h b/arch/s390/include/asm/mman.h
index b55a59e1d134..b79813d9cf68 100644
--- a/arch/s390/include/asm/mman.h
+++ b/arch/s390/include/asm/mman.h
@@ -8,8 +8,4 @@
 
 #include <uapi/asm/mman.h>
 
-#ifndef __ASSEMBLY__
-int s390_mmap_check(unsigned long addr, unsigned long len, unsigned long flags);
-#define arch_mmap_check(addr, len, flags) s390_mmap_check(addr, len, flags)
-#endif
 #endif /* __S390_MMAN_H__ */
diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h
index bea785d7f853..bd6f30304518 100644
--- a/arch/s390/include/asm/mmu.h
+++ b/arch/s390/include/asm/mmu.h
@@ -22,6 +22,8 @@ typedef struct {
 	unsigned int has_pgste:1;
 	/* The mmu context uses storage keys. */
 	unsigned int use_skey:1;
+	/* The mmu context uses CMMA. */
+	unsigned int use_cmma:1;
 } mm_context_t;
 
 #define INIT_MM_CONTEXT(name)						   \
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
index fa2bf69be182..8712e11bead4 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -28,6 +28,7 @@ static inline int init_new_context(struct task_struct *tsk,
 	mm->context.alloc_pgste = page_table_allocate_pgste;
 	mm->context.has_pgste = 0;
 	mm->context.use_skey = 0;
+	mm->context.use_cmma = 0;
 #endif
 	switch (mm->context.asce_limit) {
 	case 1UL << 42:
diff --git a/arch/s390/include/asm/nmi.h b/arch/s390/include/asm/nmi.h
index b75fd910386a..e3e8895f5d3e 100644
--- a/arch/s390/include/asm/nmi.h
+++ b/arch/s390/include/asm/nmi.h
@@ -58,7 +58,9 @@ union mci {
 		u64 ie :  1; /* 32 indirect storage error */
 		u64 ar :  1; /* 33 access register validity */
 		u64 da :  1; /* 34 delayed access exception */
-		u64    :  7; /* 35-41 */
+		u64    :  1; /* 35 */
+		u64 gs :  1; /* 36 guarded storage registers */
+		u64    :  5; /* 37-41 */
 		u64 pr :  1; /* 42 tod programmable register validity */
 		u64 fc :  1; /* 43 fp control register validity */
 		u64 ap :  1; /* 44 ancillary report */
@@ -69,6 +71,14 @@ union mci {
 	};
 };
 
+#define MCESA_ORIGIN_MASK	(~0x3ffUL)
+#define MCESA_LC_MASK		(0xfUL)
+
+struct mcesa {
+	u8 vector_save_area[1024];
+	u8 guarded_storage_save_area[32];
+};
+
 struct pt_regs;
 
 extern void s390_handle_mcck(void);
diff --git a/arch/s390/include/asm/page-states.h b/arch/s390/include/asm/page-states.h
new file mode 100644
index 000000000000..42267a2fe29e
--- /dev/null
+++ b/arch/s390/include/asm/page-states.h
@@ -0,0 +1,19 @@
+/*
+ *    Copyright IBM Corp. 2017
+ *    Author(s): Claudio Imbrenda <imbrenda@linux.vnet.ibm.com>
+ */
+
+#ifndef PAGE_STATES_H
+#define PAGE_STATES_H
+
+#define ESSA_GET_STATE			0
+#define ESSA_SET_STABLE			1
+#define ESSA_SET_UNUSED			2
+#define ESSA_SET_VOLATILE		3
+#define ESSA_SET_POT_VOLATILE		4
+#define ESSA_SET_STABLE_RESIDENT	5
+#define ESSA_SET_STABLE_IF_RESIDENT	6
+
+#define ESSA_MAX	ESSA_SET_STABLE_IF_RESIDENT
+
+#endif
diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h
index c64c0befd3f3..dd32beb9d30c 100644
--- a/arch/s390/include/asm/perf_event.h
+++ b/arch/s390/include/asm/perf_event.h
@@ -1,7 +1,7 @@
 /*
  * Performance event support - s390 specific definitions.
  *
- * Copyright IBM Corp. 2009, 2013
+ * Copyright IBM Corp. 2009, 2017
  * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
  *	      Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
  */
@@ -47,7 +47,7 @@ struct perf_sf_sde_regs {
 };
 
 /* Perf PMU definitions for the counter facility */
-#define PERF_CPUM_CF_MAX_CTR		256
+#define PERF_CPUM_CF_MAX_CTR		0xffffUL  /* Max ctr for ECCTR */
 
 /* Perf PMU definitions for the sampling facility */
 #define PERF_CPUM_SF_MAX_CTR		2
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index ecec682bb516..e6e3b887bee3 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -372,10 +372,12 @@ static inline int is_module_addr(void *addr)
 #define PGSTE_VSIE_BIT	0x0000200000000000UL	/* ref'd in a shadow table */
 
 /* Guest Page State used for virtualization */
-#define _PGSTE_GPS_ZERO		0x0000000080000000UL
-#define _PGSTE_GPS_USAGE_MASK	0x0000000003000000UL
-#define _PGSTE_GPS_USAGE_STABLE 0x0000000000000000UL
-#define _PGSTE_GPS_USAGE_UNUSED 0x0000000001000000UL
+#define _PGSTE_GPS_ZERO			0x0000000080000000UL
+#define _PGSTE_GPS_USAGE_MASK		0x0000000003000000UL
+#define _PGSTE_GPS_USAGE_STABLE		0x0000000000000000UL
+#define _PGSTE_GPS_USAGE_UNUSED		0x0000000001000000UL
+#define _PGSTE_GPS_USAGE_POT_VOLATILE	0x0000000002000000UL
+#define _PGSTE_GPS_USAGE_VOLATILE	_PGSTE_GPS_USAGE_MASK
 
 /*
  * A user page table pointer has the space-switch-event bit, the
@@ -1041,6 +1043,12 @@ int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr);
 int get_guest_storage_key(struct mm_struct *mm, unsigned long addr,
 			  unsigned char *key);
 
+int set_pgste_bits(struct mm_struct *mm, unsigned long addr,
+				unsigned long bits, unsigned long value);
+int get_pgste(struct mm_struct *mm, unsigned long hva, unsigned long *pgstep);
+int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc,
+			unsigned long *oldpte, unsigned long *oldpgste);
+
 /*
  * Certain architectures need to do special things when PTEs
  * within a page table are directly modified.  Thus, the following
diff --git a/arch/s390/include/asm/pkey.h b/arch/s390/include/asm/pkey.h
index b48aef4188f6..4c484590d858 100644
--- a/arch/s390/include/asm/pkey.h
+++ b/arch/s390/include/asm/pkey.h
@@ -87,4 +87,25 @@ int pkey_findcard(const struct pkey_seckey *seckey,
 int pkey_skey2pkey(const struct pkey_seckey *seckey,
 		   struct pkey_protkey *protkey);
 
+/*
+ * Verify the given secure key for being able to be useable with
+ * the pkey module. Check for correct key type and check for having at
+ * least one crypto card being able to handle this key (master key
+ * or old master key verification pattern matches).
+ * Return some info about the key: keysize in bits, keytype (currently
+ * only AES), flag if key is wrapped with an old MKVP.
+ * @param seckey pointer to buffer with the input secure key
+ * @param pcardnr pointer to cardnr, receives the card number on success
+ * @param pdomain pointer to domain, receives the domain number on success
+ * @param pkeysize pointer to keysize, receives the bitsize of the key
+ * @param pattributes pointer to attributes, receives additional info
+ *	  PKEY_VERIFY_ATTR_AES if the key is an AES key
+ *	  PKEY_VERIFY_ATTR_OLD_MKVP if key has old mkvp stored in
+ * @return 0 on success, negative errno value on failure. If no card could
+ *	   be found which is able to handle this key, -ENODEV is returned.
+ */
+int pkey_verifykey(const struct pkey_seckey *seckey,
+		   u16 *pcardnr, u16 *pdomain,
+		   u16 *pkeysize, u32 *pattributes);
+
 #endif /* _KAPI_PKEY_H */
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index e4988710aa86..60d395fdc864 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -91,14 +91,15 @@ extern void execve_tail(void);
  * User space process size: 2GB for 31 bit, 4TB or 8PT for 64 bit.
  */
 
-#define TASK_SIZE_OF(tsk)	((tsk)->mm ? \
-				 (tsk)->mm->context.asce_limit : TASK_MAX_SIZE)
+#define TASK_SIZE_OF(tsk)	(test_tsk_thread_flag(tsk, TIF_31BIT) ? \
+					(1UL << 31) : (1UL << 53))
 #define TASK_UNMAPPED_BASE	(test_thread_flag(TIF_31BIT) ? \
 					(1UL << 30) : (1UL << 41))
 #define TASK_SIZE		TASK_SIZE_OF(current)
-#define TASK_MAX_SIZE		(1UL << 53)
+#define TASK_SIZE_MAX		(1UL << 53)
 
-#define STACK_TOP		(1UL << (test_thread_flag(TIF_31BIT) ? 31:42))
+#define STACK_TOP		(test_thread_flag(TIF_31BIT) ? \
+					(1UL << 31) : (1UL << 42))
 #define STACK_TOP_MAX		(1UL << 42)
 
 #define HAVE_ARCH_PICK_MMAP_LAYOUT
@@ -135,6 +136,8 @@ struct thread_struct {
 	struct list_head list;
 	/* cpu runtime instrumentation */
 	struct runtime_instr_cb *ri_cb;
+	struct gs_cb *gs_cb;		/* Current guarded storage cb */
+	struct gs_cb *gs_bc_cb;		/* Broadcast guarded storage cb */
 	unsigned char trap_tdb[256];	/* Transaction abort diagnose block */
 	/*
 	 * Warning: 'fpu' is dynamically-sized. It *MUST* be at
@@ -215,6 +218,9 @@ void show_cacheinfo(struct seq_file *m);
 /* Free all resources held by a thread. */
 extern void release_thread(struct task_struct *);
 
+/* Free guarded storage control block for current */
+void exit_thread_gs(void);
+
 /*
  * Return saved PC of a blocked thread.
  */
diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h
index 30bdb5a027f3..cd78155b1829 100644
--- a/arch/s390/include/asm/setup.h
+++ b/arch/s390/include/asm/setup.h
@@ -29,8 +29,8 @@
 #define MACHINE_FLAG_TE		_BITUL(11)
 #define MACHINE_FLAG_TLB_LC	_BITUL(12)
 #define MACHINE_FLAG_VX		_BITUL(13)
-#define MACHINE_FLAG_CAD	_BITUL(14)
-#define MACHINE_FLAG_NX		_BITUL(15)
+#define MACHINE_FLAG_NX		_BITUL(14)
+#define MACHINE_FLAG_GS		_BITUL(15)
 
 #define LPP_MAGIC		_BITUL(31)
 #define LPP_PFAULT_PID_MASK	_AC(0xffffffff, UL)
@@ -68,8 +68,8 @@ extern void detect_memory_memblock(void);
 #define MACHINE_HAS_TE		(S390_lowcore.machine_flags & MACHINE_FLAG_TE)
 #define MACHINE_HAS_TLB_LC	(S390_lowcore.machine_flags & MACHINE_FLAG_TLB_LC)
 #define MACHINE_HAS_VX		(S390_lowcore.machine_flags & MACHINE_FLAG_VX)
-#define MACHINE_HAS_CAD		(S390_lowcore.machine_flags & MACHINE_FLAG_CAD)
 #define MACHINE_HAS_NX		(S390_lowcore.machine_flags & MACHINE_FLAG_NX)
+#define MACHINE_HAS_GS		(S390_lowcore.machine_flags & MACHINE_FLAG_GS)
 
 /*
  * Console mode. Override with conmode=
diff --git a/arch/s390/include/asm/sparsemem.h b/arch/s390/include/asm/sparsemem.h
index 487428b6d099..334e279f1bce 100644
--- a/arch/s390/include/asm/sparsemem.h
+++ b/arch/s390/include/asm/sparsemem.h
@@ -2,6 +2,6 @@
 #define _ASM_S390_SPARSEMEM_H
 
 #define SECTION_SIZE_BITS	28
-#define MAX_PHYSMEM_BITS	46
+#define MAX_PHYSMEM_BITS	CONFIG_MAX_PHYSMEM_BITS
 
 #endif /* _ASM_S390_SPARSEMEM_H */
diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h
index ffc45048ea7d..f7838ecd83c6 100644
--- a/arch/s390/include/asm/spinlock.h
+++ b/arch/s390/include/asm/spinlock.h
@@ -10,6 +10,7 @@
 #define __ASM_SPINLOCK_H
 
 #include <linux/smp.h>
+#include <asm/atomic_ops.h>
 #include <asm/barrier.h>
 #include <asm/processor.h>
 
@@ -17,12 +18,6 @@
 
 extern int spin_retry;
 
-static inline int
-_raw_compare_and_swap(unsigned int *lock, unsigned int old, unsigned int new)
-{
-	return __sync_bool_compare_and_swap(lock, old, new);
-}
-
 #ifndef CONFIG_SMP
 static inline bool arch_vcpu_is_preempted(int cpu) { return false; }
 #else
@@ -40,7 +35,7 @@ bool arch_vcpu_is_preempted(int cpu);
  * (the type definitions are in asm/spinlock_types.h)
  */
 
-void arch_lock_relax(unsigned int cpu);
+void arch_lock_relax(int cpu);
 
 void arch_spin_lock_wait(arch_spinlock_t *);
 int arch_spin_trylock_retry(arch_spinlock_t *);
@@ -70,7 +65,7 @@ static inline int arch_spin_trylock_once(arch_spinlock_t *lp)
 {
 	barrier();
 	return likely(arch_spin_value_unlocked(*lp) &&
-		      _raw_compare_and_swap(&lp->lock, 0, SPINLOCK_LOCKVAL));
+		      __atomic_cmpxchg_bool(&lp->lock, 0, SPINLOCK_LOCKVAL));
 }
 
 static inline void arch_spin_lock(arch_spinlock_t *lp)
@@ -95,7 +90,7 @@ static inline int arch_spin_trylock(arch_spinlock_t *lp)
 
 static inline void arch_spin_unlock(arch_spinlock_t *lp)
 {
-	typecheck(unsigned int, lp->lock);
+	typecheck(int, lp->lock);
 	asm volatile(
 		"st	%1,%0\n"
 		: "+Q" (lp->lock)
@@ -141,16 +136,16 @@ extern int _raw_write_trylock_retry(arch_rwlock_t *lp);
 
 static inline int arch_read_trylock_once(arch_rwlock_t *rw)
 {
-	unsigned int old = ACCESS_ONCE(rw->lock);
-	return likely((int) old >= 0 &&
-		      _raw_compare_and_swap(&rw->lock, old, old + 1));
+	int old = ACCESS_ONCE(rw->lock);
+	return likely(old >= 0 &&
+		      __atomic_cmpxchg_bool(&rw->lock, old, old + 1));
 }
 
 static inline int arch_write_trylock_once(arch_rwlock_t *rw)
 {
-	unsigned int old = ACCESS_ONCE(rw->lock);
+	int old = ACCESS_ONCE(rw->lock);
 	return likely(old == 0 &&
-		      _raw_compare_and_swap(&rw->lock, 0, 0x80000000));
+		      __atomic_cmpxchg_bool(&rw->lock, 0, 0x80000000));
 }
 
 #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
@@ -161,9 +156,9 @@ static inline int arch_write_trylock_once(arch_rwlock_t *rw)
 
 #define __RAW_LOCK(ptr, op_val, op_string)		\
 ({							\
-	unsigned int old_val;				\
+	int old_val;					\
 							\
-	typecheck(unsigned int *, ptr);			\
+	typecheck(int *, ptr);				\
 	asm volatile(					\
 		op_string "	%0,%2,%1\n"		\
 		"bcr	14,0\n"				\
@@ -175,9 +170,9 @@ static inline int arch_write_trylock_once(arch_rwlock_t *rw)
 
 #define __RAW_UNLOCK(ptr, op_val, op_string)		\
 ({							\
-	unsigned int old_val;				\
+	int old_val;					\
 							\
-	typecheck(unsigned int *, ptr);			\
+	typecheck(int *, ptr);				\
 	asm volatile(					\
 		op_string "	%0,%2,%1\n"		\
 		: "=d" (old_val), "+Q" (*ptr)		\
@@ -187,14 +182,14 @@ static inline int arch_write_trylock_once(arch_rwlock_t *rw)
 })
 
 extern void _raw_read_lock_wait(arch_rwlock_t *lp);
-extern void _raw_write_lock_wait(arch_rwlock_t *lp, unsigned int prev);
+extern void _raw_write_lock_wait(arch_rwlock_t *lp, int prev);
 
 static inline void arch_read_lock(arch_rwlock_t *rw)
 {
-	unsigned int old;
+	int old;
 
 	old = __RAW_LOCK(&rw->lock, 1, __RAW_OP_ADD);
-	if ((int) old < 0)
+	if (old < 0)
 		_raw_read_lock_wait(rw);
 }
 
@@ -205,7 +200,7 @@ static inline void arch_read_unlock(arch_rwlock_t *rw)
 
 static inline void arch_write_lock(arch_rwlock_t *rw)
 {
-	unsigned int old;
+	int old;
 
 	old = __RAW_LOCK(&rw->lock, 0x80000000, __RAW_OP_OR);
 	if (old != 0)
@@ -232,11 +227,11 @@ static inline void arch_read_lock(arch_rwlock_t *rw)
 
 static inline void arch_read_unlock(arch_rwlock_t *rw)
 {
-	unsigned int old;
+	int old;
 
 	do {
 		old = ACCESS_ONCE(rw->lock);
-	} while (!_raw_compare_and_swap(&rw->lock, old, old - 1));
+	} while (!__atomic_cmpxchg_bool(&rw->lock, old, old - 1));
 }
 
 static inline void arch_write_lock(arch_rwlock_t *rw)
@@ -248,7 +243,7 @@ static inline void arch_write_lock(arch_rwlock_t *rw)
 
 static inline void arch_write_unlock(arch_rwlock_t *rw)
 {
-	typecheck(unsigned int, rw->lock);
+	typecheck(int, rw->lock);
 
 	rw->owner = 0;
 	asm volatile(
diff --git a/arch/s390/include/asm/spinlock_types.h b/arch/s390/include/asm/spinlock_types.h
index d84b6939237c..fe755eec275f 100644
--- a/arch/s390/include/asm/spinlock_types.h
+++ b/arch/s390/include/asm/spinlock_types.h
@@ -6,14 +6,14 @@
 #endif
 
 typedef struct {
-	unsigned int lock;
+	int lock;
 } __attribute__ ((aligned (4))) arch_spinlock_t;
 
 #define __ARCH_SPIN_LOCK_UNLOCKED { .lock = 0, }
 
 typedef struct {
-	unsigned int lock;
-	unsigned int owner;
+	int lock;
+	int owner;
 } arch_rwlock_t;
 
 #define __ARCH_RW_LOCK_UNLOCKED		{ 0 }
diff --git a/arch/s390/include/asm/switch_to.h b/arch/s390/include/asm/switch_to.h
index 12d45f0cfdd9..f6c2b5814ab0 100644
--- a/arch/s390/include/asm/switch_to.h
+++ b/arch/s390/include/asm/switch_to.h
@@ -10,6 +10,7 @@
 #include <linux/thread_info.h>
 #include <asm/fpu/api.h>
 #include <asm/ptrace.h>
+#include <asm/guarded_storage.h>
 
 extern struct task_struct *__switch_to(void *, void *);
 extern void update_cr_regs(struct task_struct *task);
@@ -33,12 +34,14 @@ static inline void restore_access_regs(unsigned int *acrs)
 		save_fpu_regs();					\
 		save_access_regs(&prev->thread.acrs[0]);		\
 		save_ri_cb(prev->thread.ri_cb);				\
+		save_gs_cb(prev->thread.gs_cb);				\
 	}								\
 	if (next->mm) {							\
 		update_cr_regs(next);					\
 		set_cpu_flag(CIF_FPU);					\
 		restore_access_regs(&next->thread.acrs[0]);		\
 		restore_ri_cb(next->thread.ri_cb, prev->thread.ri_cb);	\
+		restore_gs_cb(next->thread.gs_cb);			\
 	}								\
 	prev = __switch_to(prev,next);					\
 } while (0)
diff --git a/arch/s390/include/asm/sysinfo.h b/arch/s390/include/asm/sysinfo.h
index 229326c942c7..73bff45ced55 100644
--- a/arch/s390/include/asm/sysinfo.h
+++ b/arch/s390/include/asm/sysinfo.h
@@ -142,7 +142,15 @@ struct sysinfo_3_2_2 {
 
 extern int topology_max_mnest;
 
-#define TOPOLOGY_CORE_BITS	64
+/*
+ * Returns the maximum nesting level supported by the cpu topology code.
+ * The current maximum level is 4 which is the drawer level.
+ */
+static inline int topology_mnest_limit(void)
+{
+	return min(topology_max_mnest, 4);
+}
+
 #define TOPOLOGY_NR_MAG		6
 
 struct topology_core {
@@ -152,7 +160,7 @@ struct topology_core {
 	unsigned char pp:2;
 	unsigned char reserved1;
 	unsigned short origin;
-	unsigned long mask[TOPOLOGY_CORE_BITS / BITS_PER_LONG];
+	unsigned long mask;
 };
 
 struct topology_container {
diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h
index a5b54a445eb8..f36e6e2b73f0 100644
--- a/arch/s390/include/asm/thread_info.h
+++ b/arch/s390/include/asm/thread_info.h
@@ -54,11 +54,12 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
 #define TIF_NOTIFY_RESUME	0	/* callback before returning to user */
 #define TIF_SIGPENDING		1	/* signal pending */
 #define TIF_NEED_RESCHED	2	/* rescheduling necessary */
-#define TIF_SYSCALL_TRACE	3	/* syscall trace active */
-#define TIF_SYSCALL_AUDIT	4	/* syscall auditing active */
-#define TIF_SECCOMP		5	/* secure computing */
-#define TIF_SYSCALL_TRACEPOINT	6	/* syscall tracepoint instrumentation */
-#define TIF_UPROBE		7	/* breakpointed or single-stepping */
+#define TIF_UPROBE		3	/* breakpointed or single-stepping */
+#define TIF_GUARDED_STORAGE	4	/* load guarded storage control block */
+#define TIF_SYSCALL_TRACE	8	/* syscall trace active */
+#define TIF_SYSCALL_AUDIT	9	/* syscall auditing active */
+#define TIF_SECCOMP		10	/* secure computing */
+#define TIF_SYSCALL_TRACEPOINT	11	/* syscall tracepoint instrumentation */
 #define TIF_31BIT		16	/* 32bit process */
 #define TIF_MEMDIE		17	/* is terminating due to OOM killer */
 #define TIF_RESTORE_SIGMASK	18	/* restore signal mask in do_signal() */
@@ -76,5 +77,6 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
 #define _TIF_UPROBE		_BITUL(TIF_UPROBE)
 #define _TIF_31BIT		_BITUL(TIF_31BIT)
 #define _TIF_SINGLE_STEP	_BITUL(TIF_SINGLE_STEP)
+#define _TIF_GUARDED_STORAGE	_BITUL(TIF_GUARDED_STORAGE)
 
 #endif /* _ASM_THREAD_INFO_H */
diff --git a/arch/s390/include/uapi/asm/Kbuild b/arch/s390/include/uapi/asm/Kbuild
index 6848ba5c1454..addb09cee0f5 100644
--- a/arch/s390/include/uapi/asm/Kbuild
+++ b/arch/s390/include/uapi/asm/Kbuild
@@ -1,6 +1,16 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
 
+generic-y += errno.h
+generic-y += fcntl.h
+generic-y += ioctl.h
+generic-y += mman.h
+generic-y += param.h
+generic-y += poll.h
+generic-y += resource.h
+generic-y += sockios.h
+generic-y += termbits.h
+
 header-y += auxvec.h
 header-y += bitsperlong.h
 header-y += byteorder.h
@@ -11,25 +21,20 @@ header-y += cmb.h
 header-y += dasd.h
 header-y += debug.h
 header-y += errno.h
-header-y += fcntl.h
+header-y += guarded_storage.h
 header-y += hypfs.h
-header-y += ioctl.h
 header-y += ioctls.h
 header-y += ipcbuf.h
 header-y += kvm.h
 header-y += kvm_para.h
 header-y += kvm_perf.h
 header-y += kvm_virtio.h
-header-y += mman.h
 header-y += monwriter.h
 header-y += msgbuf.h
-header-y += param.h
 header-y += pkey.h
-header-y += poll.h
 header-y += posix_types.h
 header-y += ptrace.h
 header-y += qeth.h
-header-y += resource.h
 header-y += schid.h
 header-y += sclp_ctl.h
 header-y += sembuf.h
@@ -40,12 +45,10 @@ header-y += sigcontext.h
 header-y += siginfo.h
 header-y += signal.h
 header-y += socket.h
-header-y += sockios.h
 header-y += stat.h
 header-y += statfs.h
 header-y += swab.h
 header-y += tape390.h
-header-y += termbits.h
 header-y += termios.h
 header-y += types.h
 header-y += ucontext.h
diff --git a/arch/s390/include/uapi/asm/errno.h b/arch/s390/include/uapi/asm/errno.h
deleted file mode 100644
index 395e97d8005e..000000000000
--- a/arch/s390/include/uapi/asm/errno.h
+++ /dev/null
@@ -1,11 +0,0 @@
-/*
- *  S390 version
- *
- */
-
-#ifndef _S390_ERRNO_H
-#define _S390_ERRNO_H
-
-#include <asm-generic/errno.h>
-
-#endif
diff --git a/arch/s390/include/uapi/asm/fcntl.h b/arch/s390/include/uapi/asm/fcntl.h
deleted file mode 100644
index 46ab12db5739..000000000000
--- a/arch/s390/include/uapi/asm/fcntl.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/fcntl.h>
diff --git a/arch/s390/include/uapi/asm/guarded_storage.h b/arch/s390/include/uapi/asm/guarded_storage.h
new file mode 100644
index 000000000000..852850e8e17e
--- /dev/null
+++ b/arch/s390/include/uapi/asm/guarded_storage.h
@@ -0,0 +1,77 @@
+#ifndef _GUARDED_STORAGE_H
+#define _GUARDED_STORAGE_H
+
+#include <linux/types.h>
+
+struct gs_cb {
+	__u64 reserved;
+	__u64 gsd;
+	__u64 gssm;
+	__u64 gs_epl_a;
+};
+
+struct gs_epl {
+	__u8 pad1;
+	union {
+		__u8 gs_eam;
+		struct {
+			__u8	: 6;
+			__u8 e	: 1;
+			__u8 b	: 1;
+		};
+	};
+	union {
+		__u8 gs_eci;
+		struct {
+			__u8 tx	: 1;
+			__u8 cx	: 1;
+			__u8	: 5;
+			__u8 in	: 1;
+		};
+	};
+	union {
+		__u8 gs_eai;
+		struct {
+			__u8	: 1;
+			__u8 t	: 1;
+			__u8 as	: 2;
+			__u8 ar	: 4;
+		};
+	};
+	__u32 pad2;
+	__u64 gs_eha;
+	__u64 gs_eia;
+	__u64 gs_eoa;
+	__u64 gs_eir;
+	__u64 gs_era;
+};
+
+#define GS_ENABLE	0
+#define	GS_DISABLE	1
+#define GS_SET_BC_CB	2
+#define GS_CLEAR_BC_CB	3
+#define GS_BROADCAST	4
+
+static inline void load_gs_cb(struct gs_cb *gs_cb)
+{
+	asm volatile(".insn rxy,0xe3000000004d,0,%0" : : "Q" (*gs_cb));
+}
+
+static inline void store_gs_cb(struct gs_cb *gs_cb)
+{
+	asm volatile(".insn rxy,0xe30000000049,0,%0" : : "Q" (*gs_cb));
+}
+
+static inline void save_gs_cb(struct gs_cb *gs_cb)
+{
+	if (gs_cb)
+		store_gs_cb(gs_cb);
+}
+
+static inline void restore_gs_cb(struct gs_cb *gs_cb)
+{
+	if (gs_cb)
+		load_gs_cb(gs_cb);
+}
+
+#endif /* _GUARDED_STORAGE_H */
diff --git a/arch/s390/include/uapi/asm/ioctl.h b/arch/s390/include/uapi/asm/ioctl.h
deleted file mode 100644
index b279fe06dfe5..000000000000
--- a/arch/s390/include/uapi/asm/ioctl.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/ioctl.h>
diff --git a/arch/s390/include/uapi/asm/mman.h b/arch/s390/include/uapi/asm/mman.h
deleted file mode 100644
index de23da1f41b2..000000000000
--- a/arch/s390/include/uapi/asm/mman.h
+++ /dev/null
@@ -1,6 +0,0 @@
-/*
- *  S390 version
- *
- *  Derived from "include/asm-i386/mman.h"
- */
-#include <asm-generic/mman.h>
diff --git a/arch/s390/include/uapi/asm/param.h b/arch/s390/include/uapi/asm/param.h
deleted file mode 100644
index c616821bf2ac..000000000000
--- a/arch/s390/include/uapi/asm/param.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ASMS390_PARAM_H
-#define _ASMS390_PARAM_H
-
-#include <asm-generic/param.h>
-
-#endif /* _ASMS390_PARAM_H */
diff --git a/arch/s390/include/uapi/asm/pkey.h b/arch/s390/include/uapi/asm/pkey.h
index ed7f19c27ce5..e6c04faf8a6c 100644
--- a/arch/s390/include/uapi/asm/pkey.h
+++ b/arch/s390/include/uapi/asm/pkey.h
@@ -109,4 +109,23 @@ struct pkey_skey2pkey {
 };
 #define PKEY_SKEY2PKEY _IOWR(PKEY_IOCTL_MAGIC, 0x06, struct pkey_skey2pkey)
 
+/*
+ * Verify the given secure key for being able to be useable with
+ * the pkey module. Check for correct key type and check for having at
+ * least one crypto card being able to handle this key (master key
+ * or old master key verification pattern matches).
+ * Return some info about the key: keysize in bits, keytype (currently
+ * only AES), flag if key is wrapped with an old MKVP.
+ */
+struct pkey_verifykey {
+	struct pkey_seckey seckey;	       /* in: the secure key blob */
+	__u16  cardnr;			       /* out: card number	  */
+	__u16  domain;			       /* out: domain number	  */
+	__u16  keysize;			       /* out: key size in bits   */
+	__u32  attributes;		       /* out: attribute bits	  */
+};
+#define PKEY_VERIFYKEY _IOWR(PKEY_IOCTL_MAGIC, 0x07, struct pkey_verifykey)
+#define PKEY_VERIFY_ATTR_AES	   0x00000001  /* key is an AES key */
+#define PKEY_VERIFY_ATTR_OLD_MKVP  0x00000100  /* key has old MKVP value */
+
 #endif /* _UAPI_PKEY_H */
diff --git a/arch/s390/include/uapi/asm/poll.h b/arch/s390/include/uapi/asm/poll.h
deleted file mode 100644
index c98509d3149e..000000000000
--- a/arch/s390/include/uapi/asm/poll.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/poll.h>
diff --git a/arch/s390/include/uapi/asm/resource.h b/arch/s390/include/uapi/asm/resource.h
deleted file mode 100644
index ec23d1c73c92..000000000000
--- a/arch/s390/include/uapi/asm/resource.h
+++ /dev/null
@@ -1,13 +0,0 @@
-/*
- *  S390 version
- *
- *  Derived from "include/asm-i386/resources.h"
- */
-
-#ifndef _S390_RESOURCE_H
-#define _S390_RESOURCE_H
-
-#include <asm-generic/resource.h>
-
-#endif
-
diff --git a/arch/s390/include/uapi/asm/sockios.h b/arch/s390/include/uapi/asm/sockios.h
deleted file mode 100644
index 6f60eee73242..000000000000
--- a/arch/s390/include/uapi/asm/sockios.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ASM_S390_SOCKIOS_H
-#define _ASM_S390_SOCKIOS_H
-
-#include <asm-generic/sockios.h>
-
-#endif
diff --git a/arch/s390/include/uapi/asm/termbits.h b/arch/s390/include/uapi/asm/termbits.h
deleted file mode 100644
index 71bf6ac6a2b9..000000000000
--- a/arch/s390/include/uapi/asm/termbits.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ASM_S390_TERMBITS_H
-#define _ASM_S390_TERMBITS_H
-
-#include <asm-generic/termbits.h>
-
-#endif
diff --git a/arch/s390/include/uapi/asm/unistd.h b/arch/s390/include/uapi/asm/unistd.h
index 152de9b796e1..ea42290e7d51 100644
--- a/arch/s390/include/uapi/asm/unistd.h
+++ b/arch/s390/include/uapi/asm/unistd.h
@@ -313,7 +313,7 @@
 #define __NR_copy_file_range	375
 #define __NR_preadv2		376
 #define __NR_pwritev2		377
-/* Number 378 is reserved for guarded storage */
+#define __NR_s390_guarded_storage	378
 #define __NR_statx		379
 #define NR_syscalls 380
 
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index 060ce548fe8b..adb3fe2e3d42 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -51,14 +51,12 @@ CFLAGS_dumpstack.o	+= -fno-optimize-sibling-calls
 #
 CFLAGS_ptrace.o		+= -DUTS_MACHINE='"$(UTS_MACHINE)"'
 
-CFLAGS_sysinfo.o	+= -w
-
 obj-y	:= traps.o time.o process.o base.o early.o setup.o idle.o vtime.o
 obj-y	+= processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o
 obj-y	+= debug.o irq.o ipl.o dis.o diag.o vdso.o als.o
 obj-y	+= sysinfo.o jump_label.o lgr.o os_info.o machine_kexec.o pgm_check.o
-obj-y	+= runtime_instr.o cache.o fpu.o dumpstack.o
-obj-y	+= entry.o reipl.o relocate_kernel.o
+obj-y	+= runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o
+obj-y	+= entry.o reipl.o relocate_kernel.o kdebugfs.o
 
 extra-y				+= head.o head64.o vmlinux.lds
 
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index c4b3570ded5b..6bb29633e1f1 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -175,7 +175,7 @@ int main(void)
 	/* software defined ABI-relevant lowcore locations 0xe00 - 0xe20 */
 	OFFSET(__LC_DUMP_REIPL, lowcore, ipib);
 	/* hardware defined lowcore locations 0x1000 - 0x18ff */
-	OFFSET(__LC_VX_SAVE_AREA_ADDR, lowcore, vector_save_area_addr);
+	OFFSET(__LC_MCESAD, lowcore, mcesad);
 	OFFSET(__LC_EXT_PARAMS2, lowcore, ext_params2);
 	OFFSET(__LC_FPREGS_SAVE_AREA, lowcore, floating_pt_save_area);
 	OFFSET(__LC_GPREGS_SAVE_AREA, lowcore, gpregs_save_area);
diff --git a/arch/s390/kernel/compat_wrapper.c b/arch/s390/kernel/compat_wrapper.c
index e89cc2e71db1..986642a3543b 100644
--- a/arch/s390/kernel/compat_wrapper.c
+++ b/arch/s390/kernel/compat_wrapper.c
@@ -178,4 +178,5 @@ COMPAT_SYSCALL_WRAP3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
 COMPAT_SYSCALL_WRAP6(sendto, int, fd, void __user *, buff, size_t, len, unsigned int, flags, struct sockaddr __user *, addr, int, addr_len);
 COMPAT_SYSCALL_WRAP3(mlock2, unsigned long, start, size_t, len, int, flags);
 COMPAT_SYSCALL_WRAP6(copy_file_range, int, fd_in, loff_t __user *, off_in, int, fd_out, loff_t __user *, off_out, size_t, len, unsigned int, flags);
+COMPAT_SYSCALL_WRAP2(s390_guarded_storage, int, command, struct gs_cb *, gs_cb);
 COMPAT_SYSCALL_WRAP5(statx, int, dfd, const char __user *, path, unsigned, flags, unsigned, mask, struct statx __user *, buffer);
diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
index dd1d5c62c374..d628afc26708 100644
--- a/arch/s390/kernel/crash_dump.c
+++ b/arch/s390/kernel/crash_dump.c
@@ -429,6 +429,20 @@ static void *nt_vmcoreinfo(void *ptr)
 }
 
 /*
+ * Initialize final note (needed for /proc/vmcore code)
+ */
+static void *nt_final(void *ptr)
+{
+	Elf64_Nhdr *note;
+
+	note = (Elf64_Nhdr *) ptr;
+	note->n_namesz = 0;
+	note->n_descsz = 0;
+	note->n_type = 0;
+	return PTR_ADD(ptr, sizeof(Elf64_Nhdr));
+}
+
+/*
  * Initialize ELF header (new kernel)
  */
 static void *ehdr_init(Elf64_Ehdr *ehdr, int mem_chunk_cnt)
@@ -515,6 +529,7 @@ static void *notes_init(Elf64_Phdr *phdr, void *ptr, u64 notes_offset)
 		if (sa->prefix != 0)
 			ptr = fill_cpu_elf_notes(ptr, cpu++, sa);
 	ptr = nt_vmcoreinfo(ptr);
+	ptr = nt_final(ptr);
 	memset(phdr, 0, sizeof(*phdr));
 	phdr->p_type = PT_NOTE;
 	phdr->p_offset = notes_offset;
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index 4e65c79cc5f2..5d20182ee8ae 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -231,9 +231,29 @@ static noinline __init void detect_machine_type(void)
 		S390_lowcore.machine_flags |= MACHINE_FLAG_VM;
 }
 
+/* Remove leading, trailing and double whitespace. */
+static inline void strim_all(char *str)
+{
+	char *s;
+
+	s = strim(str);
+	if (s != str)
+		memmove(str, s, strlen(s));
+	while (*str) {
+		if (!isspace(*str++))
+			continue;
+		if (isspace(*str)) {
+			s = skip_spaces(str);
+			memmove(str, s, strlen(s) + 1);
+		}
+	}
+}
+
 static noinline __init void setup_arch_string(void)
 {
 	struct sysinfo_1_1_1 *mach = (struct sysinfo_1_1_1 *)&sysinfo_page;
+	struct sysinfo_3_2_2 *vm = (struct sysinfo_3_2_2 *)&sysinfo_page;
+	char mstr[80], hvstr[17];
 
 	if (stsi(mach, 1, 1, 1))
 		return;
@@ -241,14 +261,21 @@ static noinline __init void setup_arch_string(void)
 	EBCASC(mach->type, sizeof(mach->type));
 	EBCASC(mach->model, sizeof(mach->model));
 	EBCASC(mach->model_capacity, sizeof(mach->model_capacity));
-	dump_stack_set_arch_desc("%-16.16s %-4.4s %-16.16s %-16.16s (%s)",
-				 mach->manufacturer,
-				 mach->type,
-				 mach->model,
-				 mach->model_capacity,
-				 MACHINE_IS_LPAR ? "LPAR" :
-				 MACHINE_IS_VM ? "z/VM" :
-				 MACHINE_IS_KVM ? "KVM" : "unknown");
+	sprintf(mstr, "%-16.16s %-4.4s %-16.16s %-16.16s",
+		mach->manufacturer, mach->type,
+		mach->model, mach->model_capacity);
+	strim_all(mstr);
+	if (stsi(vm, 3, 2, 2) == 0 && vm->count) {
+		EBCASC(vm->vm[0].cpi, sizeof(vm->vm[0].cpi));
+		sprintf(hvstr, "%-16.16s", vm->vm[0].cpi);
+		strim_all(hvstr);
+	} else {
+		sprintf(hvstr, "%s",
+			MACHINE_IS_LPAR ? "LPAR" :
+			MACHINE_IS_VM ? "z/VM" :
+			MACHINE_IS_KVM ? "KVM" : "unknown");
+	}
+	dump_stack_set_arch_desc("%s (%s)", mstr, hvstr);
 }
 
 static __init void setup_topology(void)
@@ -358,6 +385,8 @@ static __init void detect_machine_facilities(void)
 		S390_lowcore.machine_flags |= MACHINE_FLAG_NX;
 		__ctl_set_bit(0, 20);
 	}
+	if (test_facility(133))
+		S390_lowcore.machine_flags |= MACHINE_FLAG_GS;
 }
 
 static inline void save_vector_registers(void)
@@ -375,7 +404,7 @@ static int __init topology_setup(char *str)
 
 	rc = kstrtobool(str, &enabled);
 	if (!rc && !enabled)
-		S390_lowcore.machine_flags &= ~MACHINE_HAS_TOPOLOGY;
+		S390_lowcore.machine_flags &= ~MACHINE_FLAG_TOPOLOGY;
 	return rc;
 }
 early_param("topology", topology_setup);
@@ -405,23 +434,16 @@ early_param("noexec", noexec_setup);
 
 static int __init cad_setup(char *str)
 {
-	int val;
-
-	get_option(&str, &val);
-	if (val && test_facility(128))
-		S390_lowcore.machine_flags |= MACHINE_FLAG_CAD;
-	return 0;
-}
-early_param("cad", cad_setup);
+	bool enabled;
+	int rc;
 
-static int __init cad_init(void)
-{
-	if (MACHINE_HAS_CAD)
+	rc = kstrtobool(str, &enabled);
+	if (!rc && enabled && test_facility(128))
 		/* Enable problem state CAD. */
 		__ctl_set_bit(2, 3);
-	return 0;
+	return rc;
 }
-early_initcall(cad_init);
+early_param("cad", cad_setup);
 
 static __init void memmove_early(void *dst, const void *src, size_t n)
 {
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 6a7d737d514c..c6cf338c9327 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -47,7 +47,7 @@ STACK_SIZE  = 1 << STACK_SHIFT
 STACK_INIT = STACK_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE
 
 _TIF_WORK	= (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
-		   _TIF_UPROBE)
+		   _TIF_UPROBE | _TIF_GUARDED_STORAGE)
 _TIF_TRACE	= (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \
 		   _TIF_SYSCALL_TRACEPOINT)
 _CIF_WORK	= (_CIF_MCCK_PENDING | _CIF_ASCE_PRIMARY | \
@@ -189,8 +189,6 @@ ENTRY(__switch_to)
 	stg	%r3,__LC_CURRENT		# store task struct of next
 	stg	%r15,__LC_KERNEL_STACK		# store end of kernel stack
 	lg	%r15,__THREAD_ksp(%r1)		# load kernel stack of next
-	/* c4 is used in guest detection: arch/s390/kernel/perf_cpum_sf.c */
-	lctl	%c4,%c4,__TASK_pid(%r3)		# load pid to control reg. 4
 	mvc	__LC_CURRENT_PID(4,%r0),__TASK_pid(%r3) # store pid of next
 	lmg	%r6,%r15,__SF_GPRS(%r15)	# load gprs of next task
 	TSTMSK	__LC_MACHINE_FLAGS,MACHINE_FLAG_LPP
@@ -332,6 +330,8 @@ ENTRY(system_call)
 	TSTMSK	__TI_flags(%r12),_TIF_UPROBE
 	jo	.Lsysc_uprobe_notify
 #endif
+	TSTMSK	__TI_flags(%r12),_TIF_GUARDED_STORAGE
+	jo	.Lsysc_guarded_storage
 	TSTMSK	__PT_FLAGS(%r11),_PIF_PER_TRAP
 	jo	.Lsysc_singlestep
 	TSTMSK	__TI_flags(%r12),_TIF_SIGPENDING
@@ -409,6 +409,14 @@ ENTRY(system_call)
 #endif
 
 #
+# _TIF_GUARDED_STORAGE is set, call guarded_storage_load
+#
+.Lsysc_guarded_storage:
+	lgr	%r2,%r11		# pass pointer to pt_regs
+	larl	%r14,.Lsysc_return
+	jg	gs_load_bc_cb
+
+#
 # _PIF_PER_TRAP is set, call do_per_trap
 #
 .Lsysc_singlestep:
@@ -663,6 +671,8 @@ ENTRY(io_int_handler)
 	jo	.Lio_sigpending
 	TSTMSK	__TI_flags(%r12),_TIF_NOTIFY_RESUME
 	jo	.Lio_notify_resume
+	TSTMSK	__TI_flags(%r12),_TIF_GUARDED_STORAGE
+	jo	.Lio_guarded_storage
 	TSTMSK	__LC_CPU_FLAGS,_CIF_FPU
 	jo	.Lio_vxrs
 	TSTMSK	__LC_CPU_FLAGS,(_CIF_ASCE_PRIMARY|_CIF_ASCE_SECONDARY)
@@ -697,6 +707,18 @@ ENTRY(io_int_handler)
 	jg	load_fpu_regs
 
 #
+# _TIF_GUARDED_STORAGE is set, call guarded_storage_load
+#
+.Lio_guarded_storage:
+	# TRACE_IRQS_ON already done at .Lio_return
+	ssm	__LC_SVC_NEW_PSW	# reenable interrupts
+	lgr	%r2,%r11		# pass pointer to pt_regs
+	brasl	%r14,gs_load_bc_cb
+	ssm	__LC_PGM_NEW_PSW	# disable I/O and ext. interrupts
+	TRACE_IRQS_OFF
+	j	.Lio_return
+
+#
 # _TIF_NEED_RESCHED is set, call schedule
 #
 .Lio_reschedule:
diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h
index 33f901865326..dbf5f7e18246 100644
--- a/arch/s390/kernel/entry.h
+++ b/arch/s390/kernel/entry.h
@@ -74,12 +74,14 @@ long sys_sigreturn(void);
 
 long sys_s390_personality(unsigned int personality);
 long sys_s390_runtime_instr(int command, int signum);
+long sys_s390_guarded_storage(int command, struct gs_cb __user *);
 long sys_s390_pci_mmio_write(unsigned long, const void __user *, size_t);
 long sys_s390_pci_mmio_read(unsigned long, void __user *, size_t);
 
 DECLARE_PER_CPU(u64, mt_cycles[8]);
 
 void verify_facilities(void);
+void gs_load_bc_cb(struct pt_regs *regs);
 void set_fs_fixup(void);
 
 #endif /* _ENTRY_H */
diff --git a/arch/s390/kernel/guarded_storage.c b/arch/s390/kernel/guarded_storage.c
new file mode 100644
index 000000000000..6f064745c3b1
--- /dev/null
+++ b/arch/s390/kernel/guarded_storage.c
@@ -0,0 +1,128 @@
+/*
+ * Copyright IBM Corp. 2016
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/syscalls.h>
+#include <linux/signal.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <asm/guarded_storage.h>
+#include "entry.h"
+
+void exit_thread_gs(void)
+{
+	kfree(current->thread.gs_cb);
+	kfree(current->thread.gs_bc_cb);
+	current->thread.gs_cb = current->thread.gs_bc_cb = NULL;
+}
+
+static int gs_enable(void)
+{
+	struct gs_cb *gs_cb;
+
+	if (!current->thread.gs_cb) {
+		gs_cb = kzalloc(sizeof(*gs_cb), GFP_KERNEL);
+		if (!gs_cb)
+			return -ENOMEM;
+		gs_cb->gsd = 25;
+		preempt_disable();
+		__ctl_set_bit(2, 4);
+		load_gs_cb(gs_cb);
+		current->thread.gs_cb = gs_cb;
+		preempt_enable();
+	}
+	return 0;
+}
+
+static int gs_disable(void)
+{
+	if (current->thread.gs_cb) {
+		preempt_disable();
+		kfree(current->thread.gs_cb);
+		current->thread.gs_cb = NULL;
+		__ctl_clear_bit(2, 4);
+		preempt_enable();
+	}
+	return 0;
+}
+
+static int gs_set_bc_cb(struct gs_cb __user *u_gs_cb)
+{
+	struct gs_cb *gs_cb;
+
+	gs_cb = current->thread.gs_bc_cb;
+	if (!gs_cb) {
+		gs_cb = kzalloc(sizeof(*gs_cb), GFP_KERNEL);
+		if (!gs_cb)
+			return -ENOMEM;
+		current->thread.gs_bc_cb = gs_cb;
+	}
+	if (copy_from_user(gs_cb, u_gs_cb, sizeof(*gs_cb)))
+		return -EFAULT;
+	return 0;
+}
+
+static int gs_clear_bc_cb(void)
+{
+	struct gs_cb *gs_cb;
+
+	gs_cb = current->thread.gs_bc_cb;
+	current->thread.gs_bc_cb = NULL;
+	kfree(gs_cb);
+	return 0;
+}
+
+void gs_load_bc_cb(struct pt_regs *regs)
+{
+	struct gs_cb *gs_cb;
+
+	preempt_disable();
+	clear_thread_flag(TIF_GUARDED_STORAGE);
+	gs_cb = current->thread.gs_bc_cb;
+	if (gs_cb) {
+		kfree(current->thread.gs_cb);
+		current->thread.gs_bc_cb = NULL;
+		__ctl_set_bit(2, 4);
+		load_gs_cb(gs_cb);
+		current->thread.gs_cb = gs_cb;
+	}
+	preempt_enable();
+}
+
+static int gs_broadcast(void)
+{
+	struct task_struct *sibling;
+
+	read_lock(&tasklist_lock);
+	for_each_thread(current, sibling) {
+		if (!sibling->thread.gs_bc_cb)
+			continue;
+		if (test_and_set_tsk_thread_flag(sibling, TIF_GUARDED_STORAGE))
+			kick_process(sibling);
+	}
+	read_unlock(&tasklist_lock);
+	return 0;
+}
+
+SYSCALL_DEFINE2(s390_guarded_storage, int, command,
+		struct gs_cb __user *, gs_cb)
+{
+	if (!MACHINE_HAS_GS)
+		return -EOPNOTSUPP;
+	switch (command) {
+	case GS_ENABLE:
+		return gs_enable();
+	case GS_DISABLE:
+		return gs_disable();
+	case GS_SET_BC_CB:
+		return gs_set_bc_cb(gs_cb);
+	case GS_CLEAR_BC_CB:
+		return gs_clear_bc_cb();
+	case GS_BROADCAST:
+		return gs_broadcast();
+	default:
+		return -EINVAL;
+	}
+}
diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S
index 0b5ebf8a3d30..eff5b31671d4 100644
--- a/arch/s390/kernel/head.S
+++ b/arch/s390/kernel/head.S
@@ -25,7 +25,6 @@
 #include <linux/linkage.h>
 #include <asm/asm-offsets.h>
 #include <asm/thread_info.h>
-#include <asm/facility.h>
 #include <asm/page.h>
 #include <asm/ptrace.h>
 
diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S
index 482d3526e32b..31c91f24e562 100644
--- a/arch/s390/kernel/head64.S
+++ b/arch/s390/kernel/head64.S
@@ -52,7 +52,7 @@ ENTRY(startup_continue)
 	.quad	0			# cr1: primary space segment table
 	.quad	.Lduct			# cr2: dispatchable unit control table
 	.quad	0			# cr3: instruction authorization
-	.quad	0			# cr4: instruction authorization
+	.quad	0xffff			# cr4: instruction authorization
 	.quad	.Lduct			# cr5: primary-aste origin
 	.quad	0			# cr6:	I/O interrupts
 	.quad	0			# cr7:	secondary space segment table
diff --git a/arch/s390/kernel/kdebugfs.c b/arch/s390/kernel/kdebugfs.c
new file mode 100644
index 000000000000..ee85e17dd79d
--- /dev/null
+++ b/arch/s390/kernel/kdebugfs.c
@@ -0,0 +1,15 @@
+#include <linux/debugfs.h>
+#include <linux/export.h>
+#include <linux/init.h>
+
+struct dentry *arch_debugfs_dir;
+EXPORT_SYMBOL(arch_debugfs_dir);
+
+static int __init arch_kdebugfs_init(void)
+{
+	arch_debugfs_dir = debugfs_create_dir("s390", NULL);
+	if (IS_ERR(arch_debugfs_dir))
+		arch_debugfs_dir = NULL;
+	return 0;
+}
+postcore_initcall(arch_kdebugfs_init);
diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c
index 3074c1d83829..db5658daf994 100644
--- a/arch/s390/kernel/machine_kexec.c
+++ b/arch/s390/kernel/machine_kexec.c
@@ -27,6 +27,7 @@
 #include <asm/cacheflush.h>
 #include <asm/os_info.h>
 #include <asm/switch_to.h>
+#include <asm/nmi.h>
 
 typedef void (*relocate_kernel_t)(kimage_entry_t *, unsigned long);
 
@@ -102,6 +103,8 @@ static void __do_machine_kdump(void *image)
  */
 static noinline void __machine_kdump(void *image)
 {
+	struct mcesa *mcesa;
+	unsigned long cr2_old, cr2_new;
 	int this_cpu, cpu;
 
 	lgr_info_log();
@@ -114,8 +117,16 @@ static noinline void __machine_kdump(void *image)
 			continue;
 	}
 	/* Store status of the boot CPU */
+	mcesa = (struct mcesa *)(S390_lowcore.mcesad & MCESA_ORIGIN_MASK);
 	if (MACHINE_HAS_VX)
-		save_vx_regs((void *) &S390_lowcore.vector_save_area);
+		save_vx_regs((__vector128 *) mcesa->vector_save_area);
+	if (MACHINE_HAS_GS) {
+		__ctl_store(cr2_old, 2, 2);
+		cr2_new = cr2_old | (1UL << 4);
+		__ctl_load(cr2_new, 2, 2);
+		save_gs_cb((struct gs_cb *) mcesa->guarded_storage_save_area);
+		__ctl_load(cr2_old, 2, 2);
+	}
 	/*
 	 * To create a good backchain for this CPU in the dump store_status
 	 * is passed the address of a function. The address is saved into
diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c
index 9bf8327154ee..985589523970 100644
--- a/arch/s390/kernel/nmi.c
+++ b/arch/s390/kernel/nmi.c
@@ -106,6 +106,7 @@ static int notrace s390_validate_registers(union mci mci, int umode)
 	int kill_task;
 	u64 zero;
 	void *fpt_save_area;
+	struct mcesa *mcesa;
 
 	kill_task = 0;
 	zero = 0;
@@ -165,6 +166,7 @@ static int notrace s390_validate_registers(union mci mci, int umode)
 			     : : "Q" (S390_lowcore.fpt_creg_save_area));
 	}
 
+	mcesa = (struct mcesa *)(S390_lowcore.mcesad & MCESA_ORIGIN_MASK);
 	if (!MACHINE_HAS_VX) {
 		/* Validate floating point registers */
 		asm volatile(
@@ -209,8 +211,8 @@ static int notrace s390_validate_registers(union mci mci, int umode)
 			"	la	1,%0\n"
 			"	.word	0xe70f,0x1000,0x0036\n"	/* vlm 0,15,0(1) */
 			"	.word	0xe70f,0x1100,0x0c36\n"	/* vlm 16,31,256(1) */
-			: : "Q" (*(struct vx_array *)
-				 &S390_lowcore.vector_save_area) : "1");
+			: : "Q" (*(struct vx_array *) mcesa->vector_save_area)
+			: "1");
 		__ctl_load(S390_lowcore.cregs_save_area[0], 0, 0);
 	}
 	/* Validate access registers */
@@ -224,6 +226,19 @@ static int notrace s390_validate_registers(union mci mci, int umode)
 		 */
 		kill_task = 1;
 	}
+	/* Validate guarded storage registers */
+	if (MACHINE_HAS_GS && (S390_lowcore.cregs_save_area[2] & (1UL << 4))) {
+		if (!mci.gs)
+			/*
+			 * Guarded storage register can't be restored and
+			 * the current processes uses guarded storage.
+			 * It has to be terminated.
+			 */
+			kill_task = 1;
+		else
+			load_gs_cb((struct gs_cb *)
+				   mcesa->guarded_storage_save_area);
+	}
 	/*
 	 * We don't even try to validate the TOD register, since we simply
 	 * can't write something sensible into that register.
diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c
index 1aba10e90906..746d03423333 100644
--- a/arch/s390/kernel/perf_cpum_cf.c
+++ b/arch/s390/kernel/perf_cpum_cf.c
@@ -1,7 +1,7 @@
 /*
  * Performance event support for s390x - CPU-measurement Counter Facility
  *
- *  Copyright IBM Corp. 2012
+ *  Copyright IBM Corp. 2012, 2017
  *  Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
  *
  * This program is free software; you can redistribute it and/or modify
@@ -22,19 +22,12 @@
 #include <asm/irq.h>
 #include <asm/cpu_mf.h>
 
-/* CPU-measurement counter facility supports these CPU counter sets:
- * For CPU counter sets:
- *    Basic counter set:	     0-31
- *    Problem-state counter set:    32-63
- *    Crypto-activity counter set:  64-127
- *    Extented counter set:	   128-159
- */
 enum cpumf_ctr_set {
-	/* CPU counter sets */
-	CPUMF_CTR_SET_BASIC   = 0,
-	CPUMF_CTR_SET_USER    = 1,
-	CPUMF_CTR_SET_CRYPTO  = 2,
-	CPUMF_CTR_SET_EXT     = 3,
+	CPUMF_CTR_SET_BASIC   = 0,    /* Basic Counter Set */
+	CPUMF_CTR_SET_USER    = 1,    /* Problem-State Counter Set */
+	CPUMF_CTR_SET_CRYPTO  = 2,    /* Crypto-Activity Counter Set */
+	CPUMF_CTR_SET_EXT     = 3,    /* Extended Counter Set */
+	CPUMF_CTR_SET_MT_DIAG = 4,    /* MT-diagnostic Counter Set */
 
 	/* Maximum number of counter sets */
 	CPUMF_CTR_SET_MAX,
@@ -47,6 +40,7 @@ static const u64 cpumf_state_ctl[CPUMF_CTR_SET_MAX] = {
 	[CPUMF_CTR_SET_USER]	= 0x04,
 	[CPUMF_CTR_SET_CRYPTO]	= 0x08,
 	[CPUMF_CTR_SET_EXT]	= 0x01,
+	[CPUMF_CTR_SET_MT_DIAG] = 0x20,
 };
 
 static void ctr_set_enable(u64 *state, int ctr_set)
@@ -76,19 +70,20 @@ struct cpu_hw_events {
 };
 static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
 	.ctr_set = {
-		[CPUMF_CTR_SET_BASIC]  = ATOMIC_INIT(0),
-		[CPUMF_CTR_SET_USER]   = ATOMIC_INIT(0),
-		[CPUMF_CTR_SET_CRYPTO] = ATOMIC_INIT(0),
-		[CPUMF_CTR_SET_EXT]    = ATOMIC_INIT(0),
+		[CPUMF_CTR_SET_BASIC]	= ATOMIC_INIT(0),
+		[CPUMF_CTR_SET_USER]	= ATOMIC_INIT(0),
+		[CPUMF_CTR_SET_CRYPTO]	= ATOMIC_INIT(0),
+		[CPUMF_CTR_SET_EXT]	= ATOMIC_INIT(0),
+		[CPUMF_CTR_SET_MT_DIAG] = ATOMIC_INIT(0),
 	},
 	.state = 0,
 	.flags = 0,
 	.txn_flags = 0,
 };
 
-static int get_counter_set(u64 event)
+static enum cpumf_ctr_set get_counter_set(u64 event)
 {
-	int set = -1;
+	int set = CPUMF_CTR_SET_MAX;
 
 	if (event < 32)
 		set = CPUMF_CTR_SET_BASIC;
@@ -98,34 +93,17 @@ static int get_counter_set(u64 event)
 		set = CPUMF_CTR_SET_CRYPTO;
 	else if (event < 256)
 		set = CPUMF_CTR_SET_EXT;
+	else if (event >= 448 && event < 496)
+		set = CPUMF_CTR_SET_MT_DIAG;
 
 	return set;
 }
 
-static int validate_event(const struct hw_perf_event *hwc)
-{
-	switch (hwc->config_base) {
-	case CPUMF_CTR_SET_BASIC:
-	case CPUMF_CTR_SET_USER:
-	case CPUMF_CTR_SET_CRYPTO:
-	case CPUMF_CTR_SET_EXT:
-		/* check for reserved counters */
-		if ((hwc->config >=  6 && hwc->config <=  31) ||
-		    (hwc->config >= 38 && hwc->config <=  63) ||
-		    (hwc->config >= 80 && hwc->config <= 127))
-			return -EOPNOTSUPP;
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
 static int validate_ctr_version(const struct hw_perf_event *hwc)
 {
 	struct cpu_hw_events *cpuhw;
 	int err = 0;
+	u16 mtdiag_ctl;
 
 	cpuhw = &get_cpu_var(cpu_hw_events);
 
@@ -145,6 +123,27 @@ static int validate_ctr_version(const struct hw_perf_event *hwc)
 		    (cpuhw->info.csvn  > 2 && hwc->config > 255))
 			err = -EOPNOTSUPP;
 		break;
+	case CPUMF_CTR_SET_MT_DIAG:
+		if (cpuhw->info.csvn <= 3)
+			err = -EOPNOTSUPP;
+		/*
+		 * MT-diagnostic counters are read-only.  The counter set
+		 * is automatically enabled and activated on all CPUs with
+		 * multithreading (SMT).  Deactivation of multithreading
+		 * also disables the counter set.  State changes are ignored
+		 * by lcctl().	Because Linux controls SMT enablement through
+		 * a kernel parameter only, the counter set is either disabled
+		 * or enabled and active.
+		 *
+		 * Thus, the counters can only be used if SMT is on and the
+		 * counter set is enabled and active.
+		 */
+		mtdiag_ctl = cpumf_state_ctl[CPUMF_CTR_SET_MT_DIAG];
+		if (!((cpuhw->info.auth_ctl & mtdiag_ctl) &&
+		      (cpuhw->info.enable_ctl & mtdiag_ctl) &&
+		      (cpuhw->info.act_ctl & mtdiag_ctl)))
+			err = -EOPNOTSUPP;
+		break;
 	}
 
 	put_cpu_var(cpu_hw_events);
@@ -250,6 +249,11 @@ static void cpumf_measurement_alert(struct ext_code ext_code,
 	/* loss of counter data alert */
 	if (alert & CPU_MF_INT_CF_LCDA)
 		pr_err("CPU[%i] Counter data was lost\n", smp_processor_id());
+
+	/* loss of MT counter data alert */
+	if (alert & CPU_MF_INT_CF_MTDA)
+		pr_warn("CPU[%i] MT counter data was lost\n",
+			smp_processor_id());
 }
 
 #define PMC_INIT      0
@@ -330,6 +334,7 @@ static int __hw_perf_event_init(struct perf_event *event)
 {
 	struct perf_event_attr *attr = &event->attr;
 	struct hw_perf_event *hwc = &event->hw;
+	enum cpumf_ctr_set set;
 	int err;
 	u64 ev;
 
@@ -370,25 +375,30 @@ static int __hw_perf_event_init(struct perf_event *event)
 	if (ev == -1)
 		return -ENOENT;
 
-	if (ev >= PERF_CPUM_CF_MAX_CTR)
+	if (ev > PERF_CPUM_CF_MAX_CTR)
 		return -EINVAL;
 
-	/* Use the hardware perf event structure to store the counter number
-	 * in 'config' member and the counter set to which the counter belongs
-	 * in the 'config_base'.  The counter set (config_base) is then used
-	 * to enable/disable the counters.
-	 */
-	hwc->config = ev;
-	hwc->config_base = get_counter_set(ev);
-
-	/* Validate the counter that is assigned to this event.
-	 * Because the counter facility can use numerous counters at the
-	 * same time without constraints, it is not necessary to explicitly
-	 * validate event groups (event->group_leader != event).
-	 */
-	err = validate_event(hwc);
-	if (err)
-		return err;
+	/* Obtain the counter set to which the specified counter belongs */
+	set = get_counter_set(ev);
+	switch (set) {
+	case CPUMF_CTR_SET_BASIC:
+	case CPUMF_CTR_SET_USER:
+	case CPUMF_CTR_SET_CRYPTO:
+	case CPUMF_CTR_SET_EXT:
+	case CPUMF_CTR_SET_MT_DIAG:
+		/*
+		 * Use the hardware perf event structure to store the
+		 * counter number in the 'config' member and the counter
+		 * set number in the 'config_base'.  The counter set number
+		 * is then later used to enable/disable the counter(s).
+		 */
+		hwc->config = ev;
+		hwc->config_base = set;
+		break;
+	case CPUMF_CTR_SET_MAX:
+		/* The counter could not be associated to a counter set */
+		return -EINVAL;
+	};
 
 	/* Initialize for using the CPU-measurement counter facility */
 	if (!atomic_inc_not_zero(&num_events)) {
@@ -452,7 +462,7 @@ static int hw_perf_event_reset(struct perf_event *event)
 	return err;
 }
 
-static int hw_perf_event_update(struct perf_event *event)
+static void hw_perf_event_update(struct perf_event *event)
 {
 	u64 prev, new, delta;
 	int err;
@@ -461,14 +471,12 @@ static int hw_perf_event_update(struct perf_event *event)
 		prev = local64_read(&event->hw.prev_count);
 		err = ecctr(event->hw.config, &new);
 		if (err)
-			goto out;
+			return;
 	} while (local64_cmpxchg(&event->hw.prev_count, prev, new) != prev);
 
 	delta = (prev <= new) ? new - prev
 			      : (-1ULL - prev) + new + 1;	 /* overflow */
 	local64_add(delta, &event->count);
-out:
-	return err;
 }
 
 static void cpumf_pmu_read(struct perf_event *event)
diff --git a/arch/s390/kernel/perf_cpum_cf_events.c b/arch/s390/kernel/perf_cpum_cf_events.c
index c343ac2cf6c5..d3133285b7d1 100644
--- a/arch/s390/kernel/perf_cpum_cf_events.c
+++ b/arch/s390/kernel/perf_cpum_cf_events.c
@@ -114,8 +114,64 @@ CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFBOOK_L3_SOURCED_WRITES_IV, 0x00a1);
 CPUMF_EVENT_ATTR(cf_zec12, TX_NC_TABORT, 0x00b1);
 CPUMF_EVENT_ATTR(cf_zec12, TX_C_TABORT_NO_SPECIAL, 0x00b2);
 CPUMF_EVENT_ATTR(cf_zec12, TX_C_TABORT_SPECIAL, 0x00b3);
+CPUMF_EVENT_ATTR(cf_z13, L1D_WRITES_RO_EXCL, 0x0080);
+CPUMF_EVENT_ATTR(cf_z13, DTLB1_WRITES, 0x0081);
+CPUMF_EVENT_ATTR(cf_z13, DTLB1_MISSES, 0x0082);
+CPUMF_EVENT_ATTR(cf_z13, DTLB1_HPAGE_WRITES, 0x0083);
+CPUMF_EVENT_ATTR(cf_z13, DTLB1_GPAGE_WRITES, 0x0084);
+CPUMF_EVENT_ATTR(cf_z13, L1D_L2D_SOURCED_WRITES, 0x0085);
+CPUMF_EVENT_ATTR(cf_z13, ITLB1_WRITES, 0x0086);
+CPUMF_EVENT_ATTR(cf_z13, ITLB1_MISSES, 0x0087);
+CPUMF_EVENT_ATTR(cf_z13, L1I_L2I_SOURCED_WRITES, 0x0088);
+CPUMF_EVENT_ATTR(cf_z13, TLB2_PTE_WRITES, 0x0089);
+CPUMF_EVENT_ATTR(cf_z13, TLB2_CRSTE_HPAGE_WRITES, 0x008a);
+CPUMF_EVENT_ATTR(cf_z13, TLB2_CRSTE_WRITES, 0x008b);
+CPUMF_EVENT_ATTR(cf_z13, TX_C_TEND, 0x008c);
+CPUMF_EVENT_ATTR(cf_z13, TX_NC_TEND, 0x008d);
+CPUMF_EVENT_ATTR(cf_z13, L1C_TLB1_MISSES, 0x008f);
+CPUMF_EVENT_ATTR(cf_z13, L1D_ONCHIP_L3_SOURCED_WRITES, 0x0090);
+CPUMF_EVENT_ATTR(cf_z13, L1D_ONCHIP_L3_SOURCED_WRITES_IV, 0x0091);
+CPUMF_EVENT_ATTR(cf_z13, L1D_ONNODE_L4_SOURCED_WRITES, 0x0092);
+CPUMF_EVENT_ATTR(cf_z13, L1D_ONNODE_L3_SOURCED_WRITES_IV, 0x0093);
+CPUMF_EVENT_ATTR(cf_z13, L1D_ONNODE_L3_SOURCED_WRITES, 0x0094);
+CPUMF_EVENT_ATTR(cf_z13, L1D_ONDRAWER_L4_SOURCED_WRITES, 0x0095);
+CPUMF_EVENT_ATTR(cf_z13, L1D_ONDRAWER_L3_SOURCED_WRITES_IV, 0x0096);
+CPUMF_EVENT_ATTR(cf_z13, L1D_ONDRAWER_L3_SOURCED_WRITES, 0x0097);
+CPUMF_EVENT_ATTR(cf_z13, L1D_OFFDRAWER_SCOL_L4_SOURCED_WRITES, 0x0098);
+CPUMF_EVENT_ATTR(cf_z13, L1D_OFFDRAWER_SCOL_L3_SOURCED_WRITES_IV, 0x0099);
+CPUMF_EVENT_ATTR(cf_z13, L1D_OFFDRAWER_SCOL_L3_SOURCED_WRITES, 0x009a);
+CPUMF_EVENT_ATTR(cf_z13, L1D_OFFDRAWER_FCOL_L4_SOURCED_WRITES, 0x009b);
+CPUMF_EVENT_ATTR(cf_z13, L1D_OFFDRAWER_FCOL_L3_SOURCED_WRITES_IV, 0x009c);
+CPUMF_EVENT_ATTR(cf_z13, L1D_OFFDRAWER_FCOL_L3_SOURCED_WRITES, 0x009d);
+CPUMF_EVENT_ATTR(cf_z13, L1D_ONNODE_MEM_SOURCED_WRITES, 0x009e);
+CPUMF_EVENT_ATTR(cf_z13, L1D_ONDRAWER_MEM_SOURCED_WRITES, 0x009f);
+CPUMF_EVENT_ATTR(cf_z13, L1D_OFFDRAWER_MEM_SOURCED_WRITES, 0x00a0);
+CPUMF_EVENT_ATTR(cf_z13, L1D_ONCHIP_MEM_SOURCED_WRITES, 0x00a1);
+CPUMF_EVENT_ATTR(cf_z13, L1I_ONCHIP_L3_SOURCED_WRITES, 0x00a2);
+CPUMF_EVENT_ATTR(cf_z13, L1I_ONCHIP_L3_SOURCED_WRITES_IV, 0x00a3);
+CPUMF_EVENT_ATTR(cf_z13, L1I_ONNODE_L4_SOURCED_WRITES, 0x00a4);
+CPUMF_EVENT_ATTR(cf_z13, L1I_ONNODE_L3_SOURCED_WRITES_IV, 0x00a5);
+CPUMF_EVENT_ATTR(cf_z13, L1I_ONNODE_L3_SOURCED_WRITES, 0x00a6);
+CPUMF_EVENT_ATTR(cf_z13, L1I_ONDRAWER_L4_SOURCED_WRITES, 0x00a7);
+CPUMF_EVENT_ATTR(cf_z13, L1I_ONDRAWER_L3_SOURCED_WRITES_IV, 0x00a8);
+CPUMF_EVENT_ATTR(cf_z13, L1I_ONDRAWER_L3_SOURCED_WRITES, 0x00a9);
+CPUMF_EVENT_ATTR(cf_z13, L1I_OFFDRAWER_SCOL_L4_SOURCED_WRITES, 0x00aa);
+CPUMF_EVENT_ATTR(cf_z13, L1I_OFFDRAWER_SCOL_L3_SOURCED_WRITES_IV, 0x00ab);
+CPUMF_EVENT_ATTR(cf_z13, L1I_OFFDRAWER_SCOL_L3_SOURCED_WRITES, 0x00ac);
+CPUMF_EVENT_ATTR(cf_z13, L1I_OFFDRAWER_FCOL_L4_SOURCED_WRITES, 0x00ad);
+CPUMF_EVENT_ATTR(cf_z13, L1I_OFFDRAWER_FCOL_L3_SOURCED_WRITES_IV, 0x00ae);
+CPUMF_EVENT_ATTR(cf_z13, L1I_OFFDRAWER_FCOL_L3_SOURCED_WRITES, 0x00af);
+CPUMF_EVENT_ATTR(cf_z13, L1I_ONNODE_MEM_SOURCED_WRITES, 0x00b0);
+CPUMF_EVENT_ATTR(cf_z13, L1I_ONDRAWER_MEM_SOURCED_WRITES, 0x00b1);
+CPUMF_EVENT_ATTR(cf_z13, L1I_OFFDRAWER_MEM_SOURCED_WRITES, 0x00b2);
+CPUMF_EVENT_ATTR(cf_z13, L1I_ONCHIP_MEM_SOURCED_WRITES, 0x00b3);
+CPUMF_EVENT_ATTR(cf_z13, TX_NC_TABORT, 0x00da);
+CPUMF_EVENT_ATTR(cf_z13, TX_C_TABORT_NO_SPECIAL, 0x00db);
+CPUMF_EVENT_ATTR(cf_z13, TX_C_TABORT_SPECIAL, 0x00dc);
+CPUMF_EVENT_ATTR(cf_z13, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0);
+CPUMF_EVENT_ATTR(cf_z13, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1);
 
-static struct attribute *cpumcf_pmu_event_attr[] = {
+static struct attribute *cpumcf_pmu_event_attr[] __initdata = {
 	CPUMF_EVENT_PTR(cf, CPU_CYCLES),
 	CPUMF_EVENT_PTR(cf, INSTRUCTIONS),
 	CPUMF_EVENT_PTR(cf, L1I_DIR_WRITES),
@@ -236,28 +292,87 @@ static struct attribute *cpumcf_zec12_pmu_event_attr[] __initdata = {
 	NULL,
 };
 
+static struct attribute *cpumcf_z13_pmu_event_attr[] __initdata = {
+	CPUMF_EVENT_PTR(cf_z13, L1D_WRITES_RO_EXCL),
+	CPUMF_EVENT_PTR(cf_z13, DTLB1_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, DTLB1_MISSES),
+	CPUMF_EVENT_PTR(cf_z13, DTLB1_HPAGE_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, DTLB1_GPAGE_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, L1D_L2D_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, ITLB1_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, ITLB1_MISSES),
+	CPUMF_EVENT_PTR(cf_z13, L1I_L2I_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, TLB2_PTE_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, TLB2_CRSTE_HPAGE_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, TLB2_CRSTE_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, TX_C_TEND),
+	CPUMF_EVENT_PTR(cf_z13, TX_NC_TEND),
+	CPUMF_EVENT_PTR(cf_z13, L1C_TLB1_MISSES),
+	CPUMF_EVENT_PTR(cf_z13, L1D_ONCHIP_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, L1D_ONCHIP_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_z13, L1D_ONNODE_L4_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, L1D_ONNODE_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_z13, L1D_ONNODE_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, L1D_ONDRAWER_L4_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, L1D_ONDRAWER_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_z13, L1D_ONDRAWER_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, L1D_OFFDRAWER_SCOL_L4_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, L1D_OFFDRAWER_SCOL_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_z13, L1D_OFFDRAWER_SCOL_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, L1D_OFFDRAWER_FCOL_L4_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, L1D_OFFDRAWER_FCOL_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_z13, L1D_OFFDRAWER_FCOL_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, L1D_ONNODE_MEM_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, L1D_ONDRAWER_MEM_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, L1D_OFFDRAWER_MEM_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, L1D_ONCHIP_MEM_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, L1I_ONCHIP_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, L1I_ONCHIP_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_z13, L1I_ONNODE_L4_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, L1I_ONNODE_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_z13, L1I_ONNODE_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, L1I_ONDRAWER_L4_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, L1I_ONDRAWER_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_z13, L1I_ONDRAWER_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, L1I_OFFDRAWER_SCOL_L4_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, L1I_OFFDRAWER_SCOL_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_z13, L1I_OFFDRAWER_SCOL_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, L1I_OFFDRAWER_FCOL_L4_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, L1I_OFFDRAWER_FCOL_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_z13, L1I_OFFDRAWER_FCOL_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, L1I_ONNODE_MEM_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, L1I_ONDRAWER_MEM_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, L1I_OFFDRAWER_MEM_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, L1I_ONCHIP_MEM_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, TX_NC_TABORT),
+	CPUMF_EVENT_PTR(cf_z13, TX_C_TABORT_NO_SPECIAL),
+	CPUMF_EVENT_PTR(cf_z13, TX_C_TABORT_SPECIAL),
+	CPUMF_EVENT_PTR(cf_z13, MT_DIAG_CYCLES_ONE_THR_ACTIVE),
+	CPUMF_EVENT_PTR(cf_z13, MT_DIAG_CYCLES_TWO_THR_ACTIVE),
+	NULL,
+};
+
 /* END: CPUM_CF COUNTER DEFINITIONS ===================================== */
 
-static struct attribute_group cpumsf_pmu_events_group = {
+static struct attribute_group cpumcf_pmu_events_group = {
 	.name = "events",
-	.attrs = cpumcf_pmu_event_attr,
 };
 
 PMU_FORMAT_ATTR(event, "config:0-63");
 
-static struct attribute *cpumsf_pmu_format_attr[] = {
+static struct attribute *cpumcf_pmu_format_attr[] = {
 	&format_attr_event.attr,
 	NULL,
 };
 
-static struct attribute_group cpumsf_pmu_format_group = {
+static struct attribute_group cpumcf_pmu_format_group = {
 	.name = "format",
-	.attrs = cpumsf_pmu_format_attr,
+	.attrs = cpumcf_pmu_format_attr,
 };
 
-static const struct attribute_group *cpumsf_pmu_attr_groups[] = {
-	&cpumsf_pmu_events_group,
-	&cpumsf_pmu_format_group,
+static const struct attribute_group *cpumcf_pmu_attr_groups[] = {
+	&cpumcf_pmu_events_group,
+	&cpumcf_pmu_format_group,
 	NULL,
 };
 
@@ -290,6 +405,7 @@ static __init struct attribute **merge_attr(struct attribute **a,
 __init const struct attribute_group **cpumf_cf_event_group(void)
 {
 	struct attribute **combined, **model;
+	struct attribute *none[] = { NULL };
 	struct cpuid cpu_id;
 
 	get_cpu_id(&cpu_id);
@@ -306,17 +422,17 @@ __init const struct attribute_group **cpumf_cf_event_group(void)
 	case 0x2828:
 		model = cpumcf_zec12_pmu_event_attr;
 		break;
+	case 0x2964:
+	case 0x2965:
+		model = cpumcf_z13_pmu_event_attr;
+		break;
 	default:
-		model = NULL;
+		model = none;
 		break;
 	}
 
-	if (!model)
-		goto out;
-
 	combined = merge_attr(cpumcf_pmu_event_attr, model);
 	if (combined)
-		cpumsf_pmu_events_group.attrs = combined;
-out:
-	return cpumsf_pmu_attr_groups;
+		cpumcf_pmu_events_group.attrs = combined;
+	return cpumcf_pmu_attr_groups;
 }
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index 1c0b58545c04..9a4f279d25ca 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -1009,8 +1009,8 @@ static int perf_push_sample(struct perf_event *event, struct sf_raw_sample *sfr)
 	 * sample. Some early samples or samples from guests without
 	 * lpp usage would be misaccounted to the host. We use the asn
 	 * value as an addon heuristic to detect most of these guest samples.
-	 * If the value differs from the host hpp value, we assume to be a
-	 * KVM guest.
+	 * If the value differs from 0xffff (the host value), we assume to
+	 * be a KVM guest.
 	 */
 	switch (sfr->basic.CL) {
 	case 1: /* logical partition */
@@ -1020,8 +1020,7 @@ static int perf_push_sample(struct perf_event *event, struct sf_raw_sample *sfr)
 		sde_regs->in_guest = 1;
 		break;
 	default: /* old machine, use heuristics */
-		if (sfr->basic.gpp ||
-		    sfr->basic.prim_asn != (u16)sfr->basic.hpp)
+		if (sfr->basic.gpp || sfr->basic.prim_asn != 0xffff)
 			sde_regs->in_guest = 1;
 		break;
 	}
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index f29e41c5e2ec..999d7154bbdc 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -73,8 +73,10 @@ extern void kernel_thread_starter(void);
  */
 void exit_thread(struct task_struct *tsk)
 {
-	if (tsk == current)
+	if (tsk == current) {
 		exit_thread_runtime_instr();
+		exit_thread_gs();
+	}
 }
 
 void flush_thread(void)
@@ -159,6 +161,9 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long new_stackp,
 	/* Don't copy runtime instrumentation info */
 	p->thread.ri_cb = NULL;
 	frame->childregs.psw.mask &= ~PSW_MASK_RI;
+	/* Don't copy guarded storage control block */
+	p->thread.gs_cb = NULL;
+	p->thread.gs_bc_cb = NULL;
 
 	/* Set a new TLS ?  */
 	if (clone_flags & CLONE_SETTLS) {
diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c
index 928b929a6261..778cd6536175 100644
--- a/arch/s390/kernel/processor.c
+++ b/arch/s390/kernel/processor.c
@@ -7,6 +7,7 @@
 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
 
 #include <linux/cpufeature.h>
+#include <linux/bitops.h>
 #include <linux/kernel.h>
 #include <linux/sched/mm.h>
 #include <linux/init.h>
@@ -91,11 +92,23 @@ int cpu_have_feature(unsigned int num)
 }
 EXPORT_SYMBOL(cpu_have_feature);
 
+static void show_facilities(struct seq_file *m)
+{
+	unsigned int bit;
+	long *facilities;
+
+	facilities = (long *)&S390_lowcore.stfle_fac_list;
+	seq_puts(m, "facilities      :");
+	for_each_set_bit_inv(bit, facilities, MAX_FACILITY_BIT)
+		seq_printf(m, " %d", bit);
+	seq_putc(m, '\n');
+}
+
 static void show_cpu_summary(struct seq_file *m, void *v)
 {
 	static const char *hwcap_str[] = {
 		"esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp",
-		"edat", "etf3eh", "highgprs", "te", "vx", "vxd", "vxe"
+		"edat", "etf3eh", "highgprs", "te", "vx", "vxd", "vxe", "gs"
 	};
 	static const char * const int_hwcap_str[] = {
 		"sie"
@@ -116,6 +129,7 @@ static void show_cpu_summary(struct seq_file *m, void *v)
 		if (int_hwcap_str[i] && (int_hwcap & (1UL << i)))
 			seq_printf(m, "%s ", int_hwcap_str[i]);
 	seq_puts(m, "\n");
+	show_facilities(m);
 	show_cacheinfo(m);
 	for_each_online_cpu(cpu) {
 		struct cpuid *id = &per_cpu(cpu_info.cpu_id, cpu);
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index c14df0a1ec3c..488c5bb8dc77 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -44,30 +44,42 @@ void update_cr_regs(struct task_struct *task)
 	struct pt_regs *regs = task_pt_regs(task);
 	struct thread_struct *thread = &task->thread;
 	struct per_regs old, new;
-
+	unsigned long cr0_old, cr0_new;
+	unsigned long cr2_old, cr2_new;
+	int cr0_changed, cr2_changed;
+
+	__ctl_store(cr0_old, 0, 0);
+	__ctl_store(cr2_old, 2, 2);
+	cr0_new = cr0_old;
+	cr2_new = cr2_old;
 	/* Take care of the enable/disable of transactional execution. */
 	if (MACHINE_HAS_TE) {
-		unsigned long cr, cr_new;
-
-		__ctl_store(cr, 0, 0);
 		/* Set or clear transaction execution TXC bit 8. */
-		cr_new = cr | (1UL << 55);
+		cr0_new |= (1UL << 55);
 		if (task->thread.per_flags & PER_FLAG_NO_TE)
-			cr_new &= ~(1UL << 55);
-		if (cr_new != cr)
-			__ctl_load(cr_new, 0, 0);
+			cr0_new &= ~(1UL << 55);
 		/* Set or clear transaction execution TDC bits 62 and 63. */
-		__ctl_store(cr, 2, 2);
-		cr_new = cr & ~3UL;
+		cr2_new &= ~3UL;
 		if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND) {
 			if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND_TEND)
-				cr_new |= 1UL;
+				cr2_new |= 1UL;
 			else
-				cr_new |= 2UL;
+				cr2_new |= 2UL;
 		}
-		if (cr_new != cr)
-			__ctl_load(cr_new, 2, 2);
 	}
+	/* Take care of enable/disable of guarded storage. */
+	if (MACHINE_HAS_GS) {
+		cr2_new &= ~(1UL << 4);
+		if (task->thread.gs_cb)
+			cr2_new |= (1UL << 4);
+	}
+	/* Load control register 0/2 iff changed */
+	cr0_changed = cr0_new != cr0_old;
+	cr2_changed = cr2_new != cr2_old;
+	if (cr0_changed)
+		__ctl_load(cr0_new, 0, 0);
+	if (cr2_changed)
+		__ctl_load(cr2_new, 2, 2);
 	/* Copy user specified PER registers */
 	new.control = thread->per_user.control;
 	new.start = thread->per_user.start;
@@ -1137,6 +1149,74 @@ static int s390_system_call_set(struct task_struct *target,
 				  data, 0, sizeof(unsigned int));
 }
 
+static int s390_gs_cb_get(struct task_struct *target,
+			  const struct user_regset *regset,
+			  unsigned int pos, unsigned int count,
+			  void *kbuf, void __user *ubuf)
+{
+	struct gs_cb *data = target->thread.gs_cb;
+
+	if (!MACHINE_HAS_GS)
+		return -ENODEV;
+	if (!data)
+		return -ENODATA;
+	return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				   data, 0, sizeof(struct gs_cb));
+}
+
+static int s390_gs_cb_set(struct task_struct *target,
+			  const struct user_regset *regset,
+			  unsigned int pos, unsigned int count,
+			  const void *kbuf, const void __user *ubuf)
+{
+	struct gs_cb *data = target->thread.gs_cb;
+
+	if (!MACHINE_HAS_GS)
+		return -ENODEV;
+	if (!data) {
+		data = kzalloc(sizeof(*data), GFP_KERNEL);
+		if (!data)
+			return -ENOMEM;
+		target->thread.gs_cb = data;
+	}
+	return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				  data, 0, sizeof(struct gs_cb));
+}
+
+static int s390_gs_bc_get(struct task_struct *target,
+			  const struct user_regset *regset,
+			  unsigned int pos, unsigned int count,
+			  void *kbuf, void __user *ubuf)
+{
+	struct gs_cb *data = target->thread.gs_bc_cb;
+
+	if (!MACHINE_HAS_GS)
+		return -ENODEV;
+	if (!data)
+		return -ENODATA;
+	return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				   data, 0, sizeof(struct gs_cb));
+}
+
+static int s390_gs_bc_set(struct task_struct *target,
+			  const struct user_regset *regset,
+			  unsigned int pos, unsigned int count,
+			  const void *kbuf, const void __user *ubuf)
+{
+	struct gs_cb *data = target->thread.gs_bc_cb;
+
+	if (!MACHINE_HAS_GS)
+		return -ENODEV;
+	if (!data) {
+		data = kzalloc(sizeof(*data), GFP_KERNEL);
+		if (!data)
+			return -ENOMEM;
+		target->thread.gs_bc_cb = data;
+	}
+	return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				  data, 0, sizeof(struct gs_cb));
+}
+
 static const struct user_regset s390_regsets[] = {
 	{
 		.core_note_type = NT_PRSTATUS,
@@ -1194,6 +1274,22 @@ static const struct user_regset s390_regsets[] = {
 		.get = s390_vxrs_high_get,
 		.set = s390_vxrs_high_set,
 	},
+	{
+		.core_note_type = NT_S390_GS_CB,
+		.n = sizeof(struct gs_cb) / sizeof(__u64),
+		.size = sizeof(__u64),
+		.align = sizeof(__u64),
+		.get = s390_gs_cb_get,
+		.set = s390_gs_cb_set,
+	},
+	{
+		.core_note_type = NT_S390_GS_BC,
+		.n = sizeof(struct gs_cb) / sizeof(__u64),
+		.size = sizeof(__u64),
+		.align = sizeof(__u64),
+		.get = s390_gs_bc_get,
+		.set = s390_gs_bc_set,
+	},
 };
 
 static const struct user_regset_view user_s390_view = {
@@ -1422,6 +1518,14 @@ static const struct user_regset s390_compat_regsets[] = {
 		.get = s390_compat_regs_high_get,
 		.set = s390_compat_regs_high_set,
 	},
+	{
+		.core_note_type = NT_S390_GS_CB,
+		.n = sizeof(struct gs_cb) / sizeof(__u64),
+		.size = sizeof(__u64),
+		.align = sizeof(__u64),
+		.get = s390_gs_cb_get,
+		.set = s390_gs_cb_set,
+	},
 };
 
 static const struct user_regset_view user_s390_compat_view = {
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 911dc0b49be0..3ae756c0db3d 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -339,9 +339,15 @@ static void __init setup_lowcore(void)
 	lc->stfl_fac_list = S390_lowcore.stfl_fac_list;
 	memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list,
 	       MAX_FACILITY_BIT/8);
-	if (MACHINE_HAS_VX)
-		lc->vector_save_area_addr =
-			(unsigned long) &lc->vector_save_area;
+	if (MACHINE_HAS_VX || MACHINE_HAS_GS) {
+		unsigned long bits, size;
+
+		bits = MACHINE_HAS_GS ? 11 : 10;
+		size = 1UL << bits;
+		lc->mcesad = (__u64) memblock_virt_alloc(size, size);
+		if (MACHINE_HAS_GS)
+			lc->mcesad |= bits;
+	}
 	lc->vdso_per_cpu_data = (unsigned long) &lc->paste[0];
 	lc->sync_enter_timer = S390_lowcore.sync_enter_timer;
 	lc->async_enter_timer = S390_lowcore.async_enter_timer;
@@ -779,6 +785,12 @@ static int __init setup_hwcaps(void)
 			elf_hwcap |= HWCAP_S390_VXRS_BCD;
 	}
 
+	/*
+	 * Guarded storage support HWCAP_S390_GS is bit 12.
+	 */
+	if (MACHINE_HAS_GS)
+		elf_hwcap |= HWCAP_S390_GS;
+
 	get_cpu_id(&cpu_id);
 	add_device_randomness(&cpu_id, sizeof(cpu_id));
 	switch (cpu_id.machine) {
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 5dab859b0d54..363000a77ffc 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -51,6 +51,7 @@
 #include <asm/os_info.h>
 #include <asm/sigp.h>
 #include <asm/idle.h>
+#include <asm/nmi.h>
 #include "entry.h"
 
 enum {
@@ -78,6 +79,8 @@ struct pcpu {
 static u8 boot_core_type;
 static struct pcpu pcpu_devices[NR_CPUS];
 
+static struct kmem_cache *pcpu_mcesa_cache;
+
 unsigned int smp_cpu_mt_shift;
 EXPORT_SYMBOL(smp_cpu_mt_shift);
 
@@ -188,8 +191,10 @@ static void pcpu_ec_call(struct pcpu *pcpu, int ec_bit)
 static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
 {
 	unsigned long async_stack, panic_stack;
+	unsigned long mcesa_origin, mcesa_bits;
 	struct lowcore *lc;
 
+	mcesa_origin = mcesa_bits = 0;
 	if (pcpu != &pcpu_devices[0]) {
 		pcpu->lowcore =	(struct lowcore *)
 			__get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER);
@@ -197,20 +202,27 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
 		panic_stack = __get_free_page(GFP_KERNEL);
 		if (!pcpu->lowcore || !panic_stack || !async_stack)
 			goto out;
+		if (MACHINE_HAS_VX || MACHINE_HAS_GS) {
+			mcesa_origin = (unsigned long)
+				kmem_cache_alloc(pcpu_mcesa_cache, GFP_KERNEL);
+			if (!mcesa_origin)
+				goto out;
+			mcesa_bits = MACHINE_HAS_GS ? 11 : 0;
+		}
 	} else {
 		async_stack = pcpu->lowcore->async_stack - ASYNC_FRAME_OFFSET;
 		panic_stack = pcpu->lowcore->panic_stack - PANIC_FRAME_OFFSET;
+		mcesa_origin = pcpu->lowcore->mcesad & MCESA_ORIGIN_MASK;
+		mcesa_bits = pcpu->lowcore->mcesad & MCESA_LC_MASK;
 	}
 	lc = pcpu->lowcore;
 	memcpy(lc, &S390_lowcore, 512);
 	memset((char *) lc + 512, 0, sizeof(*lc) - 512);
 	lc->async_stack = async_stack + ASYNC_FRAME_OFFSET;
 	lc->panic_stack = panic_stack + PANIC_FRAME_OFFSET;
+	lc->mcesad = mcesa_origin | mcesa_bits;
 	lc->cpu_nr = cpu;
 	lc->spinlock_lockval = arch_spin_lockval(cpu);
-	if (MACHINE_HAS_VX)
-		lc->vector_save_area_addr =
-			(unsigned long) &lc->vector_save_area;
 	if (vdso_alloc_per_cpu(lc))
 		goto out;
 	lowcore_ptr[cpu] = lc;
@@ -218,6 +230,9 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
 	return 0;
 out:
 	if (pcpu != &pcpu_devices[0]) {
+		if (mcesa_origin)
+			kmem_cache_free(pcpu_mcesa_cache,
+					(void *) mcesa_origin);
 		free_page(panic_stack);
 		free_pages(async_stack, ASYNC_ORDER);
 		free_pages((unsigned long) pcpu->lowcore, LC_ORDER);
@@ -229,11 +244,17 @@ out:
 
 static void pcpu_free_lowcore(struct pcpu *pcpu)
 {
+	unsigned long mcesa_origin;
+
 	pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, 0);
 	lowcore_ptr[pcpu - pcpu_devices] = NULL;
 	vdso_free_per_cpu(pcpu->lowcore);
 	if (pcpu == &pcpu_devices[0])
 		return;
+	if (MACHINE_HAS_VX || MACHINE_HAS_GS) {
+		mcesa_origin = pcpu->lowcore->mcesad & MCESA_ORIGIN_MASK;
+		kmem_cache_free(pcpu_mcesa_cache, (void *) mcesa_origin);
+	}
 	free_page(pcpu->lowcore->panic_stack-PANIC_FRAME_OFFSET);
 	free_pages(pcpu->lowcore->async_stack-ASYNC_FRAME_OFFSET, ASYNC_ORDER);
 	free_pages((unsigned long) pcpu->lowcore, LC_ORDER);
@@ -550,9 +571,11 @@ int smp_store_status(int cpu)
 	if (__pcpu_sigp_relax(pcpu->address, SIGP_STORE_STATUS_AT_ADDRESS,
 			      pa) != SIGP_CC_ORDER_CODE_ACCEPTED)
 		return -EIO;
-	if (!MACHINE_HAS_VX)
+	if (!MACHINE_HAS_VX && !MACHINE_HAS_GS)
 		return 0;
-	pa = __pa(pcpu->lowcore->vector_save_area_addr);
+	pa = __pa(pcpu->lowcore->mcesad & MCESA_ORIGIN_MASK);
+	if (MACHINE_HAS_GS)
+		pa |= pcpu->lowcore->mcesad & MCESA_LC_MASK;
 	if (__pcpu_sigp_relax(pcpu->address, SIGP_STORE_ADDITIONAL_STATUS,
 			      pa) != SIGP_CC_ORDER_CODE_ACCEPTED)
 		return -EIO;
@@ -897,12 +920,22 @@ void __init smp_fill_possible_mask(void)
 
 void __init smp_prepare_cpus(unsigned int max_cpus)
 {
+	unsigned long size;
+
 	/* request the 0x1201 emergency signal external interrupt */
 	if (register_external_irq(EXT_IRQ_EMERGENCY_SIG, do_ext_call_interrupt))
 		panic("Couldn't request external interrupt 0x1201");
 	/* request the 0x1202 external call external interrupt */
 	if (register_external_irq(EXT_IRQ_EXTERNAL_CALL, do_ext_call_interrupt))
 		panic("Couldn't request external interrupt 0x1202");
+	/* create slab cache for the machine-check-extended-save-areas */
+	if (MACHINE_HAS_VX || MACHINE_HAS_GS) {
+		size = 1UL << (MACHINE_HAS_GS ? 11 : 10);
+		pcpu_mcesa_cache = kmem_cache_create("nmi_save_areas",
+						     size, size, 0, NULL);
+		if (!pcpu_mcesa_cache)
+			panic("Couldn't create nmi save area cache");
+	}
 }
 
 void __init smp_prepare_boot_cpu(void)
diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S
index 2659b5cfeddb..54fce7b065de 100644
--- a/arch/s390/kernel/syscalls.S
+++ b/arch/s390/kernel/syscalls.S
@@ -386,5 +386,5 @@ SYSCALL(sys_mlock2,compat_sys_mlock2)
 SYSCALL(sys_copy_file_range,compat_sys_copy_file_range) /* 375 */
 SYSCALL(sys_preadv2,compat_sys_preadv2)
 SYSCALL(sys_pwritev2,compat_sys_pwritev2)
-NI_SYSCALL
+SYSCALL(sys_s390_guarded_storage,compat_sys_s390_guarded_storage) /* 378 */
 SYSCALL(sys_statx,compat_sys_statx)
diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c
index 12b6b138e354..eefcb54872a5 100644
--- a/arch/s390/kernel/sysinfo.c
+++ b/arch/s390/kernel/sysinfo.c
@@ -4,6 +4,7 @@
  *	       Martin Schwidefsky <schwidefsky@de.ibm.com>,
  */
 
+#include <linux/debugfs.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
 #include <linux/proc_fs.h>
@@ -13,6 +14,7 @@
 #include <linux/export.h>
 #include <linux/slab.h>
 #include <asm/ebcdic.h>
+#include <asm/debug.h>
 #include <asm/sysinfo.h>
 #include <asm/cpcmd.h>
 #include <asm/topology.h>
@@ -485,3 +487,99 @@ void calibrate_delay(void)
 	       "%lu.%02lu BogoMIPS preset\n", loops_per_jiffy/(500000/HZ),
 	       (loops_per_jiffy/(5000/HZ)) % 100);
 }
+
+#ifdef CONFIG_DEBUG_FS
+
+#define STSI_FILE(fc, s1, s2)						       \
+static int stsi_open_##fc##_##s1##_##s2(struct inode *inode, struct file *file)\
+{									       \
+	file->private_data = (void *) get_zeroed_page(GFP_KERNEL);	       \
+	if (!file->private_data)					       \
+		return -ENOMEM;						       \
+	if (stsi(file->private_data, fc, s1, s2)) {			       \
+		free_page((unsigned long)file->private_data);		       \
+		file->private_data = NULL;				       \
+		return -EACCES;						       \
+	}								       \
+	return nonseekable_open(inode, file);				       \
+}									       \
+									       \
+static const struct file_operations stsi_##fc##_##s1##_##s2##_fs_ops = {       \
+	.open		= stsi_open_##fc##_##s1##_##s2,			       \
+	.release	= stsi_release,					       \
+	.read		= stsi_read,					       \
+	.llseek		= no_llseek,					       \
+};
+
+static int stsi_release(struct inode *inode, struct file *file)
+{
+	free_page((unsigned long)file->private_data);
+	return 0;
+}
+
+static ssize_t stsi_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
+{
+	return simple_read_from_buffer(buf, size, ppos, file->private_data, PAGE_SIZE);
+}
+
+STSI_FILE( 1, 1, 1);
+STSI_FILE( 1, 2, 1);
+STSI_FILE( 1, 2, 2);
+STSI_FILE( 2, 2, 1);
+STSI_FILE( 2, 2, 2);
+STSI_FILE( 3, 2, 2);
+STSI_FILE(15, 1, 2);
+STSI_FILE(15, 1, 3);
+STSI_FILE(15, 1, 4);
+STSI_FILE(15, 1, 5);
+STSI_FILE(15, 1, 6);
+
+struct stsi_file {
+	const struct file_operations *fops;
+	char *name;
+};
+
+static struct stsi_file stsi_file[] __initdata = {
+	{.fops = &stsi_1_1_1_fs_ops,  .name =  "1_1_1"},
+	{.fops = &stsi_1_2_1_fs_ops,  .name =  "1_2_1"},
+	{.fops = &stsi_1_2_2_fs_ops,  .name =  "1_2_2"},
+	{.fops = &stsi_2_2_1_fs_ops,  .name =  "2_2_1"},
+	{.fops = &stsi_2_2_2_fs_ops,  .name =  "2_2_2"},
+	{.fops = &stsi_3_2_2_fs_ops,  .name =  "3_2_2"},
+	{.fops = &stsi_15_1_2_fs_ops, .name = "15_1_2"},
+	{.fops = &stsi_15_1_3_fs_ops, .name = "15_1_3"},
+	{.fops = &stsi_15_1_4_fs_ops, .name = "15_1_4"},
+	{.fops = &stsi_15_1_5_fs_ops, .name = "15_1_5"},
+	{.fops = &stsi_15_1_6_fs_ops, .name = "15_1_6"},
+};
+
+static u8 stsi_0_0_0;
+
+static __init int stsi_init_debugfs(void)
+{
+	struct dentry *stsi_root;
+	struct stsi_file *sf;
+	int lvl, i;
+
+	stsi_root = debugfs_create_dir("stsi", arch_debugfs_dir);
+	if (IS_ERR_OR_NULL(stsi_root))
+		return 0;
+	lvl = stsi(NULL, 0, 0, 0);
+	if (lvl > 0)
+		stsi_0_0_0 = lvl;
+	debugfs_create_u8("0_0_0", 0400, stsi_root, &stsi_0_0_0);
+	for (i = 0; i < ARRAY_SIZE(stsi_file); i++) {
+		sf = &stsi_file[i];
+		debugfs_create_file(sf->name, 0400, stsi_root, NULL, sf->fops);
+	}
+	if (IS_ENABLED(CONFIG_SCHED_TOPOLOGY) && MACHINE_HAS_TOPOLOGY) {
+		char link_to[10];
+
+		sprintf(link_to, "15_1_%d", topology_mnest_limit());
+		debugfs_create_symlink("topology", stsi_root, link_to);
+	}
+	return 0;
+}
+device_initcall(stsi_init_debugfs);
+
+#endif /* CONFIG_DEBUG_FS */
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index 17660e800e74..bb47c92476f0 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -83,6 +83,8 @@ static cpumask_t cpu_thread_map(unsigned int cpu)
 	return mask;
 }
 
+#define TOPOLOGY_CORE_BITS	64
+
 static void add_cpus_to_mask(struct topology_core *tl_core,
 			     struct mask_info *drawer,
 			     struct mask_info *book,
@@ -91,7 +93,7 @@ static void add_cpus_to_mask(struct topology_core *tl_core,
 	struct cpu_topology_s390 *topo;
 	unsigned int core;
 
-	for_each_set_bit(core, &tl_core->mask[0], TOPOLOGY_CORE_BITS) {
+	for_each_set_bit(core, &tl_core->mask, TOPOLOGY_CORE_BITS) {
 		unsigned int rcore;
 		int lcpu, i;
 
@@ -244,7 +246,7 @@ static void update_cpu_masks(void)
 
 void store_topology(struct sysinfo_15_1_x *info)
 {
-	stsi(info, 15, 1, min(topology_max_mnest, 4));
+	stsi(info, 15, 1, topology_mnest_limit());
 }
 
 static int __arch_update_cpu_topology(void)
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 0f8f14199734..169558dc7daf 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -420,8 +420,8 @@ static int __write_machine_check(struct kvm_vcpu *vcpu,
 	save_access_regs(vcpu->run->s.regs.acrs);
 
 	/* Extended save area */
-	rc = read_guest_lc(vcpu, __LC_VX_SAVE_AREA_ADDR, &ext_sa_addr,
-			    sizeof(unsigned long));
+	rc = read_guest_lc(vcpu, __LC_MCESAD, &ext_sa_addr,
+			   sizeof(unsigned long));
 	/* Only bits 0-53 are used for address formation */
 	ext_sa_addr &= ~0x3ffUL;
 	if (!rc && mci.vr && ext_sa_addr && test_kvm_facility(vcpu->kvm, 129)) {
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index fd6cd05bb6a7..d5c5c911821a 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -273,7 +273,7 @@ static void kvm_s390_cpu_feat_init(void)
 			      kvm_s390_available_subfunc.pcc);
 	}
 	if (test_facility(57)) /* MSA5 */
-		__cpacf_query(CPACF_PPNO, (cpacf_mask_t *)
+		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
 			      kvm_s390_available_subfunc.ppno);
 
 	if (MACHINE_HAS_ESOP)
@@ -1512,9 +1512,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
 	} else {
 		if (sclp.hamax == U64_MAX)
-			kvm->arch.mem_limit = TASK_MAX_SIZE;
+			kvm->arch.mem_limit = TASK_SIZE_MAX;
 		else
-			kvm->arch.mem_limit = min_t(unsigned long, TASK_MAX_SIZE,
+			kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
 						    sclp.hamax + 1);
 		kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
 		if (!kvm->arch.gmap)
diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c
index ba427eb6f14c..ffb15bd4c593 100644
--- a/arch/s390/lib/spinlock.c
+++ b/arch/s390/lib/spinlock.c
@@ -17,7 +17,7 @@ int spin_retry = -1;
 static int __init spin_retry_init(void)
 {
 	if (spin_retry < 0)
-		spin_retry = MACHINE_HAS_CAD ? 10 : 1000;
+		spin_retry = 1000;
 	return 0;
 }
 early_initcall(spin_retry_init);
@@ -32,23 +32,17 @@ static int __init spin_retry_setup(char *str)
 }
 __setup("spin_retry=", spin_retry_setup);
 
-static inline void _raw_compare_and_delay(unsigned int *lock, unsigned int old)
-{
-	asm(".insn rsy,0xeb0000000022,%0,0,%1" : : "d" (old), "Q" (*lock));
-}
-
 void arch_spin_lock_wait(arch_spinlock_t *lp)
 {
-	unsigned int cpu = SPINLOCK_LOCKVAL;
-	unsigned int owner;
-	int count, first_diag;
+	int cpu = SPINLOCK_LOCKVAL;
+	int owner, count, first_diag;
 
 	first_diag = 1;
 	while (1) {
 		owner = ACCESS_ONCE(lp->lock);
 		/* Try to get the lock if it is free. */
 		if (!owner) {
-			if (_raw_compare_and_swap(&lp->lock, 0, cpu))
+			if (__atomic_cmpxchg_bool(&lp->lock, 0, cpu))
 				return;
 			continue;
 		}
@@ -61,8 +55,6 @@ void arch_spin_lock_wait(arch_spinlock_t *lp)
 		/* Loop for a while on the lock value. */
 		count = spin_retry;
 		do {
-			if (MACHINE_HAS_CAD)
-				_raw_compare_and_delay(&lp->lock, owner);
 			owner = ACCESS_ONCE(lp->lock);
 		} while (owner && count-- > 0);
 		if (!owner)
@@ -82,9 +74,8 @@ EXPORT_SYMBOL(arch_spin_lock_wait);
 
 void arch_spin_lock_wait_flags(arch_spinlock_t *lp, unsigned long flags)
 {
-	unsigned int cpu = SPINLOCK_LOCKVAL;
-	unsigned int owner;
-	int count, first_diag;
+	int cpu = SPINLOCK_LOCKVAL;
+	int owner, count, first_diag;
 
 	local_irq_restore(flags);
 	first_diag = 1;
@@ -93,7 +84,7 @@ void arch_spin_lock_wait_flags(arch_spinlock_t *lp, unsigned long flags)
 		/* Try to get the lock if it is free. */
 		if (!owner) {
 			local_irq_disable();
-			if (_raw_compare_and_swap(&lp->lock, 0, cpu))
+			if (__atomic_cmpxchg_bool(&lp->lock, 0, cpu))
 				return;
 			local_irq_restore(flags);
 			continue;
@@ -107,8 +98,6 @@ void arch_spin_lock_wait_flags(arch_spinlock_t *lp, unsigned long flags)
 		/* Loop for a while on the lock value. */
 		count = spin_retry;
 		do {
-			if (MACHINE_HAS_CAD)
-				_raw_compare_and_delay(&lp->lock, owner);
 			owner = ACCESS_ONCE(lp->lock);
 		} while (owner && count-- > 0);
 		if (!owner)
@@ -128,18 +117,16 @@ EXPORT_SYMBOL(arch_spin_lock_wait_flags);
 
 int arch_spin_trylock_retry(arch_spinlock_t *lp)
 {
-	unsigned int cpu = SPINLOCK_LOCKVAL;
-	unsigned int owner;
-	int count;
+	int cpu = SPINLOCK_LOCKVAL;
+	int owner, count;
 
 	for (count = spin_retry; count > 0; count--) {
 		owner = READ_ONCE(lp->lock);
 		/* Try to get the lock if it is free. */
 		if (!owner) {
-			if (_raw_compare_and_swap(&lp->lock, 0, cpu))
+			if (__atomic_cmpxchg_bool(&lp->lock, 0, cpu))
 				return 1;
-		} else if (MACHINE_HAS_CAD)
-			_raw_compare_and_delay(&lp->lock, owner);
+		}
 	}
 	return 0;
 }
@@ -147,8 +134,8 @@ EXPORT_SYMBOL(arch_spin_trylock_retry);
 
 void _raw_read_lock_wait(arch_rwlock_t *rw)
 {
-	unsigned int owner, old;
 	int count = spin_retry;
+	int owner, old;
 
 #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
 	__RAW_LOCK(&rw->lock, -1, __RAW_OP_ADD);
@@ -162,12 +149,9 @@ void _raw_read_lock_wait(arch_rwlock_t *rw)
 		}
 		old = ACCESS_ONCE(rw->lock);
 		owner = ACCESS_ONCE(rw->owner);
-		if ((int) old < 0) {
-			if (MACHINE_HAS_CAD)
-				_raw_compare_and_delay(&rw->lock, old);
+		if (old < 0)
 			continue;
-		}
-		if (_raw_compare_and_swap(&rw->lock, old, old + 1))
+		if (__atomic_cmpxchg_bool(&rw->lock, old, old + 1))
 			return;
 	}
 }
@@ -175,17 +159,14 @@ EXPORT_SYMBOL(_raw_read_lock_wait);
 
 int _raw_read_trylock_retry(arch_rwlock_t *rw)
 {
-	unsigned int old;
 	int count = spin_retry;
+	int old;
 
 	while (count-- > 0) {
 		old = ACCESS_ONCE(rw->lock);
-		if ((int) old < 0) {
-			if (MACHINE_HAS_CAD)
-				_raw_compare_and_delay(&rw->lock, old);
+		if (old < 0)
 			continue;
-		}
-		if (_raw_compare_and_swap(&rw->lock, old, old + 1))
+		if (__atomic_cmpxchg_bool(&rw->lock, old, old + 1))
 			return 1;
 	}
 	return 0;
@@ -194,10 +175,10 @@ EXPORT_SYMBOL(_raw_read_trylock_retry);
 
 #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
 
-void _raw_write_lock_wait(arch_rwlock_t *rw, unsigned int prev)
+void _raw_write_lock_wait(arch_rwlock_t *rw, int prev)
 {
-	unsigned int owner, old;
 	int count = spin_retry;
+	int owner, old;
 
 	owner = 0;
 	while (1) {
@@ -209,14 +190,12 @@ void _raw_write_lock_wait(arch_rwlock_t *rw, unsigned int prev)
 		old = ACCESS_ONCE(rw->lock);
 		owner = ACCESS_ONCE(rw->owner);
 		smp_mb();
-		if ((int) old >= 0) {
+		if (old >= 0) {
 			prev = __RAW_LOCK(&rw->lock, 0x80000000, __RAW_OP_OR);
 			old = prev;
 		}
-		if ((old & 0x7fffffff) == 0 && (int) prev >= 0)
+		if ((old & 0x7fffffff) == 0 && prev >= 0)
 			break;
-		if (MACHINE_HAS_CAD)
-			_raw_compare_and_delay(&rw->lock, old);
 	}
 }
 EXPORT_SYMBOL(_raw_write_lock_wait);
@@ -225,8 +204,8 @@ EXPORT_SYMBOL(_raw_write_lock_wait);
 
 void _raw_write_lock_wait(arch_rwlock_t *rw)
 {
-	unsigned int owner, old, prev;
 	int count = spin_retry;
+	int owner, old, prev;
 
 	prev = 0x80000000;
 	owner = 0;
@@ -238,15 +217,13 @@ void _raw_write_lock_wait(arch_rwlock_t *rw)
 		}
 		old = ACCESS_ONCE(rw->lock);
 		owner = ACCESS_ONCE(rw->owner);
-		if ((int) old >= 0 &&
-		    _raw_compare_and_swap(&rw->lock, old, old | 0x80000000))
+		if (old >= 0 &&
+		    __atomic_cmpxchg_bool(&rw->lock, old, old | 0x80000000))
 			prev = old;
 		else
 			smp_mb();
-		if ((old & 0x7fffffff) == 0 && (int) prev >= 0)
+		if ((old & 0x7fffffff) == 0 && prev >= 0)
 			break;
-		if (MACHINE_HAS_CAD)
-			_raw_compare_and_delay(&rw->lock, old);
 	}
 }
 EXPORT_SYMBOL(_raw_write_lock_wait);
@@ -255,24 +232,21 @@ EXPORT_SYMBOL(_raw_write_lock_wait);
 
 int _raw_write_trylock_retry(arch_rwlock_t *rw)
 {
-	unsigned int old;
 	int count = spin_retry;
+	int old;
 
 	while (count-- > 0) {
 		old = ACCESS_ONCE(rw->lock);
-		if (old) {
-			if (MACHINE_HAS_CAD)
-				_raw_compare_and_delay(&rw->lock, old);
+		if (old)
 			continue;
-		}
-		if (_raw_compare_and_swap(&rw->lock, 0, 0x80000000))
+		if (__atomic_cmpxchg_bool(&rw->lock, 0, 0x80000000))
 			return 1;
 	}
 	return 0;
 }
 EXPORT_SYMBOL(_raw_write_trylock_retry);
 
-void arch_lock_relax(unsigned int cpu)
+void arch_lock_relax(int cpu)
 {
 	if (!cpu)
 		return;
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index a07b1ec1391d..7f6db1e6c048 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -431,7 +431,7 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from,
 	if ((from | to | len) & (PMD_SIZE - 1))
 		return -EINVAL;
 	if (len == 0 || from + len < from || to + len < to ||
-	    from + len - 1 > TASK_MAX_SIZE || to + len - 1 > gmap->asce_end)
+	    from + len - 1 > TASK_SIZE_MAX || to + len - 1 > gmap->asce_end)
 		return -EINVAL;
 
 	flush = 0;
@@ -2004,20 +2004,12 @@ EXPORT_SYMBOL_GPL(gmap_shadow_page);
  * Called with sg->parent->shadow_lock.
  */
 static void gmap_shadow_notify(struct gmap *sg, unsigned long vmaddr,
-			       unsigned long offset, pte_t *pte)
+			       unsigned long gaddr, pte_t *pte)
 {
 	struct gmap_rmap *rmap, *rnext, *head;
-	unsigned long gaddr, start, end, bits, raddr;
-	unsigned long *table;
+	unsigned long start, end, bits, raddr;
 
 	BUG_ON(!gmap_is_shadow(sg));
-	spin_lock(&sg->parent->guest_table_lock);
-	table = radix_tree_lookup(&sg->parent->host_to_guest,
-				  vmaddr >> PMD_SHIFT);
-	gaddr = table ? __gmap_segment_gaddr(table) + offset : 0;
-	spin_unlock(&sg->parent->guest_table_lock);
-	if (!table)
-		return;
 
 	spin_lock(&sg->guest_table_lock);
 	if (sg->removed) {
@@ -2076,7 +2068,7 @@ static void gmap_shadow_notify(struct gmap *sg, unsigned long vmaddr,
 void ptep_notify(struct mm_struct *mm, unsigned long vmaddr,
 		 pte_t *pte, unsigned long bits)
 {
-	unsigned long offset, gaddr;
+	unsigned long offset, gaddr = 0;
 	unsigned long *table;
 	struct gmap *gmap, *sg, *next;
 
@@ -2084,22 +2076,23 @@ void ptep_notify(struct mm_struct *mm, unsigned long vmaddr,
 	offset = offset * (4096 / sizeof(pte_t));
 	rcu_read_lock();
 	list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
-		if (!list_empty(&gmap->children) && (bits & PGSTE_VSIE_BIT)) {
-			spin_lock(&gmap->shadow_lock);
-			list_for_each_entry_safe(sg, next,
-						 &gmap->children, list)
-				gmap_shadow_notify(sg, vmaddr, offset, pte);
-			spin_unlock(&gmap->shadow_lock);
-		}
-		if (!(bits & PGSTE_IN_BIT))
-			continue;
 		spin_lock(&gmap->guest_table_lock);
 		table = radix_tree_lookup(&gmap->host_to_guest,
 					  vmaddr >> PMD_SHIFT);
 		if (table)
 			gaddr = __gmap_segment_gaddr(table) + offset;
 		spin_unlock(&gmap->guest_table_lock);
-		if (table)
+		if (!table)
+			continue;
+
+		if (!list_empty(&gmap->children) && (bits & PGSTE_VSIE_BIT)) {
+			spin_lock(&gmap->shadow_lock);
+			list_for_each_entry_safe(sg, next,
+						 &gmap->children, list)
+				gmap_shadow_notify(sg, vmaddr, gaddr, pte);
+			spin_unlock(&gmap->shadow_lock);
+		}
+		if (bits & PGSTE_IN_BIT)
 			gmap_call_notifier(gmap, gaddr, gaddr + PAGE_SIZE - 1);
 	}
 	rcu_read_unlock();
diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c
index 18d4107e10ee..b7b779c40a5b 100644
--- a/arch/s390/mm/gup.c
+++ b/arch/s390/mm/gup.c
@@ -211,7 +211,7 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
 	addr = start;
 	len = (unsigned long) nr_pages << PAGE_SHIFT;
 	end = start + len;
-	if ((end <= start) || (end > TASK_SIZE))
+	if ((end <= start) || (end > mm->context.asce_limit))
 		return 0;
 	/*
 	 * local_irq_save() doesn't prevent pagetable teardown, but does
diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c
index 50618614881f..b017daed6887 100644
--- a/arch/s390/mm/mmap.c
+++ b/arch/s390/mm/mmap.c
@@ -89,19 +89,20 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma;
 	struct vm_unmapped_area_info info;
+	int rc;
 
 	if (len > TASK_SIZE - mmap_min_addr)
 		return -ENOMEM;
 
 	if (flags & MAP_FIXED)
-		return addr;
+		goto check_asce_limit;
 
 	if (addr) {
 		addr = PAGE_ALIGN(addr);
 		vma = find_vma(mm, addr);
 		if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
 		    (!vma || addr + len <= vma->vm_start))
-			return addr;
+			goto check_asce_limit;
 	}
 
 	info.flags = 0;
@@ -113,7 +114,18 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
 	else
 		info.align_mask = 0;
 	info.align_offset = pgoff << PAGE_SHIFT;
-	return vm_unmapped_area(&info);
+	addr = vm_unmapped_area(&info);
+	if (addr & ~PAGE_MASK)
+		return addr;
+
+check_asce_limit:
+	if (addr + len > current->mm->context.asce_limit) {
+		rc = crst_table_upgrade(mm);
+		if (rc)
+			return (unsigned long) rc;
+	}
+
+	return addr;
 }
 
 unsigned long
@@ -125,13 +137,14 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 	struct mm_struct *mm = current->mm;
 	unsigned long addr = addr0;
 	struct vm_unmapped_area_info info;
+	int rc;
 
 	/* requested length too big for entire address space */
 	if (len > TASK_SIZE - mmap_min_addr)
 		return -ENOMEM;
 
 	if (flags & MAP_FIXED)
-		return addr;
+		goto check_asce_limit;
 
 	/* requesting a specific address */
 	if (addr) {
@@ -139,7 +152,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 		vma = find_vma(mm, addr);
 		if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
 				(!vma || addr + len <= vma->vm_start))
-			return addr;
+			goto check_asce_limit;
 	}
 
 	info.flags = VM_UNMAPPED_AREA_TOPDOWN;
@@ -165,65 +178,20 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 		info.low_limit = TASK_UNMAPPED_BASE;
 		info.high_limit = TASK_SIZE;
 		addr = vm_unmapped_area(&info);
+		if (addr & ~PAGE_MASK)
+			return addr;
 	}
 
-	return addr;
-}
-
-int s390_mmap_check(unsigned long addr, unsigned long len, unsigned long flags)
-{
-	if (is_compat_task() || TASK_SIZE >= TASK_MAX_SIZE)
-		return 0;
-	if (!(flags & MAP_FIXED))
-		addr = 0;
-	if ((addr + len) >= TASK_SIZE)
-		return crst_table_upgrade(current->mm);
-	return 0;
-}
-
-static unsigned long
-s390_get_unmapped_area(struct file *filp, unsigned long addr,
-		unsigned long len, unsigned long pgoff, unsigned long flags)
-{
-	struct mm_struct *mm = current->mm;
-	unsigned long area;
-	int rc;
-
-	area = arch_get_unmapped_area(filp, addr, len, pgoff, flags);
-	if (!(area & ~PAGE_MASK))
-		return area;
-	if (area == -ENOMEM && !is_compat_task() && TASK_SIZE < TASK_MAX_SIZE) {
-		/* Upgrade the page table to 4 levels and retry. */
+check_asce_limit:
+	if (addr + len > current->mm->context.asce_limit) {
 		rc = crst_table_upgrade(mm);
 		if (rc)
 			return (unsigned long) rc;
-		area = arch_get_unmapped_area(filp, addr, len, pgoff, flags);
 	}
-	return area;
-}
-
-static unsigned long
-s390_get_unmapped_area_topdown(struct file *filp, const unsigned long addr,
-			  const unsigned long len, const unsigned long pgoff,
-			  const unsigned long flags)
-{
-	struct mm_struct *mm = current->mm;
-	unsigned long area;
-	int rc;
 
-	area = arch_get_unmapped_area_topdown(filp, addr, len, pgoff, flags);
-	if (!(area & ~PAGE_MASK))
-		return area;
-	if (area == -ENOMEM && !is_compat_task() && TASK_SIZE < TASK_MAX_SIZE) {
-		/* Upgrade the page table to 4 levels and retry. */
-		rc = crst_table_upgrade(mm);
-		if (rc)
-			return (unsigned long) rc;
-		area = arch_get_unmapped_area_topdown(filp, addr, len,
-						      pgoff, flags);
-	}
-	return area;
+	return addr;
 }
+
 /*
  * This function, called very early during the creation of a new
  * process VM image, sets up which VM layout function to use:
@@ -241,9 +209,9 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
 	 */
 	if (mmap_is_legacy()) {
 		mm->mmap_base = mmap_base_legacy(random_factor);
-		mm->get_unmapped_area = s390_get_unmapped_area;
+		mm->get_unmapped_area = arch_get_unmapped_area;
 	} else {
 		mm->mmap_base = mmap_base(random_factor);
-		mm->get_unmapped_area = s390_get_unmapped_area_topdown;
+		mm->get_unmapped_area = arch_get_unmapped_area_topdown;
 	}
 }
diff --git a/arch/s390/mm/page-states.c b/arch/s390/mm/page-states.c
index 3330ea124eec..69a7b01ae746 100644
--- a/arch/s390/mm/page-states.c
+++ b/arch/s390/mm/page-states.c
@@ -13,8 +13,7 @@
 #include <linux/gfp.h>
 #include <linux/init.h>
 
-#define ESSA_SET_STABLE		1
-#define ESSA_SET_UNUSED		2
+#include <asm/page-states.h>
 
 static int cmma_flag = 1;
 
diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c
index fc5dc33bb141..fc321c5ec30e 100644
--- a/arch/s390/mm/pageattr.c
+++ b/arch/s390/mm/pageattr.c
@@ -94,7 +94,7 @@ static int walk_pte_level(pmd_t *pmdp, unsigned long addr, unsigned long end,
 			new = pte_wrprotect(new);
 		else if (flags & SET_MEMORY_RW)
 			new = pte_mkwrite(pte_mkdirty(new));
-		if ((flags & SET_MEMORY_NX) && MACHINE_HAS_NX)
+		if (flags & SET_MEMORY_NX)
 			pte_val(new) |= _PAGE_NOEXEC;
 		else if (flags & SET_MEMORY_X)
 			pte_val(new) &= ~_PAGE_NOEXEC;
@@ -144,7 +144,7 @@ static void modify_pmd_page(pmd_t *pmdp, unsigned long addr,
 		new = pmd_wrprotect(new);
 	else if (flags & SET_MEMORY_RW)
 		new = pmd_mkwrite(pmd_mkdirty(new));
-	if ((flags & SET_MEMORY_NX) && MACHINE_HAS_NX)
+	if (flags & SET_MEMORY_NX)
 		pmd_val(new) |= _SEGMENT_ENTRY_NOEXEC;
 	else if (flags & SET_MEMORY_X)
 		pmd_val(new) &= ~_SEGMENT_ENTRY_NOEXEC;
@@ -221,7 +221,7 @@ static void modify_pud_page(pud_t *pudp, unsigned long addr,
 		new = pud_wrprotect(new);
 	else if (flags & SET_MEMORY_RW)
 		new = pud_mkwrite(pud_mkdirty(new));
-	if ((flags & SET_MEMORY_NX) && MACHINE_HAS_NX)
+	if (flags & SET_MEMORY_NX)
 		pud_val(new) |= _REGION_ENTRY_NOEXEC;
 	else if (flags & SET_MEMORY_X)
 		pud_val(new) &= ~_REGION_ENTRY_NOEXEC;
@@ -288,6 +288,10 @@ static int change_page_attr(unsigned long addr, unsigned long end,
 
 int __set_memory(unsigned long addr, int numpages, unsigned long flags)
 {
+	if (!MACHINE_HAS_NX)
+		flags &= ~(SET_MEMORY_NX | SET_MEMORY_X);
+	if (!flags)
+		return 0;
 	addr &= PAGE_MASK;
 	return change_page_attr(addr, addr + numpages * PAGE_SIZE, flags);
 }
diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c
index 995f78532cc2..f502cbe657af 100644
--- a/arch/s390/mm/pgalloc.c
+++ b/arch/s390/mm/pgalloc.c
@@ -95,7 +95,6 @@ int crst_table_upgrade(struct mm_struct *mm)
 	mm->context.asce_limit = 1UL << 53;
 	mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
 			   _ASCE_USER_BITS | _ASCE_TYPE_REGION2;
-	mm->task_size = mm->context.asce_limit;
 	spin_unlock_bh(&mm->page_table_lock);
 
 	on_each_cpu(__crst_table_upgrade, mm, 0);
@@ -119,7 +118,6 @@ void crst_table_downgrade(struct mm_struct *mm)
 	mm->context.asce_limit = 1UL << 31;
 	mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
 			   _ASCE_USER_BITS | _ASCE_TYPE_SEGMENT;
-	mm->task_size = mm->context.asce_limit;
 	crst_table_free(mm, (unsigned long *) pgd);
 
 	if (current->active_mm == mm)
@@ -144,7 +142,7 @@ struct page *page_table_alloc_pgste(struct mm_struct *mm)
 	struct page *page;
 	unsigned long *table;
 
-	page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
+	page = alloc_page(GFP_KERNEL);
 	if (page) {
 		table = (unsigned long *) page_to_phys(page);
 		clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 463e5ef02304..947b66a5cdba 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -23,6 +23,7 @@
 #include <asm/tlb.h>
 #include <asm/tlbflush.h>
 #include <asm/mmu_context.h>
+#include <asm/page-states.h>
 
 static inline pte_t ptep_flush_direct(struct mm_struct *mm,
 				      unsigned long addr, pte_t *ptep)
@@ -787,4 +788,156 @@ int get_guest_storage_key(struct mm_struct *mm, unsigned long addr,
 	return 0;
 }
 EXPORT_SYMBOL(get_guest_storage_key);
+
+/**
+ * pgste_perform_essa - perform ESSA actions on the PGSTE.
+ * @mm: the memory context. It must have PGSTEs, no check is performed here!
+ * @hva: the host virtual address of the page whose PGSTE is to be processed
+ * @orc: the specific action to perform, see the ESSA_SET_* macros.
+ * @oldpte: the PTE will be saved there if the pointer is not NULL.
+ * @oldpgste: the old PGSTE will be saved there if the pointer is not NULL.
+ *
+ * Return: 1 if the page is to be added to the CBRL, otherwise 0,
+ *	   or < 0 in case of error. -EINVAL is returned for invalid values
+ *	   of orc, -EFAULT for invalid addresses.
+ */
+int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc,
+			unsigned long *oldpte, unsigned long *oldpgste)
+{
+	unsigned long pgstev;
+	spinlock_t *ptl;
+	pgste_t pgste;
+	pte_t *ptep;
+	int res = 0;
+
+	WARN_ON_ONCE(orc > ESSA_MAX);
+	if (unlikely(orc > ESSA_MAX))
+		return -EINVAL;
+	ptep = get_locked_pte(mm, hva, &ptl);
+	if (unlikely(!ptep))
+		return -EFAULT;
+	pgste = pgste_get_lock(ptep);
+	pgstev = pgste_val(pgste);
+	if (oldpte)
+		*oldpte = pte_val(*ptep);
+	if (oldpgste)
+		*oldpgste = pgstev;
+
+	switch (orc) {
+	case ESSA_GET_STATE:
+		break;
+	case ESSA_SET_STABLE:
+		pgstev &= ~_PGSTE_GPS_USAGE_MASK;
+		pgstev |= _PGSTE_GPS_USAGE_STABLE;
+		break;
+	case ESSA_SET_UNUSED:
+		pgstev &= ~_PGSTE_GPS_USAGE_MASK;
+		pgstev |= _PGSTE_GPS_USAGE_UNUSED;
+		if (pte_val(*ptep) & _PAGE_INVALID)
+			res = 1;
+		break;
+	case ESSA_SET_VOLATILE:
+		pgstev &= ~_PGSTE_GPS_USAGE_MASK;
+		pgstev |= _PGSTE_GPS_USAGE_VOLATILE;
+		if (pte_val(*ptep) & _PAGE_INVALID)
+			res = 1;
+		break;
+	case ESSA_SET_POT_VOLATILE:
+		pgstev &= ~_PGSTE_GPS_USAGE_MASK;
+		if (!(pte_val(*ptep) & _PAGE_INVALID)) {
+			pgstev |= _PGSTE_GPS_USAGE_POT_VOLATILE;
+			break;
+		}
+		if (pgstev & _PGSTE_GPS_ZERO) {
+			pgstev |= _PGSTE_GPS_USAGE_VOLATILE;
+			break;
+		}
+		if (!(pgstev & PGSTE_GC_BIT)) {
+			pgstev |= _PGSTE_GPS_USAGE_VOLATILE;
+			res = 1;
+			break;
+		}
+		break;
+	case ESSA_SET_STABLE_RESIDENT:
+		pgstev &= ~_PGSTE_GPS_USAGE_MASK;
+		pgstev |= _PGSTE_GPS_USAGE_STABLE;
+		/*
+		 * Since the resident state can go away any time after this
+		 * call, we will not make this page resident. We can revisit
+		 * this decision if a guest will ever start using this.
+		 */
+		break;
+	case ESSA_SET_STABLE_IF_RESIDENT:
+		if (!(pte_val(*ptep) & _PAGE_INVALID)) {
+			pgstev &= ~_PGSTE_GPS_USAGE_MASK;
+			pgstev |= _PGSTE_GPS_USAGE_STABLE;
+		}
+		break;
+	default:
+		/* we should never get here! */
+		break;
+	}
+	/* If we are discarding a page, set it to logical zero */
+	if (res)
+		pgstev |= _PGSTE_GPS_ZERO;
+
+	pgste_val(pgste) = pgstev;
+	pgste_set_unlock(ptep, pgste);
+	pte_unmap_unlock(ptep, ptl);
+	return res;
+}
+EXPORT_SYMBOL(pgste_perform_essa);
+
+/**
+ * set_pgste_bits - set specific PGSTE bits.
+ * @mm: the memory context. It must have PGSTEs, no check is performed here!
+ * @hva: the host virtual address of the page whose PGSTE is to be processed
+ * @bits: a bitmask representing the bits that will be touched
+ * @value: the values of the bits to be written. Only the bits in the mask
+ *	   will be written.
+ *
+ * Return: 0 on success, < 0 in case of error.
+ */
+int set_pgste_bits(struct mm_struct *mm, unsigned long hva,
+			unsigned long bits, unsigned long value)
+{
+	spinlock_t *ptl;
+	pgste_t new;
+	pte_t *ptep;
+
+	ptep = get_locked_pte(mm, hva, &ptl);
+	if (unlikely(!ptep))
+		return -EFAULT;
+	new = pgste_get_lock(ptep);
+
+	pgste_val(new) &= ~bits;
+	pgste_val(new) |= value & bits;
+
+	pgste_set_unlock(ptep, new);
+	pte_unmap_unlock(ptep, ptl);
+	return 0;
+}
+EXPORT_SYMBOL(set_pgste_bits);
+
+/**
+ * get_pgste - get the current PGSTE for the given address.
+ * @mm: the memory context. It must have PGSTEs, no check is performed here!
+ * @hva: the host virtual address of the page whose PGSTE is to be processed
+ * @pgstep: will be written with the current PGSTE for the given address.
+ *
+ * Return: 0 on success, < 0 in case of error.
+ */
+int get_pgste(struct mm_struct *mm, unsigned long hva, unsigned long *pgstep)
+{
+	spinlock_t *ptl;
+	pte_t *ptep;
+
+	ptep = get_locked_pte(mm, hva, &ptl);
+	if (unlikely(!ptep))
+		return -EFAULT;
+	*pgstep = pgste_val(pgste_get(ptep));
+	pte_unmap_unlock(ptep, ptl);
+	return 0;
+}
+EXPORT_SYMBOL(get_pgste);
 #endif
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 364b9d824be3..8051df109db3 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -60,16 +60,8 @@ static DEFINE_SPINLOCK(zpci_domain_lock);
 static struct airq_iv *zpci_aisb_iv;
 static struct airq_iv *zpci_aibv[ZPCI_NR_DEVICES];
 
-/* Adapter interrupt definitions */
-static void zpci_irq_handler(struct airq_struct *airq);
-
-static struct airq_struct zpci_airq = {
-	.handler = zpci_irq_handler,
-	.isc = PCI_ISC,
-};
-
 #define ZPCI_IOMAP_ENTRIES						\
-	min(((unsigned long) CONFIG_PCI_NR_FUNCTIONS * PCI_BAR_COUNT),	\
+	min(((unsigned long) ZPCI_NR_DEVICES * PCI_BAR_COUNT / 2),	\
 	    ZPCI_IOMAP_MAX_ENTRIES)
 
 static DEFINE_SPINLOCK(zpci_iomap_lock);
@@ -214,8 +206,6 @@ int zpci_fmb_disable_device(struct zpci_dev *zdev)
 	return rc;
 }
 
-#define ZPCI_PCIAS_CFGSPC	15
-
 static int zpci_cfg_load(struct zpci_dev *zdev, int offset, u32 *val, u8 len)
 {
 	u64 req = ZPCI_CREATE_REQ(zdev->fh, ZPCI_PCIAS_CFGSPC, len);
@@ -507,6 +497,11 @@ static void zpci_unmap_resources(struct pci_dev *pdev)
 	}
 }
 
+static struct airq_struct zpci_airq = {
+	.handler = zpci_irq_handler,
+	.isc = PCI_ISC,
+};
+
 static int __init zpci_irq_init(void)
 {
 	int rc;
@@ -871,11 +866,6 @@ int zpci_report_error(struct pci_dev *pdev,
 }
 EXPORT_SYMBOL(zpci_report_error);
 
-static inline int barsize(u8 size)
-{
-	return (size) ? (1 << size) >> 10 : 0;
-}
-
 static int zpci_mem_init(void)
 {
 	BUILD_BUG_ON(!is_power_of_2(__alignof__(struct zpci_fmb)) ||
diff --git a/drivers/char/hw_random/Kconfig b/drivers/char/hw_random/Kconfig
index 0cafe08919c9..b9918fb9587d 100644
--- a/drivers/char/hw_random/Kconfig
+++ b/drivers/char/hw_random/Kconfig
@@ -423,6 +423,20 @@ config HW_RANDOM_CAVIUM
 
          If unsure, say Y.
 
+config HW_RANDOM_S390
+	tristate "S390 True Random Number Generator support"
+	depends on S390
+	default HW_RANDOM
+	---help---
+	  This driver provides kernel-side support for the True
+	  Random Number Generator available as CPACF extension
+	  on modern s390 hardware platforms.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called s390-trng.
+
+	  If unsure, say Y.
+
 endif # HW_RANDOM
 
 config UML_RANDOM
diff --git a/drivers/char/hw_random/Makefile b/drivers/char/hw_random/Makefile
index 5f52b1e4e7be..dd1765246255 100644
--- a/drivers/char/hw_random/Makefile
+++ b/drivers/char/hw_random/Makefile
@@ -36,3 +36,4 @@ obj-$(CONFIG_HW_RANDOM_STM32) += stm32-rng.o
 obj-$(CONFIG_HW_RANDOM_PIC32) += pic32-rng.o
 obj-$(CONFIG_HW_RANDOM_MESON) += meson-rng.o
 obj-$(CONFIG_HW_RANDOM_CAVIUM) += cavium-rng.o cavium-rng-vf.o
+obj-$(CONFIG_HW_RANDOM_S390) += s390-trng.o
diff --git a/drivers/char/hw_random/s390-trng.c b/drivers/char/hw_random/s390-trng.c
new file mode 100644
index 000000000000..aca48e893fca
--- /dev/null
+++ b/drivers/char/hw_random/s390-trng.c
@@ -0,0 +1,268 @@
+/*
+ * s390 TRNG device driver
+ *
+ * Driver for the TRNG (true random number generation) command
+ * available via CPACF extension MSA 7 on the s390 arch.
+
+ * Copyright IBM Corp. 2017
+ * Author(s): Harald Freudenberger <freude@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ */
+
+#define KMSG_COMPONENT "trng"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/hw_random.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/cpufeature.h>
+#include <linux/miscdevice.h>
+#include <linux/debugfs.h>
+#include <linux/atomic.h>
+#include <linux/random.h>
+#include <linux/sched/signal.h>
+#include <asm/debug.h>
+#include <asm/cpacf.h>
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("IBM Corporation");
+MODULE_DESCRIPTION("s390 CPACF TRNG device driver");
+
+
+/* trng related debug feature things */
+
+static debug_info_t *debug_info;
+
+#define DEBUG_DBG(...)	debug_sprintf_event(debug_info, 6, ##__VA_ARGS__)
+#define DEBUG_INFO(...) debug_sprintf_event(debug_info, 5, ##__VA_ARGS__)
+#define DEBUG_WARN(...) debug_sprintf_event(debug_info, 4, ##__VA_ARGS__)
+#define DEBUG_ERR(...)	debug_sprintf_event(debug_info, 3, ##__VA_ARGS__)
+
+
+/* trng helpers */
+
+static atomic64_t trng_dev_counter = ATOMIC64_INIT(0);
+static atomic64_t trng_hwrng_counter = ATOMIC64_INIT(0);
+
+
+/* file io functions */
+
+static int trng_open(struct inode *inode, struct file *file)
+{
+	return nonseekable_open(inode, file);
+}
+
+static ssize_t trng_read(struct file *file, char __user *ubuf,
+			 size_t nbytes, loff_t *ppos)
+{
+	u8 buf[32];
+	u8 *p = buf;
+	unsigned int n;
+	ssize_t ret = 0;
+
+	/*
+	 * use buf for requests <= sizeof(buf),
+	 * otherwise allocate one page and fetch
+	 * pagewise.
+	 */
+
+	if (nbytes > sizeof(buf)) {
+		p = (u8 *) __get_free_page(GFP_KERNEL);
+		if (!p)
+			return -ENOMEM;
+	}
+
+	while (nbytes) {
+		if (need_resched()) {
+			if (signal_pending(current)) {
+				if (ret == 0)
+					ret = -ERESTARTSYS;
+				break;
+			}
+			schedule();
+		}
+		n = nbytes > PAGE_SIZE ? PAGE_SIZE : nbytes;
+		cpacf_trng(NULL, 0, p, n);
+		atomic64_add(n, &trng_dev_counter);
+		if (copy_to_user(ubuf, p, n)) {
+			ret = -EFAULT;
+			break;
+		}
+		nbytes -= n;
+		ubuf += n;
+		ret += n;
+	}
+
+	if (p != buf)
+		free_page((unsigned long) p);
+
+	DEBUG_DBG("trng_read()=%zd\n", ret);
+	return ret;
+}
+
+
+/* sysfs */
+
+static ssize_t trng_counter_show(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	u64 dev_counter = atomic64_read(&trng_dev_counter);
+	u64 hwrng_counter = atomic64_read(&trng_hwrng_counter);
+#if IS_ENABLED(CONFIG_ARCH_RANDOM)
+	u64 arch_counter = atomic64_read(&s390_arch_random_counter);
+
+	return snprintf(buf, PAGE_SIZE,
+			"trng:  %llu\n"
+			"hwrng: %llu\n"
+			"arch:  %llu\n"
+			"total: %llu\n",
+			dev_counter, hwrng_counter, arch_counter,
+			dev_counter + hwrng_counter + arch_counter);
+#else
+	return snprintf(buf, PAGE_SIZE,
+			"trng:  %llu\n"
+			"hwrng: %llu\n"
+			"total: %llu\n",
+			dev_counter, hwrng_counter,
+			dev_counter + hwrng_counter);
+#endif
+}
+static DEVICE_ATTR(byte_counter, 0444, trng_counter_show, NULL);
+
+static struct attribute *trng_dev_attrs[] = {
+	&dev_attr_byte_counter.attr,
+	NULL
+};
+
+static const struct attribute_group trng_dev_attr_group = {
+	.attrs = trng_dev_attrs
+};
+
+static const struct attribute_group *trng_dev_attr_groups[] = {
+	&trng_dev_attr_group,
+	NULL
+};
+
+static const struct file_operations trng_fops = {
+	.owner		= THIS_MODULE,
+	.open		= &trng_open,
+	.release	= NULL,
+	.read		= &trng_read,
+	.llseek		= noop_llseek,
+};
+
+static struct miscdevice trng_dev = {
+	.name	= "trng",
+	.minor	= MISC_DYNAMIC_MINOR,
+	.mode	= 0444,
+	.fops	= &trng_fops,
+	.groups = trng_dev_attr_groups,
+};
+
+
+/* hwrng_register */
+
+static inline void _trng_hwrng_read(u8 *buf, size_t len)
+{
+	cpacf_trng(NULL, 0, buf, len);
+	atomic64_add(len, &trng_hwrng_counter);
+}
+
+static int trng_hwrng_data_read(struct hwrng *rng, u32 *data)
+{
+	size_t len = sizeof(*data);
+
+	_trng_hwrng_read((u8 *) data, len);
+
+	DEBUG_DBG("trng_hwrng_data_read()=%zu\n", len);
+
+	return len;
+}
+
+static int trng_hwrng_read(struct hwrng *rng, void *data, size_t max, bool wait)
+{
+	size_t len = max <= PAGE_SIZE ? max : PAGE_SIZE;
+
+	_trng_hwrng_read((u8 *) data, len);
+
+	DEBUG_DBG("trng_hwrng_read()=%zu\n", len);
+
+	return len;
+}
+
+/*
+ * hwrng register struct
+ * The trng is suppost to have 100% entropy, and thus
+ * we register with a very high quality value.
+ */
+static struct hwrng trng_hwrng_dev = {
+	.name		= "s390-trng",
+	.data_read	= trng_hwrng_data_read,
+	.read		= trng_hwrng_read,
+	.quality	= 999,
+};
+
+
+/* init and exit */
+
+static void __init trng_debug_init(void)
+{
+	debug_info = debug_register("trng", 1, 1, 4 * sizeof(long));
+	debug_register_view(debug_info, &debug_sprintf_view);
+	debug_set_level(debug_info, 3);
+}
+
+static void trng_debug_exit(void)
+{
+	debug_unregister(debug_info);
+}
+
+static int __init trng_init(void)
+{
+	int ret;
+
+	trng_debug_init();
+
+	/* check if subfunction CPACF_PRNO_TRNG is available */
+	if (!cpacf_query_func(CPACF_PRNO, CPACF_PRNO_TRNG)) {
+		DEBUG_INFO("trng_init CPACF_PRNO_TRNG not available\n");
+		ret = -ENODEV;
+		goto out_dbg;
+	}
+
+	ret = misc_register(&trng_dev);
+	if (ret) {
+		DEBUG_WARN("trng_init misc_register() failed rc=%d\n", ret);
+		goto out_dbg;
+	}
+
+	ret = hwrng_register(&trng_hwrng_dev);
+	if (ret) {
+		DEBUG_WARN("trng_init hwrng_register() failed rc=%d\n", ret);
+		goto out_misc;
+	}
+
+	DEBUG_DBG("trng_init successful\n");
+
+	return 0;
+
+out_misc:
+	misc_deregister(&trng_dev);
+out_dbg:
+	trng_debug_exit();
+	return ret;
+}
+
+static void __exit trng_exit(void)
+{
+	hwrng_unregister(&trng_hwrng_dev);
+	misc_deregister(&trng_dev);
+	trng_debug_exit();
+}
+
+module_cpu_feature_match(MSA, trng_init);
+module_exit(trng_exit);
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index 37e204f3d9be..6ee3a25ae731 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -327,6 +327,14 @@ config S390_IOMMU
 	help
 	  Support for the IOMMU API for s390 PCI devices.
 
+config S390_CCW_IOMMU
+	bool "S390 CCW IOMMU Support"
+	depends on S390 && CCW
+	select IOMMU_API
+	help
+	  Enables bits of IOMMU API required by VFIO. The iommu_ops
+	  is not implemented as it is not necessary for VFIO.
+
 config MTK_IOMMU
 	bool "MTK IOMMU Support"
 	depends on ARM || ARM64
diff --git a/drivers/s390/block/dasd_3990_erp.c b/drivers/s390/block/dasd_3990_erp.c
index 774da20ceb58..107cd3361e29 100644
--- a/drivers/s390/block/dasd_3990_erp.c
+++ b/drivers/s390/block/dasd_3990_erp.c
@@ -1052,8 +1052,9 @@ dasd_3990_erp_com_rej(struct dasd_ccw_req * erp, char *sense)
 	} else {
 		/* fatal error -  set status to FAILED
 		   internal error 09 - Command Reject */
-		dev_err(&device->cdev->dev, "An error occurred in the DASD "
-			"device driver, reason=%s\n", "09");
+		if (!test_bit(DASD_CQR_SUPPRESS_CR, &erp->flags))
+			dev_err(&device->cdev->dev,
+				"An error occurred in the DASD device driver, reason=09\n");
 
 		erp = dasd_3990_erp_cleanup(erp, DASD_CQR_FAILED);
 	}
diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c
index 0b38217f8147..122456e4db89 100644
--- a/drivers/s390/block/dasd_eckd.c
+++ b/drivers/s390/block/dasd_eckd.c
@@ -4927,10 +4927,14 @@ static void dasd_eckd_dump_sense(struct dasd_device *device,
 		dasd_eckd_dump_sense_tcw(device, req, irb);
 	} else {
 		/*
-		 * In some cases the 'No Record Found' error might be expected
-		 * and log messages shouldn't be written then. Check if the
-		 * according suppress bit is set.
+		 * In some cases the 'Command Reject' or 'No Record Found'
+		 * error might be expected and log messages shouldn't be
+		 * written then. Check if the according suppress bit is set.
 		 */
+		if (sense && sense[0] & SNS0_CMD_REJECT &&
+		    test_bit(DASD_CQR_SUPPRESS_CR, &req->flags))
+			return;
+
 		if (sense && sense[1] & SNS1_NO_REC_FOUND &&
 		    test_bit(DASD_CQR_SUPPRESS_NRF, &req->flags))
 			return;
@@ -5172,6 +5176,10 @@ static int dasd_eckd_query_host_access(struct dasd_device *device,
 	if (!device->block && private->lcu->pav == HYPER_PAV)
 		return -EOPNOTSUPP;
 
+	/* may not be supported by the storage server */
+	if (!(private->features.feature[14] & 0x80))
+		return -EOPNOTSUPP;
+
 	cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, 1 /* PSF */	+ 1 /* RSSD */,
 				   sizeof(struct dasd_psf_prssd_data) + 1,
 				   device);
@@ -5219,6 +5227,8 @@ static int dasd_eckd_query_host_access(struct dasd_device *device,
 
 	cqr->buildclk = get_tod_clock();
 	cqr->status = DASD_CQR_FILLED;
+	/* the command might not be supported, suppress error message */
+	__set_bit(DASD_CQR_SUPPRESS_CR, &cqr->flags);
 	rc = dasd_sleep_on_interruptible(cqr);
 	if (rc == 0) {
 		*data = *host_access;
diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h
index 518dba2732d5..dca7cb1e6f65 100644
--- a/drivers/s390/block/dasd_int.h
+++ b/drivers/s390/block/dasd_int.h
@@ -239,11 +239,11 @@ struct dasd_ccw_req {
 					 */
 /*
  * The following flags are used to suppress output of certain errors.
- * These flags should only be used for format checks!
  */
 #define DASD_CQR_SUPPRESS_NRF	4	/* Suppress 'No Record Found' error */
 #define DASD_CQR_SUPPRESS_FP	5	/* Suppress 'File Protected' error*/
 #define DASD_CQR_SUPPRESS_IL	6	/* Suppress 'Incorrect Length' error */
+#define DASD_CQR_SUPPRESS_CR	7	/* Suppress 'Command Reject' error */
 
 /* Signature for error recovery functions. */
 typedef struct dasd_ccw_req *(*dasd_erp_fn_t) (struct dasd_ccw_req *);
diff --git a/drivers/s390/cio/Makefile b/drivers/s390/cio/Makefile
index 3ab9aedeb84a..bdf47526038a 100644
--- a/drivers/s390/cio/Makefile
+++ b/drivers/s390/cio/Makefile
@@ -17,3 +17,6 @@ obj-$(CONFIG_CCWGROUP) += ccwgroup.o
 
 qdio-objs := qdio_main.o qdio_thinint.o qdio_debug.o qdio_setup.o
 obj-$(CONFIG_QDIO) += qdio.o
+
+vfio_ccw-objs += vfio_ccw_drv.o vfio_ccw_cp.o vfio_ccw_ops.o vfio_ccw_fsm.o
+obj-$(CONFIG_VFIO_CCW) += vfio_ccw.o
diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c
index 1b350665c823..89216174fcbb 100644
--- a/drivers/s390/cio/cio.c
+++ b/drivers/s390/cio/cio.c
@@ -170,12 +170,14 @@ cio_start_key (struct subchannel *sch,	/* subchannel structure */
 		return ccode;
 	}
 }
+EXPORT_SYMBOL_GPL(cio_start_key);
 
 int
 cio_start (struct subchannel *sch, struct ccw1 *cpa, __u8 lpm)
 {
 	return cio_start_key(sch, cpa, lpm, PAGE_DEFAULT_KEY);
 }
+EXPORT_SYMBOL_GPL(cio_start);
 
 /*
  * resume suspended I/O operation
@@ -208,6 +210,7 @@ cio_resume (struct subchannel *sch)
 		return -ENODEV;
 	}
 }
+EXPORT_SYMBOL_GPL(cio_resume);
 
 /*
  * halt I/O operation
@@ -241,6 +244,7 @@ cio_halt(struct subchannel *sch)
 		return -ENODEV;
 	}
 }
+EXPORT_SYMBOL_GPL(cio_halt);
 
 /*
  * Clear I/O operation
@@ -271,6 +275,7 @@ cio_clear(struct subchannel *sch)
 		return -ENODEV;
 	}
 }
+EXPORT_SYMBOL_GPL(cio_clear);
 
 /*
  * Function: cio_cancel
@@ -308,7 +313,68 @@ cio_cancel (struct subchannel *sch)
 		return -ENODEV;
 	}
 }
+EXPORT_SYMBOL_GPL(cio_cancel);
 
+/**
+ * cio_cancel_halt_clear - Cancel running I/O by performing cancel, halt
+ * and clear ordinally if subchannel is valid.
+ * @sch: subchannel on which to perform the cancel_halt_clear operation
+ * @iretry: the number of the times remained to retry the next operation
+ *
+ * This should be called repeatedly since halt/clear are asynchronous
+ * operations. We do one try with cio_cancel, three tries with cio_halt,
+ * 255 tries with cio_clear. The caller should initialize @iretry with
+ * the value 255 for its first call to this, and keep using the same
+ * @iretry in the subsequent calls until it gets a non -EBUSY return.
+ *
+ * Returns 0 if device now idle, -ENODEV for device not operational,
+ * -EBUSY if an interrupt is expected (either from halt/clear or from a
+ * status pending), and -EIO if out of retries.
+ */
+int cio_cancel_halt_clear(struct subchannel *sch, int *iretry)
+{
+	int ret;
+
+	if (cio_update_schib(sch))
+		return -ENODEV;
+	if (!sch->schib.pmcw.ena)
+		/* Not operational -> done. */
+		return 0;
+	/* Stage 1: cancel io. */
+	if (!(scsw_actl(&sch->schib.scsw) & SCSW_ACTL_HALT_PEND) &&
+	    !(scsw_actl(&sch->schib.scsw) & SCSW_ACTL_CLEAR_PEND)) {
+		if (!scsw_is_tm(&sch->schib.scsw)) {
+			ret = cio_cancel(sch);
+			if (ret != -EINVAL)
+				return ret;
+		}
+		/*
+		 * Cancel io unsuccessful or not applicable (transport mode).
+		 * Continue with asynchronous instructions.
+		 */
+		*iretry = 3;	/* 3 halt retries. */
+	}
+	/* Stage 2: halt io. */
+	if (!(scsw_actl(&sch->schib.scsw) & SCSW_ACTL_CLEAR_PEND)) {
+		if (*iretry) {
+			*iretry -= 1;
+			ret = cio_halt(sch);
+			if (ret != -EBUSY)
+				return (ret == 0) ? -EBUSY : ret;
+		}
+		/* Halt io unsuccessful. */
+		*iretry = 255;	/* 255 clear retries. */
+	}
+	/* Stage 3: clear io. */
+	if (*iretry) {
+		*iretry -= 1;
+		ret = cio_clear(sch);
+		return (ret == 0) ? -EBUSY : ret;
+	}
+	/* Function was unsuccessful */
+	return -EIO;
+}
+EXPORT_SYMBOL_GPL(cio_cancel_halt_clear);
 
 static void cio_apply_config(struct subchannel *sch, struct schib *schib)
 {
@@ -382,6 +448,7 @@ int cio_commit_config(struct subchannel *sch)
 	}
 	return ret;
 }
+EXPORT_SYMBOL_GPL(cio_commit_config);
 
 /**
  * cio_update_schib - Perform stsch and update schib if subchannel is valid.
@@ -987,6 +1054,7 @@ int cio_tm_start_key(struct subchannel *sch, struct tcw *tcw, u8 lpm, u8 key)
 		return cio_start_handle_notoper(sch, lpm);
 	}
 }
+EXPORT_SYMBOL_GPL(cio_tm_start_key);
 
 /**
  * cio_tm_intrg - perform interrogate function
@@ -1012,3 +1080,4 @@ int cio_tm_intrg(struct subchannel *sch)
 		return -ENODEV;
 	}
 }
+EXPORT_SYMBOL_GPL(cio_tm_intrg);
diff --git a/drivers/s390/cio/cio.h b/drivers/s390/cio/cio.h
index f0e57aefb5f2..939596d81b73 100644
--- a/drivers/s390/cio/cio.h
+++ b/drivers/s390/cio/cio.h
@@ -123,6 +123,7 @@ extern int cio_enable_subchannel(struct subchannel *, u32);
 extern int cio_disable_subchannel (struct subchannel *);
 extern int cio_cancel (struct subchannel *);
 extern int cio_clear (struct subchannel *);
+extern int cio_cancel_halt_clear(struct subchannel *, int *);
 extern int cio_resume (struct subchannel *);
 extern int cio_halt (struct subchannel *);
 extern int cio_start (struct subchannel *, struct ccw1 *, __u8);
diff --git a/drivers/s390/cio/device_fsm.c b/drivers/s390/cio/device_fsm.c
index 9afb5ce13007..12016e32e519 100644
--- a/drivers/s390/cio/device_fsm.c
+++ b/drivers/s390/cio/device_fsm.c
@@ -124,14 +124,6 @@ ccw_device_set_timeout(struct ccw_device *cdev, int expires)
 	add_timer(&cdev->private->timer);
 }
 
-/*
- * Cancel running i/o. This is called repeatedly since halt/clear are
- * asynchronous operations. We do one try with cio_cancel, two tries
- * with cio_halt, 255 tries with cio_clear. If everythings fails panic.
- * Returns 0 if device now idle, -ENODEV for device not operational and
- * -EBUSY if an interrupt is expected (either from halt/clear or from a
- * status pending).
- */
 int
 ccw_device_cancel_halt_clear(struct ccw_device *cdev)
 {
@@ -139,44 +131,14 @@ ccw_device_cancel_halt_clear(struct ccw_device *cdev)
 	int ret;
 
 	sch = to_subchannel(cdev->dev.parent);
-	if (cio_update_schib(sch))
-		return -ENODEV; 
-	if (!sch->schib.pmcw.ena)
-		/* Not operational -> done. */
-		return 0;
-	/* Stage 1: cancel io. */
-	if (!(scsw_actl(&sch->schib.scsw) & SCSW_ACTL_HALT_PEND) &&
-	    !(scsw_actl(&sch->schib.scsw) & SCSW_ACTL_CLEAR_PEND)) {
-		if (!scsw_is_tm(&sch->schib.scsw)) {
-			ret = cio_cancel(sch);
-			if (ret != -EINVAL)
-				return ret;
-		}
-		/* cancel io unsuccessful or not applicable (transport mode).
-		 * Continue with asynchronous instructions. */
-		cdev->private->iretry = 3;	/* 3 halt retries. */
-	}
-	if (!(scsw_actl(&sch->schib.scsw) & SCSW_ACTL_CLEAR_PEND)) {
-		/* Stage 2: halt io. */
-		if (cdev->private->iretry) {
-			cdev->private->iretry--;
-			ret = cio_halt(sch);
-			if (ret != -EBUSY)
-				return (ret == 0) ? -EBUSY : ret;
-		}
-		/* halt io unsuccessful. */
-		cdev->private->iretry = 255;	/* 255 clear retries. */
-	}
-	/* Stage 3: clear io. */
-	if (cdev->private->iretry) {
-		cdev->private->iretry--;
-		ret = cio_clear (sch);
-		return (ret == 0) ? -EBUSY : ret;
-	}
-	/* Function was unsuccessful */
-	CIO_MSG_EVENT(0, "0.%x.%04x: could not stop I/O\n",
-		      cdev->private->dev_id.ssid, cdev->private->dev_id.devno);
-	return -EIO;
+	ret = cio_cancel_halt_clear(sch, &cdev->private->iretry);
+
+	if (ret == -EIO)
+		CIO_MSG_EVENT(0, "0.%x.%04x: could not stop I/O\n",
+			      cdev->private->dev_id.ssid,
+			      cdev->private->dev_id.devno);
+
+	return ret;
 }
 
 void ccw_device_update_sense_data(struct ccw_device *cdev)
diff --git a/drivers/s390/cio/vfio_ccw_cp.c b/drivers/s390/cio/vfio_ccw_cp.c
new file mode 100644
index 000000000000..ba6ac83a6c25
--- /dev/null
+++ b/drivers/s390/cio/vfio_ccw_cp.c
@@ -0,0 +1,842 @@
+/*
+ * channel program interfaces
+ *
+ * Copyright IBM Corp. 2017
+ *
+ * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
+ *            Xiao Feng Ren <renxiaof@linux.vnet.ibm.com>
+ */
+
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/iommu.h>
+#include <linux/vfio.h>
+#include <asm/idals.h>
+
+#include "vfio_ccw_cp.h"
+
+/*
+ * Max length for ccw chain.
+ * XXX: Limit to 256, need to check more?
+ */
+#define CCWCHAIN_LEN_MAX	256
+
+struct pfn_array {
+	unsigned long		pa_iova;
+	unsigned long		*pa_iova_pfn;
+	unsigned long		*pa_pfn;
+	int			pa_nr;
+};
+
+struct pfn_array_table {
+	struct pfn_array	*pat_pa;
+	int			pat_nr;
+};
+
+struct ccwchain {
+	struct list_head	next;
+	struct ccw1		*ch_ccw;
+	/* Guest physical address of the current chain. */
+	u64			ch_iova;
+	/* Count of the valid ccws in chain. */
+	int			ch_len;
+	/* Pinned PAGEs for the original data. */
+	struct pfn_array_table	*ch_pat;
+};
+
+/*
+ * pfn_array_pin() - pin user pages in memory
+ * @pa: pfn_array on which to perform the operation
+ * @mdev: the mediated device to perform pin/unpin operations
+ *
+ * Attempt to pin user pages in memory.
+ *
+ * Usage of pfn_array:
+ * @pa->pa_iova     starting guest physical I/O address. Assigned by caller.
+ * @pa->pa_iova_pfn array that stores PFNs of the pages need to pin. Allocated
+ *                  by caller.
+ * @pa->pa_pfn      array that receives PFNs of the pages pinned. Allocated by
+ *                  caller.
+ * @pa->pa_nr       number of pages from @pa->pa_iova to pin. Assigned by
+ *                  caller.
+ *                  number of pages pinned. Assigned by callee.
+ *
+ * Returns:
+ *   Number of pages pinned on success.
+ *   If @pa->pa_nr is 0 or negative, returns 0.
+ *   If no pages were pinned, returns -errno.
+ */
+static int pfn_array_pin(struct pfn_array *pa, struct device *mdev)
+{
+	int i, ret;
+
+	if (pa->pa_nr <= 0) {
+		pa->pa_nr = 0;
+		return 0;
+	}
+
+	pa->pa_iova_pfn[0] = pa->pa_iova >> PAGE_SHIFT;
+	for (i = 1; i < pa->pa_nr; i++)
+		pa->pa_iova_pfn[i] = pa->pa_iova_pfn[i - 1] + 1;
+
+	ret = vfio_pin_pages(mdev, pa->pa_iova_pfn, pa->pa_nr,
+			     IOMMU_READ | IOMMU_WRITE, pa->pa_pfn);
+
+	if (ret > 0 && ret != pa->pa_nr) {
+		vfio_unpin_pages(mdev, pa->pa_iova_pfn, ret);
+		pa->pa_nr = 0;
+		return 0;
+	}
+
+	return ret;
+}
+
+/* Unpin the pages before releasing the memory. */
+static void pfn_array_unpin_free(struct pfn_array *pa, struct device *mdev)
+{
+	vfio_unpin_pages(mdev, pa->pa_iova_pfn, pa->pa_nr);
+	pa->pa_nr = 0;
+	kfree(pa->pa_iova_pfn);
+}
+
+/* Alloc memory for PFNs, then pin pages with them. */
+static int pfn_array_alloc_pin(struct pfn_array *pa, struct device *mdev,
+			       u64 iova, unsigned int len)
+{
+	int ret = 0;
+
+	if (!len || pa->pa_nr)
+		return -EINVAL;
+
+	pa->pa_iova = iova;
+
+	pa->pa_nr = ((iova & ~PAGE_MASK) + len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
+	if (!pa->pa_nr)
+		return -EINVAL;
+
+	pa->pa_iova_pfn = kcalloc(pa->pa_nr,
+				  sizeof(*pa->pa_iova_pfn) +
+				  sizeof(*pa->pa_pfn),
+				  GFP_KERNEL);
+	if (unlikely(!pa->pa_iova_pfn))
+		return -ENOMEM;
+	pa->pa_pfn = pa->pa_iova_pfn + pa->pa_nr;
+
+	ret = pfn_array_pin(pa, mdev);
+
+	if (ret > 0)
+		return ret;
+	else if (!ret)
+		ret = -EINVAL;
+
+	kfree(pa->pa_iova_pfn);
+
+	return ret;
+}
+
+static int pfn_array_table_init(struct pfn_array_table *pat, int nr)
+{
+	pat->pat_pa = kcalloc(nr, sizeof(*pat->pat_pa), GFP_KERNEL);
+	if (unlikely(ZERO_OR_NULL_PTR(pat->pat_pa))) {
+		pat->pat_nr = 0;
+		return -ENOMEM;
+	}
+
+	pat->pat_nr = nr;
+
+	return 0;
+}
+
+static void pfn_array_table_unpin_free(struct pfn_array_table *pat,
+				       struct device *mdev)
+{
+	int i;
+
+	for (i = 0; i < pat->pat_nr; i++)
+		pfn_array_unpin_free(pat->pat_pa + i, mdev);
+
+	if (pat->pat_nr) {
+		kfree(pat->pat_pa);
+		pat->pat_pa = NULL;
+		pat->pat_nr = 0;
+	}
+}
+
+static bool pfn_array_table_iova_pinned(struct pfn_array_table *pat,
+					unsigned long iova)
+{
+	struct pfn_array *pa = pat->pat_pa;
+	unsigned long iova_pfn = iova >> PAGE_SHIFT;
+	int i, j;
+
+	for (i = 0; i < pat->pat_nr; i++, pa++)
+		for (j = 0; j < pa->pa_nr; j++)
+			if (pa->pa_iova_pfn[i] == iova_pfn)
+				return true;
+
+	return false;
+}
+/* Create the list idal words for a pfn_array_table. */
+static inline void pfn_array_table_idal_create_words(
+	struct pfn_array_table *pat,
+	unsigned long *idaws)
+{
+	struct pfn_array *pa;
+	int i, j, k;
+
+	/*
+	 * Idal words (execept the first one) rely on the memory being 4k
+	 * aligned. If a user virtual address is 4K aligned, then it's
+	 * corresponding kernel physical address will also be 4K aligned. Thus
+	 * there will be no problem here to simply use the phys to create an
+	 * idaw.
+	 */
+	k = 0;
+	for (i = 0; i < pat->pat_nr; i++) {
+		pa = pat->pat_pa + i;
+		for (j = 0; j < pa->pa_nr; j++) {
+			idaws[k] = pa->pa_pfn[j] << PAGE_SHIFT;
+			if (k == 0)
+				idaws[k] += pa->pa_iova & (PAGE_SIZE - 1);
+			k++;
+		}
+	}
+}
+
+
+/*
+ * Within the domain (@mdev), copy @n bytes from a guest physical
+ * address (@iova) to a host physical address (@to).
+ */
+static long copy_from_iova(struct device *mdev,
+			   void *to, u64 iova,
+			   unsigned long n)
+{
+	struct pfn_array pa = {0};
+	u64 from;
+	int i, ret;
+	unsigned long l, m;
+
+	ret = pfn_array_alloc_pin(&pa, mdev, iova, n);
+	if (ret <= 0)
+		return ret;
+
+	l = n;
+	for (i = 0; i < pa.pa_nr; i++) {
+		from = pa.pa_pfn[i] << PAGE_SHIFT;
+		m = PAGE_SIZE;
+		if (i == 0) {
+			from += iova & (PAGE_SIZE - 1);
+			m -= iova & (PAGE_SIZE - 1);
+		}
+
+		m = min(l, m);
+		memcpy(to + (n - l), (void *)from, m);
+
+		l -= m;
+		if (l == 0)
+			break;
+	}
+
+	pfn_array_unpin_free(&pa, mdev);
+
+	return l;
+}
+
+static long copy_ccw_from_iova(struct channel_program *cp,
+			       struct ccw1 *to, u64 iova,
+			       unsigned long len)
+{
+	struct ccw0 ccw0;
+	struct ccw1 *pccw1;
+	int ret;
+	int i;
+
+	ret = copy_from_iova(cp->mdev, to, iova, len * sizeof(struct ccw1));
+	if (ret)
+		return ret;
+
+	if (!cp->orb.cmd.fmt) {
+		pccw1 = to;
+		for (i = 0; i < len; i++) {
+			ccw0 = *(struct ccw0 *)pccw1;
+			if ((pccw1->cmd_code & 0x0f) == CCW_CMD_TIC) {
+				pccw1->cmd_code = CCW_CMD_TIC;
+				pccw1->flags = 0;
+				pccw1->count = 0;
+			} else {
+				pccw1->cmd_code = ccw0.cmd_code;
+				pccw1->flags = ccw0.flags;
+				pccw1->count = ccw0.count;
+			}
+			pccw1->cda = ccw0.cda;
+			pccw1++;
+		}
+	}
+
+	return ret;
+}
+
+/*
+ * Helpers to operate ccwchain.
+ */
+#define ccw_is_test(_ccw) (((_ccw)->cmd_code & 0x0F) == 0)
+
+#define ccw_is_noop(_ccw) ((_ccw)->cmd_code == CCW_CMD_NOOP)
+
+#define ccw_is_tic(_ccw) ((_ccw)->cmd_code == CCW_CMD_TIC)
+
+#define ccw_is_idal(_ccw) ((_ccw)->flags & CCW_FLAG_IDA)
+
+
+#define ccw_is_chain(_ccw) ((_ccw)->flags & (CCW_FLAG_CC | CCW_FLAG_DC))
+
+static struct ccwchain *ccwchain_alloc(struct channel_program *cp, int len)
+{
+	struct ccwchain *chain;
+	void *data;
+	size_t size;
+
+	/* Make ccw address aligned to 8. */
+	size = ((sizeof(*chain) + 7L) & -8L) +
+		sizeof(*chain->ch_ccw) * len +
+		sizeof(*chain->ch_pat) * len;
+	chain = kzalloc(size, GFP_DMA | GFP_KERNEL);
+	if (!chain)
+		return NULL;
+
+	data = (u8 *)chain + ((sizeof(*chain) + 7L) & -8L);
+	chain->ch_ccw = (struct ccw1 *)data;
+
+	data = (u8 *)(chain->ch_ccw) + sizeof(*chain->ch_ccw) * len;
+	chain->ch_pat = (struct pfn_array_table *)data;
+
+	chain->ch_len = len;
+
+	list_add_tail(&chain->next, &cp->ccwchain_list);
+
+	return chain;
+}
+
+static void ccwchain_free(struct ccwchain *chain)
+{
+	list_del(&chain->next);
+	kfree(chain);
+}
+
+/* Free resource for a ccw that allocated memory for its cda. */
+static void ccwchain_cda_free(struct ccwchain *chain, int idx)
+{
+	struct ccw1 *ccw = chain->ch_ccw + idx;
+
+	if (!ccw->count)
+		return;
+
+	kfree((void *)(u64)ccw->cda);
+}
+
+/* Unpin the pages then free the memory resources. */
+static void cp_unpin_free(struct channel_program *cp)
+{
+	struct ccwchain *chain, *temp;
+	int i;
+
+	list_for_each_entry_safe(chain, temp, &cp->ccwchain_list, next) {
+		for (i = 0; i < chain->ch_len; i++) {
+			pfn_array_table_unpin_free(chain->ch_pat + i,
+						   cp->mdev);
+			ccwchain_cda_free(chain, i);
+		}
+		ccwchain_free(chain);
+	}
+}
+
+/**
+ * ccwchain_calc_length - calculate the length of the ccw chain.
+ * @iova: guest physical address of the target ccw chain
+ * @cp: channel_program on which to perform the operation
+ *
+ * This is the chain length not considering any TICs.
+ * You need to do a new round for each TIC target.
+ *
+ * Returns: the length of the ccw chain or -errno.
+ */
+static int ccwchain_calc_length(u64 iova, struct channel_program *cp)
+{
+	struct ccw1 *ccw, *p;
+	int cnt;
+
+	/*
+	 * Copy current chain from guest to host kernel.
+	 * Currently the chain length is limited to CCWCHAIN_LEN_MAX (256).
+	 * So copying 2K is enough (safe).
+	 */
+	p = ccw = kcalloc(CCWCHAIN_LEN_MAX, sizeof(*ccw), GFP_KERNEL);
+	if (!ccw)
+		return -ENOMEM;
+
+	cnt = copy_ccw_from_iova(cp, ccw, iova, CCWCHAIN_LEN_MAX);
+	if (cnt) {
+		kfree(ccw);
+		return cnt;
+	}
+
+	cnt = 0;
+	do {
+		cnt++;
+
+		if ((!ccw_is_chain(ccw)) && (!ccw_is_tic(ccw)))
+			break;
+
+		ccw++;
+	} while (cnt < CCWCHAIN_LEN_MAX + 1);
+
+	if (cnt == CCWCHAIN_LEN_MAX + 1)
+		cnt = -EINVAL;
+
+	kfree(p);
+	return cnt;
+}
+
+static int tic_target_chain_exists(struct ccw1 *tic, struct channel_program *cp)
+{
+	struct ccwchain *chain;
+	u32 ccw_head, ccw_tail;
+
+	list_for_each_entry(chain, &cp->ccwchain_list, next) {
+		ccw_head = chain->ch_iova;
+		ccw_tail = ccw_head + (chain->ch_len - 1) * sizeof(struct ccw1);
+
+		if ((ccw_head <= tic->cda) && (tic->cda <= ccw_tail))
+			return 1;
+	}
+
+	return 0;
+}
+
+static int ccwchain_loop_tic(struct ccwchain *chain,
+			     struct channel_program *cp);
+
+static int ccwchain_handle_tic(struct ccw1 *tic, struct channel_program *cp)
+{
+	struct ccwchain *chain;
+	int len, ret;
+
+	/* May transfer to an existing chain. */
+	if (tic_target_chain_exists(tic, cp))
+		return 0;
+
+	/* Get chain length. */
+	len = ccwchain_calc_length(tic->cda, cp);
+	if (len < 0)
+		return len;
+
+	/* Need alloc a new chain for this one. */
+	chain = ccwchain_alloc(cp, len);
+	if (!chain)
+		return -ENOMEM;
+	chain->ch_iova = tic->cda;
+
+	/* Copy the new chain from user. */
+	ret = copy_ccw_from_iova(cp, chain->ch_ccw, tic->cda, len);
+	if (ret) {
+		ccwchain_free(chain);
+		return ret;
+	}
+
+	/* Loop for tics on this new chain. */
+	return ccwchain_loop_tic(chain, cp);
+}
+
+/* Loop for TICs. */
+static int ccwchain_loop_tic(struct ccwchain *chain, struct channel_program *cp)
+{
+	struct ccw1 *tic;
+	int i, ret;
+
+	for (i = 0; i < chain->ch_len; i++) {
+		tic = chain->ch_ccw + i;
+
+		if (!ccw_is_tic(tic))
+			continue;
+
+		ret = ccwchain_handle_tic(tic, cp);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int ccwchain_fetch_tic(struct ccwchain *chain,
+			      int idx,
+			      struct channel_program *cp)
+{
+	struct ccw1 *ccw = chain->ch_ccw + idx;
+	struct ccwchain *iter;
+	u32 ccw_head, ccw_tail;
+
+	list_for_each_entry(iter, &cp->ccwchain_list, next) {
+		ccw_head = iter->ch_iova;
+		ccw_tail = ccw_head + (iter->ch_len - 1) * sizeof(struct ccw1);
+
+		if ((ccw_head <= ccw->cda) && (ccw->cda <= ccw_tail)) {
+			ccw->cda = (__u32) (addr_t) (iter->ch_ccw +
+						     (ccw->cda - ccw_head));
+			return 0;
+		}
+	}
+
+	return -EFAULT;
+}
+
+static int ccwchain_fetch_direct(struct ccwchain *chain,
+				 int idx,
+				 struct channel_program *cp)
+{
+	struct ccw1 *ccw;
+	struct pfn_array_table *pat;
+	unsigned long *idaws;
+	int idaw_nr;
+
+	ccw = chain->ch_ccw + idx;
+
+	/*
+	 * Pin data page(s) in memory.
+	 * The number of pages actually is the count of the idaws which will be
+	 * needed when translating a direct ccw to a idal ccw.
+	 */
+	pat = chain->ch_pat + idx;
+	if (pfn_array_table_init(pat, 1))
+		return -ENOMEM;
+	idaw_nr = pfn_array_alloc_pin(pat->pat_pa, cp->mdev,
+				      ccw->cda, ccw->count);
+	if (idaw_nr < 0)
+		return idaw_nr;
+
+	/* Translate this direct ccw to a idal ccw. */
+	idaws = kcalloc(idaw_nr, sizeof(*idaws), GFP_DMA | GFP_KERNEL);
+	if (!idaws) {
+		pfn_array_table_unpin_free(pat, cp->mdev);
+		return -ENOMEM;
+	}
+	ccw->cda = (__u32) virt_to_phys(idaws);
+	ccw->flags |= CCW_FLAG_IDA;
+
+	pfn_array_table_idal_create_words(pat, idaws);
+
+	return 0;
+}
+
+static int ccwchain_fetch_idal(struct ccwchain *chain,
+			       int idx,
+			       struct channel_program *cp)
+{
+	struct ccw1 *ccw;
+	struct pfn_array_table *pat;
+	unsigned long *idaws;
+	u64 idaw_iova;
+	unsigned int idaw_nr, idaw_len;
+	int i, ret;
+
+	ccw = chain->ch_ccw + idx;
+
+	/* Calculate size of idaws. */
+	ret = copy_from_iova(cp->mdev, &idaw_iova, ccw->cda, sizeof(idaw_iova));
+	if (ret)
+		return ret;
+	idaw_nr = idal_nr_words((void *)(idaw_iova), ccw->count);
+	idaw_len = idaw_nr * sizeof(*idaws);
+
+	/* Pin data page(s) in memory. */
+	pat = chain->ch_pat + idx;
+	ret = pfn_array_table_init(pat, idaw_nr);
+	if (ret)
+		return ret;
+
+	/* Translate idal ccw to use new allocated idaws. */
+	idaws = kzalloc(idaw_len, GFP_DMA | GFP_KERNEL);
+	if (!idaws) {
+		ret = -ENOMEM;
+		goto out_unpin;
+	}
+
+	ret = copy_from_iova(cp->mdev, idaws, ccw->cda, idaw_len);
+	if (ret)
+		goto out_free_idaws;
+
+	ccw->cda = virt_to_phys(idaws);
+
+	for (i = 0; i < idaw_nr; i++) {
+		idaw_iova = *(idaws + i);
+		if (IS_ERR_VALUE(idaw_iova)) {
+			ret = -EFAULT;
+			goto out_free_idaws;
+		}
+
+		ret = pfn_array_alloc_pin(pat->pat_pa + i, cp->mdev,
+					  idaw_iova, 1);
+		if (ret < 0)
+			goto out_free_idaws;
+	}
+
+	pfn_array_table_idal_create_words(pat, idaws);
+
+	return 0;
+
+out_free_idaws:
+	kfree(idaws);
+out_unpin:
+	pfn_array_table_unpin_free(pat, cp->mdev);
+	return ret;
+}
+
+/*
+ * Fetch one ccw.
+ * To reduce memory copy, we'll pin the cda page in memory,
+ * and to get rid of the cda 2G limitiaion of ccw1, we'll translate
+ * direct ccws to idal ccws.
+ */
+static int ccwchain_fetch_one(struct ccwchain *chain,
+			      int idx,
+			      struct channel_program *cp)
+{
+	struct ccw1 *ccw = chain->ch_ccw + idx;
+
+	if (ccw_is_test(ccw) || ccw_is_noop(ccw))
+		return 0;
+
+	if (ccw_is_tic(ccw))
+		return ccwchain_fetch_tic(chain, idx, cp);
+
+	if (ccw_is_idal(ccw))
+		return ccwchain_fetch_idal(chain, idx, cp);
+
+	return ccwchain_fetch_direct(chain, idx, cp);
+}
+
+/**
+ * cp_init() - allocate ccwchains for a channel program.
+ * @cp: channel_program on which to perform the operation
+ * @mdev: the mediated device to perform pin/unpin operations
+ * @orb: control block for the channel program from the guest
+ *
+ * This creates one or more ccwchain(s), and copies the raw data of
+ * the target channel program from @orb->cmd.iova to the new ccwchain(s).
+ *
+ * Limitations:
+ * 1. Supports only prefetch enabled mode.
+ * 2. Supports idal(c64) ccw chaining.
+ * 3. Supports 4k idaw.
+ *
+ * Returns:
+ *   %0 on success and a negative error value on failure.
+ */
+int cp_init(struct channel_program *cp, struct device *mdev, union orb *orb)
+{
+	u64 iova = orb->cmd.cpa;
+	struct ccwchain *chain;
+	int len, ret;
+
+	/*
+	 * XXX:
+	 * Only support prefetch enable mode now.
+	 * Only support 64bit addressing idal.
+	 * Only support 4k IDAW.
+	 */
+	if (!orb->cmd.pfch || !orb->cmd.c64 || orb->cmd.i2k)
+		return -EOPNOTSUPP;
+
+	INIT_LIST_HEAD(&cp->ccwchain_list);
+	memcpy(&cp->orb, orb, sizeof(*orb));
+	cp->mdev = mdev;
+
+	/* Get chain length. */
+	len = ccwchain_calc_length(iova, cp);
+	if (len < 0)
+		return len;
+
+	/* Alloc mem for the head chain. */
+	chain = ccwchain_alloc(cp, len);
+	if (!chain)
+		return -ENOMEM;
+	chain->ch_iova = iova;
+
+	/* Copy the head chain from guest. */
+	ret = copy_ccw_from_iova(cp, chain->ch_ccw, iova, len);
+	if (ret) {
+		ccwchain_free(chain);
+		return ret;
+	}
+
+	/* Now loop for its TICs. */
+	ret = ccwchain_loop_tic(chain, cp);
+	if (ret)
+		cp_unpin_free(cp);
+
+	return ret;
+}
+
+
+/**
+ * cp_free() - free resources for channel program.
+ * @cp: channel_program on which to perform the operation
+ *
+ * This unpins the memory pages and frees the memory space occupied by
+ * @cp, which must have been returned by a previous call to cp_init().
+ * Otherwise, undefined behavior occurs.
+ */
+void cp_free(struct channel_program *cp)
+{
+	cp_unpin_free(cp);
+}
+
+/**
+ * cp_prefetch() - translate a guest physical address channel program to
+ *                 a real-device runnable channel program.
+ * @cp: channel_program on which to perform the operation
+ *
+ * This function translates the guest-physical-address channel program
+ * and stores the result to ccwchain list. @cp must have been
+ * initialized by a previous call with cp_init(). Otherwise, undefined
+ * behavior occurs.
+ *
+ * The S/390 CCW Translation APIS (prefixed by 'cp_') are introduced
+ * as helpers to do ccw chain translation inside the kernel. Basically
+ * they accept a channel program issued by a virtual machine, and
+ * translate the channel program to a real-device runnable channel
+ * program.
+ *
+ * These APIs will copy the ccws into kernel-space buffers, and update
+ * the guest phsical addresses with their corresponding host physical
+ * addresses.  Then channel I/O device drivers could issue the
+ * translated channel program to real devices to perform an I/O
+ * operation.
+ *
+ * These interfaces are designed to support translation only for
+ * channel programs, which are generated and formatted by a
+ * guest. Thus this will make it possible for things like VFIO to
+ * leverage the interfaces to passthrough a channel I/O mediated
+ * device in QEMU.
+ *
+ * We support direct ccw chaining by translating them to idal ccws.
+ *
+ * Returns:
+ *   %0 on success and a negative error value on failure.
+ */
+int cp_prefetch(struct channel_program *cp)
+{
+	struct ccwchain *chain;
+	int len, idx, ret;
+
+	list_for_each_entry(chain, &cp->ccwchain_list, next) {
+		len = chain->ch_len;
+		for (idx = 0; idx < len; idx++) {
+			ret = ccwchain_fetch_one(chain, idx, cp);
+			if (ret)
+				return ret;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * cp_get_orb() - get the orb of the channel program
+ * @cp: channel_program on which to perform the operation
+ * @intparm: new intparm for the returned orb
+ * @lpm: candidate value of the logical-path mask for the returned orb
+ *
+ * This function returns the address of the updated orb of the channel
+ * program. Channel I/O device drivers could use this orb to issue a
+ * ssch.
+ */
+union orb *cp_get_orb(struct channel_program *cp, u32 intparm, u8 lpm)
+{
+	union orb *orb;
+	struct ccwchain *chain;
+	struct ccw1 *cpa;
+
+	orb = &cp->orb;
+
+	orb->cmd.intparm = intparm;
+	orb->cmd.fmt = 1;
+	orb->cmd.key = PAGE_DEFAULT_KEY >> 4;
+
+	if (orb->cmd.lpm == 0)
+		orb->cmd.lpm = lpm;
+
+	chain = list_first_entry(&cp->ccwchain_list, struct ccwchain, next);
+	cpa = chain->ch_ccw;
+	orb->cmd.cpa = (__u32) __pa(cpa);
+
+	return orb;
+}
+
+/**
+ * cp_update_scsw() - update scsw for a channel program.
+ * @cp: channel_program on which to perform the operation
+ * @scsw: I/O results of the channel program and also the target to be
+ *        updated
+ *
+ * @scsw contains the I/O results of the channel program that pointed
+ * to by @cp. However what @scsw->cpa stores is a host physical
+ * address, which is meaningless for the guest, which is waiting for
+ * the I/O results.
+ *
+ * This function updates @scsw->cpa to its coressponding guest physical
+ * address.
+ */
+void cp_update_scsw(struct channel_program *cp, union scsw *scsw)
+{
+	struct ccwchain *chain;
+	u32 cpa = scsw->cmd.cpa;
+	u32 ccw_head, ccw_tail;
+
+	/*
+	 * LATER:
+	 * For now, only update the cmd.cpa part. We may need to deal with
+	 * other portions of the schib as well, even if we don't return them
+	 * in the ioctl directly. Path status changes etc.
+	 */
+	list_for_each_entry(chain, &cp->ccwchain_list, next) {
+		ccw_head = (u32)(u64)chain->ch_ccw;
+		ccw_tail = (u32)(u64)(chain->ch_ccw + chain->ch_len - 1);
+
+		if ((ccw_head <= cpa) && (cpa <= ccw_tail)) {
+			/*
+			 * (cpa - ccw_head) is the offset value of the host
+			 * physical ccw to its chain head.
+			 * Adding this value to the guest physical ccw chain
+			 * head gets us the guest cpa.
+			 */
+			cpa = chain->ch_iova + (cpa - ccw_head);
+			break;
+		}
+	}
+
+	scsw->cmd.cpa = cpa;
+}
+
+/**
+ * cp_iova_pinned() - check if an iova is pinned for a ccw chain.
+ * @cmd: ccwchain command on which to perform the operation
+ * @iova: the iova to check
+ *
+ * If the @iova is currently pinned for the ccw chain, return true;
+ * else return false.
+ */
+bool cp_iova_pinned(struct channel_program *cp, u64 iova)
+{
+	struct ccwchain *chain;
+	int i;
+
+	list_for_each_entry(chain, &cp->ccwchain_list, next) {
+		for (i = 0; i < chain->ch_len; i++)
+			if (pfn_array_table_iova_pinned(chain->ch_pat + i,
+							iova))
+				return true;
+	}
+
+	return false;
+}
diff --git a/drivers/s390/cio/vfio_ccw_cp.h b/drivers/s390/cio/vfio_ccw_cp.h
new file mode 100644
index 000000000000..7a1996b3b36d
--- /dev/null
+++ b/drivers/s390/cio/vfio_ccw_cp.h
@@ -0,0 +1,42 @@
+/*
+ * channel program interfaces
+ *
+ * Copyright IBM Corp. 2017
+ *
+ * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
+ *            Xiao Feng Ren <renxiaof@linux.vnet.ibm.com>
+ */
+
+#ifndef _VFIO_CCW_CP_H_
+#define _VFIO_CCW_CP_H_
+
+#include <asm/cio.h>
+#include <asm/scsw.h>
+
+#include "orb.h"
+
+/**
+ * struct channel_program - manage information for channel program
+ * @ccwchain_list: list head of ccwchains
+ * @orb: orb for the currently processed ssch request
+ * @mdev: the mediated device to perform page pinning/unpinning
+ *
+ * @ccwchain_list is the head of a ccwchain list, that contents the
+ * translated result of the guest channel program that pointed out by
+ * the iova parameter when calling cp_init.
+ */
+struct channel_program {
+	struct list_head ccwchain_list;
+	union orb orb;
+	struct device *mdev;
+};
+
+extern int cp_init(struct channel_program *cp, struct device *mdev,
+		   union orb *orb);
+extern void cp_free(struct channel_program *cp);
+extern int cp_prefetch(struct channel_program *cp);
+extern union orb *cp_get_orb(struct channel_program *cp, u32 intparm, u8 lpm);
+extern void cp_update_scsw(struct channel_program *cp, union scsw *scsw);
+extern bool cp_iova_pinned(struct channel_program *cp, u64 iova);
+
+#endif
diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c
new file mode 100644
index 000000000000..e90dd43d2a55
--- /dev/null
+++ b/drivers/s390/cio/vfio_ccw_drv.c
@@ -0,0 +1,308 @@
+/*
+ * VFIO based Physical Subchannel device driver
+ *
+ * Copyright IBM Corp. 2017
+ *
+ * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
+ *            Xiao Feng Ren <renxiaof@linux.vnet.ibm.com>
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/device.h>
+#include <linux/slab.h>
+#include <linux/uuid.h>
+#include <linux/mdev.h>
+
+#include <asm/isc.h>
+
+#include "ioasm.h"
+#include "css.h"
+#include "vfio_ccw_private.h"
+
+struct workqueue_struct *vfio_ccw_work_q;
+
+/*
+ * Helpers
+ */
+int vfio_ccw_sch_quiesce(struct subchannel *sch)
+{
+	struct vfio_ccw_private *private = dev_get_drvdata(&sch->dev);
+	DECLARE_COMPLETION_ONSTACK(completion);
+	int iretry, ret = 0;
+
+	spin_lock_irq(sch->lock);
+	if (!sch->schib.pmcw.ena)
+		goto out_unlock;
+	ret = cio_disable_subchannel(sch);
+	if (ret != -EBUSY)
+		goto out_unlock;
+
+	do {
+		iretry = 255;
+
+		ret = cio_cancel_halt_clear(sch, &iretry);
+		while (ret == -EBUSY) {
+			/*
+			 * Flush all I/O and wait for
+			 * cancel/halt/clear completion.
+			 */
+			private->completion = &completion;
+			spin_unlock_irq(sch->lock);
+
+			wait_for_completion_timeout(&completion, 3*HZ);
+
+			spin_lock_irq(sch->lock);
+			private->completion = NULL;
+			flush_workqueue(vfio_ccw_work_q);
+			ret = cio_cancel_halt_clear(sch, &iretry);
+		};
+
+		ret = cio_disable_subchannel(sch);
+	} while (ret == -EBUSY);
+out_unlock:
+	private->state = VFIO_CCW_STATE_NOT_OPER;
+	spin_unlock_irq(sch->lock);
+	return ret;
+}
+
+static void vfio_ccw_sch_io_todo(struct work_struct *work)
+{
+	struct vfio_ccw_private *private;
+	struct subchannel *sch;
+	struct irb *irb;
+
+	private = container_of(work, struct vfio_ccw_private, io_work);
+	irb = &private->irb;
+	sch = private->sch;
+
+	if (scsw_is_solicited(&irb->scsw)) {
+		cp_update_scsw(&private->cp, &irb->scsw);
+		cp_free(&private->cp);
+	}
+	memcpy(private->io_region.irb_area, irb, sizeof(*irb));
+
+	if (private->io_trigger)
+		eventfd_signal(private->io_trigger, 1);
+
+	if (private->mdev)
+		private->state = VFIO_CCW_STATE_IDLE;
+}
+
+/*
+ * Sysfs interfaces
+ */
+static ssize_t chpids_show(struct device *dev,
+			   struct device_attribute *attr,
+			   char *buf)
+{
+	struct subchannel *sch = to_subchannel(dev);
+	struct chsc_ssd_info *ssd = &sch->ssd_info;
+	ssize_t ret = 0;
+	int chp;
+	int mask;
+
+	for (chp = 0; chp < 8; chp++) {
+		mask = 0x80 >> chp;
+		if (ssd->path_mask & mask)
+			ret += sprintf(buf + ret, "%02x ", ssd->chpid[chp].id);
+		else
+			ret += sprintf(buf + ret, "00 ");
+	}
+	ret += sprintf(buf+ret, "\n");
+	return ret;
+}
+
+static ssize_t pimpampom_show(struct device *dev,
+			      struct device_attribute *attr,
+			      char *buf)
+{
+	struct subchannel *sch = to_subchannel(dev);
+	struct pmcw *pmcw = &sch->schib.pmcw;
+
+	return sprintf(buf, "%02x %02x %02x\n",
+		       pmcw->pim, pmcw->pam, pmcw->pom);
+}
+
+static DEVICE_ATTR(chpids, 0444, chpids_show, NULL);
+static DEVICE_ATTR(pimpampom, 0444, pimpampom_show, NULL);
+
+static struct attribute *vfio_subchannel_attrs[] = {
+	&dev_attr_chpids.attr,
+	&dev_attr_pimpampom.attr,
+	NULL,
+};
+
+static struct attribute_group vfio_subchannel_attr_group = {
+	.attrs = vfio_subchannel_attrs,
+};
+
+/*
+ * Css driver callbacks
+ */
+static void vfio_ccw_sch_irq(struct subchannel *sch)
+{
+	struct vfio_ccw_private *private = dev_get_drvdata(&sch->dev);
+
+	inc_irq_stat(IRQIO_CIO);
+	vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_INTERRUPT);
+}
+
+static int vfio_ccw_sch_probe(struct subchannel *sch)
+{
+	struct pmcw *pmcw = &sch->schib.pmcw;
+	struct vfio_ccw_private *private;
+	int ret;
+
+	if (pmcw->qf) {
+		dev_warn(&sch->dev, "vfio: ccw: does not support QDIO: %s\n",
+			 dev_name(&sch->dev));
+		return -ENODEV;
+	}
+
+	private = kzalloc(sizeof(*private), GFP_KERNEL | GFP_DMA);
+	if (!private)
+		return -ENOMEM;
+	private->sch = sch;
+	dev_set_drvdata(&sch->dev, private);
+
+	spin_lock_irq(sch->lock);
+	private->state = VFIO_CCW_STATE_NOT_OPER;
+	sch->isc = VFIO_CCW_ISC;
+	ret = cio_enable_subchannel(sch, (u32)(unsigned long)sch);
+	spin_unlock_irq(sch->lock);
+	if (ret)
+		goto out_free;
+
+	ret = sysfs_create_group(&sch->dev.kobj, &vfio_subchannel_attr_group);
+	if (ret)
+		goto out_disable;
+
+	ret = vfio_ccw_mdev_reg(sch);
+	if (ret)
+		goto out_rm_group;
+
+	INIT_WORK(&private->io_work, vfio_ccw_sch_io_todo);
+	atomic_set(&private->avail, 1);
+	private->state = VFIO_CCW_STATE_STANDBY;
+
+	return 0;
+
+out_rm_group:
+	sysfs_remove_group(&sch->dev.kobj, &vfio_subchannel_attr_group);
+out_disable:
+	cio_disable_subchannel(sch);
+out_free:
+	dev_set_drvdata(&sch->dev, NULL);
+	kfree(private);
+	return ret;
+}
+
+static int vfio_ccw_sch_remove(struct subchannel *sch)
+{
+	struct vfio_ccw_private *private = dev_get_drvdata(&sch->dev);
+
+	vfio_ccw_sch_quiesce(sch);
+
+	vfio_ccw_mdev_unreg(sch);
+
+	sysfs_remove_group(&sch->dev.kobj, &vfio_subchannel_attr_group);
+
+	dev_set_drvdata(&sch->dev, NULL);
+
+	kfree(private);
+
+	return 0;
+}
+
+static void vfio_ccw_sch_shutdown(struct subchannel *sch)
+{
+	vfio_ccw_sch_quiesce(sch);
+}
+
+/**
+ * vfio_ccw_sch_event - process subchannel event
+ * @sch: subchannel
+ * @process: non-zero if function is called in process context
+ *
+ * An unspecified event occurred for this subchannel. Adjust data according
+ * to the current operational state of the subchannel. Return zero when the
+ * event has been handled sufficiently or -EAGAIN when this function should
+ * be called again in process context.
+ */
+static int vfio_ccw_sch_event(struct subchannel *sch, int process)
+{
+	struct vfio_ccw_private *private = dev_get_drvdata(&sch->dev);
+	unsigned long flags;
+
+	spin_lock_irqsave(sch->lock, flags);
+	if (!device_is_registered(&sch->dev))
+		goto out_unlock;
+
+	if (work_pending(&sch->todo_work))
+		goto out_unlock;
+
+	if (cio_update_schib(sch)) {
+		vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_NOT_OPER);
+		goto out_unlock;
+	}
+
+	private = dev_get_drvdata(&sch->dev);
+	if (private->state == VFIO_CCW_STATE_NOT_OPER) {
+		private->state = private->mdev ? VFIO_CCW_STATE_IDLE :
+				 VFIO_CCW_STATE_STANDBY;
+	}
+
+out_unlock:
+	spin_unlock_irqrestore(sch->lock, flags);
+
+	return 0;
+}
+
+static struct css_device_id vfio_ccw_sch_ids[] = {
+	{ .match_flags = 0x1, .type = SUBCHANNEL_TYPE_IO, },
+	{ /* end of list */ },
+};
+MODULE_DEVICE_TABLE(css, vfio_ccw_sch_ids);
+
+static struct css_driver vfio_ccw_sch_driver = {
+	.drv = {
+		.name = "vfio_ccw",
+		.owner = THIS_MODULE,
+	},
+	.subchannel_type = vfio_ccw_sch_ids,
+	.irq = vfio_ccw_sch_irq,
+	.probe = vfio_ccw_sch_probe,
+	.remove = vfio_ccw_sch_remove,
+	.shutdown = vfio_ccw_sch_shutdown,
+	.sch_event = vfio_ccw_sch_event,
+};
+
+static int __init vfio_ccw_sch_init(void)
+{
+	int ret;
+
+	vfio_ccw_work_q = create_singlethread_workqueue("vfio-ccw");
+	if (!vfio_ccw_work_q)
+		return -ENOMEM;
+
+	isc_register(VFIO_CCW_ISC);
+	ret = css_driver_register(&vfio_ccw_sch_driver);
+	if (ret) {
+		isc_unregister(VFIO_CCW_ISC);
+		destroy_workqueue(vfio_ccw_work_q);
+	}
+
+	return ret;
+}
+
+static void __exit vfio_ccw_sch_exit(void)
+{
+	css_driver_unregister(&vfio_ccw_sch_driver);
+	isc_unregister(VFIO_CCW_ISC);
+	destroy_workqueue(vfio_ccw_work_q);
+}
+module_init(vfio_ccw_sch_init);
+module_exit(vfio_ccw_sch_exit);
+
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/s390/cio/vfio_ccw_fsm.c b/drivers/s390/cio/vfio_ccw_fsm.c
new file mode 100644
index 000000000000..80a0559cd7ce
--- /dev/null
+++ b/drivers/s390/cio/vfio_ccw_fsm.c
@@ -0,0 +1,203 @@
+/*
+ * Finite state machine for vfio-ccw device handling
+ *
+ * Copyright IBM Corp. 2017
+ *
+ * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
+ */
+
+#include <linux/vfio.h>
+#include <linux/mdev.h>
+
+#include "ioasm.h"
+#include "vfio_ccw_private.h"
+
+static int fsm_io_helper(struct vfio_ccw_private *private)
+{
+	struct subchannel *sch;
+	union orb *orb;
+	int ccode;
+	__u8 lpm;
+	unsigned long flags;
+
+	sch = private->sch;
+
+	spin_lock_irqsave(sch->lock, flags);
+	private->state = VFIO_CCW_STATE_BUSY;
+	spin_unlock_irqrestore(sch->lock, flags);
+
+	orb = cp_get_orb(&private->cp, (u32)(addr_t)sch, sch->lpm);
+
+	/* Issue "Start Subchannel" */
+	ccode = ssch(sch->schid, orb);
+
+	switch (ccode) {
+	case 0:
+		/*
+		 * Initialize device status information
+		 */
+		sch->schib.scsw.cmd.actl |= SCSW_ACTL_START_PEND;
+		return 0;
+	case 1:		/* Status pending */
+	case 2:		/* Busy */
+		return -EBUSY;
+	case 3:		/* Device/path not operational */
+	{
+		lpm = orb->cmd.lpm;
+		if (lpm != 0)
+			sch->lpm &= ~lpm;
+		else
+			sch->lpm = 0;
+
+		if (cio_update_schib(sch))
+			return -ENODEV;
+
+		return sch->lpm ? -EACCES : -ENODEV;
+	}
+	default:
+		return ccode;
+	}
+}
+
+static void fsm_notoper(struct vfio_ccw_private *private,
+			enum vfio_ccw_event event)
+{
+	struct subchannel *sch = private->sch;
+
+	/*
+	 * TODO:
+	 * Probably we should send the machine check to the guest.
+	 */
+	css_sched_sch_todo(sch, SCH_TODO_UNREG);
+	private->state = VFIO_CCW_STATE_NOT_OPER;
+}
+
+/*
+ * No operation action.
+ */
+static void fsm_nop(struct vfio_ccw_private *private,
+		    enum vfio_ccw_event event)
+{
+}
+
+static void fsm_io_error(struct vfio_ccw_private *private,
+			 enum vfio_ccw_event event)
+{
+	pr_err("vfio-ccw: FSM: I/O request from state:%d\n", private->state);
+	private->io_region.ret_code = -EIO;
+}
+
+static void fsm_io_busy(struct vfio_ccw_private *private,
+			enum vfio_ccw_event event)
+{
+	private->io_region.ret_code = -EBUSY;
+}
+
+static void fsm_disabled_irq(struct vfio_ccw_private *private,
+			     enum vfio_ccw_event event)
+{
+	struct subchannel *sch = private->sch;
+
+	/*
+	 * An interrupt in a disabled state means a previous disable was not
+	 * successful - should not happen, but we try to disable again.
+	 */
+	cio_disable_subchannel(sch);
+}
+
+/*
+ * Deal with the ccw command request from the userspace.
+ */
+static void fsm_io_request(struct vfio_ccw_private *private,
+			   enum vfio_ccw_event event)
+{
+	union orb *orb;
+	union scsw *scsw = &private->scsw;
+	struct ccw_io_region *io_region = &private->io_region;
+	struct mdev_device *mdev = private->mdev;
+
+	private->state = VFIO_CCW_STATE_BOXED;
+
+	memcpy(scsw, io_region->scsw_area, sizeof(*scsw));
+
+	if (scsw->cmd.fctl & SCSW_FCTL_START_FUNC) {
+		orb = (union orb *)io_region->orb_area;
+
+		io_region->ret_code = cp_init(&private->cp, mdev_dev(mdev),
+					      orb);
+		if (io_region->ret_code)
+			goto err_out;
+
+		io_region->ret_code = cp_prefetch(&private->cp);
+		if (io_region->ret_code) {
+			cp_free(&private->cp);
+			goto err_out;
+		}
+
+		/* Start channel program and wait for I/O interrupt. */
+		io_region->ret_code = fsm_io_helper(private);
+		if (io_region->ret_code) {
+			cp_free(&private->cp);
+			goto err_out;
+		}
+		return;
+	} else if (scsw->cmd.fctl & SCSW_FCTL_HALT_FUNC) {
+		/* XXX: Handle halt. */
+		io_region->ret_code = -EOPNOTSUPP;
+		goto err_out;
+	} else if (scsw->cmd.fctl & SCSW_FCTL_CLEAR_FUNC) {
+		/* XXX: Handle clear. */
+		io_region->ret_code = -EOPNOTSUPP;
+		goto err_out;
+	}
+
+err_out:
+	private->state = VFIO_CCW_STATE_IDLE;
+}
+
+/*
+ * Got an interrupt for a normal io (state busy).
+ */
+static void fsm_irq(struct vfio_ccw_private *private,
+		    enum vfio_ccw_event event)
+{
+	struct irb *irb = this_cpu_ptr(&cio_irb);
+
+	memcpy(&private->irb, irb, sizeof(*irb));
+
+	queue_work(vfio_ccw_work_q, &private->io_work);
+
+	if (private->completion)
+		complete(private->completion);
+}
+
+/*
+ * Device statemachine
+ */
+fsm_func_t *vfio_ccw_jumptable[NR_VFIO_CCW_STATES][NR_VFIO_CCW_EVENTS] = {
+	[VFIO_CCW_STATE_NOT_OPER] = {
+		[VFIO_CCW_EVENT_NOT_OPER]	= fsm_nop,
+		[VFIO_CCW_EVENT_IO_REQ]		= fsm_io_error,
+		[VFIO_CCW_EVENT_INTERRUPT]	= fsm_disabled_irq,
+	},
+	[VFIO_CCW_STATE_STANDBY] = {
+		[VFIO_CCW_EVENT_NOT_OPER]	= fsm_notoper,
+		[VFIO_CCW_EVENT_IO_REQ]		= fsm_io_error,
+		[VFIO_CCW_EVENT_INTERRUPT]	= fsm_irq,
+	},
+	[VFIO_CCW_STATE_IDLE] = {
+		[VFIO_CCW_EVENT_NOT_OPER]	= fsm_notoper,
+		[VFIO_CCW_EVENT_IO_REQ]		= fsm_io_request,
+		[VFIO_CCW_EVENT_INTERRUPT]	= fsm_irq,
+	},
+	[VFIO_CCW_STATE_BOXED] = {
+		[VFIO_CCW_EVENT_NOT_OPER]	= fsm_notoper,
+		[VFIO_CCW_EVENT_IO_REQ]		= fsm_io_busy,
+		[VFIO_CCW_EVENT_INTERRUPT]	= fsm_irq,
+	},
+	[VFIO_CCW_STATE_BUSY] = {
+		[VFIO_CCW_EVENT_NOT_OPER]	= fsm_notoper,
+		[VFIO_CCW_EVENT_IO_REQ]		= fsm_io_busy,
+		[VFIO_CCW_EVENT_INTERRUPT]	= fsm_irq,
+	},
+};
diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c
new file mode 100644
index 000000000000..e72abbc18ee3
--- /dev/null
+++ b/drivers/s390/cio/vfio_ccw_ops.c
@@ -0,0 +1,425 @@
+/*
+ * Physical device callbacks for vfio_ccw
+ *
+ * Copyright IBM Corp. 2017
+ *
+ * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
+ *            Xiao Feng Ren <renxiaof@linux.vnet.ibm.com>
+ */
+
+#include <linux/vfio.h>
+#include <linux/mdev.h>
+
+#include "vfio_ccw_private.h"
+
+static int vfio_ccw_mdev_reset(struct mdev_device *mdev)
+{
+	struct vfio_ccw_private *private;
+	struct subchannel *sch;
+	int ret;
+
+	private = dev_get_drvdata(mdev_parent_dev(mdev));
+	sch = private->sch;
+	/*
+	 * TODO:
+	 * In the cureent stage, some things like "no I/O running" and "no
+	 * interrupt pending" are clear, but we are not sure what other state
+	 * we need to care about.
+	 * There are still a lot more instructions need to be handled. We
+	 * should come back here later.
+	 */
+	ret = vfio_ccw_sch_quiesce(sch);
+	if (ret)
+		return ret;
+
+	ret = cio_enable_subchannel(sch, (u32)(unsigned long)sch);
+	if (!ret)
+		private->state = VFIO_CCW_STATE_IDLE;
+
+	return ret;
+}
+
+static int vfio_ccw_mdev_notifier(struct notifier_block *nb,
+				  unsigned long action,
+				  void *data)
+{
+	struct vfio_ccw_private *private =
+		container_of(nb, struct vfio_ccw_private, nb);
+
+	/*
+	 * Vendor drivers MUST unpin pages in response to an
+	 * invalidation.
+	 */
+	if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) {
+		struct vfio_iommu_type1_dma_unmap *unmap = data;
+
+		if (!cp_iova_pinned(&private->cp, unmap->iova))
+			return NOTIFY_OK;
+
+		if (vfio_ccw_mdev_reset(private->mdev))
+			return NOTIFY_BAD;
+
+		cp_free(&private->cp);
+		return NOTIFY_OK;
+	}
+
+	return NOTIFY_DONE;
+}
+
+static ssize_t name_show(struct kobject *kobj, struct device *dev, char *buf)
+{
+	return sprintf(buf, "I/O subchannel (Non-QDIO)\n");
+}
+MDEV_TYPE_ATTR_RO(name);
+
+static ssize_t device_api_show(struct kobject *kobj, struct device *dev,
+			       char *buf)
+{
+	return sprintf(buf, "%s\n", VFIO_DEVICE_API_CCW_STRING);
+}
+MDEV_TYPE_ATTR_RO(device_api);
+
+static ssize_t available_instances_show(struct kobject *kobj,
+					struct device *dev, char *buf)
+{
+	struct vfio_ccw_private *private = dev_get_drvdata(dev);
+
+	return sprintf(buf, "%d\n", atomic_read(&private->avail));
+}
+MDEV_TYPE_ATTR_RO(available_instances);
+
+static struct attribute *mdev_types_attrs[] = {
+	&mdev_type_attr_name.attr,
+	&mdev_type_attr_device_api.attr,
+	&mdev_type_attr_available_instances.attr,
+	NULL,
+};
+
+static struct attribute_group mdev_type_group = {
+	.name  = "io",
+	.attrs = mdev_types_attrs,
+};
+
+struct attribute_group *mdev_type_groups[] = {
+	&mdev_type_group,
+	NULL,
+};
+
+static int vfio_ccw_mdev_create(struct kobject *kobj, struct mdev_device *mdev)
+{
+	struct vfio_ccw_private *private =
+		dev_get_drvdata(mdev_parent_dev(mdev));
+
+	if (private->state == VFIO_CCW_STATE_NOT_OPER)
+		return -ENODEV;
+
+	if (atomic_dec_if_positive(&private->avail) < 0)
+		return -EPERM;
+
+	private->mdev = mdev;
+	private->state = VFIO_CCW_STATE_IDLE;
+
+	return 0;
+}
+
+static int vfio_ccw_mdev_remove(struct mdev_device *mdev)
+{
+	struct vfio_ccw_private *private =
+		dev_get_drvdata(mdev_parent_dev(mdev));
+
+	if ((private->state != VFIO_CCW_STATE_NOT_OPER) &&
+	    (private->state != VFIO_CCW_STATE_STANDBY)) {
+		if (!vfio_ccw_mdev_reset(mdev))
+			private->state = VFIO_CCW_STATE_STANDBY;
+		/* The state will be NOT_OPER on error. */
+	}
+
+	private->mdev = NULL;
+	atomic_inc(&private->avail);
+
+	return 0;
+}
+
+static int vfio_ccw_mdev_open(struct mdev_device *mdev)
+{
+	struct vfio_ccw_private *private =
+		dev_get_drvdata(mdev_parent_dev(mdev));
+	unsigned long events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
+
+	private->nb.notifier_call = vfio_ccw_mdev_notifier;
+
+	return vfio_register_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY,
+				      &events, &private->nb);
+}
+
+void vfio_ccw_mdev_release(struct mdev_device *mdev)
+{
+	struct vfio_ccw_private *private =
+		dev_get_drvdata(mdev_parent_dev(mdev));
+
+	vfio_unregister_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY,
+				 &private->nb);
+}
+
+static ssize_t vfio_ccw_mdev_read(struct mdev_device *mdev,
+				  char __user *buf,
+				  size_t count,
+				  loff_t *ppos)
+{
+	struct vfio_ccw_private *private;
+	struct ccw_io_region *region;
+
+	if (*ppos + count > sizeof(*region))
+		return -EINVAL;
+
+	private = dev_get_drvdata(mdev_parent_dev(mdev));
+	region = &private->io_region;
+	if (copy_to_user(buf, (void *)region + *ppos, count))
+		return -EFAULT;
+
+	return count;
+}
+
+static ssize_t vfio_ccw_mdev_write(struct mdev_device *mdev,
+				   const char __user *buf,
+				   size_t count,
+				   loff_t *ppos)
+{
+	struct vfio_ccw_private *private;
+	struct ccw_io_region *region;
+
+	if (*ppos + count > sizeof(*region))
+		return -EINVAL;
+
+	private = dev_get_drvdata(mdev_parent_dev(mdev));
+	if (private->state != VFIO_CCW_STATE_IDLE)
+		return -EACCES;
+
+	region = &private->io_region;
+	if (copy_from_user((void *)region + *ppos, buf, count))
+		return -EFAULT;
+
+	vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_IO_REQ);
+	if (region->ret_code != 0) {
+		private->state = VFIO_CCW_STATE_IDLE;
+		return region->ret_code;
+	}
+
+	return count;
+}
+
+static int vfio_ccw_mdev_get_device_info(struct vfio_device_info *info)
+{
+	info->flags = VFIO_DEVICE_FLAGS_CCW | VFIO_DEVICE_FLAGS_RESET;
+	info->num_regions = VFIO_CCW_NUM_REGIONS;
+	info->num_irqs = VFIO_CCW_NUM_IRQS;
+
+	return 0;
+}
+
+static int vfio_ccw_mdev_get_region_info(struct vfio_region_info *info,
+					 u16 *cap_type_id,
+					 void **cap_type)
+{
+	switch (info->index) {
+	case VFIO_CCW_CONFIG_REGION_INDEX:
+		info->offset = 0;
+		info->size = sizeof(struct ccw_io_region);
+		info->flags = VFIO_REGION_INFO_FLAG_READ
+			      | VFIO_REGION_INFO_FLAG_WRITE;
+		return 0;
+	default:
+		return -EINVAL;
+	}
+}
+
+int vfio_ccw_mdev_get_irq_info(struct vfio_irq_info *info)
+{
+	if (info->index != VFIO_CCW_IO_IRQ_INDEX)
+		return -EINVAL;
+
+	info->count = 1;
+	info->flags = VFIO_IRQ_INFO_EVENTFD;
+
+	return 0;
+}
+
+static int vfio_ccw_mdev_set_irqs(struct mdev_device *mdev,
+				  uint32_t flags,
+				  void __user *data)
+{
+	struct vfio_ccw_private *private;
+	struct eventfd_ctx **ctx;
+
+	if (!(flags & VFIO_IRQ_SET_ACTION_TRIGGER))
+		return -EINVAL;
+
+	private = dev_get_drvdata(mdev_parent_dev(mdev));
+	ctx = &private->io_trigger;
+
+	switch (flags & VFIO_IRQ_SET_DATA_TYPE_MASK) {
+	case VFIO_IRQ_SET_DATA_NONE:
+	{
+		if (*ctx)
+			eventfd_signal(*ctx, 1);
+		return 0;
+	}
+	case VFIO_IRQ_SET_DATA_BOOL:
+	{
+		uint8_t trigger;
+
+		if (get_user(trigger, (uint8_t __user *)data))
+			return -EFAULT;
+
+		if (trigger && *ctx)
+			eventfd_signal(*ctx, 1);
+		return 0;
+	}
+	case VFIO_IRQ_SET_DATA_EVENTFD:
+	{
+		int32_t fd;
+
+		if (get_user(fd, (int32_t __user *)data))
+			return -EFAULT;
+
+		if (fd == -1) {
+			if (*ctx)
+				eventfd_ctx_put(*ctx);
+			*ctx = NULL;
+		} else if (fd >= 0) {
+			struct eventfd_ctx *efdctx;
+
+			efdctx = eventfd_ctx_fdget(fd);
+			if (IS_ERR(efdctx))
+				return PTR_ERR(efdctx);
+
+			if (*ctx)
+				eventfd_ctx_put(*ctx);
+
+			*ctx = efdctx;
+		} else
+			return -EINVAL;
+
+		return 0;
+	}
+	default:
+		return -EINVAL;
+	}
+}
+
+static ssize_t vfio_ccw_mdev_ioctl(struct mdev_device *mdev,
+				   unsigned int cmd,
+				   unsigned long arg)
+{
+	int ret = 0;
+	unsigned long minsz;
+
+	switch (cmd) {
+	case VFIO_DEVICE_GET_INFO:
+	{
+		struct vfio_device_info info;
+
+		minsz = offsetofend(struct vfio_device_info, num_irqs);
+
+		if (copy_from_user(&info, (void __user *)arg, minsz))
+			return -EFAULT;
+
+		if (info.argsz < minsz)
+			return -EINVAL;
+
+		ret = vfio_ccw_mdev_get_device_info(&info);
+		if (ret)
+			return ret;
+
+		return copy_to_user((void __user *)arg, &info, minsz);
+	}
+	case VFIO_DEVICE_GET_REGION_INFO:
+	{
+		struct vfio_region_info info;
+		u16 cap_type_id = 0;
+		void *cap_type = NULL;
+
+		minsz = offsetofend(struct vfio_region_info, offset);
+
+		if (copy_from_user(&info, (void __user *)arg, minsz))
+			return -EFAULT;
+
+		if (info.argsz < minsz)
+			return -EINVAL;
+
+		ret = vfio_ccw_mdev_get_region_info(&info, &cap_type_id,
+						    &cap_type);
+		if (ret)
+			return ret;
+
+		return copy_to_user((void __user *)arg, &info, minsz);
+	}
+	case VFIO_DEVICE_GET_IRQ_INFO:
+	{
+		struct vfio_irq_info info;
+
+		minsz = offsetofend(struct vfio_irq_info, count);
+
+		if (copy_from_user(&info, (void __user *)arg, minsz))
+			return -EFAULT;
+
+		if (info.argsz < minsz || info.index >= VFIO_CCW_NUM_IRQS)
+			return -EINVAL;
+
+		ret = vfio_ccw_mdev_get_irq_info(&info);
+		if (ret)
+			return ret;
+
+		if (info.count == -1)
+			return -EINVAL;
+
+		return copy_to_user((void __user *)arg, &info, minsz);
+	}
+	case VFIO_DEVICE_SET_IRQS:
+	{
+		struct vfio_irq_set hdr;
+		size_t data_size;
+		void __user *data;
+
+		minsz = offsetofend(struct vfio_irq_set, count);
+
+		if (copy_from_user(&hdr, (void __user *)arg, minsz))
+			return -EFAULT;
+
+		ret = vfio_set_irqs_validate_and_prepare(&hdr, 1,
+							 VFIO_CCW_NUM_IRQS,
+							 &data_size);
+		if (ret)
+			return ret;
+
+		data = (void __user *)(arg + minsz);
+		return vfio_ccw_mdev_set_irqs(mdev, hdr.flags, data);
+	}
+	case VFIO_DEVICE_RESET:
+		return vfio_ccw_mdev_reset(mdev);
+	default:
+		return -ENOTTY;
+	}
+}
+
+static const struct mdev_parent_ops vfio_ccw_mdev_ops = {
+	.owner			= THIS_MODULE,
+	.supported_type_groups  = mdev_type_groups,
+	.create			= vfio_ccw_mdev_create,
+	.remove			= vfio_ccw_mdev_remove,
+	.open			= vfio_ccw_mdev_open,
+	.release		= vfio_ccw_mdev_release,
+	.read			= vfio_ccw_mdev_read,
+	.write			= vfio_ccw_mdev_write,
+	.ioctl			= vfio_ccw_mdev_ioctl,
+};
+
+int vfio_ccw_mdev_reg(struct subchannel *sch)
+{
+	return mdev_register_device(&sch->dev, &vfio_ccw_mdev_ops);
+}
+
+void vfio_ccw_mdev_unreg(struct subchannel *sch)
+{
+	mdev_unregister_device(&sch->dev);
+}
diff --git a/drivers/s390/cio/vfio_ccw_private.h b/drivers/s390/cio/vfio_ccw_private.h
new file mode 100644
index 000000000000..fc0f01c16ef9
--- /dev/null
+++ b/drivers/s390/cio/vfio_ccw_private.h
@@ -0,0 +1,96 @@
+/*
+ * Private stuff for vfio_ccw driver
+ *
+ * Copyright IBM Corp. 2017
+ *
+ * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
+ *            Xiao Feng Ren <renxiaof@linux.vnet.ibm.com>
+ */
+
+#ifndef _VFIO_CCW_PRIVATE_H_
+#define _VFIO_CCW_PRIVATE_H_
+
+#include <linux/completion.h>
+#include <linux/eventfd.h>
+#include <linux/workqueue.h>
+#include <linux/vfio_ccw.h>
+
+#include "css.h"
+#include "vfio_ccw_cp.h"
+
+/**
+ * struct vfio_ccw_private
+ * @sch: pointer to the subchannel
+ * @state: internal state of the device
+ * @completion: synchronization helper of the I/O completion
+ * @avail: available for creating a mediated device
+ * @mdev: pointer to the mediated device
+ * @nb: notifier for vfio events
+ * @io_region: MMIO region to input/output I/O arguments/results
+ * @cp: channel program for the current I/O operation
+ * @irb: irb info received from interrupt
+ * @scsw: scsw info
+ * @io_trigger: eventfd ctx for signaling userspace I/O results
+ * @io_work: work for deferral process of I/O handling
+ */
+struct vfio_ccw_private {
+	struct subchannel	*sch;
+	int			state;
+	struct completion	*completion;
+	atomic_t		avail;
+	struct mdev_device	*mdev;
+	struct notifier_block	nb;
+	struct ccw_io_region	io_region;
+
+	struct channel_program	cp;
+	struct irb		irb;
+	union scsw		scsw;
+
+	struct eventfd_ctx	*io_trigger;
+	struct work_struct	io_work;
+} __aligned(8);
+
+extern int vfio_ccw_mdev_reg(struct subchannel *sch);
+extern void vfio_ccw_mdev_unreg(struct subchannel *sch);
+
+extern int vfio_ccw_sch_quiesce(struct subchannel *sch);
+
+/*
+ * States of the device statemachine.
+ */
+enum vfio_ccw_state {
+	VFIO_CCW_STATE_NOT_OPER,
+	VFIO_CCW_STATE_STANDBY,
+	VFIO_CCW_STATE_IDLE,
+	VFIO_CCW_STATE_BOXED,
+	VFIO_CCW_STATE_BUSY,
+	/* last element! */
+	NR_VFIO_CCW_STATES
+};
+
+/*
+ * Asynchronous events of the device statemachine.
+ */
+enum vfio_ccw_event {
+	VFIO_CCW_EVENT_NOT_OPER,
+	VFIO_CCW_EVENT_IO_REQ,
+	VFIO_CCW_EVENT_INTERRUPT,
+	/* last element! */
+	NR_VFIO_CCW_EVENTS
+};
+
+/*
+ * Action called through jumptable.
+ */
+typedef void (fsm_func_t)(struct vfio_ccw_private *, enum vfio_ccw_event);
+extern fsm_func_t *vfio_ccw_jumptable[NR_VFIO_CCW_STATES][NR_VFIO_CCW_EVENTS];
+
+static inline void vfio_ccw_fsm_event(struct vfio_ccw_private *private,
+				     int event)
+{
+	vfio_ccw_jumptable[private->state][event](private, event);
+}
+
+extern struct workqueue_struct *vfio_ccw_work_q;
+
+#endif
diff --git a/drivers/s390/crypto/pkey_api.c b/drivers/s390/crypto/pkey_api.c
index 058db724b5a2..ea86da8c75f9 100644
--- a/drivers/s390/crypto/pkey_api.c
+++ b/drivers/s390/crypto/pkey_api.c
@@ -80,7 +80,7 @@ struct secaeskeytoken {
  * token. If keybitsize is given, the bitsize of the key is
  * also checked. Returns 0 on success or errno value on failure.
  */
-static int check_secaeskeytoken(u8 *token, int keybitsize)
+static int check_secaeskeytoken(const u8 *token, int keybitsize)
 {
 	struct secaeskeytoken *t = (struct secaeskeytoken *) token;
 
@@ -1004,6 +1004,53 @@ int pkey_skey2pkey(const struct pkey_seckey *seckey,
 EXPORT_SYMBOL(pkey_skey2pkey);
 
 /*
+ * Verify key and give back some info about the key.
+ */
+int pkey_verifykey(const struct pkey_seckey *seckey,
+		   u16 *pcardnr, u16 *pdomain,
+		   u16 *pkeysize, u32 *pattributes)
+{
+	struct secaeskeytoken *t = (struct secaeskeytoken *) seckey;
+	u16 cardnr, domain;
+	u64 mkvp[2];
+	int rc;
+
+	/* check the secure key for valid AES secure key */
+	rc = check_secaeskeytoken((u8 *) seckey, 0);
+	if (rc)
+		goto out;
+	if (pattributes)
+		*pattributes = PKEY_VERIFY_ATTR_AES;
+	if (pkeysize)
+		*pkeysize = t->bitsize;
+
+	/* try to find a card which can handle this key */
+	rc = pkey_findcard(seckey, &cardnr, &domain, 1);
+	if (rc)
+		goto out;
+
+	/* check mkvp for old mkvp match */
+	rc = mkvp_cache_fetch(cardnr, domain, mkvp);
+	if (rc)
+		goto out;
+	if (t->mkvp == mkvp[1]) {
+		DEBUG_DBG("pkey_verifykey secure key has old mkvp\n");
+		if (pattributes)
+			*pattributes |= PKEY_VERIFY_ATTR_OLD_MKVP;
+	}
+
+	if (pcardnr)
+		*pcardnr = cardnr;
+	if (pdomain)
+		*pdomain = domain;
+
+out:
+	DEBUG_DBG("pkey_verifykey rc=%d\n", rc);
+	return rc;
+}
+EXPORT_SYMBOL(pkey_verifykey);
+
+/*
  * File io functions
  */
 
@@ -1104,6 +1151,21 @@ static long pkey_unlocked_ioctl(struct file *filp, unsigned int cmd,
 			return -EFAULT;
 		break;
 	}
+	case PKEY_VERIFYKEY: {
+		struct pkey_verifykey __user *uvk = (void __user *) arg;
+		struct pkey_verifykey kvk;
+
+		if (copy_from_user(&kvk, uvk, sizeof(kvk)))
+			return -EFAULT;
+		rc = pkey_verifykey(&kvk.seckey, &kvk.cardnr, &kvk.domain,
+				    &kvk.keysize, &kvk.attributes);
+		DEBUG_DBG("pkey_ioctl pkey_verifykey()=%d\n", rc);
+		if (rc)
+			break;
+		if (copy_to_user(uvk, &kvk, sizeof(kvk)))
+			return -EFAULT;
+		break;
+	}
 	default:
 		/* unknown/unsupported ioctl cmd */
 		return -ENOTTY;
diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h
index b59ee077a596..176b6cb1008d 100644
--- a/include/uapi/linux/elf.h
+++ b/include/uapi/linux/elf.h
@@ -409,6 +409,8 @@ typedef struct elf64_shdr {
 #define NT_S390_TDB	0x308		/* s390 transaction diagnostic block */
 #define NT_S390_VXRS_LOW	0x309	/* s390 vector registers 0-15 upper half */
 #define NT_S390_VXRS_HIGH	0x30a	/* s390 vector registers 16-31 */
+#define NT_S390_GS_CB	0x30b		/* s390 guarded storage registers */
+#define NT_S390_GS_BC	0x30c		/* s390 guarded storage broadcast control block */
 #define NT_ARM_VFP	0x400		/* ARM VFP/NEON registers */
 #define NT_ARM_TLS	0x401		/* ARM TLS register */
 #define NT_ARM_HW_BREAK	0x402		/* ARM hardware breakpoint registers */
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 519eff362c1c..ae461050661a 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -198,6 +198,7 @@ struct vfio_device_info {
 #define VFIO_DEVICE_FLAGS_PCI	(1 << 1)	/* vfio-pci device */
 #define VFIO_DEVICE_FLAGS_PLATFORM (1 << 2)	/* vfio-platform device */
 #define VFIO_DEVICE_FLAGS_AMBA  (1 << 3)	/* vfio-amba device */
+#define VFIO_DEVICE_FLAGS_CCW	(1 << 4)	/* vfio-ccw device */
 	__u32	num_regions;	/* Max region index + 1 */
 	__u32	num_irqs;	/* Max IRQ index + 1 */
 };
@@ -212,6 +213,7 @@ struct vfio_device_info {
 #define VFIO_DEVICE_API_PCI_STRING		"vfio-pci"
 #define VFIO_DEVICE_API_PLATFORM_STRING		"vfio-platform"
 #define VFIO_DEVICE_API_AMBA_STRING		"vfio-amba"
+#define VFIO_DEVICE_API_CCW_STRING		"vfio-ccw"
 
 /**
  * VFIO_DEVICE_GET_REGION_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 8,
@@ -446,6 +448,22 @@ enum {
 	VFIO_PCI_NUM_IRQS
 };
 
+/*
+ * The vfio-ccw bus driver makes use of the following fixed region and
+ * IRQ index mapping. Unimplemented regions return a size of zero.
+ * Unimplemented IRQ types return a count of zero.
+ */
+
+enum {
+	VFIO_CCW_CONFIG_REGION_INDEX,
+	VFIO_CCW_NUM_REGIONS
+};
+
+enum {
+	VFIO_CCW_IO_IRQ_INDEX,
+	VFIO_CCW_NUM_IRQS
+};
+
 /**
  * VFIO_DEVICE_GET_PCI_HOT_RESET_INFO - _IORW(VFIO_TYPE, VFIO_BASE + 12,
  *					      struct vfio_pci_hot_reset_info)
diff --git a/include/uapi/linux/vfio_ccw.h b/include/uapi/linux/vfio_ccw.h
new file mode 100644
index 000000000000..34a7f6f9e065
--- /dev/null
+++ b/include/uapi/linux/vfio_ccw.h
@@ -0,0 +1,24 @@
+/*
+ * Interfaces for vfio-ccw
+ *
+ * Copyright IBM Corp. 2017
+ *
+ * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
+ */
+
+#ifndef _VFIO_CCW_H_
+#define _VFIO_CCW_H_
+
+#include <linux/types.h>
+
+struct ccw_io_region {
+#define ORB_AREA_SIZE 12
+	__u8	orb_area[ORB_AREA_SIZE];
+#define SCSW_AREA_SIZE 12
+	__u8	scsw_area[SCSW_AREA_SIZE];
+#define IRB_AREA_SIZE 96
+	__u8	irb_area[IRB_AREA_SIZE];
+	__u32	ret_code;
+} __packed;
+
+#endif