summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPaolo Bonzini <pbonzini@redhat.com>2018-10-04 18:12:45 +0300
committerPaolo Bonzini <pbonzini@redhat.com>2018-10-04 18:12:45 +0300
commitdd5bd0a65ff6f22a32b35ca3fa1bcf7a6bc7104f (patch)
tree7214fc9fb55a1cedee4d0bca3a4f6799414236e7
parent7e7126846c95a34f98a1524d5c473af1f0783735 (diff)
parent55d09dd4c86060fbbc74ab2b1bfaed401cd0163a (diff)
downloadlinux-dd5bd0a65ff6f22a32b35ca3fa1bcf7a6bc7104f.tar.xz
Merge tag 'kvm-s390-next-4.20-1' of git://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux into HEAD
KVM: s390: Features for 4.20 - Initial version of AP crypto virtualization via vfio-mdev - Set the host program identifier - Optimize page table locking
-rw-r--r--Documentation/s390/vfio-ap.txt837
-rw-r--r--MAINTAINERS12
-rw-r--r--arch/s390/Kconfig11
-rw-r--r--arch/s390/include/asm/kvm_host.h13
-rw-r--r--arch/s390/include/uapi/asm/kvm.h2
-rw-r--r--arch/s390/kvm/kvm-s390.c135
-rw-r--r--arch/s390/kvm/kvm-s390.h1
-rw-r--r--arch/s390/kvm/vsie.c210
-rw-r--r--arch/s390/mm/gmap.c10
-rw-r--r--arch/s390/tools/gen_facilities.c2
-rw-r--r--drivers/iommu/Kconfig8
-rw-r--r--drivers/s390/crypto/Makefile4
-rw-r--r--drivers/s390/crypto/vfio_ap_drv.c157
-rw-r--r--drivers/s390/crypto/vfio_ap_ops.c968
-rw-r--r--drivers/s390/crypto/vfio_ap_private.h88
-rw-r--r--include/uapi/linux/vfio.h2
16 files changed, 2397 insertions, 63 deletions
diff --git a/Documentation/s390/vfio-ap.txt b/Documentation/s390/vfio-ap.txt
new file mode 100644
index 000000000000..65167cfe4485
--- /dev/null
+++ b/Documentation/s390/vfio-ap.txt
@@ -0,0 +1,837 @@
+Introduction:
+============
+The Adjunct Processor (AP) facility is an IBM Z cryptographic facility comprised
+of three AP instructions and from 1 up to 256 PCIe cryptographic adapter cards.
+The AP devices provide cryptographic functions to all CPUs assigned to a
+linux system running in an IBM Z system LPAR.
+
+The AP adapter cards are exposed via the AP bus. The motivation for vfio-ap
+is to make AP cards available to KVM guests using the VFIO mediated device
+framework. This implementation relies considerably on the s390 virtualization
+facilities which do most of the hard work of providing direct access to AP
+devices.
+
+AP Architectural Overview:
+=========================
+To facilitate the comprehension of the design, let's start with some
+definitions:
+
+* AP adapter
+
+ An AP adapter is an IBM Z adapter card that can perform cryptographic
+ functions. There can be from 0 to 256 adapters assigned to an LPAR. Adapters
+ assigned to the LPAR in which a linux host is running will be available to
+ the linux host. Each adapter is identified by a number from 0 to 255; however,
+ the maximum adapter number is determined by machine model and/or adapter type.
+ When installed, an AP adapter is accessed by AP instructions executed by any
+ CPU.
+
+ The AP adapter cards are assigned to a given LPAR via the system's Activation
+ Profile which can be edited via the HMC. When the linux host system is IPL'd
+ in the LPAR, the AP bus detects the AP adapter cards assigned to the LPAR and
+ creates a sysfs device for each assigned adapter. For example, if AP adapters
+ 4 and 10 (0x0a) are assigned to the LPAR, the AP bus will create the following
+ sysfs device entries:
+
+ /sys/devices/ap/card04
+ /sys/devices/ap/card0a
+
+ Symbolic links to these devices will also be created in the AP bus devices
+ sub-directory:
+
+ /sys/bus/ap/devices/[card04]
+ /sys/bus/ap/devices/[card04]
+
+* AP domain
+
+ An adapter is partitioned into domains. An adapter can hold up to 256 domains
+ depending upon the adapter type and hardware configuration. A domain is
+ identified by a number from 0 to 255; however, the maximum domain number is
+ determined by machine model and/or adapter type.. A domain can be thought of
+ as a set of hardware registers and memory used for processing AP commands. A
+ domain can be configured with a secure private key used for clear key
+ encryption. A domain is classified in one of two ways depending upon how it
+ may be accessed:
+
+ * Usage domains are domains that are targeted by an AP instruction to
+ process an AP command.
+
+ * Control domains are domains that are changed by an AP command sent to a
+ usage domain; for example, to set the secure private key for the control
+ domain.
+
+ The AP usage and control domains are assigned to a given LPAR via the system's
+ Activation Profile which can be edited via the HMC. When a linux host system
+ is IPL'd in the LPAR, the AP bus module detects the AP usage and control
+ domains assigned to the LPAR. The domain number of each usage domain and
+ adapter number of each AP adapter are combined to create AP queue devices
+ (see AP Queue section below). The domain number of each control domain will be
+ represented in a bitmask and stored in a sysfs file
+ /sys/bus/ap/ap_control_domain_mask. The bits in the mask, from most to least
+ significant bit, correspond to domains 0-255.
+
+* AP Queue
+
+ An AP queue is the means by which an AP command is sent to a usage domain
+ inside a specific adapter. An AP queue is identified by a tuple
+ comprised of an AP adapter ID (APID) and an AP queue index (APQI). The
+ APQI corresponds to a given usage domain number within the adapter. This tuple
+ forms an AP Queue Number (APQN) uniquely identifying an AP queue. AP
+ instructions include a field containing the APQN to identify the AP queue to
+ which the AP command is to be sent for processing.
+
+ The AP bus will create a sysfs device for each APQN that can be derived from
+ the cross product of the AP adapter and usage domain numbers detected when the
+ AP bus module is loaded. For example, if adapters 4 and 10 (0x0a) and usage
+ domains 6 and 71 (0x47) are assigned to the LPAR, the AP bus will create the
+ following sysfs entries:
+
+ /sys/devices/ap/card04/04.0006
+ /sys/devices/ap/card04/04.0047
+ /sys/devices/ap/card0a/0a.0006
+ /sys/devices/ap/card0a/0a.0047
+
+ The following symbolic links to these devices will be created in the AP bus
+ devices subdirectory:
+
+ /sys/bus/ap/devices/[04.0006]
+ /sys/bus/ap/devices/[04.0047]
+ /sys/bus/ap/devices/[0a.0006]
+ /sys/bus/ap/devices/[0a.0047]
+
+* AP Instructions:
+
+ There are three AP instructions:
+
+ * NQAP: to enqueue an AP command-request message to a queue
+ * DQAP: to dequeue an AP command-reply message from a queue
+ * PQAP: to administer the queues
+
+ AP instructions identify the domain that is targeted to process the AP
+ command; this must be one of the usage domains. An AP command may modify a
+ domain that is not one of the usage domains, but the modified domain
+ must be one of the control domains.
+
+AP and SIE:
+==========
+Let's now take a look at how AP instructions executed on a guest are interpreted
+by the hardware.
+
+A satellite control block called the Crypto Control Block (CRYCB) is attached to
+our main hardware virtualization control block. The CRYCB contains three fields
+to identify the adapters, usage domains and control domains assigned to the KVM
+guest:
+
+* The AP Mask (APM) field is a bit mask that identifies the AP adapters assigned
+ to the KVM guest. Each bit in the mask, from left to right (i.e. from most
+ significant to least significant bit in big endian order), corresponds to
+ an APID from 0-255. If a bit is set, the corresponding adapter is valid for
+ use by the KVM guest.
+
+* The AP Queue Mask (AQM) field is a bit mask identifying the AP usage domains
+ assigned to the KVM guest. Each bit in the mask, from left to right (i.e. from
+ most significant to least significant bit in big endian order), corresponds to
+ an AP queue index (APQI) from 0-255. If a bit is set, the corresponding queue
+ is valid for use by the KVM guest.
+
+* The AP Domain Mask field is a bit mask that identifies the AP control domains
+ assigned to the KVM guest. The ADM bit mask controls which domains can be
+ changed by an AP command-request message sent to a usage domain from the
+ guest. Each bit in the mask, from left to right (i.e. from most significant to
+ least significant bit in big endian order), corresponds to a domain from
+ 0-255. If a bit is set, the corresponding domain can be modified by an AP
+ command-request message sent to a usage domain.
+
+If you recall from the description of an AP Queue, AP instructions include
+an APQN to identify the AP queue to which an AP command-request message is to be
+sent (NQAP and PQAP instructions), or from which a command-reply message is to
+be received (DQAP instruction). The validity of an APQN is defined by the matrix
+calculated from the APM and AQM; it is the cross product of all assigned adapter
+numbers (APM) with all assigned queue indexes (AQM). For example, if adapters 1
+and 2 and usage domains 5 and 6 are assigned to a guest, the APQNs (1,5), (1,6),
+(2,5) and (2,6) will be valid for the guest.
+
+The APQNs can provide secure key functionality - i.e., a private key is stored
+on the adapter card for each of its domains - so each APQN must be assigned to
+at most one guest or to the linux host.
+
+ Example 1: Valid configuration:
+ ------------------------------
+ Guest1: adapters 1,2 domains 5,6
+ Guest2: adapter 1,2 domain 7
+
+ This is valid because both guests have a unique set of APQNs:
+ Guest1 has APQNs (1,5), (1,6), (2,5), (2,6);
+ Guest2 has APQNs (1,7), (2,7)
+
+ Example 2: Valid configuration:
+ ------------------------------
+ Guest1: adapters 1,2 domains 5,6
+ Guest2: adapters 3,4 domains 5,6
+
+ This is also valid because both guests have a unique set of APQNs:
+ Guest1 has APQNs (1,5), (1,6), (2,5), (2,6);
+ Guest2 has APQNs (3,5), (3,6), (4,5), (4,6)
+
+ Example 3: Invalid configuration:
+ --------------------------------
+ Guest1: adapters 1,2 domains 5,6
+ Guest2: adapter 1 domains 6,7
+
+ This is an invalid configuration because both guests have access to
+ APQN (1,6).
+
+The Design:
+===========
+The design introduces three new objects:
+
+1. AP matrix device
+2. VFIO AP device driver (vfio_ap.ko)
+3. VFIO AP mediated matrix pass-through device
+
+The VFIO AP device driver
+-------------------------
+The VFIO AP (vfio_ap) device driver serves the following purposes:
+
+1. Provides the interfaces to secure APQNs for exclusive use of KVM guests.
+
+2. Sets up the VFIO mediated device interfaces to manage a mediated matrix
+ device and creates the sysfs interfaces for assigning adapters, usage
+ domains, and control domains comprising the matrix for a KVM guest.
+
+3. Configures the APM, AQM and ADM in the CRYCB referenced by a KVM guest's
+ SIE state description to grant the guest access to a matrix of AP devices
+
+Reserve APQNs for exclusive use of KVM guests
+---------------------------------------------
+The following block diagram illustrates the mechanism by which APQNs are
+reserved:
+
+ +------------------+
+ 7 remove | |
+ +--------------------> cex4queue driver |
+ | | |
+ | +------------------+
+ |
+ |
+ | +------------------+ +-----------------+
+ | 5 register driver | | 3 create | |
+ | +----------------> Device core +----------> matrix device |
+ | | | | | |
+ | | +--------^---------+ +-----------------+
+ | | |
+ | | +-------------------+
+ | | +-----------------------------------+ |
+ | | | 4 register AP driver | | 2 register device
+ | | | | |
++--------+---+-v---+ +--------+-------+-+
+| | | |
+| ap_bus +--------------------- > vfio_ap driver |
+| | 8 probe | |
++--------^---------+ +--^--^------------+
+6 edit | | |
+ apmask | +-----------------------------+ | 9 mdev create
+ aqmask | | 1 modprobe |
++--------+-----+---+ +----------------+-+ +------------------+
+| | | |8 create | mediated |
+| admin | | VFIO device core |---------> matrix |
+| + | | | device |
++------+-+---------+ +--------^---------+ +--------^---------+
+ | | | |
+ | | 9 create vfio_ap-passthrough | |
+ | +------------------------------+ |
+ +-------------------------------------------------------------+
+ 10 assign adapter/domain/control domain
+
+The process for reserving an AP queue for use by a KVM guest is:
+
+1. The administrator loads the vfio_ap device driver
+2. The vfio-ap driver during its initialization will register a single 'matrix'
+ device with the device core. This will serve as the parent device for
+ all mediated matrix devices used to configure an AP matrix for a guest.
+3. The /sys/devices/vfio_ap/matrix device is created by the device core
+4 The vfio_ap device driver will register with the AP bus for AP queue devices
+ of type 10 and higher (CEX4 and newer). The driver will provide the vfio_ap
+ driver's probe and remove callback interfaces. Devices older than CEX4 queues
+ are not supported to simplify the implementation by not needlessly
+ complicating the design by supporting older devices that will go out of
+ service in the relatively near future, and for which there are few older
+ systems around on which to test.
+5. The AP bus registers the vfio_ap device driver with the device core
+6. The administrator edits the AP adapter and queue masks to reserve AP queues
+ for use by the vfio_ap device driver.
+7. The AP bus removes the AP queues reserved for the vfio_ap driver from the
+ default zcrypt cex4queue driver.
+8. The AP bus probes the vfio_ap device driver to bind the queues reserved for
+ it.
+9. The administrator creates a passthrough type mediated matrix device to be
+ used by a guest
+10 The administrator assigns the adapters, usage domains and control domains
+ to be exclusively used by a guest.
+
+Set up the VFIO mediated device interfaces
+------------------------------------------
+The VFIO AP device driver utilizes the common interface of the VFIO mediated
+device core driver to:
+* Register an AP mediated bus driver to add a mediated matrix device to and
+ remove it from a VFIO group.
+* Create and destroy a mediated matrix device
+* Add a mediated matrix device to and remove it from the AP mediated bus driver
+* Add a mediated matrix device to and remove it from an IOMMU group
+
+The following high-level block diagram shows the main components and interfaces
+of the VFIO AP mediated matrix device driver:
+
+ +-------------+
+ | |
+ | +---------+ | mdev_register_driver() +--------------+
+ | | Mdev | +<-----------------------+ |
+ | | bus | | | vfio_mdev.ko |
+ | | driver | +----------------------->+ |<-> VFIO user
+ | +---------+ | probe()/remove() +--------------+ APIs
+ | |
+ | MDEV CORE |
+ | MODULE |
+ | mdev.ko |
+ | +---------+ | mdev_register_device() +--------------+
+ | |Physical | +<-----------------------+ |
+ | | device | | | vfio_ap.ko |<-> matrix
+ | |interface| +----------------------->+ | device
+ | +---------+ | callback +--------------+
+ +-------------+
+
+During initialization of the vfio_ap module, the matrix device is registered
+with an 'mdev_parent_ops' structure that provides the sysfs attribute
+structures, mdev functions and callback interfaces for managing the mediated
+matrix device.
+
+* sysfs attribute structures:
+ * supported_type_groups
+ The VFIO mediated device framework supports creation of user-defined
+ mediated device types. These mediated device types are specified
+ via the 'supported_type_groups' structure when a device is registered
+ with the mediated device framework. The registration process creates the
+ sysfs structures for each mediated device type specified in the
+ 'mdev_supported_types' sub-directory of the device being registered. Along
+ with the device type, the sysfs attributes of the mediated device type are
+ provided.
+
+ The VFIO AP device driver will register one mediated device type for
+ passthrough devices:
+ /sys/devices/vfio_ap/matrix/mdev_supported_types/vfio_ap-passthrough
+ Only the read-only attributes required by the VFIO mdev framework will
+ be provided:
+ ... name
+ ... device_api
+ ... available_instances
+ ... device_api
+ Where:
+ * name: specifies the name of the mediated device type
+ * device_api: the mediated device type's API
+ * available_instances: the number of mediated matrix passthrough devices
+ that can be created
+ * device_api: specifies the VFIO API
+ * mdev_attr_groups
+ This attribute group identifies the user-defined sysfs attributes of the
+ mediated device. When a device is registered with the VFIO mediated device
+ framework, the sysfs attribute files identified in the 'mdev_attr_groups'
+ structure will be created in the mediated matrix device's directory. The
+ sysfs attributes for a mediated matrix device are:
+ * assign_adapter:
+ * unassign_adapter:
+ Write-only attributes for assigning/unassigning an AP adapter to/from the
+ mediated matrix device. To assign/unassign an adapter, the APID of the
+ adapter is echoed to the respective attribute file.
+ * assign_domain:
+ * unassign_domain:
+ Write-only attributes for assigning/unassigning an AP usage domain to/from
+ the mediated matrix device. To assign/unassign a domain, the domain
+ number of the the usage domain is echoed to the respective attribute
+ file.
+ * matrix:
+ A read-only file for displaying the APQNs derived from the cross product
+ of the adapter and domain numbers assigned to the mediated matrix device.
+ * assign_control_domain:
+ * unassign_control_domain:
+ Write-only attributes for assigning/unassigning an AP control domain
+ to/from the mediated matrix device. To assign/unassign a control domain,
+ the ID of the domain to be assigned/unassigned is echoed to the respective
+ attribute file.
+ * control_domains:
+ A read-only file for displaying the control domain numbers assigned to the
+ mediated matrix device.
+
+* functions:
+ * create:
+ allocates the ap_matrix_mdev structure used by the vfio_ap driver to:
+ * Store the reference to the KVM structure for the guest using the mdev
+ * Store the AP matrix configuration for the adapters, domains, and control
+ domains assigned via the corresponding sysfs attributes files
+ * remove:
+ deallocates the mediated matrix device's ap_matrix_mdev structure. This will
+ be allowed only if a running guest is not using the mdev.
+
+* callback interfaces
+ * open:
+ The vfio_ap driver uses this callback to register a
+ VFIO_GROUP_NOTIFY_SET_KVM notifier callback function for the mdev matrix
+ device. The open is invoked when QEMU connects the VFIO iommu group
+ for the mdev matrix device to the MDEV bus. Access to the KVM structure used
+ to configure the KVM guest is provided via this callback. The KVM structure,
+ is used to configure the guest's access to the AP matrix defined via the
+ mediated matrix device's sysfs attribute files.
+ * release:
+ unregisters the VFIO_GROUP_NOTIFY_SET_KVM notifier callback function for the
+ mdev matrix device and deconfigures the guest's AP matrix.
+
+Configure the APM, AQM and ADM in the CRYCB:
+-------------------------------------------
+Configuring the AP matrix for a KVM guest will be performed when the
+VFIO_GROUP_NOTIFY_SET_KVM notifier callback is invoked. The notifier
+function is called when QEMU connects to KVM. The guest's AP matrix is
+configured via it's CRYCB by:
+* Setting the bits in the APM corresponding to the APIDs assigned to the
+ mediated matrix device via its 'assign_adapter' interface.
+* Setting the bits in the AQM corresponding to the domains assigned to the
+ mediated matrix device via its 'assign_domain' interface.
+* Setting the bits in the ADM corresponding to the domain dIDs assigned to the
+ mediated matrix device via its 'assign_control_domains' interface.
+
+The CPU model features for AP
+-----------------------------
+The AP stack relies on the presence of the AP instructions as well as two
+facilities: The AP Facilities Test (APFT) facility; and the AP Query
+Configuration Information (QCI) facility. These features/facilities are made
+available to a KVM guest via the following CPU model features:
+
+1. ap: Indicates whether the AP instructions are installed on the guest. This
+ feature will be enabled by KVM only if the AP instructions are installed
+ on the host.
+
+2. apft: Indicates the APFT facility is available on the guest. This facility
+ can be made available to the guest only if it is available on the host (i.e.,
+ facility bit 15 is set).
+
+3. apqci: Indicates the AP QCI facility is available on the guest. This facility
+ can be made available to the guest only if it is available on the host (i.e.,
+ facility bit 12 is set).
+
+Note: If the user chooses to specify a CPU model different than the 'host'
+model to QEMU, the CPU model features and facilities need to be turned on
+explicitly; for example:
+
+ /usr/bin/qemu-system-s390x ... -cpu z13,ap=on,apqci=on,apft=on
+
+A guest can be precluded from using AP features/facilities by turning them off
+explicitly; for example:
+
+ /usr/bin/qemu-system-s390x ... -cpu host,ap=off,apqci=off,apft=off
+
+Note: If the APFT facility is turned off (apft=off) for the guest, the guest
+will not see any AP devices. The zcrypt device drivers that register for type 10
+and newer AP devices - i.e., the cex4card and cex4queue device drivers - need
+the APFT facility to ascertain the facilities installed on a given AP device. If
+the APFT facility is not installed on the guest, then the probe of device
+drivers will fail since only type 10 and newer devices can be configured for
+guest use.
+
+Example:
+=======
+Let's now provide an example to illustrate how KVM guests may be given
+access to AP facilities. For this example, we will show how to configure
+three guests such that executing the lszcrypt command on the guests would
+look like this:
+
+Guest1
+------
+CARD.DOMAIN TYPE MODE
+------------------------------
+05 CEX5C CCA-Coproc
+05.0004 CEX5C CCA-Coproc
+05.00ab CEX5C CCA-Coproc
+06 CEX5A Accelerator
+06.0004 CEX5A Accelerator
+06.00ab CEX5C CCA-Coproc
+
+Guest2
+------
+CARD.DOMAIN TYPE MODE
+------------------------------
+05 CEX5A Accelerator
+05.0047 CEX5A Accelerator
+05.00ff CEX5A Accelerator
+
+Guest2
+------
+CARD.DOMAIN TYPE MODE
+------------------------------
+06 CEX5A Accelerator
+06.0047 CEX5A Accelerator
+06.00ff CEX5A Accelerator
+
+These are the steps:
+
+1. Install the vfio_ap module on the linux host. The dependency chain for the
+ vfio_ap module is:
+ * iommu
+ * s390
+ * zcrypt
+ * vfio
+ * vfio_mdev
+ * vfio_mdev_device
+ * KVM
+
+ To build the vfio_ap module, the kernel build must be configured with the
+ following Kconfig elements selected:
+ * IOMMU_SUPPORT
+ * S390
+ * ZCRYPT
+ * S390_AP_IOMMU
+ * VFIO
+ * VFIO_MDEV
+ * VFIO_MDEV_DEVICE
+ * KVM
+
+ If using make menuconfig select the following to build the vfio_ap module:
+ -> Device Drivers
+ -> IOMMU Hardware Support
+ select S390 AP IOMMU Support
+ -> VFIO Non-Privileged userspace driver framework
+ -> Mediated device driver frramework
+ -> VFIO driver for Mediated devices
+ -> I/O subsystem
+ -> VFIO support for AP devices
+
+2. Secure the AP queues to be used by the three guests so that the host can not
+ access them. To secure them, there are two sysfs files that specify
+ bitmasks marking a subset of the APQN range as 'usable by the default AP
+ queue device drivers' or 'not usable by the default device drivers' and thus
+ available for use by the vfio_ap device driver'. The location of the sysfs
+ files containing the masks are:
+
+ /sys/bus/ap/apmask
+ /sys/bus/ap/aqmask
+
+ The 'apmask' is a 256-bit mask that identifies a set of AP adapter IDs
+ (APID). Each bit in the mask, from left to right (i.e., from most significant
+ to least significant bit in big endian order), corresponds to an APID from
+ 0-255. If a bit is set, the APID is marked as usable only by the default AP
+ queue device drivers; otherwise, the APID is usable by the vfio_ap
+ device driver.
+
+ The 'aqmask' is a 256-bit mask that identifies a set of AP queue indexes
+ (APQI). Each bit in the mask, from left to right (i.e., from most significant
+ to least significant bit in big endian order), corresponds to an APQI from
+ 0-255. If a bit is set, the APQI is marked as usable only by the default AP
+ queue device drivers; otherwise, the APQI is usable by the vfio_ap device
+ driver.
+
+ Take, for example, the following mask:
+
+ 0x7dffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
+
+ It indicates:
+
+ 1, 2, 3, 4, 5, and 7-255 belong to the default drivers' pool, and 0 and 6
+ belong to the vfio_ap device driver's pool.
+
+ The APQN of each AP queue device assigned to the linux host is checked by the
+ AP bus against the set of APQNs derived from the cross product of APIDs
+ and APQIs marked as usable only by the default AP queue device drivers. If a
+ match is detected, only the default AP queue device drivers will be probed;
+ otherwise, the vfio_ap device driver will be probed.
+
+ By default, the two masks are set to reserve all APQNs for use by the default
+ AP queue device drivers. There are two ways the default masks can be changed:
+
+ 1. The sysfs mask files can be edited by echoing a string into the
+ respective sysfs mask file in one of two formats:
+
+ * An absolute hex string starting with 0x - like "0x12345678" - sets
+ the mask. If the given string is shorter than the mask, it is padded
+ with 0s on the right; for example, specifying a mask value of 0x41 is
+ the same as specifying:
+
+ 0x4100000000000000000000000000000000000000000000000000000000000000
+
+ Keep in mind that the mask reads from left to right (i.e., most
+ significant to least significant bit in big endian order), so the mask
+ above identifies device numbers 1 and 7 (01000001).
+
+ If the string is longer than the mask, the operation is terminated with
+ an error (EINVAL).
+
+ * Individual bits in the mask can be switched on and off by specifying
+ each bit number to be switched in a comma separated list. Each bit
+ number string must be prepended with a ('+') or minus ('-') to indicate
+ the corresponding bit is to be switched on ('+') or off ('-'). Some
+ valid values are:
+
+ "+0" switches bit 0 on
+ "-13" switches bit 13 off
+ "+0x41" switches bit 65 on
+ "-0xff" switches bit 255 off
+
+ The following example:
+ +0,-6,+0x47,-0xf0
+
+ Switches bits 0 and 71 (0x47) on
+ Switches bits 6 and 240 (0xf0) off
+
+ Note that the bits not specified in the list remain as they were before
+ the operation.
+
+ 2. The masks can also be changed at boot time via parameters on the kernel
+ command line like this:
+
+ ap.apmask=0xffff ap.aqmask=0x40
+
+ This would create the following masks:
+
+ apmask:
+ 0xffff000000000000000000000000000000000000000000000000000000000000
+
+ aqmask:
+ 0x4000000000000000000000000000000000000000000000000000000000000000
+
+ Resulting in these two pools:
+
+ default drivers pool: adapter 0-15, domain 1
+ alternate drivers pool: adapter 16-255, domains 0, 2-255
+
+ Securing the APQNs for our example:
+ ----------------------------------
+ To secure the AP queues 05.0004, 05.0047, 05.00ab, 05.00ff, 06.0004, 06.0047,
+ 06.00ab, and 06.00ff for use by the vfio_ap device driver, the corresponding
+ APQNs can either be removed from the default masks:
+
+ echo -5,-6 > /sys/bus/ap/apmask
+
+ echo -4,-0x47,-0xab,-0xff > /sys/bus/ap/aqmask
+
+ Or the masks can be set as follows:
+
+ echo 0xf9ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff \
+ > apmask
+
+ echo 0xf7fffffffffffffffeffffffffffffffffffffffffeffffffffffffffffffffe \
+ > aqmask
+
+ This will result in AP queues 05.0004, 05.0047, 05.00ab, 05.00ff, 06.0004,
+ 06.0047, 06.00ab, and 06.00ff getting bound to the vfio_ap device driver. The
+ sysfs directory for the vfio_ap device driver will now contain symbolic links
+ to the AP queue devices bound to it:
+
+ /sys/bus/ap
+ ... [drivers]
+ ...... [vfio_ap]
+ ......... [05.0004]
+ ......... [05.0047]
+ ......... [05.00ab]
+ ......... [05.00ff]
+ ......... [06.0004]
+ ......... [06.0047]
+ ......... [06.00ab]
+ ......... [06.00ff]
+
+ Keep in mind that only type 10 and newer adapters (i.e., CEX4 and later)
+ can be bound to the vfio_ap device driver. The reason for this is to
+ simplify the implementation by not needlessly complicating the design by
+ supporting older devices that will go out of service in the relatively near
+ future and for which there are few older systems on which to test.
+
+ The administrator, therefore, must take care to secure only AP queues that
+ can be bound to the vfio_ap device driver. The device type for a given AP
+ queue device can be read from the parent card's sysfs directory. For example,
+ to see the hardware type of the queue 05.0004:
+
+ cat /sys/bus/ap/devices/card05/hwtype
+
+ The hwtype must be 10 or higher (CEX4 or newer) in order to be bound to the
+ vfio_ap device driver.
+
+3. Create the mediated devices needed to configure the AP matrixes for the
+ three guests and to provide an interface to the vfio_ap driver for
+ use by the guests:
+
+ /sys/devices/vfio_ap/matrix/
+ --- [mdev_supported_types]
+ ------ [vfio_ap-passthrough] (passthrough mediated matrix device type)
+ --------- create
+ --------- [devices]
+
+ To create the mediated devices for the three guests:
+
+ uuidgen > create
+ uuidgen > create
+ uuidgen > create
+
+ or
+
+ echo $uuid1 > create
+ echo $uuid2 > create
+ echo $uuid3 > create
+
+ This will create three mediated devices in the [devices] subdirectory named
+ after the UUID written to the create attribute file. We call them $uuid1,
+ $uuid2 and $uuid3 and this is the sysfs directory structure after creation:
+
+ /sys/devices/vfio_ap/matrix/
+ --- [mdev_supported_types]
+ ------ [vfio_ap-passthrough]
+ --------- [devices]
+ ------------ [$uuid1]
+ --------------- assign_adapter
+ --------------- assign_control_domain
+ --------------- assign_domain
+ --------------- matrix
+ --------------- unassign_adapter
+ --------------- unassign_control_domain
+ --------------- unassign_domain
+
+ ------------ [$uuid2]
+ --------------- assign_adapter
+ --------------- assign_control_domain
+ --------------- assign_domain
+ --------------- matrix
+ --------------- unassign_adapter
+ ----------------unassign_control_domain
+ ----------------unassign_domain
+
+ ------------ [$uuid3]
+ --------------- assign_adapter
+ --------------- assign_control_domain
+ --------------- assign_domain
+ --------------- matrix
+ --------------- unassign_adapter
+ ----------------unassign_control_domain
+ ----------------unassign_domain
+
+4. The administrator now needs to configure the matrixes for the mediated
+ devices $uuid1 (for Guest1), $uuid2 (for Guest2) and $uuid3 (for Guest3).
+
+ This is how the matrix is configured for Guest1:
+
+ echo 5 > assign_adapter
+ echo 6 > assign_adapter
+ echo 4 > assign_domain
+ echo 0xab > assign_domain
+
+ Control domains can similarly be assigned using the assign_control_domain
+ sysfs file.
+
+ If a mistake is made configuring an adapter, domain or control domain,
+ you can use the unassign_xxx files to unassign the adapter, domain or
+ control domain.
+
+ To display the matrix configuration for Guest1:
+
+ cat matrix
+
+ This is how the matrix is configured for Guest2:
+
+ echo 5 > assign_adapter
+ echo 0x47 > assign_domain
+ echo 0xff > assign_domain
+
+ This is how the matrix is configured for Guest3:
+
+ echo 6 > assign_adapter
+ echo 0x47 > assign_domain
+ echo 0xff > assign_domain
+
+ In order to successfully assign an adapter:
+
+ * The adapter number specified must represent a value from 0 up to the
+ maximum adapter number configured for the system. If an adapter number
+ higher than the maximum is specified, the operation will terminate with
+ an error (ENODEV).
+
+ * All APQNs that can be derived from the adapter ID and the IDs of
+ the previously assigned domains must be bound to the vfio_ap device
+ driver. If no domains have yet been assigned, then there must be at least
+ one APQN with the specified APID bound to the vfio_ap driver. If no such
+ APQNs are bound to the driver, the operation will terminate with an
+ error (EADDRNOTAVAIL).
+
+ No APQN that can be derived from the adapter ID and the IDs of the
+ previously assigned domains can be assigned to another mediated matrix
+ device. If an APQN is assigned to another mediated matrix device, the
+ operation will terminate with an error (EADDRINUSE).
+
+ In order to successfully assign a domain:
+
+ * The domain number specified must represent a value from 0 up to the
+ maximum domain number configured for the system. If a domain number
+ higher than the maximum is specified, the operation will terminate with
+ an error (ENODEV).
+
+ * All APQNs that can be derived from the domain ID and the IDs of
+ the previously assigned adapters must be bound to the vfio_ap device
+ driver. If no domains have yet been assigned, then there must be at least
+ one APQN with the specified APQI bound to the vfio_ap driver. If no such
+ APQNs are bound to the driver, the operation will terminate with an
+ error (EADDRNOTAVAIL).
+
+ No APQN that can be derived from the domain ID and the IDs of the
+ previously assigned adapters can be assigned to another mediated matrix
+ device. If an APQN is assigned to another mediated matrix device, the
+ operation will terminate with an error (EADDRINUSE).
+
+ In order to successfully assign a control domain, the domain number
+ specified must represent a value from 0 up to the maximum domain number
+ configured for the system. If a control domain number higher than the maximum
+ is specified, the operation will terminate with an error (ENODEV).
+
+5. Start Guest1:
+
+ /usr/bin/qemu-system-s390x ... -cpu host,ap=on,apqci=on,apft=on \
+ -device vfio-ap,sysfsdev=/sys/devices/vfio_ap/matrix/$uuid1 ...
+
+7. Start Guest2:
+
+ /usr/bin/qemu-system-s390x ... -cpu host,ap=on,apqci=on,apft=on \
+ -device vfio-ap,sysfsdev=/sys/devices/vfio_ap/matrix/$uuid2 ...
+
+7. Start Guest3:
+
+ /usr/bin/qemu-system-s390x ... -cpu host,ap=on,apqci=on,apft=on \
+ -device vfio-ap,sysfsdev=/sys/devices/vfio_ap/matrix/$uuid3 ...
+
+When the guest is shut down, the mediated matrix devices may be removed.
+
+Using our example again, to remove the mediated matrix device $uuid1:
+
+ /sys/devices/vfio_ap/matrix/
+ --- [mdev_supported_types]
+ ------ [vfio_ap-passthrough]
+ --------- [devices]
+ ------------ [$uuid1]
+ --------------- remove
+
+
+ echo 1 > remove
+
+ This will remove all of the mdev matrix device's sysfs structures including
+ the mdev device itself. To recreate and reconfigure the mdev matrix device,
+ all of the steps starting with step 3 will have to be performed again. Note
+ that the remove will fail if a guest using the mdev is still running.
+
+ It is not necessary to remove an mdev matrix device, but one may want to
+ remove it if no guest will use it during the remaining lifetime of the linux
+ host. If the mdev matrix device is removed, one may want to also reconfigure
+ the pool of adapters and queues reserved for use by the default drivers.
+
+Limitations
+===========
+* The KVM/kernel interfaces do not provide a way to prevent restoring an APQN
+ to the default drivers pool of a queue that is still assigned to a mediated
+ device in use by a guest. It is incumbent upon the administrator to
+ ensure there is no mediated device in use by a guest to which the APQN is
+ assigned lest the host be given access to the private data of the AP queue
+ device such as a private key configured specifically for the guest.
+
+* Dynamically modifying the AP matrix for a running guest (which would amount to
+ hot(un)plug of AP devices for the guest) is currently not supported
+
+* Live guest migration is not supported for guests using AP devices.
diff --git a/MAINTAINERS b/MAINTAINERS
index e993064637ca..1610fb26bdac 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -12668,6 +12668,18 @@ W: http://www.ibm.com/developerworks/linux/linux390/
S: Supported
F: drivers/s390/crypto/
+S390 VFIO AP DRIVER
+M: Tony Krowiak <akrowiak@linux.ibm.com>
+M: Pierre Morel <pmorel@linux.ibm.com>
+M: Halil Pasic <pasic@linux.ibm.com>
+L: linux-s390@vger.kernel.org
+W: http://www.ibm.com/developerworks/linux/linux390/
+S: Supported
+F: drivers/s390/crypto/vfio_ap_drv.c
+F: drivers/s390/crypto/vfio_ap_private.h
+F: drivers/s390/crypto/vfio_ap_ops.c
+F: Documentation/s390/vfio-ap.txt
+
S390 ZFCP DRIVER
M: Steffen Maier <maier@linux.ibm.com>
M: Benjamin Block <bblock@linux.ibm.com>
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 9a9c7a6fe925..8cc8f25d9576 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -773,6 +773,17 @@ config VFIO_CCW
To compile this driver as a module, choose M here: the
module will be called vfio_ccw.
+config VFIO_AP
+ def_tristate n
+ prompt "VFIO support for AP devices"
+ depends on S390_AP_IOMMU && VFIO_MDEV_DEVICE && KVM
+ help
+ This driver grants access to Adjunct Processor (AP) devices
+ via the VFIO mediated device interface.
+
+ To compile this driver as a module, choose M here: the module
+ will be called vfio_ap.
+
endmenu
menu "Dump support"
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 29c940bf8506..febd1709472a 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -44,6 +44,7 @@
#define KVM_REQ_ICPT_OPEREXC KVM_ARCH_REQ(2)
#define KVM_REQ_START_MIGRATION KVM_ARCH_REQ(3)
#define KVM_REQ_STOP_MIGRATION KVM_ARCH_REQ(4)
+#define KVM_REQ_VSIE_RESTART KVM_ARCH_REQ(5)
#define SIGP_CTRL_C 0x80
#define SIGP_CTRL_SCN_MASK 0x3f
@@ -186,6 +187,7 @@ struct kvm_s390_sie_block {
#define ECA_AIV 0x00200000
#define ECA_VX 0x00020000
#define ECA_PROTEXCI 0x00002000
+#define ECA_APIE 0x00000008
#define ECA_SII 0x00000001
__u32 eca; /* 0x004c */
#define ICPT_INST 0x04
@@ -237,7 +239,11 @@ struct kvm_s390_sie_block {
psw_t gpsw; /* 0x0090 */
__u64 gg14; /* 0x00a0 */
__u64 gg15; /* 0x00a8 */
- __u8 reservedb0[20]; /* 0x00b0 */
+ __u8 reservedb0[8]; /* 0x00b0 */
+#define HPID_KVM 0x4
+#define HPID_VSIE 0x5
+ __u8 hpid; /* 0x00b8 */
+ __u8 reservedb9[11]; /* 0x00b9 */
__u16 extcpuaddr; /* 0x00c4 */
__u16 eic; /* 0x00c6 */
__u32 reservedc8; /* 0x00c8 */
@@ -255,6 +261,8 @@ struct kvm_s390_sie_block {
__u8 reservede4[4]; /* 0x00e4 */
__u64 tecmc; /* 0x00e8 */
__u8 reservedf0[12]; /* 0x00f0 */
+#define CRYCB_FORMAT_MASK 0x00000003
+#define CRYCB_FORMAT0 0x00000000
#define CRYCB_FORMAT1 0x00000001
#define CRYCB_FORMAT2 0x00000003
__u32 crycbd; /* 0x00fc */
@@ -715,6 +723,7 @@ struct kvm_s390_crypto {
__u32 crycbd;
__u8 aes_kw;
__u8 dea_kw;
+ __u8 apie;
};
#define APCB0_MASK_SIZE 1
@@ -855,6 +864,8 @@ void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
struct kvm_async_pf *work);
+void kvm_arch_crypto_clear_masks(struct kvm *kvm);
+
extern int sie64a(struct kvm_s390_sie_block *, u64 *);
extern char sie_exit;
diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h
index 9a50f02b9894..16511d97e8dc 100644
--- a/arch/s390/include/uapi/asm/kvm.h
+++ b/arch/s390/include/uapi/asm/kvm.h
@@ -160,6 +160,8 @@ struct kvm_s390_vm_cpu_subfunc {
#define KVM_S390_VM_CRYPTO_ENABLE_DEA_KW 1
#define KVM_S390_VM_CRYPTO_DISABLE_AES_KW 2
#define KVM_S390_VM_CRYPTO_DISABLE_DEA_KW 3
+#define KVM_S390_VM_CRYPTO_ENABLE_APIE 4
+#define KVM_S390_VM_CRYPTO_DISABLE_APIE 5
/* kvm attributes for migration mode */
#define KVM_S390_VM_MIGRATION_STOP 0
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index ac5da6b0b862..b7964d77c6cd 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -40,6 +40,7 @@
#include <asm/sclp.h>
#include <asm/cpacf.h>
#include <asm/timex.h>
+#include <asm/ap.h>
#include "kvm-s390.h"
#include "gaccess.h"
@@ -844,20 +845,22 @@ void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
kvm_s390_vcpu_block_all(kvm);
- kvm_for_each_vcpu(i, vcpu, kvm)
+ kvm_for_each_vcpu(i, vcpu, kvm) {
kvm_s390_vcpu_crypto_setup(vcpu);
+ /* recreate the shadow crycb by leaving the VSIE handler */
+ kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
+ }
kvm_s390_vcpu_unblock_all(kvm);
}
static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
{
- if (!test_kvm_facility(kvm, 76))
- return -EINVAL;
-
mutex_lock(&kvm->lock);
switch (attr->attr) {
case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
+ if (!test_kvm_facility(kvm, 76))
+ return -EINVAL;
get_random_bytes(
kvm->arch.crypto.crycb->aes_wrapping_key_mask,
sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
@@ -865,6 +868,8 @@ static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
break;
case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
+ if (!test_kvm_facility(kvm, 76))
+ return -EINVAL;
get_random_bytes(
kvm->arch.crypto.crycb->dea_wrapping_key_mask,
sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
@@ -872,17 +877,35 @@ static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
break;
case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
+ if (!test_kvm_facility(kvm, 76))
+ return -EINVAL;
kvm->arch.crypto.aes_kw = 0;
memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
break;
case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
+ if (!test_kvm_facility(kvm, 76))
+ return -EINVAL;
kvm->arch.crypto.dea_kw = 0;
memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
break;
+ case KVM_S390_VM_CRYPTO_ENABLE_APIE:
+ if (!ap_instructions_available()) {
+ mutex_unlock(&kvm->lock);
+ return -EOPNOTSUPP;
+ }
+ kvm->arch.crypto.apie = 1;
+ break;
+ case KVM_S390_VM_CRYPTO_DISABLE_APIE:
+ if (!ap_instructions_available()) {
+ mutex_unlock(&kvm->lock);
+ return -EOPNOTSUPP;
+ }
+ kvm->arch.crypto.apie = 0;
+ break;
default:
mutex_unlock(&kvm->lock);
return -ENXIO;
@@ -1491,6 +1514,10 @@ static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
ret = 0;
break;
+ case KVM_S390_VM_CRYPTO_ENABLE_APIE:
+ case KVM_S390_VM_CRYPTO_DISABLE_APIE:
+ ret = ap_instructions_available() ? 0 : -ENXIO;
+ break;
default:
ret = -ENXIO;
break;
@@ -1992,55 +2019,60 @@ long kvm_arch_vm_ioctl(struct file *filp,
return r;
}
-static int kvm_s390_query_ap_config(u8 *config)
-{
- u32 fcn_code = 0x04000000UL;
- u32 cc = 0;
-
- memset(config, 0, 128);
- asm volatile(
- "lgr 0,%1\n"
- "lgr 2,%2\n"
- ".long 0xb2af0000\n" /* PQAP(QCI) */
- "0: ipm %0\n"
- "srl %0,28\n"
- "1:\n"
- EX_TABLE(0b, 1b)
- : "+r" (cc)
- : "r" (fcn_code), "r" (config)
- : "cc", "0", "2", "memory"
- );
-
- return cc;
-}
-
static int kvm_s390_apxa_installed(void)
{
- u8 config[128];
- int cc;
-
- if (test_facility(12)) {
- cc = kvm_s390_query_ap_config(config);
+ struct ap_config_info info;
- if (cc)
- pr_err("PQAP(QCI) failed with cc=%d", cc);
- else
- return config[0] & 0x40;
+ if (ap_instructions_available()) {
+ if (ap_qci(&info) == 0)
+ return info.apxa;
}
return 0;
}
+/*
+ * The format of the crypto control block (CRYCB) is specified in the 3 low
+ * order bits of the CRYCB designation (CRYCBD) field as follows:
+ * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
+ * AP extended addressing (APXA) facility are installed.
+ * Format 1: The APXA facility is not installed but the MSAX3 facility is.
+ * Format 2: Both the APXA and MSAX3 facilities are installed
+ */
static void kvm_s390_set_crycb_format(struct kvm *kvm)
{
kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
+ /* Clear the CRYCB format bits - i.e., set format 0 by default */
+ kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
+
+ /* Check whether MSAX3 is installed */
+ if (!test_kvm_facility(kvm, 76))
+ return;
+
if (kvm_s390_apxa_installed())
kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
else
kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
}
+void kvm_arch_crypto_clear_masks(struct kvm *kvm)
+{
+ mutex_lock(&kvm->lock);
+ kvm_s390_vcpu_block_all(kvm);
+
+ memset(&kvm->arch.crypto.crycb->apcb0, 0,
+ sizeof(kvm->arch.crypto.crycb->apcb0));
+ memset(&kvm->arch.crypto.crycb->apcb1, 0,
+ sizeof(kvm->arch.crypto.crycb->apcb1));
+
+ /* recreate the shadow crycb for each vcpu */
+ kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
+ kvm_s390_vcpu_unblock_all(kvm);
+ mutex_unlock(&kvm->lock);
+}
+EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
+
static u64 kvm_s390_get_initial_cpuid(void)
{
struct cpuid cpuid;
@@ -2052,12 +2084,12 @@ static u64 kvm_s390_get_initial_cpuid(void)
static void kvm_s390_crypto_init(struct kvm *kvm)
{
- if (!test_kvm_facility(kvm, 76))
- return;
-
kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
kvm_s390_set_crycb_format(kvm);
+ if (!test_kvm_facility(kvm, 76))
+ return;
+
/* Enable AES/DEA protected key functions by default */
kvm->arch.crypto.aes_kw = 1;
kvm->arch.crypto.dea_kw = 1;
@@ -2583,17 +2615,25 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
{
- if (!test_kvm_facility(vcpu->kvm, 76))
+ /*
+ * If the AP instructions are not being interpreted and the MSAX3
+ * facility is not configured for the guest, there is nothing to set up.
+ */
+ if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
return;
+ vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
+ vcpu->arch.sie_block->eca &= ~ECA_APIE;
+
+ if (vcpu->kvm->arch.crypto.apie)
+ vcpu->arch.sie_block->eca |= ECA_APIE;
+ /* Set up protected key support */
if (vcpu->kvm->arch.crypto.aes_kw)
vcpu->arch.sie_block->ecb3 |= ECB3_AES;
if (vcpu->kvm->arch.crypto.dea_kw)
vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
-
- vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
}
void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
@@ -2685,6 +2725,8 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
+ vcpu->arch.sie_block->hpid = HPID_KVM;
+
kvm_s390_vcpu_crypto_setup(vcpu);
return rc;
@@ -2768,18 +2810,25 @@ static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
exit_sie(vcpu);
}
+bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
+{
+ return atomic_read(&vcpu->arch.sie_block->prog20) &
+ (PROG_BLOCK_SIE | PROG_REQUEST);
+}
+
static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
{
atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
}
/*
- * Kick a guest cpu out of SIE and wait until SIE is not running.
+ * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
* If the CPU is not running (e.g. waiting as idle) the function will
* return immediately. */
void exit_sie(struct kvm_vcpu *vcpu)
{
kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
+ kvm_s390_vsie_kick(vcpu);
while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
cpu_relax();
}
@@ -3196,6 +3245,8 @@ retry:
/* nothing to do, just clear the request */
kvm_clear_request(KVM_REQ_UNHALT, vcpu);
+ /* we left the vsie handler, nothing to do, just clear the request */
+ kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
return 0;
}
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 981e3ba97461..1f6e36cdce0d 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -290,6 +290,7 @@ void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu);
void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu);
void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu);
void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu);
+bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu);
void exit_sie(struct kvm_vcpu *vcpu);
void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu);
int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu);
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index a2b28cd1e3fe..a153257bf7d9 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -135,14 +135,148 @@ static int prepare_cpuflags(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
atomic_set(&scb_s->cpuflags, newflags);
return 0;
}
+/* Copy to APCB FORMAT1 from APCB FORMAT0 */
+static int setup_apcb10(struct kvm_vcpu *vcpu, struct kvm_s390_apcb1 *apcb_s,
+ unsigned long apcb_o, struct kvm_s390_apcb1 *apcb_h)
+{
+ struct kvm_s390_apcb0 tmp;
-/*
+ if (read_guest_real(vcpu, apcb_o, &tmp, sizeof(struct kvm_s390_apcb0)))
+ return -EFAULT;
+
+ apcb_s->apm[0] = apcb_h->apm[0] & tmp.apm[0];
+ apcb_s->aqm[0] = apcb_h->aqm[0] & tmp.aqm[0] & 0xffff000000000000UL;
+ apcb_s->adm[0] = apcb_h->adm[0] & tmp.adm[0] & 0xffff000000000000UL;
+
+ return 0;
+
+}
+
+/**
+ * setup_apcb00 - Copy to APCB FORMAT0 from APCB FORMAT0
+ * @vcpu: pointer to the virtual CPU
+ * @apcb_s: pointer to start of apcb in the shadow crycb
+ * @apcb_o: pointer to start of original apcb in the guest2
+ * @apcb_h: pointer to start of apcb in the guest1
+ *
+ * Returns 0 and -EFAULT on error reading guest apcb
+ */
+static int setup_apcb00(struct kvm_vcpu *vcpu, unsigned long *apcb_s,
+ unsigned long apcb_o, unsigned long *apcb_h)
+{
+ if (read_guest_real(vcpu, apcb_o, apcb_s,
+ sizeof(struct kvm_s390_apcb0)))
+ return -EFAULT;
+
+ bitmap_and(apcb_s, apcb_s, apcb_h, sizeof(struct kvm_s390_apcb0));
+
+ return 0;
+}
+
+/**
+ * setup_apcb11 - Copy the FORMAT1 APCB from the guest to the shadow CRYCB
+ * @vcpu: pointer to the virtual CPU
+ * @apcb_s: pointer to start of apcb in the shadow crycb
+ * @apcb_o: pointer to start of original guest apcb
+ * @apcb_h: pointer to start of apcb in the host
+ *
+ * Returns 0 and -EFAULT on error reading guest apcb
+ */
+static int setup_apcb11(struct kvm_vcpu *vcpu, unsigned long *apcb_s,
+ unsigned long apcb_o,
+ unsigned long *apcb_h)
+{
+ if (read_guest_real(vcpu, apcb_o, apcb_s,
+ sizeof(struct kvm_s390_apcb1)))
+ return -EFAULT;
+
+ bitmap_and(apcb_s, apcb_s, apcb_h, sizeof(struct kvm_s390_apcb1));
+
+ return 0;
+}
+
+/**
+ * setup_apcb - Create a shadow copy of the apcb.
+ * @vcpu: pointer to the virtual CPU
+ * @crycb_s: pointer to shadow crycb
+ * @crycb_o: pointer to original guest crycb
+ * @crycb_h: pointer to the host crycb
+ * @fmt_o: format of the original guest crycb.
+ * @fmt_h: format of the host crycb.
+ *
+ * Checks the compatibility between the guest and host crycb and calls the
+ * appropriate copy function.
+ *
+ * Return 0 or an error number if the guest and host crycb are incompatible.
+ */
+static int setup_apcb(struct kvm_vcpu *vcpu, struct kvm_s390_crypto_cb *crycb_s,
+ const u32 crycb_o,
+ struct kvm_s390_crypto_cb *crycb_h,
+ int fmt_o, int fmt_h)
+{
+ struct kvm_s390_crypto_cb *crycb;
+
+ crycb = (struct kvm_s390_crypto_cb *) (unsigned long)crycb_o;
+
+ switch (fmt_o) {
+ case CRYCB_FORMAT2:
+ if ((crycb_o & PAGE_MASK) != ((crycb_o + 256) & PAGE_MASK))
+ return -EACCES;
+ if (fmt_h != CRYCB_FORMAT2)
+ return -EINVAL;
+ return setup_apcb11(vcpu, (unsigned long *)&crycb_s->apcb1,
+ (unsigned long) &crycb->apcb1,
+ (unsigned long *)&crycb_h->apcb1);
+ case CRYCB_FORMAT1:
+ switch (fmt_h) {
+ case CRYCB_FORMAT2:
+ return setup_apcb10(vcpu, &crycb_s->apcb1,
+ (unsigned long) &crycb->apcb0,
+ &crycb_h->apcb1);
+ case CRYCB_FORMAT1:
+ return setup_apcb00(vcpu,
+ (unsigned long *) &crycb_s->apcb0,
+ (unsigned long) &crycb->apcb0,
+ (unsigned long *) &crycb_h->apcb0);
+ }
+ break;
+ case CRYCB_FORMAT0:
+ if ((crycb_o & PAGE_MASK) != ((crycb_o + 32) & PAGE_MASK))
+ return -EACCES;
+
+ switch (fmt_h) {
+ case CRYCB_FORMAT2:
+ return setup_apcb10(vcpu, &crycb_s->apcb1,
+ (unsigned long) &crycb->apcb0,
+ &crycb_h->apcb1);
+ case CRYCB_FORMAT1:
+ case CRYCB_FORMAT0:
+ return setup_apcb00(vcpu,
+ (unsigned long *) &crycb_s->apcb0,
+ (unsigned long) &crycb->apcb0,
+ (unsigned long *) &crycb_h->apcb0);
+ }
+ }
+ return -EINVAL;
+}
+
+/**
+ * shadow_crycb - Create a shadow copy of the crycb block
+ * @vcpu: a pointer to the virtual CPU
+ * @vsie_page: a pointer to internal date used for the vSIE
+ *
* Create a shadow copy of the crycb block and setup key wrapping, if
* requested for guest 3 and enabled for guest 2.
*
- * We only accept format-1 (no AP in g2), but convert it into format-2
+ * We accept format-1 or format-2, but we convert format-1 into format-2
+ * in the shadow CRYCB.
+ * Using format-2 enables the firmware to choose the right format when
+ * scheduling the SIE.
* There is nothing to do for format-0.
*
+ * This function centralize the issuing of set_validity_icpt() for all
+ * the subfunctions working on the crycb.
+ *
* Returns: - 0 if shadowed or nothing to do
* - > 0 if control has to be given to guest 2
*/
@@ -154,23 +288,40 @@ static int shadow_crycb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
const u32 crycb_addr = crycbd_o & 0x7ffffff8U;
unsigned long *b1, *b2;
u8 ecb3_flags;
+ int apie_h;
+ int key_msk = test_kvm_facility(vcpu->kvm, 76);
+ int fmt_o = crycbd_o & CRYCB_FORMAT_MASK;
+ int fmt_h = vcpu->arch.sie_block->crycbd & CRYCB_FORMAT_MASK;
+ int ret = 0;
scb_s->crycbd = 0;
- if (!(crycbd_o & vcpu->arch.sie_block->crycbd & CRYCB_FORMAT1))
- return 0;
- /* format-1 is supported with message-security-assist extension 3 */
- if (!test_kvm_facility(vcpu->kvm, 76))
+
+ apie_h = vcpu->arch.sie_block->eca & ECA_APIE;
+ if (!apie_h && !key_msk)
return 0;
+
+ if (!crycb_addr)
+ return set_validity_icpt(scb_s, 0x0039U);
+
+ if (fmt_o == CRYCB_FORMAT1)
+ if ((crycb_addr & PAGE_MASK) !=
+ ((crycb_addr + 128) & PAGE_MASK))
+ return set_validity_icpt(scb_s, 0x003CU);
+
+ if (apie_h && (scb_o->eca & ECA_APIE)) {
+ ret = setup_apcb(vcpu, &vsie_page->crycb, crycb_addr,
+ vcpu->kvm->arch.crypto.crycb,
+ fmt_o, fmt_h);
+ if (ret)
+ goto end;
+ scb_s->eca |= scb_o->eca & ECA_APIE;
+ }
+
/* we may only allow it if enabled for guest 2 */
ecb3_flags = scb_o->ecb3 & vcpu->arch.sie_block->ecb3 &
(ECB3_AES | ECB3_DEA);
if (!ecb3_flags)
- return 0;
-
- if ((crycb_addr & PAGE_MASK) != ((crycb_addr + 128) & PAGE_MASK))
- return set_validity_icpt(scb_s, 0x003CU);
- else if (!crycb_addr)
- return set_validity_icpt(scb_s, 0x0039U);
+ goto end;
/* copy only the wrapping keys */
if (read_guest_real(vcpu, crycb_addr + 72,
@@ -178,8 +329,6 @@ static int shadow_crycb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
return set_validity_icpt(scb_s, 0x0035U);
scb_s->ecb3 |= ecb3_flags;
- scb_s->crycbd = ((__u32)(__u64) &vsie_page->crycb) | CRYCB_FORMAT1 |
- CRYCB_FORMAT2;
/* xor both blocks in one run */
b1 = (unsigned long *) vsie_page->crycb.dea_wrapping_key_mask;
@@ -187,6 +336,16 @@ static int shadow_crycb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
vcpu->kvm->arch.crypto.crycb->dea_wrapping_key_mask;
/* as 56%8 == 0, bitmap_xor won't overwrite any data */
bitmap_xor(b1, b1, b2, BITS_PER_BYTE * 56);
+end:
+ switch (ret) {
+ case -EINVAL:
+ return set_validity_icpt(scb_s, 0x0020U);
+ case -EFAULT:
+ return set_validity_icpt(scb_s, 0x0035U);
+ case -EACCES:
+ return set_validity_icpt(scb_s, 0x003CU);
+ }
+ scb_s->crycbd = ((__u32)(__u64) &vsie_page->crycb) | CRYCB_FORMAT2;
return 0;
}
@@ -383,6 +542,8 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
if (test_kvm_facility(vcpu->kvm, 156))
scb_s->ecd |= scb_o->ecd & ECD_ETOKENF;
+ scb_s->hpid = HPID_VSIE;
+
prepare_ibc(vcpu, vsie_page);
rc = shadow_crycb(vcpu, vsie_page);
out:
@@ -830,7 +991,7 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
int guest_bp_isolation;
- int rc;
+ int rc = 0;
handle_last_fault(vcpu, vsie_page);
@@ -858,7 +1019,18 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
guest_enter_irqoff();
local_irq_enable();
- rc = sie64a(scb_s, vcpu->run->s.regs.gprs);
+ /*
+ * Simulate a SIE entry of the VCPU (see sie64a), so VCPU blocking
+ * and VCPU requests also hinder the vSIE from running and lead
+ * to an immediate exit. kvm_s390_vsie_kick() has to be used to
+ * also kick the vSIE.
+ */
+ vcpu->arch.sie_block->prog0c |= PROG_IN_SIE;
+ barrier();
+ if (!kvm_s390_vcpu_sie_inhibited(vcpu))
+ rc = sie64a(scb_s, vcpu->run->s.regs.gprs);
+ barrier();
+ vcpu->arch.sie_block->prog0c &= ~PROG_IN_SIE;
local_irq_disable();
guest_exit_irqoff();
@@ -1005,7 +1177,8 @@ static int vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
if (rc == -EAGAIN)
rc = 0;
if (rc || scb_s->icptcode || signal_pending(current) ||
- kvm_s390_vcpu_has_irq(vcpu, 0))
+ kvm_s390_vcpu_has_irq(vcpu, 0) ||
+ kvm_s390_vcpu_sie_inhibited(vcpu))
break;
}
@@ -1122,7 +1295,8 @@ int kvm_s390_handle_vsie(struct kvm_vcpu *vcpu)
if (unlikely(scb_addr & 0x1ffUL))
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
- if (signal_pending(current) || kvm_s390_vcpu_has_irq(vcpu, 0))
+ if (signal_pending(current) || kvm_s390_vcpu_has_irq(vcpu, 0) ||
+ kvm_s390_vcpu_sie_inhibited(vcpu))
return 0;
vsie_page = get_vsie_page(vcpu->kvm, scb_addr);
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index 911c7ded35f1..1e668b95e0c6 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -907,10 +907,16 @@ static inline pmd_t *gmap_pmd_op_walk(struct gmap *gmap, unsigned long gaddr)
pmd_t *pmdp;
BUG_ON(gmap_is_shadow(gmap));
- spin_lock(&gmap->guest_table_lock);
pmdp = (pmd_t *) gmap_table_walk(gmap, gaddr, 1);
+ if (!pmdp)
+ return NULL;
- if (!pmdp || pmd_none(*pmdp)) {
+ /* without huge pages, there is no need to take the table lock */
+ if (!gmap->mm->context.allow_gmap_hpage_1m)
+ return pmd_none(*pmdp) ? NULL : pmdp;
+
+ spin_lock(&gmap->guest_table_lock);
+ if (pmd_none(*pmdp)) {
spin_unlock(&gmap->guest_table_lock);
return NULL;
}
diff --git a/arch/s390/tools/gen_facilities.c b/arch/s390/tools/gen_facilities.c
index 0c85aedcf9b3..fd788e0f2e5b 100644
--- a/arch/s390/tools/gen_facilities.c
+++ b/arch/s390/tools/gen_facilities.c
@@ -106,6 +106,8 @@ static struct facility_def facility_defs[] = {
.name = "FACILITIES_KVM_CPUMODEL",
.bits = (int[]){
+ 12, /* AP Query Configuration Information */
+ 15, /* AP Facilities Test */
156, /* etoken facility */
-1 /* END */
}
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index c60395b7470f..83e6d993fca5 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -372,6 +372,14 @@ config S390_CCW_IOMMU
Enables bits of IOMMU API required by VFIO. The iommu_ops
is not implemented as it is not necessary for VFIO.
+config S390_AP_IOMMU
+ bool "S390 AP IOMMU Support"
+ depends on S390 && ZCRYPT
+ select IOMMU_API
+ help
+ Enables bits of IOMMU API required by VFIO. The iommu_ops
+ is not implemented as it is not necessary for VFIO.
+
config MTK_IOMMU
bool "MTK IOMMU Support"
depends on ARM || ARM64
diff --git a/drivers/s390/crypto/Makefile b/drivers/s390/crypto/Makefile
index b59af548ed1c..8d36b05a7575 100644
--- a/drivers/s390/crypto/Makefile
+++ b/drivers/s390/crypto/Makefile
@@ -15,3 +15,7 @@ obj-$(CONFIG_ZCRYPT) += zcrypt_pcixcc.o zcrypt_cex2a.o zcrypt_cex4.o
# pkey kernel module
pkey-objs := pkey_api.o
obj-$(CONFIG_PKEY) += pkey.o
+
+# adjunct processor matrix
+vfio_ap-objs := vfio_ap_drv.o vfio_ap_ops.o
+obj-$(CONFIG_VFIO_AP) += vfio_ap.o
diff --git a/drivers/s390/crypto/vfio_ap_drv.c b/drivers/s390/crypto/vfio_ap_drv.c
new file mode 100644
index 000000000000..8b51821d9bf7
--- /dev/null
+++ b/drivers/s390/crypto/vfio_ap_drv.c
@@ -0,0 +1,157 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * VFIO based AP device driver
+ *
+ * Copyright IBM Corp. 2018
+ *
+ * Author(s): Tony Krowiak <akrowiak@linux.ibm.com>
+ */
+
+#include <linux/module.h>
+#include <linux/mod_devicetable.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include "vfio_ap_private.h"
+
+#define VFIO_AP_ROOT_NAME "vfio_ap"
+#define VFIO_AP_DEV_TYPE_NAME "ap_matrix"
+#define VFIO_AP_DEV_NAME "matrix"
+
+MODULE_AUTHOR("IBM Corporation");
+MODULE_DESCRIPTION("VFIO AP device driver, Copyright IBM Corp. 2018");
+MODULE_LICENSE("GPL v2");
+
+static struct ap_driver vfio_ap_drv;
+
+static struct device_type vfio_ap_dev_type = {
+ .name = VFIO_AP_DEV_TYPE_NAME,
+};
+
+struct ap_matrix_dev *matrix_dev;
+
+/* Only type 10 adapters (CEX4 and later) are supported
+ * by the AP matrix device driver
+ */
+static struct ap_device_id ap_queue_ids[] = {
+ { .dev_type = AP_DEVICE_TYPE_CEX4,
+ .match_flags = AP_DEVICE_ID_MATCH_QUEUE_TYPE },
+ { .dev_type = AP_DEVICE_TYPE_CEX5,
+ .match_flags = AP_DEVICE_ID_MATCH_QUEUE_TYPE },
+ { .dev_type = AP_DEVICE_TYPE_CEX6,
+ .match_flags = AP_DEVICE_ID_MATCH_QUEUE_TYPE },
+ { /* end of sibling */ },
+};
+
+MODULE_DEVICE_TABLE(vfio_ap, ap_queue_ids);
+
+static int vfio_ap_queue_dev_probe(struct ap_device *apdev)
+{
+ return 0;
+}
+
+static void vfio_ap_queue_dev_remove(struct ap_device *apdev)
+{
+ /* Nothing to do yet */
+}
+
+static void vfio_ap_matrix_dev_release(struct device *dev)
+{
+ struct ap_matrix_dev *matrix_dev = dev_get_drvdata(dev);
+
+ kfree(matrix_dev);
+}
+
+static int vfio_ap_matrix_dev_create(void)
+{
+ int ret;
+ struct device *root_device;
+
+ root_device = root_device_register(VFIO_AP_ROOT_NAME);
+ if (IS_ERR(root_device))
+ return PTR_ERR(root_device);
+
+ matrix_dev = kzalloc(sizeof(*matrix_dev), GFP_KERNEL);
+ if (!matrix_dev) {
+ ret = -ENOMEM;
+ goto matrix_alloc_err;
+ }
+
+ /* Fill in config info via PQAP(QCI), if available */
+ if (test_facility(12)) {
+ ret = ap_qci(&matrix_dev->info);
+ if (ret)
+ goto matrix_alloc_err;
+ }
+
+ mutex_init(&matrix_dev->lock);
+ INIT_LIST_HEAD(&matrix_dev->mdev_list);
+
+ matrix_dev->device.type = &vfio_ap_dev_type;
+ dev_set_name(&matrix_dev->device, "%s", VFIO_AP_DEV_NAME);
+ matrix_dev->device.parent = root_device;
+ matrix_dev->device.release = vfio_ap_matrix_dev_release;
+ matrix_dev->device.driver = &vfio_ap_drv.driver;
+
+ ret = device_register(&matrix_dev->device);
+ if (ret)
+ goto matrix_reg_err;
+
+ return 0;
+
+matrix_reg_err:
+ put_device(&matrix_dev->device);
+matrix_alloc_err:
+ root_device_unregister(root_device);
+
+ return ret;
+}
+
+static void vfio_ap_matrix_dev_destroy(void)
+{
+ device_unregister(&matrix_dev->device);
+ root_device_unregister(matrix_dev->device.parent);
+}
+
+int __init vfio_ap_init(void)
+{
+ int ret;
+
+ /* If there are no AP instructions, there is nothing to pass through. */
+ if (!ap_instructions_available())
+ return -ENODEV;
+
+ ret = vfio_ap_matrix_dev_create();
+ if (ret)
+ return ret;
+
+ memset(&vfio_ap_drv, 0, sizeof(vfio_ap_drv));
+ vfio_ap_drv.probe = vfio_ap_queue_dev_probe;
+ vfio_ap_drv.remove = vfio_ap_queue_dev_remove;
+ vfio_ap_drv.ids = ap_queue_ids;
+
+ ret = ap_driver_register(&vfio_ap_drv, THIS_MODULE, VFIO_AP_DRV_NAME);
+ if (ret) {
+ vfio_ap_matrix_dev_destroy();
+ return ret;
+ }
+
+ ret = vfio_ap_mdev_register();
+ if (ret) {
+ ap_driver_unregister(&vfio_ap_drv);
+ vfio_ap_matrix_dev_destroy();
+
+ return ret;
+ }
+
+ return 0;
+}
+
+void __exit vfio_ap_exit(void)
+{
+ vfio_ap_mdev_unregister();
+ ap_driver_unregister(&vfio_ap_drv);
+ vfio_ap_matrix_dev_destroy();
+}
+
+module_init(vfio_ap_init);
+module_exit(vfio_ap_exit);
diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c
new file mode 100644
index 000000000000..d3d9eb72b0f1
--- /dev/null
+++ b/drivers/s390/crypto/vfio_ap_ops.c
@@ -0,0 +1,968 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Adjunct processor matrix VFIO device driver callbacks.
+ *
+ * Copyright IBM Corp. 2018
+ *
+ * Author(s): Tony Krowiak <akrowiak@linux.ibm.com>
+ * Halil Pasic <pasic@linux.ibm.com>
+ * Pierre Morel <pmorel@linux.ibm.com>
+ */
+#include <linux/string.h>
+#include <linux/vfio.h>
+#include <linux/device.h>
+#include <linux/list.h>
+#include <linux/ctype.h>
+#include <linux/bitops.h>
+#include <linux/kvm_host.h>
+#include <linux/module.h>
+#include <asm/kvm.h>
+#include <asm/zcrypt.h>
+
+#include "vfio_ap_private.h"
+
+#define VFIO_AP_MDEV_TYPE_HWVIRT "passthrough"
+#define VFIO_AP_MDEV_NAME_HWVIRT "VFIO AP Passthrough Device"
+
+static void vfio_ap_matrix_init(struct ap_config_info *info,
+ struct ap_matrix *matrix)
+{
+ matrix->apm_max = info->apxa ? info->Na : 63;
+ matrix->aqm_max = info->apxa ? info->Nd : 15;
+ matrix->adm_max = info->apxa ? info->Nd : 15;
+}
+
+static int vfio_ap_mdev_create(struct kobject *kobj, struct mdev_device *mdev)
+{
+ struct ap_matrix_mdev *matrix_mdev;
+
+ if ((atomic_dec_if_positive(&matrix_dev->available_instances) < 0))
+ return -EPERM;
+
+ matrix_mdev = kzalloc(sizeof(*matrix_mdev), GFP_KERNEL);
+ if (!matrix_mdev) {
+ atomic_inc(&matrix_dev->available_instances);
+ return -ENOMEM;
+ }
+
+ vfio_ap_matrix_init(&matrix_dev->info, &matrix_mdev->matrix);
+ mdev_set_drvdata(mdev, matrix_mdev);
+ mutex_lock(&matrix_dev->lock);
+ list_add(&matrix_mdev->node, &matrix_dev->mdev_list);
+ mutex_unlock(&matrix_dev->lock);
+
+ return 0;
+}
+
+static int vfio_ap_mdev_remove(struct mdev_device *mdev)
+{
+ struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
+
+ if (matrix_mdev->kvm)
+ return -EBUSY;
+
+ mutex_lock(&matrix_dev->lock);
+ list_del(&matrix_mdev->node);
+ mutex_unlock(&matrix_dev->lock);
+
+ kfree(matrix_mdev);
+ mdev_set_drvdata(mdev, NULL);
+ atomic_inc(&matrix_dev->available_instances);
+
+ return 0;
+}
+
+static ssize_t name_show(struct kobject *kobj, struct device *dev, char *buf)
+{
+ return sprintf(buf, "%s\n", VFIO_AP_MDEV_NAME_HWVIRT);
+}
+
+MDEV_TYPE_ATTR_RO(name);
+
+static ssize_t available_instances_show(struct kobject *kobj,
+ struct device *dev, char *buf)
+{
+ return sprintf(buf, "%d\n",
+ atomic_read(&matrix_dev->available_instances));
+}
+
+MDEV_TYPE_ATTR_RO(available_instances);
+
+static ssize_t device_api_show(struct kobject *kobj, struct device *dev,
+ char *buf)
+{
+ return sprintf(buf, "%s\n", VFIO_DEVICE_API_AP_STRING);
+}
+
+MDEV_TYPE_ATTR_RO(device_api);
+
+static struct attribute *vfio_ap_mdev_type_attrs[] = {
+ &mdev_type_attr_name.attr,
+ &mdev_type_attr_device_api.attr,
+ &mdev_type_attr_available_instances.attr,
+ NULL,
+};
+
+static struct attribute_group vfio_ap_mdev_hwvirt_type_group = {
+ .name = VFIO_AP_MDEV_TYPE_HWVIRT,
+ .attrs = vfio_ap_mdev_type_attrs,
+};
+
+static struct attribute_group *vfio_ap_mdev_type_groups[] = {
+ &vfio_ap_mdev_hwvirt_type_group,
+ NULL,
+};
+
+struct vfio_ap_queue_reserved {
+ unsigned long *apid;
+ unsigned long *apqi;
+ bool reserved;
+};
+
+/**
+ * vfio_ap_has_queue
+ *
+ * @dev: an AP queue device
+ * @data: a struct vfio_ap_queue_reserved reference
+ *
+ * Flags whether the AP queue device (@dev) has a queue ID containing the APQN,
+ * apid or apqi specified in @data:
+ *
+ * - If @data contains both an apid and apqi value, then @data will be flagged
+ * as reserved if the APID and APQI fields for the AP queue device matches
+ *
+ * - If @data contains only an apid value, @data will be flagged as
+ * reserved if the APID field in the AP queue device matches
+ *
+ * - If @data contains only an apqi value, @data will be flagged as
+ * reserved if the APQI field in the AP queue device matches
+ *
+ * Returns 0 to indicate the input to function succeeded. Returns -EINVAL if
+ * @data does not contain either an apid or apqi.
+ */
+static int vfio_ap_has_queue(struct device *dev, void *data)
+{
+ struct vfio_ap_queue_reserved *qres = data;
+ struct ap_queue *ap_queue = to_ap_queue(dev);
+ ap_qid_t qid;
+ unsigned long id;
+
+ if (qres->apid && qres->apqi) {
+ qid = AP_MKQID(*qres->apid, *qres->apqi);
+ if (qid == ap_queue->qid)
+ qres->reserved = true;
+ } else if (qres->apid && !qres->apqi) {
+ id = AP_QID_CARD(ap_queue->qid);
+ if (id == *qres->apid)
+ qres->reserved = true;
+ } else if (!qres->apid && qres->apqi) {
+ id = AP_QID_QUEUE(ap_queue->qid);
+ if (id == *qres->apqi)
+ qres->reserved = true;
+ } else {
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/**
+ * vfio_ap_verify_queue_reserved
+ *
+ * @matrix_dev: a mediated matrix device
+ * @apid: an AP adapter ID
+ * @apqi: an AP queue index
+ *
+ * Verifies that the AP queue with @apid/@apqi is reserved by the VFIO AP device
+ * driver according to the following rules:
+ *
+ * - If both @apid and @apqi are not NULL, then there must be an AP queue
+ * device bound to the vfio_ap driver with the APQN identified by @apid and
+ * @apqi
+ *
+ * - If only @apid is not NULL, then there must be an AP queue device bound
+ * to the vfio_ap driver with an APQN containing @apid
+ *
+ * - If only @apqi is not NULL, then there must be an AP queue device bound
+ * to the vfio_ap driver with an APQN containing @apqi
+ *
+ * Returns 0 if the AP queue is reserved; otherwise, returns -EADDRNOTAVAIL.
+ */
+static int vfio_ap_verify_queue_reserved(unsigned long *apid,
+ unsigned long *apqi)
+{
+ int ret;
+ struct vfio_ap_queue_reserved qres;
+
+ qres.apid = apid;
+ qres.apqi = apqi;
+ qres.reserved = false;
+
+ ret = driver_for_each_device(matrix_dev->device.driver, NULL, &qres,
+ vfio_ap_has_queue);
+ if (ret)
+ return ret;
+
+ if (qres.reserved)
+ return 0;
+
+ return -EADDRNOTAVAIL;
+}
+
+static int
+vfio_ap_mdev_verify_queues_reserved_for_apid(struct ap_matrix_mdev *matrix_mdev,
+ unsigned long apid)
+{
+ int ret;
+ unsigned long apqi;
+ unsigned long nbits = matrix_mdev->matrix.aqm_max + 1;
+
+ if (find_first_bit_inv(matrix_mdev->matrix.aqm, nbits) >= nbits)
+ return vfio_ap_verify_queue_reserved(&apid, NULL);
+
+ for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm, nbits) {
+ ret = vfio_ap_verify_queue_reserved(&apid, &apqi);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+/**
+ * vfio_ap_mdev_verify_no_sharing
+ *
+ * Verifies that the APQNs derived from the cross product of the AP adapter IDs
+ * and AP queue indexes comprising the AP matrix are not configured for another
+ * mediated device. AP queue sharing is not allowed.
+ *
+ * @matrix_mdev: the mediated matrix device
+ *
+ * Returns 0 if the APQNs are not shared, otherwise; returns -EADDRINUSE.
+ */
+static int vfio_ap_mdev_verify_no_sharing(struct ap_matrix_mdev *matrix_mdev)
+{
+ struct ap_matrix_mdev *lstdev;
+ DECLARE_BITMAP(apm, AP_DEVICES);
+ DECLARE_BITMAP(aqm, AP_DOMAINS);
+
+ list_for_each_entry(lstdev, &matrix_dev->mdev_list, node) {
+ if (matrix_mdev == lstdev)
+ continue;
+
+ memset(apm, 0, sizeof(apm));
+ memset(aqm, 0, sizeof(aqm));
+
+ /*
+ * We work on full longs, as we can only exclude the leftover
+ * bits in non-inverse order. The leftover is all zeros.
+ */
+ if (!bitmap_and(apm, matrix_mdev->matrix.apm,
+ lstdev->matrix.apm, AP_DEVICES))
+ continue;
+
+ if (!bitmap_and(aqm, matrix_mdev->matrix.aqm,
+ lstdev->matrix.aqm, AP_DOMAINS))
+ continue;
+
+ return -EADDRINUSE;
+ }
+
+ return 0;
+}
+
+/**
+ * assign_adapter_store
+ *
+ * @dev: the matrix device
+ * @attr: the mediated matrix device's assign_adapter attribute
+ * @buf: a buffer containing the AP adapter number (APID) to
+ * be assigned
+ * @count: the number of bytes in @buf
+ *
+ * Parses the APID from @buf and sets the corresponding bit in the mediated
+ * matrix device's APM.
+ *
+ * Returns the number of bytes processed if the APID is valid; otherwise,
+ * returns one of the following errors:
+ *
+ * 1. -EINVAL
+ * The APID is not a valid number
+ *
+ * 2. -ENODEV
+ * The APID exceeds the maximum value configured for the system
+ *
+ * 3. -EADDRNOTAVAIL
+ * An APQN derived from the cross product of the APID being assigned
+ * and the APQIs previously assigned is not bound to the vfio_ap device
+ * driver; or, if no APQIs have yet been assigned, the APID is not
+ * contained in an APQN bound to the vfio_ap device driver.
+ *
+ * 4. -EADDRINUSE
+ * An APQN derived from the cross product of the APID being assigned
+ * and the APQIs previously assigned is being used by another mediated
+ * matrix device
+ */
+static ssize_t assign_adapter_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ int ret;
+ unsigned long apid;
+ struct mdev_device *mdev = mdev_from_dev(dev);
+ struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
+
+ /* If the guest is running, disallow assignment of adapter */
+ if (matrix_mdev->kvm)
+ return -EBUSY;
+
+ ret = kstrtoul(buf, 0, &apid);
+ if (ret)
+ return ret;
+
+ if (apid > matrix_mdev->matrix.apm_max)
+ return -ENODEV;
+
+ /*
+ * Set the bit in the AP mask (APM) corresponding to the AP adapter
+ * number (APID). The bits in the mask, from most significant to least
+ * significant bit, correspond to APIDs 0-255.
+ */
+ mutex_lock(&matrix_dev->lock);
+
+ ret = vfio_ap_mdev_verify_queues_reserved_for_apid(matrix_mdev, apid);
+ if (ret)
+ goto done;
+
+ set_bit_inv(apid, matrix_mdev->matrix.apm);
+
+ ret = vfio_ap_mdev_verify_no_sharing(matrix_mdev);
+ if (ret)
+ goto share_err;
+
+ ret = count;
+ goto done;
+
+share_err:
+ clear_bit_inv(apid, matrix_mdev->matrix.apm);
+done:
+ mutex_unlock(&matrix_dev->lock);
+
+ return ret;
+}
+static DEVICE_ATTR_WO(assign_adapter);
+
+/**
+ * unassign_adapter_store
+ *
+ * @dev: the matrix device
+ * @attr: the mediated matrix device's unassign_adapter attribute
+ * @buf: a buffer containing the adapter number (APID) to be unassigned
+ * @count: the number of bytes in @buf
+ *
+ * Parses the APID from @buf and clears the corresponding bit in the mediated
+ * matrix device's APM.
+ *
+ * Returns the number of bytes processed if the APID is valid; otherwise,
+ * returns one of the following errors:
+ * -EINVAL if the APID is not a number
+ * -ENODEV if the APID it exceeds the maximum value configured for the
+ * system
+ */
+static ssize_t unassign_adapter_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ int ret;
+ unsigned long apid;
+ struct mdev_device *mdev = mdev_from_dev(dev);
+ struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
+
+ /* If the guest is running, disallow un-assignment of adapter */
+ if (matrix_mdev->kvm)
+ return -EBUSY;
+
+ ret = kstrtoul(buf, 0, &apid);
+ if (ret)
+ return ret;
+
+ if (apid > matrix_mdev->matrix.apm_max)
+ return -ENODEV;
+
+ mutex_lock(&matrix_dev->lock);
+ clear_bit_inv((unsigned long)apid, matrix_mdev->matrix.apm);
+ mutex_unlock(&matrix_dev->lock);
+
+ return count;
+}
+DEVICE_ATTR_WO(unassign_adapter);
+
+static int
+vfio_ap_mdev_verify_queues_reserved_for_apqi(struct ap_matrix_mdev *matrix_mdev,
+ unsigned long apqi)
+{
+ int ret;
+ unsigned long apid;
+ unsigned long nbits = matrix_mdev->matrix.apm_max + 1;
+
+ if (find_first_bit_inv(matrix_mdev->matrix.apm, nbits) >= nbits)
+ return vfio_ap_verify_queue_reserved(NULL, &apqi);
+
+ for_each_set_bit_inv(apid, matrix_mdev->matrix.apm, nbits) {
+ ret = vfio_ap_verify_queue_reserved(&apid, &apqi);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+/**
+ * assign_domain_store
+ *
+ * @dev: the matrix device
+ * @attr: the mediated matrix device's assign_domain attribute
+ * @buf: a buffer containing the AP queue index (APQI) of the domain to
+ * be assigned
+ * @count: the number of bytes in @buf
+ *
+ * Parses the APQI from @buf and sets the corresponding bit in the mediated
+ * matrix device's AQM.
+ *
+ * Returns the number of bytes processed if the APQI is valid; otherwise returns
+ * one of the following errors:
+ *
+ * 1. -EINVAL
+ * The APQI is not a valid number
+ *
+ * 2. -ENODEV
+ * The APQI exceeds the maximum value configured for the system
+ *
+ * 3. -EADDRNOTAVAIL
+ * An APQN derived from the cross product of the APQI being assigned
+ * and the APIDs previously assigned is not bound to the vfio_ap device
+ * driver; or, if no APIDs have yet been assigned, the APQI is not
+ * contained in an APQN bound to the vfio_ap device driver.
+ *
+ * 4. -EADDRINUSE
+ * An APQN derived from the cross product of the APQI being assigned
+ * and the APIDs previously assigned is being used by another mediated
+ * matrix device
+ */
+static ssize_t assign_domain_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ int ret;
+ unsigned long apqi;
+ struct mdev_device *mdev = mdev_from_dev(dev);
+ struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
+ unsigned long max_apqi = matrix_mdev->matrix.aqm_max;
+
+ /* If the guest is running, disallow assignment of domain */
+ if (matrix_mdev->kvm)
+ return -EBUSY;
+
+ ret = kstrtoul(buf, 0, &apqi);
+ if (ret)
+ return ret;
+ if (apqi > max_apqi)
+ return -ENODEV;
+
+ mutex_lock(&matrix_dev->lock);
+
+ ret = vfio_ap_mdev_verify_queues_reserved_for_apqi(matrix_mdev, apqi);
+ if (ret)
+ goto done;
+
+ set_bit_inv(apqi, matrix_mdev->matrix.aqm);
+
+ ret = vfio_ap_mdev_verify_no_sharing(matrix_mdev);
+ if (ret)
+ goto share_err;
+
+ ret = count;
+ goto done;
+
+share_err:
+ clear_bit_inv(apqi, matrix_mdev->matrix.aqm);
+done:
+ mutex_unlock(&matrix_dev->lock);
+
+ return ret;
+}
+DEVICE_ATTR_WO(assign_domain);
+
+
+/**
+ * unassign_domain_store
+ *
+ * @dev: the matrix device
+ * @attr: the mediated matrix device's unassign_domain attribute
+ * @buf: a buffer containing the AP queue index (APQI) of the domain to
+ * be unassigned
+ * @count: the number of bytes in @buf
+ *
+ * Parses the APQI from @buf and clears the corresponding bit in the
+ * mediated matrix device's AQM.
+ *
+ * Returns the number of bytes processed if the APQI is valid; otherwise,
+ * returns one of the following errors:
+ * -EINVAL if the APQI is not a number
+ * -ENODEV if the APQI exceeds the maximum value configured for the system
+ */
+static ssize_t unassign_domain_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ int ret;
+ unsigned long apqi;
+ struct mdev_device *mdev = mdev_from_dev(dev);
+ struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
+
+ /* If the guest is running, disallow un-assignment of domain */
+ if (matrix_mdev->kvm)
+ return -EBUSY;
+
+ ret = kstrtoul(buf, 0, &apqi);
+ if (ret)
+ return ret;
+
+ if (apqi > matrix_mdev->matrix.aqm_max)
+ return -ENODEV;
+
+ mutex_lock(&matrix_dev->lock);
+ clear_bit_inv((unsigned long)apqi, matrix_mdev->matrix.aqm);
+ mutex_unlock(&matrix_dev->lock);
+
+ return count;
+}
+DEVICE_ATTR_WO(unassign_domain);
+
+/**
+ * assign_control_domain_store
+ *
+ * @dev: the matrix device
+ * @attr: the mediated matrix device's assign_control_domain attribute
+ * @buf: a buffer containing the domain ID to be assigned
+ * @count: the number of bytes in @buf
+ *
+ * Parses the domain ID from @buf and sets the corresponding bit in the mediated
+ * matrix device's ADM.
+ *
+ * Returns the number of bytes processed if the domain ID is valid; otherwise,
+ * returns one of the following errors:
+ * -EINVAL if the ID is not a number
+ * -ENODEV if the ID exceeds the maximum value configured for the system
+ */
+static ssize_t assign_control_domain_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ int ret;
+ unsigned long id;
+ struct mdev_device *mdev = mdev_from_dev(dev);
+ struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
+
+ /* If the guest is running, disallow assignment of control domain */
+ if (matrix_mdev->kvm)
+ return -EBUSY;
+
+ ret = kstrtoul(buf, 0, &id);
+ if (ret)
+ return ret;
+
+ if (id > matrix_mdev->matrix.adm_max)
+ return -ENODEV;
+
+ /* Set the bit in the ADM (bitmask) corresponding to the AP control
+ * domain number (id). The bits in the mask, from most significant to
+ * least significant, correspond to IDs 0 up to the one less than the
+ * number of control domains that can be assigned.
+ */
+ mutex_lock(&matrix_dev->lock);
+ set_bit_inv(id, matrix_mdev->matrix.adm);
+ mutex_unlock(&matrix_dev->lock);
+
+ return count;
+}
+DEVICE_ATTR_WO(assign_control_domain);
+
+/**
+ * unassign_control_domain_store
+ *
+ * @dev: the matrix device
+ * @attr: the mediated matrix device's unassign_control_domain attribute
+ * @buf: a buffer containing the domain ID to be unassigned
+ * @count: the number of bytes in @buf
+ *
+ * Parses the domain ID from @buf and clears the corresponding bit in the
+ * mediated matrix device's ADM.
+ *
+ * Returns the number of bytes processed if the domain ID is valid; otherwise,
+ * returns one of the following errors:
+ * -EINVAL if the ID is not a number
+ * -ENODEV if the ID exceeds the maximum value configured for the system
+ */
+static ssize_t unassign_control_domain_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ int ret;
+ unsigned long domid;
+ struct mdev_device *mdev = mdev_from_dev(dev);
+ struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
+ unsigned long max_domid = matrix_mdev->matrix.adm_max;
+
+ /* If the guest is running, disallow un-assignment of control domain */
+ if (matrix_mdev->kvm)
+ return -EBUSY;
+
+ ret = kstrtoul(buf, 0, &domid);
+ if (ret)
+ return ret;
+ if (domid > max_domid)
+ return -ENODEV;
+
+ mutex_lock(&matrix_dev->lock);
+ clear_bit_inv(domid, matrix_mdev->matrix.adm);
+ mutex_unlock(&matrix_dev->lock);
+
+ return count;
+}
+DEVICE_ATTR_WO(unassign_control_domain);
+
+static ssize_t control_domains_show(struct device *dev,
+ struct device_attribute *dev_attr,
+ char *buf)
+{
+ unsigned long id;
+ int nchars = 0;
+ int n;
+ char *bufpos = buf;
+ struct mdev_device *mdev = mdev_from_dev(dev);
+ struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
+ unsigned long max_domid = matrix_mdev->matrix.adm_max;
+
+ mutex_lock(&matrix_dev->lock);
+ for_each_set_bit_inv(id, matrix_mdev->matrix.adm, max_domid + 1) {
+ n = sprintf(bufpos, "%04lx\n", id);
+ bufpos += n;
+ nchars += n;
+ }
+ mutex_unlock(&matrix_dev->lock);
+
+ return nchars;
+}
+DEVICE_ATTR_RO(control_domains);
+
+static ssize_t matrix_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct mdev_device *mdev = mdev_from_dev(dev);
+ struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
+ char *bufpos = buf;
+ unsigned long apid;
+ unsigned long apqi;
+ unsigned long apid1;
+ unsigned long apqi1;
+ unsigned long napm_bits = matrix_mdev->matrix.apm_max + 1;
+ unsigned long naqm_bits = matrix_mdev->matrix.aqm_max + 1;
+ int nchars = 0;
+ int n;
+
+ apid1 = find_first_bit_inv(matrix_mdev->matrix.apm, napm_bits);
+ apqi1 = find_first_bit_inv(matrix_mdev->matrix.aqm, naqm_bits);
+
+ mutex_lock(&matrix_dev->lock);
+
+ if ((apid1 < napm_bits) && (apqi1 < naqm_bits)) {
+ for_each_set_bit_inv(apid, matrix_mdev->matrix.apm, napm_bits) {
+ for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm,
+ naqm_bits) {
+ n = sprintf(bufpos, "%02lx.%04lx\n", apid,
+ apqi);
+ bufpos += n;
+ nchars += n;
+ }
+ }
+ } else if (apid1 < napm_bits) {
+ for_each_set_bit_inv(apid, matrix_mdev->matrix.apm, napm_bits) {
+ n = sprintf(bufpos, "%02lx.\n", apid);
+ bufpos += n;
+ nchars += n;
+ }
+ } else if (apqi1 < naqm_bits) {
+ for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm, naqm_bits) {
+ n = sprintf(bufpos, ".%04lx\n", apqi);
+ bufpos += n;
+ nchars += n;
+ }
+ }
+
+ mutex_unlock(&matrix_dev->lock);
+
+ return nchars;
+}
+DEVICE_ATTR_RO(matrix);
+
+static struct attribute *vfio_ap_mdev_attrs[] = {
+ &dev_attr_assign_adapter.attr,
+ &dev_attr_unassign_adapter.attr,
+ &dev_attr_assign_domain.attr,
+ &dev_attr_unassign_domain.attr,
+ &dev_attr_assign_control_domain.attr,
+ &dev_attr_unassign_control_domain.attr,
+ &dev_attr_control_domains.attr,
+ &dev_attr_matrix.attr,
+ NULL,
+};
+
+static struct attribute_group vfio_ap_mdev_attr_group = {
+ .attrs = vfio_ap_mdev_attrs
+};
+
+static const struct attribute_group *vfio_ap_mdev_attr_groups[] = {
+ &vfio_ap_mdev_attr_group,
+ NULL
+};
+
+static void vfio_ap_mdev_copy_masks(struct ap_matrix_mdev *matrix_mdev)
+{
+ int nbytes;
+ unsigned long *apm, *aqm, *adm;
+ struct kvm_s390_crypto_cb *crycb = matrix_mdev->kvm->arch.crypto.crycb;
+
+ switch (matrix_mdev->kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
+ case CRYCB_FORMAT2:
+ apm = (unsigned long *)crycb->apcb1.apm;
+ aqm = (unsigned long *)crycb->apcb1.aqm;
+ adm = (unsigned long *)crycb->apcb1.adm;
+ break;
+ case CRYCB_FORMAT1:
+ case CRYCB_FORMAT0:
+ apm = (unsigned long *)crycb->apcb0.apm;
+ aqm = (unsigned long *)crycb->apcb0.aqm;
+ adm = (unsigned long *)crycb->apcb0.adm;
+ break;
+ default:
+ /* cannot happen */
+ return;
+ }
+
+ nbytes = DIV_ROUND_UP(matrix_mdev->matrix.apm_max + 1, BITS_PER_BYTE);
+ memcpy(apm, matrix_mdev->matrix.apm, nbytes);
+ nbytes = DIV_ROUND_UP(matrix_mdev->matrix.aqm_max + 1, BITS_PER_BYTE);
+ memcpy(aqm, matrix_mdev->matrix.aqm, nbytes);
+ nbytes = DIV_ROUND_UP(matrix_mdev->matrix.adm_max + 1, BITS_PER_BYTE);
+ memcpy(adm, matrix_mdev->matrix.adm, nbytes);
+}
+
+/**
+ * vfio_ap_mdev_set_kvm
+ *
+ * @matrix_mdev: a mediated matrix device
+ * @kvm: reference to KVM instance
+ *
+ * Verifies no other mediated matrix device has @kvm and sets a reference to
+ * it in @matrix_mdev->kvm.
+ *
+ * Return 0 if no other mediated matrix device has a reference to @kvm;
+ * otherwise, returns an -EPERM.
+ */
+static int vfio_ap_mdev_set_kvm(struct ap_matrix_mdev *matrix_mdev,
+ struct kvm *kvm)
+{
+ struct ap_matrix_mdev *m;
+
+ mutex_lock(&matrix_dev->lock);
+
+ list_for_each_entry(m, &matrix_dev->mdev_list, node) {
+ if ((m != matrix_mdev) && (m->kvm == kvm)) {
+ mutex_unlock(&matrix_dev->lock);
+ return -EPERM;
+ }
+ }
+
+ matrix_mdev->kvm = kvm;
+ mutex_unlock(&matrix_dev->lock);
+
+ return 0;
+}
+
+static int vfio_ap_mdev_group_notifier(struct notifier_block *nb,
+ unsigned long action, void *data)
+{
+ int ret;
+ struct ap_matrix_mdev *matrix_mdev;
+
+ if (action != VFIO_GROUP_NOTIFY_SET_KVM)
+ return NOTIFY_OK;
+
+ matrix_mdev = container_of(nb, struct ap_matrix_mdev, group_notifier);
+
+ if (!data) {
+ matrix_mdev->kvm = NULL;
+ return NOTIFY_OK;
+ }
+
+ ret = vfio_ap_mdev_set_kvm(matrix_mdev, data);
+ if (ret)
+ return NOTIFY_DONE;
+
+ /* If there is no CRYCB pointer, then we can't copy the masks */
+ if (!matrix_mdev->kvm->arch.crypto.crycbd)
+ return NOTIFY_DONE;
+
+ vfio_ap_mdev_copy_masks(matrix_mdev);
+
+ return NOTIFY_OK;
+}
+
+static int vfio_ap_mdev_reset_queue(unsigned int apid, unsigned int apqi,
+ unsigned int retry)
+{
+ struct ap_queue_status status;
+
+ do {
+ status = ap_zapq(AP_MKQID(apid, apqi));
+ switch (status.response_code) {
+ case AP_RESPONSE_NORMAL:
+ return 0;
+ case AP_RESPONSE_RESET_IN_PROGRESS:
+ case AP_RESPONSE_BUSY:
+ msleep(20);
+ break;
+ default:
+ /* things are really broken, give up */
+ return -EIO;
+ }
+ } while (retry--);
+
+ return -EBUSY;
+}
+
+static int vfio_ap_mdev_reset_queues(struct mdev_device *mdev)
+{
+ int ret;
+ int rc = 0;
+ unsigned long apid, apqi;
+ struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
+
+ for_each_set_bit_inv(apid, matrix_mdev->matrix.apm,
+ matrix_mdev->matrix.apm_max + 1) {
+ for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm,
+ matrix_mdev->matrix.aqm_max + 1) {
+ ret = vfio_ap_mdev_reset_queue(apid, apqi, 1);
+ /*
+ * Regardless whether a queue turns out to be busy, or
+ * is not operational, we need to continue resetting
+ * the remaining queues.
+ */
+ if (ret)
+ rc = ret;
+ }
+ }
+
+ return rc;
+}
+
+static int vfio_ap_mdev_open(struct mdev_device *mdev)
+{
+ struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
+ unsigned long events;
+ int ret;
+
+
+ if (!try_module_get(THIS_MODULE))
+ return -ENODEV;
+
+ matrix_mdev->group_notifier.notifier_call = vfio_ap_mdev_group_notifier;
+ events = VFIO_GROUP_NOTIFY_SET_KVM;
+
+ ret = vfio_register_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY,
+ &events, &matrix_mdev->group_notifier);
+ if (ret) {
+ module_put(THIS_MODULE);
+ return ret;
+ }
+
+ return 0;
+}
+
+static void vfio_ap_mdev_release(struct mdev_device *mdev)
+{
+ struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
+
+ if (matrix_mdev->kvm)
+ kvm_arch_crypto_clear_masks(matrix_mdev->kvm);
+
+ vfio_ap_mdev_reset_queues(mdev);
+ vfio_unregister_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY,
+ &matrix_mdev->group_notifier);
+ matrix_mdev->kvm = NULL;
+ module_put(THIS_MODULE);
+}
+
+static int vfio_ap_mdev_get_device_info(unsigned long arg)
+{
+ unsigned long minsz;
+ struct vfio_device_info info;
+
+ minsz = offsetofend(struct vfio_device_info, num_irqs);
+
+ if (copy_from_user(&info, (void __user *)arg, minsz))
+ return -EFAULT;
+
+ if (info.argsz < minsz)
+ return -EINVAL;
+
+ info.flags = VFIO_DEVICE_FLAGS_AP | VFIO_DEVICE_FLAGS_RESET;
+ info.num_regions = 0;
+ info.num_irqs = 0;
+
+ return copy_to_user((void __user *)arg, &info, minsz);
+}
+
+static ssize_t vfio_ap_mdev_ioctl(struct mdev_device *mdev,
+ unsigned int cmd, unsigned long arg)
+{
+ int ret;
+
+ switch (cmd) {
+ case VFIO_DEVICE_GET_INFO:
+ ret = vfio_ap_mdev_get_device_info(arg);
+ break;
+ case VFIO_DEVICE_RESET:
+ ret = vfio_ap_mdev_reset_queues(mdev);
+ break;
+ default:
+ ret = -EOPNOTSUPP;
+ break;
+ }
+
+ return ret;
+}
+
+static const struct mdev_parent_ops vfio_ap_matrix_ops = {
+ .owner = THIS_MODULE,
+ .supported_type_groups = vfio_ap_mdev_type_groups,
+ .mdev_attr_groups = vfio_ap_mdev_attr_groups,
+ .create = vfio_ap_mdev_create,
+ .remove = vfio_ap_mdev_remove,
+ .open = vfio_ap_mdev_open,
+ .release = vfio_ap_mdev_release,
+ .ioctl = vfio_ap_mdev_ioctl,
+};
+
+int vfio_ap_mdev_register(void)
+{
+ atomic_set(&matrix_dev->available_instances, MAX_ZDEV_ENTRIES_EXT);
+
+ return mdev_register_device(&matrix_dev->device, &vfio_ap_matrix_ops);
+}
+
+void vfio_ap_mdev_unregister(void)
+{
+ mdev_unregister_device(&matrix_dev->device);
+}
diff --git a/drivers/s390/crypto/vfio_ap_private.h b/drivers/s390/crypto/vfio_ap_private.h
new file mode 100644
index 000000000000..5675492233c7
--- /dev/null
+++ b/drivers/s390/crypto/vfio_ap_private.h
@@ -0,0 +1,88 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Private data and functions for adjunct processor VFIO matrix driver.
+ *
+ * Author(s): Tony Krowiak <akrowiak@linux.ibm.com>
+ * Halil Pasic <pasic@linux.ibm.com>
+ *
+ * Copyright IBM Corp. 2018
+ */
+
+#ifndef _VFIO_AP_PRIVATE_H_
+#define _VFIO_AP_PRIVATE_H_
+
+#include <linux/types.h>
+#include <linux/device.h>
+#include <linux/mdev.h>
+#include <linux/delay.h>
+#include <linux/mutex.h>
+
+#include "ap_bus.h"
+
+#define VFIO_AP_MODULE_NAME "vfio_ap"
+#define VFIO_AP_DRV_NAME "vfio_ap"
+
+/**
+ * ap_matrix_dev - the AP matrix device structure
+ * @device: generic device structure associated with the AP matrix device
+ * @available_instances: number of mediated matrix devices that can be created
+ * @info: the struct containing the output from the PQAP(QCI) instruction
+ * mdev_list: the list of mediated matrix devices created
+ * lock: mutex for locking the AP matrix device. This lock will be
+ * taken every time we fiddle with state managed by the vfio_ap
+ * driver, be it using @mdev_list or writing the state of a
+ * single ap_matrix_mdev device. It's quite coarse but we don't
+ * expect much contention.
+ */
+struct ap_matrix_dev {
+ struct device device;
+ atomic_t available_instances;
+ struct ap_config_info info;
+ struct list_head mdev_list;
+ struct mutex lock;
+};
+
+extern struct ap_matrix_dev *matrix_dev;
+
+/**
+ * The AP matrix is comprised of three bit masks identifying the adapters,
+ * queues (domains) and control domains that belong to an AP matrix. The bits i
+ * each mask, from least significant to most significant bit, correspond to IDs
+ * 0 to 255. When a bit is set, the corresponding ID belongs to the matrix.
+ *
+ * @apm_max: max adapter number in @apm
+ * @apm identifies the AP adapters in the matrix
+ * @aqm_max: max domain number in @aqm
+ * @aqm identifies the AP queues (domains) in the matrix
+ * @adm_max: max domain number in @adm
+ * @adm identifies the AP control domains in the matrix
+ */
+struct ap_matrix {
+ unsigned long apm_max;
+ DECLARE_BITMAP(apm, 256);
+ unsigned long aqm_max;
+ DECLARE_BITMAP(aqm, 256);
+ unsigned long adm_max;
+ DECLARE_BITMAP(adm, 256);
+};
+
+/**
+ * struct ap_matrix_mdev - the mediated matrix device structure
+ * @list: allows the ap_matrix_mdev struct to be added to a list
+ * @matrix: the adapters, usage domains and control domains assigned to the
+ * mediated matrix device.
+ * @group_notifier: notifier block used for specifying callback function for
+ * handling the VFIO_GROUP_NOTIFY_SET_KVM event
+ * @kvm: the struct holding guest's state
+ */
+struct ap_matrix_mdev {
+ struct list_head node;
+ struct ap_matrix matrix;
+ struct notifier_block group_notifier;
+ struct kvm *kvm;
+};
+
+extern int vfio_ap_mdev_register(void);
+extern void vfio_ap_mdev_unregister(void);
+
+#endif /* _VFIO_AP_PRIVATE_H_ */
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 1aa7b82e8169..f378b9802d8b 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -200,6 +200,7 @@ struct vfio_device_info {
#define VFIO_DEVICE_FLAGS_PLATFORM (1 << 2) /* vfio-platform device */
#define VFIO_DEVICE_FLAGS_AMBA (1 << 3) /* vfio-amba device */
#define VFIO_DEVICE_FLAGS_CCW (1 << 4) /* vfio-ccw device */
+#define VFIO_DEVICE_FLAGS_AP (1 << 5) /* vfio-ap device */
__u32 num_regions; /* Max region index + 1 */
__u32 num_irqs; /* Max IRQ index + 1 */
};
@@ -215,6 +216,7 @@ struct vfio_device_info {
#define VFIO_DEVICE_API_PLATFORM_STRING "vfio-platform"
#define VFIO_DEVICE_API_AMBA_STRING "vfio-amba"
#define VFIO_DEVICE_API_CCW_STRING "vfio-ccw"
+#define VFIO_DEVICE_API_AP_STRING "vfio-ap"
/**
* VFIO_DEVICE_GET_REGION_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 8,