diff options
Diffstat (limited to 'drivers/s390')
32 files changed, 1635 insertions, 967 deletions
diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index ba6d78789660..ea82821599f6 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -1725,7 +1725,7 @@ void dasd_int_handler(struct ccw_device *cdev, unsigned long intparm, dasd_put_device(device); } - /* check for for attention message */ + /* check for attention message */ if (scsw_dstat(&irb->scsw) & DEV_STAT_ATTENTION) { device = dasd_device_from_cdev_locked(cdev); if (!IS_ERR(device)) { @@ -3145,7 +3145,7 @@ out: * BLK_EH_DONE if the request is handled or terminated * by the driver. */ -enum blk_eh_timer_return dasd_times_out(struct request *req, bool reserved) +enum blk_eh_timer_return dasd_times_out(struct request *req) { struct dasd_block *block = req->q->queuedata; struct dasd_device *device; @@ -3280,7 +3280,7 @@ static int dasd_alloc_queue(struct dasd_block *block) static void dasd_free_queue(struct dasd_block *block) { if (block->request_queue) { - blk_cleanup_queue(block->request_queue); + blk_mq_destroy_queue(block->request_queue); blk_mq_free_tag_set(&block->tag_set); block->request_queue = NULL; } diff --git a/drivers/s390/block/dasd_diag.c b/drivers/s390/block/dasd_diag.c index e9edf3b6ed7c..94ee59864971 100644 --- a/drivers/s390/block/dasd_diag.c +++ b/drivers/s390/block/dasd_diag.c @@ -639,6 +639,7 @@ static void dasd_diag_setup_blk_queue(struct dasd_block *block) /* With page sized segments each segment can be translated into one idaw/tidaw */ blk_queue_max_segment_size(q, PAGE_SIZE); blk_queue_segment_boundary(q, PAGE_SIZE - 1); + blk_queue_dma_alignment(q, PAGE_SIZE - 1); } static int dasd_diag_pe_handler(struct dasd_device *device, diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index 836838f7d686..3cc93e2e4e15 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -6626,6 +6626,7 @@ static void dasd_eckd_setup_blk_queue(struct dasd_block *block) /* With page sized segments each segment can be 
translated into one idaw/tidaw */ blk_queue_max_segment_size(q, PAGE_SIZE); blk_queue_segment_boundary(q, PAGE_SIZE - 1); + blk_queue_dma_alignment(q, PAGE_SIZE - 1); } static struct ccw_driver dasd_eckd_driver = { diff --git a/drivers/s390/block/dasd_genhd.c b/drivers/s390/block/dasd_genhd.c index a7a33ebf4bbe..5a83f0a39901 100644 --- a/drivers/s390/block/dasd_genhd.c +++ b/drivers/s390/block/dasd_genhd.c @@ -41,8 +41,8 @@ int dasd_gendisk_alloc(struct dasd_block *block) if (base->devindex >= DASD_PER_MAJOR) return -EBUSY; - gdp = __alloc_disk_node(block->request_queue, NUMA_NO_NODE, - &dasd_bio_compl_lkclass); + gdp = blk_mq_alloc_disk_for_queue(block->request_queue, + &dasd_bio_compl_lkclass); if (!gdp) return -ENOMEM; diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h index 83b918b84b4a..333a399f754e 100644 --- a/drivers/s390/block/dasd_int.h +++ b/drivers/s390/block/dasd_int.h @@ -795,7 +795,7 @@ void dasd_free_device(struct dasd_device *); struct dasd_block *dasd_alloc_block(void); void dasd_free_block(struct dasd_block *); -enum blk_eh_timer_return dasd_times_out(struct request *req, bool reserved); +enum blk_eh_timer_return dasd_times_out(struct request *req); void dasd_enable_device(struct dasd_device *); void dasd_set_target_state(struct dasd_device *, int); diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index 8d0d0eaa3059..5187705bd0f3 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c @@ -414,7 +414,7 @@ removeseg: kill_dax(dev_info->dax_dev); put_dax(dev_info->dax_dev); del_gendisk(dev_info->gd); - blk_cleanup_disk(dev_info->gd); + put_disk(dev_info->gd); up_write(&dcssblk_devices_sem); if (device_remove_file_self(dev, attr)) { @@ -712,7 +712,7 @@ out_dax: put_dax(dev_info->dax_dev); put_dev: list_del(&dev_info->lh); - blk_cleanup_disk(dev_info->gd); + put_disk(dev_info->gd); list_for_each_entry(seg_info, &dev_info->seg_list, lh) { segment_unload(seg_info->segment_name); } @@ 
-722,7 +722,7 @@ put_dev: dev_list_del: list_del(&dev_info->lh); release_gd: - blk_cleanup_disk(dev_info->gd); + put_disk(dev_info->gd); up_write(&dcssblk_devices_sem); seg_list_del: if (dev_info == NULL) @@ -790,7 +790,7 @@ dcssblk_remove_store(struct device *dev, struct device_attribute *attr, const ch kill_dax(dev_info->dax_dev); put_dax(dev_info->dax_dev); del_gendisk(dev_info->gd); - blk_cleanup_disk(dev_info->gd); + put_disk(dev_info->gd); /* unload all related segments */ list_for_each_entry(entry, &dev_info->seg_list, lh) @@ -863,7 +863,7 @@ dcssblk_submit_bio(struct bio *bio) unsigned long source_addr; unsigned long bytes_done; - blk_queue_split(&bio); + bio = bio_split_to_limits(bio); bytes_done = 0; dev_info = bio->bi_bdev->bd_disk->private_data; diff --git a/drivers/s390/block/scm_blk.c b/drivers/s390/block/scm_blk.c index 2a9c0ddcade5..0c1df1d5f1ac 100644 --- a/drivers/s390/block/scm_blk.c +++ b/drivers/s390/block/scm_blk.c @@ -501,7 +501,7 @@ int scm_blk_dev_setup(struct scm_blk_dev *bdev, struct scm_device *scmdev) return 0; out_cleanup_disk: - blk_cleanup_disk(bdev->gendisk); + put_disk(bdev->gendisk); out_tag: blk_mq_free_tag_set(&bdev->tag_set); out: @@ -512,7 +512,7 @@ out: void scm_blk_dev_cleanup(struct scm_blk_dev *bdev) { del_gendisk(bdev->gendisk); - blk_cleanup_disk(bdev->gendisk); + put_disk(bdev->gendisk); blk_mq_free_tag_set(&bdev->tag_set); } diff --git a/drivers/s390/char/Kconfig b/drivers/s390/char/Kconfig index 57f41efb8043..7d1749b0d378 100644 --- a/drivers/s390/char/Kconfig +++ b/drivers/s390/char/Kconfig @@ -89,7 +89,7 @@ config HMC_DRV Management Console (HMC) drive CD/DVD-ROM. It is available as a module, called 'hmcdrv', and also as kernel built-in. There is one optional parameter for this module: cachesize=N, which modifies the - transfer cache size from it's default value 0.5MB to N bytes. If N + transfer cache size from its default value 0.5MB to N bytes. If N is zero, then no caching is performed. 
config SCLP_OFB diff --git a/drivers/s390/char/sclp.c b/drivers/s390/char/sclp.c index cb2491761958..ae1d6ee382a5 100644 --- a/drivers/s390/char/sclp.c +++ b/drivers/s390/char/sclp.c @@ -60,7 +60,7 @@ static LIST_HEAD(sclp_reg_list); /* List of queued requests. */ static LIST_HEAD(sclp_req_queue); -/* Data for read and and init requests. */ +/* Data for read and init requests. */ static struct sclp_req sclp_read_req; static struct sclp_req sclp_init_req; static void *sclp_read_sccb; diff --git a/drivers/s390/char/sclp_early.c b/drivers/s390/char/sclp_early.c index dd313ff57df3..d15b0d541de3 100644 --- a/drivers/s390/char/sclp_early.c +++ b/drivers/s390/char/sclp_early.c @@ -45,6 +45,10 @@ static void __init sclp_early_facilities_detect(void) sclp.has_gisaf = !!(sccb->fac118 & 0x08); sclp.has_hvs = !!(sccb->fac119 & 0x80); sclp.has_kss = !!(sccb->fac98 & 0x01); + sclp.has_aisii = !!(sccb->fac118 & 0x40); + sclp.has_aeni = !!(sccb->fac118 & 0x20); + sclp.has_aisi = !!(sccb->fac118 & 0x10); + sclp.has_zpci_lsi = !!(sccb->fac118 & 0x01); if (sccb->fac85 & 0x02) S390_lowcore.machine_flags |= MACHINE_FLAG_ESOP; if (sccb->fac91 & 0x40) diff --git a/drivers/s390/char/tape_34xx.c b/drivers/s390/char/tape_34xx.c index 38cc1565d6ae..751945fb6793 100644 --- a/drivers/s390/char/tape_34xx.c +++ b/drivers/s390/char/tape_34xx.c @@ -548,7 +548,7 @@ tape_34xx_unit_check(struct tape_device *device, struct tape_request *request, case 0x2e: /* * Not capable. This indicates either that the drive fails - * reading the format id mark or that that format specified + * reading the format id mark or that format specified * is not supported by the drive. 
*/ dev_warn (&device->cdev->dev, "The tape unit cannot process " diff --git a/drivers/s390/char/uvdevice.c b/drivers/s390/char/uvdevice.c index 66505d7166a6..1d40457c7b10 100644 --- a/drivers/s390/char/uvdevice.c +++ b/drivers/s390/char/uvdevice.c @@ -27,6 +27,7 @@ #include <linux/stddef.h> #include <linux/vmalloc.h> #include <linux/slab.h> +#include <linux/cpufeature.h> #include <asm/uvdevice.h> #include <asm/uv.h> @@ -244,12 +245,10 @@ static void __exit uvio_dev_exit(void) static int __init uvio_dev_init(void) { - if (!test_facility(158)) - return -ENXIO; return misc_register(&uvio_dev_miscdev); } -module_init(uvio_dev_init); +module_cpu_feature_match(S390_CPU_FEATURE_UV, uvio_dev_init); module_exit(uvio_dev_exit); MODULE_AUTHOR("IBM Corporation"); diff --git a/drivers/s390/char/zcore.c b/drivers/s390/char/zcore.c index 516783ba950f..f6da215ccf9f 100644 --- a/drivers/s390/char/zcore.c +++ b/drivers/s390/char/zcore.c @@ -17,6 +17,7 @@ #include <linux/debugfs.h> #include <linux/panic_notifier.h> #include <linux/reboot.h> +#include <linux/uio.h> #include <asm/asm-offsets.h> #include <asm/ipl.h> @@ -50,36 +51,41 @@ static struct dentry *zcore_reipl_file; static struct dentry *zcore_hsa_file; static struct ipl_parameter_block *zcore_ipl_block; +static DEFINE_MUTEX(hsa_buf_mutex); static char hsa_buf[PAGE_SIZE] __aligned(PAGE_SIZE); /* - * Copy memory from HSA to user memory (not reentrant): + * Copy memory from HSA to iterator (not reentrant): * - * @dest: User buffer where memory should be copied to + * @iter: Iterator where memory should be copied to * @src: Start address within HSA where data should be copied * @count: Size of buffer, which should be copied */ -int memcpy_hsa_user(void __user *dest, unsigned long src, size_t count) +size_t memcpy_hsa_iter(struct iov_iter *iter, unsigned long src, size_t count) { - unsigned long offset, bytes; + size_t bytes, copied, res = 0; + unsigned long offset; if (!hsa_available) - return -ENODATA; + return 0; + 
mutex_lock(&hsa_buf_mutex); while (count) { if (sclp_sdias_copy(hsa_buf, src / PAGE_SIZE + 2, 1)) { TRACE("sclp_sdias_copy() failed\n"); - return -EIO; + break; } offset = src % PAGE_SIZE; bytes = min(PAGE_SIZE - offset, count); - if (copy_to_user(dest, hsa_buf + offset, bytes)) - return -EFAULT; - src += bytes; - dest += bytes; - count -= bytes; + copied = copy_to_iter(hsa_buf + offset, bytes, iter); + count -= copied; + src += copied; + res += copied; + if (copied < bytes) + break; } - return 0; + mutex_unlock(&hsa_buf_mutex); + return res; } /* @@ -89,25 +95,16 @@ int memcpy_hsa_user(void __user *dest, unsigned long src, size_t count) * @src: Start address within HSA where data should be copied * @count: Size of buffer, which should be copied */ -int memcpy_hsa_kernel(void *dest, unsigned long src, size_t count) +static inline int memcpy_hsa_kernel(void *dst, unsigned long src, size_t count) { - unsigned long offset, bytes; + struct iov_iter iter; + struct kvec kvec; - if (!hsa_available) - return -ENODATA; - - while (count) { - if (sclp_sdias_copy(hsa_buf, src / PAGE_SIZE + 2, 1)) { - TRACE("sclp_sdias_copy() failed\n"); - return -EIO; - } - offset = src % PAGE_SIZE; - bytes = min(PAGE_SIZE - offset, count); - memcpy(dest, hsa_buf + offset, bytes); - src += bytes; - dest += bytes; - count -= bytes; - } + kvec.iov_base = dst; + kvec.iov_len = count; + iov_iter_kvec(&iter, WRITE, &kvec, 1, count); + if (memcpy_hsa_iter(&iter, src, count) < count) + return -EIO; return 0; } diff --git a/drivers/s390/cio/airq.c b/drivers/s390/cio/airq.c index c0ed364bf446..34967e67249e 100644 --- a/drivers/s390/cio/airq.c +++ b/drivers/s390/cio/airq.c @@ -99,7 +99,7 @@ static irqreturn_t do_airq_interrupt(int irq, void *dummy) rcu_read_lock(); hlist_for_each_entry_rcu(airq, head, list) if ((*airq->lsi_ptr & airq->lsi_mask) != 0) - airq->handler(airq, !tpi_info->directed_irq); + airq->handler(airq, tpi_info); rcu_read_unlock(); return IRQ_HANDLED; @@ -122,10 +122,12 @@ static inline 
unsigned long iv_size(unsigned long bits) * airq_iv_create - create an interrupt vector * @bits: number of bits in the interrupt vector * @flags: allocation flags + * @vec: pointer to pinned guest memory if AIRQ_IV_GUESTVEC * * Returns a pointer to an interrupt vector structure */ -struct airq_iv *airq_iv_create(unsigned long bits, unsigned long flags) +struct airq_iv *airq_iv_create(unsigned long bits, unsigned long flags, + unsigned long *vec) { struct airq_iv *iv; unsigned long size; @@ -146,6 +148,8 @@ struct airq_iv *airq_iv_create(unsigned long bits, unsigned long flags) &iv->vector_dma); if (!iv->vector) goto out_free; + } else if (flags & AIRQ_IV_GUESTVEC) { + iv->vector = vec; } else { iv->vector = cio_dma_zalloc(size); if (!iv->vector) @@ -185,7 +189,7 @@ out_free: kfree(iv->avail); if (iv->flags & AIRQ_IV_CACHELINE && iv->vector) dma_pool_free(airq_iv_cache, iv->vector, iv->vector_dma); - else + else if (!(iv->flags & AIRQ_IV_GUESTVEC)) cio_dma_free(iv->vector, size); kfree(iv); out: @@ -204,7 +208,7 @@ void airq_iv_release(struct airq_iv *iv) kfree(iv->bitlock); if (iv->flags & AIRQ_IV_CACHELINE) dma_pool_free(airq_iv_cache, iv->vector, iv->vector_dma); - else + else if (!(iv->flags & AIRQ_IV_GUESTVEC)) cio_dma_free(iv->vector, iv_size(iv->bits)); kfree(iv->avail); kfree(iv); diff --git a/drivers/s390/cio/qdio_thinint.c b/drivers/s390/cio/qdio_thinint.c index 8e09bf3a2fcd..9b9335dd06db 100644 --- a/drivers/s390/cio/qdio_thinint.c +++ b/drivers/s390/cio/qdio_thinint.c @@ -15,6 +15,7 @@ #include <asm/qdio.h> #include <asm/airq.h> #include <asm/isc.h> +#include <asm/tpi.h> #include "cio.h" #include "ioasm.h" @@ -93,9 +94,10 @@ static inline u32 clear_shared_ind(void) /** * tiqdio_thinint_handler - thin interrupt handler for qdio * @airq: pointer to adapter interrupt descriptor - * @floating: flag to recognize floating vs. directed interrupts (unused) + * @tpi_info: interrupt information (e.g. 
floating vs directed -- unused) */ -static void tiqdio_thinint_handler(struct airq_struct *airq, bool floating) +static void tiqdio_thinint_handler(struct airq_struct *airq, + struct tpi_info *tpi_info) { u64 irq_time = S390_lowcore.int_clock; u32 si_used = clear_shared_ind(); diff --git a/drivers/s390/cio/vfio_ccw_async.c b/drivers/s390/cio/vfio_ccw_async.c index 7a838e3d7c0f..420d89ba7f83 100644 --- a/drivers/s390/cio/vfio_ccw_async.c +++ b/drivers/s390/cio/vfio_ccw_async.c @@ -8,7 +8,6 @@ */ #include <linux/vfio.h> -#include <linux/mdev.h> #include "vfio_ccw_private.h" diff --git a/drivers/s390/cio/vfio_ccw_cp.c b/drivers/s390/cio/vfio_ccw_cp.c index 0c2be9421ab7..7b02e97f4b29 100644 --- a/drivers/s390/cio/vfio_ccw_cp.c +++ b/drivers/s390/cio/vfio_ccw_cp.c @@ -11,6 +11,7 @@ #include <linux/ratelimit.h> #include <linux/mm.h> #include <linux/slab.h> +#include <linux/highmem.h> #include <linux/iommu.h> #include <linux/vfio.h> #include <asm/idals.h> @@ -18,13 +19,11 @@ #include "vfio_ccw_cp.h" #include "vfio_ccw_private.h" -struct pfn_array { - /* Starting guest physical I/O address. */ - unsigned long pa_iova; - /* Array that stores PFNs of the pages need to pin. */ - unsigned long *pa_iova_pfn; - /* Array that receives PFNs of the pages pinned. */ - unsigned long *pa_pfn; +struct page_array { + /* Array that stores pages need to pin. */ + dma_addr_t *pa_iova; + /* Array that receives the pinned pages. */ + struct page **pa_page; /* Number of pages pinned from @pa_iova. */ int pa_nr; }; @@ -37,116 +36,158 @@ struct ccwchain { /* Count of the valid ccws in chain. */ int ch_len; /* Pinned PAGEs for the original data. 
*/ - struct pfn_array *ch_pa; + struct page_array *ch_pa; }; /* - * pfn_array_alloc() - alloc memory for PFNs - * @pa: pfn_array on which to perform the operation + * page_array_alloc() - alloc memory for page array + * @pa: page_array on which to perform the operation * @iova: target guest physical address * @len: number of bytes that should be pinned from @iova * - * Attempt to allocate memory for PFNs. + * Attempt to allocate memory for page array. * - * Usage of pfn_array: - * We expect (pa_nr == 0) and (pa_iova_pfn == NULL), any field in + * Usage of page_array: + * We expect (pa_nr == 0) and (pa_iova == NULL), any field in * this structure will be filled in by this function. * * Returns: - * 0 if PFNs are allocated - * -EINVAL if pa->pa_nr is not initially zero, or pa->pa_iova_pfn is not NULL + * 0 if page array is allocated + * -EINVAL if pa->pa_nr is not initially zero, or pa->pa_iova is not NULL * -ENOMEM if alloc failed */ -static int pfn_array_alloc(struct pfn_array *pa, u64 iova, unsigned int len) +static int page_array_alloc(struct page_array *pa, u64 iova, unsigned int len) { int i; - if (pa->pa_nr || pa->pa_iova_pfn) + if (pa->pa_nr || pa->pa_iova) return -EINVAL; - pa->pa_iova = iova; - pa->pa_nr = ((iova & ~PAGE_MASK) + len + (PAGE_SIZE - 1)) >> PAGE_SHIFT; if (!pa->pa_nr) return -EINVAL; - pa->pa_iova_pfn = kcalloc(pa->pa_nr, - sizeof(*pa->pa_iova_pfn) + - sizeof(*pa->pa_pfn), - GFP_KERNEL); - if (unlikely(!pa->pa_iova_pfn)) { + pa->pa_iova = kcalloc(pa->pa_nr, + sizeof(*pa->pa_iova) + sizeof(*pa->pa_page), + GFP_KERNEL); + if (unlikely(!pa->pa_iova)) { pa->pa_nr = 0; return -ENOMEM; } - pa->pa_pfn = pa->pa_iova_pfn + pa->pa_nr; + pa->pa_page = (struct page **)&pa->pa_iova[pa->pa_nr]; - pa->pa_iova_pfn[0] = pa->pa_iova >> PAGE_SHIFT; - pa->pa_pfn[0] = -1ULL; + pa->pa_iova[0] = iova; + pa->pa_page[0] = NULL; for (i = 1; i < pa->pa_nr; i++) { - pa->pa_iova_pfn[i] = pa->pa_iova_pfn[i - 1] + 1; - pa->pa_pfn[i] = -1ULL; + pa->pa_iova[i] = pa->pa_iova[i 
- 1] + PAGE_SIZE; + pa->pa_page[i] = NULL; } return 0; } /* - * pfn_array_pin() - Pin user pages in memory - * @pa: pfn_array on which to perform the operation + * page_array_unpin() - Unpin user pages in memory + * @pa: page_array on which to perform the operation + * @vdev: the vfio device to perform the operation + * @pa_nr: number of user pages to unpin + * + * Only unpin if any pages were pinned to begin with, i.e. pa_nr > 0, + * otherwise only clear pa->pa_nr + */ +static void page_array_unpin(struct page_array *pa, + struct vfio_device *vdev, int pa_nr) +{ + int unpinned = 0, npage = 1; + + while (unpinned < pa_nr) { + dma_addr_t *first = &pa->pa_iova[unpinned]; + dma_addr_t *last = &first[npage]; + + if (unpinned + npage < pa_nr && + *first + npage * PAGE_SIZE == *last) { + npage++; + continue; + } + + vfio_unpin_pages(vdev, *first, npage); + unpinned += npage; + npage = 1; + } + + pa->pa_nr = 0; +} + +/* + * page_array_pin() - Pin user pages in memory + * @pa: page_array on which to perform the operation * @mdev: the mediated device to perform pin operations * * Returns number of pages pinned upon success. * If the pin request partially succeeds, or fails completely, * all pages are left unpinned and a negative error value is returned. 
*/ -static int pfn_array_pin(struct pfn_array *pa, struct vfio_device *vdev) +static int page_array_pin(struct page_array *pa, struct vfio_device *vdev) { + int pinned = 0, npage = 1; int ret = 0; - ret = vfio_pin_pages(vdev, pa->pa_iova_pfn, pa->pa_nr, - IOMMU_READ | IOMMU_WRITE, pa->pa_pfn); + while (pinned < pa->pa_nr) { + dma_addr_t *first = &pa->pa_iova[pinned]; + dma_addr_t *last = &first[npage]; - if (ret < 0) { - goto err_out; - } else if (ret > 0 && ret != pa->pa_nr) { - vfio_unpin_pages(vdev, pa->pa_iova_pfn, ret); - ret = -EINVAL; - goto err_out; + if (pinned + npage < pa->pa_nr && + *first + npage * PAGE_SIZE == *last) { + npage++; + continue; + } + + ret = vfio_pin_pages(vdev, *first, npage, + IOMMU_READ | IOMMU_WRITE, + &pa->pa_page[pinned]); + if (ret < 0) { + goto err_out; + } else if (ret > 0 && ret != npage) { + pinned += ret; + ret = -EINVAL; + goto err_out; + } + pinned += npage; + npage = 1; } return ret; err_out: - pa->pa_nr = 0; - + page_array_unpin(pa, vdev, pinned); return ret; } /* Unpin the pages before releasing the memory. 
*/ -static void pfn_array_unpin_free(struct pfn_array *pa, struct vfio_device *vdev) +static void page_array_unpin_free(struct page_array *pa, struct vfio_device *vdev) { - /* Only unpin if any pages were pinned to begin with */ - if (pa->pa_nr) - vfio_unpin_pages(vdev, pa->pa_iova_pfn, pa->pa_nr); - pa->pa_nr = 0; - kfree(pa->pa_iova_pfn); + page_array_unpin(pa, vdev, pa->pa_nr); + kfree(pa->pa_iova); } -static bool pfn_array_iova_pinned(struct pfn_array *pa, unsigned long iova) +static bool page_array_iova_pinned(struct page_array *pa, u64 iova, u64 length) { - unsigned long iova_pfn = iova >> PAGE_SHIFT; + u64 iova_pfn_start = iova >> PAGE_SHIFT; + u64 iova_pfn_end = (iova + length - 1) >> PAGE_SHIFT; + u64 pfn; int i; - for (i = 0; i < pa->pa_nr; i++) - if (pa->pa_iova_pfn[i] == iova_pfn) + for (i = 0; i < pa->pa_nr; i++) { + pfn = pa->pa_iova[i] >> PAGE_SHIFT; + if (pfn >= iova_pfn_start && pfn <= iova_pfn_end) return true; + } return false; } -/* Create the list of IDAL words for a pfn_array. */ -static inline void pfn_array_idal_create_words( - struct pfn_array *pa, - unsigned long *idaws) +/* Create the list of IDAL words for a page_array. 
*/ +static inline void page_array_idal_create_words(struct page_array *pa, + unsigned long *idaws) { int i; @@ -159,10 +200,10 @@ static inline void pfn_array_idal_create_words( */ for (i = 0; i < pa->pa_nr; i++) - idaws[i] = pa->pa_pfn[i] << PAGE_SHIFT; + idaws[i] = page_to_phys(pa->pa_page[i]); /* Adjust the first IDAW, since it may not start on a page boundary */ - idaws[0] += pa->pa_iova & (PAGE_SIZE - 1); + idaws[0] += pa->pa_iova[0] & (PAGE_SIZE - 1); } static void convert_ccw0_to_ccw1(struct ccw1 *source, unsigned long len) @@ -194,24 +235,24 @@ static void convert_ccw0_to_ccw1(struct ccw1 *source, unsigned long len) static long copy_from_iova(struct vfio_device *vdev, void *to, u64 iova, unsigned long n) { - struct pfn_array pa = {0}; - u64 from; + struct page_array pa = {0}; int i, ret; unsigned long l, m; - ret = pfn_array_alloc(&pa, iova, n); + ret = page_array_alloc(&pa, iova, n); if (ret < 0) return ret; - ret = pfn_array_pin(&pa, vdev); + ret = page_array_pin(&pa, vdev); if (ret < 0) { - pfn_array_unpin_free(&pa, vdev); + page_array_unpin_free(&pa, vdev); return ret; } l = n; for (i = 0; i < pa.pa_nr; i++) { - from = pa.pa_pfn[i] << PAGE_SHIFT; + void *from = kmap_local_page(pa.pa_page[i]); + m = PAGE_SIZE; if (i == 0) { from += iova & (PAGE_SIZE - 1); @@ -219,14 +260,15 @@ static long copy_from_iova(struct vfio_device *vdev, void *to, u64 iova, } m = min(l, m); - memcpy(to + (n - l), (void *)from, m); + memcpy(to + (n - l), from, m); + kunmap_local(from); l -= m; if (l == 0) break; } - pfn_array_unpin_free(&pa, vdev); + page_array_unpin_free(&pa, vdev); return l; } @@ -329,7 +371,7 @@ static struct ccwchain *ccwchain_alloc(struct channel_program *cp, int len) chain->ch_ccw = (struct ccw1 *)data; data = (u8 *)(chain->ch_ccw) + sizeof(*chain->ch_ccw) * len; - chain->ch_pa = (struct pfn_array *)data; + chain->ch_pa = (struct page_array *)data; chain->ch_len = len; @@ -513,7 +555,7 @@ static int ccwchain_fetch_direct(struct ccwchain *chain, struct 
vfio_device *vdev = &container_of(cp, struct vfio_ccw_private, cp)->vdev; struct ccw1 *ccw; - struct pfn_array *pa; + struct page_array *pa; u64 iova; unsigned long *idaws; int ret; @@ -547,13 +589,13 @@ static int ccwchain_fetch_direct(struct ccwchain *chain, } /* - * Allocate an array of pfn's for pages to pin/translate. + * Allocate an array of pages to pin/translate. * The number of pages is actually the count of the idaws * required for the data transfer, since we only only support * 4K IDAWs today. */ pa = chain->ch_pa + idx; - ret = pfn_array_alloc(pa, iova, bytes); + ret = page_array_alloc(pa, iova, bytes); if (ret < 0) goto out_free_idaws; @@ -564,21 +606,21 @@ static int ccwchain_fetch_direct(struct ccwchain *chain, goto out_unpin; /* - * Copy guest IDAWs into pfn_array, in case the memory they + * Copy guest IDAWs into page_array, in case the memory they * occupy is not contiguous. */ for (i = 0; i < idaw_nr; i++) - pa->pa_iova_pfn[i] = idaws[i] >> PAGE_SHIFT; + pa->pa_iova[i] = idaws[i]; } else { /* - * No action is required here; the iova addresses in pfn_array - * were initialized sequentially in pfn_array_alloc() beginning + * No action is required here; the iova addresses in page_array + * were initialized sequentially in page_array_alloc() beginning * with the contents of ccw->cda. 
*/ } if (ccw_does_data_transfer(ccw)) { - ret = pfn_array_pin(pa, vdev); + ret = page_array_pin(pa, vdev); if (ret < 0) goto out_unpin; } else { @@ -588,13 +630,13 @@ static int ccwchain_fetch_direct(struct ccwchain *chain, ccw->cda = (__u32) virt_to_phys(idaws); ccw->flags |= CCW_FLAG_IDA; - /* Populate the IDAL with pinned/translated addresses from pfn */ - pfn_array_idal_create_words(pa, idaws); + /* Populate the IDAL with pinned/translated addresses from page */ + page_array_idal_create_words(pa, idaws); return 0; out_unpin: - pfn_array_unpin_free(pa, vdev); + page_array_unpin_free(pa, vdev); out_free_idaws: kfree(idaws); out_init: @@ -700,7 +742,7 @@ void cp_free(struct channel_program *cp) cp->initialized = false; list_for_each_entry_safe(chain, temp, &cp->ccwchain_list, next) { for (i = 0; i < chain->ch_len; i++) { - pfn_array_unpin_free(chain->ch_pa + i, vdev); + page_array_unpin_free(chain->ch_pa + i, vdev); ccwchain_cda_free(chain, i); } ccwchain_free(chain); @@ -862,11 +904,12 @@ void cp_update_scsw(struct channel_program *cp, union scsw *scsw) * cp_iova_pinned() - check if an iova is pinned for a ccw chain. * @cp: channel_program on which to perform the operation * @iova: the iova to check + * @length: the length to check from @iova * * If the @iova is currently pinned for the ccw chain, return true; * else return false. 
*/ -bool cp_iova_pinned(struct channel_program *cp, u64 iova) +bool cp_iova_pinned(struct channel_program *cp, u64 iova, u64 length) { struct ccwchain *chain; int i; @@ -876,7 +919,7 @@ bool cp_iova_pinned(struct channel_program *cp, u64 iova) list_for_each_entry(chain, &cp->ccwchain_list, next) { for (i = 0; i < chain->ch_len; i++) - if (pfn_array_iova_pinned(chain->ch_pa + i, iova)) + if (page_array_iova_pinned(chain->ch_pa + i, iova, length)) return true; } diff --git a/drivers/s390/cio/vfio_ccw_cp.h b/drivers/s390/cio/vfio_ccw_cp.h index e4c436199b4c..54d26e242533 100644 --- a/drivers/s390/cio/vfio_ccw_cp.h +++ b/drivers/s390/cio/vfio_ccw_cp.h @@ -41,11 +41,11 @@ struct channel_program { struct ccw1 *guest_cp; }; -extern int cp_init(struct channel_program *cp, union orb *orb); -extern void cp_free(struct channel_program *cp); -extern int cp_prefetch(struct channel_program *cp); -extern union orb *cp_get_orb(struct channel_program *cp, u32 intparm, u8 lpm); -extern void cp_update_scsw(struct channel_program *cp, union scsw *scsw); -extern bool cp_iova_pinned(struct channel_program *cp, u64 iova); +int cp_init(struct channel_program *cp, union orb *orb); +void cp_free(struct channel_program *cp); +int cp_prefetch(struct channel_program *cp); +union orb *cp_get_orb(struct channel_program *cp, u32 intparm, u8 lpm); +void cp_update_scsw(struct channel_program *cp, union scsw *scsw); +bool cp_iova_pinned(struct channel_program *cp, u64 iova, u64 length); #endif diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c index ee182cfb467d..86d9e428357b 100644 --- a/drivers/s390/cio/vfio_ccw_drv.c +++ b/drivers/s390/cio/vfio_ccw_drv.c @@ -14,7 +14,6 @@ #include <linux/init.h> #include <linux/device.h> #include <linux/slab.h> -#include <linux/uuid.h> #include <linux/mdev.h> #include <asm/isc.h> @@ -42,13 +41,6 @@ int vfio_ccw_sch_quiesce(struct subchannel *sch) DECLARE_COMPLETION_ONSTACK(completion); int iretry, ret = 0; - spin_lock_irq(sch->lock); - 
if (!sch->schib.pmcw.ena) - goto out_unlock; - ret = cio_disable_subchannel(sch); - if (ret != -EBUSY) - goto out_unlock; - iretry = 255; do { @@ -75,9 +67,7 @@ int vfio_ccw_sch_quiesce(struct subchannel *sch) spin_lock_irq(sch->lock); ret = cio_disable_subchannel(sch); } while (ret == -EBUSY); -out_unlock: - private->state = VFIO_CCW_STATE_NOT_OPER; - spin_unlock_irq(sch->lock); + return ret; } @@ -107,9 +97,10 @@ static void vfio_ccw_sch_io_todo(struct work_struct *work) /* * Reset to IDLE only if processing of a channel program * has finished. Do not overwrite a possible processing - * state if the final interrupt was for HSCH or CSCH. + * state if the interrupt was unsolicited, or if the final + * interrupt was for HSCH or CSCH. */ - if (private->mdev && cp_is_finished) + if (cp_is_finished) private->state = VFIO_CCW_STATE_IDLE; if (private->io_trigger) @@ -147,7 +138,7 @@ static struct vfio_ccw_private *vfio_ccw_alloc_private(struct subchannel *sch) private->sch = sch; mutex_init(&private->io_mutex); - private->state = VFIO_CCW_STATE_NOT_OPER; + private->state = VFIO_CCW_STATE_STANDBY; INIT_LIST_HEAD(&private->crw); INIT_WORK(&private->io_work, vfio_ccw_sch_io_todo); INIT_WORK(&private->crw_work, vfio_ccw_crw_todo); @@ -231,26 +222,15 @@ static int vfio_ccw_sch_probe(struct subchannel *sch) dev_set_drvdata(&sch->dev, private); - spin_lock_irq(sch->lock); - sch->isc = VFIO_CCW_ISC; - ret = cio_enable_subchannel(sch, (u32)(unsigned long)sch); - spin_unlock_irq(sch->lock); + ret = mdev_register_device(&sch->dev, &vfio_ccw_mdev_driver); if (ret) goto out_free; - private->state = VFIO_CCW_STATE_STANDBY; - - ret = vfio_ccw_mdev_reg(sch); - if (ret) - goto out_disable; - VFIO_CCW_MSG_EVENT(4, "bound to subchannel %x.%x.%04x\n", sch->schid.cssid, sch->schid.ssid, sch->schid.sch_no); return 0; -out_disable: - cio_disable_subchannel(sch); out_free: dev_set_drvdata(&sch->dev, NULL); vfio_ccw_free_private(private); @@ -261,8 +241,7 @@ static void 
vfio_ccw_sch_remove(struct subchannel *sch) { struct vfio_ccw_private *private = dev_get_drvdata(&sch->dev); - vfio_ccw_sch_quiesce(sch); - vfio_ccw_mdev_unreg(sch); + mdev_unregister_device(&sch->dev); dev_set_drvdata(&sch->dev, NULL); @@ -275,7 +254,10 @@ static void vfio_ccw_sch_remove(struct subchannel *sch) static void vfio_ccw_sch_shutdown(struct subchannel *sch) { - vfio_ccw_sch_quiesce(sch); + struct vfio_ccw_private *private = dev_get_drvdata(&sch->dev); + + vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_CLOSE); + vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_NOT_OPER); } /** @@ -301,19 +283,11 @@ static int vfio_ccw_sch_event(struct subchannel *sch, int process) if (work_pending(&sch->todo_work)) goto out_unlock; - if (cio_update_schib(sch)) { - vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_NOT_OPER); - rc = 0; - goto out_unlock; - } - - private = dev_get_drvdata(&sch->dev); - if (private->state == VFIO_CCW_STATE_NOT_OPER) { - private->state = private->mdev ? VFIO_CCW_STATE_IDLE : - VFIO_CCW_STATE_STANDBY; - } rc = 0; + if (cio_update_schib(sch)) + vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_NOT_OPER); + out_unlock: spin_unlock_irqrestore(sch->lock, flags); @@ -358,8 +332,8 @@ static int vfio_ccw_chp_event(struct subchannel *sch, return 0; trace_vfio_ccw_chp_event(private->sch->schid, mask, event); - VFIO_CCW_MSG_EVENT(2, "%pUl (%x.%x.%04x): mask=0x%x event=%d\n", - mdev_uuid(private->mdev), sch->schid.cssid, + VFIO_CCW_MSG_EVENT(2, "sch %x.%x.%04x: mask=0x%x event=%d\n", + sch->schid.cssid, sch->schid.ssid, sch->schid.sch_no, mask, event); diff --git a/drivers/s390/cio/vfio_ccw_fsm.c b/drivers/s390/cio/vfio_ccw_fsm.c index 8483a266051c..a59c758869f8 100644 --- a/drivers/s390/cio/vfio_ccw_fsm.c +++ b/drivers/s390/cio/vfio_ccw_fsm.c @@ -10,7 +10,8 @@ */ #include <linux/vfio.h> -#include <linux/mdev.h> + +#include <asm/isc.h> #include "ioasm.h" #include "vfio_ccw_private.h" @@ -161,8 +162,12 @@ static void fsm_notoper(struct vfio_ccw_private *private, { struct 
subchannel *sch = private->sch; - VFIO_CCW_TRACE_EVENT(2, "notoper"); - VFIO_CCW_TRACE_EVENT(2, dev_name(&sch->dev)); + VFIO_CCW_MSG_EVENT(2, "sch %x.%x.%04x: notoper event %x state %x\n", + sch->schid.cssid, + sch->schid.ssid, + sch->schid.sch_no, + event, + private->state); /* * TODO: @@ -170,6 +175,9 @@ static void fsm_notoper(struct vfio_ccw_private *private, */ css_sched_sch_todo(sch, SCH_TODO_UNREG); private->state = VFIO_CCW_STATE_NOT_OPER; + + /* This is usually handled during CLOSE event */ + cp_free(&private->cp); } /* @@ -242,7 +250,6 @@ static void fsm_io_request(struct vfio_ccw_private *private, union orb *orb; union scsw *scsw = &private->scsw; struct ccw_io_region *io_region = private->io_region; - struct mdev_device *mdev = private->mdev; char *errstr = "request"; struct subchannel_id schid = get_schid(private); @@ -256,8 +263,8 @@ static void fsm_io_request(struct vfio_ccw_private *private, if (orb->tm.b) { io_region->ret_code = -EOPNOTSUPP; VFIO_CCW_MSG_EVENT(2, - "%pUl (%x.%x.%04x): transport mode\n", - mdev_uuid(mdev), schid.cssid, + "sch %x.%x.%04x: transport mode\n", + schid.cssid, schid.ssid, schid.sch_no); errstr = "transport mode"; goto err_out; @@ -265,8 +272,8 @@ static void fsm_io_request(struct vfio_ccw_private *private, io_region->ret_code = cp_init(&private->cp, orb); if (io_region->ret_code) { VFIO_CCW_MSG_EVENT(2, - "%pUl (%x.%x.%04x): cp_init=%d\n", - mdev_uuid(mdev), schid.cssid, + "sch %x.%x.%04x: cp_init=%d\n", + schid.cssid, schid.ssid, schid.sch_no, io_region->ret_code); errstr = "cp init"; @@ -276,8 +283,8 @@ static void fsm_io_request(struct vfio_ccw_private *private, io_region->ret_code = cp_prefetch(&private->cp); if (io_region->ret_code) { VFIO_CCW_MSG_EVENT(2, - "%pUl (%x.%x.%04x): cp_prefetch=%d\n", - mdev_uuid(mdev), schid.cssid, + "sch %x.%x.%04x: cp_prefetch=%d\n", + schid.cssid, schid.ssid, schid.sch_no, io_region->ret_code); errstr = "cp prefetch"; @@ -289,8 +296,8 @@ static void fsm_io_request(struct 
vfio_ccw_private *private, io_region->ret_code = fsm_io_helper(private); if (io_region->ret_code) { VFIO_CCW_MSG_EVENT(2, - "%pUl (%x.%x.%04x): fsm_io_helper=%d\n", - mdev_uuid(mdev), schid.cssid, + "sch %x.%x.%04x: fsm_io_helper=%d\n", + schid.cssid, schid.ssid, schid.sch_no, io_region->ret_code); errstr = "cp fsm_io_helper"; @@ -300,16 +307,16 @@ static void fsm_io_request(struct vfio_ccw_private *private, return; } else if (scsw->cmd.fctl & SCSW_FCTL_HALT_FUNC) { VFIO_CCW_MSG_EVENT(2, - "%pUl (%x.%x.%04x): halt on io_region\n", - mdev_uuid(mdev), schid.cssid, + "sch %x.%x.%04x: halt on io_region\n", + schid.cssid, schid.ssid, schid.sch_no); /* halt is handled via the async cmd region */ io_region->ret_code = -EOPNOTSUPP; goto err_out; } else if (scsw->cmd.fctl & SCSW_FCTL_CLEAR_FUNC) { VFIO_CCW_MSG_EVENT(2, - "%pUl (%x.%x.%04x): clear on io_region\n", - mdev_uuid(mdev), schid.cssid, + "sch %x.%x.%04x: clear on io_region\n", + schid.cssid, schid.ssid, schid.sch_no); /* clear is handled via the async cmd region */ io_region->ret_code = -EOPNOTSUPP; @@ -366,6 +373,54 @@ static void fsm_irq(struct vfio_ccw_private *private, complete(private->completion); } +static void fsm_open(struct vfio_ccw_private *private, + enum vfio_ccw_event event) +{ + struct subchannel *sch = private->sch; + int ret; + + spin_lock_irq(sch->lock); + sch->isc = VFIO_CCW_ISC; + ret = cio_enable_subchannel(sch, (u32)(unsigned long)sch); + if (ret) + goto err_unlock; + + private->state = VFIO_CCW_STATE_IDLE; + spin_unlock_irq(sch->lock); + return; + +err_unlock: + spin_unlock_irq(sch->lock); + vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_NOT_OPER); +} + +static void fsm_close(struct vfio_ccw_private *private, + enum vfio_ccw_event event) +{ + struct subchannel *sch = private->sch; + int ret; + + spin_lock_irq(sch->lock); + + if (!sch->schib.pmcw.ena) + goto err_unlock; + + ret = cio_disable_subchannel(sch); + if (ret == -EBUSY) + ret = vfio_ccw_sch_quiesce(sch); + if (ret) + goto err_unlock; + + 
private->state = VFIO_CCW_STATE_STANDBY; + spin_unlock_irq(sch->lock); + cp_free(&private->cp); + return; + +err_unlock: + spin_unlock_irq(sch->lock); + vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_NOT_OPER); +} + /* * Device statemachine */ @@ -375,29 +430,39 @@ fsm_func_t *vfio_ccw_jumptable[NR_VFIO_CCW_STATES][NR_VFIO_CCW_EVENTS] = { [VFIO_CCW_EVENT_IO_REQ] = fsm_io_error, [VFIO_CCW_EVENT_ASYNC_REQ] = fsm_async_error, [VFIO_CCW_EVENT_INTERRUPT] = fsm_disabled_irq, + [VFIO_CCW_EVENT_OPEN] = fsm_nop, + [VFIO_CCW_EVENT_CLOSE] = fsm_nop, }, [VFIO_CCW_STATE_STANDBY] = { [VFIO_CCW_EVENT_NOT_OPER] = fsm_notoper, [VFIO_CCW_EVENT_IO_REQ] = fsm_io_error, [VFIO_CCW_EVENT_ASYNC_REQ] = fsm_async_error, - [VFIO_CCW_EVENT_INTERRUPT] = fsm_irq, + [VFIO_CCW_EVENT_INTERRUPT] = fsm_disabled_irq, + [VFIO_CCW_EVENT_OPEN] = fsm_open, + [VFIO_CCW_EVENT_CLOSE] = fsm_notoper, }, [VFIO_CCW_STATE_IDLE] = { [VFIO_CCW_EVENT_NOT_OPER] = fsm_notoper, [VFIO_CCW_EVENT_IO_REQ] = fsm_io_request, [VFIO_CCW_EVENT_ASYNC_REQ] = fsm_async_request, [VFIO_CCW_EVENT_INTERRUPT] = fsm_irq, + [VFIO_CCW_EVENT_OPEN] = fsm_notoper, + [VFIO_CCW_EVENT_CLOSE] = fsm_close, }, [VFIO_CCW_STATE_CP_PROCESSING] = { [VFIO_CCW_EVENT_NOT_OPER] = fsm_notoper, [VFIO_CCW_EVENT_IO_REQ] = fsm_io_retry, [VFIO_CCW_EVENT_ASYNC_REQ] = fsm_async_retry, [VFIO_CCW_EVENT_INTERRUPT] = fsm_irq, + [VFIO_CCW_EVENT_OPEN] = fsm_notoper, + [VFIO_CCW_EVENT_CLOSE] = fsm_close, }, [VFIO_CCW_STATE_CP_PENDING] = { [VFIO_CCW_EVENT_NOT_OPER] = fsm_notoper, [VFIO_CCW_EVENT_IO_REQ] = fsm_io_busy, [VFIO_CCW_EVENT_ASYNC_REQ] = fsm_async_request, [VFIO_CCW_EVENT_INTERRUPT] = fsm_irq, + [VFIO_CCW_EVENT_OPEN] = fsm_notoper, + [VFIO_CCW_EVENT_CLOSE] = fsm_close, }, }; diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c index b49e2e9db2dc..4a806a2273b5 100644 --- a/drivers/s390/cio/vfio_ccw_ops.c +++ b/drivers/s390/cio/vfio_ccw_ops.c @@ -21,54 +21,28 @@ static const struct vfio_device_ops vfio_ccw_dev_ops; static int 
vfio_ccw_mdev_reset(struct vfio_ccw_private *private) { - struct subchannel *sch; - int ret; - - sch = private->sch; /* - * TODO: - * In the cureent stage, some things like "no I/O running" and "no - * interrupt pending" are clear, but we are not sure what other state - * we need to care about. - * There are still a lot more instructions need to be handled. We - * should come back here later. + * If the FSM state is seen as Not Operational after closing + * and re-opening the mdev, return an error. */ - ret = vfio_ccw_sch_quiesce(sch); - if (ret) - return ret; - - ret = cio_enable_subchannel(sch, (u32)(unsigned long)sch); - if (!ret) - private->state = VFIO_CCW_STATE_IDLE; + vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_CLOSE); + vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_OPEN); + if (private->state == VFIO_CCW_STATE_NOT_OPER) + return -EINVAL; - return ret; + return 0; } -static int vfio_ccw_mdev_notifier(struct notifier_block *nb, - unsigned long action, - void *data) +static void vfio_ccw_dma_unmap(struct vfio_device *vdev, u64 iova, u64 length) { struct vfio_ccw_private *private = - container_of(nb, struct vfio_ccw_private, nb); - - /* - * Vendor drivers MUST unpin pages in response to an - * invalidation. - */ - if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) { - struct vfio_iommu_type1_dma_unmap *unmap = data; - - if (!cp_iova_pinned(&private->cp, unmap->iova)) - return NOTIFY_OK; + container_of(vdev, struct vfio_ccw_private, vdev); - if (vfio_ccw_mdev_reset(private)) - return NOTIFY_BAD; + /* Drivers MUST unpin pages in response to an invalidation. 
*/ + if (!cp_iova_pinned(&private->cp, iova, length)) + return; - cp_free(&private->cp); - return NOTIFY_OK; - } - - return NOTIFY_DONE; + vfio_ccw_mdev_reset(private); } static ssize_t name_show(struct mdev_type *mtype, @@ -128,11 +102,8 @@ static int vfio_ccw_mdev_probe(struct mdev_device *mdev) vfio_init_group_dev(&private->vdev, &mdev->dev, &vfio_ccw_dev_ops); - private->mdev = mdev; - private->state = VFIO_CCW_STATE_IDLE; - - VFIO_CCW_MSG_EVENT(2, "mdev %pUl, sch %x.%x.%04x: create\n", - mdev_uuid(mdev), private->sch->schid.cssid, + VFIO_CCW_MSG_EVENT(2, "sch %x.%x.%04x: create\n", + private->sch->schid.cssid, private->sch->schid.ssid, private->sch->schid.sch_no); @@ -145,8 +116,6 @@ static int vfio_ccw_mdev_probe(struct mdev_device *mdev) err_atomic: vfio_uninit_group_dev(&private->vdev); atomic_inc(&private->avail); - private->mdev = NULL; - private->state = VFIO_CCW_STATE_IDLE; return ret; } @@ -154,23 +123,14 @@ static void vfio_ccw_mdev_remove(struct mdev_device *mdev) { struct vfio_ccw_private *private = dev_get_drvdata(mdev->dev.parent); - VFIO_CCW_MSG_EVENT(2, "mdev %pUl, sch %x.%x.%04x: remove\n", - mdev_uuid(mdev), private->sch->schid.cssid, + VFIO_CCW_MSG_EVENT(2, "sch %x.%x.%04x: remove\n", + private->sch->schid.cssid, private->sch->schid.ssid, private->sch->schid.sch_no); vfio_unregister_group_dev(&private->vdev); - if ((private->state != VFIO_CCW_STATE_NOT_OPER) && - (private->state != VFIO_CCW_STATE_STANDBY)) { - if (!vfio_ccw_sch_quiesce(private->sch)) - private->state = VFIO_CCW_STATE_STANDBY; - /* The state will be NOT_OPER on error. 
*/ - } - vfio_uninit_group_dev(&private->vdev); - cp_free(&private->cp); - private->mdev = NULL; atomic_inc(&private->avail); } @@ -178,19 +138,15 @@ static int vfio_ccw_mdev_open_device(struct vfio_device *vdev) { struct vfio_ccw_private *private = container_of(vdev, struct vfio_ccw_private, vdev); - unsigned long events = VFIO_IOMMU_NOTIFY_DMA_UNMAP; int ret; - private->nb.notifier_call = vfio_ccw_mdev_notifier; - - ret = vfio_register_notifier(vdev, VFIO_IOMMU_NOTIFY, - &events, &private->nb); - if (ret) - return ret; + /* Device cannot simply be opened again from this state */ + if (private->state == VFIO_CCW_STATE_NOT_OPER) + return -EINVAL; ret = vfio_ccw_register_async_dev_regions(private); if (ret) - goto out_unregister; + return ret; ret = vfio_ccw_register_schib_dev_regions(private); if (ret) @@ -200,11 +156,16 @@ static int vfio_ccw_mdev_open_device(struct vfio_device *vdev) if (ret) goto out_unregister; + vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_OPEN); + if (private->state == VFIO_CCW_STATE_NOT_OPER) { + ret = -EINVAL; + goto out_unregister; + } + return ret; out_unregister: vfio_ccw_unregister_dev_regions(private); - vfio_unregister_notifier(vdev, VFIO_IOMMU_NOTIFY, &private->nb); return ret; } @@ -213,16 +174,8 @@ static void vfio_ccw_mdev_close_device(struct vfio_device *vdev) struct vfio_ccw_private *private = container_of(vdev, struct vfio_ccw_private, vdev); - if ((private->state != VFIO_CCW_STATE_NOT_OPER) && - (private->state != VFIO_CCW_STATE_STANDBY)) { - if (!vfio_ccw_mdev_reset(private)) - private->state = VFIO_CCW_STATE_STANDBY; - /* The state will be NOT_OPER on error. 
*/ - } - - cp_free(&private->cp); + vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_CLOSE); vfio_ccw_unregister_dev_regions(private); - vfio_unregister_notifier(vdev, VFIO_IOMMU_NOTIFY, &private->nb); } static ssize_t vfio_ccw_mdev_read_io_region(struct vfio_ccw_private *private, @@ -645,6 +598,7 @@ static const struct vfio_device_ops vfio_ccw_dev_ops = { .write = vfio_ccw_mdev_write, .ioctl = vfio_ccw_mdev_ioctl, .request = vfio_ccw_mdev_request, + .dma_unmap = vfio_ccw_dma_unmap, }; struct mdev_driver vfio_ccw_mdev_driver = { @@ -657,13 +611,3 @@ struct mdev_driver vfio_ccw_mdev_driver = { .remove = vfio_ccw_mdev_remove, .supported_type_groups = mdev_type_groups, }; - -int vfio_ccw_mdev_reg(struct subchannel *sch) -{ - return mdev_register_device(&sch->dev, &vfio_ccw_mdev_driver); -} - -void vfio_ccw_mdev_unreg(struct subchannel *sch) -{ - mdev_unregister_device(&sch->dev); -} diff --git a/drivers/s390/cio/vfio_ccw_private.h b/drivers/s390/cio/vfio_ccw_private.h index 7272eb788612..cd24b7fada91 100644 --- a/drivers/s390/cio/vfio_ccw_private.h +++ b/drivers/s390/cio/vfio_ccw_private.h @@ -73,8 +73,6 @@ struct vfio_ccw_crw { * @state: internal state of the device * @completion: synchronization helper of the I/O completion * @avail: available for creating a mediated device - * @mdev: pointer to the mediated device - * @nb: notifier for vfio events * @io_region: MMIO region to input/output I/O arguments/results * @io_mutex: protect against concurrent update of I/O regions * @region: additional regions for other subchannel operations @@ -97,8 +95,6 @@ struct vfio_ccw_private { int state; struct completion *completion; atomic_t avail; - struct mdev_device *mdev; - struct notifier_block nb; struct ccw_io_region *io_region; struct mutex io_mutex; struct vfio_ccw_region *region; @@ -119,10 +115,7 @@ struct vfio_ccw_private { struct work_struct crw_work; } __aligned(8); -extern int vfio_ccw_mdev_reg(struct subchannel *sch); -extern void vfio_ccw_mdev_unreg(struct subchannel 
*sch); - -extern int vfio_ccw_sch_quiesce(struct subchannel *sch); +int vfio_ccw_sch_quiesce(struct subchannel *sch); extern struct mdev_driver vfio_ccw_mdev_driver; @@ -147,6 +140,8 @@ enum vfio_ccw_event { VFIO_CCW_EVENT_IO_REQ, VFIO_CCW_EVENT_INTERRUPT, VFIO_CCW_EVENT_ASYNC_REQ, + VFIO_CCW_EVENT_OPEN, + VFIO_CCW_EVENT_CLOSE, /* last element! */ NR_VFIO_CCW_EVENTS }; @@ -158,7 +153,7 @@ typedef void (fsm_func_t)(struct vfio_ccw_private *, enum vfio_ccw_event); extern fsm_func_t *vfio_ccw_jumptable[NR_VFIO_CCW_STATES][NR_VFIO_CCW_EVENTS]; static inline void vfio_ccw_fsm_event(struct vfio_ccw_private *private, - int event) + enum vfio_ccw_event event) { trace_vfio_ccw_fsm_event(private->sch->schid, private->state, event); vfio_ccw_jumptable[private->state][event](private, event); diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index 5c13d2079d96..8f1d1cf23d44 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -27,6 +27,7 @@ #include <linux/kthread.h> #include <linux/mutex.h> #include <asm/airq.h> +#include <asm/tpi.h> #include <linux/atomic.h> #include <asm/isc.h> #include <linux/hrtimer.h> @@ -131,7 +132,8 @@ static int ap_max_adapter_id = 63; static struct bus_type ap_bus_type; /* Adapter interrupt definitions */ -static void ap_interrupt_handler(struct airq_struct *airq, bool floating); +static void ap_interrupt_handler(struct airq_struct *airq, + struct tpi_info *tpi_info); static bool ap_irq_flag; @@ -452,9 +454,10 @@ static enum hrtimer_restart ap_poll_timeout(struct hrtimer *unused) /** * ap_interrupt_handler() - Schedule ap_tasklet on interrupt * @airq: pointer to adapter interrupt descriptor - * @floating: ignored + * @tpi_info: ignored */ -static void ap_interrupt_handler(struct airq_struct *airq, bool floating) +static void ap_interrupt_handler(struct airq_struct *airq, + struct tpi_info *tpi_info) { inc_irq_stat(IRQIO_APB); tasklet_schedule(&ap_tasklet); @@ -835,6 +838,17 @@ static void 
ap_bus_revise_bindings(void) bus_for_each_dev(&ap_bus_type, NULL, NULL, __ap_revise_reserved); } +/** + * ap_owned_by_def_drv: indicates whether an AP adapter is reserved for the + * default host driver or not. + * @card: the APID of the adapter card to check + * @queue: the APQI of the queue to check + * + * Note: the ap_perms_mutex must be locked by the caller of this function. + * + * Return: 1 if the APQN is reserved for the host, 0 if it is not, or -EINVAL + * if @card or @queue is out of range. + */ int ap_owned_by_def_drv(int card, int queue) { int rc = 0; @@ -842,25 +856,31 @@ int ap_owned_by_def_drv(int card, int queue) if (card < 0 || card >= AP_DEVICES || queue < 0 || queue >= AP_DOMAINS) return -EINVAL; - mutex_lock(&ap_perms_mutex); - if (test_bit_inv(card, ap_perms.apm) && test_bit_inv(queue, ap_perms.aqm)) rc = 1; - mutex_unlock(&ap_perms_mutex); - return rc; } EXPORT_SYMBOL(ap_owned_by_def_drv); +/** + * ap_apqn_in_matrix_owned_by_def_drv: indicates whether any APQN contained in + * a set is reserved for the host drivers + * or not. + * @apm: a bitmap specifying a set of APIDs comprising the APQNs to check + * @aqm: a bitmap specifying a set of APQIs comprising the APQNs to check + * + * Note: the ap_perms_mutex must be locked by the caller of this function. 
+ * + * Return: an int specifying whether any APQN is reserved for the host (1) or + * not (0). + */ int ap_apqn_in_matrix_owned_by_def_drv(unsigned long *apm, unsigned long *aqm) { int card, queue, rc = 0; - mutex_lock(&ap_perms_mutex); - for (card = 0; !rc && card < AP_DEVICES; card++) if (test_bit_inv(card, apm) && test_bit_inv(card, ap_perms.apm)) @@ -869,8 +889,6 @@ int ap_apqn_in_matrix_owned_by_def_drv(unsigned long *apm, test_bit_inv(queue, ap_perms.aqm)) rc = 1; - mutex_unlock(&ap_perms_mutex); - return rc; } EXPORT_SYMBOL(ap_apqn_in_matrix_owned_by_def_drv); @@ -1435,7 +1453,7 @@ static int __verify_queue_reservations(struct device_driver *drv, void *data) if (ap_drv->in_use) { rc = ap_drv->in_use(ap_perms.apm, newaqm); if (rc) - return -EBUSY; + rc = -EBUSY; } /* release the driver's module */ diff --git a/drivers/s390/crypto/ap_queue.c b/drivers/s390/crypto/ap_queue.c index c48b0db824e3..a32457b4cbb8 100644 --- a/drivers/s390/crypto/ap_queue.c +++ b/drivers/s390/crypto/ap_queue.c @@ -34,7 +34,7 @@ static int ap_queue_enable_irq(struct ap_queue *aq, void *ind) qirqctrl.ir = 1; qirqctrl.isc = AP_ISC; - status = ap_aqic(aq->qid, qirqctrl, ind); + status = ap_aqic(aq->qid, qirqctrl, virt_to_phys(ind)); switch (status.response_code) { case AP_RESPONSE_NORMAL: case AP_RESPONSE_OTHERWISE_CHANGED: diff --git a/drivers/s390/crypto/pkey_api.c b/drivers/s390/crypto/pkey_api.c index 7329caa7d467..5a05d1cdfec2 100644 --- a/drivers/s390/crypto/pkey_api.c +++ b/drivers/s390/crypto/pkey_api.c @@ -2115,5 +2115,5 @@ static void __exit pkey_exit(void) pkey_debug_exit(); } -module_cpu_feature_match(MSA, pkey_init); +module_cpu_feature_match(S390_CPU_FEATURE_MSA, pkey_init); module_exit(pkey_exit); diff --git a/drivers/s390/crypto/vfio_ap_drv.c b/drivers/s390/crypto/vfio_ap_drv.c index 4ac9c6521ec1..f43cfeabd2cc 100644 --- a/drivers/s390/crypto/vfio_ap_drv.c +++ b/drivers/s390/crypto/vfio_ap_drv.c @@ -18,9 +18,6 @@ #define VFIO_AP_ROOT_NAME "vfio_ap" #define 
VFIO_AP_DEV_NAME "matrix" -#define AP_QUEUE_ASSIGNED "assigned" -#define AP_QUEUE_UNASSIGNED "unassigned" -#define AP_QUEUE_IN_USE "in use" MODULE_AUTHOR("IBM Corporation"); MODULE_DESCRIPTION("VFIO AP device driver, Copyright IBM Corp. 2018"); @@ -46,120 +43,12 @@ static struct ap_device_id ap_queue_ids[] = { { /* end of sibling */ }, }; -static struct ap_matrix_mdev *vfio_ap_mdev_for_queue(struct vfio_ap_queue *q) -{ - struct ap_matrix_mdev *matrix_mdev; - unsigned long apid = AP_QID_CARD(q->apqn); - unsigned long apqi = AP_QID_QUEUE(q->apqn); - - list_for_each_entry(matrix_mdev, &matrix_dev->mdev_list, node) { - if (test_bit_inv(apid, matrix_mdev->matrix.apm) && - test_bit_inv(apqi, matrix_mdev->matrix.aqm)) - return matrix_mdev; - } - - return NULL; -} - -static ssize_t status_show(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - ssize_t nchars = 0; - struct vfio_ap_queue *q; - struct ap_matrix_mdev *matrix_mdev; - struct ap_device *apdev = to_ap_dev(dev); - - mutex_lock(&matrix_dev->lock); - q = dev_get_drvdata(&apdev->device); - matrix_mdev = vfio_ap_mdev_for_queue(q); - - if (matrix_mdev) { - if (matrix_mdev->kvm) - nchars = scnprintf(buf, PAGE_SIZE, "%s\n", - AP_QUEUE_IN_USE); - else - nchars = scnprintf(buf, PAGE_SIZE, "%s\n", - AP_QUEUE_ASSIGNED); - } else { - nchars = scnprintf(buf, PAGE_SIZE, "%s\n", - AP_QUEUE_UNASSIGNED); - } - - mutex_unlock(&matrix_dev->lock); - - return nchars; -} - -static DEVICE_ATTR_RO(status); - -static struct attribute *vfio_queue_attrs[] = { - &dev_attr_status.attr, - NULL, -}; - -static const struct attribute_group vfio_queue_attr_group = { - .attrs = vfio_queue_attrs, -}; - -/** - * vfio_ap_queue_dev_probe: Allocate a vfio_ap_queue structure and associate it - * with the device as driver_data. 
- * - * @apdev: the AP device being probed - * - * Return: returns 0 if the probe succeeded; otherwise, returns an error if - * storage could not be allocated for a vfio_ap_queue object or the - * sysfs 'status' attribute could not be created for the queue device. - */ -static int vfio_ap_queue_dev_probe(struct ap_device *apdev) -{ - int ret; - struct vfio_ap_queue *q; - - q = kzalloc(sizeof(*q), GFP_KERNEL); - if (!q) - return -ENOMEM; - - mutex_lock(&matrix_dev->lock); - dev_set_drvdata(&apdev->device, q); - q->apqn = to_ap_queue(&apdev->device)->qid; - q->saved_isc = VFIO_AP_ISC_INVALID; - - ret = sysfs_create_group(&apdev->device.kobj, &vfio_queue_attr_group); - if (ret) { - dev_set_drvdata(&apdev->device, NULL); - kfree(q); - } - - mutex_unlock(&matrix_dev->lock); - - return ret; -} - -/** - * vfio_ap_queue_dev_remove: Free the associated vfio_ap_queue structure. - * - * @apdev: the AP device being removed - * - * Takes the matrix lock to avoid actions on this device while doing the remove. 
- */ -static void vfio_ap_queue_dev_remove(struct ap_device *apdev) -{ - struct vfio_ap_queue *q; - - mutex_lock(&matrix_dev->lock); - sysfs_remove_group(&apdev->device.kobj, &vfio_queue_attr_group); - q = dev_get_drvdata(&apdev->device); - vfio_ap_mdev_reset_queue(q, 1); - dev_set_drvdata(&apdev->device, NULL); - kfree(q); - mutex_unlock(&matrix_dev->lock); -} - static struct ap_driver vfio_ap_drv = { - .probe = vfio_ap_queue_dev_probe, - .remove = vfio_ap_queue_dev_remove, + .probe = vfio_ap_mdev_probe_queue, + .remove = vfio_ap_mdev_remove_queue, + .in_use = vfio_ap_mdev_resource_in_use, + .on_config_changed = vfio_ap_on_cfg_changed, + .on_scan_complete = vfio_ap_on_scan_complete, .ids = ap_queue_ids, }; @@ -212,8 +101,9 @@ static int vfio_ap_matrix_dev_create(void) goto matrix_alloc_err; } - mutex_init(&matrix_dev->lock); + mutex_init(&matrix_dev->mdevs_lock); INIT_LIST_HEAD(&matrix_dev->mdev_list); + mutex_init(&matrix_dev->guests_lock); dev_set_name(&matrix_dev->device, "%s", VFIO_AP_DEV_NAME); matrix_dev->device.parent = root_device; diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c index a7d2a95796d3..6c8c41fac4e1 100644 --- a/drivers/s390/crypto/vfio_ap_ops.c +++ b/drivers/s390/crypto/vfio_ap_ops.c @@ -26,44 +26,193 @@ #define VFIO_AP_MDEV_TYPE_HWVIRT "passthrough" #define VFIO_AP_MDEV_NAME_HWVIRT "VFIO AP Passthrough Device" -static int vfio_ap_mdev_reset_queues(struct ap_matrix_mdev *matrix_mdev); +#define AP_QUEUE_ASSIGNED "assigned" +#define AP_QUEUE_UNASSIGNED "unassigned" +#define AP_QUEUE_IN_USE "in use" + +static int vfio_ap_mdev_reset_queues(struct ap_queue_table *qtable); static struct vfio_ap_queue *vfio_ap_find_queue(int apqn); static const struct vfio_device_ops vfio_ap_matrix_dev_ops; +static int vfio_ap_mdev_reset_queue(struct vfio_ap_queue *q, unsigned int retry); + +/** + * get_update_locks_for_kvm: Acquire the locks required to dynamically update a + * KVM guest's APCB in the proper order. 
+ * + * @kvm: a pointer to a struct kvm object containing the KVM guest's APCB. + * + * The proper locking order is: + * 1. matrix_dev->guests_lock: required to use the KVM pointer to update a KVM + * guest's APCB. + * 2. kvm->lock: required to update a guest's APCB + * 3. matrix_dev->mdevs_lock: required to access data stored in a matrix_mdev + * + * Note: If @kvm is NULL, the KVM lock will not be taken. + */ +static inline void get_update_locks_for_kvm(struct kvm *kvm) +{ + mutex_lock(&matrix_dev->guests_lock); + if (kvm) + mutex_lock(&kvm->lock); + mutex_lock(&matrix_dev->mdevs_lock); +} + +/** + * release_update_locks_for_kvm: Release the locks used to dynamically update a + * KVM guest's APCB in the proper order. + * + * @kvm: a pointer to a struct kvm object containing the KVM guest's APCB. + * + * The proper unlocking order is: + * 1. matrix_dev->mdevs_lock + * 2. kvm->lock + * 3. matrix_dev->guests_lock + * + * Note: If @kvm is NULL, the KVM lock will not be released. + */ +static inline void release_update_locks_for_kvm(struct kvm *kvm) +{ + mutex_unlock(&matrix_dev->mdevs_lock); + if (kvm) + mutex_unlock(&kvm->lock); + mutex_unlock(&matrix_dev->guests_lock); +} + +/** + * get_update_locks_for_mdev: Acquire the locks required to dynamically update a + * KVM guest's APCB in the proper order. + * + * @matrix_mdev: a pointer to a struct ap_matrix_mdev object containing the AP + * configuration data to use to update a KVM guest's APCB. + * + * The proper locking order is: + * 1. matrix_dev->guests_lock: required to use the KVM pointer to update a KVM + * guest's APCB. + * 2. matrix_mdev->kvm->lock: required to update a guest's APCB + * 3. matrix_dev->mdevs_lock: required to access data stored in a matrix_mdev + * + * Note: If @matrix_mdev is NULL or is not attached to a KVM guest, the KVM + * lock will not be taken. 
+ */ +static inline void get_update_locks_for_mdev(struct ap_matrix_mdev *matrix_mdev) +{ + mutex_lock(&matrix_dev->guests_lock); + if (matrix_mdev && matrix_mdev->kvm) + mutex_lock(&matrix_mdev->kvm->lock); + mutex_lock(&matrix_dev->mdevs_lock); +} + +/** + * release_update_locks_for_mdev: Release the locks used to dynamically update a + * KVM guest's APCB in the proper order. + * + * @matrix_mdev: a pointer to a struct ap_matrix_mdev object containing the AP + * configuration data to use to update a KVM guest's APCB. + * + * The proper unlocking order is: + * 1. matrix_dev->mdevs_lock + * 2. matrix_mdev->kvm->lock + * 3. matrix_dev->guests_lock + * + * Note: If @matrix_mdev is NULL or is not attached to a KVM guest, the KVM + * lock will not be released. + */ +static inline void release_update_locks_for_mdev(struct ap_matrix_mdev *matrix_mdev) +{ + mutex_unlock(&matrix_dev->mdevs_lock); + if (matrix_mdev && matrix_mdev->kvm) + mutex_unlock(&matrix_mdev->kvm->lock); + mutex_unlock(&matrix_dev->guests_lock); +} -static int match_apqn(struct device *dev, const void *data) +/** + * get_update_locks_by_apqn: Find the mdev to which an APQN is assigned and + * acquire the locks required to update the APCB of + * the KVM guest to which the mdev is attached. + * + * @apqn: the APQN of a queue device. + * + * The proper locking order is: + * 1. matrix_dev->guests_lock: required to use the KVM pointer to update a KVM + * guest's APCB. + * 2. matrix_mdev->kvm->lock: required to update a guest's APCB + * 3. matrix_dev->mdevs_lock: required to access data stored in a matrix_mdev + * + * Note: If @apqn is not assigned to a matrix_mdev, the matrix_mdev->kvm->lock + * will not be taken. + * + * Return: the ap_matrix_mdev object to which @apqn is assigned or NULL if @apqn + * is not assigned to an ap_matrix_mdev. 
+ */ +static struct ap_matrix_mdev *get_update_locks_by_apqn(int apqn) { - struct vfio_ap_queue *q = dev_get_drvdata(dev); + struct ap_matrix_mdev *matrix_mdev; + + mutex_lock(&matrix_dev->guests_lock); + + list_for_each_entry(matrix_mdev, &matrix_dev->mdev_list, node) { + if (test_bit_inv(AP_QID_CARD(apqn), matrix_mdev->matrix.apm) && + test_bit_inv(AP_QID_QUEUE(apqn), matrix_mdev->matrix.aqm)) { + if (matrix_mdev->kvm) + mutex_lock(&matrix_mdev->kvm->lock); + + mutex_lock(&matrix_dev->mdevs_lock); + + return matrix_mdev; + } + } - return (q->apqn == *(int *)(data)) ? 1 : 0; + mutex_lock(&matrix_dev->mdevs_lock); + + return NULL; } /** - * vfio_ap_get_queue - retrieve a queue with a specific APQN from a list - * @matrix_mdev: the associated mediated matrix - * @apqn: The queue APQN + * get_update_locks_for_queue: get the locks required to update the APCB of the + * KVM guest to which the matrix mdev linked to a + * vfio_ap_queue object is attached. + * + * @q: a pointer to a vfio_ap_queue object. * - * Retrieve a queue with a specific APQN from the list of the - * devices of the vfio_ap_drv. - * Verify that the APID and the APQI are set in the matrix. + * The proper locking order is: + * 1. matrix_dev->guests_lock: required to use the KVM pointer to update a + * KVM guest's APCB. + * 2. q->matrix_mdev->kvm->lock: required to update a guest's APCB + * 3. matrix_dev->mdevs_lock: required to access data stored in a matrix_mdev + * + * Note: if @q is not linked to an ap_matrix_mdev object, the KVM lock + * will not be taken. 
+ */ +static inline void get_update_locks_for_queue(struct vfio_ap_queue *q) +{ + mutex_lock(&matrix_dev->guests_lock); + if (q->matrix_mdev && q->matrix_mdev->kvm) + mutex_lock(&q->matrix_mdev->kvm->lock); + mutex_lock(&matrix_dev->mdevs_lock); +} + +/** + * vfio_ap_mdev_get_queue - retrieve a queue with a specific APQN from a + * hash table of queues assigned to a matrix mdev + * @matrix_mdev: the matrix mdev + * @apqn: The APQN of a queue device * - * Return: the pointer to the associated vfio_ap_queue + * Return: the pointer to the vfio_ap_queue struct representing the queue or + * NULL if the queue is not assigned to @matrix_mdev */ -static struct vfio_ap_queue *vfio_ap_get_queue( +static struct vfio_ap_queue *vfio_ap_mdev_get_queue( struct ap_matrix_mdev *matrix_mdev, int apqn) { struct vfio_ap_queue *q; - if (!test_bit_inv(AP_QID_CARD(apqn), matrix_mdev->matrix.apm)) - return NULL; - if (!test_bit_inv(AP_QID_QUEUE(apqn), matrix_mdev->matrix.aqm)) - return NULL; - - q = vfio_ap_find_queue(apqn); - if (q) - q->matrix_mdev = matrix_mdev; + hash_for_each_possible(matrix_mdev->qtable.queues, q, mdev_qnode, + apqn) { + if (q && q->apqn == apqn) + return q; + } - return q; + return NULL; } /** @@ -112,7 +261,7 @@ static void vfio_ap_wait_for_irqclear(int apqn) * * Unregisters the ISC in the GIB when the saved ISC not invalid. * Unpins the guest's page holding the NIB when it exists. - * Resets the saved_pfn and saved_isc to invalid values. + * Resets the saved_iova and saved_isc to invalid values. 
*/ static void vfio_ap_free_aqic_resources(struct vfio_ap_queue *q) { @@ -123,9 +272,9 @@ static void vfio_ap_free_aqic_resources(struct vfio_ap_queue *q) kvm_s390_gisc_unregister(q->matrix_mdev->kvm, q->saved_isc); q->saved_isc = VFIO_AP_ISC_INVALID; } - if (q->saved_pfn && !WARN_ON(!q->matrix_mdev)) { - vfio_unpin_pages(&q->matrix_mdev->vdev, &q->saved_pfn, 1); - q->saved_pfn = 0; + if (q->saved_iova && !WARN_ON(!q->matrix_mdev)) { + vfio_unpin_pages(&q->matrix_mdev->vdev, q->saved_iova, 1); + q->saved_iova = 0; } } @@ -154,7 +303,7 @@ static struct ap_queue_status vfio_ap_irq_disable(struct vfio_ap_queue *q) int retries = 5; do { - status = ap_aqic(q->apqn, aqic_gisa, NULL); + status = ap_aqic(q->apqn, aqic_gisa, 0); switch (status.response_code) { case AP_RESPONSE_OTHERWISE_CHANGED: case AP_RESPONSE_NORMAL: @@ -180,7 +329,6 @@ static struct ap_queue_status vfio_ap_irq_disable(struct vfio_ap_queue *q) status.response_code); end_free: vfio_ap_free_aqic_resources(q); - q->matrix_mdev = NULL; return status; } @@ -189,27 +337,19 @@ end_free: * * @vcpu: the object representing the vcpu executing the PQAP(AQIC) instruction. * @nib: the location for storing the nib address. - * @g_pfn: the location for storing the page frame number of the page containing - * the nib. * * When the PQAP(AQIC) instruction is executed, general register 2 contains the * address of the notification indicator byte (nib) used for IRQ notification. - * This function parses the nib from gr2 and calculates the page frame - * number for the guest of the page containing the nib. The values are - * stored in @nib and @g_pfn respectively. - * - * The g_pfn of the nib is then validated to ensure the nib address is valid. + * This function parses and validates the nib from gr2. * * Return: returns zero if the nib address is a valid; otherwise, returns * -EINVAL. 
*/ -static int vfio_ap_validate_nib(struct kvm_vcpu *vcpu, unsigned long *nib, - unsigned long *g_pfn) +static int vfio_ap_validate_nib(struct kvm_vcpu *vcpu, dma_addr_t *nib) { *nib = vcpu->run->s.regs.gprs[2]; - *g_pfn = *nib >> PAGE_SHIFT; - if (kvm_is_error_hva(gfn_to_hva(vcpu->kvm, *g_pfn))) + if (kvm_is_error_hva(gfn_to_hva(vcpu->kvm, *nib >> PAGE_SHIFT))) return -EINVAL; return 0; @@ -239,33 +379,34 @@ static struct ap_queue_status vfio_ap_irq_enable(struct vfio_ap_queue *q, int isc, struct kvm_vcpu *vcpu) { - unsigned long nib; struct ap_qirq_ctrl aqic_gisa = {}; struct ap_queue_status status = {}; struct kvm_s390_gisa *gisa; + struct page *h_page; int nisc; struct kvm *kvm; - unsigned long h_nib, g_pfn, h_pfn; + phys_addr_t h_nib; + dma_addr_t nib; int ret; /* Verify that the notification indicator byte address is valid */ - if (vfio_ap_validate_nib(vcpu, &nib, &g_pfn)) { - VFIO_AP_DBF_WARN("%s: invalid NIB address: nib=%#lx, g_pfn=%#lx, apqn=%#04x\n", - __func__, nib, g_pfn, q->apqn); + if (vfio_ap_validate_nib(vcpu, &nib)) { + VFIO_AP_DBF_WARN("%s: invalid NIB address: nib=%pad, apqn=%#04x\n", + __func__, &nib, q->apqn); status.response_code = AP_RESPONSE_INVALID_ADDRESS; return status; } - ret = vfio_pin_pages(&q->matrix_mdev->vdev, &g_pfn, 1, - IOMMU_READ | IOMMU_WRITE, &h_pfn); + ret = vfio_pin_pages(&q->matrix_mdev->vdev, nib, 1, + IOMMU_READ | IOMMU_WRITE, &h_page); switch (ret) { case 1: break; default: VFIO_AP_DBF_WARN("%s: vfio_pin_pages failed: rc=%d," - "nib=%#lx, g_pfn=%#lx, apqn=%#04x\n", - __func__, ret, nib, g_pfn, q->apqn); + "nib=%pad, apqn=%#04x\n", + __func__, ret, &nib, q->apqn); status.response_code = AP_RESPONSE_INVALID_ADDRESS; return status; @@ -274,7 +415,7 @@ static struct ap_queue_status vfio_ap_irq_enable(struct vfio_ap_queue *q, kvm = q->matrix_mdev->kvm; gisa = kvm->arch.gisa_int.origin; - h_nib = (h_pfn << PAGE_SHIFT) | (nib & ~PAGE_MASK); + h_nib = page_to_phys(h_page) | (nib & ~PAGE_MASK); aqic_gisa.gisc = isc; nisc = 
kvm_s390_gisc_register(kvm, isc); @@ -290,17 +431,17 @@ static struct ap_queue_status vfio_ap_irq_enable(struct vfio_ap_queue *q, aqic_gisa.ir = 1; aqic_gisa.gisa = (uint64_t)gisa >> 4; - status = ap_aqic(q->apqn, aqic_gisa, (void *)h_nib); + status = ap_aqic(q->apqn, aqic_gisa, h_nib); switch (status.response_code) { case AP_RESPONSE_NORMAL: /* See if we did clear older IRQ configuration */ vfio_ap_free_aqic_resources(q); - q->saved_pfn = g_pfn; + q->saved_iova = nib; q->saved_isc = isc; break; case AP_RESPONSE_OTHERWISE_CHANGED: /* We could not modify IRQ setings: clear new configuration */ - vfio_unpin_pages(&q->matrix_mdev->vdev, &g_pfn, 1); + vfio_unpin_pages(&q->matrix_mdev->vdev, nib, 1); kvm_s390_gisc_unregister(kvm, isc); break; default: @@ -406,10 +547,12 @@ static int handle_pqap(struct kvm_vcpu *vcpu) return -EOPNOTSUPP; } - mutex_lock(&matrix_dev->lock); + mutex_lock(&matrix_dev->mdevs_lock); + if (!vcpu->kvm->arch.crypto.pqap_hook) { VFIO_AP_DBF_WARN("%s: PQAP(AQIC) hook not registered with the vfio_ap driver: apqn=0x%04x\n", __func__, apqn); + goto out_unlock; } @@ -425,7 +568,7 @@ static int handle_pqap(struct kvm_vcpu *vcpu) goto out_unlock; } - q = vfio_ap_get_queue(matrix_mdev, apqn); + q = vfio_ap_mdev_get_queue(matrix_mdev, apqn); if (!q) { VFIO_AP_DBF_WARN("%s: Queue %02x.%04x not bound to the vfio_ap driver\n", __func__, AP_QID_CARD(apqn), @@ -444,7 +587,7 @@ static int handle_pqap(struct kvm_vcpu *vcpu) out_unlock: memcpy(&vcpu->run->s.regs.gprs[1], &qstatus, sizeof(qstatus)); vcpu->run->s.regs.gprs[1] >>= 32; - mutex_unlock(&matrix_dev->lock); + mutex_unlock(&matrix_dev->mdevs_lock); return 0; } @@ -456,6 +599,91 @@ static void vfio_ap_matrix_init(struct ap_config_info *info, matrix->adm_max = info->apxa ? 
info->Nd : 15; } +static void vfio_ap_mdev_update_guest_apcb(struct ap_matrix_mdev *matrix_mdev) +{ + if (matrix_mdev->kvm) + kvm_arch_crypto_set_masks(matrix_mdev->kvm, + matrix_mdev->shadow_apcb.apm, + matrix_mdev->shadow_apcb.aqm, + matrix_mdev->shadow_apcb.adm); +} + +static bool vfio_ap_mdev_filter_cdoms(struct ap_matrix_mdev *matrix_mdev) +{ + DECLARE_BITMAP(prev_shadow_adm, AP_DOMAINS); + + bitmap_copy(prev_shadow_adm, matrix_mdev->shadow_apcb.adm, AP_DOMAINS); + bitmap_and(matrix_mdev->shadow_apcb.adm, matrix_mdev->matrix.adm, + (unsigned long *)matrix_dev->info.adm, AP_DOMAINS); + + return !bitmap_equal(prev_shadow_adm, matrix_mdev->shadow_apcb.adm, + AP_DOMAINS); +} + +/* + * vfio_ap_mdev_filter_matrix - filter the APQNs assigned to the matrix mdev + * to ensure no queue devices are passed through to + * the guest that are not bound to the vfio_ap + * device driver. + * + * @matrix_mdev: the matrix mdev whose matrix is to be filtered. + * + * Note: If an APQN referencing a queue device that is not bound to the vfio_ap + * driver, its APID will be filtered from the guest's APCB. The matrix + * structure precludes filtering an individual APQN, so its APID will be + * filtered. + * + * Return: a boolean value indicating whether the KVM guest's APCB was changed + * by the filtering or not. + */ +static bool vfio_ap_mdev_filter_matrix(unsigned long *apm, unsigned long *aqm, + struct ap_matrix_mdev *matrix_mdev) +{ + unsigned long apid, apqi, apqn; + DECLARE_BITMAP(prev_shadow_apm, AP_DEVICES); + DECLARE_BITMAP(prev_shadow_aqm, AP_DOMAINS); + struct vfio_ap_queue *q; + + bitmap_copy(prev_shadow_apm, matrix_mdev->shadow_apcb.apm, AP_DEVICES); + bitmap_copy(prev_shadow_aqm, matrix_mdev->shadow_apcb.aqm, AP_DOMAINS); + vfio_ap_matrix_init(&matrix_dev->info, &matrix_mdev->shadow_apcb); + + /* + * Copy the adapters, domains and control domains to the shadow_apcb + * from the matrix mdev, but only those that are assigned to the host's + * AP configuration. 
+ */ + bitmap_and(matrix_mdev->shadow_apcb.apm, matrix_mdev->matrix.apm, + (unsigned long *)matrix_dev->info.apm, AP_DEVICES); + bitmap_and(matrix_mdev->shadow_apcb.aqm, matrix_mdev->matrix.aqm, + (unsigned long *)matrix_dev->info.aqm, AP_DOMAINS); + + for_each_set_bit_inv(apid, apm, AP_DEVICES) { + for_each_set_bit_inv(apqi, aqm, AP_DOMAINS) { + /* + * If the APQN is not bound to the vfio_ap device + * driver, then we can't assign it to the guest's + * AP configuration. The AP architecture won't + * allow filtering of a single APQN, so let's filter + * the APID since an adapter represents a physical + * hardware device. + */ + apqn = AP_MKQID(apid, apqi); + q = vfio_ap_mdev_get_queue(matrix_mdev, apqn); + if (!q || q->reset_rc) { + clear_bit_inv(apid, + matrix_mdev->shadow_apcb.apm); + break; + } + } + } + + return !bitmap_equal(prev_shadow_apm, matrix_mdev->shadow_apcb.apm, + AP_DEVICES) || + !bitmap_equal(prev_shadow_aqm, matrix_mdev->shadow_apcb.aqm, + AP_DOMAINS); +} + static int vfio_ap_mdev_probe(struct mdev_device *mdev) { struct ap_matrix_mdev *matrix_mdev; @@ -475,20 +703,19 @@ static int vfio_ap_mdev_probe(struct mdev_device *mdev) matrix_mdev->mdev = mdev; vfio_ap_matrix_init(&matrix_dev->info, &matrix_mdev->matrix); matrix_mdev->pqap_hook = handle_pqap; - mutex_lock(&matrix_dev->lock); - list_add(&matrix_mdev->node, &matrix_dev->mdev_list); - mutex_unlock(&matrix_dev->lock); + vfio_ap_matrix_init(&matrix_dev->info, &matrix_mdev->shadow_apcb); + hash_init(matrix_mdev->qtable.queues); ret = vfio_register_emulated_iommu_dev(&matrix_mdev->vdev); if (ret) goto err_list; dev_set_drvdata(&mdev->dev, matrix_mdev); + mutex_lock(&matrix_dev->mdevs_lock); + list_add(&matrix_mdev->node, &matrix_dev->mdev_list); + mutex_unlock(&matrix_dev->mdevs_lock); return 0; err_list: - mutex_lock(&matrix_dev->lock); - list_del(&matrix_mdev->node); - mutex_unlock(&matrix_dev->lock); vfio_uninit_group_dev(&matrix_mdev->vdev); kfree(matrix_mdev); err_dec_available: @@ -496,16 
+723,62 @@ err_dec_available: return ret; } +static void vfio_ap_mdev_link_queue(struct ap_matrix_mdev *matrix_mdev, + struct vfio_ap_queue *q) +{ + if (q) { + q->matrix_mdev = matrix_mdev; + hash_add(matrix_mdev->qtable.queues, &q->mdev_qnode, q->apqn); + } +} + +static void vfio_ap_mdev_link_apqn(struct ap_matrix_mdev *matrix_mdev, int apqn) +{ + struct vfio_ap_queue *q; + + q = vfio_ap_find_queue(apqn); + vfio_ap_mdev_link_queue(matrix_mdev, q); +} + +static void vfio_ap_unlink_queue_fr_mdev(struct vfio_ap_queue *q) +{ + hash_del(&q->mdev_qnode); +} + +static void vfio_ap_unlink_mdev_fr_queue(struct vfio_ap_queue *q) +{ + q->matrix_mdev = NULL; +} + +static void vfio_ap_mdev_unlink_fr_queues(struct ap_matrix_mdev *matrix_mdev) +{ + struct vfio_ap_queue *q; + unsigned long apid, apqi; + + for_each_set_bit_inv(apid, matrix_mdev->matrix.apm, AP_DEVICES) { + for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm, + AP_DOMAINS) { + q = vfio_ap_mdev_get_queue(matrix_mdev, + AP_MKQID(apid, apqi)); + if (q) + q->matrix_mdev = NULL; + } + } +} + static void vfio_ap_mdev_remove(struct mdev_device *mdev) { struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(&mdev->dev); vfio_unregister_group_dev(&matrix_mdev->vdev); - mutex_lock(&matrix_dev->lock); - vfio_ap_mdev_reset_queues(matrix_mdev); + mutex_lock(&matrix_dev->guests_lock); + mutex_lock(&matrix_dev->mdevs_lock); + vfio_ap_mdev_reset_queues(&matrix_mdev->qtable); + vfio_ap_mdev_unlink_fr_queues(matrix_mdev); list_del(&matrix_mdev->node); - mutex_unlock(&matrix_dev->lock); + mutex_unlock(&matrix_dev->mdevs_lock); + mutex_unlock(&matrix_dev->guests_lock); vfio_uninit_group_dev(&matrix_mdev->vdev); kfree(matrix_mdev); atomic_inc(&matrix_dev->available_instances); @@ -554,141 +827,48 @@ static struct attribute_group *vfio_ap_mdev_type_groups[] = { NULL, }; -struct vfio_ap_queue_reserved { - unsigned long *apid; - unsigned long *apqi; - bool reserved; -}; - -/** - * vfio_ap_has_queue - determines if the AP queue containing the 
target in @data - * - * @dev: an AP queue device - * @data: a struct vfio_ap_queue_reserved reference - * - * Flags whether the AP queue device (@dev) has a queue ID containing the APQN, - * apid or apqi specified in @data: - * - * - If @data contains both an apid and apqi value, then @data will be flagged - * as reserved if the APID and APQI fields for the AP queue device matches - * - * - If @data contains only an apid value, @data will be flagged as - * reserved if the APID field in the AP queue device matches - * - * - If @data contains only an apqi value, @data will be flagged as - * reserved if the APQI field in the AP queue device matches - * - * Return: 0 to indicate the input to function succeeded. Returns -EINVAL if - * @data does not contain either an apid or apqi. - */ -static int vfio_ap_has_queue(struct device *dev, void *data) -{ - struct vfio_ap_queue_reserved *qres = data; - struct ap_queue *ap_queue = to_ap_queue(dev); - ap_qid_t qid; - unsigned long id; - - if (qres->apid && qres->apqi) { - qid = AP_MKQID(*qres->apid, *qres->apqi); - if (qid == ap_queue->qid) - qres->reserved = true; - } else if (qres->apid && !qres->apqi) { - id = AP_QID_CARD(ap_queue->qid); - if (id == *qres->apid) - qres->reserved = true; - } else if (!qres->apid && qres->apqi) { - id = AP_QID_QUEUE(ap_queue->qid); - if (id == *qres->apqi) - qres->reserved = true; - } else { - return -EINVAL; - } - - return 0; -} - -/** - * vfio_ap_verify_queue_reserved - verifies that the AP queue containing - * @apid or @aqpi is reserved - * - * @apid: an AP adapter ID - * @apqi: an AP queue index - * - * Verifies that the AP queue with @apid/@apqi is reserved by the VFIO AP device - * driver according to the following rules: - * - * - If both @apid and @apqi are not NULL, then there must be an AP queue - * device bound to the vfio_ap driver with the APQN identified by @apid and - * @apqi - * - * - If only @apid is not NULL, then there must be an AP queue device bound - * to the vfio_ap 
driver with an APQN containing @apid - * - * - If only @apqi is not NULL, then there must be an AP queue device bound - * to the vfio_ap driver with an APQN containing @apqi - * - * Return: 0 if the AP queue is reserved; otherwise, returns -EADDRNOTAVAIL. - */ -static int vfio_ap_verify_queue_reserved(unsigned long *apid, - unsigned long *apqi) -{ - int ret; - struct vfio_ap_queue_reserved qres; - - qres.apid = apid; - qres.apqi = apqi; - qres.reserved = false; - - ret = driver_for_each_device(&matrix_dev->vfio_ap_drv->driver, NULL, - &qres, vfio_ap_has_queue); - if (ret) - return ret; - - if (qres.reserved) - return 0; - - return -EADDRNOTAVAIL; -} +#define MDEV_SHARING_ERR "Userspace may not re-assign queue %02lx.%04lx " \ + "already assigned to %s" -static int -vfio_ap_mdev_verify_queues_reserved_for_apid(struct ap_matrix_mdev *matrix_mdev, - unsigned long apid) +static void vfio_ap_mdev_log_sharing_err(struct ap_matrix_mdev *matrix_mdev, + unsigned long *apm, + unsigned long *aqm) { - int ret; - unsigned long apqi; - unsigned long nbits = matrix_mdev->matrix.aqm_max + 1; - - if (find_first_bit_inv(matrix_mdev->matrix.aqm, nbits) >= nbits) - return vfio_ap_verify_queue_reserved(&apid, NULL); - - for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm, nbits) { - ret = vfio_ap_verify_queue_reserved(&apid, &apqi); - if (ret) - return ret; - } + unsigned long apid, apqi; + const struct device *dev = mdev_dev(matrix_mdev->mdev); + const char *mdev_name = dev_name(dev); - return 0; + for_each_set_bit_inv(apid, apm, AP_DEVICES) + for_each_set_bit_inv(apqi, aqm, AP_DOMAINS) + dev_warn(dev, MDEV_SHARING_ERR, apid, apqi, mdev_name); } /** - * vfio_ap_mdev_verify_no_sharing - verifies that the AP matrix is not configured + * vfio_ap_mdev_verify_no_sharing - verify APQNs are not shared by matrix mdevs * - * @matrix_mdev: the mediated matrix device + * @mdev_apm: mask indicating the APIDs of the APQNs to be verified + * @mdev_aqm: mask indicating the APQIs of the APQNs to be 
verified * - * Verifies that the APQNs derived from the cross product of the AP adapter IDs - * and AP queue indexes comprising the AP matrix are not configured for another + * Verifies that each APQN derived from the Cartesian product of a bitmap of + * AP adapter IDs and AP queue indexes is not configured for any matrix * mediated device. AP queue sharing is not allowed. * - * Return: 0 if the APQNs are not shared; otherwise returns -EADDRINUSE. + * Return: 0 if the APQNs are not shared; otherwise return -EADDRINUSE. */ -static int vfio_ap_mdev_verify_no_sharing(struct ap_matrix_mdev *matrix_mdev) +static int vfio_ap_mdev_verify_no_sharing(unsigned long *mdev_apm, + unsigned long *mdev_aqm) { - struct ap_matrix_mdev *lstdev; + struct ap_matrix_mdev *matrix_mdev; DECLARE_BITMAP(apm, AP_DEVICES); DECLARE_BITMAP(aqm, AP_DOMAINS); - list_for_each_entry(lstdev, &matrix_dev->mdev_list, node) { - if (matrix_mdev == lstdev) + list_for_each_entry(matrix_mdev, &matrix_dev->mdev_list, node) { + /* + * If the input apm and aqm are fields of the matrix_mdev + * object, then move on to the next matrix_mdev. + */ + if (mdev_apm == matrix_mdev->matrix.apm && + mdev_aqm == matrix_mdev->matrix.aqm) continue; memset(apm, 0, sizeof(apm)); @@ -698,14 +878,16 @@ static int vfio_ap_mdev_verify_no_sharing(struct ap_matrix_mdev *matrix_mdev) * We work on full longs, as we can only exclude the leftover * bits in non-inverse order. The leftover is all zeros. 
*/ - if (!bitmap_and(apm, matrix_mdev->matrix.apm, - lstdev->matrix.apm, AP_DEVICES)) + if (!bitmap_and(apm, mdev_apm, matrix_mdev->matrix.apm, + AP_DEVICES)) continue; - if (!bitmap_and(aqm, matrix_mdev->matrix.aqm, - lstdev->matrix.aqm, AP_DOMAINS)) + if (!bitmap_and(aqm, mdev_aqm, matrix_mdev->matrix.aqm, + AP_DOMAINS)) continue; + vfio_ap_mdev_log_sharing_err(matrix_mdev, apm, aqm); + return -EADDRINUSE; } @@ -713,6 +895,41 @@ static int vfio_ap_mdev_verify_no_sharing(struct ap_matrix_mdev *matrix_mdev) } /** + * vfio_ap_mdev_validate_masks - verify that the APQNs assigned to the mdev are + * not reserved for the default zcrypt driver and + * are not assigned to another mdev. + * + * @matrix_mdev: the mdev to which the APQNs being validated are assigned. + * + * Return: One of the following values: + * o the error returned from the ap_apqn_in_matrix_owned_by_def_drv() function, + * most likely -EBUSY indicating the ap_perms_mutex lock is already held. + * o EADDRNOTAVAIL if an APQN assigned to @matrix_mdev is reserved for the + * zcrypt default driver. + * o EADDRINUSE if an APQN assigned to @matrix_mdev is assigned to another mdev + * o A zero indicating validation succeeded. 
+ */ +static int vfio_ap_mdev_validate_masks(struct ap_matrix_mdev *matrix_mdev) +{ + if (ap_apqn_in_matrix_owned_by_def_drv(matrix_mdev->matrix.apm, + matrix_mdev->matrix.aqm)) + return -EADDRNOTAVAIL; + + return vfio_ap_mdev_verify_no_sharing(matrix_mdev->matrix.apm, + matrix_mdev->matrix.aqm); +} + +static void vfio_ap_mdev_link_adapter(struct ap_matrix_mdev *matrix_mdev, + unsigned long apid) +{ + unsigned long apqi; + + for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm, AP_DOMAINS) + vfio_ap_mdev_link_apqn(matrix_mdev, + AP_MKQID(apid, apqi)); +} + +/** * assign_adapter_store - parses the APID from @buf and sets the * corresponding bit in the mediated matrix device's APM * @@ -741,6 +958,10 @@ static int vfio_ap_mdev_verify_no_sharing(struct ap_matrix_mdev *matrix_mdev) * An APQN derived from the cross product of the APID being assigned * and the APQIs previously assigned is being used by another mediated * matrix device + * + * 5. -EAGAIN + * A lock required to validate the mdev's AP configuration could not + * be obtained. */ static ssize_t assign_adapter_store(struct device *dev, struct device_attribute *attr, @@ -748,15 +969,11 @@ static ssize_t assign_adapter_store(struct device *dev, { int ret; unsigned long apid; + DECLARE_BITMAP(apm_delta, AP_DEVICES); struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev); - mutex_lock(&matrix_dev->lock); - - /* If the KVM guest is running, disallow assignment of adapter */ - if (matrix_mdev->kvm) { - ret = -EBUSY; - goto done; - } + mutex_lock(&ap_perms_mutex); + get_update_locks_for_mdev(matrix_mdev); ret = kstrtoul(buf, 0, &apid); if (ret) @@ -767,33 +984,97 @@ static ssize_t assign_adapter_store(struct device *dev, goto done; } - /* - * Set the bit in the AP mask (APM) corresponding to the AP adapter - * number (APID). The bits in the mask, from most significant to least - * significant bit, correspond to APIDs 0-255. 
- */ - ret = vfio_ap_mdev_verify_queues_reserved_for_apid(matrix_mdev, apid); - if (ret) + set_bit_inv(apid, matrix_mdev->matrix.apm); + + ret = vfio_ap_mdev_validate_masks(matrix_mdev); + if (ret) { + clear_bit_inv(apid, matrix_mdev->matrix.apm); goto done; + } - set_bit_inv(apid, matrix_mdev->matrix.apm); + vfio_ap_mdev_link_adapter(matrix_mdev, apid); + memset(apm_delta, 0, sizeof(apm_delta)); + set_bit_inv(apid, apm_delta); - ret = vfio_ap_mdev_verify_no_sharing(matrix_mdev); - if (ret) - goto share_err; + if (vfio_ap_mdev_filter_matrix(apm_delta, + matrix_mdev->matrix.aqm, matrix_mdev)) + vfio_ap_mdev_update_guest_apcb(matrix_mdev); ret = count; - goto done; - -share_err: - clear_bit_inv(apid, matrix_mdev->matrix.apm); done: - mutex_unlock(&matrix_dev->lock); + release_update_locks_for_mdev(matrix_mdev); + mutex_unlock(&ap_perms_mutex); return ret; } static DEVICE_ATTR_WO(assign_adapter); +static struct vfio_ap_queue +*vfio_ap_unlink_apqn_fr_mdev(struct ap_matrix_mdev *matrix_mdev, + unsigned long apid, unsigned long apqi) +{ + struct vfio_ap_queue *q = NULL; + + q = vfio_ap_mdev_get_queue(matrix_mdev, AP_MKQID(apid, apqi)); + /* If the queue is assigned to the matrix mdev, unlink it. */ + if (q) + vfio_ap_unlink_queue_fr_mdev(q); + + return q; +} + +/** + * vfio_ap_mdev_unlink_adapter - unlink all queues associated with unassigned + * adapter from the matrix mdev to which the + * adapter was assigned. + * @matrix_mdev: the matrix mediated device to which the adapter was assigned. + * @apid: the APID of the unassigned adapter. + * @qtable: table for storing queues associated with unassigned adapter. 
+ */ +static void vfio_ap_mdev_unlink_adapter(struct ap_matrix_mdev *matrix_mdev, + unsigned long apid, + struct ap_queue_table *qtable) +{ + unsigned long apqi; + struct vfio_ap_queue *q; + + for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm, AP_DOMAINS) { + q = vfio_ap_unlink_apqn_fr_mdev(matrix_mdev, apid, apqi); + + if (q && qtable) { + if (test_bit_inv(apid, matrix_mdev->shadow_apcb.apm) && + test_bit_inv(apqi, matrix_mdev->shadow_apcb.aqm)) + hash_add(qtable->queues, &q->mdev_qnode, + q->apqn); + } + } +} + +static void vfio_ap_mdev_hot_unplug_adapter(struct ap_matrix_mdev *matrix_mdev, + unsigned long apid) +{ + int loop_cursor; + struct vfio_ap_queue *q; + struct ap_queue_table *qtable = kzalloc(sizeof(*qtable), GFP_KERNEL); + + hash_init(qtable->queues); + vfio_ap_mdev_unlink_adapter(matrix_mdev, apid, qtable); + + if (test_bit_inv(apid, matrix_mdev->shadow_apcb.apm)) { + clear_bit_inv(apid, matrix_mdev->shadow_apcb.apm); + vfio_ap_mdev_update_guest_apcb(matrix_mdev); + } + + vfio_ap_mdev_reset_queues(qtable); + + hash_for_each(qtable->queues, loop_cursor, q, mdev_qnode) { + vfio_ap_unlink_mdev_fr_queue(q); + hash_del(&q->mdev_qnode); + } + + kfree(qtable); +} + /** * unassign_adapter_store - parses the APID from @buf and clears the * corresponding bit in the mediated matrix device's APM @@ -817,13 +1098,7 @@ static ssize_t unassign_adapter_store(struct device *dev, unsigned long apid; struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev); - mutex_lock(&matrix_dev->lock); - - /* If the KVM guest is running, disallow unassignment of adapter */ - if (matrix_mdev->kvm) { - ret = -EBUSY; - goto done; - } + get_update_locks_for_mdev(matrix_mdev); ret = kstrtoul(buf, 0, &apid); if (ret) @@ -835,31 +1110,22 @@ static ssize_t unassign_adapter_store(struct device *dev, } clear_bit_inv((unsigned long)apid, matrix_mdev->matrix.apm); + vfio_ap_mdev_hot_unplug_adapter(matrix_mdev, apid); ret = count; done: - mutex_unlock(&matrix_dev->lock); + 
release_update_locks_for_mdev(matrix_mdev); return ret; } static DEVICE_ATTR_WO(unassign_adapter); -static int -vfio_ap_mdev_verify_queues_reserved_for_apqi(struct ap_matrix_mdev *matrix_mdev, - unsigned long apqi) +static void vfio_ap_mdev_link_domain(struct ap_matrix_mdev *matrix_mdev, + unsigned long apqi) { - int ret; unsigned long apid; - unsigned long nbits = matrix_mdev->matrix.apm_max + 1; - - if (find_first_bit_inv(matrix_mdev->matrix.apm, nbits) >= nbits) - return vfio_ap_verify_queue_reserved(NULL, &apqi); - for_each_set_bit_inv(apid, matrix_mdev->matrix.apm, nbits) { - ret = vfio_ap_verify_queue_reserved(&apid, &apqi); - if (ret) - return ret; - } - - return 0; + for_each_set_bit_inv(apid, matrix_mdev->matrix.apm, AP_DEVICES) + vfio_ap_mdev_link_apqn(matrix_mdev, + AP_MKQID(apid, apqi)); } /** @@ -891,6 +1157,10 @@ vfio_ap_mdev_verify_queues_reserved_for_apqi(struct ap_matrix_mdev *matrix_mdev, * An APQN derived from the cross product of the APQI being assigned * and the APIDs previously assigned is being used by another mediated * matrix device + * + * 5. -EAGAIN + * The lock required to validate the mdev's AP configuration could not + * be obtained. 
*/ static ssize_t assign_domain_store(struct device *dev, struct device_attribute *attr, @@ -898,47 +1168,89 @@ static ssize_t assign_domain_store(struct device *dev, { int ret; unsigned long apqi; + DECLARE_BITMAP(aqm_delta, AP_DOMAINS); struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev); - unsigned long max_apqi = matrix_mdev->matrix.aqm_max; - - mutex_lock(&matrix_dev->lock); - /* If the KVM guest is running, disallow assignment of domain */ - if (matrix_mdev->kvm) { - ret = -EBUSY; - goto done; - } + mutex_lock(&ap_perms_mutex); + get_update_locks_for_mdev(matrix_mdev); ret = kstrtoul(buf, 0, &apqi); if (ret) goto done; - if (apqi > max_apqi) { + + if (apqi > matrix_mdev->matrix.aqm_max) { ret = -ENODEV; goto done; } - ret = vfio_ap_mdev_verify_queues_reserved_for_apqi(matrix_mdev, apqi); - if (ret) + set_bit_inv(apqi, matrix_mdev->matrix.aqm); + + ret = vfio_ap_mdev_validate_masks(matrix_mdev); + if (ret) { + clear_bit_inv(apqi, matrix_mdev->matrix.aqm); goto done; + } - set_bit_inv(apqi, matrix_mdev->matrix.aqm); + vfio_ap_mdev_link_domain(matrix_mdev, apqi); + memset(aqm_delta, 0, sizeof(aqm_delta)); + set_bit_inv(apqi, aqm_delta); - ret = vfio_ap_mdev_verify_no_sharing(matrix_mdev); - if (ret) - goto share_err; + if (vfio_ap_mdev_filter_matrix(matrix_mdev->matrix.apm, aqm_delta, + matrix_mdev)) + vfio_ap_mdev_update_guest_apcb(matrix_mdev); ret = count; - goto done; - -share_err: - clear_bit_inv(apqi, matrix_mdev->matrix.aqm); done: - mutex_unlock(&matrix_dev->lock); + release_update_locks_for_mdev(matrix_mdev); + mutex_unlock(&ap_perms_mutex); return ret; } static DEVICE_ATTR_WO(assign_domain); +static void vfio_ap_mdev_unlink_domain(struct ap_matrix_mdev *matrix_mdev, + unsigned long apqi, + struct ap_queue_table *qtable) +{ + unsigned long apid; + struct vfio_ap_queue *q; + + for_each_set_bit_inv(apid, matrix_mdev->matrix.apm, AP_DEVICES) { + q = vfio_ap_unlink_apqn_fr_mdev(matrix_mdev, apid, apqi); + + if (q && qtable) { + if (test_bit_inv(apid, 
matrix_mdev->shadow_apcb.apm) && + test_bit_inv(apqi, matrix_mdev->shadow_apcb.aqm)) + hash_add(qtable->queues, &q->mdev_qnode, + q->apqn); + } + } +} + +static void vfio_ap_mdev_hot_unplug_domain(struct ap_matrix_mdev *matrix_mdev, + unsigned long apqi) +{ + int loop_cursor; + struct vfio_ap_queue *q; + struct ap_queue_table *qtable = kzalloc(sizeof(*qtable), GFP_KERNEL); + + hash_init(qtable->queues); + vfio_ap_mdev_unlink_domain(matrix_mdev, apqi, qtable); + + if (test_bit_inv(apqi, matrix_mdev->shadow_apcb.aqm)) { + clear_bit_inv(apqi, matrix_mdev->shadow_apcb.aqm); + vfio_ap_mdev_update_guest_apcb(matrix_mdev); + } + + vfio_ap_mdev_reset_queues(qtable); + + hash_for_each(qtable->queues, loop_cursor, q, mdev_qnode) { + vfio_ap_unlink_mdev_fr_queue(q); + hash_del(&q->mdev_qnode); + } + + kfree(qtable); +} /** * unassign_domain_store - parses the APQI from @buf and clears the @@ -963,13 +1275,7 @@ static ssize_t unassign_domain_store(struct device *dev, unsigned long apqi; struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev); - mutex_lock(&matrix_dev->lock); - - /* If the KVM guest is running, disallow unassignment of domain */ - if (matrix_mdev->kvm) { - ret = -EBUSY; - goto done; - } + get_update_locks_for_mdev(matrix_mdev); ret = kstrtoul(buf, 0, &apqi); if (ret) @@ -981,10 +1287,11 @@ static ssize_t unassign_domain_store(struct device *dev, } clear_bit_inv((unsigned long)apqi, matrix_mdev->matrix.aqm); + vfio_ap_mdev_hot_unplug_domain(matrix_mdev, apqi); ret = count; done: - mutex_unlock(&matrix_dev->lock); + release_update_locks_for_mdev(matrix_mdev); return ret; } static DEVICE_ATTR_WO(unassign_domain); @@ -1011,13 +1318,7 @@ static ssize_t assign_control_domain_store(struct device *dev, unsigned long id; struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev); - mutex_lock(&matrix_dev->lock); - - /* If the KVM guest is running, disallow assignment of control domain */ - if (matrix_mdev->kvm) { - ret = -EBUSY; - goto done; - } + 
get_update_locks_for_mdev(matrix_mdev); ret = kstrtoul(buf, 0, &id); if (ret) @@ -1034,9 +1335,12 @@ static ssize_t assign_control_domain_store(struct device *dev, * number of control domains that can be assigned. */ set_bit_inv(id, matrix_mdev->matrix.adm); + if (vfio_ap_mdev_filter_cdoms(matrix_mdev)) + vfio_ap_mdev_update_guest_apcb(matrix_mdev); + ret = count; done: - mutex_unlock(&matrix_dev->lock); + release_update_locks_for_mdev(matrix_mdev); return ret; } static DEVICE_ATTR_WO(assign_control_domain); @@ -1062,28 +1366,28 @@ static ssize_t unassign_control_domain_store(struct device *dev, int ret; unsigned long domid; struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev); - unsigned long max_domid = matrix_mdev->matrix.adm_max; - - mutex_lock(&matrix_dev->lock); - /* If a KVM guest is running, disallow unassignment of control domain */ - if (matrix_mdev->kvm) { - ret = -EBUSY; - goto done; - } + get_update_locks_for_mdev(matrix_mdev); ret = kstrtoul(buf, 0, &domid); if (ret) goto done; - if (domid > max_domid) { + + if (domid > matrix_mdev->matrix.adm_max) { ret = -ENODEV; goto done; } clear_bit_inv(domid, matrix_mdev->matrix.adm); + + if (test_bit_inv(domid, matrix_mdev->shadow_apcb.adm)) { + clear_bit_inv(domid, matrix_mdev->shadow_apcb.adm); + vfio_ap_mdev_update_guest_apcb(matrix_mdev); + } + ret = count; done: - mutex_unlock(&matrix_dev->lock); + release_update_locks_for_mdev(matrix_mdev); return ret; } static DEVICE_ATTR_WO(unassign_control_domain); @@ -1099,40 +1403,36 @@ static ssize_t control_domains_show(struct device *dev, struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev); unsigned long max_domid = matrix_mdev->matrix.adm_max; - mutex_lock(&matrix_dev->lock); + mutex_lock(&matrix_dev->mdevs_lock); for_each_set_bit_inv(id, matrix_mdev->matrix.adm, max_domid + 1) { n = sprintf(bufpos, "%04lx\n", id); bufpos += n; nchars += n; } - mutex_unlock(&matrix_dev->lock); + mutex_unlock(&matrix_dev->mdevs_lock); return nchars; } static 
DEVICE_ATTR_RO(control_domains); -static ssize_t matrix_show(struct device *dev, struct device_attribute *attr, - char *buf) +static ssize_t vfio_ap_mdev_matrix_show(struct ap_matrix *matrix, char *buf) { - struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev); char *bufpos = buf; unsigned long apid; unsigned long apqi; unsigned long apid1; unsigned long apqi1; - unsigned long napm_bits = matrix_mdev->matrix.apm_max + 1; - unsigned long naqm_bits = matrix_mdev->matrix.aqm_max + 1; + unsigned long napm_bits = matrix->apm_max + 1; + unsigned long naqm_bits = matrix->aqm_max + 1; int nchars = 0; int n; - apid1 = find_first_bit_inv(matrix_mdev->matrix.apm, napm_bits); - apqi1 = find_first_bit_inv(matrix_mdev->matrix.aqm, naqm_bits); - - mutex_lock(&matrix_dev->lock); + apid1 = find_first_bit_inv(matrix->apm, napm_bits); + apqi1 = find_first_bit_inv(matrix->aqm, naqm_bits); if ((apid1 < napm_bits) && (apqi1 < naqm_bits)) { - for_each_set_bit_inv(apid, matrix_mdev->matrix.apm, napm_bits) { - for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm, + for_each_set_bit_inv(apid, matrix->apm, napm_bits) { + for_each_set_bit_inv(apqi, matrix->aqm, naqm_bits) { n = sprintf(bufpos, "%02lx.%04lx\n", apid, apqi); @@ -1141,25 +1441,50 @@ static ssize_t matrix_show(struct device *dev, struct device_attribute *attr, } } } else if (apid1 < napm_bits) { - for_each_set_bit_inv(apid, matrix_mdev->matrix.apm, napm_bits) { + for_each_set_bit_inv(apid, matrix->apm, napm_bits) { n = sprintf(bufpos, "%02lx.\n", apid); bufpos += n; nchars += n; } } else if (apqi1 < naqm_bits) { - for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm, naqm_bits) { + for_each_set_bit_inv(apqi, matrix->aqm, naqm_bits) { n = sprintf(bufpos, ".%04lx\n", apqi); bufpos += n; nchars += n; } } - mutex_unlock(&matrix_dev->lock); + return nchars; +} + +static ssize_t matrix_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + ssize_t nchars; + struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev); + 
+ mutex_lock(&matrix_dev->mdevs_lock); + nchars = vfio_ap_mdev_matrix_show(&matrix_mdev->matrix, buf); + mutex_unlock(&matrix_dev->mdevs_lock); return nchars; } static DEVICE_ATTR_RO(matrix); +static ssize_t guest_matrix_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + ssize_t nchars; + struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev); + + mutex_lock(&matrix_dev->mdevs_lock); + nchars = vfio_ap_mdev_matrix_show(&matrix_mdev->shadow_apcb, buf); + mutex_unlock(&matrix_dev->mdevs_lock); + + return nchars; +} +static DEVICE_ATTR_RO(guest_matrix); + static struct attribute *vfio_ap_mdev_attrs[] = { &dev_attr_assign_adapter.attr, &dev_attr_unassign_adapter.attr, @@ -1169,6 +1494,7 @@ static struct attribute *vfio_ap_mdev_attrs[] = { &dev_attr_unassign_control_domain.attr, &dev_attr_control_domains.attr, &dev_attr_matrix.attr, + &dev_attr_guest_matrix.attr, NULL, }; @@ -1201,59 +1527,32 @@ static int vfio_ap_mdev_set_kvm(struct ap_matrix_mdev *matrix_mdev, kvm->arch.crypto.pqap_hook = &matrix_mdev->pqap_hook; up_write(&kvm->arch.crypto.pqap_hook_rwsem); - mutex_lock(&kvm->lock); - mutex_lock(&matrix_dev->lock); + get_update_locks_for_kvm(kvm); list_for_each_entry(m, &matrix_dev->mdev_list, node) { if (m != matrix_mdev && m->kvm == kvm) { - mutex_unlock(&kvm->lock); - mutex_unlock(&matrix_dev->lock); + release_update_locks_for_kvm(kvm); return -EPERM; } } kvm_get_kvm(kvm); matrix_mdev->kvm = kvm; - kvm_arch_crypto_set_masks(kvm, - matrix_mdev->matrix.apm, - matrix_mdev->matrix.aqm, - matrix_mdev->matrix.adm); + vfio_ap_mdev_update_guest_apcb(matrix_mdev); - mutex_unlock(&kvm->lock); - mutex_unlock(&matrix_dev->lock); + release_update_locks_for_kvm(kvm); } return 0; } -/** - * vfio_ap_mdev_iommu_notifier - IOMMU notifier callback - * - * @nb: The notifier block - * @action: Action to be taken - * @data: data associated with the request - * - * For an UNMAP request, unpin the guest IOVA (the NIB guest address we - * pinned before). 
Other requests are ignored. - * - * Return: for an UNMAP request, NOFITY_OK; otherwise NOTIFY_DONE. - */ -static int vfio_ap_mdev_iommu_notifier(struct notifier_block *nb, - unsigned long action, void *data) +static void vfio_ap_mdev_dma_unmap(struct vfio_device *vdev, u64 iova, + u64 length) { - struct ap_matrix_mdev *matrix_mdev; - - matrix_mdev = container_of(nb, struct ap_matrix_mdev, iommu_notifier); - - if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) { - struct vfio_iommu_type1_dma_unmap *unmap = data; - unsigned long g_pfn = unmap->iova >> PAGE_SHIFT; - - vfio_unpin_pages(&matrix_mdev->vdev, &g_pfn, 1); - return NOTIFY_OK; - } + struct ap_matrix_mdev *matrix_mdev = + container_of(vdev, struct ap_matrix_mdev, vdev); - return NOTIFY_DONE; + vfio_unpin_pages(&matrix_mdev->vdev, iova, 1); } /** @@ -1271,36 +1570,36 @@ static void vfio_ap_mdev_unset_kvm(struct ap_matrix_mdev *matrix_mdev) kvm->arch.crypto.pqap_hook = NULL; up_write(&kvm->arch.crypto.pqap_hook_rwsem); - mutex_lock(&kvm->lock); - mutex_lock(&matrix_dev->lock); + get_update_locks_for_kvm(kvm); kvm_arch_crypto_clear_masks(kvm); - vfio_ap_mdev_reset_queues(matrix_mdev); + vfio_ap_mdev_reset_queues(&matrix_mdev->qtable); kvm_put_kvm(kvm); matrix_mdev->kvm = NULL; - mutex_unlock(&kvm->lock); - mutex_unlock(&matrix_dev->lock); + release_update_locks_for_kvm(kvm); } } static struct vfio_ap_queue *vfio_ap_find_queue(int apqn) { - struct device *dev; + struct ap_queue *queue; struct vfio_ap_queue *q = NULL; - dev = driver_find_device(&matrix_dev->vfio_ap_drv->driver, NULL, - &apqn, match_apqn); - if (dev) { - q = dev_get_drvdata(dev); - put_device(dev); - } + queue = ap_get_qdev(apqn); + if (!queue) + return NULL; + + if (queue->ap_dev.device.driver == &matrix_dev->vfio_ap_drv->driver) + q = dev_get_drvdata(&queue->ap_dev.device); + + put_device(&queue->ap_dev.device); return q; } -int vfio_ap_mdev_reset_queue(struct vfio_ap_queue *q, - unsigned int retry) +static int vfio_ap_mdev_reset_queue(struct 
vfio_ap_queue *q, + unsigned int retry) { struct ap_queue_status status; int ret; @@ -1308,9 +1607,9 @@ int vfio_ap_mdev_reset_queue(struct vfio_ap_queue *q, if (!q) return 0; - retry_zapq: status = ap_zapq(q->apqn); + q->reset_rc = status.response_code; switch (status.response_code) { case AP_RESPONSE_NORMAL: ret = 0; @@ -1325,12 +1624,17 @@ retry_zapq: case AP_RESPONSE_Q_NOT_AVAIL: case AP_RESPONSE_DECONFIGURED: case AP_RESPONSE_CHECKSTOPPED: - WARN_ON_ONCE(status.irq_enabled); + WARN_ONCE(status.irq_enabled, + "PQAP/ZAPQ for %02x.%04x failed with rc=%u while IRQ enabled", + AP_QID_CARD(q->apqn), AP_QID_QUEUE(q->apqn), + status.response_code); ret = -EBUSY; goto free_resources; default: /* things are really broken, give up */ - WARN(true, "PQAP/ZAPQ completed with invalid rc (%x)\n", + WARN(true, + "PQAP/ZAPQ for %02x.%04x failed with invalid rc=%u\n", + AP_QID_CARD(q->apqn), AP_QID_QUEUE(q->apqn), status.response_code); return -EIO; } @@ -1342,7 +1646,8 @@ retry_zapq: msleep(20); status = ap_tapq(q->apqn, NULL); } - WARN_ON_ONCE(retry2 <= 0); + WARN_ONCE(retry2 <= 0, "unable to verify reset of queue %02x.%04x", + AP_QID_CARD(q->apqn), AP_QID_QUEUE(q->apqn)); free_resources: vfio_ap_free_aqic_resources(q); @@ -1350,27 +1655,20 @@ free_resources: return ret; } -static int vfio_ap_mdev_reset_queues(struct ap_matrix_mdev *matrix_mdev) +static int vfio_ap_mdev_reset_queues(struct ap_queue_table *qtable) { - int ret; - int rc = 0; - unsigned long apid, apqi; + int ret, loop_cursor, rc = 0; struct vfio_ap_queue *q; - for_each_set_bit_inv(apid, matrix_mdev->matrix.apm, - matrix_mdev->matrix.apm_max + 1) { - for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm, - matrix_mdev->matrix.aqm_max + 1) { - q = vfio_ap_find_queue(AP_MKQID(apid, apqi)); - ret = vfio_ap_mdev_reset_queue(q, 1); - /* - * Regardless whether a queue turns out to be busy, or - * is not operational, we need to continue resetting - * the remaining queues. 
- */ - if (ret) - rc = ret; - } + hash_for_each(qtable->queues, loop_cursor, q, mdev_qnode) { + ret = vfio_ap_mdev_reset_queue(q, 1); + /* + * Regardless whether a queue turns out to be busy, or + * is not operational, we need to continue resetting + * the remaining queues. + */ + if (ret) + rc = ret; } return rc; @@ -1380,27 +1678,11 @@ static int vfio_ap_mdev_open_device(struct vfio_device *vdev) { struct ap_matrix_mdev *matrix_mdev = container_of(vdev, struct ap_matrix_mdev, vdev); - unsigned long events; - int ret; if (!vdev->kvm) return -EINVAL; - ret = vfio_ap_mdev_set_kvm(matrix_mdev, vdev->kvm); - if (ret) - return ret; - - matrix_mdev->iommu_notifier.notifier_call = vfio_ap_mdev_iommu_notifier; - events = VFIO_IOMMU_NOTIFY_DMA_UNMAP; - ret = vfio_register_notifier(vdev, VFIO_IOMMU_NOTIFY, &events, - &matrix_mdev->iommu_notifier); - if (ret) - goto err_kvm; - return 0; - -err_kvm: - vfio_ap_mdev_unset_kvm(matrix_mdev); - return ret; + return vfio_ap_mdev_set_kvm(matrix_mdev, vdev->kvm); } static void vfio_ap_mdev_close_device(struct vfio_device *vdev) @@ -1408,8 +1690,6 @@ static void vfio_ap_mdev_close_device(struct vfio_device *vdev) struct ap_matrix_mdev *matrix_mdev = container_of(vdev, struct ap_matrix_mdev, vdev); - vfio_unregister_notifier(vdev, VFIO_IOMMU_NOTIFY, - &matrix_mdev->iommu_notifier); vfio_ap_mdev_unset_kvm(matrix_mdev); } @@ -1440,27 +1720,84 @@ static ssize_t vfio_ap_mdev_ioctl(struct vfio_device *vdev, container_of(vdev, struct ap_matrix_mdev, vdev); int ret; - mutex_lock(&matrix_dev->lock); + mutex_lock(&matrix_dev->mdevs_lock); switch (cmd) { case VFIO_DEVICE_GET_INFO: ret = vfio_ap_mdev_get_device_info(arg); break; case VFIO_DEVICE_RESET: - ret = vfio_ap_mdev_reset_queues(matrix_mdev); + ret = vfio_ap_mdev_reset_queues(&matrix_mdev->qtable); break; default: ret = -EOPNOTSUPP; break; } - mutex_unlock(&matrix_dev->lock); + mutex_unlock(&matrix_dev->mdevs_lock); return ret; } +static struct ap_matrix_mdev *vfio_ap_mdev_for_queue(struct 
vfio_ap_queue *q) +{ + struct ap_matrix_mdev *matrix_mdev; + unsigned long apid = AP_QID_CARD(q->apqn); + unsigned long apqi = AP_QID_QUEUE(q->apqn); + + list_for_each_entry(matrix_mdev, &matrix_dev->mdev_list, node) { + if (test_bit_inv(apid, matrix_mdev->matrix.apm) && + test_bit_inv(apqi, matrix_mdev->matrix.aqm)) + return matrix_mdev; + } + + return NULL; +} + +static ssize_t status_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + ssize_t nchars = 0; + struct vfio_ap_queue *q; + struct ap_matrix_mdev *matrix_mdev; + struct ap_device *apdev = to_ap_dev(dev); + + mutex_lock(&matrix_dev->mdevs_lock); + q = dev_get_drvdata(&apdev->device); + matrix_mdev = vfio_ap_mdev_for_queue(q); + + if (matrix_mdev) { + if (matrix_mdev->kvm) + nchars = scnprintf(buf, PAGE_SIZE, "%s\n", + AP_QUEUE_IN_USE); + else + nchars = scnprintf(buf, PAGE_SIZE, "%s\n", + AP_QUEUE_ASSIGNED); + } else { + nchars = scnprintf(buf, PAGE_SIZE, "%s\n", + AP_QUEUE_UNASSIGNED); + } + + mutex_unlock(&matrix_dev->mdevs_lock); + + return nchars; +} + +static DEVICE_ATTR_RO(status); + +static struct attribute *vfio_queue_attrs[] = { + &dev_attr_status.attr, + NULL, +}; + +static const struct attribute_group vfio_queue_attr_group = { + .attrs = vfio_queue_attrs, +}; + static const struct vfio_device_ops vfio_ap_matrix_dev_ops = { .open_device = vfio_ap_mdev_open_device, .close_device = vfio_ap_mdev_close_device, .ioctl = vfio_ap_mdev_ioctl, + .dma_unmap = vfio_ap_mdev_dma_unmap, }; static struct mdev_driver vfio_ap_matrix_driver = { @@ -1500,3 +1837,432 @@ void vfio_ap_mdev_unregister(void) mdev_unregister_device(&matrix_dev->device); mdev_unregister_driver(&vfio_ap_matrix_driver); } + +int vfio_ap_mdev_probe_queue(struct ap_device *apdev) +{ + int ret; + struct vfio_ap_queue *q; + struct ap_matrix_mdev *matrix_mdev; + + ret = sysfs_create_group(&apdev->device.kobj, &vfio_queue_attr_group); + if (ret) + return ret; + + q = kzalloc(sizeof(*q), GFP_KERNEL); + if (!q) + return 
-ENOMEM; + + q->apqn = to_ap_queue(&apdev->device)->qid; + q->saved_isc = VFIO_AP_ISC_INVALID; + matrix_mdev = get_update_locks_by_apqn(q->apqn); + + if (matrix_mdev) { + vfio_ap_mdev_link_queue(matrix_mdev, q); + + if (vfio_ap_mdev_filter_matrix(matrix_mdev->matrix.apm, + matrix_mdev->matrix.aqm, + matrix_mdev)) + vfio_ap_mdev_update_guest_apcb(matrix_mdev); + } + dev_set_drvdata(&apdev->device, q); + release_update_locks_for_mdev(matrix_mdev); + + return 0; +} + +void vfio_ap_mdev_remove_queue(struct ap_device *apdev) +{ + unsigned long apid, apqi; + struct vfio_ap_queue *q; + struct ap_matrix_mdev *matrix_mdev; + + sysfs_remove_group(&apdev->device.kobj, &vfio_queue_attr_group); + q = dev_get_drvdata(&apdev->device); + get_update_locks_for_queue(q); + matrix_mdev = q->matrix_mdev; + + if (matrix_mdev) { + vfio_ap_unlink_queue_fr_mdev(q); + + apid = AP_QID_CARD(q->apqn); + apqi = AP_QID_QUEUE(q->apqn); + + /* + * If the queue is assigned to the guest's APCB, then remove + * the adapter's APID from the APCB and hot plug it into the guest. + */ + if (test_bit_inv(apid, matrix_mdev->shadow_apcb.apm) && + test_bit_inv(apqi, matrix_mdev->shadow_apcb.aqm)) { + clear_bit_inv(apid, matrix_mdev->shadow_apcb.apm); + vfio_ap_mdev_update_guest_apcb(matrix_mdev); + } + } + + vfio_ap_mdev_reset_queue(q, 1); + dev_set_drvdata(&apdev->device, NULL); + kfree(q); + release_update_locks_for_mdev(matrix_mdev); +} + +/** + * vfio_ap_mdev_resource_in_use: check whether any of a set of APQNs is + * assigned to a mediated device under the control + * of the vfio_ap device driver. + * + * @apm: a bitmap specifying a set of APIDs comprising the APQNs to check. + * @aqm: a bitmap specifying a set of APQIs comprising the APQNs to check. + * + * Return: + * * -EADDRINUSE if one or more of the APQNs specified via @apm/@aqm are + * assigned to a mediated device under the control of the vfio_ap + * device driver. + * * Otherwise, return 0.
+ */ +int vfio_ap_mdev_resource_in_use(unsigned long *apm, unsigned long *aqm) +{ + int ret; + + mutex_lock(&matrix_dev->guests_lock); + mutex_lock(&matrix_dev->mdevs_lock); + ret = vfio_ap_mdev_verify_no_sharing(apm, aqm); + mutex_unlock(&matrix_dev->mdevs_lock); + mutex_unlock(&matrix_dev->guests_lock); + + return ret; +} + +/** + * vfio_ap_mdev_hot_unplug_cfg - hot unplug the adapters, domains and control + * domains that have been removed from the host's + * AP configuration from a guest. + * + * @matrix_mdev: an ap_matrix_mdev object attached to a KVM guest. + * @aprem: the adapters that have been removed from the host's AP configuration + * @aqrem: the domains that have been removed from the host's AP configuration + * @cdrem: the control domains that have been removed from the host's AP + * configuration. + */ +static void vfio_ap_mdev_hot_unplug_cfg(struct ap_matrix_mdev *matrix_mdev, + unsigned long *aprem, + unsigned long *aqrem, + unsigned long *cdrem) +{ + int do_hotplug = 0; + + if (!bitmap_empty(aprem, AP_DEVICES)) { + do_hotplug |= bitmap_andnot(matrix_mdev->shadow_apcb.apm, + matrix_mdev->shadow_apcb.apm, + aprem, AP_DEVICES); + } + + if (!bitmap_empty(aqrem, AP_DOMAINS)) { + do_hotplug |= bitmap_andnot(matrix_mdev->shadow_apcb.aqm, + matrix_mdev->shadow_apcb.aqm, + aqrem, AP_DEVICES); + } + + if (!bitmap_empty(cdrem, AP_DOMAINS)) + do_hotplug |= bitmap_andnot(matrix_mdev->shadow_apcb.adm, + matrix_mdev->shadow_apcb.adm, + cdrem, AP_DOMAINS); + + if (do_hotplug) + vfio_ap_mdev_update_guest_apcb(matrix_mdev); +} + +/** + * vfio_ap_mdev_cfg_remove - determines which guests are using the adapters, + * domains and control domains that have been removed + * from the host AP configuration and unplugs them + * from those guests. + * + * @ap_remove: bitmap specifying which adapters have been removed from the host + * config. + * @aq_remove: bitmap specifying which domains have been removed from the host + * config. 
+ * @cd_remove: bitmap specifying which control domains have been removed from + * the host config. + */ +static void vfio_ap_mdev_cfg_remove(unsigned long *ap_remove, + unsigned long *aq_remove, + unsigned long *cd_remove) +{ + struct ap_matrix_mdev *matrix_mdev; + DECLARE_BITMAP(aprem, AP_DEVICES); + DECLARE_BITMAP(aqrem, AP_DOMAINS); + DECLARE_BITMAP(cdrem, AP_DOMAINS); + int do_remove = 0; + + list_for_each_entry(matrix_mdev, &matrix_dev->mdev_list, node) { + mutex_lock(&matrix_mdev->kvm->lock); + mutex_lock(&matrix_dev->mdevs_lock); + + do_remove |= bitmap_and(aprem, ap_remove, + matrix_mdev->matrix.apm, + AP_DEVICES); + do_remove |= bitmap_and(aqrem, aq_remove, + matrix_mdev->matrix.aqm, + AP_DOMAINS); + do_remove |= bitmap_andnot(cdrem, cd_remove, + matrix_mdev->matrix.adm, + AP_DOMAINS); + + if (do_remove) + vfio_ap_mdev_hot_unplug_cfg(matrix_mdev, aprem, aqrem, + cdrem); + + mutex_unlock(&matrix_dev->mdevs_lock); + mutex_unlock(&matrix_mdev->kvm->lock); + } +} + +/** + * vfio_ap_mdev_on_cfg_remove - responds to the removal of adapters, domains and + * control domains from the host AP configuration + * by unplugging them from the guests that are + * using them. 
+ * @cur_config_info: the current host AP configuration information + * @prev_config_info: the previous host AP configuration information + */ +static void vfio_ap_mdev_on_cfg_remove(struct ap_config_info *cur_config_info, + struct ap_config_info *prev_config_info) +{ + int do_remove; + DECLARE_BITMAP(aprem, AP_DEVICES); + DECLARE_BITMAP(aqrem, AP_DOMAINS); + DECLARE_BITMAP(cdrem, AP_DOMAINS); + + do_remove = bitmap_andnot(aprem, + (unsigned long *)prev_config_info->apm, + (unsigned long *)cur_config_info->apm, + AP_DEVICES); + do_remove |= bitmap_andnot(aqrem, + (unsigned long *)prev_config_info->aqm, + (unsigned long *)cur_config_info->aqm, + AP_DEVICES); + do_remove |= bitmap_andnot(cdrem, + (unsigned long *)prev_config_info->adm, + (unsigned long *)cur_config_info->adm, + AP_DEVICES); + + if (do_remove) + vfio_ap_mdev_cfg_remove(aprem, aqrem, cdrem); +} + +/** + * vfio_ap_filter_apid_by_qtype: filter APIDs from an AP mask for adapters that + * are older than AP type 10 (CEX4). + * @apm: a bitmap of the APIDs to examine + * @aqm: a bitmap of the APQIs of the queues to query for the AP type. + */ +static void vfio_ap_filter_apid_by_qtype(unsigned long *apm, unsigned long *aqm) +{ + bool apid_cleared; + struct ap_queue_status status; + unsigned long apid, apqi, info; + int qtype, qtype_mask = 0xff000000; + + for_each_set_bit_inv(apid, apm, AP_DEVICES) { + apid_cleared = false; + + for_each_set_bit_inv(apqi, aqm, AP_DOMAINS) { + status = ap_test_queue(AP_MKQID(apid, apqi), 1, &info); + switch (status.response_code) { + /* + * According to the architecture in each case + * below, the queue's info should be filled. + */ + case AP_RESPONSE_NORMAL: + case AP_RESPONSE_RESET_IN_PROGRESS: + case AP_RESPONSE_DECONFIGURED: + case AP_RESPONSE_CHECKSTOPPED: + case AP_RESPONSE_BUSY: + qtype = info & qtype_mask; + + /* + * The vfio_ap device driver only + * supports CEX4 and newer adapters, so + * remove the APID if the adapter is + * older than a CEX4. 
+ */ + if (qtype < AP_DEVICE_TYPE_CEX4) { + clear_bit_inv(apid, apm); + apid_cleared = true; + } + + break; + + default: + /* + * If we don't know the adapter type, + * clear its APID since it can't be + * determined whether the vfio_ap + * device driver supports it. + */ + clear_bit_inv(apid, apm); + apid_cleared = true; + break; + } + + /* + * If we've already cleared the APID from the apm, there + * is no need to continue examining the remaining AP + * queues to determine the type of the adapter. + */ + if (apid_cleared) + continue; + } + } +} + +/** + * vfio_ap_mdev_cfg_add - store bitmaps specifying the adapters, domains and + * control domains that have been added to the host's + * AP configuration for each matrix mdev to which they + * are assigned. + * + * @apm_add: a bitmap specifying the adapters that have been added to the AP + * configuration. + * @aqm_add: a bitmap specifying the domains that have been added to the AP + * configuration. + * @adm_add: a bitmap specifying the control domains that have been added to the + * AP configuration.
+ */ +static void vfio_ap_mdev_cfg_add(unsigned long *apm_add, unsigned long *aqm_add, + unsigned long *adm_add) +{ + struct ap_matrix_mdev *matrix_mdev; + + if (list_empty(&matrix_dev->mdev_list)) + return; + + vfio_ap_filter_apid_by_qtype(apm_add, aqm_add); + + list_for_each_entry(matrix_mdev, &matrix_dev->mdev_list, node) { + bitmap_and(matrix_mdev->apm_add, + matrix_mdev->matrix.apm, apm_add, AP_DEVICES); + bitmap_and(matrix_mdev->aqm_add, + matrix_mdev->matrix.aqm, aqm_add, AP_DOMAINS); + bitmap_and(matrix_mdev->adm_add, + matrix_mdev->matrix.adm, adm_add, AP_DEVICES); + } +} + +/** + * vfio_ap_mdev_on_cfg_add - responds to the addition of adapters, domains and + * control domains to the host AP configuration + * by updating the bitmaps that specify what adapters, + * domains and control domains have been added so they + * can be hot plugged into the guest when the AP bus + * scan completes (see vfio_ap_on_scan_complete + * function). + * @cur_config_info: the current AP configuration information + * @prev_config_info: the previous AP configuration information + */ +static void vfio_ap_mdev_on_cfg_add(struct ap_config_info *cur_config_info, + struct ap_config_info *prev_config_info) +{ + bool do_add; + DECLARE_BITMAP(apm_add, AP_DEVICES); + DECLARE_BITMAP(aqm_add, AP_DOMAINS); + DECLARE_BITMAP(adm_add, AP_DOMAINS); + + do_add = bitmap_andnot(apm_add, + (unsigned long *)cur_config_info->apm, + (unsigned long *)prev_config_info->apm, + AP_DEVICES); + do_add |= bitmap_andnot(aqm_add, + (unsigned long *)cur_config_info->aqm, + (unsigned long *)prev_config_info->aqm, + AP_DOMAINS); + do_add |= bitmap_andnot(adm_add, + (unsigned long *)cur_config_info->adm, + (unsigned long *)prev_config_info->adm, + AP_DOMAINS); + + if (do_add) + vfio_ap_mdev_cfg_add(apm_add, aqm_add, adm_add); +} + +/** + * vfio_ap_on_cfg_changed - handles notification of changes to the host AP + * configuration. 
+ * + * @cur_cfg_info: the current host AP configuration + * @prev_cfg_info: the previous host AP configuration + */ +void vfio_ap_on_cfg_changed(struct ap_config_info *cur_cfg_info, + struct ap_config_info *prev_cfg_info) +{ + if (!cur_cfg_info || !prev_cfg_info) + return; + + mutex_lock(&matrix_dev->guests_lock); + + vfio_ap_mdev_on_cfg_remove(cur_cfg_info, prev_cfg_info); + vfio_ap_mdev_on_cfg_add(cur_cfg_info, prev_cfg_info); + memcpy(&matrix_dev->info, cur_cfg_info, sizeof(*cur_cfg_info)); + + mutex_unlock(&matrix_dev->guests_lock); +} + +static void vfio_ap_mdev_hot_plug_cfg(struct ap_matrix_mdev *matrix_mdev) +{ + bool do_hotplug = false; + int filter_domains = 0; + int filter_adapters = 0; + DECLARE_BITMAP(apm, AP_DEVICES); + DECLARE_BITMAP(aqm, AP_DOMAINS); + + mutex_lock(&matrix_mdev->kvm->lock); + mutex_lock(&matrix_dev->mdevs_lock); + + filter_adapters = bitmap_and(apm, matrix_mdev->matrix.apm, + matrix_mdev->apm_add, AP_DEVICES); + filter_domains = bitmap_and(aqm, matrix_mdev->matrix.aqm, + matrix_mdev->aqm_add, AP_DOMAINS); + + if (filter_adapters && filter_domains) + do_hotplug |= vfio_ap_mdev_filter_matrix(apm, aqm, matrix_mdev); + else if (filter_adapters) + do_hotplug |= + vfio_ap_mdev_filter_matrix(apm, + matrix_mdev->shadow_apcb.aqm, + matrix_mdev); + else + do_hotplug |= + vfio_ap_mdev_filter_matrix(matrix_mdev->shadow_apcb.apm, + aqm, matrix_mdev); + + if (bitmap_intersects(matrix_mdev->matrix.adm, matrix_mdev->adm_add, + AP_DOMAINS)) + do_hotplug |= vfio_ap_mdev_filter_cdoms(matrix_mdev); + + if (do_hotplug) + vfio_ap_mdev_update_guest_apcb(matrix_mdev); + + mutex_unlock(&matrix_dev->mdevs_lock); + mutex_unlock(&matrix_mdev->kvm->lock); +} + +void vfio_ap_on_scan_complete(struct ap_config_info *new_config_info, + struct ap_config_info *old_config_info) +{ + struct ap_matrix_mdev *matrix_mdev; + + mutex_lock(&matrix_dev->guests_lock); + + list_for_each_entry(matrix_mdev, &matrix_dev->mdev_list, node) { + if (bitmap_empty(matrix_mdev->apm_add, 
AP_DEVICES) && + bitmap_empty(matrix_mdev->aqm_add, AP_DOMAINS) && + bitmap_empty(matrix_mdev->adm_add, AP_DOMAINS)) + continue; + + vfio_ap_mdev_hot_plug_cfg(matrix_mdev); + bitmap_clear(matrix_mdev->apm_add, 0, AP_DEVICES); + bitmap_clear(matrix_mdev->aqm_add, 0, AP_DOMAINS); + bitmap_clear(matrix_mdev->adm_add, 0, AP_DOMAINS); + } + + mutex_unlock(&matrix_dev->guests_lock); +} diff --git a/drivers/s390/crypto/vfio_ap_private.h b/drivers/s390/crypto/vfio_ap_private.h index a26efd804d0d..d782cf463eab 100644 --- a/drivers/s390/crypto/vfio_ap_private.h +++ b/drivers/s390/crypto/vfio_ap_private.h @@ -19,6 +19,7 @@ #include <linux/mutex.h> #include <linux/kvm_host.h> #include <linux/vfio.h> +#include <linux/hashtable.h> #include "ap_bus.h" @@ -32,20 +33,26 @@ * @available_instances: number of mediated matrix devices that can be created * @info: the struct containing the output from the PQAP(QCI) instruction * @mdev_list: the list of mediated matrix devices created - * @lock: mutex for locking the AP matrix device. This lock will be + * @mdevs_lock: mutex for locking the AP matrix device. This lock will be * taken every time we fiddle with state managed by the vfio_ap * driver, be it using @mdev_list or writing the state of a * single ap_matrix_mdev device. It's quite coarse but we don't * expect much contention. * @vfio_ap_drv: the vfio_ap device driver + * @guests_lock: mutex for controlling access to a guest that is using AP + * devices passed through by the vfio_ap device driver. This lock + * will be taken when the AP devices are plugged into or unplugged + * from a guest, and when an ap_matrix_mdev device is added to or + * removed from @mdev_list or the list is iterated. 
*/ struct ap_matrix_dev { struct device device; atomic_t available_instances; struct ap_config_info info; struct list_head mdev_list; - struct mutex lock; + struct mutex mdevs_lock; /* serializes access to each ap_matrix_mdev */ struct ap_driver *vfio_ap_drv; + struct mutex guests_lock; /* serializes access to each KVM guest */ }; extern struct ap_matrix_dev *matrix_dev; @@ -75,48 +82,77 @@ struct ap_matrix { }; /** + * struct ap_queue_table - a table of queue objects. + * + * @queues: a hashtable of queues (struct vfio_ap_queue). + */ +struct ap_queue_table { + DECLARE_HASHTABLE(queues, 8); +}; + +/** * struct ap_matrix_mdev - Contains the data associated with a matrix mediated * device. * @vdev: the vfio device * @node: allows the ap_matrix_mdev struct to be added to a list * @matrix: the adapters, usage domains and control domains assigned to the * mediated matrix device. - * @iommu_notifier: notifier block used for specifying callback function for - * handling the VFIO_IOMMU_NOTIFY_DMA_UNMAP even + * @shadow_apcb: the shadow copy of the APCB field of the KVM guest's CRYCB * @kvm: the struct holding guest's state * @pqap_hook: the function pointer to the interception handler for the * PQAP(AQIC) instruction. 
* @mdev: the mediated device + * @qtable: table of queues (struct vfio_ap_queue) assigned to the mdev + * @apm_add: bitmap of APIDs added to the host's AP configuration + * @aqm_add: bitmap of APQIs added to the host's AP configuration + * @adm_add: bitmap of control domain numbers added to the host's AP + * configuration */ struct ap_matrix_mdev { struct vfio_device vdev; struct list_head node; struct ap_matrix matrix; - struct notifier_block iommu_notifier; + struct ap_matrix shadow_apcb; struct kvm *kvm; crypto_hook pqap_hook; struct mdev_device *mdev; + struct ap_queue_table qtable; + DECLARE_BITMAP(apm_add, AP_DEVICES); + DECLARE_BITMAP(aqm_add, AP_DOMAINS); + DECLARE_BITMAP(adm_add, AP_DOMAINS); }; /** * struct vfio_ap_queue - contains the data associated with a queue bound to the * vfio_ap device driver * @matrix_mdev: the matrix mediated device - * @saved_pfn: the guest PFN pinned for the guest + * @saved_iova: the notification indicator byte (nib) address * @apqn: the APQN of the AP queue device * @saved_isc: the guest ISC registered with the GIB interface + * @mdev_qnode: allows the vfio_ap_queue struct to be added to a hashtable + * @reset_rc: the status response code from the last reset of the queue */ struct vfio_ap_queue { struct ap_matrix_mdev *matrix_mdev; - unsigned long saved_pfn; + dma_addr_t saved_iova; int apqn; #define VFIO_AP_ISC_INVALID 0xff unsigned char saved_isc; + struct hlist_node mdev_qnode; + unsigned int reset_rc; }; int vfio_ap_mdev_register(void); void vfio_ap_mdev_unregister(void); -int vfio_ap_mdev_reset_queue(struct vfio_ap_queue *q, - unsigned int retry); + +int vfio_ap_mdev_probe_queue(struct ap_device *queue); +void vfio_ap_mdev_remove_queue(struct ap_device *queue); + +int vfio_ap_mdev_resource_in_use(unsigned long *apm, unsigned long *aqm); + +void vfio_ap_on_cfg_changed(struct ap_config_info *new_config_info, + struct ap_config_info *old_config_info); +void vfio_ap_on_scan_complete(struct ap_config_info *new_config_info, + 
struct ap_config_info *old_config_info); #endif /* _VFIO_AP_PRIVATE_H_ */ diff --git a/drivers/s390/net/ism_drv.c b/drivers/s390/net/ism_drv.c index 5f7e28de8b15..d34bb6ec1490 100644 --- a/drivers/s390/net/ism_drv.c +++ b/drivers/s390/net/ism_drv.c @@ -409,20 +409,19 @@ static void ism_create_system_eid(void) memcpy(&SYSTEM_EID.type, tmp, 4); } -static void ism_get_system_eid(struct smcd_dev *smcd, u8 **eid) +static u8 *ism_get_system_eid(void) { - *eid = &SYSTEM_EID.seid_string[0]; + return SYSTEM_EID.seid_string; } static u16 ism_get_chid(struct smcd_dev *smcd) { - struct ism_dev *ismdev; + struct ism_dev *ism = (struct ism_dev *)smcd->priv; - ismdev = (struct ism_dev *)smcd->priv; - if (!ismdev || !ismdev->pdev) + if (!ism || !ism->pdev) return 0; - return to_zpci(ismdev->pdev)->pchid; + return to_zpci(ism->pdev)->pchid; } static void ism_handle_event(struct ism_dev *ism) @@ -444,6 +443,7 @@ static irqreturn_t ism_handle_irq(int irq, void *data) struct ism_dev *ism = data; unsigned long bit, end; unsigned long *bv; + u16 dmbemask; bv = (void *) &ism->sba->dmb_bits[ISM_DMB_WORD_OFFSET]; end = sizeof(ism->sba->dmb_bits) * BITS_PER_BYTE - ISM_DMB_BIT_OFFSET; @@ -457,9 +457,10 @@ static irqreturn_t ism_handle_irq(int irq, void *data) break; clear_bit_inv(bit, bv); + dmbemask = ism->sba->dmbe_mask[bit + ISM_DMB_BIT_OFFSET]; ism->sba->dmbe_mask[bit + ISM_DMB_BIT_OFFSET] = 0; barrier(); - smcd_handle_irq(ism->smcd, bit + ISM_DMB_BIT_OFFSET); + smcd_handle_irq(ism->smcd, bit + ISM_DMB_BIT_OFFSET, dmbemask); } if (ism->sba->e) { diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 9e54fe76a9b2..8bd9fd51208c 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -763,6 +763,49 @@ static void qeth_issue_ipa_msg(struct qeth_ipa_cmd *cmd, int rc, ipa_name, com, CARD_DEVID(card)); } +static void qeth_default_link_info(struct qeth_card *card) +{ + struct qeth_link_info *link_info = &card->info.link_info; + 
+ QETH_CARD_TEXT(card, 2, "dftlinfo"); + link_info->duplex = DUPLEX_FULL; + + if (IS_IQD(card) || IS_VM_NIC(card)) { + link_info->speed = SPEED_10000; + link_info->port = PORT_FIBRE; + link_info->link_mode = QETH_LINK_MODE_FIBRE_SHORT; + } else { + switch (card->info.link_type) { + case QETH_LINK_TYPE_FAST_ETH: + case QETH_LINK_TYPE_LANE_ETH100: + link_info->speed = SPEED_100; + link_info->port = PORT_TP; + break; + case QETH_LINK_TYPE_GBIT_ETH: + case QETH_LINK_TYPE_LANE_ETH1000: + link_info->speed = SPEED_1000; + link_info->port = PORT_FIBRE; + break; + case QETH_LINK_TYPE_10GBIT_ETH: + link_info->speed = SPEED_10000; + link_info->port = PORT_FIBRE; + break; + case QETH_LINK_TYPE_25GBIT_ETH: + link_info->speed = SPEED_25000; + link_info->port = PORT_FIBRE; + break; + default: + dev_info(&card->gdev->dev, + "Unknown link type %x\n", + card->info.link_type); + link_info->speed = SPEED_UNKNOWN; + link_info->port = PORT_OTHER; + } + + link_info->link_mode = QETH_LINK_MODE_UNKNOWN; + } +} + static struct qeth_ipa_cmd *qeth_check_ipa_data(struct qeth_card *card, struct qeth_ipa_cmd *cmd) { @@ -790,6 +833,7 @@ static struct qeth_ipa_cmd *qeth_check_ipa_data(struct qeth_card *card, netdev_name(card->dev), card->info.chpid); qeth_issue_ipa_msg(cmd, cmd->hdr.return_code, card); netif_carrier_off(card->dev); + qeth_default_link_info(card); } return NULL; case IPA_CMD_STARTLAN: @@ -3565,7 +3609,7 @@ static void qeth_flush_buffers(struct qeth_qdio_out_q *queue, int index, if (!atomic_read(&queue->set_pci_flags_count)) { /* * there's no outstanding PCI any more, so we - * have to request a PCI to be sure the the PCI + * have to request a PCI to be sure the PCI * will wake at some time in the future then we * can flush packed buffers that might still be * hanging around, which can happen if no @@ -4744,92 +4788,6 @@ out_free: return rc; } -static int qeth_query_card_info_cb(struct qeth_card *card, - struct qeth_reply *reply, unsigned long data) -{ - struct qeth_ipa_cmd *cmd = 
(struct qeth_ipa_cmd *)data; - struct qeth_link_info *link_info = reply->param; - struct qeth_query_card_info *card_info; - - QETH_CARD_TEXT(card, 2, "qcrdincb"); - if (qeth_setadpparms_inspect_rc(cmd)) - return -EIO; - - card_info = &cmd->data.setadapterparms.data.card_info; - netdev_dbg(card->dev, - "card info: card_type=0x%02x, port_mode=0x%04x, port_speed=0x%08x\n", - card_info->card_type, card_info->port_mode, - card_info->port_speed); - - switch (card_info->port_mode) { - case CARD_INFO_PORTM_FULLDUPLEX: - link_info->duplex = DUPLEX_FULL; - break; - case CARD_INFO_PORTM_HALFDUPLEX: - link_info->duplex = DUPLEX_HALF; - break; - default: - link_info->duplex = DUPLEX_UNKNOWN; - } - - switch (card_info->card_type) { - case CARD_INFO_TYPE_1G_COPPER_A: - case CARD_INFO_TYPE_1G_COPPER_B: - link_info->speed = SPEED_1000; - link_info->port = PORT_TP; - break; - case CARD_INFO_TYPE_1G_FIBRE_A: - case CARD_INFO_TYPE_1G_FIBRE_B: - link_info->speed = SPEED_1000; - link_info->port = PORT_FIBRE; - break; - case CARD_INFO_TYPE_10G_FIBRE_A: - case CARD_INFO_TYPE_10G_FIBRE_B: - link_info->speed = SPEED_10000; - link_info->port = PORT_FIBRE; - break; - default: - switch (card_info->port_speed) { - case CARD_INFO_PORTS_10M: - link_info->speed = SPEED_10; - break; - case CARD_INFO_PORTS_100M: - link_info->speed = SPEED_100; - break; - case CARD_INFO_PORTS_1G: - link_info->speed = SPEED_1000; - break; - case CARD_INFO_PORTS_10G: - link_info->speed = SPEED_10000; - break; - case CARD_INFO_PORTS_25G: - link_info->speed = SPEED_25000; - break; - default: - link_info->speed = SPEED_UNKNOWN; - } - - link_info->port = PORT_OTHER; - } - - return 0; -} - -int qeth_query_card_info(struct qeth_card *card, - struct qeth_link_info *link_info) -{ - struct qeth_cmd_buffer *iob; - - QETH_CARD_TEXT(card, 2, "qcrdinfo"); - if (!qeth_adp_supported(card, IPA_SETADP_QUERY_CARD_INFO)) - return -EOPNOTSUPP; - iob = qeth_get_adapter_cmd(card, IPA_SETADP_QUERY_CARD_INFO, 0); - if (!iob) - return -ENOMEM; 
- - return qeth_send_ipa_cmd(card, iob, qeth_query_card_info_cb, link_info); -} - static int qeth_init_link_info_oat_cb(struct qeth_card *card, struct qeth_reply *reply_priv, unsigned long data) @@ -4839,6 +4797,7 @@ static int qeth_init_link_info_oat_cb(struct qeth_card *card, struct qeth_query_oat_physical_if *phys_if; struct qeth_query_oat_reply *reply; + QETH_CARD_TEXT(card, 2, "qoatincb"); if (qeth_setadpparms_inspect_rc(cmd)) return -EIO; @@ -4918,41 +4877,7 @@ static int qeth_init_link_info_oat_cb(struct qeth_card *card, static void qeth_init_link_info(struct qeth_card *card) { - card->info.link_info.duplex = DUPLEX_FULL; - - if (IS_IQD(card) || IS_VM_NIC(card)) { - card->info.link_info.speed = SPEED_10000; - card->info.link_info.port = PORT_FIBRE; - card->info.link_info.link_mode = QETH_LINK_MODE_FIBRE_SHORT; - } else { - switch (card->info.link_type) { - case QETH_LINK_TYPE_FAST_ETH: - case QETH_LINK_TYPE_LANE_ETH100: - card->info.link_info.speed = SPEED_100; - card->info.link_info.port = PORT_TP; - break; - case QETH_LINK_TYPE_GBIT_ETH: - case QETH_LINK_TYPE_LANE_ETH1000: - card->info.link_info.speed = SPEED_1000; - card->info.link_info.port = PORT_FIBRE; - break; - case QETH_LINK_TYPE_10GBIT_ETH: - card->info.link_info.speed = SPEED_10000; - card->info.link_info.port = PORT_FIBRE; - break; - case QETH_LINK_TYPE_25GBIT_ETH: - card->info.link_info.speed = SPEED_25000; - card->info.link_info.port = PORT_FIBRE; - break; - default: - dev_info(&card->gdev->dev, "Unknown link type %x\n", - card->info.link_type); - card->info.link_info.speed = SPEED_UNKNOWN; - card->info.link_info.port = PORT_OTHER; - } - - card->info.link_info.link_mode = QETH_LINK_MODE_UNKNOWN; - } + qeth_default_link_info(card); /* Get more accurate data via QUERY OAT: */ if (qeth_adp_supported(card, IPA_SETADP_QUERY_OAT)) { @@ -5461,6 +5386,7 @@ int qeth_set_offline(struct qeth_card *card, const struct qeth_discipline *disc, qeth_clear_working_pool_list(card); qeth_flush_local_addrs(card); 
card->info.promisc_mode = 0; + qeth_default_link_info(card); rc = qeth_stop_channel(&card->data); rc2 = qeth_stop_channel(&card->write); diff --git a/drivers/s390/net/qeth_ethtool.c b/drivers/s390/net/qeth_ethtool.c index b0b36b2132fe..9eba0a32e9f9 100644 --- a/drivers/s390/net/qeth_ethtool.c +++ b/drivers/s390/net/qeth_ethtool.c @@ -428,8 +428,8 @@ static int qeth_get_link_ksettings(struct net_device *netdev, struct ethtool_link_ksettings *cmd) { struct qeth_card *card = netdev->ml_priv; - struct qeth_link_info link_info; + QETH_CARD_TEXT(card, 4, "ethtglks"); cmd->base.speed = card->info.link_info.speed; cmd->base.duplex = card->info.link_info.duplex; cmd->base.port = card->info.link_info.port; @@ -439,16 +439,6 @@ static int qeth_get_link_ksettings(struct net_device *netdev, cmd->base.eth_tp_mdix = ETH_TP_MDI_INVALID; cmd->base.eth_tp_mdix_ctrl = ETH_TP_MDI_INVALID; - /* Check if we can obtain more accurate information. */ - if (!qeth_query_card_info(card, &link_info)) { - if (link_info.speed != SPEED_UNKNOWN) - cmd->base.speed = link_info.speed; - if (link_info.duplex != DUPLEX_UNKNOWN) - cmd->base.duplex = link_info.duplex; - if (link_info.port != PORT_OTHER) - cmd->base.port = link_info.port; - } - qeth_set_ethtool_link_modes(cmd, card->info.link_info.link_mode); return 0; diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c index 97e51c34e6cf..896896e32664 100644 --- a/drivers/s390/virtio/virtio_ccw.c +++ b/drivers/s390/virtio/virtio_ccw.c @@ -33,6 +33,7 @@ #include <asm/virtio-ccw.h> #include <asm/isc.h> #include <asm/airq.h> +#include <asm/tpi.h> /* * virtio related functions @@ -204,7 +205,8 @@ static void drop_airq_indicator(struct virtqueue *vq, struct airq_info *info) write_unlock_irqrestore(&info->lock, flags); } -static void virtio_airq_handler(struct airq_struct *airq, bool floating) +static void virtio_airq_handler(struct airq_struct *airq, + struct tpi_info *tpi_info) { struct airq_info *info = container_of(airq, struct 
airq_info, airq); unsigned long ai; @@ -240,7 +242,7 @@ static struct airq_info *new_airq_info(int index) return NULL; rwlock_init(&info->lock); info->aiv = airq_iv_create(VIRTIO_IV_BITS, AIRQ_IV_ALLOC | AIRQ_IV_PTR - | AIRQ_IV_CACHELINE); + | AIRQ_IV_CACHELINE, NULL); if (!info->aiv) { kfree(info); return NULL; @@ -530,6 +532,9 @@ static struct virtqueue *virtio_ccw_setup_vq(struct virtio_device *vdev, err = -ENOMEM; goto out_err; } + + vq->num_max = info->num; + /* it may have been reduced */ info->num = virtqueue_get_vring_size(vq); @@ -632,6 +637,7 @@ static int virtio_ccw_find_vqs(struct virtio_device *vdev, unsigned nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], const char * const names[], + u32 sizes[], const bool *ctx, struct irq_affinity *desc) { @@ -1136,8 +1142,13 @@ static void virtio_ccw_int_handler(struct ccw_device *cdev, vcdev->err = -EIO; } virtio_ccw_check_activity(vcdev, activity); - /* Interrupts are disabled here */ +#ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION + /* + * Paired with virtio_ccw_synchronize_cbs() and interrupts are + * disabled here. + */ read_lock(&vcdev->irq_lock); +#endif for_each_set_bit(i, indicators(vcdev), sizeof(*indicators(vcdev)) * BITS_PER_BYTE) { /* The bit clear must happen before the vring kick. */ @@ -1146,7 +1157,9 @@ static void virtio_ccw_int_handler(struct ccw_device *cdev, vq = virtio_ccw_vq_by_ind(vcdev, i); vring_interrupt(0, vq); } +#ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION read_unlock(&vcdev->irq_lock); +#endif if (test_bit(0, indicators2(vcdev))) { virtio_config_changed(&vcdev->vdev); clear_bit(0, indicators2(vcdev)); |