From e36ba5ab808ef6237c3148d469c8238674230e2b Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 11 Mar 2025 12:44:23 -0700 Subject: iommufd: Add IOMMUFD_OBJ_VEVENTQ and IOMMUFD_CMD_VEVENTQ_ALLOC Introduce a new IOMMUFD_OBJ_VEVENTQ object for vIOMMU Event Queue that provides user space (VMM) another FD to read the vIOMMU Events. Allow a vIOMMU object to allocate vEVENTQs, with a condition that each vIOMMU can only have one single vEVENTQ per type. Add iommufd_veventq_alloc() with iommufd_veventq_ops for the new ioctl. Link: https://patch.msgid.link/r/21acf0751dd5c93846935ee06f93b9c65eff5e04.1741719725.git.nicolinc@nvidia.com Reviewed-by: Lu Baolu Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- include/linux/iommufd.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index 11110c749200..8948b1836940 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -34,6 +34,7 @@ enum iommufd_object_type { IOMMUFD_OBJ_FAULT, IOMMUFD_OBJ_VIOMMU, IOMMUFD_OBJ_VDEVICE, + IOMMUFD_OBJ_VEVENTQ, #ifdef CONFIG_IOMMUFD_TEST IOMMUFD_OBJ_SELFTEST, #endif @@ -93,6 +94,8 @@ struct iommufd_viommu { const struct iommufd_viommu_ops *ops; struct xarray vdevs; + struct list_head veventqs; + struct rw_semaphore veventqs_rwsem; unsigned int type; }; -- cgit v1.2.3 From ea94b211c5483080b749c142090f4c4de4926e51 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 11 Mar 2025 12:44:24 -0700 Subject: iommufd/viommu: Add iommufd_viommu_get_vdev_id helper This is a reverse search v.s. iommufd_viommu_find_dev, as drivers may want to convert a struct device pointer (physical) to its virtual device ID for an event injection to the user space VM. Again, this avoids exposing more core structures to the drivers, than the iommufd_viommu alone. 
Link: https://patch.msgid.link/r/18b8e8bc1b8104d43b205d21602c036fd0804e56.1741719725.git.nicolinc@nvidia.com Reviewed-by: Lu Baolu Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/driver.c | 24 ++++++++++++++++++++++++ include/linux/iommufd.h | 9 +++++++++ 2 files changed, 33 insertions(+) (limited to 'include/linux') diff --git a/drivers/iommu/iommufd/driver.c b/drivers/iommu/iommufd/driver.c index 2d98b04ff1cb..f132b98fb899 100644 --- a/drivers/iommu/iommufd/driver.c +++ b/drivers/iommu/iommufd/driver.c @@ -49,5 +49,29 @@ struct device *iommufd_viommu_find_dev(struct iommufd_viommu *viommu, } EXPORT_SYMBOL_NS_GPL(iommufd_viommu_find_dev, "IOMMUFD"); +/* Return -ENOENT if device is not associated to the vIOMMU */ +int iommufd_viommu_get_vdev_id(struct iommufd_viommu *viommu, + struct device *dev, unsigned long *vdev_id) +{ + struct iommufd_vdevice *vdev; + unsigned long index; + int rc = -ENOENT; + + if (WARN_ON_ONCE(!vdev_id)) + return -EINVAL; + + xa_lock(&viommu->vdevs); + xa_for_each(&viommu->vdevs, index, vdev) { + if (vdev->dev == dev) { + *vdev_id = vdev->id; + rc = 0; + break; + } + } + xa_unlock(&viommu->vdevs); + return rc; +} +EXPORT_SYMBOL_NS_GPL(iommufd_viommu_get_vdev_id, "IOMMUFD"); + MODULE_DESCRIPTION("iommufd code shared with builtin modules"); MODULE_LICENSE("GPL"); diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index 8948b1836940..05cb393aff0a 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -190,6 +190,8 @@ struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx, enum iommufd_object_type type); struct device *iommufd_viommu_find_dev(struct iommufd_viommu *viommu, unsigned long vdev_id); +int iommufd_viommu_get_vdev_id(struct iommufd_viommu *viommu, + struct device *dev, unsigned long *vdev_id); #else /* !CONFIG_IOMMUFD_DRIVER_CORE */ static inline struct iommufd_object * _iommufd_object_alloc(struct 
iommufd_ctx *ictx, size_t size, @@ -203,6 +205,13 @@ iommufd_viommu_find_dev(struct iommufd_viommu *viommu, unsigned long vdev_id) { return NULL; } + +static inline int iommufd_viommu_get_vdev_id(struct iommufd_viommu *viommu, + struct device *dev, + unsigned long *vdev_id) +{ + return -ENOENT; +} #endif /* CONFIG_IOMMUFD_DRIVER_CORE */ /* -- cgit v1.2.3 From e8e1ef9b77a7a09b7809890a52229f24d3c8b532 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 11 Mar 2025 12:44:25 -0700 Subject: iommufd/viommu: Add iommufd_viommu_report_event helper Similar to iommu_report_device_fault, this allows IOMMU drivers to report vIOMMU events from threaded IRQ handlers to user space hypervisors. Link: https://patch.msgid.link/r/44be825042c8255e75d0151b338ffd8ba0e4920b.1741719725.git.nicolinc@nvidia.com Reviewed-by: Lu Baolu Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/driver.c | 48 ++++++++++++++++++++++++++++++++++++++++++ include/linux/iommufd.h | 11 ++++++++++ 2 files changed, 59 insertions(+) (limited to 'include/linux') diff --git a/drivers/iommu/iommufd/driver.c b/drivers/iommu/iommufd/driver.c index f132b98fb899..75b365561c16 100644 --- a/drivers/iommu/iommufd/driver.c +++ b/drivers/iommu/iommufd/driver.c @@ -73,5 +73,53 @@ int iommufd_viommu_get_vdev_id(struct iommufd_viommu *viommu, } EXPORT_SYMBOL_NS_GPL(iommufd_viommu_get_vdev_id, "IOMMUFD"); +/* + * Typically called in driver's threaded IRQ handler. 
+ * The @type and @event_data must be defined in include/uapi/linux/iommufd.h + */ +int iommufd_viommu_report_event(struct iommufd_viommu *viommu, + enum iommu_veventq_type type, void *event_data, + size_t data_len) +{ + struct iommufd_veventq *veventq; + struct iommufd_vevent *vevent; + int rc = 0; + + if (WARN_ON_ONCE(!data_len || !event_data)) + return -EINVAL; + + down_read(&viommu->veventqs_rwsem); + + veventq = iommufd_viommu_find_veventq(viommu, type); + if (!veventq) { + rc = -EOPNOTSUPP; + goto out_unlock_veventqs; + } + + spin_lock(&veventq->common.lock); + if (veventq->num_events == veventq->depth) { + vevent = &veventq->lost_events_header; + goto out_set_header; + } + + vevent = kmalloc(struct_size(vevent, event_data, data_len), GFP_ATOMIC); + if (!vevent) { + rc = -ENOMEM; + vevent = &veventq->lost_events_header; + goto out_set_header; + } + memcpy(vevent->event_data, event_data, data_len); + vevent->data_len = data_len; + veventq->num_events++; + +out_set_header: + iommufd_vevent_handler(veventq, vevent); + spin_unlock(&veventq->common.lock); +out_unlock_veventqs: + up_read(&viommu->veventqs_rwsem); + return rc; +} +EXPORT_SYMBOL_NS_GPL(iommufd_viommu_report_event, "IOMMUFD"); + MODULE_DESCRIPTION("iommufd code shared with builtin modules"); MODULE_LICENSE("GPL"); diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index 05cb393aff0a..60eff9272551 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -11,6 +11,7 @@ #include #include #include +#include struct device; struct file; @@ -192,6 +193,9 @@ struct device *iommufd_viommu_find_dev(struct iommufd_viommu *viommu, unsigned long vdev_id); int iommufd_viommu_get_vdev_id(struct iommufd_viommu *viommu, struct device *dev, unsigned long *vdev_id); +int iommufd_viommu_report_event(struct iommufd_viommu *viommu, + enum iommu_veventq_type type, void *event_data, + size_t data_len); #else /* !CONFIG_IOMMUFD_DRIVER_CORE */ static inline struct iommufd_object * 
_iommufd_object_alloc(struct iommufd_ctx *ictx, size_t size, @@ -212,6 +216,13 @@ static inline int iommufd_viommu_get_vdev_id(struct iommufd_viommu *viommu, { return -ENOENT; } + +static inline int iommufd_viommu_report_event(struct iommufd_viommu *viommu, + enum iommu_veventq_type type, + void *event_data, size_t data_len) +{ + return -EOPNOTSUPP; +} #endif /* CONFIG_IOMMUFD_DRIVER_CORE */ /* -- cgit v1.2.3 From 6aa63a4ec947f350d1a2f9f6aba8591a2455d192 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 24 Mar 2025 21:05:15 -0700 Subject: iommu: Sort out domain user data When DMA/MSI cookies were made first-class citizens back in commit 46983fcd67ac ("iommu: Pull IOVA cookie management into the core"), there was no real need to further expose the two different cookie types. However, now that IOMMUFD wants to add a third type of MSI-mapping cookie, we do have a nicely compelling reason to properly disambiguate things at the domain level beyond just vaguely guessing from the domain type. Meanwhile, we also effectively have another "cookie" in the form of the anonymous union for other user data, which isn't much better in terms of being vague and unenforced. The fact is that all these cookie types are mutually exclusive, in the sense that combining them makes zero sense and/or would be catastrophic (iommu_set_fault_handler() on an SVA domain, anyone?) - the only combination which *might* be reasonable is perhaps a fault handler and an MSI cookie, but nobody's doing that at the moment, so let's rule it out as well for the sake of being clear and robust. To that end, we pull DMA and MSI cookies apart a little more, mostly to clear up the ambiguity at domain teardown, then for clarity (and to save a little space), move them into the union, whose ownership we can then properly describe and enforce entirely unambiguously. 
[nicolinc: rebase on latest tree; use prefix IOMMU_COOKIE_; merge unions in iommu_domain; add IOMMU_COOKIE_IOMMUFD for iommufd_hwpt] Link: https://patch.msgid.link/r/1ace9076c95204bbe193ee77499d395f15f44b23.1742871535.git.nicolinc@nvidia.com Signed-off-by: Robin Murphy Reviewed-by: Kevin Tian Signed-off-by: Nicolin Chen Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/iommu/dma-iommu.c | 194 +++++++++++++++++++---------------- drivers/iommu/dma-iommu.h | 5 + drivers/iommu/iommu-sva.c | 1 + drivers/iommu/iommu.c | 18 +++- drivers/iommu/iommufd/hw_pagetable.c | 3 + include/linux/iommu.h | 20 +++- 6 files changed, 143 insertions(+), 98 deletions(-) (limited to 'include/linux') diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 94263ed2c564..31a7b4b81656 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -42,11 +42,6 @@ struct iommu_dma_msi_page { phys_addr_t phys; }; -enum iommu_dma_cookie_type { - IOMMU_DMA_IOVA_COOKIE, - IOMMU_DMA_MSI_COOKIE, -}; - enum iommu_dma_queue_type { IOMMU_DMA_OPTS_PER_CPU_QUEUE, IOMMU_DMA_OPTS_SINGLE_QUEUE, @@ -59,35 +54,31 @@ struct iommu_dma_options { }; struct iommu_dma_cookie { - enum iommu_dma_cookie_type type; + struct iova_domain iovad; + struct list_head msi_page_list; + /* Flush queue */ union { - /* Full allocator for IOMMU_DMA_IOVA_COOKIE */ - struct { - struct iova_domain iovad; - /* Flush queue */ - union { - struct iova_fq *single_fq; - struct iova_fq __percpu *percpu_fq; - }; - /* Number of TLB flushes that have been started */ - atomic64_t fq_flush_start_cnt; - /* Number of TLB flushes that have been finished */ - atomic64_t fq_flush_finish_cnt; - /* Timer to regularily empty the flush queues */ - struct timer_list fq_timer; - /* 1 when timer is active, 0 when not */ - atomic_t fq_timer_on; - }; - /* Trivial linear page allocator for IOMMU_DMA_MSI_COOKIE */ - dma_addr_t msi_iova; + struct iova_fq *single_fq; + struct iova_fq __percpu *percpu_fq; }; - struct 
list_head msi_page_list; - + /* Number of TLB flushes that have been started */ + atomic64_t fq_flush_start_cnt; + /* Number of TLB flushes that have been finished */ + atomic64_t fq_flush_finish_cnt; + /* Timer to regularily empty the flush queues */ + struct timer_list fq_timer; + /* 1 when timer is active, 0 when not */ + atomic_t fq_timer_on; /* Domain for flush queue callback; NULL if flush queue not in use */ - struct iommu_domain *fq_domain; + struct iommu_domain *fq_domain; /* Options for dma-iommu use */ - struct iommu_dma_options options; - struct mutex mutex; + struct iommu_dma_options options; + struct mutex mutex; +}; + +struct iommu_dma_msi_cookie { + dma_addr_t msi_iova; + struct list_head msi_page_list; }; static DEFINE_STATIC_KEY_FALSE(iommu_deferred_attach_enabled); @@ -369,40 +360,26 @@ int iommu_dma_init_fq(struct iommu_domain *domain) return 0; } -static inline size_t cookie_msi_granule(struct iommu_dma_cookie *cookie) -{ - if (cookie->type == IOMMU_DMA_IOVA_COOKIE) - return cookie->iovad.granule; - return PAGE_SIZE; -} - -static struct iommu_dma_cookie *cookie_alloc(enum iommu_dma_cookie_type type) -{ - struct iommu_dma_cookie *cookie; - - cookie = kzalloc(sizeof(*cookie), GFP_KERNEL); - if (cookie) { - INIT_LIST_HEAD(&cookie->msi_page_list); - cookie->type = type; - } - return cookie; -} - /** * iommu_get_dma_cookie - Acquire DMA-API resources for a domain * @domain: IOMMU domain to prepare for DMA-API usage */ int iommu_get_dma_cookie(struct iommu_domain *domain) { - if (domain->iova_cookie) + struct iommu_dma_cookie *cookie; + + if (domain->cookie_type != IOMMU_COOKIE_NONE) return -EEXIST; - domain->iova_cookie = cookie_alloc(IOMMU_DMA_IOVA_COOKIE); - if (!domain->iova_cookie) + cookie = kzalloc(sizeof(*cookie), GFP_KERNEL); + if (!cookie) return -ENOMEM; - mutex_init(&domain->iova_cookie->mutex); + mutex_init(&cookie->mutex); + INIT_LIST_HEAD(&cookie->msi_page_list); iommu_domain_set_sw_msi(domain, iommu_dma_sw_msi); + domain->cookie_type 
= IOMMU_COOKIE_DMA_IOVA; + domain->iova_cookie = cookie; return 0; } @@ -420,29 +397,30 @@ int iommu_get_dma_cookie(struct iommu_domain *domain) */ int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base) { - struct iommu_dma_cookie *cookie; + struct iommu_dma_msi_cookie *cookie; if (domain->type != IOMMU_DOMAIN_UNMANAGED) return -EINVAL; - if (domain->iova_cookie) + if (domain->cookie_type != IOMMU_COOKIE_NONE) return -EEXIST; - cookie = cookie_alloc(IOMMU_DMA_MSI_COOKIE); + cookie = kzalloc(sizeof(*cookie), GFP_KERNEL); if (!cookie) return -ENOMEM; cookie->msi_iova = base; - domain->iova_cookie = cookie; + INIT_LIST_HEAD(&cookie->msi_page_list); iommu_domain_set_sw_msi(domain, iommu_dma_sw_msi); + domain->cookie_type = IOMMU_COOKIE_DMA_MSI; + domain->msi_cookie = cookie; return 0; } EXPORT_SYMBOL(iommu_get_msi_cookie); /** * iommu_put_dma_cookie - Release a domain's DMA mapping resources - * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie() or - * iommu_get_msi_cookie() + * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie() */ void iommu_put_dma_cookie(struct iommu_domain *domain) { @@ -454,20 +432,27 @@ void iommu_put_dma_cookie(struct iommu_domain *domain) return; #endif - if (!cookie) - return; - - if (cookie->type == IOMMU_DMA_IOVA_COOKIE && cookie->iovad.granule) { + if (cookie->iovad.granule) { iommu_dma_free_fq(cookie); put_iova_domain(&cookie->iovad); } + list_for_each_entry_safe(msi, tmp, &cookie->msi_page_list, list) + kfree(msi); + kfree(cookie); +} - list_for_each_entry_safe(msi, tmp, &cookie->msi_page_list, list) { - list_del(&msi->list); +/** + * iommu_put_msi_cookie - Release a domain's MSI mapping resources + * @domain: IOMMU domain previously prepared by iommu_get_msi_cookie() + */ +void iommu_put_msi_cookie(struct iommu_domain *domain) +{ + struct iommu_dma_msi_cookie *cookie = domain->msi_cookie; + struct iommu_dma_msi_page *msi, *tmp; + + list_for_each_entry_safe(msi, tmp, 
&cookie->msi_page_list, list) kfree(msi); - } kfree(cookie); - domain->iova_cookie = NULL; } /** @@ -687,7 +672,7 @@ static int iommu_dma_init_domain(struct iommu_domain *domain, struct device *dev struct iova_domain *iovad; int ret; - if (!cookie || cookie->type != IOMMU_DMA_IOVA_COOKIE) + if (!cookie || domain->cookie_type != IOMMU_COOKIE_DMA_IOVA) return -EINVAL; iovad = &cookie->iovad; @@ -777,9 +762,9 @@ static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain, struct iova_domain *iovad = &cookie->iovad; unsigned long shift, iova_len, iova; - if (cookie->type == IOMMU_DMA_MSI_COOKIE) { - cookie->msi_iova += size; - return cookie->msi_iova - size; + if (domain->cookie_type == IOMMU_COOKIE_DMA_MSI) { + domain->msi_cookie->msi_iova += size; + return domain->msi_cookie->msi_iova - size; } shift = iova_shift(iovad); @@ -816,16 +801,16 @@ done: return (dma_addr_t)iova << shift; } -static void iommu_dma_free_iova(struct iommu_dma_cookie *cookie, - dma_addr_t iova, size_t size, struct iommu_iotlb_gather *gather) +static void iommu_dma_free_iova(struct iommu_domain *domain, dma_addr_t iova, + size_t size, struct iommu_iotlb_gather *gather) { - struct iova_domain *iovad = &cookie->iovad; + struct iova_domain *iovad = &domain->iova_cookie->iovad; /* The MSI case is only ever cleaning up its most recent allocation */ - if (cookie->type == IOMMU_DMA_MSI_COOKIE) - cookie->msi_iova -= size; + if (domain->cookie_type == IOMMU_COOKIE_DMA_MSI) + domain->msi_cookie->msi_iova -= size; else if (gather && gather->queued) - queue_iova(cookie, iova_pfn(iovad, iova), + queue_iova(domain->iova_cookie, iova_pfn(iovad, iova), size >> iova_shift(iovad), &gather->freelist); else @@ -853,7 +838,7 @@ static void __iommu_dma_unmap(struct device *dev, dma_addr_t dma_addr, if (!iotlb_gather.queued) iommu_iotlb_sync(domain, &iotlb_gather); - iommu_dma_free_iova(cookie, dma_addr, size, &iotlb_gather); + iommu_dma_free_iova(domain, dma_addr, size, &iotlb_gather); } static dma_addr_t 
__iommu_dma_map(struct device *dev, phys_addr_t phys, @@ -881,7 +866,7 @@ static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys, return DMA_MAPPING_ERROR; if (iommu_map(domain, iova, phys - iova_off, size, prot, GFP_ATOMIC)) { - iommu_dma_free_iova(cookie, iova, size, NULL); + iommu_dma_free_iova(domain, iova, size, NULL); return DMA_MAPPING_ERROR; } return iova + iova_off; @@ -1018,7 +1003,7 @@ static struct page **__iommu_dma_alloc_noncontiguous(struct device *dev, out_free_sg: sg_free_table(sgt); out_free_iova: - iommu_dma_free_iova(cookie, iova, size, NULL); + iommu_dma_free_iova(domain, iova, size, NULL); out_free_pages: __iommu_dma_free_pages(pages, count); return NULL; @@ -1495,7 +1480,7 @@ int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, return __finalise_sg(dev, sg, nents, iova); out_free_iova: - iommu_dma_free_iova(cookie, iova, iova_len, NULL); + iommu_dma_free_iova(domain, iova, iova_len, NULL); out_restore_sg: __invalidate_sg(sg, nents); out: @@ -1773,17 +1758,47 @@ out_err: dev->dma_iommu = false; } +static bool has_msi_cookie(const struct iommu_domain *domain) +{ + return domain && (domain->cookie_type == IOMMU_COOKIE_DMA_IOVA || + domain->cookie_type == IOMMU_COOKIE_DMA_MSI); +} + +static size_t cookie_msi_granule(const struct iommu_domain *domain) +{ + switch (domain->cookie_type) { + case IOMMU_COOKIE_DMA_IOVA: + return domain->iova_cookie->iovad.granule; + case IOMMU_COOKIE_DMA_MSI: + return PAGE_SIZE; + default: + unreachable(); + }; +} + +static struct list_head *cookie_msi_pages(const struct iommu_domain *domain) +{ + switch (domain->cookie_type) { + case IOMMU_COOKIE_DMA_IOVA: + return &domain->iova_cookie->msi_page_list; + case IOMMU_COOKIE_DMA_MSI: + return &domain->msi_cookie->msi_page_list; + default: + unreachable(); + }; +} + static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev, phys_addr_t msi_addr, struct iommu_domain *domain) { - struct iommu_dma_cookie *cookie = 
domain->iova_cookie; + struct list_head *msi_page_list = cookie_msi_pages(domain); struct iommu_dma_msi_page *msi_page; dma_addr_t iova; int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO; - size_t size = cookie_msi_granule(cookie); + size_t size = cookie_msi_granule(domain); msi_addr &= ~(phys_addr_t)(size - 1); - list_for_each_entry(msi_page, &cookie->msi_page_list, list) + list_for_each_entry(msi_page, msi_page_list, list) if (msi_page->phys == msi_addr) return msi_page; @@ -1801,11 +1816,11 @@ static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev, INIT_LIST_HEAD(&msi_page->list); msi_page->phys = msi_addr; msi_page->iova = iova; - list_add(&msi_page->list, &cookie->msi_page_list); + list_add(&msi_page->list, msi_page_list); return msi_page; out_free_iova: - iommu_dma_free_iova(cookie, iova, size, NULL); + iommu_dma_free_iova(domain, iova, size, NULL); out_free_page: kfree(msi_page); return NULL; @@ -1817,7 +1832,7 @@ static int iommu_dma_sw_msi(struct iommu_domain *domain, struct msi_desc *desc, struct device *dev = msi_desc_to_dev(desc); const struct iommu_dma_msi_page *msi_page; - if (!domain->iova_cookie) { + if (!has_msi_cookie(domain)) { msi_desc_set_iommu_msi_iova(desc, 0, 0); return 0; } @@ -1827,9 +1842,8 @@ static int iommu_dma_sw_msi(struct iommu_domain *domain, struct msi_desc *desc, if (!msi_page) return -ENOMEM; - msi_desc_set_iommu_msi_iova( - desc, msi_page->iova, - ilog2(cookie_msi_granule(domain->iova_cookie))); + msi_desc_set_iommu_msi_iova(desc, msi_page->iova, + ilog2(cookie_msi_granule(domain))); return 0; } diff --git a/drivers/iommu/dma-iommu.h b/drivers/iommu/dma-iommu.h index c12d63457c76..9cca11806e5d 100644 --- a/drivers/iommu/dma-iommu.h +++ b/drivers/iommu/dma-iommu.h @@ -13,6 +13,7 @@ void iommu_setup_dma_ops(struct device *dev); int iommu_get_dma_cookie(struct iommu_domain *domain); void iommu_put_dma_cookie(struct iommu_domain *domain); +void iommu_put_msi_cookie(struct iommu_domain *domain); int 
iommu_dma_init_fq(struct iommu_domain *domain); @@ -40,6 +41,10 @@ static inline void iommu_put_dma_cookie(struct iommu_domain *domain) { } +static inline void iommu_put_msi_cookie(struct iommu_domain *domain) +{ +} + static inline void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list) { } diff --git a/drivers/iommu/iommu-sva.c b/drivers/iommu/iommu-sva.c index 503c5d23c1ea..ab18bc494eef 100644 --- a/drivers/iommu/iommu-sva.c +++ b/drivers/iommu/iommu-sva.c @@ -310,6 +310,7 @@ static struct iommu_domain *iommu_sva_domain_alloc(struct device *dev, } domain->type = IOMMU_DOMAIN_SVA; + domain->cookie_type = IOMMU_COOKIE_SVA; mmgrab(mm); domain->mm = mm; domain->owner = ops; diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 0ee17893810f..c92e47f333cb 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -1953,8 +1953,10 @@ void iommu_set_fault_handler(struct iommu_domain *domain, iommu_fault_handler_t handler, void *token) { - BUG_ON(!domain); + if (WARN_ON(!domain || domain->cookie_type != IOMMU_COOKIE_NONE)) + return; + domain->cookie_type = IOMMU_COOKIE_FAULT_HANDLER; domain->handler = handler; domain->handler_token = token; } @@ -2024,9 +2026,19 @@ EXPORT_SYMBOL_GPL(iommu_paging_domain_alloc_flags); void iommu_domain_free(struct iommu_domain *domain) { - if (domain->type == IOMMU_DOMAIN_SVA) + switch (domain->cookie_type) { + case IOMMU_COOKIE_DMA_IOVA: + iommu_put_dma_cookie(domain); + break; + case IOMMU_COOKIE_DMA_MSI: + iommu_put_msi_cookie(domain); + break; + case IOMMU_COOKIE_SVA: mmdrop(domain->mm); - iommu_put_dma_cookie(domain); + break; + default: + break; + } if (domain->ops->free) domain->ops->free(domain); } diff --git a/drivers/iommu/iommufd/hw_pagetable.c b/drivers/iommu/iommufd/hw_pagetable.c index 9a89f3a28dc5..fded3f07bfa7 100644 --- a/drivers/iommu/iommufd/hw_pagetable.c +++ b/drivers/iommu/iommufd/hw_pagetable.c @@ -160,6 +160,7 @@ iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct 
iommufd_ioas *ioas, } } hwpt->domain->iommufd_hwpt = hwpt; + hwpt->domain->cookie_type = IOMMU_COOKIE_IOMMUFD; iommu_domain_set_sw_msi(hwpt->domain, iommufd_sw_msi); /* @@ -257,6 +258,7 @@ iommufd_hwpt_nested_alloc(struct iommufd_ctx *ictx, } hwpt->domain->owner = ops; hwpt->domain->iommufd_hwpt = hwpt; + hwpt->domain->cookie_type = IOMMU_COOKIE_IOMMUFD; iommu_domain_set_sw_msi(hwpt->domain, iommufd_sw_msi); if (WARN_ON_ONCE(hwpt->domain->type != IOMMU_DOMAIN_NESTED)) { @@ -315,6 +317,7 @@ iommufd_viommu_alloc_hwpt_nested(struct iommufd_viommu *viommu, u32 flags, } hwpt->domain->iommufd_hwpt = hwpt; hwpt->domain->owner = viommu->iommu_dev->ops; + hwpt->domain->cookie_type = IOMMU_COOKIE_IOMMUFD; iommu_domain_set_sw_msi(hwpt->domain, iommufd_sw_msi); if (WARN_ON_ONCE(hwpt->domain->type != IOMMU_DOMAIN_NESTED)) { diff --git a/include/linux/iommu.h b/include/linux/iommu.h index e93d2e918599..06cc14e9993d 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -41,6 +41,7 @@ struct iommu_dirty_ops; struct notifier_block; struct iommu_sva; struct iommu_dma_cookie; +struct iommu_dma_msi_cookie; struct iommu_fault_param; struct iommufd_ctx; struct iommufd_viommu; @@ -165,6 +166,15 @@ struct iommu_domain_geometry { bool force_aperture; /* DMA only allowed in mappable range? 
*/ }; +enum iommu_domain_cookie_type { + IOMMU_COOKIE_NONE, + IOMMU_COOKIE_DMA_IOVA, + IOMMU_COOKIE_DMA_MSI, + IOMMU_COOKIE_FAULT_HANDLER, + IOMMU_COOKIE_SVA, + IOMMU_COOKIE_IOMMUFD, +}; + /* Domain feature flags */ #define __IOMMU_DOMAIN_PAGING (1U << 0) /* Support for iommu_map/unmap */ #define __IOMMU_DOMAIN_DMA_API (1U << 1) /* Domain for use in DMA-API @@ -211,12 +221,12 @@ struct iommu_domain_geometry { struct iommu_domain { unsigned type; + enum iommu_domain_cookie_type cookie_type; const struct iommu_domain_ops *ops; const struct iommu_dirty_ops *dirty_ops; const struct iommu_ops *owner; /* Whose domain_alloc we came from */ unsigned long pgsize_bitmap; /* Bitmap of page sizes in use */ struct iommu_domain_geometry geometry; - struct iommu_dma_cookie *iova_cookie; int (*iopf_handler)(struct iopf_group *group); #if IS_ENABLED(CONFIG_IRQ_MSI_IOMMU) @@ -224,10 +234,10 @@ struct iommu_domain { phys_addr_t msi_addr); #endif - union { /* Pointer usable by owner of the domain */ - struct iommufd_hw_pagetable *iommufd_hwpt; /* iommufd */ - }; - union { /* Fault handler */ + union { /* cookie */ + struct iommu_dma_cookie *iova_cookie; + struct iommu_dma_msi_cookie *msi_cookie; + struct iommufd_hw_pagetable *iommufd_hwpt; struct { iommu_fault_handler_t handler; void *handler_token; -- cgit v1.2.3 From 06d54f00f3f5a29cbf43410ac93ee2dd89e3b711 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Mon, 24 Mar 2025 21:05:17 -0700 Subject: iommu: Drop sw_msi from iommu_domain There are only two sw_msi implementations in the entire system, thus it's not very necessary to have an sw_msi pointer. Instead, check domain->cookie_type to call the two sw_msi implementations directly from the core code. 
Link: https://patch.msgid.link/r/7ded87c871afcbaac665b71354de0a335087bf0f.1742871535.git.nicolinc@nvidia.com Suggested-by: Robin Murphy Reviewed-by: Robin Murphy Reviewed-by: Kevin Tian Signed-off-by: Nicolin Chen Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/iommu/dma-iommu.c | 14 ++------------ drivers/iommu/dma-iommu.h | 9 +++++++++ drivers/iommu/iommu.c | 18 ++++++++++++++++-- drivers/iommu/iommufd/hw_pagetable.c | 3 --- include/linux/iommu.h | 15 --------------- 5 files changed, 27 insertions(+), 32 deletions(-) (limited to 'include/linux') diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 31a7b4b81656..2bd9f80a83fe 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -94,9 +94,6 @@ static int __init iommu_dma_forcedac_setup(char *str) } early_param("iommu.forcedac", iommu_dma_forcedac_setup); -static int iommu_dma_sw_msi(struct iommu_domain *domain, struct msi_desc *desc, - phys_addr_t msi_addr); - /* Number of entries per flush queue */ #define IOVA_DEFAULT_FQ_SIZE 256 #define IOVA_SINGLE_FQ_SIZE 32768 @@ -377,7 +374,6 @@ int iommu_get_dma_cookie(struct iommu_domain *domain) mutex_init(&cookie->mutex); INIT_LIST_HEAD(&cookie->msi_page_list); - iommu_domain_set_sw_msi(domain, iommu_dma_sw_msi); domain->cookie_type = IOMMU_COOKIE_DMA_IOVA; domain->iova_cookie = cookie; return 0; @@ -411,7 +407,6 @@ int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base) cookie->msi_iova = base; INIT_LIST_HEAD(&cookie->msi_page_list); - iommu_domain_set_sw_msi(domain, iommu_dma_sw_msi); domain->cookie_type = IOMMU_COOKIE_DMA_MSI; domain->msi_cookie = cookie; return 0; @@ -427,11 +422,6 @@ void iommu_put_dma_cookie(struct iommu_domain *domain) struct iommu_dma_cookie *cookie = domain->iova_cookie; struct iommu_dma_msi_page *msi, *tmp; -#if IS_ENABLED(CONFIG_IRQ_MSI_IOMMU) - if (domain->sw_msi != iommu_dma_sw_msi) - return; -#endif - if (cookie->iovad.granule) { iommu_dma_free_fq(cookie); 
put_iova_domain(&cookie->iovad); @@ -1826,8 +1816,8 @@ out_free_page: return NULL; } -static int iommu_dma_sw_msi(struct iommu_domain *domain, struct msi_desc *desc, - phys_addr_t msi_addr) +int iommu_dma_sw_msi(struct iommu_domain *domain, struct msi_desc *desc, + phys_addr_t msi_addr) { struct device *dev = msi_desc_to_dev(desc); const struct iommu_dma_msi_page *msi_page; diff --git a/drivers/iommu/dma-iommu.h b/drivers/iommu/dma-iommu.h index 9cca11806e5d..eca201c1f963 100644 --- a/drivers/iommu/dma-iommu.h +++ b/drivers/iommu/dma-iommu.h @@ -19,6 +19,9 @@ int iommu_dma_init_fq(struct iommu_domain *domain); void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list); +int iommu_dma_sw_msi(struct iommu_domain *domain, struct msi_desc *desc, + phys_addr_t msi_addr); + extern bool iommu_dma_forcedac; #else /* CONFIG_IOMMU_DMA */ @@ -49,5 +52,11 @@ static inline void iommu_dma_get_resv_regions(struct device *dev, struct list_he { } +static inline int iommu_dma_sw_msi(struct iommu_domain *domain, + struct msi_desc *desc, phys_addr_t msi_addr) +{ + return -ENODEV; +} + #endif /* CONFIG_IOMMU_DMA */ #endif /* __DMA_IOMMU_H */ diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index c92e47f333cb..d96e6fabb4da 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -3650,8 +3651,21 @@ int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr) return 0; mutex_lock(&group->mutex); - if (group->domain && group->domain->sw_msi) - ret = group->domain->sw_msi(group->domain, desc, msi_addr); + /* An IDENTITY domain must pass through */ + if (group->domain && group->domain->type != IOMMU_DOMAIN_IDENTITY) { + switch (group->domain->cookie_type) { + case IOMMU_COOKIE_DMA_MSI: + case IOMMU_COOKIE_DMA_IOVA: + ret = iommu_dma_sw_msi(group->domain, desc, msi_addr); + break; + case IOMMU_COOKIE_IOMMUFD: + ret = iommufd_sw_msi(group->domain, desc, msi_addr); + 
break; + default: + ret = -EOPNOTSUPP; + break; + } + } mutex_unlock(&group->mutex); return ret; } diff --git a/drivers/iommu/iommufd/hw_pagetable.c b/drivers/iommu/iommufd/hw_pagetable.c index fded3f07bfa7..8e87ae71e128 100644 --- a/drivers/iommu/iommufd/hw_pagetable.c +++ b/drivers/iommu/iommufd/hw_pagetable.c @@ -161,7 +161,6 @@ iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas, } hwpt->domain->iommufd_hwpt = hwpt; hwpt->domain->cookie_type = IOMMU_COOKIE_IOMMUFD; - iommu_domain_set_sw_msi(hwpt->domain, iommufd_sw_msi); /* * Set the coherency mode before we do iopt_table_add_domain() as some @@ -259,7 +258,6 @@ iommufd_hwpt_nested_alloc(struct iommufd_ctx *ictx, hwpt->domain->owner = ops; hwpt->domain->iommufd_hwpt = hwpt; hwpt->domain->cookie_type = IOMMU_COOKIE_IOMMUFD; - iommu_domain_set_sw_msi(hwpt->domain, iommufd_sw_msi); if (WARN_ON_ONCE(hwpt->domain->type != IOMMU_DOMAIN_NESTED)) { rc = -EINVAL; @@ -318,7 +316,6 @@ iommufd_viommu_alloc_hwpt_nested(struct iommufd_viommu *viommu, u32 flags, hwpt->domain->iommufd_hwpt = hwpt; hwpt->domain->owner = viommu->iommu_dev->ops; hwpt->domain->cookie_type = IOMMU_COOKIE_IOMMUFD; - iommu_domain_set_sw_msi(hwpt->domain, iommufd_sw_msi); if (WARN_ON_ONCE(hwpt->domain->type != IOMMU_DOMAIN_NESTED)) { rc = -EINVAL; diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 06cc14e9993d..e01c855ae8a7 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -229,11 +229,6 @@ struct iommu_domain { struct iommu_domain_geometry geometry; int (*iopf_handler)(struct iopf_group *group); -#if IS_ENABLED(CONFIG_IRQ_MSI_IOMMU) - int (*sw_msi)(struct iommu_domain *domain, struct msi_desc *desc, - phys_addr_t msi_addr); -#endif - union { /* cookie */ struct iommu_dma_cookie *iova_cookie; struct iommu_dma_msi_cookie *msi_cookie; @@ -254,16 +249,6 @@ struct iommu_domain { }; }; -static inline void iommu_domain_set_sw_msi( - struct iommu_domain *domain, - int (*sw_msi)(struct iommu_domain 
*domain, struct msi_desc *desc, - phys_addr_t msi_addr)) -{ -#if IS_ENABLED(CONFIG_IRQ_MSI_IOMMU) - domain->sw_msi = sw_msi; -#endif -} - static inline bool iommu_is_dma_domain(struct iommu_domain *domain) { return domain->type & __IOMMU_DOMAIN_DMA_API; -- cgit v1.2.3 From 2fb69c602d57f77483b8dcdd12d17408a09f76fe Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Fri, 21 Mar 2025 10:19:33 -0700 Subject: iommufd: Support pasid attach/replace This extends the below APIs to support PASID. Device drivers to manage pasid attach/replace/detach. int iommufd_device_attach(struct iommufd_device *idev, ioasid_t pasid, u32 *pt_id); int iommufd_device_replace(struct iommufd_device *idev, ioasid_t pasid, u32 *pt_id); void iommufd_device_detach(struct iommufd_device *idev, ioasid_t pasid); The pasid operations share underlying attach/replace/detach infrastructure with the device operations, but still have some different implications: - no reserved region per pasid otherwise SVA architecture is already broken (CPU address space doesn't count device reserved regions); - accordingly no sw_msi trick; Cache coherency enforcement is still applied to pasid operations since it is about memory accesses post page table walking (no matter the walk is per RID or per PASID). 
Link: https://patch.msgid.link/r/20250321171940.7213-12-yi.l.liu@intel.com Reviewed-by: Jason Gunthorpe Signed-off-by: Kevin Tian Reviewed-by: Nicolin Chen Signed-off-by: Yi Liu Tested-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/device.c | 59 +++++++++++++++++++++++++--------------- drivers/iommu/iommufd/selftest.c | 8 +++--- drivers/vfio/iommufd.c | 10 ++++--- include/linux/iommufd.h | 9 ++++-- 4 files changed, 53 insertions(+), 33 deletions(-) (limited to 'include/linux') diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c index ba21b81e43bc..4cc6de03f76e 100644 --- a/drivers/iommu/iommufd/device.c +++ b/drivers/iommu/iommufd/device.c @@ -428,9 +428,12 @@ static int iommufd_hwpt_attach_device(struct iommufd_hw_pagetable *hwpt, } handle->idev = idev; - WARN_ON(pasid != IOMMU_NO_PASID); - rc = iommu_attach_group_handle(hwpt->domain, idev->igroup->group, - &handle->handle); + if (pasid == IOMMU_NO_PASID) + rc = iommu_attach_group_handle(hwpt->domain, idev->igroup->group, + &handle->handle); + else + rc = iommu_attach_device_pasid(hwpt->domain, idev->dev, pasid, + &handle->handle); if (rc) goto out_disable_iopf; @@ -464,10 +467,12 @@ static void iommufd_hwpt_detach_device(struct iommufd_hw_pagetable *hwpt, { struct iommufd_attach_handle *handle; - WARN_ON(pasid != IOMMU_NO_PASID); - handle = iommufd_device_get_attach_handle(idev, pasid); - iommu_detach_group_handle(hwpt->domain, idev->igroup->group); + if (pasid == IOMMU_NO_PASID) + iommu_detach_group_handle(hwpt->domain, idev->igroup->group); + else + iommu_detach_device_pasid(hwpt->domain, idev->dev, pasid); + if (hwpt->fault) { iommufd_auto_response_faults(hwpt, handle); iommufd_fault_iopf_disable(idev); @@ -483,8 +488,6 @@ static int iommufd_hwpt_replace_device(struct iommufd_device *idev, struct iommufd_attach_handle *handle, *old_handle; int rc; - WARN_ON(pasid != IOMMU_NO_PASID); - rc = iommufd_hwpt_pasid_compat(hwpt, idev, pasid); if (rc) return rc; @@ 
-502,8 +505,12 @@ static int iommufd_hwpt_replace_device(struct iommufd_device *idev, } handle->idev = idev; - rc = iommu_replace_group_handle(idev->igroup->group, hwpt->domain, - &handle->handle); + if (pasid == IOMMU_NO_PASID) + rc = iommu_replace_group_handle(idev->igroup->group, + hwpt->domain, &handle->handle); + else + rc = iommu_replace_device_pasid(hwpt->domain, idev->dev, + pasid, &handle->handle); if (rc) goto out_disable_iopf; @@ -904,22 +911,25 @@ out_put_pt_obj: } /** - * iommufd_device_attach - Connect a device to an iommu_domain + * iommufd_device_attach - Connect a device/pasid to an iommu_domain * @idev: device to attach + * @pasid: pasid to attach * @pt_id: Input a IOMMUFD_OBJ_IOAS, or IOMMUFD_OBJ_HWPT_PAGING * Output the IOMMUFD_OBJ_HWPT_PAGING ID * - * This connects the device to an iommu_domain, either automatically or manually - * selected. Once this completes the device could do DMA. + * This connects the device/pasid to an iommu_domain, either automatically + * or manually selected. Once this completes the device could do DMA with + * @pasid. @pasid is IOMMU_NO_PASID if this attach is for no pasid usage. * * The caller should return the resulting pt_id back to userspace. * This function is undone by calling iommufd_device_detach(). 
*/ -int iommufd_device_attach(struct iommufd_device *idev, u32 *pt_id) +int iommufd_device_attach(struct iommufd_device *idev, ioasid_t pasid, + u32 *pt_id) { int rc; - rc = iommufd_device_change_pt(idev, IOMMU_NO_PASID, pt_id, + rc = iommufd_device_change_pt(idev, pasid, pt_id, &iommufd_device_do_attach); if (rc) return rc; @@ -934,8 +944,9 @@ int iommufd_device_attach(struct iommufd_device *idev, u32 *pt_id) EXPORT_SYMBOL_NS_GPL(iommufd_device_attach, "IOMMUFD"); /** - * iommufd_device_replace - Change the device's iommu_domain + * iommufd_device_replace - Change the device/pasid's iommu_domain * @idev: device to change + * @pasid: pasid to change * @pt_id: Input a IOMMUFD_OBJ_IOAS, or IOMMUFD_OBJ_HWPT_PAGING * Output the IOMMUFD_OBJ_HWPT_PAGING ID * @@ -946,27 +957,31 @@ EXPORT_SYMBOL_NS_GPL(iommufd_device_attach, "IOMMUFD"); * * If it fails then no change is made to the attachment. The iommu driver may * implement this so there is no disruption in translation. This can only be - * called if iommufd_device_attach() has already succeeded. + * called if iommufd_device_attach() has already succeeded. @pasid is + * IOMMU_NO_PASID for no pasid usage. */ -int iommufd_device_replace(struct iommufd_device *idev, u32 *pt_id) +int iommufd_device_replace(struct iommufd_device *idev, ioasid_t pasid, + u32 *pt_id) { - return iommufd_device_change_pt(idev, IOMMU_NO_PASID, pt_id, + return iommufd_device_change_pt(idev, pasid, pt_id, &iommufd_device_do_replace); } EXPORT_SYMBOL_NS_GPL(iommufd_device_replace, "IOMMUFD"); /** - * iommufd_device_detach - Disconnect a device to an iommu_domain + * iommufd_device_detach - Disconnect a device/pasid from an iommu_domain * @idev: device to detach + * @pasid: pasid to detach * * Undo iommufd_device_attach(). This disconnects the idev from the previously * attached pt_id. The device returns back to a blocked DMA translation. + * @pasid is IOMMU_NO_PASID for no pasid usage. 
*/ -void iommufd_device_detach(struct iommufd_device *idev) +void iommufd_device_detach(struct iommufd_device *idev, ioasid_t pasid) { struct iommufd_hw_pagetable *hwpt; - hwpt = iommufd_hw_pagetable_detach(idev, IOMMU_NO_PASID); + hwpt = iommufd_hw_pagetable_detach(idev, pasid); iommufd_hw_pagetable_put(idev->ictx, hwpt); refcount_dec(&idev->obj.users); } diff --git a/drivers/iommu/iommufd/selftest.c b/drivers/iommu/iommufd/selftest.c index d55dde28e9bc..0b3f5cbf242b 100644 --- a/drivers/iommu/iommufd/selftest.c +++ b/drivers/iommu/iommufd/selftest.c @@ -945,7 +945,7 @@ static int iommufd_test_mock_domain(struct iommufd_ucmd *ucmd, } sobj->idev.idev = idev; - rc = iommufd_device_attach(idev, &pt_id); + rc = iommufd_device_attach(idev, IOMMU_NO_PASID, &pt_id); if (rc) goto out_unbind; @@ -960,7 +960,7 @@ static int iommufd_test_mock_domain(struct iommufd_ucmd *ucmd, return 0; out_detach: - iommufd_device_detach(idev); + iommufd_device_detach(idev, IOMMU_NO_PASID); out_unbind: iommufd_device_unbind(idev); out_mdev: @@ -994,7 +994,7 @@ static int iommufd_test_mock_domain_replace(struct iommufd_ucmd *ucmd, goto out_dev_obj; } - rc = iommufd_device_replace(sobj->idev.idev, &pt_id); + rc = iommufd_device_replace(sobj->idev.idev, IOMMU_NO_PASID, &pt_id); if (rc) goto out_dev_obj; @@ -1655,7 +1655,7 @@ void iommufd_selftest_destroy(struct iommufd_object *obj) switch (sobj->type) { case TYPE_IDEV: - iommufd_device_detach(sobj->idev.idev); + iommufd_device_detach(sobj->idev.idev, IOMMU_NO_PASID); iommufd_device_unbind(sobj->idev.idev); mock_dev_destroy(sobj->idev.mock_dev); break; diff --git a/drivers/vfio/iommufd.c b/drivers/vfio/iommufd.c index 516294fd901b..37e1efa2c7bf 100644 --- a/drivers/vfio/iommufd.c +++ b/drivers/vfio/iommufd.c @@ -128,7 +128,7 @@ void vfio_iommufd_physical_unbind(struct vfio_device *vdev) lockdep_assert_held(&vdev->dev_set->lock); if (vdev->iommufd_attached) { - iommufd_device_detach(vdev->iommufd_device); + 
iommufd_device_detach(vdev->iommufd_device, IOMMU_NO_PASID); vdev->iommufd_attached = false; } iommufd_device_unbind(vdev->iommufd_device); @@ -146,9 +146,11 @@ int vfio_iommufd_physical_attach_ioas(struct vfio_device *vdev, u32 *pt_id) return -EINVAL; if (vdev->iommufd_attached) - rc = iommufd_device_replace(vdev->iommufd_device, pt_id); + rc = iommufd_device_replace(vdev->iommufd_device, + IOMMU_NO_PASID, pt_id); else - rc = iommufd_device_attach(vdev->iommufd_device, pt_id); + rc = iommufd_device_attach(vdev->iommufd_device, + IOMMU_NO_PASID, pt_id); if (rc) return rc; vdev->iommufd_attached = true; @@ -163,7 +165,7 @@ void vfio_iommufd_physical_detach_ioas(struct vfio_device *vdev) if (WARN_ON(!vdev->iommufd_device) || !vdev->iommufd_attached) return; - iommufd_device_detach(vdev->iommufd_device); + iommufd_device_detach(vdev->iommufd_device, IOMMU_NO_PASID); vdev->iommufd_attached = false; } EXPORT_SYMBOL_GPL(vfio_iommufd_physical_detach_ioas); diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index 60eff9272551..34b6e6ca4bfa 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -8,6 +8,7 @@ #include #include +#include #include #include #include @@ -54,9 +55,11 @@ struct iommufd_device *iommufd_device_bind(struct iommufd_ctx *ictx, struct device *dev, u32 *id); void iommufd_device_unbind(struct iommufd_device *idev); -int iommufd_device_attach(struct iommufd_device *idev, u32 *pt_id); -int iommufd_device_replace(struct iommufd_device *idev, u32 *pt_id); -void iommufd_device_detach(struct iommufd_device *idev); +int iommufd_device_attach(struct iommufd_device *idev, ioasid_t pasid, + u32 *pt_id); +int iommufd_device_replace(struct iommufd_device *idev, ioasid_t pasid, + u32 *pt_id); +void iommufd_device_detach(struct iommufd_device *idev, ioasid_t pasid); struct iommufd_ctx *iommufd_device_to_ictx(struct iommufd_device *idev); u32 iommufd_device_to_id(struct iommufd_device *idev); -- cgit v1.2.3 From 
7fe6b987166b901efc5c6fce5fe853c9ebb835be Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Fri, 21 Mar 2025 11:01:39 -0700 Subject: ida: Add ida_find_first_range() There is no helper for users to check whether a given ID is allocated, nor a helper to loop over all the allocated IDs in an IDA and do something for cleanup. To meet these two needs, add a helper to get the lowest allocated ID of a range and two variants based on it. Caller can check if a given ID is allocated or not by: bool ida_exists(struct ida *ida, unsigned int id) Caller can iterate all allocated IDs by: int id; while ((id = ida_find_first(&pasid_ida)) >= 0) { //anything to do with the allocated ID ida_free(&pasid_ida, id); } Link: https://patch.msgid.link/r/20250321180143.8468-2-yi.l.liu@intel.com Cc: Matthew Wilcox (Oracle) Suggested-by: Jason Gunthorpe Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Acked-by: Matthew Wilcox (Oracle) Tested-by: Nicolin Chen Signed-off-by: Yi Liu Signed-off-by: Jason Gunthorpe --- include/linux/idr.h | 11 +++++++++ lib/idr.c | 67 ++++++++++++++++++++++++++++++++++++++++++++++++++ lib/test_ida.c | 70 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 148 insertions(+) (limited to 'include/linux') diff --git a/include/linux/idr.h b/include/linux/idr.h index da5f5fa4a3a6..718f9b1b91af 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -257,6 +257,7 @@ struct ida { int ida_alloc_range(struct ida *, unsigned int min, unsigned int max, gfp_t); void ida_free(struct ida *, unsigned int id); void ida_destroy(struct ida *ida); +int ida_find_first_range(struct ida *ida, unsigned int min, unsigned int max); /** * ida_alloc() - Allocate an unused ID. 
@@ -328,4 +329,14 @@ static inline bool ida_is_empty(const struct ida *ida) { return xa_empty(&ida->xa); } + +static inline bool ida_exists(struct ida *ida, unsigned int id) +{ + return ida_find_first_range(ida, id, id) == id; +} + +static inline int ida_find_first(struct ida *ida) +{ + return ida_find_first_range(ida, 0, ~0); +} #endif /* __IDR_H__ */ diff --git a/lib/idr.c b/lib/idr.c index da36054c3ca0..e2adc457abb4 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -476,6 +476,73 @@ nospc: } EXPORT_SYMBOL(ida_alloc_range); +/** + * ida_find_first_range - Get the lowest used ID. + * @ida: IDA handle. + * @min: Lowest ID to get. + * @max: Highest ID to get. + * + * Get the lowest used ID between @min and @max, inclusive. The returned + * ID will not exceed %INT_MAX, even if @max is larger. + * + * Context: Any context. Takes and releases the xa_lock. + * Return: The lowest used ID, or errno if no used ID is found. + */ +int ida_find_first_range(struct ida *ida, unsigned int min, unsigned int max) +{ + unsigned long index = min / IDA_BITMAP_BITS; + unsigned int offset = min % IDA_BITMAP_BITS; + unsigned long *addr, size, bit; + unsigned long tmp = 0; + unsigned long flags; + void *entry; + int ret; + + if ((int)min < 0) + return -EINVAL; + if ((int)max < 0) + max = INT_MAX; + + xa_lock_irqsave(&ida->xa, flags); + + entry = xa_find(&ida->xa, &index, max / IDA_BITMAP_BITS, XA_PRESENT); + if (!entry) { + ret = -ENOENT; + goto err_unlock; + } + + if (index > min / IDA_BITMAP_BITS) + offset = 0; + if (index * IDA_BITMAP_BITS + offset > max) { + ret = -ENOENT; + goto err_unlock; + } + + if (xa_is_value(entry)) { + tmp = xa_to_value(entry); + addr = &tmp; + size = BITS_PER_XA_VALUE; + } else { + addr = ((struct ida_bitmap *)entry)->bitmap; + size = IDA_BITMAP_BITS; + } + + bit = find_next_bit(addr, size, offset); + + xa_unlock_irqrestore(&ida->xa, flags); + + if (bit == size || + index * IDA_BITMAP_BITS + bit > max) + return -ENOENT; + + return index * IDA_BITMAP_BITS + bit; + 
+err_unlock: + xa_unlock_irqrestore(&ida->xa, flags); + return ret; +} +EXPORT_SYMBOL(ida_find_first_range); + /** * ida_free() - Release an allocated ID. * @ida: IDA handle. diff --git a/lib/test_ida.c b/lib/test_ida.c index c80155a1956d..63078f8dc13f 100644 --- a/lib/test_ida.c +++ b/lib/test_ida.c @@ -189,6 +189,75 @@ static void ida_check_bad_free(struct ida *ida) IDA_BUG_ON(ida, !ida_is_empty(ida)); } +/* + * Check ida_find_first_range() and variants. + */ +static void ida_check_find_first(struct ida *ida) +{ + /* IDA is empty; none of the below should exist */ + IDA_BUG_ON(ida, ida_exists(ida, 0)); + IDA_BUG_ON(ida, ida_exists(ida, 3)); + IDA_BUG_ON(ida, ida_exists(ida, 63)); + IDA_BUG_ON(ida, ida_exists(ida, 1023)); + IDA_BUG_ON(ida, ida_exists(ida, (1 << 20) - 1)); + + /* IDA contains a single value entry */ + IDA_BUG_ON(ida, ida_alloc_min(ida, 3, GFP_KERNEL) != 3); + IDA_BUG_ON(ida, ida_exists(ida, 0)); + IDA_BUG_ON(ida, !ida_exists(ida, 3)); + IDA_BUG_ON(ida, ida_exists(ida, 63)); + IDA_BUG_ON(ida, ida_exists(ida, 1023)); + IDA_BUG_ON(ida, ida_exists(ida, (1 << 20) - 1)); + + IDA_BUG_ON(ida, ida_alloc_min(ida, 63, GFP_KERNEL) != 63); + IDA_BUG_ON(ida, ida_exists(ida, 0)); + IDA_BUG_ON(ida, !ida_exists(ida, 3)); + IDA_BUG_ON(ida, !ida_exists(ida, 63)); + IDA_BUG_ON(ida, ida_exists(ida, 1023)); + IDA_BUG_ON(ida, ida_exists(ida, (1 << 20) - 1)); + + /* IDA contains a single bitmap */ + IDA_BUG_ON(ida, ida_alloc_min(ida, 1023, GFP_KERNEL) != 1023); + IDA_BUG_ON(ida, ida_exists(ida, 0)); + IDA_BUG_ON(ida, !ida_exists(ida, 3)); + IDA_BUG_ON(ida, !ida_exists(ida, 63)); + IDA_BUG_ON(ida, !ida_exists(ida, 1023)); + IDA_BUG_ON(ida, ida_exists(ida, (1 << 20) - 1)); + + /* IDA contains a tree */ + IDA_BUG_ON(ida, ida_alloc_min(ida, (1 << 20) - 1, GFP_KERNEL) != (1 << 20) - 1); + IDA_BUG_ON(ida, ida_exists(ida, 0)); + IDA_BUG_ON(ida, !ida_exists(ida, 3)); + IDA_BUG_ON(ida, !ida_exists(ida, 63)); + IDA_BUG_ON(ida, !ida_exists(ida, 1023)); + IDA_BUG_ON(ida, 
!ida_exists(ida, (1 << 20) - 1)); + + /* Now try to find first */ + IDA_BUG_ON(ida, ida_find_first(ida) != 3); + IDA_BUG_ON(ida, ida_find_first_range(ida, -1, 2) != -EINVAL); + IDA_BUG_ON(ida, ida_find_first_range(ida, 0, 2) != -ENOENT); // no used ID + IDA_BUG_ON(ida, ida_find_first_range(ida, 0, 3) != 3); + IDA_BUG_ON(ida, ida_find_first_range(ida, 1, 3) != 3); + IDA_BUG_ON(ida, ida_find_first_range(ida, 3, 3) != 3); + IDA_BUG_ON(ida, ida_find_first_range(ida, 2, 4) != 3); + IDA_BUG_ON(ida, ida_find_first_range(ida, 4, 3) != -ENOENT); // min > max, fail + IDA_BUG_ON(ida, ida_find_first_range(ida, 4, 60) != -ENOENT); // no used ID + IDA_BUG_ON(ida, ida_find_first_range(ida, 4, 64) != 63); + IDA_BUG_ON(ida, ida_find_first_range(ida, 63, 63) != 63); + IDA_BUG_ON(ida, ida_find_first_range(ida, 64, 1026) != 1023); + IDA_BUG_ON(ida, ida_find_first_range(ida, 1023, 1023) != 1023); + IDA_BUG_ON(ida, ida_find_first_range(ida, 1023, (1 << 20) - 1) != 1023); + IDA_BUG_ON(ida, ida_find_first_range(ida, 1024, (1 << 20) - 1) != (1 << 20) - 1); + IDA_BUG_ON(ida, ida_find_first_range(ida, (1 << 20), INT_MAX) != -ENOENT); + + ida_free(ida, 3); + ida_free(ida, 63); + ida_free(ida, 1023); + ida_free(ida, (1 << 20) - 1); + + IDA_BUG_ON(ida, !ida_is_empty(ida)); +} + static DEFINE_IDA(ida); static int ida_checks(void) @@ -202,6 +271,7 @@ static int ida_checks(void) ida_check_max(&ida); ida_check_conv(&ida); ida_check_bad_free(&ida); + ida_check_find_first(&ida); printk("IDA: %u of %u tests passed\n", tests_passed, tests_run); return (tests_run != tests_passed) ? 0 : -EINVAL; -- cgit v1.2.3 From 290641346d0d1eaf400c4f968d5b2cd91f483733 Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Fri, 21 Mar 2025 11:01:40 -0700 Subject: vfio-iommufd: Support pasid [at|de]tach for physical VFIO devices This adds pasid_[at|de]tach_ioas ops for attaching hwpt to pasid of a device and the helpers for it. For now, only vfio-pci supports pasid attach/detach. 
Link: https://patch.msgid.link/r/20250321180143.8468-3-yi.l.liu@intel.com Signed-off-by: Kevin Tian Reviewed-by: Jason Gunthorpe Reviewed-by: Alex Williamson Tested-by: Nicolin Chen Signed-off-by: Yi Liu Signed-off-by: Jason Gunthorpe --- drivers/vfio/iommufd.c | 50 +++++++++++++++++++++++++++++++++++++++++++++ drivers/vfio/pci/vfio_pci.c | 2 ++ include/linux/vfio.h | 14 +++++++++++++ 3 files changed, 66 insertions(+) (limited to 'include/linux') diff --git a/drivers/vfio/iommufd.c b/drivers/vfio/iommufd.c index 37e1efa2c7bf..c8c3a2d53f86 100644 --- a/drivers/vfio/iommufd.c +++ b/drivers/vfio/iommufd.c @@ -119,14 +119,22 @@ int vfio_iommufd_physical_bind(struct vfio_device *vdev, if (IS_ERR(idev)) return PTR_ERR(idev); vdev->iommufd_device = idev; + ida_init(&vdev->pasids); return 0; } EXPORT_SYMBOL_GPL(vfio_iommufd_physical_bind); void vfio_iommufd_physical_unbind(struct vfio_device *vdev) { + int pasid; + lockdep_assert_held(&vdev->dev_set->lock); + while ((pasid = ida_find_first(&vdev->pasids)) >= 0) { + iommufd_device_detach(vdev->iommufd_device, pasid); + ida_free(&vdev->pasids, pasid); + } + if (vdev->iommufd_attached) { iommufd_device_detach(vdev->iommufd_device, IOMMU_NO_PASID); vdev->iommufd_attached = false; @@ -170,6 +178,48 @@ void vfio_iommufd_physical_detach_ioas(struct vfio_device *vdev) } EXPORT_SYMBOL_GPL(vfio_iommufd_physical_detach_ioas); +int vfio_iommufd_physical_pasid_attach_ioas(struct vfio_device *vdev, + u32 pasid, u32 *pt_id) +{ + int rc; + + lockdep_assert_held(&vdev->dev_set->lock); + + if (WARN_ON(!vdev->iommufd_device)) + return -EINVAL; + + if (ida_exists(&vdev->pasids, pasid)) + return iommufd_device_replace(vdev->iommufd_device, + pasid, pt_id); + + rc = ida_alloc_range(&vdev->pasids, pasid, pasid, GFP_KERNEL); + if (rc < 0) + return rc; + + rc = iommufd_device_attach(vdev->iommufd_device, pasid, pt_id); + if (rc) + ida_free(&vdev->pasids, pasid); + + return rc; +} +EXPORT_SYMBOL_GPL(vfio_iommufd_physical_pasid_attach_ioas); + +void 
vfio_iommufd_physical_pasid_detach_ioas(struct vfio_device *vdev, + u32 pasid) +{ + lockdep_assert_held(&vdev->dev_set->lock); + + if (WARN_ON(!vdev->iommufd_device)) + return; + + if (!ida_exists(&vdev->pasids, pasid)) + return; + + iommufd_device_detach(vdev->iommufd_device, pasid); + ida_free(&vdev->pasids, pasid); +} +EXPORT_SYMBOL_GPL(vfio_iommufd_physical_pasid_detach_ioas); + /* * The emulated standard ops mean that vfio_device is going to use the * "mdev path" and will call vfio_pin_pages()/vfio_dma_rw(). Drivers using this diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index e727941f589d..6f7ae7e5b7b0 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -144,6 +144,8 @@ static const struct vfio_device_ops vfio_pci_ops = { .unbind_iommufd = vfio_iommufd_physical_unbind, .attach_ioas = vfio_iommufd_physical_attach_ioas, .detach_ioas = vfio_iommufd_physical_detach_ioas, + .pasid_attach_ioas = vfio_iommufd_physical_pasid_attach_ioas, + .pasid_detach_ioas = vfio_iommufd_physical_pasid_detach_ioas, }; static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 000a6cab2d31..707b00772ce1 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -67,6 +67,7 @@ struct vfio_device { struct inode *inode; #if IS_ENABLED(CONFIG_IOMMUFD) struct iommufd_device *iommufd_device; + struct ida pasids; u8 iommufd_attached:1; #endif u8 cdev_opened:1; @@ -91,6 +92,8 @@ struct vfio_device { * bound iommufd. Undo in unbind_iommufd if @detach_ioas is not * called. 
* @detach_ioas: Opposite of attach_ioas + * @pasid_attach_ioas: The pasid variation of attach_ioas + * @pasid_detach_ioas: Opposite of pasid_attach_ioas * @open_device: Called when the first file descriptor is opened for this device * @close_device: Opposite of open_device * @read: Perform read(2) on device file descriptor @@ -115,6 +118,9 @@ struct vfio_device_ops { void (*unbind_iommufd)(struct vfio_device *vdev); int (*attach_ioas)(struct vfio_device *vdev, u32 *pt_id); void (*detach_ioas)(struct vfio_device *vdev); + int (*pasid_attach_ioas)(struct vfio_device *vdev, u32 pasid, + u32 *pt_id); + void (*pasid_detach_ioas)(struct vfio_device *vdev, u32 pasid); int (*open_device)(struct vfio_device *vdev); void (*close_device)(struct vfio_device *vdev); ssize_t (*read)(struct vfio_device *vdev, char __user *buf, @@ -139,6 +145,10 @@ int vfio_iommufd_physical_bind(struct vfio_device *vdev, void vfio_iommufd_physical_unbind(struct vfio_device *vdev); int vfio_iommufd_physical_attach_ioas(struct vfio_device *vdev, u32 *pt_id); void vfio_iommufd_physical_detach_ioas(struct vfio_device *vdev); +int vfio_iommufd_physical_pasid_attach_ioas(struct vfio_device *vdev, + u32 pasid, u32 *pt_id); +void vfio_iommufd_physical_pasid_detach_ioas(struct vfio_device *vdev, + u32 pasid); int vfio_iommufd_emulated_bind(struct vfio_device *vdev, struct iommufd_ctx *ictx, u32 *out_device_id); void vfio_iommufd_emulated_unbind(struct vfio_device *vdev); @@ -166,6 +176,10 @@ vfio_iommufd_get_dev_id(struct vfio_device *vdev, struct iommufd_ctx *ictx) ((int (*)(struct vfio_device *vdev, u32 *pt_id)) NULL) #define vfio_iommufd_physical_detach_ioas \ ((void (*)(struct vfio_device *vdev)) NULL) +#define vfio_iommufd_physical_pasid_attach_ioas \ + ((int (*)(struct vfio_device *vdev, u32 pasid, u32 *pt_id)) NULL) +#define vfio_iommufd_physical_pasid_detach_ioas \ + ((void (*)(struct vfio_device *vdev, u32 pasid)) NULL) #define vfio_iommufd_emulated_bind \ ((int (*)(struct vfio_device *vdev, 
struct iommufd_ctx *ictx, \ u32 *out_device_id)) NULL) -- cgit v1.2.3 From 803f97298e7de9242eb677a1351dcafbbcc9117e Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Fri, 21 Mar 2025 11:01:42 -0700 Subject: iommufd: Extend IOMMU_GET_HW_INFO to report PASID capability PASID usage requires PASID support in both device and IOMMU. Since the iommu drivers always enable the PASID capability for the device if it is supported, this extends the IOMMU_GET_HW_INFO to report the PASID capability to userspace. Also, enhances the selftest accordingly. Link: https://patch.msgid.link/r/20250321180143.8468-5-yi.l.liu@intel.com Cc: Bjorn Helgaas Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Tested-by: Zhangfei Gao #aarch64 platform Tested-by: Nicolin Chen Signed-off-by: Yi Liu Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/device.c | 34 +++++++++++++++++++++++++++++++++- drivers/pci/ats.c | 33 +++++++++++++++++++++++++++++++++ include/linux/pci-ats.h | 3 +++ include/uapi/linux/iommufd.h | 14 +++++++++++++- 4 files changed, 82 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c index 1605f6c0e1ee..2307daad65c0 100644 --- a/drivers/iommu/iommufd/device.c +++ b/drivers/iommu/iommufd/device.c @@ -3,6 +3,7 @@ */ #include #include +#include #include #include @@ -1455,7 +1456,8 @@ int iommufd_get_hw_info(struct iommufd_ucmd *ucmd) void *data; int rc; - if (cmd->flags || cmd->__reserved) + if (cmd->flags || cmd->__reserved[0] || cmd->__reserved[1] || + cmd->__reserved[2]) return -EOPNOTSUPP; idev = iommufd_get_device(ucmd, cmd->dev_id); @@ -1512,6 +1514,36 @@ int iommufd_get_hw_info(struct iommufd_ucmd *ucmd) if (device_iommu_capable(idev->dev, IOMMU_CAP_DIRTY_TRACKING)) cmd->out_capabilities |= IOMMU_HW_CAP_DIRTY_TRACKING; + cmd->out_max_pasid_log2 = 0; + /* + * Currently, all iommu drivers enable PASID in the probe_device() + * op if iommu and device supports it. 
So the max_pasids stored in + * dev->iommu indicates both PASID support and enable status. A + * non-zero dev->iommu->max_pasids means PASID is supported and + * enabled. The iommufd only reports PASID capability to userspace + * if it's enabled. + */ + if (idev->dev->iommu->max_pasids) { + cmd->out_max_pasid_log2 = ilog2(idev->dev->iommu->max_pasids); + + if (dev_is_pci(idev->dev)) { + struct pci_dev *pdev = to_pci_dev(idev->dev); + int ctrl; + + ctrl = pci_pasid_status(pdev); + + WARN_ON_ONCE(ctrl < 0 || + !(ctrl & PCI_PASID_CTRL_ENABLE)); + + if (ctrl & PCI_PASID_CTRL_EXEC) + cmd->out_capabilities |= + IOMMU_HW_CAP_PCI_PASID_EXEC; + if (ctrl & PCI_PASID_CTRL_PRIV) + cmd->out_capabilities |= + IOMMU_HW_CAP_PCI_PASID_PRIV; + } + } + rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); out_free: kfree(data); diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c index c6b266c772c8..ec6c8dbdc5e9 100644 --- a/drivers/pci/ats.c +++ b/drivers/pci/ats.c @@ -538,4 +538,37 @@ int pci_max_pasids(struct pci_dev *pdev) return (1 << FIELD_GET(PCI_PASID_CAP_WIDTH, supported)); } EXPORT_SYMBOL_GPL(pci_max_pasids); + +/** + * pci_pasid_status - Check the PASID status + * @pdev: PCI device structure + * + * Returns a negative value when no PASID capability is present. + * Otherwise the value of the control register is returned. 
+ * Status reported are: + * + * PCI_PASID_CTRL_ENABLE - PASID enabled + * PCI_PASID_CTRL_EXEC - Execute permission enabled + * PCI_PASID_CTRL_PRIV - Privileged mode enabled + */ +int pci_pasid_status(struct pci_dev *pdev) +{ + int pasid; + u16 ctrl; + + if (pdev->is_virtfn) + pdev = pci_physfn(pdev); + + pasid = pdev->pasid_cap; + if (!pasid) + return -EINVAL; + + pci_read_config_word(pdev, pasid + PCI_PASID_CTRL, &ctrl); + + ctrl &= PCI_PASID_CTRL_ENABLE | PCI_PASID_CTRL_EXEC | + PCI_PASID_CTRL_PRIV; + + return ctrl; +} +EXPORT_SYMBOL_GPL(pci_pasid_status); #endif /* CONFIG_PCI_PASID */ diff --git a/include/linux/pci-ats.h b/include/linux/pci-ats.h index 0e8b74e63767..75c6c86cf09d 100644 --- a/include/linux/pci-ats.h +++ b/include/linux/pci-ats.h @@ -42,6 +42,7 @@ int pci_enable_pasid(struct pci_dev *pdev, int features); void pci_disable_pasid(struct pci_dev *pdev); int pci_pasid_features(struct pci_dev *pdev); int pci_max_pasids(struct pci_dev *pdev); +int pci_pasid_status(struct pci_dev *pdev); #else /* CONFIG_PCI_PASID */ static inline int pci_enable_pasid(struct pci_dev *pdev, int features) { return -EINVAL; } @@ -50,6 +51,8 @@ static inline int pci_pasid_features(struct pci_dev *pdev) { return -EINVAL; } static inline int pci_max_pasids(struct pci_dev *pdev) { return -EINVAL; } +static inline int pci_pasid_status(struct pci_dev *pdev) +{ return -EINVAL; } #endif /* CONFIG_PCI_PASID */ #endif /* LINUX_PCI_ATS_H */ diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index 6901804ec736..e2c04e58a997 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -612,9 +612,17 @@ enum iommu_hw_info_type { * IOMMU_HWPT_GET_DIRTY_BITMAP * IOMMU_HWPT_SET_DIRTY_TRACKING * + * @IOMMU_HW_CAP_PCI_PASID_EXEC: Execute Permission Supported, user ignores it + * when the struct + * iommu_hw_info::out_max_pasid_log2 is zero. 
+ * @IOMMU_HW_CAP_PCI_PASID_PRIV: Privileged Mode Supported, user ignores it + * when the struct + * iommu_hw_info::out_max_pasid_log2 is zero. */ enum iommufd_hw_capabilities { IOMMU_HW_CAP_DIRTY_TRACKING = 1 << 0, + IOMMU_HW_CAP_PCI_PASID_EXEC = 1 << 1, + IOMMU_HW_CAP_PCI_PASID_PRIV = 1 << 2, }; /** @@ -630,6 +638,9 @@ enum iommufd_hw_capabilities { * iommu_hw_info_type. * @out_capabilities: Output the generic iommu capability info type as defined * in the enum iommu_hw_capabilities. + * @out_max_pasid_log2: Output the width of PASIDs. 0 means no PASID support. + * PCI devices turn to out_capabilities to check whether a + * specific capability is supported or not. * @__reserved: Must be 0 * * Query an iommu type specific hardware information data from an iommu behind @@ -653,7 +664,8 @@ struct iommu_hw_info { __u32 data_len; __aligned_u64 data_uptr; __u32 out_data_type; - __u32 __reserved; + __u8 out_max_pasid_log2; + __u8 __reserved[3]; __aligned_u64 out_capabilities; }; #define IOMMU_GET_HW_INFO _IO(IOMMUFD_TYPE, IOMMUFD_CMD_GET_HW_INFO) -- cgit v1.2.3