Diffstat (limited to 'drivers/iommu/iommufd')
-rw-r--r-- | drivers/iommu/iommufd/Kconfig | 2
-rw-r--r-- | drivers/iommu/iommufd/Makefile | 2
-rw-r--r-- | drivers/iommu/iommufd/device.c | 671
-rw-r--r-- | drivers/iommu/iommufd/driver.c | 309
-rw-r--r-- | drivers/iommu/iommufd/eventq.c | 546
-rw-r--r-- | drivers/iommu/iommufd/fault.c | 446
-rw-r--r-- | drivers/iommu/iommufd/hw_pagetable.c | 51
-rw-r--r-- | drivers/iommu/iommufd/io_pagetable.c | 57
-rw-r--r-- | drivers/iommu/iommufd/io_pagetable.h | 5
-rw-r--r-- | drivers/iommu/iommufd/iommufd_private.h | 304
-rw-r--r-- | drivers/iommu/iommufd/iommufd_test.h | 60
-rw-r--r-- | drivers/iommu/iommufd/iova_bitmap.c | 11
-rw-r--r-- | drivers/iommu/iommufd/main.c | 264
-rw-r--r-- | drivers/iommu/iommufd/pages.c | 21
-rw-r--r-- | drivers/iommu/iommufd/selftest.c | 601
-rw-r--r-- | drivers/iommu/iommufd/vfio_compat.c | 6
-rw-r--r-- | drivers/iommu/iommufd/viommu.c | 311
17 files changed, 2787 insertions, 880 deletions
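A note before the per-file diffs: the central refactor in device.c below replaces the group's single hwpt pointer and device_list with a two-level lookup, where igroup->pasid_attach is an xarray keyed by PASID and each struct iommufd_attach entry tracks the attached hwpt plus a device_array keyed by device object ID. Below is a minimal sketch of that bookkeeping using the xarray API; the names ending in _sketch and the added NULL check are illustrative only, not part of the patch:

/*
 * Illustrative sketch, not part of the patch: a simplified view of the
 * per-PASID attach bookkeeping this series introduces in device.c.
 */
#include <linux/iommu.h>
#include <linux/xarray.h>

struct iommufd_hw_pagetable;

struct attach_sketch {
	struct iommufd_hw_pagetable *hwpt; /* one hwpt per (group, pasid) */
	struct xarray device_array;        /* keyed by idev->obj.id */
};

/* Roughly what iommufd_device_is_attached() checks in the patch */
static bool attach_sketch_is_attached(struct xarray *pasid_attach,
				      ioasid_t pasid, u32 dev_obj_id)
{
	struct attach_sketch *attach = xa_load(pasid_attach, pasid);

	return attach && xa_load(&attach->device_array, dev_obj_id);
}

The no-PASID (RID) attach is simply the entry at index IOMMU_NO_PASID, which is how the old single-hwpt-per-group case maps onto the new structure.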
diff --git a/drivers/iommu/iommufd/Kconfig b/drivers/iommu/iommufd/Kconfig index 0a07f9449fd9..2beeb4f60ee5 100644 --- a/drivers/iommu/iommufd/Kconfig +++ b/drivers/iommu/iommufd/Kconfig @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only config IOMMUFD_DRIVER_CORE - tristate + bool default (IOMMUFD_DRIVER || IOMMUFD) if IOMMUFD!=n config IOMMUFD diff --git a/drivers/iommu/iommufd/Makefile b/drivers/iommu/iommufd/Makefile index cb784da6cddc..71d692c9a8f4 100644 --- a/drivers/iommu/iommufd/Makefile +++ b/drivers/iommu/iommufd/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only iommufd-y := \ device.o \ - fault.o \ + eventq.o \ hw_pagetable.o \ io_pagetable.o \ ioas.o \ diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c index 5fd3dd420290..65fbd098f9e9 100644 --- a/drivers/iommu/iommufd/device.c +++ b/drivers/iommu/iommufd/device.c @@ -3,6 +3,7 @@ */ #include <linux/iommu.h> #include <linux/iommufd.h> +#include <linux/pci-ats.h> #include <linux/slab.h> #include <uapi/linux/iommufd.h> @@ -17,12 +18,17 @@ MODULE_PARM_DESC( "Allow IOMMUFD to bind to devices even if the platform cannot isolate " "the MSI interrupt window. Enabling this is a security weakness."); +struct iommufd_attach { + struct iommufd_hw_pagetable *hwpt; + struct xarray device_array; +}; + static void iommufd_group_release(struct kref *kref) { struct iommufd_group *igroup = container_of(kref, struct iommufd_group, ref); - WARN_ON(igroup->hwpt || !list_empty(&igroup->device_list)); + WARN_ON(!xa_empty(&igroup->pasid_attach)); xa_cmpxchg(&igroup->ictx->groups, iommu_group_id(igroup->group), igroup, NULL, GFP_KERNEL); @@ -89,7 +95,7 @@ static struct iommufd_group *iommufd_get_group(struct iommufd_ctx *ictx, kref_init(&new_igroup->ref); mutex_init(&new_igroup->lock); - INIT_LIST_HEAD(&new_igroup->device_list); + xa_init(&new_igroup->pasid_attach); new_igroup->sw_msi_start = PHYS_ADDR_MAX; /* group reference moves into new_igroup */ new_igroup->group = group; @@ -131,6 +137,57 @@ static struct iommufd_group *iommufd_get_group(struct iommufd_ctx *ictx, } } +static void iommufd_device_remove_vdev(struct iommufd_device *idev) +{ + struct iommufd_vdevice *vdev; + + mutex_lock(&idev->igroup->lock); + /* prevent new references from vdev */ + idev->destroying = true; + /* vdev has been completely destroyed by userspace */ + if (!idev->vdev) + goto out_unlock; + + vdev = iommufd_get_vdevice(idev->ictx, idev->vdev->obj.id); + /* + * An ongoing vdev destroy ioctl has removed the vdev from the object + * xarray, but has not finished iommufd_vdevice_destroy() yet as it + * needs the same mutex. We exit the locking then wait on wait_cnt + * reference for the vdev destruction. + */ + if (IS_ERR(vdev)) + goto out_unlock; + + /* Should never happen */ + if (WARN_ON(vdev != idev->vdev)) { + iommufd_put_object(idev->ictx, &vdev->obj); + goto out_unlock; + } + + /* + * vdev is still alive. Hold a users refcount to prevent racing with + * userspace destruction, then use iommufd_object_tombstone_user() to + * destroy it and leave a tombstone. 
+ */ + refcount_inc(&vdev->obj.users); + iommufd_put_object(idev->ictx, &vdev->obj); + mutex_unlock(&idev->igroup->lock); + iommufd_object_tombstone_user(idev->ictx, &vdev->obj); + return; + +out_unlock: + mutex_unlock(&idev->igroup->lock); +} + +void iommufd_device_pre_destroy(struct iommufd_object *obj) +{ + struct iommufd_device *idev = + container_of(obj, struct iommufd_device, obj); + + /* Release the wait_cnt reference on this */ + iommufd_device_remove_vdev(idev); +} + void iommufd_device_destroy(struct iommufd_object *obj) { struct iommufd_device *idev = @@ -215,7 +272,6 @@ struct iommufd_device *iommufd_device_bind(struct iommufd_ctx *ictx, refcount_inc(&idev->obj.users); /* igroup refcount moves into iommufd_device */ idev->igroup = igroup; - mutex_init(&idev->iopf_lock); /* * If the caller fails after this success it must call @@ -233,7 +289,7 @@ out_group_put: iommufd_put_group(igroup); return ERR_PTR(rc); } -EXPORT_SYMBOL_NS_GPL(iommufd_device_bind, IOMMUFD); +EXPORT_SYMBOL_NS_GPL(iommufd_device_bind, "IOMMUFD"); /** * iommufd_ctx_has_group - True if any device within the group is bound @@ -264,7 +320,7 @@ bool iommufd_ctx_has_group(struct iommufd_ctx *ictx, struct iommu_group *group) xa_unlock(&ictx->objects); return false; } -EXPORT_SYMBOL_NS_GPL(iommufd_ctx_has_group, IOMMUFD); +EXPORT_SYMBOL_NS_GPL(iommufd_ctx_has_group, "IOMMUFD"); /** * iommufd_device_unbind - Undo iommufd_device_bind() @@ -279,70 +335,97 @@ void iommufd_device_unbind(struct iommufd_device *idev) { iommufd_object_destroy_user(idev->ictx, &idev->obj); } -EXPORT_SYMBOL_NS_GPL(iommufd_device_unbind, IOMMUFD); +EXPORT_SYMBOL_NS_GPL(iommufd_device_unbind, "IOMMUFD"); struct iommufd_ctx *iommufd_device_to_ictx(struct iommufd_device *idev) { return idev->ictx; } -EXPORT_SYMBOL_NS_GPL(iommufd_device_to_ictx, IOMMUFD); +EXPORT_SYMBOL_NS_GPL(iommufd_device_to_ictx, "IOMMUFD"); u32 iommufd_device_to_id(struct iommufd_device *idev) { return idev->obj.id; } -EXPORT_SYMBOL_NS_GPL(iommufd_device_to_id, IOMMUFD); +EXPORT_SYMBOL_NS_GPL(iommufd_device_to_id, "IOMMUFD"); + +static unsigned int iommufd_group_device_num(struct iommufd_group *igroup, + ioasid_t pasid) +{ + struct iommufd_attach *attach; + struct iommufd_device *idev; + unsigned int count = 0; + unsigned long index; + + lockdep_assert_held(&igroup->lock); + + attach = xa_load(&igroup->pasid_attach, pasid); + if (attach) + xa_for_each(&attach->device_array, index, idev) + count++; + return count; +} +#ifdef CONFIG_IRQ_MSI_IOMMU static int iommufd_group_setup_msi(struct iommufd_group *igroup, struct iommufd_hwpt_paging *hwpt_paging) { - phys_addr_t sw_msi_start = igroup->sw_msi_start; - int rc; + struct iommufd_ctx *ictx = igroup->ictx; + struct iommufd_sw_msi_map *cur; + + if (igroup->sw_msi_start == PHYS_ADDR_MAX) + return 0; /* - * If the IOMMU driver gives a IOMMU_RESV_SW_MSI then it is asking us to - * call iommu_get_msi_cookie() on its behalf. This is necessary to setup - * the MSI window so iommu_dma_prepare_msi() can install pages into our - * domain after request_irq(). If it is not done interrupts will not - * work on this domain. - * - * FIXME: This is conceptually broken for iommufd since we want to allow - * userspace to change the domains, eg switch from an identity IOAS to a - * DMA IOAS. There is currently no way to create a MSI window that - * matches what the IRQ layer actually expects in a newly created - * domain. 
+ * Install all the MSI pages the device has been using into the domain */ - if (sw_msi_start != PHYS_ADDR_MAX && !hwpt_paging->msi_cookie) { - rc = iommu_get_msi_cookie(hwpt_paging->common.domain, - sw_msi_start); + guard(mutex)(&ictx->sw_msi_lock); + list_for_each_entry(cur, &ictx->sw_msi_list, sw_msi_item) { + int rc; + + if (cur->sw_msi_start != igroup->sw_msi_start || + !test_bit(cur->id, igroup->required_sw_msi.bitmap)) + continue; + + rc = iommufd_sw_msi_install(ictx, hwpt_paging, cur); if (rc) return rc; - - /* - * iommu_get_msi_cookie() can only be called once per domain, - * it returns -EBUSY on later calls. - */ - hwpt_paging->msi_cookie = true; } return 0; } +#else +static inline int +iommufd_group_setup_msi(struct iommufd_group *igroup, + struct iommufd_hwpt_paging *hwpt_paging) +{ + return 0; +} +#endif + +static bool +iommufd_group_first_attach(struct iommufd_group *igroup, ioasid_t pasid) +{ + lockdep_assert_held(&igroup->lock); + return !xa_load(&igroup->pasid_attach, pasid); +} static int iommufd_device_attach_reserved_iova(struct iommufd_device *idev, struct iommufd_hwpt_paging *hwpt_paging) { + struct iommufd_group *igroup = idev->igroup; int rc; - lockdep_assert_held(&idev->igroup->lock); + lockdep_assert_held(&igroup->lock); rc = iopt_table_enforce_dev_resv_regions(&hwpt_paging->ioas->iopt, idev->dev, - &idev->igroup->sw_msi_start); + &igroup->sw_msi_start); if (rc) return rc; - if (list_empty(&idev->igroup->device_list)) { - rc = iommufd_group_setup_msi(idev->igroup, hwpt_paging); + if (iommufd_group_first_attach(igroup, IOMMU_NO_PASID)) { + rc = iommufd_group_setup_msi(igroup, hwpt_paging); if (rc) { iopt_remove_reserved_iova(&hwpt_paging->ioas->iopt, idev->dev); @@ -352,23 +435,216 @@ iommufd_device_attach_reserved_iova(struct iommufd_device *idev, return 0; } +/* The device attach/detach/replace helpers for attach_handle */ + +static bool iommufd_device_is_attached(struct iommufd_device *idev, + ioasid_t pasid) +{ + struct iommufd_attach *attach; + + attach = xa_load(&idev->igroup->pasid_attach, pasid); + return xa_load(&attach->device_array, idev->obj.id); +} + +static int iommufd_hwpt_pasid_compat(struct iommufd_hw_pagetable *hwpt, + struct iommufd_device *idev, + ioasid_t pasid) +{ + struct iommufd_group *igroup = idev->igroup; + + lockdep_assert_held(&igroup->lock); + + if (pasid == IOMMU_NO_PASID) { + unsigned long start = IOMMU_NO_PASID; + + if (!hwpt->pasid_compat && + xa_find_after(&igroup->pasid_attach, + &start, UINT_MAX, XA_PRESENT)) + return -EINVAL; + } else { + struct iommufd_attach *attach; + + if (!hwpt->pasid_compat) + return -EINVAL; + + attach = xa_load(&igroup->pasid_attach, IOMMU_NO_PASID); + if (attach && attach->hwpt && !attach->hwpt->pasid_compat) + return -EINVAL; + } + + return 0; +} + +static bool iommufd_hwpt_compatible_device(struct iommufd_hw_pagetable *hwpt, + struct iommufd_device *idev) +{ + struct pci_dev *pdev; + + if (!hwpt->fault || !dev_is_pci(idev->dev)) + return true; + + /* + * Once we turn on PCI/PRI support for VF, the response failure code + * should not be forwarded to the hardware due to PRI being a shared + * resource between PF and VFs. There is no coordination for this + * shared capability. This waits for a vPRI reset to recover. 
+ */ + pdev = to_pci_dev(idev->dev); + + return (!pdev->is_virtfn || !pci_pri_supported(pdev)); +} + +static int iommufd_hwpt_attach_device(struct iommufd_hw_pagetable *hwpt, + struct iommufd_device *idev, + ioasid_t pasid) +{ + struct iommufd_attach_handle *handle; + int rc; + + if (!iommufd_hwpt_compatible_device(hwpt, idev)) + return -EINVAL; + + rc = iommufd_hwpt_pasid_compat(hwpt, idev, pasid); + if (rc) + return rc; + + handle = kzalloc(sizeof(*handle), GFP_KERNEL); + if (!handle) + return -ENOMEM; + + handle->idev = idev; + if (pasid == IOMMU_NO_PASID) + rc = iommu_attach_group_handle(hwpt->domain, idev->igroup->group, + &handle->handle); + else + rc = iommu_attach_device_pasid(hwpt->domain, idev->dev, pasid, + &handle->handle); + if (rc) + goto out_free_handle; + + return 0; + +out_free_handle: + kfree(handle); + return rc; +} + +static struct iommufd_attach_handle * +iommufd_device_get_attach_handle(struct iommufd_device *idev, ioasid_t pasid) +{ + struct iommu_attach_handle *handle; + + lockdep_assert_held(&idev->igroup->lock); + + handle = iommu_attach_handle_get(idev->igroup->group, pasid, 0); + if (IS_ERR(handle)) + return NULL; + return to_iommufd_handle(handle); +} + +static void iommufd_hwpt_detach_device(struct iommufd_hw_pagetable *hwpt, + struct iommufd_device *idev, + ioasid_t pasid) +{ + struct iommufd_attach_handle *handle; + + handle = iommufd_device_get_attach_handle(idev, pasid); + if (pasid == IOMMU_NO_PASID) + iommu_detach_group_handle(hwpt->domain, idev->igroup->group); + else + iommu_detach_device_pasid(hwpt->domain, idev->dev, pasid); + + iommufd_auto_response_faults(hwpt, handle); + kfree(handle); +} + +static int iommufd_hwpt_replace_device(struct iommufd_device *idev, + ioasid_t pasid, + struct iommufd_hw_pagetable *hwpt, + struct iommufd_hw_pagetable *old) +{ + struct iommufd_attach_handle *handle, *old_handle; + int rc; + + if (!iommufd_hwpt_compatible_device(hwpt, idev)) + return -EINVAL; + + rc = iommufd_hwpt_pasid_compat(hwpt, idev, pasid); + if (rc) + return rc; + + old_handle = iommufd_device_get_attach_handle(idev, pasid); + + handle = kzalloc(sizeof(*handle), GFP_KERNEL); + if (!handle) + return -ENOMEM; + + handle->idev = idev; + if (pasid == IOMMU_NO_PASID) + rc = iommu_replace_group_handle(idev->igroup->group, + hwpt->domain, &handle->handle); + else + rc = iommu_replace_device_pasid(hwpt->domain, idev->dev, + pasid, &handle->handle); + if (rc) + goto out_free_handle; + + iommufd_auto_response_faults(hwpt, old_handle); + kfree(old_handle); + + return 0; + +out_free_handle: + kfree(handle); + return rc; +} + int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt, - struct iommufd_device *idev) + struct iommufd_device *idev, ioasid_t pasid) { struct iommufd_hwpt_paging *hwpt_paging = find_hwpt_paging(hwpt); + bool attach_resv = hwpt_paging && pasid == IOMMU_NO_PASID; + struct iommufd_group *igroup = idev->igroup; + struct iommufd_hw_pagetable *old_hwpt; + struct iommufd_attach *attach; int rc; - mutex_lock(&idev->igroup->lock); + mutex_lock(&igroup->lock); - if (idev->igroup->hwpt != NULL && idev->igroup->hwpt != hwpt) { - rc = -EINVAL; + attach = xa_cmpxchg(&igroup->pasid_attach, pasid, NULL, + XA_ZERO_ENTRY, GFP_KERNEL); + if (xa_is_err(attach)) { + rc = xa_err(attach); goto err_unlock; } - if (hwpt_paging) { + if (!attach) { + attach = kzalloc(sizeof(*attach), GFP_KERNEL); + if (!attach) { + rc = -ENOMEM; + goto err_release_pasid; + } + xa_init(&attach->device_array); + } + + old_hwpt = attach->hwpt; + + rc = 
xa_insert(&attach->device_array, idev->obj.id, XA_ZERO_ENTRY, + GFP_KERNEL); + if (rc) { + WARN_ON(rc == -EBUSY && !old_hwpt); + goto err_free_attach; + } + + if (old_hwpt && old_hwpt != hwpt) { + rc = -EINVAL; + goto err_release_devid; + } + + if (attach_resv) { rc = iommufd_device_attach_reserved_iova(idev, hwpt_paging); if (rc) - goto err_unlock; + goto err_release_devid; } /* @@ -378,51 +654,74 @@ int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt, * reserved regions are only updated during individual device * attachment. */ - if (list_empty(&idev->igroup->device_list)) { - rc = iommufd_hwpt_attach_device(hwpt, idev); + if (iommufd_group_first_attach(igroup, pasid)) { + rc = iommufd_hwpt_attach_device(hwpt, idev, pasid); if (rc) goto err_unresv; - idev->igroup->hwpt = hwpt; + attach->hwpt = hwpt; + WARN_ON(xa_is_err(xa_store(&igroup->pasid_attach, pasid, attach, + GFP_KERNEL))); } refcount_inc(&hwpt->obj.users); - list_add_tail(&idev->group_item, &idev->igroup->device_list); - mutex_unlock(&idev->igroup->lock); + WARN_ON(xa_is_err(xa_store(&attach->device_array, idev->obj.id, + idev, GFP_KERNEL))); + mutex_unlock(&igroup->lock); return 0; err_unresv: - if (hwpt_paging) + if (attach_resv) iopt_remove_reserved_iova(&hwpt_paging->ioas->iopt, idev->dev); +err_release_devid: + xa_release(&attach->device_array, idev->obj.id); +err_free_attach: + if (iommufd_group_first_attach(igroup, pasid)) + kfree(attach); +err_release_pasid: + if (iommufd_group_first_attach(igroup, pasid)) + xa_release(&igroup->pasid_attach, pasid); err_unlock: - mutex_unlock(&idev->igroup->lock); + mutex_unlock(&igroup->lock); return rc; } struct iommufd_hw_pagetable * -iommufd_hw_pagetable_detach(struct iommufd_device *idev) +iommufd_hw_pagetable_detach(struct iommufd_device *idev, ioasid_t pasid) { - struct iommufd_hw_pagetable *hwpt = idev->igroup->hwpt; - struct iommufd_hwpt_paging *hwpt_paging = find_hwpt_paging(hwpt); + struct iommufd_group *igroup = idev->igroup; + struct iommufd_hwpt_paging *hwpt_paging; + struct iommufd_hw_pagetable *hwpt; + struct iommufd_attach *attach; - mutex_lock(&idev->igroup->lock); - list_del(&idev->group_item); - if (list_empty(&idev->igroup->device_list)) { - iommufd_hwpt_detach_device(hwpt, idev); - idev->igroup->hwpt = NULL; + mutex_lock(&igroup->lock); + attach = xa_load(&igroup->pasid_attach, pasid); + if (!attach) { + mutex_unlock(&igroup->lock); + return NULL; + } + + hwpt = attach->hwpt; + hwpt_paging = find_hwpt_paging(hwpt); + + xa_erase(&attach->device_array, idev->obj.id); + if (xa_empty(&attach->device_array)) { + iommufd_hwpt_detach_device(hwpt, idev, pasid); + xa_erase(&igroup->pasid_attach, pasid); + kfree(attach); } - if (hwpt_paging) + if (hwpt_paging && pasid == IOMMU_NO_PASID) iopt_remove_reserved_iova(&hwpt_paging->ioas->iopt, idev->dev); - mutex_unlock(&idev->igroup->lock); + mutex_unlock(&igroup->lock); /* Caller must destroy hwpt */ return hwpt; } static struct iommufd_hw_pagetable * -iommufd_device_do_attach(struct iommufd_device *idev, +iommufd_device_do_attach(struct iommufd_device *idev, ioasid_t pasid, struct iommufd_hw_pagetable *hwpt) { int rc; - rc = iommufd_hw_pagetable_attach(hwpt, idev); + rc = iommufd_hw_pagetable_attach(hwpt, idev, pasid); if (rc) return ERR_PTR(rc); return NULL; @@ -432,11 +731,14 @@ static void iommufd_group_remove_reserved_iova(struct iommufd_group *igroup, struct iommufd_hwpt_paging *hwpt_paging) { + struct iommufd_attach *attach; struct iommufd_device *cur; + unsigned long index; 
lockdep_assert_held(&igroup->lock); - list_for_each_entry(cur, &igroup->device_list, group_item) + attach = xa_load(&igroup->pasid_attach, IOMMU_NO_PASID); + xa_for_each(&attach->device_array, index, cur) iopt_remove_reserved_iova(&hwpt_paging->ioas->iopt, cur->dev); } @@ -445,14 +747,17 @@ iommufd_group_do_replace_reserved_iova(struct iommufd_group *igroup, struct iommufd_hwpt_paging *hwpt_paging) { struct iommufd_hwpt_paging *old_hwpt_paging; + struct iommufd_attach *attach; struct iommufd_device *cur; + unsigned long index; int rc; lockdep_assert_held(&igroup->lock); - old_hwpt_paging = find_hwpt_paging(igroup->hwpt); + attach = xa_load(&igroup->pasid_attach, IOMMU_NO_PASID); + old_hwpt_paging = find_hwpt_paging(attach->hwpt); if (!old_hwpt_paging || hwpt_paging->ioas != old_hwpt_paging->ioas) { - list_for_each_entry(cur, &igroup->device_list, group_item) { + xa_for_each(&attach->device_array, index, cur) { rc = iopt_table_enforce_dev_resv_regions( &hwpt_paging->ioas->iopt, cur->dev, NULL); if (rc) @@ -471,69 +776,81 @@ err_unresv: } static struct iommufd_hw_pagetable * -iommufd_device_do_replace(struct iommufd_device *idev, +iommufd_device_do_replace(struct iommufd_device *idev, ioasid_t pasid, struct iommufd_hw_pagetable *hwpt) { struct iommufd_hwpt_paging *hwpt_paging = find_hwpt_paging(hwpt); + bool attach_resv = hwpt_paging && pasid == IOMMU_NO_PASID; struct iommufd_hwpt_paging *old_hwpt_paging; struct iommufd_group *igroup = idev->igroup; struct iommufd_hw_pagetable *old_hwpt; + struct iommufd_attach *attach; unsigned int num_devices; int rc; - mutex_lock(&idev->igroup->lock); + mutex_lock(&igroup->lock); + + attach = xa_load(&igroup->pasid_attach, pasid); + if (!attach) { + rc = -EINVAL; + goto err_unlock; + } + + old_hwpt = attach->hwpt; + + WARN_ON(!old_hwpt || xa_empty(&attach->device_array)); - if (igroup->hwpt == NULL) { + if (!iommufd_device_is_attached(idev, pasid)) { rc = -EINVAL; goto err_unlock; } - if (hwpt == igroup->hwpt) { - mutex_unlock(&idev->igroup->lock); + if (hwpt == old_hwpt) { + mutex_unlock(&igroup->lock); return NULL; } - old_hwpt = igroup->hwpt; - if (hwpt_paging) { + if (attach_resv) { rc = iommufd_group_do_replace_reserved_iova(igroup, hwpt_paging); if (rc) goto err_unlock; } - rc = iommufd_hwpt_replace_device(idev, hwpt, old_hwpt); + rc = iommufd_hwpt_replace_device(idev, pasid, hwpt, old_hwpt); if (rc) goto err_unresv; old_hwpt_paging = find_hwpt_paging(old_hwpt); - if (old_hwpt_paging && + if (old_hwpt_paging && pasid == IOMMU_NO_PASID && (!hwpt_paging || hwpt_paging->ioas != old_hwpt_paging->ioas)) iommufd_group_remove_reserved_iova(igroup, old_hwpt_paging); - igroup->hwpt = hwpt; + attach->hwpt = hwpt; - num_devices = list_count_nodes(&igroup->device_list); + num_devices = iommufd_group_device_num(igroup, pasid); /* - * Move the refcounts held by the device_list to the new hwpt. Retain a + * Move the refcounts held by the device_array to the new hwpt. Retain a * refcount for this thread as the caller will free it. 
*/ refcount_add(num_devices, &hwpt->obj.users); if (num_devices > 1) WARN_ON(refcount_sub_and_test(num_devices - 1, &old_hwpt->obj.users)); - mutex_unlock(&idev->igroup->lock); + mutex_unlock(&igroup->lock); /* Caller must destroy old_hwpt */ return old_hwpt; err_unresv: - if (hwpt_paging) + if (attach_resv) iommufd_group_remove_reserved_iova(igroup, hwpt_paging); err_unlock: - mutex_unlock(&idev->igroup->lock); + mutex_unlock(&igroup->lock); return ERR_PTR(rc); } typedef struct iommufd_hw_pagetable *(*attach_fn)( - struct iommufd_device *idev, struct iommufd_hw_pagetable *hwpt); + struct iommufd_device *idev, ioasid_t pasid, + struct iommufd_hw_pagetable *hwpt); /* * When automatically managing the domains we search for a compatible domain in @@ -541,7 +858,7 @@ typedef struct iommufd_hw_pagetable *(*attach_fn)( * Automatic domain selection will never pick a manually created domain. */ static struct iommufd_hw_pagetable * -iommufd_device_auto_get_domain(struct iommufd_device *idev, +iommufd_device_auto_get_domain(struct iommufd_device *idev, ioasid_t pasid, struct iommufd_ioas *ioas, u32 *pt_id, attach_fn do_attach) { @@ -570,7 +887,7 @@ iommufd_device_auto_get_domain(struct iommufd_device *idev, hwpt = &hwpt_paging->common; if (!iommufd_lock_obj(&hwpt->obj)) continue; - destroy_hwpt = (*do_attach)(idev, hwpt); + destroy_hwpt = (*do_attach)(idev, pasid, hwpt); if (IS_ERR(destroy_hwpt)) { iommufd_put_object(idev->ictx, &hwpt->obj); /* @@ -588,8 +905,8 @@ iommufd_device_auto_get_domain(struct iommufd_device *idev, goto out_unlock; } - hwpt_paging = iommufd_hwpt_paging_alloc(idev->ictx, ioas, idev, 0, - immediate_attach, NULL); + hwpt_paging = iommufd_hwpt_paging_alloc(idev->ictx, ioas, idev, pasid, + 0, immediate_attach, NULL); if (IS_ERR(hwpt_paging)) { destroy_hwpt = ERR_CAST(hwpt_paging); goto out_unlock; @@ -597,7 +914,7 @@ iommufd_device_auto_get_domain(struct iommufd_device *idev, hwpt = &hwpt_paging->common; if (!immediate_attach) { - destroy_hwpt = (*do_attach)(idev, hwpt); + destroy_hwpt = (*do_attach)(idev, pasid, hwpt); if (IS_ERR(destroy_hwpt)) goto out_abort; } else { @@ -618,8 +935,9 @@ out_unlock: return destroy_hwpt; } -static int iommufd_device_change_pt(struct iommufd_device *idev, u32 *pt_id, - attach_fn do_attach) +static int iommufd_device_change_pt(struct iommufd_device *idev, + ioasid_t pasid, + u32 *pt_id, attach_fn do_attach) { struct iommufd_hw_pagetable *destroy_hwpt; struct iommufd_object *pt_obj; @@ -634,7 +952,7 @@ static int iommufd_device_change_pt(struct iommufd_device *idev, u32 *pt_id, struct iommufd_hw_pagetable *hwpt = container_of(pt_obj, struct iommufd_hw_pagetable, obj); - destroy_hwpt = (*do_attach)(idev, hwpt); + destroy_hwpt = (*do_attach)(idev, pasid, hwpt); if (IS_ERR(destroy_hwpt)) goto out_put_pt_obj; break; @@ -643,8 +961,8 @@ static int iommufd_device_change_pt(struct iommufd_device *idev, u32 *pt_id, struct iommufd_ioas *ioas = container_of(pt_obj, struct iommufd_ioas, obj); - destroy_hwpt = iommufd_device_auto_get_domain(idev, ioas, pt_id, - do_attach); + destroy_hwpt = iommufd_device_auto_get_domain(idev, pasid, ioas, + pt_id, do_attach); if (IS_ERR(destroy_hwpt)) goto out_put_pt_obj; break; @@ -666,22 +984,26 @@ out_put_pt_obj: } /** - * iommufd_device_attach - Connect a device to an iommu_domain + * iommufd_device_attach - Connect a device/pasid to an iommu_domain * @idev: device to attach + * @pasid: pasid to attach * @pt_id: Input a IOMMUFD_OBJ_IOAS, or IOMMUFD_OBJ_HWPT_PAGING * Output the IOMMUFD_OBJ_HWPT_PAGING ID * - * This connects 
the device to an iommu_domain, either automatically or manually - selected. Once this completes the device could do DMA. + * This connects the device/pasid to an iommu_domain, either automatically + * or manually selected. Once this completes the device could do DMA with + * @pasid. @pasid is IOMMU_NO_PASID if this attach is for no pasid usage. * * The caller should return the resulting pt_id back to userspace. * This function is undone by calling iommufd_device_detach(). */ -int iommufd_device_attach(struct iommufd_device *idev, u32 *pt_id) +int iommufd_device_attach(struct iommufd_device *idev, ioasid_t pasid, + u32 *pt_id) { int rc; - rc = iommufd_device_change_pt(idev, pt_id, &iommufd_device_do_attach); + rc = iommufd_device_change_pt(idev, pasid, pt_id, + &iommufd_device_do_attach); if (rc) return rc; @@ -692,11 +1014,12 @@ int iommufd_device_attach(struct iommufd_device *idev, u32 *pt_id) refcount_inc(&idev->obj.users); return 0; } -EXPORT_SYMBOL_NS_GPL(iommufd_device_attach, IOMMUFD); +EXPORT_SYMBOL_NS_GPL(iommufd_device_attach, "IOMMUFD"); /** - * iommufd_device_replace - Change the device's iommu_domain + * iommufd_device_replace - Change the device/pasid's iommu_domain * @idev: device to change + * @pasid: pasid to change * @pt_id: Input a IOMMUFD_OBJ_IOAS, or IOMMUFD_OBJ_HWPT_PAGING * Output the IOMMUFD_OBJ_HWPT_PAGING ID * @@ -707,31 +1030,37 @@ EXPORT_SYMBOL_NS_GPL(iommufd_device_attach, IOMMUFD); * * If it fails then no change is made to the attachment. The iommu driver may * implement this so there is no disruption in translation. This can only be - * called if iommufd_device_attach() has already succeeded. + * called if iommufd_device_attach() has already succeeded. @pasid is + * IOMMU_NO_PASID for no pasid usage. */ -int iommufd_device_replace(struct iommufd_device *idev, u32 *pt_id) +int iommufd_device_replace(struct iommufd_device *idev, ioasid_t pasid, + u32 *pt_id) { - return iommufd_device_change_pt(idev, pt_id, + return iommufd_device_change_pt(idev, pasid, pt_id, &iommufd_device_do_replace); } -EXPORT_SYMBOL_NS_GPL(iommufd_device_replace, IOMMUFD); +EXPORT_SYMBOL_NS_GPL(iommufd_device_replace, "IOMMUFD"); /** - * iommufd_device_detach - Disconnect a device to an iommu_domain + * iommufd_device_detach - Disconnect a device/pasid to an iommu_domain * @idev: device to detach + * @pasid: pasid to detach * * Undo iommufd_device_attach(). This disconnects the idev from the previously * attached pt_id. The device returns back to a blocked DMA translation. + * @pasid is IOMMU_NO_PASID for no pasid usage.
*/ -void iommufd_device_detach(struct iommufd_device *idev) +void iommufd_device_detach(struct iommufd_device *idev, ioasid_t pasid) { struct iommufd_hw_pagetable *hwpt; - hwpt = iommufd_hw_pagetable_detach(idev); + hwpt = iommufd_hw_pagetable_detach(idev, pasid); + if (!hwpt) + return; iommufd_hw_pagetable_put(idev->ictx, hwpt); refcount_dec(&idev->obj.users); } -EXPORT_SYMBOL_NS_GPL(iommufd_device_detach, IOMMUFD); +EXPORT_SYMBOL_NS_GPL(iommufd_device_detach, "IOMMUFD"); /* * On success, it will refcount_inc() at a valid new_ioas and refcount_dec() at @@ -770,7 +1099,7 @@ static int iommufd_access_change_ioas(struct iommufd_access *access, } if (cur_ioas) { - if (access->ops->unmap) { + if (!iommufd_access_is_internal(access) && access->ops->unmap) { mutex_unlock(&access->ioas_lock); access->ops->unmap(access->data, 0, ULONG_MAX); mutex_lock(&access->ioas_lock); @@ -806,7 +1135,39 @@ void iommufd_access_destroy_object(struct iommufd_object *obj) if (access->ioas) WARN_ON(iommufd_access_change_ioas(access, NULL)); mutex_unlock(&access->ioas_lock); - iommufd_ctx_put(access->ictx); + if (!iommufd_access_is_internal(access)) + iommufd_ctx_put(access->ictx); +} + +static struct iommufd_access *__iommufd_access_create(struct iommufd_ctx *ictx) +{ + struct iommufd_access *access; + + /* + * There is no uAPI for the access object, but to keep things symmetric + * use the object infrastructure anyhow. + */ + access = iommufd_object_alloc(ictx, access, IOMMUFD_OBJ_ACCESS); + if (IS_ERR(access)) + return access; + + /* The calling driver is a user until iommufd_access_destroy() */ + refcount_inc(&access->obj.users); + mutex_init(&access->ioas_lock); + return access; +} + +struct iommufd_access *iommufd_access_create_internal(struct iommufd_ctx *ictx) +{ + struct iommufd_access *access; + + access = __iommufd_access_create(ictx); + if (IS_ERR(access)) + return access; + access->iova_alignment = PAGE_SIZE; + + iommufd_object_finalize(ictx, &access->obj); + return access; } /** @@ -828,11 +1189,7 @@ iommufd_access_create(struct iommufd_ctx *ictx, { struct iommufd_access *access; - /* - * There is no uAPI for the access object, but to keep things symmetric - * use the object infrastructure anyhow. 
- */ - access = iommufd_object_alloc(ictx, access, IOMMUFD_OBJ_ACCESS); + access = __iommufd_access_create(ictx); if (IS_ERR(access)) return access; @@ -844,16 +1201,13 @@ iommufd_access_create(struct iommufd_ctx *ictx, else access->iova_alignment = 1; - /* The calling driver is a user until iommufd_access_destroy() */ - refcount_inc(&access->obj.users); access->ictx = ictx; iommufd_ctx_get(ictx); iommufd_object_finalize(ictx, &access->obj); *id = access->obj.id; - mutex_init(&access->ioas_lock); return access; } -EXPORT_SYMBOL_NS_GPL(iommufd_access_create, IOMMUFD); +EXPORT_SYMBOL_NS_GPL(iommufd_access_create, "IOMMUFD"); /** * iommufd_access_destroy - Destroy an iommufd_access @@ -865,7 +1219,7 @@ void iommufd_access_destroy(struct iommufd_access *access) { iommufd_object_destroy_user(access->ictx, &access->obj); } -EXPORT_SYMBOL_NS_GPL(iommufd_access_destroy, IOMMUFD); +EXPORT_SYMBOL_NS_GPL(iommufd_access_destroy, "IOMMUFD"); void iommufd_access_detach(struct iommufd_access *access) { @@ -877,7 +1231,7 @@ void iommufd_access_detach(struct iommufd_access *access) WARN_ON(iommufd_access_change_ioas(access, NULL)); mutex_unlock(&access->ioas_lock); } -EXPORT_SYMBOL_NS_GPL(iommufd_access_detach, IOMMUFD); +EXPORT_SYMBOL_NS_GPL(iommufd_access_detach, "IOMMUFD"); int iommufd_access_attach(struct iommufd_access *access, u32 ioas_id) { @@ -893,7 +1247,23 @@ int iommufd_access_attach(struct iommufd_access *access, u32 ioas_id) mutex_unlock(&access->ioas_lock); return rc; } -EXPORT_SYMBOL_NS_GPL(iommufd_access_attach, IOMMUFD); +EXPORT_SYMBOL_NS_GPL(iommufd_access_attach, "IOMMUFD"); + +int iommufd_access_attach_internal(struct iommufd_access *access, + struct iommufd_ioas *ioas) +{ + int rc; + + mutex_lock(&access->ioas_lock); + if (WARN_ON(access->ioas)) { + mutex_unlock(&access->ioas_lock); + return -EINVAL; + } + + rc = iommufd_access_change_ioas(access, ioas); + mutex_unlock(&access->ioas_lock); + return rc; +} int iommufd_access_replace(struct iommufd_access *access, u32 ioas_id) { @@ -908,7 +1278,7 @@ int iommufd_access_replace(struct iommufd_access *access, u32 ioas_id) mutex_unlock(&access->ioas_lock); return rc; } -EXPORT_SYMBOL_NS_GPL(iommufd_access_replace, IOMMUFD); +EXPORT_SYMBOL_NS_GPL(iommufd_access_replace, "IOMMUFD"); /** * iommufd_access_notify_unmap - Notify users of an iopt to stop using it @@ -936,7 +1306,8 @@ void iommufd_access_notify_unmap(struct io_pagetable *iopt, unsigned long iova, xa_lock(&ioas->iopt.access_list); xa_for_each(&ioas->iopt.access_list, index, access) { - if (!iommufd_lock_obj(&access->obj)) + if (!iommufd_lock_obj(&access->obj) || + iommufd_access_is_internal(access)) continue; xa_unlock(&ioas->iopt.access_list); @@ -960,6 +1331,7 @@ void iommufd_access_notify_unmap(struct io_pagetable *iopt, unsigned long iova, void iommufd_access_unpin_pages(struct iommufd_access *access, unsigned long iova, unsigned long length) { + bool internal = iommufd_access_is_internal(access); struct iopt_area_contig_iter iter; struct io_pagetable *iopt; unsigned long last_iova; @@ -986,12 +1358,13 @@ void iommufd_access_unpin_pages(struct iommufd_access *access, area, iopt_area_iova_to_index(area, iter.cur_iova), iopt_area_iova_to_index( area, - min(last_iova, iopt_area_last_iova(area)))); + min(last_iova, iopt_area_last_iova(area))), + internal); WARN_ON(!iopt_area_contig_done(&iter)); up_read(&iopt->iova_rwsem); mutex_unlock(&access->ioas_lock); } -EXPORT_SYMBOL_NS_GPL(iommufd_access_unpin_pages, IOMMUFD); +EXPORT_SYMBOL_NS_GPL(iommufd_access_unpin_pages, "IOMMUFD"); 
static bool iopt_area_contig_is_aligned(struct iopt_area_contig_iter *iter) { @@ -1035,6 +1408,7 @@ int iommufd_access_pin_pages(struct iommufd_access *access, unsigned long iova, unsigned long length, struct page **out_pages, unsigned int flags) { + bool internal = iommufd_access_is_internal(access); struct iopt_area_contig_iter iter; struct io_pagetable *iopt; unsigned long last_iova; @@ -1043,7 +1417,8 @@ int iommufd_access_pin_pages(struct iommufd_access *access, unsigned long iova, /* Driver's ops don't support pin_pages */ if (IS_ENABLED(CONFIG_IOMMUFD_TEST) && - WARN_ON(access->iova_alignment != PAGE_SIZE || !access->ops->unmap)) + WARN_ON(access->iova_alignment != PAGE_SIZE || + (!internal && !access->ops->unmap))) return -EINVAL; if (!length) @@ -1077,7 +1452,7 @@ int iommufd_access_pin_pages(struct iommufd_access *access, unsigned long iova, } rc = iopt_area_add_access(area, index, last_index, out_pages, - flags); + flags, internal); if (rc) goto err_remove; out_pages += last_index - index + 1; @@ -1100,13 +1475,14 @@ err_remove: iopt_area_iova_to_index(area, iter.cur_iova), iopt_area_iova_to_index( area, min(last_iova, - iopt_area_last_iova(area)))); + iopt_area_last_iova(area))), + internal); } up_read(&iopt->iova_rwsem); mutex_unlock(&access->ioas_lock); return rc; } -EXPORT_SYMBOL_NS_GPL(iommufd_access_pin_pages, IOMMUFD); +EXPORT_SYMBOL_NS_GPL(iommufd_access_pin_pages, "IOMMUFD"); /** * iommufd_access_rw - Read or write data under the iova @@ -1127,7 +1503,7 @@ int iommufd_access_rw(struct iommufd_access *access, unsigned long iova, struct io_pagetable *iopt; struct iopt_area *area; unsigned long last_iova; - int rc; + int rc = -EINVAL; if (!length) return -EINVAL; @@ -1170,10 +1546,11 @@ err_out: mutex_unlock(&access->ioas_lock); return rc; } -EXPORT_SYMBOL_NS_GPL(iommufd_access_rw, IOMMUFD); +EXPORT_SYMBOL_NS_GPL(iommufd_access_rw, "IOMMUFD"); int iommufd_get_hw_info(struct iommufd_ucmd *ucmd) { + const u32 SUPPORTED_FLAGS = IOMMU_HW_INFO_FLAG_INPUT_TYPE; struct iommu_hw_info *cmd = ucmd->cmd; void __user *user_ptr = u64_to_user_ptr(cmd->data_uptr); const struct iommu_ops *ops; @@ -1183,9 +1560,15 @@ int iommufd_get_hw_info(struct iommufd_ucmd *ucmd) void *data; int rc; - if (cmd->flags || cmd->__reserved) + if (cmd->flags & ~SUPPORTED_FLAGS) + return -EOPNOTSUPP; + if (cmd->__reserved[0] || cmd->__reserved[1] || cmd->__reserved[2]) return -EOPNOTSUPP; + /* Clear the type field since drivers don't support a random input */ + if (!(cmd->flags & IOMMU_HW_INFO_FLAG_INPUT_TYPE)) + cmd->in_data_type = IOMMU_HW_INFO_TYPE_DEFAULT; + idev = iommufd_get_device(ucmd, cmd->dev_id); if (IS_ERR(idev)) return PTR_ERR(idev); @@ -1204,7 +1587,7 @@ int iommufd_get_hw_info(struct iommufd_ucmd *ucmd) */ if (WARN_ON_ONCE(cmd->out_data_type == IOMMU_HW_INFO_TYPE_NONE)) { - rc = -ENODEV; + rc = -EOPNOTSUPP; goto out_free; } } else { @@ -1240,6 +1623,36 @@ int iommufd_get_hw_info(struct iommufd_ucmd *ucmd) if (device_iommu_capable(idev->dev, IOMMU_CAP_DIRTY_TRACKING)) cmd->out_capabilities |= IOMMU_HW_CAP_DIRTY_TRACKING; + cmd->out_max_pasid_log2 = 0; + /* + * Currently, all iommu drivers enable PASID in the probe_device() + * op if iommu and device supports it. So the max_pasids stored in + * dev->iommu indicates both PASID support and enable status. A + * non-zero dev->iommu->max_pasids means PASID is supported and + * enabled. The iommufd only reports PASID capability to userspace + * if it's enabled. 
+ */ + if (idev->dev->iommu->max_pasids) { + cmd->out_max_pasid_log2 = ilog2(idev->dev->iommu->max_pasids); + + if (dev_is_pci(idev->dev)) { + struct pci_dev *pdev = to_pci_dev(idev->dev); + int ctrl; + + ctrl = pci_pasid_status(pdev); + + WARN_ON_ONCE(ctrl < 0 || + !(ctrl & PCI_PASID_CTRL_ENABLE)); + + if (ctrl & PCI_PASID_CTRL_EXEC) + cmd->out_capabilities |= + IOMMU_HW_CAP_PCI_PASID_EXEC; + if (ctrl & PCI_PASID_CTRL_PRIV) + cmd->out_capabilities |= + IOMMU_HW_CAP_PCI_PASID_PRIV; + } + } + rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); out_free: kfree(data); diff --git a/drivers/iommu/iommufd/driver.c b/drivers/iommu/iommufd/driver.c index 7b67fdf44134..6f1010da221c 100644 --- a/drivers/iommu/iommufd/driver.c +++ b/drivers/iommu/iommufd/driver.c @@ -3,38 +3,91 @@ */ #include "iommufd_private.h" -struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx, - size_t size, - enum iommufd_object_type type) +/* Driver should use a per-structure helper in include/linux/iommufd.h */ +int _iommufd_object_depend(struct iommufd_object *obj_dependent, + struct iommufd_object *obj_depended) { - struct iommufd_object *obj; + /* Reject self dependency that dead locks */ + if (obj_dependent == obj_depended) + return -EINVAL; + /* Only support dependency between two objects of the same type */ + if (obj_dependent->type != obj_depended->type) + return -EINVAL; + + refcount_inc(&obj_depended->users); + return 0; +} +EXPORT_SYMBOL_NS_GPL(_iommufd_object_depend, "IOMMUFD"); + +/* Driver should use a per-structure helper in include/linux/iommufd.h */ +void _iommufd_object_undepend(struct iommufd_object *obj_dependent, + struct iommufd_object *obj_depended) +{ + if (WARN_ON_ONCE(obj_dependent == obj_depended || + obj_dependent->type != obj_depended->type)) + return; + + refcount_dec(&obj_depended->users); +} +EXPORT_SYMBOL_NS_GPL(_iommufd_object_undepend, "IOMMUFD"); + +/* + * Allocate an @offset to return to user space to use for an mmap() syscall + * + * Driver should use a per-structure helper in include/linux/iommufd.h + */ +int _iommufd_alloc_mmap(struct iommufd_ctx *ictx, struct iommufd_object *owner, + phys_addr_t mmio_addr, size_t length, + unsigned long *offset) +{ + struct iommufd_mmap *immap; + unsigned long startp; int rc; - obj = kzalloc(size, GFP_KERNEL_ACCOUNT); - if (!obj) - return ERR_PTR(-ENOMEM); - obj->type = type; - /* Starts out bias'd by 1 until it is removed from the xarray */ - refcount_set(&obj->shortterm_users, 1); - refcount_set(&obj->users, 1); + if (!PAGE_ALIGNED(mmio_addr)) + return -EINVAL; + if (!length || !PAGE_ALIGNED(length)) + return -EINVAL; - /* - * Reserve an ID in the xarray but do not publish the pointer yet since - * the caller hasn't initialized it yet. Once the pointer is published - * in the xarray and visible to other threads we can't reliably destroy - * it anymore, so the caller must complete all errorable operations - * before calling iommufd_object_finalize(). 
- */ - rc = xa_alloc(&ictx->objects, &obj->id, XA_ZERO_ENTRY, xa_limit_31b, - GFP_KERNEL_ACCOUNT); - if (rc) - goto out_free; - return obj; -out_free: - kfree(obj); - return ERR_PTR(rc); + immap = kzalloc(sizeof(*immap), GFP_KERNEL); + if (!immap) + return -ENOMEM; + immap->owner = owner; + immap->length = length; + immap->mmio_addr = mmio_addr; + + /* Skip the first page to ease caller identifying the returned offset */ + rc = mtree_alloc_range(&ictx->mt_mmap, &startp, immap, immap->length, + PAGE_SIZE, ULONG_MAX, GFP_KERNEL); + if (rc < 0) { + kfree(immap); + return rc; + } + + /* mmap() syscall will right-shift the offset in vma->vm_pgoff too */ + immap->vm_pgoff = startp >> PAGE_SHIFT; + *offset = startp; + return 0; } -EXPORT_SYMBOL_NS_GPL(_iommufd_object_alloc, IOMMUFD); +EXPORT_SYMBOL_NS_GPL(_iommufd_alloc_mmap, "IOMMUFD"); + +/* Driver should use a per-structure helper in include/linux/iommufd.h */ +void _iommufd_destroy_mmap(struct iommufd_ctx *ictx, + struct iommufd_object *owner, unsigned long offset) +{ + struct iommufd_mmap *immap; + + immap = mtree_erase(&ictx->mt_mmap, offset); + WARN_ON_ONCE(!immap || immap->owner != owner); + kfree(immap); +} +EXPORT_SYMBOL_NS_GPL(_iommufd_destroy_mmap, "IOMMUFD"); + +struct device *iommufd_vdevice_to_device(struct iommufd_vdevice *vdev) +{ + return vdev->idev->dev; +} +EXPORT_SYMBOL_NS_GPL(iommufd_vdevice_to_device, "IOMMUFD"); /* Caller should xa_lock(&viommu->vdevs) to protect the return value */ struct device *iommufd_viommu_find_dev(struct iommufd_viommu *viommu, @@ -45,9 +98,207 @@ struct device *iommufd_viommu_find_dev(struct iommufd_viommu *viommu, lockdep_assert_held(&viommu->vdevs.xa_lock); vdev = xa_load(&viommu->vdevs, vdev_id); - return vdev ? vdev->dev : NULL; + return vdev ? iommufd_vdevice_to_device(vdev) : NULL; +} +EXPORT_SYMBOL_NS_GPL(iommufd_viommu_find_dev, "IOMMUFD"); + +/* Return -ENOENT if device is not associated to the vIOMMU */ +int iommufd_viommu_get_vdev_id(struct iommufd_viommu *viommu, + struct device *dev, unsigned long *vdev_id) +{ + struct iommufd_vdevice *vdev; + unsigned long index; + int rc = -ENOENT; + + if (WARN_ON_ONCE(!vdev_id)) + return -EINVAL; + + xa_lock(&viommu->vdevs); + xa_for_each(&viommu->vdevs, index, vdev) { + if (iommufd_vdevice_to_device(vdev) == dev) { + *vdev_id = vdev->virt_id; + rc = 0; + break; + } + } + xa_unlock(&viommu->vdevs); + return rc; +} +EXPORT_SYMBOL_NS_GPL(iommufd_viommu_get_vdev_id, "IOMMUFD"); + +/* + * Typically called in driver's threaded IRQ handler. 
+ * The @type and @event_data must be defined in include/uapi/linux/iommufd.h + */ +int iommufd_viommu_report_event(struct iommufd_viommu *viommu, + enum iommu_veventq_type type, void *event_data, + size_t data_len) +{ + struct iommufd_veventq *veventq; + struct iommufd_vevent *vevent; + int rc = 0; + + if (WARN_ON_ONCE(!data_len || !event_data)) + return -EINVAL; + + down_read(&viommu->veventqs_rwsem); + + veventq = iommufd_viommu_find_veventq(viommu, type); + if (!veventq) { + rc = -EOPNOTSUPP; + goto out_unlock_veventqs; + } + + spin_lock(&veventq->common.lock); + if (veventq->num_events == veventq->depth) { + vevent = &veventq->lost_events_header; + goto out_set_header; + } + + vevent = kzalloc(struct_size(vevent, event_data, data_len), GFP_ATOMIC); + if (!vevent) { + rc = -ENOMEM; + vevent = &veventq->lost_events_header; + goto out_set_header; + } + memcpy(vevent->event_data, event_data, data_len); + vevent->data_len = data_len; + veventq->num_events++; + +out_set_header: + iommufd_vevent_handler(veventq, vevent); + spin_unlock(&veventq->common.lock); +out_unlock_veventqs: + up_read(&viommu->veventqs_rwsem); + return rc; +} +EXPORT_SYMBOL_NS_GPL(iommufd_viommu_report_event, "IOMMUFD"); + +#ifdef CONFIG_IRQ_MSI_IOMMU +/* + * Get a iommufd_sw_msi_map for the msi physical address requested by the irq + * layer. The mapping to IOVA is global to the iommufd file descriptor, every + * domain that is attached to a device using the same MSI parameters will use + * the same IOVA. + */ +static struct iommufd_sw_msi_map * +iommufd_sw_msi_get_map(struct iommufd_ctx *ictx, phys_addr_t msi_addr, + phys_addr_t sw_msi_start) +{ + struct iommufd_sw_msi_map *cur; + unsigned int max_pgoff = 0; + + lockdep_assert_held(&ictx->sw_msi_lock); + + list_for_each_entry(cur, &ictx->sw_msi_list, sw_msi_item) { + if (cur->sw_msi_start != sw_msi_start) + continue; + max_pgoff = max(max_pgoff, cur->pgoff + 1); + if (cur->msi_addr == msi_addr) + return cur; + } + + if (ictx->sw_msi_id >= + BITS_PER_BYTE * sizeof_field(struct iommufd_sw_msi_maps, bitmap)) + return ERR_PTR(-EOVERFLOW); + + cur = kzalloc(sizeof(*cur), GFP_KERNEL); + if (!cur) + return ERR_PTR(-ENOMEM); + + cur->sw_msi_start = sw_msi_start; + cur->msi_addr = msi_addr; + cur->pgoff = max_pgoff; + cur->id = ictx->sw_msi_id++; + list_add_tail(&cur->sw_msi_item, &ictx->sw_msi_list); + return cur; +} + +int iommufd_sw_msi_install(struct iommufd_ctx *ictx, + struct iommufd_hwpt_paging *hwpt_paging, + struct iommufd_sw_msi_map *msi_map) +{ + unsigned long iova; + + lockdep_assert_held(&ictx->sw_msi_lock); + + iova = msi_map->sw_msi_start + msi_map->pgoff * PAGE_SIZE; + if (!test_bit(msi_map->id, hwpt_paging->present_sw_msi.bitmap)) { + int rc; + + rc = iommu_map(hwpt_paging->common.domain, iova, + msi_map->msi_addr, PAGE_SIZE, + IOMMU_WRITE | IOMMU_READ | IOMMU_MMIO, + GFP_KERNEL_ACCOUNT); + if (rc) + return rc; + __set_bit(msi_map->id, hwpt_paging->present_sw_msi.bitmap); + } + return 0; +} +EXPORT_SYMBOL_NS_GPL(iommufd_sw_msi_install, "IOMMUFD_INTERNAL"); + +/* + * Called by the irq code if the platform translates the MSI address through the + * IOMMU. msi_addr is the physical address of the MSI page. iommufd will + * allocate a fd global iova for the physical page that is the same on all + * domains and devices. 
+ */ +int iommufd_sw_msi(struct iommu_domain *domain, struct msi_desc *desc, + phys_addr_t msi_addr) +{ + struct device *dev = msi_desc_to_dev(desc); + struct iommufd_hwpt_paging *hwpt_paging; + struct iommu_attach_handle *raw_handle; + struct iommufd_attach_handle *handle; + struct iommufd_sw_msi_map *msi_map; + struct iommufd_ctx *ictx; + unsigned long iova; + int rc; + + /* + * It is safe to call iommu_attach_handle_get() here because the iommu + * core code invokes this under the group mutex which also prevents any + * change of the attach handle for the duration of this function. + */ + iommu_group_mutex_assert(dev); + + raw_handle = + iommu_attach_handle_get(dev->iommu_group, IOMMU_NO_PASID, 0); + if (IS_ERR(raw_handle)) + return 0; + hwpt_paging = find_hwpt_paging(domain->iommufd_hwpt); + + handle = to_iommufd_handle(raw_handle); + /* No IOMMU_RESV_SW_MSI means no change to the msi_msg */ + if (handle->idev->igroup->sw_msi_start == PHYS_ADDR_MAX) + return 0; + + ictx = handle->idev->ictx; + guard(mutex)(&ictx->sw_msi_lock); + /* + * The input msi_addr is the exact byte offset of the MSI doorbell, we + * assume the caller has checked that it is contained with a MMIO region + * that is secure to map at PAGE_SIZE. + */ + msi_map = iommufd_sw_msi_get_map(handle->idev->ictx, + msi_addr & PAGE_MASK, + handle->idev->igroup->sw_msi_start); + if (IS_ERR(msi_map)) + return PTR_ERR(msi_map); + + rc = iommufd_sw_msi_install(ictx, hwpt_paging, msi_map); + if (rc) + return rc; + __set_bit(msi_map->id, handle->idev->igroup->required_sw_msi.bitmap); + + iova = msi_map->sw_msi_start + msi_map->pgoff * PAGE_SIZE; + msi_desc_set_iommu_msi_iova(desc, iova, PAGE_SHIFT); + return 0; } -EXPORT_SYMBOL_NS_GPL(iommufd_viommu_find_dev, IOMMUFD); +EXPORT_SYMBOL_NS_GPL(iommufd_sw_msi, "IOMMUFD"); +#endif MODULE_DESCRIPTION("iommufd code shared with builtin modules"); +MODULE_IMPORT_NS("IOMMUFD_INTERNAL"); MODULE_LICENSE("GPL"); diff --git a/drivers/iommu/iommufd/eventq.c b/drivers/iommu/iommufd/eventq.c new file mode 100644 index 000000000000..fc4de63b0bce --- /dev/null +++ b/drivers/iommu/iommufd/eventq.c @@ -0,0 +1,546 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2024 Intel Corporation + */ +#define pr_fmt(fmt) "iommufd: " fmt + +#include <linux/anon_inodes.h> +#include <linux/file.h> +#include <linux/fs.h> +#include <linux/iommufd.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/poll.h> +#include <uapi/linux/iommufd.h> + +#include "../iommu-priv.h" +#include "iommufd_private.h" + +/* IOMMUFD_OBJ_FAULT Functions */ +void iommufd_auto_response_faults(struct iommufd_hw_pagetable *hwpt, + struct iommufd_attach_handle *handle) +{ + struct iommufd_fault *fault = hwpt->fault; + struct iopf_group *group, *next; + struct list_head free_list; + unsigned long index; + + if (!fault || !handle) + return; + INIT_LIST_HEAD(&free_list); + + mutex_lock(&fault->mutex); + spin_lock(&fault->common.lock); + list_for_each_entry_safe(group, next, &fault->common.deliver, node) { + if (group->attach_handle != &handle->handle) + continue; + list_move(&group->node, &free_list); + } + spin_unlock(&fault->common.lock); + + list_for_each_entry_safe(group, next, &free_list, node) { + list_del(&group->node); + iopf_group_response(group, IOMMU_PAGE_RESP_INVALID); + iopf_free_group(group); + } + + xa_for_each(&fault->response, index, group) { + if (group->attach_handle != &handle->handle) + continue; + xa_erase(&fault->response, index); + iopf_group_response(group, IOMMU_PAGE_RESP_INVALID); + 
iopf_free_group(group); + } + mutex_unlock(&fault->mutex); +} + +void iommufd_fault_destroy(struct iommufd_object *obj) +{ + struct iommufd_eventq *eventq = + container_of(obj, struct iommufd_eventq, obj); + struct iommufd_fault *fault = eventq_to_fault(eventq); + struct iopf_group *group, *next; + unsigned long index; + + /* + * The iommufd object's reference count is zero at this point. + * We can be confident that no other threads are currently + * accessing this pointer. Therefore, acquiring the mutex here + * is unnecessary. + */ + list_for_each_entry_safe(group, next, &fault->common.deliver, node) { + list_del(&group->node); + iopf_group_response(group, IOMMU_PAGE_RESP_INVALID); + iopf_free_group(group); + } + xa_for_each(&fault->response, index, group) { + xa_erase(&fault->response, index); + iopf_group_response(group, IOMMU_PAGE_RESP_INVALID); + iopf_free_group(group); + } + xa_destroy(&fault->response); + mutex_destroy(&fault->mutex); +} + +static void iommufd_compose_fault_message(struct iommu_fault *fault, + struct iommu_hwpt_pgfault *hwpt_fault, + struct iommufd_device *idev, + u32 cookie) +{ + hwpt_fault->flags = fault->prm.flags; + hwpt_fault->dev_id = idev->obj.id; + hwpt_fault->pasid = fault->prm.pasid; + hwpt_fault->grpid = fault->prm.grpid; + hwpt_fault->perm = fault->prm.perm; + hwpt_fault->addr = fault->prm.addr; + hwpt_fault->length = 0; + hwpt_fault->cookie = cookie; +} + +/* Fetch the first node out of the fault->deliver list */ +static struct iopf_group * +iommufd_fault_deliver_fetch(struct iommufd_fault *fault) +{ + struct list_head *list = &fault->common.deliver; + struct iopf_group *group = NULL; + + spin_lock(&fault->common.lock); + if (!list_empty(list)) { + group = list_first_entry(list, struct iopf_group, node); + list_del(&group->node); + } + spin_unlock(&fault->common.lock); + return group; +} + +/* Restore a node back to the head of the fault->deliver list */ +static void iommufd_fault_deliver_restore(struct iommufd_fault *fault, + struct iopf_group *group) +{ + spin_lock(&fault->common.lock); + list_add(&group->node, &fault->common.deliver); + spin_unlock(&fault->common.lock); +} + +static ssize_t iommufd_fault_fops_read(struct file *filep, char __user *buf, + size_t count, loff_t *ppos) +{ + size_t fault_size = sizeof(struct iommu_hwpt_pgfault); + struct iommufd_eventq *eventq = filep->private_data; + struct iommufd_fault *fault = eventq_to_fault(eventq); + struct iommu_hwpt_pgfault data = {}; + struct iommufd_device *idev; + struct iopf_group *group; + struct iopf_fault *iopf; + size_t done = 0; + int rc = 0; + + if (*ppos || count % fault_size) + return -ESPIPE; + + mutex_lock(&fault->mutex); + while ((group = iommufd_fault_deliver_fetch(fault))) { + if (done >= count || + group->fault_count * fault_size > count - done) { + iommufd_fault_deliver_restore(fault, group); + break; + } + + rc = xa_alloc(&fault->response, &group->cookie, group, + xa_limit_32b, GFP_KERNEL); + if (rc) { + iommufd_fault_deliver_restore(fault, group); + break; + } + + idev = to_iommufd_handle(group->attach_handle)->idev; + list_for_each_entry(iopf, &group->faults, list) { + iommufd_compose_fault_message(&iopf->fault, + &data, idev, + group->cookie); + if (copy_to_user(buf + done, &data, fault_size)) { + xa_erase(&fault->response, group->cookie); + iommufd_fault_deliver_restore(fault, group); + rc = -EFAULT; + break; + } + done += fault_size; + } + } + mutex_unlock(&fault->mutex); + + return done == 0 ? 
rc : done; +} + +static ssize_t iommufd_fault_fops_write(struct file *filep, const char __user *buf, + size_t count, loff_t *ppos) +{ + size_t response_size = sizeof(struct iommu_hwpt_page_response); + struct iommufd_eventq *eventq = filep->private_data; + struct iommufd_fault *fault = eventq_to_fault(eventq); + struct iommu_hwpt_page_response response; + struct iopf_group *group; + size_t done = 0; + int rc = 0; + + if (*ppos || count % response_size) + return -ESPIPE; + + mutex_lock(&fault->mutex); + while (count > done) { + rc = copy_from_user(&response, buf + done, response_size); + if (rc) + break; + + static_assert((int)IOMMUFD_PAGE_RESP_SUCCESS == + (int)IOMMU_PAGE_RESP_SUCCESS); + static_assert((int)IOMMUFD_PAGE_RESP_INVALID == + (int)IOMMU_PAGE_RESP_INVALID); + if (response.code != IOMMUFD_PAGE_RESP_SUCCESS && + response.code != IOMMUFD_PAGE_RESP_INVALID) { + rc = -EINVAL; + break; + } + + group = xa_erase(&fault->response, response.cookie); + if (!group) { + rc = -EINVAL; + break; + } + + iopf_group_response(group, response.code); + iopf_free_group(group); + done += response_size; + } + mutex_unlock(&fault->mutex); + + return done == 0 ? rc : done; +} + +/* IOMMUFD_OBJ_VEVENTQ Functions */ + +void iommufd_veventq_abort(struct iommufd_object *obj) +{ + struct iommufd_eventq *eventq = + container_of(obj, struct iommufd_eventq, obj); + struct iommufd_veventq *veventq = eventq_to_veventq(eventq); + struct iommufd_viommu *viommu = veventq->viommu; + struct iommufd_vevent *cur, *next; + + lockdep_assert_held_write(&viommu->veventqs_rwsem); + + list_for_each_entry_safe(cur, next, &eventq->deliver, node) { + list_del(&cur->node); + if (cur != &veventq->lost_events_header) + kfree(cur); + } + + refcount_dec(&viommu->obj.users); + list_del(&veventq->node); +} + +void iommufd_veventq_destroy(struct iommufd_object *obj) +{ + struct iommufd_veventq *veventq = eventq_to_veventq( + container_of(obj, struct iommufd_eventq, obj)); + + down_write(&veventq->viommu->veventqs_rwsem); + iommufd_veventq_abort(obj); + up_write(&veventq->viommu->veventqs_rwsem); +} + +static struct iommufd_vevent * +iommufd_veventq_deliver_fetch(struct iommufd_veventq *veventq) +{ + struct iommufd_eventq *eventq = &veventq->common; + struct list_head *list = &eventq->deliver; + struct iommufd_vevent *vevent = NULL; + + spin_lock(&eventq->lock); + if (!list_empty(list)) { + struct iommufd_vevent *next; + + next = list_first_entry(list, struct iommufd_vevent, node); + /* Make a copy of the lost_events_header for copy_to_user */ + if (next == &veventq->lost_events_header) { + vevent = kzalloc(sizeof(*vevent), GFP_ATOMIC); + if (!vevent) + goto out_unlock; + } + list_del(&next->node); + if (vevent) + memcpy(vevent, next, sizeof(*vevent)); + else + vevent = next; + } +out_unlock: + spin_unlock(&eventq->lock); + return vevent; +} + +static void iommufd_veventq_deliver_restore(struct iommufd_veventq *veventq, + struct iommufd_vevent *vevent) +{ + struct iommufd_eventq *eventq = &veventq->common; + struct list_head *list = &eventq->deliver; + + spin_lock(&eventq->lock); + if (vevent_for_lost_events_header(vevent)) { + /* Remove the copy of the lost_events_header */ + kfree(vevent); + vevent = NULL; + /* An empty list needs the lost_events_header back */ + if (list_empty(list)) + vevent = &veventq->lost_events_header; + } + if (vevent) + list_add(&vevent->node, list); + spin_unlock(&eventq->lock); +} + +static ssize_t iommufd_veventq_fops_read(struct file *filep, char __user *buf, + size_t count, loff_t *ppos) +{ + struct 
iommufd_eventq *eventq = filep->private_data; + struct iommufd_veventq *veventq = eventq_to_veventq(eventq); + struct iommufd_vevent_header *hdr; + struct iommufd_vevent *cur; + size_t done = 0; + int rc = 0; + + if (*ppos) + return -ESPIPE; + + while ((cur = iommufd_veventq_deliver_fetch(veventq))) { + /* Validate the remaining bytes against the header size */ + if (done >= count || sizeof(*hdr) > count - done) { + iommufd_veventq_deliver_restore(veventq, cur); + break; + } + hdr = &cur->header; + + /* If being a normal vEVENT, validate against the full size */ + if (!vevent_for_lost_events_header(cur) && + sizeof(hdr) + cur->data_len > count - done) { + iommufd_veventq_deliver_restore(veventq, cur); + break; + } + + if (copy_to_user(buf + done, hdr, sizeof(*hdr))) { + iommufd_veventq_deliver_restore(veventq, cur); + rc = -EFAULT; + break; + } + done += sizeof(*hdr); + + if (cur->data_len && + copy_to_user(buf + done, cur->event_data, cur->data_len)) { + iommufd_veventq_deliver_restore(veventq, cur); + rc = -EFAULT; + break; + } + spin_lock(&eventq->lock); + if (!vevent_for_lost_events_header(cur)) + veventq->num_events--; + spin_unlock(&eventq->lock); + done += cur->data_len; + kfree(cur); + } + + return done == 0 ? rc : done; +} + +/* Common Event Queue Functions */ + +static __poll_t iommufd_eventq_fops_poll(struct file *filep, + struct poll_table_struct *wait) +{ + struct iommufd_eventq *eventq = filep->private_data; + __poll_t pollflags = 0; + + if (eventq->obj.type == IOMMUFD_OBJ_FAULT) + pollflags |= EPOLLOUT; + + poll_wait(filep, &eventq->wait_queue, wait); + spin_lock(&eventq->lock); + if (!list_empty(&eventq->deliver)) + pollflags |= EPOLLIN | EPOLLRDNORM; + spin_unlock(&eventq->lock); + + return pollflags; +} + +static int iommufd_eventq_fops_release(struct inode *inode, struct file *filep) +{ + struct iommufd_eventq *eventq = filep->private_data; + + refcount_dec(&eventq->obj.users); + iommufd_ctx_put(eventq->ictx); + return 0; +} + +#define INIT_EVENTQ_FOPS(read_op, write_op) \ + ((const struct file_operations){ \ + .owner = THIS_MODULE, \ + .open = nonseekable_open, \ + .read = read_op, \ + .write = write_op, \ + .poll = iommufd_eventq_fops_poll, \ + .release = iommufd_eventq_fops_release, \ + }) + +static int iommufd_eventq_init(struct iommufd_eventq *eventq, char *name, + struct iommufd_ctx *ictx, + const struct file_operations *fops) +{ + struct file *filep; + int fdno; + + spin_lock_init(&eventq->lock); + INIT_LIST_HEAD(&eventq->deliver); + init_waitqueue_head(&eventq->wait_queue); + + filep = anon_inode_getfile(name, fops, eventq, O_RDWR); + if (IS_ERR(filep)) + return PTR_ERR(filep); + + eventq->ictx = ictx; + iommufd_ctx_get(eventq->ictx); + eventq->filep = filep; + refcount_inc(&eventq->obj.users); + + fdno = get_unused_fd_flags(O_CLOEXEC); + if (fdno < 0) + fput(filep); + return fdno; +} + +static const struct file_operations iommufd_fault_fops = + INIT_EVENTQ_FOPS(iommufd_fault_fops_read, iommufd_fault_fops_write); + +int iommufd_fault_alloc(struct iommufd_ucmd *ucmd) +{ + struct iommu_fault_alloc *cmd = ucmd->cmd; + struct iommufd_fault *fault; + int fdno; + int rc; + + if (cmd->flags) + return -EOPNOTSUPP; + + fault = __iommufd_object_alloc_ucmd(ucmd, fault, IOMMUFD_OBJ_FAULT, + common.obj); + if (IS_ERR(fault)) + return PTR_ERR(fault); + + xa_init_flags(&fault->response, XA_FLAGS_ALLOC1); + mutex_init(&fault->mutex); + + fdno = iommufd_eventq_init(&fault->common, "[iommufd-pgfault]", + ucmd->ictx, &iommufd_fault_fops); + if (fdno < 0) + return fdno; + + 
cmd->out_fault_id = fault->common.obj.id; + cmd->out_fault_fd = fdno; + + rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); + if (rc) + goto out_put_fdno; + + fd_install(fdno, fault->common.filep); + + return 0; +out_put_fdno: + put_unused_fd(fdno); + fput(fault->common.filep); + return rc; +} + +int iommufd_fault_iopf_handler(struct iopf_group *group) +{ + struct iommufd_hw_pagetable *hwpt; + struct iommufd_fault *fault; + + hwpt = group->attach_handle->domain->iommufd_hwpt; + fault = hwpt->fault; + + spin_lock(&fault->common.lock); + list_add_tail(&group->node, &fault->common.deliver); + spin_unlock(&fault->common.lock); + + wake_up_interruptible(&fault->common.wait_queue); + + return 0; +} + +static const struct file_operations iommufd_veventq_fops = + INIT_EVENTQ_FOPS(iommufd_veventq_fops_read, NULL); + +int iommufd_veventq_alloc(struct iommufd_ucmd *ucmd) +{ + struct iommu_veventq_alloc *cmd = ucmd->cmd; + struct iommufd_veventq *veventq; + struct iommufd_viommu *viommu; + int fdno; + int rc; + + if (cmd->flags || cmd->__reserved || + cmd->type == IOMMU_VEVENTQ_TYPE_DEFAULT) + return -EOPNOTSUPP; + if (!cmd->veventq_depth) + return -EINVAL; + + viommu = iommufd_get_viommu(ucmd, cmd->viommu_id); + if (IS_ERR(viommu)) + return PTR_ERR(viommu); + + down_write(&viommu->veventqs_rwsem); + + if (iommufd_viommu_find_veventq(viommu, cmd->type)) { + rc = -EEXIST; + goto out_unlock_veventqs; + } + + veventq = __iommufd_object_alloc(ucmd->ictx, veventq, + IOMMUFD_OBJ_VEVENTQ, common.obj); + if (IS_ERR(veventq)) { + rc = PTR_ERR(veventq); + goto out_unlock_veventqs; + } + + veventq->type = cmd->type; + veventq->viommu = viommu; + refcount_inc(&viommu->obj.users); + veventq->depth = cmd->veventq_depth; + list_add_tail(&veventq->node, &viommu->veventqs); + veventq->lost_events_header.header.flags = + IOMMU_VEVENTQ_FLAG_LOST_EVENTS; + + fdno = iommufd_eventq_init(&veventq->common, "[iommufd-viommu-event]", + ucmd->ictx, &iommufd_veventq_fops); + if (fdno < 0) { + rc = fdno; + goto out_abort; + } + + cmd->out_veventq_id = veventq->common.obj.id; + cmd->out_veventq_fd = fdno; + + rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); + if (rc) + goto out_put_fdno; + + iommufd_object_finalize(ucmd->ictx, &veventq->common.obj); + fd_install(fdno, veventq->common.filep); + goto out_unlock_veventqs; + +out_put_fdno: + put_unused_fd(fdno); + fput(veventq->common.filep); +out_abort: + iommufd_object_abort_and_destroy(ucmd->ictx, &veventq->common.obj); +out_unlock_veventqs: + up_write(&viommu->veventqs_rwsem); + iommufd_put_object(ucmd->ictx, &viommu->obj); + return rc; +} diff --git a/drivers/iommu/iommufd/fault.c b/drivers/iommu/iommufd/fault.c deleted file mode 100644 index 053b0e30f55a..000000000000 --- a/drivers/iommu/iommufd/fault.c +++ /dev/null @@ -1,446 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* Copyright (C) 2024 Intel Corporation - */ -#define pr_fmt(fmt) "iommufd: " fmt - -#include <linux/anon_inodes.h> -#include <linux/file.h> -#include <linux/fs.h> -#include <linux/iommufd.h> -#include <linux/module.h> -#include <linux/mutex.h> -#include <linux/pci.h> -#include <linux/pci-ats.h> -#include <linux/poll.h> -#include <uapi/linux/iommufd.h> - -#include "../iommu-priv.h" -#include "iommufd_private.h" - -static int iommufd_fault_iopf_enable(struct iommufd_device *idev) -{ - struct device *dev = idev->dev; - int ret; - - /* - * Once we turn on PCI/PRI support for VF, the response failure code - * should not be forwarded to the hardware due to PRI being a shared - * resource between PF and VFs. 
There is no coordination for this - * shared capability. This waits for a vPRI reset to recover. - */ - if (dev_is_pci(dev)) { - struct pci_dev *pdev = to_pci_dev(dev); - - if (pdev->is_virtfn && pci_pri_supported(pdev)) - return -EINVAL; - } - - mutex_lock(&idev->iopf_lock); - /* Device iopf has already been on. */ - if (++idev->iopf_enabled > 1) { - mutex_unlock(&idev->iopf_lock); - return 0; - } - - ret = iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_IOPF); - if (ret) - --idev->iopf_enabled; - mutex_unlock(&idev->iopf_lock); - - return ret; -} - -static void iommufd_fault_iopf_disable(struct iommufd_device *idev) -{ - mutex_lock(&idev->iopf_lock); - if (!WARN_ON(idev->iopf_enabled == 0)) { - if (--idev->iopf_enabled == 0) - iommu_dev_disable_feature(idev->dev, IOMMU_DEV_FEAT_IOPF); - } - mutex_unlock(&idev->iopf_lock); -} - -static int __fault_domain_attach_dev(struct iommufd_hw_pagetable *hwpt, - struct iommufd_device *idev) -{ - struct iommufd_attach_handle *handle; - int ret; - - handle = kzalloc(sizeof(*handle), GFP_KERNEL); - if (!handle) - return -ENOMEM; - - handle->idev = idev; - ret = iommu_attach_group_handle(hwpt->domain, idev->igroup->group, - &handle->handle); - if (ret) - kfree(handle); - - return ret; -} - -int iommufd_fault_domain_attach_dev(struct iommufd_hw_pagetable *hwpt, - struct iommufd_device *idev) -{ - int ret; - - if (!hwpt->fault) - return -EINVAL; - - ret = iommufd_fault_iopf_enable(idev); - if (ret) - return ret; - - ret = __fault_domain_attach_dev(hwpt, idev); - if (ret) - iommufd_fault_iopf_disable(idev); - - return ret; -} - -static void iommufd_auto_response_faults(struct iommufd_hw_pagetable *hwpt, - struct iommufd_attach_handle *handle) -{ - struct iommufd_fault *fault = hwpt->fault; - struct iopf_group *group, *next; - unsigned long index; - - if (!fault) - return; - - mutex_lock(&fault->mutex); - list_for_each_entry_safe(group, next, &fault->deliver, node) { - if (group->attach_handle != &handle->handle) - continue; - list_del(&group->node); - iopf_group_response(group, IOMMU_PAGE_RESP_INVALID); - iopf_free_group(group); - } - - xa_for_each(&fault->response, index, group) { - if (group->attach_handle != &handle->handle) - continue; - xa_erase(&fault->response, index); - iopf_group_response(group, IOMMU_PAGE_RESP_INVALID); - iopf_free_group(group); - } - mutex_unlock(&fault->mutex); -} - -static struct iommufd_attach_handle * -iommufd_device_get_attach_handle(struct iommufd_device *idev) -{ - struct iommu_attach_handle *handle; - - handle = iommu_attach_handle_get(idev->igroup->group, IOMMU_NO_PASID, 0); - if (IS_ERR(handle)) - return NULL; - - return to_iommufd_handle(handle); -} - -void iommufd_fault_domain_detach_dev(struct iommufd_hw_pagetable *hwpt, - struct iommufd_device *idev) -{ - struct iommufd_attach_handle *handle; - - handle = iommufd_device_get_attach_handle(idev); - iommu_detach_group_handle(hwpt->domain, idev->igroup->group); - iommufd_auto_response_faults(hwpt, handle); - iommufd_fault_iopf_disable(idev); - kfree(handle); -} - -static int __fault_domain_replace_dev(struct iommufd_device *idev, - struct iommufd_hw_pagetable *hwpt, - struct iommufd_hw_pagetable *old) -{ - struct iommufd_attach_handle *handle, *curr = NULL; - int ret; - - if (old->fault) - curr = iommufd_device_get_attach_handle(idev); - - if (hwpt->fault) { - handle = kzalloc(sizeof(*handle), GFP_KERNEL); - if (!handle) - return -ENOMEM; - - handle->idev = idev; - ret = iommu_replace_group_handle(idev->igroup->group, - hwpt->domain, &handle->handle); - } else { - ret = 
iommu_replace_group_handle(idev->igroup->group, - hwpt->domain, NULL); - } - - if (!ret && curr) { - iommufd_auto_response_faults(old, curr); - kfree(curr); - } - - return ret; -} - -int iommufd_fault_domain_replace_dev(struct iommufd_device *idev, - struct iommufd_hw_pagetable *hwpt, - struct iommufd_hw_pagetable *old) -{ - bool iopf_off = !hwpt->fault && old->fault; - bool iopf_on = hwpt->fault && !old->fault; - int ret; - - if (iopf_on) { - ret = iommufd_fault_iopf_enable(idev); - if (ret) - return ret; - } - - ret = __fault_domain_replace_dev(idev, hwpt, old); - if (ret) { - if (iopf_on) - iommufd_fault_iopf_disable(idev); - return ret; - } - - if (iopf_off) - iommufd_fault_iopf_disable(idev); - - return 0; -} - -void iommufd_fault_destroy(struct iommufd_object *obj) -{ - struct iommufd_fault *fault = container_of(obj, struct iommufd_fault, obj); - struct iopf_group *group, *next; - - /* - * The iommufd object's reference count is zero at this point. - * We can be confident that no other threads are currently - * accessing this pointer. Therefore, acquiring the mutex here - * is unnecessary. - */ - list_for_each_entry_safe(group, next, &fault->deliver, node) { - list_del(&group->node); - iopf_group_response(group, IOMMU_PAGE_RESP_INVALID); - iopf_free_group(group); - } -} - -static void iommufd_compose_fault_message(struct iommu_fault *fault, - struct iommu_hwpt_pgfault *hwpt_fault, - struct iommufd_device *idev, - u32 cookie) -{ - hwpt_fault->flags = fault->prm.flags; - hwpt_fault->dev_id = idev->obj.id; - hwpt_fault->pasid = fault->prm.pasid; - hwpt_fault->grpid = fault->prm.grpid; - hwpt_fault->perm = fault->prm.perm; - hwpt_fault->addr = fault->prm.addr; - hwpt_fault->length = 0; - hwpt_fault->cookie = cookie; -} - -static ssize_t iommufd_fault_fops_read(struct file *filep, char __user *buf, - size_t count, loff_t *ppos) -{ - size_t fault_size = sizeof(struct iommu_hwpt_pgfault); - struct iommufd_fault *fault = filep->private_data; - struct iommu_hwpt_pgfault data; - struct iommufd_device *idev; - struct iopf_group *group; - struct iopf_fault *iopf; - size_t done = 0; - int rc = 0; - - if (*ppos || count % fault_size) - return -ESPIPE; - - mutex_lock(&fault->mutex); - while (!list_empty(&fault->deliver) && count > done) { - group = list_first_entry(&fault->deliver, - struct iopf_group, node); - - if (group->fault_count * fault_size > count - done) - break; - - rc = xa_alloc(&fault->response, &group->cookie, group, - xa_limit_32b, GFP_KERNEL); - if (rc) - break; - - idev = to_iommufd_handle(group->attach_handle)->idev; - list_for_each_entry(iopf, &group->faults, list) { - iommufd_compose_fault_message(&iopf->fault, - &data, idev, - group->cookie); - if (copy_to_user(buf + done, &data, fault_size)) { - xa_erase(&fault->response, group->cookie); - rc = -EFAULT; - break; - } - done += fault_size; - } - - list_del(&group->node); - } - mutex_unlock(&fault->mutex); - - return done == 0 ? 
rc : done; -} - -static ssize_t iommufd_fault_fops_write(struct file *filep, const char __user *buf, - size_t count, loff_t *ppos) -{ - size_t response_size = sizeof(struct iommu_hwpt_page_response); - struct iommufd_fault *fault = filep->private_data; - struct iommu_hwpt_page_response response; - struct iopf_group *group; - size_t done = 0; - int rc = 0; - - if (*ppos || count % response_size) - return -ESPIPE; - - mutex_lock(&fault->mutex); - while (count > done) { - rc = copy_from_user(&response, buf + done, response_size); - if (rc) - break; - - static_assert((int)IOMMUFD_PAGE_RESP_SUCCESS == - (int)IOMMU_PAGE_RESP_SUCCESS); - static_assert((int)IOMMUFD_PAGE_RESP_INVALID == - (int)IOMMU_PAGE_RESP_INVALID); - if (response.code != IOMMUFD_PAGE_RESP_SUCCESS && - response.code != IOMMUFD_PAGE_RESP_INVALID) { - rc = -EINVAL; - break; - } - - group = xa_erase(&fault->response, response.cookie); - if (!group) { - rc = -EINVAL; - break; - } - - iopf_group_response(group, response.code); - iopf_free_group(group); - done += response_size; - } - mutex_unlock(&fault->mutex); - - return done == 0 ? rc : done; -} - -static __poll_t iommufd_fault_fops_poll(struct file *filep, - struct poll_table_struct *wait) -{ - struct iommufd_fault *fault = filep->private_data; - __poll_t pollflags = EPOLLOUT; - - poll_wait(filep, &fault->wait_queue, wait); - mutex_lock(&fault->mutex); - if (!list_empty(&fault->deliver)) - pollflags |= EPOLLIN | EPOLLRDNORM; - mutex_unlock(&fault->mutex); - - return pollflags; -} - -static int iommufd_fault_fops_release(struct inode *inode, struct file *filep) -{ - struct iommufd_fault *fault = filep->private_data; - - refcount_dec(&fault->obj.users); - iommufd_ctx_put(fault->ictx); - return 0; -} - -static const struct file_operations iommufd_fault_fops = { - .owner = THIS_MODULE, - .open = nonseekable_open, - .read = iommufd_fault_fops_read, - .write = iommufd_fault_fops_write, - .poll = iommufd_fault_fops_poll, - .release = iommufd_fault_fops_release, -}; - -int iommufd_fault_alloc(struct iommufd_ucmd *ucmd) -{ - struct iommu_fault_alloc *cmd = ucmd->cmd; - struct iommufd_fault *fault; - struct file *filep; - int fdno; - int rc; - - if (cmd->flags) - return -EOPNOTSUPP; - - fault = iommufd_object_alloc(ucmd->ictx, fault, IOMMUFD_OBJ_FAULT); - if (IS_ERR(fault)) - return PTR_ERR(fault); - - fault->ictx = ucmd->ictx; - INIT_LIST_HEAD(&fault->deliver); - xa_init_flags(&fault->response, XA_FLAGS_ALLOC1); - mutex_init(&fault->mutex); - init_waitqueue_head(&fault->wait_queue); - - filep = anon_inode_getfile("[iommufd-pgfault]", &iommufd_fault_fops, - fault, O_RDWR); - if (IS_ERR(filep)) { - rc = PTR_ERR(filep); - goto out_abort; - } - - refcount_inc(&fault->obj.users); - iommufd_ctx_get(fault->ictx); - fault->filep = filep; - - fdno = get_unused_fd_flags(O_CLOEXEC); - if (fdno < 0) { - rc = fdno; - goto out_fput; - } - - cmd->out_fault_id = fault->obj.id; - cmd->out_fault_fd = fdno; - - rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); - if (rc) - goto out_put_fdno; - iommufd_object_finalize(ucmd->ictx, &fault->obj); - - fd_install(fdno, fault->filep); - - return 0; -out_put_fdno: - put_unused_fd(fdno); -out_fput: - fput(filep); - refcount_dec(&fault->obj.users); - iommufd_ctx_put(fault->ictx); -out_abort: - iommufd_object_abort_and_destroy(ucmd->ictx, &fault->obj); - - return rc; -} - -int iommufd_fault_iopf_handler(struct iopf_group *group) -{ - struct iommufd_hw_pagetable *hwpt; - struct iommufd_fault *fault; - - hwpt = group->attach_handle->domain->fault_data; - fault = hwpt->fault; 
- - mutex_lock(&fault->mutex); - list_add_tail(&group->node, &fault->deliver); - mutex_unlock(&fault->mutex); - - wake_up_interruptible(&fault->wait_queue); - - return 0; -} diff --git a/drivers/iommu/iommufd/hw_pagetable.c b/drivers/iommu/iommufd/hw_pagetable.c index ce03c3804651..fe789c2dc0c9 100644 --- a/drivers/iommu/iommufd/hw_pagetable.c +++ b/drivers/iommu/iommufd/hw_pagetable.c @@ -14,7 +14,7 @@ static void __iommufd_hwpt_destroy(struct iommufd_hw_pagetable *hwpt) iommu_domain_free(hwpt->domain); if (hwpt->fault) - refcount_dec(&hwpt->fault->obj.users); + refcount_dec(&hwpt->fault->common.obj.users); } void iommufd_hwpt_paging_destroy(struct iommufd_object *obj) @@ -90,6 +90,7 @@ iommufd_hwpt_paging_enforce_cc(struct iommufd_hwpt_paging *hwpt_paging) * @ictx: iommufd context * @ioas: IOAS to associate the domain with * @idev: Device to get an iommu_domain for + * @pasid: PASID to get an iommu_domain for * @flags: Flags from userspace * @immediate_attach: True if idev should be attached to the hwpt * @user_data: The user provided driver specific data describing the domain to @@ -105,13 +106,14 @@ iommufd_hwpt_paging_enforce_cc(struct iommufd_hwpt_paging *hwpt_paging) */ struct iommufd_hwpt_paging * iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas, - struct iommufd_device *idev, u32 flags, - bool immediate_attach, + struct iommufd_device *idev, ioasid_t pasid, + u32 flags, bool immediate_attach, const struct iommu_user_data *user_data) { const u32 valid_flags = IOMMU_HWPT_ALLOC_NEST_PARENT | IOMMU_HWPT_ALLOC_DIRTY_TRACKING | - IOMMU_HWPT_FAULT_ID_VALID; + IOMMU_HWPT_FAULT_ID_VALID | + IOMMU_HWPT_ALLOC_PASID; const struct iommu_ops *ops = dev_iommu_ops(idev->dev); struct iommufd_hwpt_paging *hwpt_paging; struct iommufd_hw_pagetable *hwpt; @@ -126,12 +128,16 @@ iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas, if ((flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING) && !device_iommu_capable(idev->dev, IOMMU_CAP_DIRTY_TRACKING)) return ERR_PTR(-EOPNOTSUPP); + if ((flags & IOMMU_HWPT_FAULT_ID_VALID) && + (flags & IOMMU_HWPT_ALLOC_NEST_PARENT)) + return ERR_PTR(-EOPNOTSUPP); hwpt_paging = __iommufd_object_alloc( ictx, hwpt_paging, IOMMUFD_OBJ_HWPT_PAGING, common.obj); if (IS_ERR(hwpt_paging)) return ERR_CAST(hwpt_paging); hwpt = &hwpt_paging->common; + hwpt->pasid_compat = flags & IOMMU_HWPT_ALLOC_PASID; INIT_LIST_HEAD(&hwpt_paging->hwpt_item); /* Pairs with iommufd_hw_pagetable_destroy() */ @@ -140,8 +146,8 @@ iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas, hwpt_paging->nest_parent = flags & IOMMU_HWPT_ALLOC_NEST_PARENT; if (ops->domain_alloc_paging_flags) { - hwpt->domain = ops->domain_alloc_paging_flags(idev->dev, flags, - user_data); + hwpt->domain = ops->domain_alloc_paging_flags(idev->dev, + flags & ~IOMMU_HWPT_FAULT_ID_VALID, user_data); if (IS_ERR(hwpt->domain)) { rc = PTR_ERR(hwpt->domain); hwpt->domain = NULL; @@ -156,6 +162,8 @@ iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas, goto out_abort; } } + hwpt->domain->iommufd_hwpt = hwpt; + hwpt->domain->cookie_type = IOMMU_COOKIE_IOMMUFD; /* * Set the coherency mode before we do iopt_table_add_domain() as some @@ -184,7 +192,7 @@ iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas, * sequence. Once those drivers are fixed this should be removed. 
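 * (While it remains, a failure after this point must undo the attach
 * through the out_detach path below.)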
*/ if (immediate_attach) { - rc = iommufd_hw_pagetable_attach(hwpt, idev); + rc = iommufd_hw_pagetable_attach(hwpt, idev, pasid); if (rc) goto out_abort; } @@ -197,7 +205,7 @@ iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas, out_detach: if (immediate_attach) - iommufd_hw_pagetable_detach(idev); + iommufd_hw_pagetable_detach(idev, pasid); out_abort: iommufd_object_abort_and_destroy(ictx, &hwpt->obj); return ERR_PTR(rc); @@ -226,7 +234,7 @@ iommufd_hwpt_nested_alloc(struct iommufd_ctx *ictx, struct iommufd_hw_pagetable *hwpt; int rc; - if ((flags & ~IOMMU_HWPT_FAULT_ID_VALID) || + if ((flags & ~(IOMMU_HWPT_FAULT_ID_VALID | IOMMU_HWPT_ALLOC_PASID)) || !user_data->len || !ops->domain_alloc_nested) return ERR_PTR(-EOPNOTSUPP); if (parent->auto_domain || !parent->nest_parent || @@ -238,6 +246,7 @@ iommufd_hwpt_nested_alloc(struct iommufd_ctx *ictx, if (IS_ERR(hwpt_nested)) return ERR_CAST(hwpt_nested); hwpt = &hwpt_nested->common; + hwpt->pasid_compat = flags & IOMMU_HWPT_ALLOC_PASID; refcount_inc(&parent->common.obj.users); hwpt_nested->parent = parent; @@ -251,9 +260,11 @@ iommufd_hwpt_nested_alloc(struct iommufd_ctx *ictx, goto out_abort; } hwpt->domain->owner = ops; + hwpt->domain->iommufd_hwpt = hwpt; + hwpt->domain->cookie_type = IOMMU_COOKIE_IOMMUFD; if (WARN_ON_ONCE(hwpt->domain->type != IOMMU_DOMAIN_NESTED)) { - rc = -EINVAL; + rc = -EOPNOTSUPP; goto out_abort; } return hwpt_nested; @@ -280,6 +291,8 @@ iommufd_viommu_alloc_hwpt_nested(struct iommufd_viommu *viommu, u32 flags, struct iommufd_hw_pagetable *hwpt; int rc; + if (flags & ~(IOMMU_HWPT_FAULT_ID_VALID | IOMMU_HWPT_ALLOC_PASID)) + return ERR_PTR(-EOPNOTSUPP); if (!user_data->len) return ERR_PTR(-EOPNOTSUPP); if (!viommu->ops || !viommu->ops->alloc_domain_nested) @@ -290,22 +303,25 @@ iommufd_viommu_alloc_hwpt_nested(struct iommufd_viommu *viommu, u32 flags, if (IS_ERR(hwpt_nested)) return ERR_CAST(hwpt_nested); hwpt = &hwpt_nested->common; + hwpt->pasid_compat = flags & IOMMU_HWPT_ALLOC_PASID; hwpt_nested->viommu = viommu; refcount_inc(&viommu->obj.users); hwpt_nested->parent = viommu->hwpt; - hwpt->domain = - viommu->ops->alloc_domain_nested(viommu, flags, user_data); + hwpt->domain = viommu->ops->alloc_domain_nested( + viommu, flags & ~IOMMU_HWPT_FAULT_ID_VALID, user_data); if (IS_ERR(hwpt->domain)) { rc = PTR_ERR(hwpt->domain); hwpt->domain = NULL; goto out_abort; } + hwpt->domain->iommufd_hwpt = hwpt; hwpt->domain->owner = viommu->iommu_dev->ops; + hwpt->domain->cookie_type = IOMMU_COOKIE_IOMMUFD; if (WARN_ON_ONCE(hwpt->domain->type != IOMMU_DOMAIN_NESTED)) { - rc = -EINVAL; + rc = -EOPNOTSUPP; goto out_abort; } return hwpt_nested; @@ -351,8 +367,8 @@ int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd) ioas = container_of(pt_obj, struct iommufd_ioas, obj); mutex_lock(&ioas->mutex); hwpt_paging = iommufd_hwpt_paging_alloc( - ucmd->ictx, ioas, idev, cmd->flags, false, - user_data.len ? &user_data : NULL); + ucmd->ictx, ioas, idev, IOMMU_NO_PASID, cmd->flags, + false, user_data.len ? 
&user_data : NULL); if (IS_ERR(hwpt_paging)) { rc = PTR_ERR(hwpt_paging); goto out_unlock; @@ -402,9 +418,8 @@ int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd) } hwpt->fault = fault; hwpt->domain->iopf_handler = iommufd_fault_iopf_handler; - hwpt->domain->fault_data = hwpt; - refcount_inc(&fault->obj.users); - iommufd_put_object(ucmd->ictx, &fault->obj); + refcount_inc(&fault->common.obj.users); + iommufd_put_object(ucmd->ictx, &fault->common.obj); } cmd->out_hwpt_id = hwpt->obj.id; diff --git a/drivers/iommu/iommufd/io_pagetable.c b/drivers/iommu/iommufd/io_pagetable.c index 8a790e597e12..c0360c450880 100644 --- a/drivers/iommu/iommufd/io_pagetable.c +++ b/drivers/iommu/iommufd/io_pagetable.c @@ -70,36 +70,45 @@ struct iopt_area *iopt_area_contig_next(struct iopt_area_contig_iter *iter) return iter->area; } -static bool __alloc_iova_check_hole(struct interval_tree_double_span_iter *span, - unsigned long length, - unsigned long iova_alignment, - unsigned long page_offset) +static bool __alloc_iova_check_range(unsigned long *start, unsigned long last, + unsigned long length, + unsigned long iova_alignment, + unsigned long page_offset) { - if (span->is_used || span->last_hole - span->start_hole < length - 1) + unsigned long aligned_start; + + /* ALIGN_UP() */ + if (check_add_overflow(*start, iova_alignment - 1, &aligned_start)) return false; + aligned_start &= ~(iova_alignment - 1); + aligned_start |= page_offset; - span->start_hole = ALIGN(span->start_hole, iova_alignment) | - page_offset; - if (span->start_hole > span->last_hole || - span->last_hole - span->start_hole < length - 1) + if (aligned_start >= last || last - aligned_start < length - 1) return false; + *start = aligned_start; return true; } -static bool __alloc_iova_check_used(struct interval_tree_span_iter *span, +static bool __alloc_iova_check_hole(struct interval_tree_double_span_iter *span, unsigned long length, unsigned long iova_alignment, unsigned long page_offset) { - if (span->is_hole || span->last_used - span->start_used < length - 1) + if (span->is_used) return false; + return __alloc_iova_check_range(&span->start_hole, span->last_hole, + length, iova_alignment, page_offset); +} - span->start_used = ALIGN(span->start_used, iova_alignment) | - page_offset; - if (span->start_used > span->last_used || - span->last_used - span->start_used < length - 1) +static bool __alloc_iova_check_used(struct interval_tree_span_iter *span, + unsigned long length, + unsigned long iova_alignment, + unsigned long page_offset) +{ + if (span->is_hole) return false; - return true; + return __alloc_iova_check_range(&span->start_used, span->last_used, + length, iova_alignment, page_offset); } /* @@ -719,6 +728,12 @@ again: goto out_unlock_iova; } + /* The area is locked by an object that has not been destroyed */ + if (area->num_locks) { + rc = -EBUSY; + goto out_unlock_iova; + } + if (area_first < start || area_last > last) { rc = -ENOENT; goto out_unlock_iova; @@ -743,8 +758,10 @@ again: iommufd_access_notify_unmap(iopt, area_first, length); /* Something is not responding to unmap requests. 
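 * Retry a limited number of times; if the pages are still held after
 * ~100 attempts, give up and return -EDEADLOCK rather than looping
 * forever.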
*/ tries++; - if (WARN_ON(tries > 100)) - return -EDEADLOCK; + if (WARN_ON(tries > 100)) { + rc = -EDEADLOCK; + goto out_unmapped; + } goto again; } @@ -766,6 +783,7 @@ again: out_unlock_iova: up_write(&iopt->iova_rwsem); up_read(&iopt->domains_rwsem); +out_unmapped: if (unmapped) *unmapped = unmapped_bytes; return rc; @@ -1410,8 +1428,7 @@ out_unlock: } void iopt_remove_access(struct io_pagetable *iopt, - struct iommufd_access *access, - u32 iopt_access_list_id) + struct iommufd_access *access, u32 iopt_access_list_id) { down_write(&iopt->domains_rwsem); down_write(&iopt->iova_rwsem); diff --git a/drivers/iommu/iommufd/io_pagetable.h b/drivers/iommu/iommufd/io_pagetable.h index 10c928a9a463..b6064f4ce4af 100644 --- a/drivers/iommu/iommufd/io_pagetable.h +++ b/drivers/iommu/iommufd/io_pagetable.h @@ -48,6 +48,7 @@ struct iopt_area { int iommu_prot; bool prevent_access : 1; unsigned int num_accesses; + unsigned int num_locks; }; struct iopt_allowed { @@ -238,9 +239,9 @@ void iopt_pages_unfill_xarray(struct iopt_pages *pages, unsigned long start, int iopt_area_add_access(struct iopt_area *area, unsigned long start, unsigned long last, struct page **out_pages, - unsigned int flags); + unsigned int flags, bool lock_area); void iopt_area_remove_access(struct iopt_area *area, unsigned long start, - unsigned long last); + unsigned long last, bool unlock_area); int iopt_pages_rw_access(struct iopt_pages *pages, unsigned long start_byte, void *data, unsigned long length, unsigned int flags); diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h index b6d706cf2c66..0da2a81eedfa 100644 --- a/drivers/iommu/iommufd/iommufd_private.h +++ b/drivers/iommu/iommufd/iommufd_private.h @@ -7,6 +7,7 @@ #include <linux/iommu.h> #include <linux/iommufd.h> #include <linux/iova_bitmap.h> +#include <linux/maple_tree.h> #include <linux/rwsem.h> #include <linux/uaccess.h> #include <linux/xarray.h> @@ -19,12 +20,36 @@ struct iommu_group; struct iommu_option; struct iommufd_device; +struct iommufd_sw_msi_map { + struct list_head sw_msi_item; + phys_addr_t sw_msi_start; + phys_addr_t msi_addr; + unsigned int pgoff; + unsigned int id; +}; + +/* Bitmap of struct iommufd_sw_msi_map::id */ +struct iommufd_sw_msi_maps { + DECLARE_BITMAP(bitmap, 64); +}; + +#ifdef CONFIG_IRQ_MSI_IOMMU +int iommufd_sw_msi_install(struct iommufd_ctx *ictx, + struct iommufd_hwpt_paging *hwpt_paging, + struct iommufd_sw_msi_map *msi_map); +#endif + struct iommufd_ctx { struct file *file; struct xarray objects; struct xarray groups; wait_queue_head_t destroy_wait; struct rw_semaphore ioas_creation_lock; + struct maple_tree mt_mmap; + + struct mutex sw_msi_lock; + struct list_head sw_msi_list; + unsigned int sw_msi_id; u8 account_mode; /* Compatibility with VFIO no iommu */ @@ -32,6 +57,18 @@ struct iommufd_ctx { struct iommufd_ioas *vfio_ioas; }; +/* Entry for iommufd_ctx::mt_mmap */ +struct iommufd_mmap { + struct iommufd_object *owner; + + /* Page-shifted start position in mt_mmap to validate vma->vm_pgoff */ + unsigned long vm_pgoff; + + /* Physical range for io_remap_pfn_range() */ + phys_addr_t mmio_addr; + size_t length; +}; + /* * The IOVA to PFN map. The map automatically copies the PFNs into multiple * domains and permits sharing of PFNs between io_pagetable instances. 
This @@ -112,6 +149,7 @@ struct iommufd_ucmd { void __user *ubuffer; u32 user_size; void *cmd; + struct iommufd_object *new_obj; }; int iommufd_vfio_ioctl(struct iommufd_ctx *ictx, unsigned int cmd, @@ -131,7 +169,7 @@ static inline bool iommufd_lock_obj(struct iommufd_object *obj) { if (!refcount_inc_not_zero(&obj->users)) return false; - if (!refcount_inc_not_zero(&obj->shortterm_users)) { + if (!refcount_inc_not_zero(&obj->wait_cnt)) { /* * If the caller doesn't already have a ref on obj this must be * called under the xa_lock. Otherwise the caller is holding a @@ -149,11 +187,11 @@ static inline void iommufd_put_object(struct iommufd_ctx *ictx, struct iommufd_object *obj) { /* - * Users first, then shortterm so that REMOVE_WAIT_SHORTTERM never sees - * a spurious !0 users with a 0 shortterm_users. + * Users first, then wait_cnt so that REMOVE_WAIT never sees a spurious + * !0 users with a 0 wait_cnt. */ refcount_dec(&obj->users); - if (refcount_dec_and_test(&obj->shortterm_users)) + if (refcount_dec_and_test(&obj->wait_cnt)) wake_up_interruptible_all(&ictx->destroy_wait); } @@ -164,7 +202,8 @@ void iommufd_object_finalize(struct iommufd_ctx *ictx, struct iommufd_object *obj); enum { - REMOVE_WAIT_SHORTTERM = 1, + REMOVE_WAIT = BIT(0), + REMOVE_OBJ_TOMBSTONE = BIT(1), }; int iommufd_object_remove(struct iommufd_ctx *ictx, struct iommufd_object *to_destroy, u32 id, @@ -172,15 +211,35 @@ int iommufd_object_remove(struct iommufd_ctx *ictx, /* * The caller holds a users refcount and wants to destroy the object. At this - * point the caller has no shortterm_users reference and at least the xarray - * will be holding one. + * point the caller has no wait_cnt reference and at least the xarray will be + * holding one. */ static inline void iommufd_object_destroy_user(struct iommufd_ctx *ictx, struct iommufd_object *obj) { int ret; - ret = iommufd_object_remove(ictx, obj, obj->id, REMOVE_WAIT_SHORTTERM); + ret = iommufd_object_remove(ictx, obj, obj->id, REMOVE_WAIT); + + /* + * If there is a bug and we couldn't destroy the object then we did put + * back the caller's users refcount and will eventually try to free it + * again during close. + */ + WARN_ON(ret); +} + +/* + * Similar to iommufd_object_destroy_user(), except that the object ID is left + * reserved/tombstoned. + */ +static inline void iommufd_object_tombstone_user(struct iommufd_ctx *ictx, + struct iommufd_object *obj) +{ + int ret; + + ret = iommufd_object_remove(ictx, obj, obj->id, + REMOVE_WAIT | REMOVE_OBJ_TOMBSTONE); /* * If there is a bug and we couldn't destroy the object then we did put @@ -207,6 +266,15 @@ iommufd_object_put_and_try_destroy(struct iommufd_ctx *ictx, iommufd_object_remove(ictx, obj, obj->id, 0); } +/* + * Callers of these normal object allocators must call iommufd_object_finalize() + * to finalize the object, or call iommufd_object_abort_and_destroy() to revert + * the allocation. + */ +struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx, + size_t size, + enum iommufd_object_type type); + #define __iommufd_object_alloc(ictx, ptr, type, obj) \ container_of(_iommufd_object_alloc( \ ictx, \ @@ -220,6 +288,26 @@ iommufd_object_put_and_try_destroy(struct iommufd_ctx *ictx, __iommufd_object_alloc(ictx, ptr, type, obj) /* + * Callers of these _ucmd allocators should not call iommufd_object_finalize() + * or iommufd_object_abort_and_destroy(), as the core automatically does that. 
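+ * On return from the ioctl handler, iommufd_fops_ioctl() finalizes the
+ * object if the command succeeded, or aborts and destroys it if the
+ * command failed.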
+ */ +struct iommufd_object * +_iommufd_object_alloc_ucmd(struct iommufd_ucmd *ucmd, size_t size, + enum iommufd_object_type type); + +#define __iommufd_object_alloc_ucmd(ucmd, ptr, type, obj) \ + container_of(_iommufd_object_alloc_ucmd( \ + ucmd, \ + sizeof(*(ptr)) + BUILD_BUG_ON_ZERO( \ + offsetof(typeof(*(ptr)), \ + obj) != 0), \ + type), \ + typeof(*(ptr)), obj) + +#define iommufd_object_alloc_ucmd(ucmd, ptr, type) \ + __iommufd_object_alloc_ucmd(ucmd, ptr, type, obj) + +/* * The IO Address Space (IOAS) pagetable is a virtual page table backed by the * io_pagetable object. It is a user controlled mapping of IOVA -> PFNs. The * mapping is copied into all of the associated domains and made available to @@ -243,8 +331,7 @@ struct iommufd_ioas { static inline struct iommufd_ioas *iommufd_get_ioas(struct iommufd_ctx *ictx, u32 id) { - return container_of(iommufd_get_object(ictx, id, - IOMMUFD_OBJ_IOAS), + return container_of(iommufd_get_object(ictx, id, IOMMUFD_OBJ_IOAS), struct iommufd_ioas, obj); } @@ -276,6 +363,7 @@ struct iommufd_hw_pagetable { struct iommufd_object obj; struct iommu_domain *domain; struct iommufd_fault *fault; + bool pasid_compat : 1; }; struct iommufd_hwpt_paging { @@ -283,10 +371,10 @@ struct iommufd_hwpt_paging { struct iommufd_ioas *ioas; bool auto_domain : 1; bool enforce_cache_coherency : 1; - bool msi_cookie : 1; bool nest_parent : 1; /* Head at iommufd_ioas::hwpt_list */ struct list_head hwpt_item; + struct iommufd_sw_msi_maps present_sw_msi; }; struct iommufd_hwpt_nested { @@ -346,13 +434,13 @@ int iommufd_hwpt_get_dirty_bitmap(struct iommufd_ucmd *ucmd); struct iommufd_hwpt_paging * iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas, - struct iommufd_device *idev, u32 flags, - bool immediate_attach, + struct iommufd_device *idev, ioasid_t pasid, + u32 flags, bool immediate_attach, const struct iommu_user_data *user_data); int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt, - struct iommufd_device *idev); + struct iommufd_device *idev, ioasid_t pasid); struct iommufd_hw_pagetable * -iommufd_hw_pagetable_detach(struct iommufd_device *idev); +iommufd_hw_pagetable_detach(struct iommufd_device *idev, ioasid_t pasid); void iommufd_hwpt_paging_destroy(struct iommufd_object *obj); void iommufd_hwpt_paging_abort(struct iommufd_object *obj); void iommufd_hwpt_nested_destroy(struct iommufd_object *obj); @@ -376,13 +464,15 @@ static inline void iommufd_hw_pagetable_put(struct iommufd_ctx *ictx, refcount_dec(&hwpt->obj.users); } +struct iommufd_attach; + struct iommufd_group { struct kref ref; struct mutex lock; struct iommufd_ctx *ictx; struct iommu_group *group; - struct iommufd_hw_pagetable *hwpt; - struct list_head device_list; + struct xarray pasid_attach; + struct iommufd_sw_msi_maps required_sw_msi; phys_addr_t sw_msi_start; }; @@ -399,9 +489,8 @@ struct iommufd_device { /* always the physical device */ struct device *dev; bool enforce_cache_coherency; - /* protect iopf_enabled counter */ - struct mutex iopf_lock; - unsigned int iopf_enabled; + struct iommufd_vdevice *vdev; + bool destroying; }; static inline struct iommufd_device * @@ -412,6 +501,7 @@ iommufd_get_device(struct iommufd_ucmd *ucmd, u32 id) struct iommufd_device, obj); } +void iommufd_device_pre_destroy(struct iommufd_object *obj); void iommufd_device_destroy(struct iommufd_object *obj); int iommufd_get_hw_info(struct iommufd_ucmd *ucmd); @@ -429,24 +519,39 @@ struct iommufd_access { int iopt_add_access(struct io_pagetable *iopt, struct iommufd_access 
*access); void iopt_remove_access(struct io_pagetable *iopt, - struct iommufd_access *access, - u32 iopt_access_list_id); + struct iommufd_access *access, u32 iopt_access_list_id); void iommufd_access_destroy_object(struct iommufd_object *obj); -/* - * An iommufd_fault object represents an interface to deliver I/O page faults - * to the user space. These objects are created/destroyed by the user space and - * associated with hardware page table objects during page-table allocation. - */ -struct iommufd_fault { +/* iommufd_access for internal use */ +static inline bool iommufd_access_is_internal(struct iommufd_access *access) +{ + return !access->ictx; +} + +struct iommufd_access *iommufd_access_create_internal(struct iommufd_ctx *ictx); + +static inline void +iommufd_access_destroy_internal(struct iommufd_ctx *ictx, + struct iommufd_access *access) +{ + iommufd_object_destroy_user(ictx, &access->obj); +} + +int iommufd_access_attach_internal(struct iommufd_access *access, + struct iommufd_ioas *ioas); + +static inline void iommufd_access_detach_internal(struct iommufd_access *access) +{ + iommufd_access_detach(access); +} + +struct iommufd_eventq { struct iommufd_object obj; struct iommufd_ctx *ictx; struct file *filep; - /* The lists of outstanding faults protected by below mutex. */ - struct mutex mutex; + spinlock_t lock; /* protects the deliver list */ struct list_head deliver; - struct xarray response; struct wait_queue_head wait_queue; }; @@ -459,54 +564,103 @@ struct iommufd_attach_handle { /* Convert an iommu attach handle to iommufd handle. */ #define to_iommufd_handle(hdl) container_of(hdl, struct iommufd_attach_handle, handle) +/* + * An iommufd_fault object represents an interface to deliver I/O page faults + * to the user space. These objects are created/destroyed by the user space and + * associated with hardware page table objects during page-table allocation. 
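+ * Userspace receives faults by reading struct iommu_hwpt_pgfault records
+ * from the fault FD, and completes them by writing struct
+ * iommu_hwpt_page_response records back, matched through the cookie field.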
+ */ +struct iommufd_fault { + struct iommufd_eventq common; + struct mutex mutex; /* serializes response flows */ + struct xarray response; +}; + +static inline struct iommufd_fault * +eventq_to_fault(struct iommufd_eventq *eventq) +{ + return container_of(eventq, struct iommufd_fault, common); +} + static inline struct iommufd_fault * iommufd_get_fault(struct iommufd_ucmd *ucmd, u32 id) { return container_of(iommufd_get_object(ucmd->ictx, id, IOMMUFD_OBJ_FAULT), - struct iommufd_fault, obj); + struct iommufd_fault, common.obj); } int iommufd_fault_alloc(struct iommufd_ucmd *ucmd); void iommufd_fault_destroy(struct iommufd_object *obj); int iommufd_fault_iopf_handler(struct iopf_group *group); +void iommufd_auto_response_faults(struct iommufd_hw_pagetable *hwpt, + struct iommufd_attach_handle *handle); + +/* An iommufd_vevent represents a vIOMMU event in an iommufd_veventq */ +struct iommufd_vevent { + struct iommufd_vevent_header header; + struct list_head node; /* for iommufd_eventq::deliver */ + ssize_t data_len; + u64 event_data[] __counted_by(data_len); +}; -int iommufd_fault_domain_attach_dev(struct iommufd_hw_pagetable *hwpt, - struct iommufd_device *idev); -void iommufd_fault_domain_detach_dev(struct iommufd_hw_pagetable *hwpt, - struct iommufd_device *idev); -int iommufd_fault_domain_replace_dev(struct iommufd_device *idev, - struct iommufd_hw_pagetable *hwpt, - struct iommufd_hw_pagetable *old); +#define vevent_for_lost_events_header(vevent) \ + (vevent->header.flags & IOMMU_VEVENTQ_FLAG_LOST_EVENTS) -static inline int iommufd_hwpt_attach_device(struct iommufd_hw_pagetable *hwpt, - struct iommufd_device *idev) -{ - if (hwpt->fault) - return iommufd_fault_domain_attach_dev(hwpt, idev); +/* + * An iommufd_veventq object represents an interface to deliver vIOMMU events to + * the user space. It is created/destroyed by the user space and associated with + * a vIOMMU object during the allocations. 
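+ * The queue is bounded by the user-chosen veventq_depth; when events are
+ * dropped, a special lost_events_header entry flagged with
+ * IOMMU_VEVENTQ_FLAG_LOST_EVENTS is delivered in their place.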
+ */ +struct iommufd_veventq { + struct iommufd_eventq common; + struct iommufd_viommu *viommu; + struct list_head node; /* for iommufd_viommu::veventqs */ + struct iommufd_vevent lost_events_header; - return iommu_attach_group(hwpt->domain, idev->igroup->group); -} + enum iommu_veventq_type type; + unsigned int depth; + + /* Use common.lock for protection */ + u32 num_events; + u32 sequence; +}; -static inline void iommufd_hwpt_detach_device(struct iommufd_hw_pagetable *hwpt, - struct iommufd_device *idev) +static inline struct iommufd_veventq * +eventq_to_veventq(struct iommufd_eventq *eventq) { - if (hwpt->fault) { - iommufd_fault_domain_detach_dev(hwpt, idev); - return; - } + return container_of(eventq, struct iommufd_veventq, common); +} - iommu_detach_group(hwpt->domain, idev->igroup->group); +static inline struct iommufd_veventq * +iommufd_get_veventq(struct iommufd_ucmd *ucmd, u32 id) +{ + return container_of(iommufd_get_object(ucmd->ictx, id, + IOMMUFD_OBJ_VEVENTQ), + struct iommufd_veventq, common.obj); } -static inline int iommufd_hwpt_replace_device(struct iommufd_device *idev, - struct iommufd_hw_pagetable *hwpt, - struct iommufd_hw_pagetable *old) +int iommufd_veventq_alloc(struct iommufd_ucmd *ucmd); +void iommufd_veventq_destroy(struct iommufd_object *obj); +void iommufd_veventq_abort(struct iommufd_object *obj); + +static inline void iommufd_vevent_handler(struct iommufd_veventq *veventq, + struct iommufd_vevent *vevent) { - if (old->fault || hwpt->fault) - return iommufd_fault_domain_replace_dev(idev, hwpt, old); + struct iommufd_eventq *eventq = &veventq->common; + + lockdep_assert_held(&eventq->lock); + + /* + * Remove the lost_events_header and add the new node at the same time. + * Note the new node can be lost_events_header, for a sequence update. 
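+ * The sequence number is assigned when the event is queued and wraps to
+ * 0 after INT_MAX, so userspace can track ordering across reads.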
+ */ + if (list_is_last(&veventq->lost_events_header.node, &eventq->deliver)) + list_del(&veventq->lost_events_header.node); + list_add_tail(&vevent->node, &eventq->deliver); + vevent->header.sequence = veventq->sequence; + veventq->sequence = (veventq->sequence + 1) & INT_MAX; - return iommu_group_replace_domain(idev->igroup->group, hwpt->domain); + wake_up_interruptible(&eventq->wait_queue); } static inline struct iommufd_viommu * @@ -517,18 +671,36 @@ iommufd_get_viommu(struct iommufd_ucmd *ucmd, u32 id) struct iommufd_viommu, obj); } +static inline struct iommufd_veventq * +iommufd_viommu_find_veventq(struct iommufd_viommu *viommu, + enum iommu_veventq_type type) +{ + struct iommufd_veventq *veventq, *next; + + lockdep_assert_held(&viommu->veventqs_rwsem); + + list_for_each_entry_safe(veventq, next, &viommu->veventqs, node) { + if (veventq->type == type) + return veventq; + } + return NULL; +} + int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd); void iommufd_viommu_destroy(struct iommufd_object *obj); int iommufd_vdevice_alloc_ioctl(struct iommufd_ucmd *ucmd); void iommufd_vdevice_destroy(struct iommufd_object *obj); +void iommufd_vdevice_abort(struct iommufd_object *obj); +int iommufd_hw_queue_alloc_ioctl(struct iommufd_ucmd *ucmd); +void iommufd_hw_queue_destroy(struct iommufd_object *obj); -struct iommufd_vdevice { - struct iommufd_object obj; - struct iommufd_ctx *ictx; - struct iommufd_viommu *viommu; - struct device *dev; - u64 id; /* per-vIOMMU virtual ID */ -}; +static inline struct iommufd_vdevice * +iommufd_get_vdevice(struct iommufd_ctx *ictx, u32 id) +{ + return container_of(iommufd_get_object(ictx, id, + IOMMUFD_OBJ_VDEVICE), + struct iommufd_vdevice, obj); +} #ifdef CONFIG_IOMMUFD_TEST int iommufd_test(struct iommufd_ucmd *ucmd); diff --git a/drivers/iommu/iommufd/iommufd_test.h b/drivers/iommu/iommufd/iommufd_test.h index a6b7a163f636..8fc618b2bcf9 100644 --- a/drivers/iommu/iommufd/iommufd_test.h +++ b/drivers/iommu/iommufd/iommufd_test.h @@ -24,6 +24,11 @@ enum { IOMMU_TEST_OP_MD_CHECK_IOTLB, IOMMU_TEST_OP_TRIGGER_IOPF, IOMMU_TEST_OP_DEV_CHECK_CACHE, + IOMMU_TEST_OP_TRIGGER_VEVENT, + IOMMU_TEST_OP_PASID_ATTACH, + IOMMU_TEST_OP_PASID_REPLACE, + IOMMU_TEST_OP_PASID_DETACH, + IOMMU_TEST_OP_PASID_CHECK_HWPT, }; enum { @@ -48,6 +53,7 @@ enum { enum { MOCK_FLAGS_DEVICE_NO_DIRTY = 1 << 0, MOCK_FLAGS_DEVICE_HUGE_IOVA = 1 << 1, + MOCK_FLAGS_DEVICE_PASID = 1 << 2, }; enum { @@ -60,6 +66,9 @@ enum { MOCK_DEV_CACHE_NUM = 4, }; +/* Reserved for special pasid replace test */ +#define IOMMU_TEST_PASID_RESERVED 1024 + struct iommu_test_cmd { __u32 size; __u32 op; @@ -145,11 +154,36 @@ struct iommu_test_cmd { __u32 id; __u32 cache; } check_dev_cache; + struct { + __u32 dev_id; + } trigger_vevent; + struct { + __u32 pasid; + __u32 pt_id; + /* @id is stdev_id */ + } pasid_attach; + struct { + __u32 pasid; + __u32 pt_id; + /* @id is stdev_id */ + } pasid_replace; + struct { + __u32 pasid; + /* @id is stdev_id */ + } pasid_detach; + struct { + __u32 pasid; + __u32 hwpt_id; + /* @id is stdev_id */ + } pasid_check; }; __u32 last; }; #define IOMMU_TEST_CMD _IO(IOMMUFD_TYPE, IOMMUFD_CMD_BASE + 32) +/* Mock device/iommu PASID width */ +#define MOCK_PASID_WIDTH 20 + /* Mock structs for IOMMU_DEVICE_GET_HW_INFO ioctl */ #define IOMMU_HW_INFO_TYPE_SELFTEST 0xfeedbeef #define IOMMU_HW_INFO_SELFTEST_REGVAL 0xdeadbeef @@ -193,6 +227,23 @@ struct iommu_hwpt_invalidate_selftest { #define IOMMU_VIOMMU_TYPE_SELFTEST 0xdeadbeef +/** + * struct iommu_viommu_selftest - vIOMMU data for Mock driver + 
* (IOMMU_VIOMMU_TYPE_SELFTEST) + * @in_data: Input random data from user space + * @out_data: Output data (matching @in_data) to user space + * @out_mmap_offset: The offset argument for mmap syscall + * @out_mmap_length: The length argument for mmap syscall + * + * Simply set @out_data=@in_data for a loopback test + */ +struct iommu_viommu_selftest { + __u32 in_data; + __u32 out_data; + __aligned_u64 out_mmap_offset; + __aligned_u64 out_mmap_length; +}; + /* Should not be equal to any defined value in enum iommu_viommu_invalidate_data_type */ #define IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST 0xdeadbeef #define IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST_INVALID 0xdadbeef @@ -212,4 +263,13 @@ struct iommu_viommu_invalidate_selftest { __u32 cache_id; }; +#define IOMMU_VEVENTQ_TYPE_SELFTEST 0xbeefbeef + +struct iommu_viommu_event_selftest { + __u32 virt_id; +}; + +#define IOMMU_HW_QUEUE_TYPE_SELFTEST 0xdeadbeef +#define IOMMU_TEST_HW_QUEUE_MAX 2 + #endif diff --git a/drivers/iommu/iommufd/iova_bitmap.c b/drivers/iommu/iommufd/iova_bitmap.c index d90b9e253412..4514575818fc 100644 --- a/drivers/iommu/iommufd/iova_bitmap.c +++ b/drivers/iommu/iommufd/iova_bitmap.c @@ -130,7 +130,7 @@ struct iova_bitmap { static unsigned long iova_bitmap_offset_to_index(struct iova_bitmap *bitmap, unsigned long iova) { - unsigned long pgsize = 1 << bitmap->mapped.pgshift; + unsigned long pgsize = 1UL << bitmap->mapped.pgshift; return iova / (BITS_PER_TYPE(*bitmap->bitmap) * pgsize); } @@ -272,7 +272,7 @@ err: iova_bitmap_free(bitmap); return ERR_PTR(rc); } -EXPORT_SYMBOL_NS_GPL(iova_bitmap_alloc, IOMMUFD); +EXPORT_SYMBOL_NS_GPL(iova_bitmap_alloc, "IOMMUFD"); /** * iova_bitmap_free() - Frees an IOVA bitmap object @@ -294,7 +294,7 @@ void iova_bitmap_free(struct iova_bitmap *bitmap) kfree(bitmap); } -EXPORT_SYMBOL_NS_GPL(iova_bitmap_free, IOMMUFD); +EXPORT_SYMBOL_NS_GPL(iova_bitmap_free, "IOMMUFD"); /* * Returns the remaining bitmap indexes from mapped_total_index to process for @@ -387,7 +387,7 @@ int iova_bitmap_for_each(struct iova_bitmap *bitmap, void *opaque, { return fn(bitmap, bitmap->iova, bitmap->length, opaque); } -EXPORT_SYMBOL_NS_GPL(iova_bitmap_for_each, IOMMUFD); +EXPORT_SYMBOL_NS_GPL(iova_bitmap_for_each, "IOMMUFD"); /** * iova_bitmap_set() - Records an IOVA range in bitmap @@ -407,7 +407,6 @@ void iova_bitmap_set(struct iova_bitmap *bitmap, update_indexes: if (unlikely(!iova_bitmap_mapped_range(mapped, iova, length))) { - /* * The attempt to advance the base index to @iova * may fail if it's out of bounds, or pinning the pages @@ -445,4 +444,4 @@ update_indexes: cur_bit += nbits; } while (cur_bit <= last_bit); } -EXPORT_SYMBOL_NS_GPL(iova_bitmap_set, IOMMUFD); +EXPORT_SYMBOL_NS_GPL(iova_bitmap_set, "IOMMUFD"); diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c index 0a96cc8f27da..15af7ced0501 100644 --- a/drivers/iommu/iommufd/main.c +++ b/drivers/iommu/iommufd/main.c @@ -23,12 +23,72 @@ #include "iommufd_test.h" struct iommufd_object_ops { + void (*pre_destroy)(struct iommufd_object *obj); void (*destroy)(struct iommufd_object *obj); void (*abort)(struct iommufd_object *obj); }; static const struct iommufd_object_ops iommufd_object_ops[]; static struct miscdevice vfio_misc_dev; +struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx, + size_t size, + enum iommufd_object_type type) +{ + struct iommufd_object *obj; + int rc; + + obj = kzalloc(size, GFP_KERNEL_ACCOUNT); + if (!obj) + return ERR_PTR(-ENOMEM); + obj->type = type; + /* Starts out bias'd by 1 until it is removed 
from the xarray */ + refcount_set(&obj->wait_cnt, 1); + refcount_set(&obj->users, 1); + + /* + * Reserve an ID in the xarray but do not publish the pointer yet since + * the caller hasn't initialized it. Once the pointer is published + * in the xarray and visible to other threads we can't reliably destroy + * it anymore, so the caller must complete all operations that can fail + * before calling iommufd_object_finalize(). + */ + rc = xa_alloc(&ictx->objects, &obj->id, XA_ZERO_ENTRY, xa_limit_31b, + GFP_KERNEL_ACCOUNT); + if (rc) + goto out_free; + return obj; +out_free: + kfree(obj); + return ERR_PTR(rc); +} + +struct iommufd_object *_iommufd_object_alloc_ucmd(struct iommufd_ucmd *ucmd, + size_t size, + enum iommufd_object_type type) +{ + struct iommufd_object *new_obj; + + /* Something is coded wrong if this is hit */ + if (WARN_ON(ucmd->new_obj)) + return ERR_PTR(-EBUSY); + + /* + * An abort op must be invoked while the caller still holds its own + * lock. That doesn't work with _iommufd_object_alloc_ucmd(), which + * invokes the abort op from iommufd_object_abort_and_destroy(), + * outside any caller lock. + */ + if (WARN_ON(iommufd_object_ops[type].abort)) + return ERR_PTR(-EOPNOTSUPP); + + new_obj = _iommufd_object_alloc(ucmd->ictx, size, type); + if (IS_ERR(new_obj)) + return new_obj; + + ucmd->new_obj = new_obj; + return new_obj; +} + /* * Allow concurrent access to the object. * @@ -95,20 +155,22 @@ struct iommufd_object *iommufd_get_object(struct iommufd_ctx *ictx, u32 id, return obj; } -static int iommufd_object_dec_wait_shortterm(struct iommufd_ctx *ictx, - struct iommufd_object *to_destroy) +static int iommufd_object_dec_wait(struct iommufd_ctx *ictx, + struct iommufd_object *to_destroy) { - if (refcount_dec_and_test(&to_destroy->shortterm_users)) + if (refcount_dec_and_test(&to_destroy->wait_cnt)) return 0; + if (iommufd_object_ops[to_destroy->type].pre_destroy) + iommufd_object_ops[to_destroy->type].pre_destroy(to_destroy); + if (wait_event_timeout(ictx->destroy_wait, - refcount_read(&to_destroy->shortterm_users) == - 0, - msecs_to_jiffies(10000))) + refcount_read(&to_destroy->wait_cnt) == 0, + msecs_to_jiffies(60000))) return 0; pr_crit("Time out waiting for iommufd object to become free\n"); - refcount_inc(&to_destroy->shortterm_users); + refcount_inc(&to_destroy->wait_cnt); return -EBUSY; } @@ -122,17 +184,18 @@ int iommufd_object_remove(struct iommufd_ctx *ictx, { struct iommufd_object *obj; XA_STATE(xas, &ictx->objects, id); - bool zerod_shortterm = false; + bool zerod_wait_cnt = false; int ret; /* - * The purpose of the shortterm_users is to ensure deterministic - * destruction of objects used by external drivers and destroyed by this - * function. Any temporary increment of the refcount must increment - * shortterm_users, such as during ioctl execution. + * The purpose of the wait_cnt is to ensure deterministic destruction + * of objects used by external drivers and destroyed by this function. + * Incrementing this wait_cnt should either be short lived, such as + * during ioctl execution, or be revoked and blocked during + * pre_destroy(), such as vdev holding the idev's refcount. */ - if (flags & REMOVE_WAIT_SHORTTERM) { - ret = iommufd_object_dec_wait_shortterm(ictx, to_destroy); + if (flags & REMOVE_WAIT) { + ret = iommufd_object_dec_wait(ictx, to_destroy); if (ret) { /* * We have a bug. 
Put back the caller's reference and @@ -141,7 +204,7 @@ int iommufd_object_remove(struct iommufd_ctx *ictx, refcount_dec(&to_destroy->users); return ret; } - zerod_shortterm = true; + zerod_wait_cnt = true; } xa_lock(&ictx->objects); @@ -167,17 +230,17 @@ int iommufd_object_remove(struct iommufd_ctx *ictx, goto err_xa; } - xas_store(&xas, NULL); + xas_store(&xas, (flags & REMOVE_OBJ_TOMBSTONE) ? XA_ZERO_ENTRY : NULL); if (ictx->vfio_ioas == container_of(obj, struct iommufd_ioas, obj)) ictx->vfio_ioas = NULL; xa_unlock(&ictx->objects); /* - * Since users is zero any positive users_shortterm must be racing + * Since users is zero any positive wait_cnt must be racing * iommufd_put_object(), or we have a bug. */ - if (!zerod_shortterm) { - ret = iommufd_object_dec_wait_shortterm(ictx, obj); + if (!zerod_wait_cnt) { + ret = iommufd_object_dec_wait(ictx, obj); if (WARN_ON(ret)) return ret; } @@ -187,9 +250,9 @@ int iommufd_object_remove(struct iommufd_ctx *ictx, return 0; err_xa: - if (zerod_shortterm) { + if (zerod_wait_cnt) { /* Restore the xarray owned reference */ - refcount_set(&obj->shortterm_users, 1); + refcount_set(&obj->wait_cnt, 1); } xa_unlock(&ictx->objects); @@ -226,7 +289,10 @@ static int iommufd_fops_open(struct inode *inode, struct file *filp) xa_init_flags(&ictx->objects, XA_FLAGS_ALLOC1 | XA_FLAGS_ACCOUNT); xa_init(&ictx->groups); ictx->file = filp; + mt_init_flags(&ictx->mt_mmap, MT_FLAGS_ALLOC_RANGE); init_waitqueue_head(&ictx->destroy_wait); + mutex_init(&ictx->sw_msi_lock); + INIT_LIST_HEAD(&ictx->sw_msi_list); filp->private_data = ictx; return 0; } @@ -234,6 +300,8 @@ static int iommufd_fops_release(struct inode *inode, struct file *filp) { struct iommufd_ctx *ictx = filp->private_data; + struct iommufd_sw_msi_map *next; + struct iommufd_sw_msi_map *cur; struct iommufd_object *obj; /* @@ -248,20 +316,47 @@ static int iommufd_fops_release(struct inode *inode, struct file *filp) while (!xa_empty(&ictx->objects)) { unsigned int destroyed = 0; unsigned long index; + bool empty = true; + /* + * We can't use xa_empty() to end the loop as the tombstones + * are stored as XA_ZERO_ENTRY in the xarray. However + * xa_for_each() automatically converts them to NULL and skips + * them, which keeps xa_empty() false. Thus once + * xa_for_each() finds no further !NULL entries the loop is + * done. 
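+ * A tombstoned ID stays reserved in the xarray and is never handed
+ * out again for the lifetime of the ictx.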
+ */ xa_for_each(&ictx->objects, index, obj) { + empty = false; if (!refcount_dec_if_one(&obj->users)) continue; + destroyed++; xa_erase(&ictx->objects, index); iommufd_object_ops[obj->type].destroy(obj); kfree(obj); } + + if (empty) + break; + /* Bug related to users refcount */ if (WARN_ON(!destroyed)) break; } + + /* + * There may be some tombstones left over from + * iommufd_object_tombstone_user() + */ + xa_destroy(&ictx->objects); + WARN_ON(!xa_empty(&ictx->groups)); + + mutex_destroy(&ictx->sw_msi_lock); + list_for_each_entry_safe(cur, next, &ictx->sw_msi_list, sw_msi_item) + kfree(cur); + kfree(ictx); return 0; } @@ -296,6 +391,7 @@ union ucmd_buffer { struct iommu_destroy destroy; struct iommu_fault_alloc fault; struct iommu_hw_info info; + struct iommu_hw_queue_alloc hw_queue; struct iommu_hwpt_alloc hwpt; struct iommu_hwpt_get_dirty_bitmap get_dirty_bitmap; struct iommu_hwpt_invalidate cache; @@ -307,9 +403,10 @@ union ucmd_buffer { struct iommu_ioas_map map; struct iommu_ioas_unmap unmap; struct iommu_option option; + struct iommu_vdevice_alloc vdev; + struct iommu_veventq_alloc veventq; struct iommu_vfio_ioas vfio_ioas; struct iommu_viommu_alloc viommu; - struct iommu_vdevice_alloc vdev; #ifdef CONFIG_IOMMUFD_TEST struct iommu_test_cmd test; #endif @@ -333,10 +430,12 @@ struct iommufd_ioctl_op { } static const struct iommufd_ioctl_op iommufd_ioctl_ops[] = { IOCTL_OP(IOMMU_DESTROY, iommufd_destroy, struct iommu_destroy, id), - IOCTL_OP(IOMMU_FAULT_QUEUE_ALLOC, iommufd_fault_alloc, struct iommu_fault_alloc, - out_fault_fd), + IOCTL_OP(IOMMU_FAULT_QUEUE_ALLOC, iommufd_fault_alloc, + struct iommu_fault_alloc, out_fault_fd), IOCTL_OP(IOMMU_GET_HW_INFO, iommufd_get_hw_info, struct iommu_hw_info, __reserved), + IOCTL_OP(IOMMU_HW_QUEUE_ALLOC, iommufd_hw_queue_alloc_ioctl, + struct iommu_hw_queue_alloc, length), IOCTL_OP(IOMMU_HWPT_ALLOC, iommufd_hwpt_alloc, struct iommu_hwpt_alloc, __reserved), IOCTL_OP(IOMMU_HWPT_GET_DIRTY_BITMAP, iommufd_hwpt_get_dirty_bitmap, @@ -355,20 +454,20 @@ static const struct iommufd_ioctl_op iommufd_ioctl_ops[] = { src_iova), IOCTL_OP(IOMMU_IOAS_IOVA_RANGES, iommufd_ioas_iova_ranges, struct iommu_ioas_iova_ranges, out_iova_alignment), - IOCTL_OP(IOMMU_IOAS_MAP, iommufd_ioas_map, struct iommu_ioas_map, - iova), + IOCTL_OP(IOMMU_IOAS_MAP, iommufd_ioas_map, struct iommu_ioas_map, iova), IOCTL_OP(IOMMU_IOAS_MAP_FILE, iommufd_ioas_map_file, struct iommu_ioas_map_file, iova), IOCTL_OP(IOMMU_IOAS_UNMAP, iommufd_ioas_unmap, struct iommu_ioas_unmap, length), - IOCTL_OP(IOMMU_OPTION, iommufd_option, struct iommu_option, - val64), + IOCTL_OP(IOMMU_OPTION, iommufd_option, struct iommu_option, val64), + IOCTL_OP(IOMMU_VDEVICE_ALLOC, iommufd_vdevice_alloc_ioctl, + struct iommu_vdevice_alloc, virt_id), + IOCTL_OP(IOMMU_VEVENTQ_ALLOC, iommufd_veventq_alloc, + struct iommu_veventq_alloc, out_veventq_fd), IOCTL_OP(IOMMU_VFIO_IOAS, iommufd_vfio_ioas, struct iommu_vfio_ioas, __reserved), IOCTL_OP(IOMMU_VIOMMU_ALLOC, iommufd_viommu_alloc_ioctl, struct iommu_viommu_alloc, out_viommu_id), - IOCTL_OP(IOMMU_VDEVICE_ALLOC, iommufd_vdevice_alloc_ioctl, - struct iommu_vdevice_alloc, virt_id), #ifdef CONFIG_IOMMUFD_TEST IOCTL_OP(IOMMU_TEST_CMD, iommufd_test, struct iommu_test_cmd, last), #endif @@ -407,14 +506,83 @@ static long iommufd_fops_ioctl(struct file *filp, unsigned int cmd, if (ret) return ret; ret = op->execute(&ucmd); + + if (ucmd.new_obj) { + if (ret) + iommufd_object_abort_and_destroy(ictx, ucmd.new_obj); + else + iommufd_object_finalize(ictx, ucmd.new_obj); + } return 
ret; } +static void iommufd_fops_vma_open(struct vm_area_struct *vma) +{ + struct iommufd_mmap *immap = vma->vm_private_data; + + refcount_inc(&immap->owner->users); +} + +static void iommufd_fops_vma_close(struct vm_area_struct *vma) +{ + struct iommufd_mmap *immap = vma->vm_private_data; + + refcount_dec(&immap->owner->users); +} + +static const struct vm_operations_struct iommufd_vma_ops = { + .open = iommufd_fops_vma_open, + .close = iommufd_fops_vma_close, +}; + +/* The vm_pgoff must be pre-allocated from mt_mmap, and given to user space */ +static int iommufd_fops_mmap(struct file *filp, struct vm_area_struct *vma) +{ + struct iommufd_ctx *ictx = filp->private_data; + size_t length = vma->vm_end - vma->vm_start; + struct iommufd_mmap *immap; + int rc; + + if (!PAGE_ALIGNED(length)) + return -EINVAL; + if (!(vma->vm_flags & VM_SHARED)) + return -EINVAL; + if (vma->vm_flags & VM_EXEC) + return -EPERM; + + /* vma->vm_pgoff carries a page-shifted start position to an immap */ + immap = mtree_load(&ictx->mt_mmap, vma->vm_pgoff << PAGE_SHIFT); + if (!immap) + return -ENXIO; + /* + * mtree_load() returns the immap for any contained mmio_addr, so only + * allow the exact immap thing to be mapped + */ + if (vma->vm_pgoff != immap->vm_pgoff || length != immap->length) + return -ENXIO; + + vma->vm_pgoff = 0; + vma->vm_private_data = immap; + vma->vm_ops = &iommufd_vma_ops; + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + + rc = io_remap_pfn_range(vma, vma->vm_start, + immap->mmio_addr >> PAGE_SHIFT, length, + vma->vm_page_prot); + if (rc) + return rc; + + /* vm_ops.open won't be called for mmap itself. */ + refcount_inc(&immap->owner->users); + return rc; +} + static const struct file_operations iommufd_fops = { .owner = THIS_MODULE, .open = iommufd_fops_open, .release = iommufd_fops_release, .unlocked_ioctl = iommufd_fops_ioctl, + .mmap = iommufd_fops_mmap, }; /** @@ -427,7 +595,7 @@ void iommufd_ctx_get(struct iommufd_ctx *ictx) { get_file(ictx->file); } -EXPORT_SYMBOL_NS_GPL(iommufd_ctx_get, IOMMUFD); +EXPORT_SYMBOL_NS_GPL(iommufd_ctx_get, "IOMMUFD"); /** * iommufd_ctx_from_file - Acquires a reference to the iommufd context @@ -447,7 +615,7 @@ struct iommufd_ctx *iommufd_ctx_from_file(struct file *file) iommufd_ctx_get(ictx); return ictx; } -EXPORT_SYMBOL_NS_GPL(iommufd_ctx_from_file, IOMMUFD); +EXPORT_SYMBOL_NS_GPL(iommufd_ctx_from_file, "IOMMUFD"); /** * iommufd_ctx_from_fd - Acquires a reference to the iommufd context @@ -471,7 +639,7 @@ struct iommufd_ctx *iommufd_ctx_from_fd(int fd) /* fget is the same as iommufd_ctx_get() */ return file->private_data; } -EXPORT_SYMBOL_NS_GPL(iommufd_ctx_from_fd, IOMMUFD); +EXPORT_SYMBOL_NS_GPL(iommufd_ctx_from_fd, "IOMMUFD"); /** * iommufd_ctx_put - Put back a reference @@ -481,17 +649,21 @@ void iommufd_ctx_put(struct iommufd_ctx *ictx) { fput(ictx->file); } -EXPORT_SYMBOL_NS_GPL(iommufd_ctx_put, IOMMUFD); +EXPORT_SYMBOL_NS_GPL(iommufd_ctx_put, "IOMMUFD"); static const struct iommufd_object_ops iommufd_object_ops[] = { [IOMMUFD_OBJ_ACCESS] = { .destroy = iommufd_access_destroy_object, }, [IOMMUFD_OBJ_DEVICE] = { + .pre_destroy = iommufd_device_pre_destroy, .destroy = iommufd_device_destroy, }, - [IOMMUFD_OBJ_IOAS] = { - .destroy = iommufd_ioas_destroy, + [IOMMUFD_OBJ_FAULT] = { + .destroy = iommufd_fault_destroy, + }, + [IOMMUFD_OBJ_HW_QUEUE] = { + .destroy = iommufd_hw_queue_destroy, }, [IOMMUFD_OBJ_HWPT_PAGING] = { .destroy = iommufd_hwpt_paging_destroy, @@ -501,14 +673,19 @@ static const struct iommufd_object_ops iommufd_object_ops[] = 
{ .destroy = iommufd_hwpt_nested_destroy, .abort = iommufd_hwpt_nested_abort, }, - [IOMMUFD_OBJ_FAULT] = { - .destroy = iommufd_fault_destroy, - }, - [IOMMUFD_OBJ_VIOMMU] = { - .destroy = iommufd_viommu_destroy, + [IOMMUFD_OBJ_IOAS] = { + .destroy = iommufd_ioas_destroy, }, [IOMMUFD_OBJ_VDEVICE] = { .destroy = iommufd_vdevice_destroy, + .abort = iommufd_vdevice_abort, + }, + [IOMMUFD_OBJ_VEVENTQ] = { + .destroy = iommufd_veventq_destroy, + .abort = iommufd_veventq_abort, + }, + [IOMMUFD_OBJ_VIOMMU] = { + .destroy = iommufd_viommu_destroy, }, #ifdef CONFIG_IOMMUFD_TEST [IOMMUFD_OBJ_SELFTEST] = { @@ -525,7 +702,6 @@ static struct miscdevice iommu_misc_dev = { .mode = 0660, }; - static struct miscdevice vfio_misc_dev = { .minor = VFIO_MINOR, .name = "vfio", @@ -575,7 +751,7 @@ module_exit(iommufd_exit); MODULE_ALIAS_MISCDEV(VFIO_MINOR); MODULE_ALIAS("devname:vfio/vfio"); #endif -MODULE_IMPORT_NS(IOMMUFD_INTERNAL); -MODULE_IMPORT_NS(IOMMUFD); +MODULE_IMPORT_NS("IOMMUFD_INTERNAL"); +MODULE_IMPORT_NS("IOMMUFD"); MODULE_DESCRIPTION("I/O Address Space Management for passthrough devices"); MODULE_LICENSE("GPL"); diff --git a/drivers/iommu/iommufd/pages.c b/drivers/iommu/iommufd/pages.c index 3427749bc5ce..c3433b845561 100644 --- a/drivers/iommu/iommufd/pages.c +++ b/drivers/iommu/iommufd/pages.c @@ -1287,8 +1287,7 @@ static int pfn_reader_first(struct pfn_reader *pfns, struct iopt_pages *pages, } static struct iopt_pages *iopt_alloc_pages(unsigned long start_byte, - unsigned long length, - bool writable) + unsigned long length, bool writable) { struct iopt_pages *pages; @@ -1328,7 +1327,7 @@ struct iopt_pages *iopt_alloc_user_pages(void __user *uptr, struct iopt_pages *pages; unsigned long end; void __user *uptr_down = - (void __user *) ALIGN_DOWN((uintptr_t)uptr, PAGE_SIZE); + (void __user *)ALIGN_DOWN((uintptr_t)uptr, PAGE_SIZE); if (check_add_overflow((unsigned long)uptr, length, &end)) return ERR_PTR(-EOVERFLOW); @@ -2104,6 +2103,7 @@ iopt_pages_get_exact_access(struct iopt_pages *pages, unsigned long index, * @last_index: Inclusive last page index * @out_pages: Output list of struct page's representing the PFNs * @flags: IOMMUFD_ACCESS_RW_* flags + * @lock_area: Fail userspace munmap on this area * * Record that an in-kernel access will be accessing the pages, ensure they are * pinned, and return the PFNs as a simple list of 'struct page *'. 
@@ -2111,8 +2111,8 @@ iopt_pages_get_exact_access(struct iopt_pages *pages, unsigned long index, * This should be undone through a matching call to iopt_area_remove_access() */ int iopt_area_add_access(struct iopt_area *area, unsigned long start_index, - unsigned long last_index, struct page **out_pages, - unsigned int flags) + unsigned long last_index, struct page **out_pages, + unsigned int flags, bool lock_area) { struct iopt_pages *pages = area->pages; struct iopt_pages_access *access; @@ -2125,6 +2125,8 @@ int iopt_area_add_access(struct iopt_area *area, unsigned long start_index, access = iopt_pages_get_exact_access(pages, start_index, last_index); if (access) { area->num_accesses++; + if (lock_area) + area->num_locks++; access->users++; iopt_pages_fill_from_xarray(pages, start_index, last_index, out_pages); @@ -2146,6 +2148,8 @@ int iopt_area_add_access(struct iopt_area *area, unsigned long start_index, access->node.last = last_index; access->users = 1; area->num_accesses++; + if (lock_area) + area->num_locks++; interval_tree_insert(&access->node, &pages->access_itree); mutex_unlock(&pages->mutex); return 0; @@ -2162,12 +2166,13 @@ err_unlock: * @area: The source of PFNs * @start_index: First page index * @last_index: Inclusive last page index + * @unlock_area: Must match the lock_area passed to the paired iopt_area_add_access() * * Undo iopt_area_add_access() and unpin the pages if necessary. The caller * must stop using the PFNs before calling this. */ void iopt_area_remove_access(struct iopt_area *area, unsigned long start_index, - unsigned long last_index) + unsigned long last_index, bool unlock_area) { struct iopt_pages *pages = area->pages; struct iopt_pages_access *access; @@ -2178,6 +2183,10 @@ void iopt_area_remove_access(struct iopt_area *area, unsigned long start_index, goto out_unlock; WARN_ON(area->num_accesses == 0 || access->users == 0); + if (unlock_area) { + WARN_ON(area->num_locks == 0); + area->num_locks--; + } area->num_accesses--; access->users--; if (access->users) diff --git a/drivers/iommu/iommufd/selftest.c b/drivers/iommu/iommufd/selftest.c index a0de6d6d4e68..61686603c769 100644 --- a/drivers/iommu/iommufd/selftest.c +++ b/drivers/iommu/iommufd/selftest.c @@ -58,6 +58,9 @@ enum { MOCK_PFN_HUGE_IOVA = _MOCK_PFN_START << 2, }; +static int mock_dev_enable_iopf(struct device *dev, struct iommu_domain *domain); +static void mock_dev_disable_iopf(struct device *dev, struct iommu_domain *domain); + /* * Syzkaller has trouble randomizing the correct iova to use since it is linked * to the map ioctl's output, and it has no idea about that. So, simplify things.
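The lock_area/unlock_area pairing above is easy to get wrong, so here is a minimal in-kernel usage sketch. The demo_* caller is invented for illustration; only the two iopt_area_*_access() signatures and the IOMMUFD_ACCESS_RW_* flag come from the diff itself, and a real caller would live inside iommufd where these internals are visible.

/*
 * Hypothetical caller: pin a page range and hold the area locked so that a
 * userspace munmap() covering it fails (tracked by area->num_locks) rather
 * than racing with the ongoing access.
 */
static int demo_use_pfns_locked(struct iopt_area *area, unsigned long first,
				unsigned long last, struct page **out_pages)
{
	int rc;

	rc = iopt_area_add_access(area, first, last, out_pages,
				  IOMMUFD_ACCESS_RW_READ, /* lock_area */ true);
	if (rc)
		return rc;

	/* ... use the pinned PFNs in out_pages ... */

	/* unlock_area must mirror the lock_area passed at add time */
	iopt_area_remove_access(area, first, last, /* unlock_area */ true);
	return 0;
}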
@@ -135,7 +138,6 @@ to_mock_domain(struct iommu_domain *domain) struct mock_iommu_domain_nested { struct iommu_domain domain; struct mock_viommu *mock_viommu; - struct mock_iommu_domain *parent; u32 iotlb[MOCK_NESTED_DOMAIN_IOTLB_NUM]; }; @@ -148,6 +150,11 @@ to_mock_nested(struct iommu_domain *domain) struct mock_viommu { struct iommufd_viommu core; struct mock_iommu_domain *s2_parent; + struct mock_hw_queue *hw_queue[IOMMU_TEST_HW_QUEUE_MAX]; + struct mutex queue_mutex; + + unsigned long mmap_offset; + u32 *page; /* Mmap page to test u32 type of in_data */ }; static inline struct mock_viommu *to_mock_viommu(struct iommufd_viommu *viommu) @@ -155,15 +162,34 @@ static inline struct mock_viommu *to_mock_viommu(struct iommufd_viommu *viommu) return container_of(viommu, struct mock_viommu, core); } +struct mock_hw_queue { + struct iommufd_hw_queue core; + struct mock_viommu *mock_viommu; + struct mock_hw_queue *prev; + u16 index; +}; + +static inline struct mock_hw_queue * +to_mock_hw_queue(struct iommufd_hw_queue *hw_queue) +{ + return container_of(hw_queue, struct mock_hw_queue, core); +} + enum selftest_obj_type { TYPE_IDEV, }; struct mock_dev { struct device dev; + struct mock_viommu *viommu; + struct rw_semaphore viommu_rwsem; unsigned long flags; + unsigned long vdev_id; int id; u32 cache[MOCK_DEV_CACHE_NUM]; + atomic_t pasid_1024_fake_error; + unsigned int iopf_refcount; + struct iommu_domain *domain; }; static inline struct mock_dev *to_mock_dev(struct device *dev) @@ -193,15 +219,85 @@ static int mock_domain_nop_attach(struct iommu_domain *domain, struct device *dev) { struct mock_dev *mdev = to_mock_dev(dev); + struct mock_viommu *new_viommu = NULL; + unsigned long vdev_id = 0; + int rc; if (domain->dirty_ops && (mdev->flags & MOCK_FLAGS_DEVICE_NO_DIRTY)) return -EINVAL; + iommu_group_mutex_assert(dev); + if (domain->type == IOMMU_DOMAIN_NESTED) { + new_viommu = to_mock_nested(domain)->mock_viommu; + if (new_viommu) { + rc = iommufd_viommu_get_vdev_id(&new_viommu->core, dev, + &vdev_id); + if (rc) + return rc; + } + } + if (new_viommu != mdev->viommu) { + down_write(&mdev->viommu_rwsem); + mdev->viommu = new_viommu; + mdev->vdev_id = vdev_id; + up_write(&mdev->viommu_rwsem); + } + + rc = mock_dev_enable_iopf(dev, domain); + if (rc) + return rc; + + mock_dev_disable_iopf(dev, mdev->domain); + mdev->domain = domain; + + return 0; +} + +static int mock_domain_set_dev_pasid_nop(struct iommu_domain *domain, + struct device *dev, ioasid_t pasid, + struct iommu_domain *old) +{ + struct mock_dev *mdev = to_mock_dev(dev); + int rc; + + /* + * On the first attach with pasid 1024, set + * mdev->pasid_1024_fake_error so that the second call of this op + * fakes an error to validate the error path of the core. This is + * helpful for testing the case in which the iommu core needs to + * roll back to the old domain due to a driver failure, e.g. on + * replace. A third call of this op shall succeed, since + * mdev->pasid_1024_fake_error is cleared by the second call. + */ + if (pasid == 1024) { + if (domain->type == IOMMU_DOMAIN_BLOCKED) { + atomic_set(&mdev->pasid_1024_fake_error, 0); + } else if (atomic_read(&mdev->pasid_1024_fake_error)) { + /* + * Clear the flag, and fake an error to fail the + * replacement.
+ */ + atomic_set(&mdev->pasid_1024_fake_error, 0); + return -ENOMEM; + } else { + /* Set the flag to fake an error in next call */ + atomic_set(&mdev->pasid_1024_fake_error, 1); + } + } + + rc = mock_dev_enable_iopf(dev, domain); + if (rc) + return rc; + + mock_dev_disable_iopf(dev, old); + return 0; } static const struct iommu_domain_ops mock_blocking_ops = { .attach_dev = mock_domain_nop_attach, + .set_dev_pasid = mock_domain_set_dev_pasid_nop }; static struct iommu_domain mock_blocking_domain = { @@ -209,10 +305,15 @@ static struct iommu_domain mock_blocking_domain = { .ops = &mock_blocking_ops, }; -static void *mock_domain_hw_info(struct device *dev, u32 *length, u32 *type) +static void *mock_domain_hw_info(struct device *dev, u32 *length, + enum iommu_hw_info_type *type) { struct iommu_test_hw_info *info; + if (*type != IOMMU_HW_INFO_TYPE_DEFAULT && + *type != IOMMU_HW_INFO_TYPE_SELFTEST) + return ERR_PTR(-EOPNOTSUPP); + info = kzalloc(sizeof(*info), GFP_KERNEL); if (!info) return ERR_PTR(-ENOMEM); @@ -311,25 +412,6 @@ static const struct iommu_dirty_ops dirty_ops = { .read_and_clear_dirty = mock_domain_read_and_clear_dirty, }; -static struct iommu_domain *mock_domain_alloc_paging(struct device *dev) -{ - struct mock_dev *mdev = to_mock_dev(dev); - struct mock_iommu_domain *mock; - - mock = kzalloc(sizeof(*mock), GFP_KERNEL); - if (!mock) - return NULL; - mock->domain.geometry.aperture_start = MOCK_APERTURE_START; - mock->domain.geometry.aperture_end = MOCK_APERTURE_LAST; - mock->domain.pgsize_bitmap = MOCK_IO_PAGE_SIZE; - if (dev && mdev->flags & MOCK_FLAGS_DEVICE_HUGE_IOVA) - mock->domain.pgsize_bitmap |= MOCK_HUGE_PAGE_SIZE; - mock->domain.ops = mock_ops.default_domain_ops; - mock->domain.type = IOMMU_DOMAIN_UNMANAGED; - xa_init(&mock->pfns); - return &mock->domain; -} - static struct mock_iommu_domain_nested * __mock_domain_alloc_nested(const struct iommu_user_data *user_data) { @@ -362,7 +444,7 @@ mock_domain_alloc_nested(struct device *dev, struct iommu_domain *parent, struct mock_iommu_domain_nested *mock_nested; struct mock_iommu_domain *mock_parent; - if (flags) + if (flags & ~IOMMU_HWPT_ALLOC_PASID) return ERR_PTR(-EOPNOTSUPP); if (!parent || parent->ops != mock_ops.default_domain_ops) return ERR_PTR(-EINVAL); @@ -374,7 +456,6 @@ mock_domain_alloc_nested(struct device *dev, struct iommu_domain *parent, mock_nested = __mock_domain_alloc_nested(user_data); if (IS_ERR(mock_nested)) return ERR_CAST(mock_nested); - mock_nested->parent = mock_parent; return &mock_nested->domain; } @@ -384,22 +465,32 @@ mock_domain_alloc_paging_flags(struct device *dev, u32 flags, { bool has_dirty_flag = flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING; const u32 PAGING_FLAGS = IOMMU_HWPT_ALLOC_DIRTY_TRACKING | - IOMMU_HWPT_ALLOC_NEST_PARENT; - bool no_dirty_ops = to_mock_dev(dev)->flags & - MOCK_FLAGS_DEVICE_NO_DIRTY; - struct iommu_domain *domain; + IOMMU_HWPT_ALLOC_NEST_PARENT | + IOMMU_HWPT_ALLOC_PASID; + struct mock_dev *mdev = to_mock_dev(dev); + bool no_dirty_ops = mdev->flags & MOCK_FLAGS_DEVICE_NO_DIRTY; + struct mock_iommu_domain *mock; if (user_data) return ERR_PTR(-EOPNOTSUPP); if ((flags & ~PAGING_FLAGS) || (has_dirty_flag && no_dirty_ops)) return ERR_PTR(-EOPNOTSUPP); - domain = mock_domain_alloc_paging(dev); - if (!domain) + mock = kzalloc(sizeof(*mock), GFP_KERNEL); + if (!mock) return ERR_PTR(-ENOMEM); + mock->domain.geometry.aperture_start = MOCK_APERTURE_START; + mock->domain.geometry.aperture_end = MOCK_APERTURE_LAST; + mock->domain.pgsize_bitmap = MOCK_IO_PAGE_SIZE; + if (dev && 
mdev->flags & MOCK_FLAGS_DEVICE_HUGE_IOVA) + mock->domain.pgsize_bitmap |= MOCK_HUGE_PAGE_SIZE; + mock->domain.ops = mock_ops.default_domain_ops; + mock->domain.type = IOMMU_DOMAIN_UNMANAGED; + xa_init(&mock->pfns); + if (has_dirty_flag) - domain->dirty_ops = &dirty_ops; - return domain; + mock->domain.dirty_ops = &dirty_ops; + return &mock->domain; } static void mock_domain_free(struct iommu_domain *domain) @@ -559,31 +650,57 @@ static void mock_domain_page_response(struct device *dev, struct iopf_fault *evt { } -static int mock_dev_enable_feat(struct device *dev, enum iommu_dev_features feat) +static int mock_dev_enable_iopf(struct device *dev, struct iommu_domain *domain) { - if (feat != IOMMU_DEV_FEAT_IOPF || !mock_iommu_iopf_queue) + struct mock_dev *mdev = to_mock_dev(dev); + int ret; + + if (!domain || !domain->iopf_handler) + return 0; + + if (!mock_iommu_iopf_queue) return -ENODEV; - return iopf_queue_add_device(mock_iommu_iopf_queue, dev); + if (mdev->iopf_refcount) { + mdev->iopf_refcount++; + return 0; + } + + ret = iopf_queue_add_device(mock_iommu_iopf_queue, dev); + if (ret) + return ret; + + mdev->iopf_refcount = 1; + + return 0; } -static int mock_dev_disable_feat(struct device *dev, enum iommu_dev_features feat) +static void mock_dev_disable_iopf(struct device *dev, struct iommu_domain *domain) { - if (feat != IOMMU_DEV_FEAT_IOPF || !mock_iommu_iopf_queue) - return -ENODEV; + struct mock_dev *mdev = to_mock_dev(dev); - iopf_queue_remove_device(mock_iommu_iopf_queue, dev); + if (!domain || !domain->iopf_handler) + return; - return 0; + if (--mdev->iopf_refcount) + return; + + iopf_queue_remove_device(mock_iommu_iopf_queue, dev); } static void mock_viommu_destroy(struct iommufd_viommu *viommu) { struct mock_iommu_device *mock_iommu = container_of( viommu->iommu_dev, struct mock_iommu_device, iommu_dev); + struct mock_viommu *mock_viommu = to_mock_viommu(viommu); if (refcount_dec_and_test(&mock_iommu->users)) complete(&mock_iommu->complete); + if (mock_viommu->mmap_offset) + iommufd_viommu_destroy_mmap(&mock_viommu->core, + mock_viommu->mmap_offset); + free_page((unsigned long)mock_viommu->page); + mutex_destroy(&mock_viommu->queue_mutex); /* iommufd core frees mock_viommu and viommu */ } @@ -595,14 +712,13 @@ mock_viommu_alloc_domain_nested(struct iommufd_viommu *viommu, u32 flags, struct mock_viommu *mock_viommu = to_mock_viommu(viommu); struct mock_iommu_domain_nested *mock_nested; - if (flags & ~IOMMU_HWPT_FAULT_ID_VALID) + if (flags & ~IOMMU_HWPT_ALLOC_PASID) return ERR_PTR(-EOPNOTSUPP); mock_nested = __mock_domain_alloc_nested(user_data); if (IS_ERR(mock_nested)) return ERR_CAST(mock_nested); mock_nested->mock_viommu = mock_viommu; - mock_nested->parent = mock_viommu->s2_parent; return &mock_nested->domain; } @@ -676,31 +792,149 @@ out: return rc; } +static size_t mock_viommu_get_hw_queue_size(struct iommufd_viommu *viommu, + enum iommu_hw_queue_type queue_type) +{ + if (queue_type != IOMMU_HW_QUEUE_TYPE_SELFTEST) + return 0; + return HW_QUEUE_STRUCT_SIZE(struct mock_hw_queue, core); +} + +static void mock_hw_queue_destroy(struct iommufd_hw_queue *hw_queue) +{ + struct mock_hw_queue *mock_hw_queue = to_mock_hw_queue(hw_queue); + struct mock_viommu *mock_viommu = mock_hw_queue->mock_viommu; + + mutex_lock(&mock_viommu->queue_mutex); + mock_viommu->hw_queue[mock_hw_queue->index] = NULL; + if (mock_hw_queue->prev) + iommufd_hw_queue_undepend(mock_hw_queue, mock_hw_queue->prev, + core); + mutex_unlock(&mock_viommu->queue_mutex); +} + +/* Test 
iommufd_hw_queue_depend/undepend() */ +static int mock_hw_queue_init_phys(struct iommufd_hw_queue *hw_queue, u32 index, + phys_addr_t base_addr_pa) +{ + struct mock_viommu *mock_viommu = to_mock_viommu(hw_queue->viommu); + struct mock_hw_queue *mock_hw_queue = to_mock_hw_queue(hw_queue); + struct mock_hw_queue *prev = NULL; + int rc = 0; + + if (index >= IOMMU_TEST_HW_QUEUE_MAX) + return -EINVAL; + + mutex_lock(&mock_viommu->queue_mutex); + + if (mock_viommu->hw_queue[index]) { + rc = -EEXIST; + goto unlock; + } + + if (index) { + prev = mock_viommu->hw_queue[index - 1]; + if (!prev) { + rc = -EIO; + goto unlock; + } + } + + /* + * Test to catch a kernel bug if the core converted the physical address + * incorrectly. Let mock_domain_iova_to_phys() WARN_ON if it fails. + */ + if (base_addr_pa != iommu_iova_to_phys(&mock_viommu->s2_parent->domain, + hw_queue->base_addr)) { + rc = -EFAULT; + goto unlock; + } + + if (prev) { + rc = iommufd_hw_queue_depend(mock_hw_queue, prev, core); + if (rc) + goto unlock; + } + + mock_hw_queue->prev = prev; + mock_hw_queue->mock_viommu = mock_viommu; + mock_viommu->hw_queue[index] = mock_hw_queue; + + hw_queue->destroy = &mock_hw_queue_destroy; +unlock: + mutex_unlock(&mock_viommu->queue_mutex); + return rc; +} + static struct iommufd_viommu_ops mock_viommu_ops = { .destroy = mock_viommu_destroy, .alloc_domain_nested = mock_viommu_alloc_domain_nested, .cache_invalidate = mock_viommu_cache_invalidate, + .get_hw_queue_size = mock_viommu_get_hw_queue_size, + .hw_queue_init_phys = mock_hw_queue_init_phys, }; -static struct iommufd_viommu *mock_viommu_alloc(struct device *dev, - struct iommu_domain *domain, - struct iommufd_ctx *ictx, - unsigned int viommu_type) +static size_t mock_get_viommu_size(struct device *dev, + enum iommu_viommu_type viommu_type) { - struct mock_iommu_device *mock_iommu = - iommu_get_iommu_dev(dev, struct mock_iommu_device, iommu_dev); - struct mock_viommu *mock_viommu; - if (viommu_type != IOMMU_VIOMMU_TYPE_SELFTEST) - return ERR_PTR(-EOPNOTSUPP); + return 0; + return VIOMMU_STRUCT_SIZE(struct mock_viommu, core); +} + +static int mock_viommu_init(struct iommufd_viommu *viommu, + struct iommu_domain *parent_domain, + const struct iommu_user_data *user_data) +{ + struct mock_iommu_device *mock_iommu = container_of( + viommu->iommu_dev, struct mock_iommu_device, iommu_dev); + struct mock_viommu *mock_viommu = to_mock_viommu(viommu); + struct iommu_viommu_selftest data; + int rc; - mock_viommu = iommufd_viommu_alloc(ictx, struct mock_viommu, core, - &mock_viommu_ops); - if (IS_ERR(mock_viommu)) - return ERR_CAST(mock_viommu); + if (user_data) { + rc = iommu_copy_struct_from_user( + &data, user_data, IOMMU_VIOMMU_TYPE_SELFTEST, out_data); + if (rc) + return rc; + + /* Allocate two pages */ + mock_viommu->page = + (u32 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 1); + if (!mock_viommu->page) + return -ENOMEM; + + rc = iommufd_viommu_alloc_mmap(&mock_viommu->core, + __pa(mock_viommu->page), + PAGE_SIZE * 2, + &mock_viommu->mmap_offset); + if (rc) + goto err_free_page; + + /* For loopback tests on both the page and out_data */ + *mock_viommu->page = data.in_data; + data.out_data = data.in_data; + data.out_mmap_length = PAGE_SIZE * 2; + data.out_mmap_offset = mock_viommu->mmap_offset; + rc = iommu_copy_struct_to_user( + user_data, &data, IOMMU_VIOMMU_TYPE_SELFTEST, out_data); + if (rc) + goto err_destroy_mmap; + } refcount_inc(&mock_iommu->users); - return &mock_viommu->core; + mutex_init(&mock_viommu->queue_mutex); + mock_viommu->s2_parent = 
to_mock_domain(parent_domain); + + viommu->ops = &mock_viommu_ops; + return 0; + +err_destroy_mmap: + iommufd_viommu_destroy_mmap(&mock_viommu->core, + mock_viommu->mmap_offset); +err_free_page: + free_page((unsigned long)mock_viommu->page); + return rc; } static const struct iommu_ops mock_ops = { @@ -711,19 +945,16 @@ static const struct iommu_ops mock_ops = { .default_domain = &mock_blocking_domain, .blocked_domain = &mock_blocking_domain, .owner = THIS_MODULE, - .pgsize_bitmap = MOCK_IO_PAGE_SIZE, .hw_info = mock_domain_hw_info, - .domain_alloc_paging = mock_domain_alloc_paging, .domain_alloc_paging_flags = mock_domain_alloc_paging_flags, .domain_alloc_nested = mock_domain_alloc_nested, .capable = mock_domain_capable, .device_group = generic_device_group, .probe_device = mock_probe_device, .page_response = mock_domain_page_response, - .dev_enable_feat = mock_dev_enable_feat, - .dev_disable_feat = mock_dev_disable_feat, .user_pasid_table = true, - .viommu_alloc = mock_viommu_alloc, + .get_viommu_size = mock_get_viommu_size, + .viommu_init = mock_viommu_init, .default_domain_ops = &(struct iommu_domain_ops){ .free = mock_domain_free, @@ -731,6 +962,7 @@ static const struct iommu_ops mock_ops = { .map_pages = mock_domain_map_pages, .unmap_pages = mock_domain_unmap_pages, .iova_to_phys = mock_domain_iova_to_phys, + .set_dev_pasid = mock_domain_set_dev_pasid_nop, }, }; @@ -791,6 +1023,7 @@ static struct iommu_domain_ops domain_nested_ops = { .free = mock_domain_free_nested, .attach_dev = mock_domain_nop_attach, .cache_invalidate_user = mock_domain_cache_invalidate_user, + .set_dev_pasid = mock_domain_set_dev_pasid_nop, }; static inline struct iommufd_hw_pagetable * @@ -850,17 +1083,24 @@ static void mock_dev_release(struct device *dev) static struct mock_dev *mock_dev_create(unsigned long dev_flags) { + struct property_entry prop[] = { + PROPERTY_ENTRY_U32("pasid-num-bits", 0), + {}, + }; + const u32 valid_flags = MOCK_FLAGS_DEVICE_NO_DIRTY | + MOCK_FLAGS_DEVICE_HUGE_IOVA | + MOCK_FLAGS_DEVICE_PASID; struct mock_dev *mdev; int rc, i; - if (dev_flags & - ~(MOCK_FLAGS_DEVICE_NO_DIRTY | MOCK_FLAGS_DEVICE_HUGE_IOVA)) + if (dev_flags & ~valid_flags) return ERR_PTR(-EINVAL); mdev = kzalloc(sizeof(*mdev), GFP_KERNEL); if (!mdev) return ERR_PTR(-ENOMEM); + init_rwsem(&mdev->viommu_rwsem); device_initialize(&mdev->dev); mdev->flags = dev_flags; mdev->dev.release = mock_dev_release; @@ -877,6 +1117,15 @@ static struct mock_dev *mock_dev_create(unsigned long dev_flags) if (rc) goto err_put; + if (dev_flags & MOCK_FLAGS_DEVICE_PASID) + prop[0] = PROPERTY_ENTRY_U32("pasid-num-bits", MOCK_PASID_WIDTH); + + rc = device_create_managed_software_node(&mdev->dev, prop, NULL); + if (rc) { + dev_err(&mdev->dev, "add pasid-num-bits property failed, rc: %d", rc); + goto err_put; + } + rc = device_add(&mdev->dev); if (rc) goto err_put; @@ -932,7 +1181,7 @@ static int iommufd_test_mock_domain(struct iommufd_ucmd *ucmd, } sobj->idev.idev = idev; - rc = iommufd_device_attach(idev, &pt_id); + rc = iommufd_device_attach(idev, IOMMU_NO_PASID, &pt_id); if (rc) goto out_unbind; @@ -947,7 +1196,7 @@ static int iommufd_test_mock_domain(struct iommufd_ucmd *ucmd, return 0; out_detach: - iommufd_device_detach(idev); + iommufd_device_detach(idev, IOMMU_NO_PASID); out_unbind: iommufd_device_unbind(idev); out_mdev: @@ -957,39 +1206,49 @@ out_sobj: return rc; } -/* Replace the mock domain with a manually allocated hw_pagetable */ -static int iommufd_test_mock_domain_replace(struct iommufd_ucmd *ucmd, - unsigned int device_id, u32 
pt_id, - struct iommu_test_cmd *cmd) +static struct selftest_obj * +iommufd_test_get_selftest_obj(struct iommufd_ctx *ictx, u32 id) { struct iommufd_object *dev_obj; struct selftest_obj *sobj; - int rc; /* * Prefer to use the OBJ_SELFTEST because the destroy_rwsem will ensure * it doesn't race with detach, which is not allowed. */ - dev_obj = - iommufd_get_object(ucmd->ictx, device_id, IOMMUFD_OBJ_SELFTEST); + dev_obj = iommufd_get_object(ictx, id, IOMMUFD_OBJ_SELFTEST); if (IS_ERR(dev_obj)) - return PTR_ERR(dev_obj); + return ERR_CAST(dev_obj); sobj = to_selftest_obj(dev_obj); if (sobj->type != TYPE_IDEV) { - rc = -EINVAL; - goto out_dev_obj; + iommufd_put_object(ictx, dev_obj); + return ERR_PTR(-EINVAL); } + return sobj; +} + +/* Replace the mock domain with a manually allocated hw_pagetable */ +static int iommufd_test_mock_domain_replace(struct iommufd_ucmd *ucmd, + unsigned int device_id, u32 pt_id, + struct iommu_test_cmd *cmd) +{ + struct selftest_obj *sobj; + int rc; - rc = iommufd_device_replace(sobj->idev.idev, &pt_id); + sobj = iommufd_test_get_selftest_obj(ucmd->ictx, device_id); + if (IS_ERR(sobj)) + return PTR_ERR(sobj); + + rc = iommufd_device_replace(sobj->idev.idev, IOMMU_NO_PASID, &pt_id); if (rc) - goto out_dev_obj; + goto out_sobj; cmd->mock_domain_replace.pt_id = pt_id; rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); -out_dev_obj: - iommufd_put_object(ucmd->ictx, dev_obj); +out_sobj: + iommufd_put_object(ucmd->ictx, &sobj->obj); return rc; } @@ -1101,9 +1360,8 @@ static int iommufd_test_md_check_refs(struct iommufd_ucmd *ucmd, return 0; } -static int iommufd_test_md_check_iotlb(struct iommufd_ucmd *ucmd, - u32 mockpt_id, unsigned int iotlb_id, - u32 iotlb) +static int iommufd_test_md_check_iotlb(struct iommufd_ucmd *ucmd, u32 mockpt_id, + unsigned int iotlb_id, u32 iotlb) { struct mock_iommu_domain_nested *mock_nested; struct iommufd_hw_pagetable *hwpt; @@ -1376,7 +1634,7 @@ static int iommufd_test_access_pages(struct iommufd_ucmd *ucmd, int rc; /* Prevent syzkaller from triggering a WARN_ON in kvzalloc() */ - if (length > 16*1024*1024) + if (length > 16 * 1024 * 1024) return -ENOMEM; if (flags & ~(MOCK_FLAGS_ACCESS_WRITE | MOCK_FLAGS_ACCESS_SYZ)) @@ -1393,7 +1651,7 @@ static int iommufd_test_access_pages(struct iommufd_ucmd *ucmd, if (flags & MOCK_FLAGS_ACCESS_SYZ) iova = iommufd_test_syz_conv_iova(staccess->access, - &cmd->access_pages.iova); + &cmd->access_pages.iova); npages = (ALIGN(iova + length, PAGE_SIZE) - ALIGN_DOWN(iova, PAGE_SIZE)) / @@ -1469,7 +1727,7 @@ static int iommufd_test_access_rw(struct iommufd_ucmd *ucmd, int rc; /* Prevent syzkaller from triggering a WARN_ON in kvzalloc() */ - if (length > 16*1024*1024) + if (length > 16 * 1024 * 1024) return -ENOMEM; if (flags & ~(MOCK_ACCESS_RW_WRITE | MOCK_ACCESS_RW_SLOW_PATH | @@ -1495,7 +1753,7 @@ static int iommufd_test_access_rw(struct iommufd_ucmd *ucmd, if (flags & MOCK_FLAGS_ACCESS_SYZ) iova = iommufd_test_syz_conv_iova(staccess->access, - &cmd->access_rw.iova); + &cmd->access_rw.iova); rc = iommufd_access_rw(staccess->access, iova, tmp, length, flags); if (rc) @@ -1550,7 +1808,7 @@ static int iommufd_test_dirty(struct iommufd_ucmd *ucmd, unsigned int mockpt_id, goto out_put; } - if (copy_from_user(tmp, uptr,DIV_ROUND_UP(max, BITS_PER_BYTE))) { + if (copy_from_user(tmp, uptr, DIV_ROUND_UP(max, BITS_PER_BYTE))) { rc = -EFAULT; goto out_free; } @@ -1586,7 +1844,7 @@ out_put: static int iommufd_test_trigger_iopf(struct iommufd_ucmd *ucmd, struct iommu_test_cmd *cmd) { - struct iopf_fault event = { }; + 
struct iopf_fault event = {}; struct iommufd_device *idev; idev = iommufd_get_device(ucmd, cmd->trigger_iopf.dev_id); @@ -1608,13 +1866,165 @@ static int iommufd_test_trigger_iopf(struct iommufd_ucmd *ucmd, return 0; } +static int iommufd_test_trigger_vevent(struct iommufd_ucmd *ucmd, + struct iommu_test_cmd *cmd) +{ + struct iommu_viommu_event_selftest test = {}; + struct iommufd_device *idev; + struct mock_dev *mdev; + int rc = -ENOENT; + + idev = iommufd_get_device(ucmd, cmd->trigger_vevent.dev_id); + if (IS_ERR(idev)) + return PTR_ERR(idev); + mdev = to_mock_dev(idev->dev); + + down_read(&mdev->viommu_rwsem); + if (!mdev->viommu || !mdev->vdev_id) + goto out_unlock; + + test.virt_id = mdev->vdev_id; + rc = iommufd_viommu_report_event(&mdev->viommu->core, + IOMMU_VEVENTQ_TYPE_SELFTEST, &test, + sizeof(test)); +out_unlock: + up_read(&mdev->viommu_rwsem); + iommufd_put_object(ucmd->ictx, &idev->obj); + + return rc; +} + +static inline struct iommufd_hw_pagetable * +iommufd_get_hwpt(struct iommufd_ucmd *ucmd, u32 id) +{ + struct iommufd_object *pt_obj; + + pt_obj = iommufd_get_object(ucmd->ictx, id, IOMMUFD_OBJ_ANY); + if (IS_ERR(pt_obj)) + return ERR_CAST(pt_obj); + + if (pt_obj->type != IOMMUFD_OBJ_HWPT_NESTED && + pt_obj->type != IOMMUFD_OBJ_HWPT_PAGING) { + iommufd_put_object(ucmd->ictx, pt_obj); + return ERR_PTR(-EINVAL); + } + + return container_of(pt_obj, struct iommufd_hw_pagetable, obj); +} + +static int iommufd_test_pasid_check_hwpt(struct iommufd_ucmd *ucmd, + struct iommu_test_cmd *cmd) +{ + u32 hwpt_id = cmd->pasid_check.hwpt_id; + struct iommu_domain *attached_domain; + struct iommu_attach_handle *handle; + struct iommufd_hw_pagetable *hwpt; + struct selftest_obj *sobj; + struct mock_dev *mdev; + int rc = 0; + + sobj = iommufd_test_get_selftest_obj(ucmd->ictx, cmd->id); + if (IS_ERR(sobj)) + return PTR_ERR(sobj); + + mdev = sobj->idev.mock_dev; + + handle = iommu_attach_handle_get(mdev->dev.iommu_group, + cmd->pasid_check.pasid, 0); + if (IS_ERR(handle)) + attached_domain = NULL; + else + attached_domain = handle->domain; + + /* hwpt_id == 0 means to check if pasid is detached */ + if (!hwpt_id) { + if (attached_domain) + rc = -EINVAL; + goto out_sobj; + } + + hwpt = iommufd_get_hwpt(ucmd, hwpt_id); + if (IS_ERR(hwpt)) { + rc = PTR_ERR(hwpt); + goto out_sobj; + } + + if (attached_domain != hwpt->domain) + rc = -EINVAL; + + iommufd_put_object(ucmd->ictx, &hwpt->obj); +out_sobj: + iommufd_put_object(ucmd->ictx, &sobj->obj); + return rc; +} + +static int iommufd_test_pasid_attach(struct iommufd_ucmd *ucmd, + struct iommu_test_cmd *cmd) +{ + struct selftest_obj *sobj; + int rc; + + sobj = iommufd_test_get_selftest_obj(ucmd->ictx, cmd->id); + if (IS_ERR(sobj)) + return PTR_ERR(sobj); + + rc = iommufd_device_attach(sobj->idev.idev, cmd->pasid_attach.pasid, + &cmd->pasid_attach.pt_id); + if (rc) + goto out_sobj; + + rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); + if (rc) + iommufd_device_detach(sobj->idev.idev, cmd->pasid_attach.pasid); + +out_sobj: + iommufd_put_object(ucmd->ictx, &sobj->obj); + return rc; +} + +static int iommufd_test_pasid_replace(struct iommufd_ucmd *ucmd, + struct iommu_test_cmd *cmd) +{ + struct selftest_obj *sobj; + int rc; + + sobj = iommufd_test_get_selftest_obj(ucmd->ictx, cmd->id); + if (IS_ERR(sobj)) + return PTR_ERR(sobj); + + rc = iommufd_device_replace(sobj->idev.idev, cmd->pasid_attach.pasid, + &cmd->pasid_attach.pt_id); + if (rc) + goto out_sobj; + + rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); + +out_sobj: + iommufd_put_object(ucmd->ictx, 
&sobj->obj); + return rc; +} + +static int iommufd_test_pasid_detach(struct iommufd_ucmd *ucmd, + struct iommu_test_cmd *cmd) +{ + struct selftest_obj *sobj; + + sobj = iommufd_test_get_selftest_obj(ucmd->ictx, cmd->id); + if (IS_ERR(sobj)) + return PTR_ERR(sobj); + + iommufd_device_detach(sobj->idev.idev, cmd->pasid_detach.pasid); + iommufd_put_object(ucmd->ictx, &sobj->obj); + return 0; +} + void iommufd_selftest_destroy(struct iommufd_object *obj) { struct selftest_obj *sobj = to_selftest_obj(obj); switch (sobj->type) { case TYPE_IDEV: - iommufd_device_detach(sobj->idev.idev); + iommufd_device_detach(sobj->idev.idev, IOMMU_NO_PASID); iommufd_device_unbind(sobj->idev.idev); mock_dev_destroy(sobj->idev.mock_dev); break; @@ -1689,6 +2099,16 @@ int iommufd_test(struct iommufd_ucmd *ucmd) cmd->dirty.flags); case IOMMU_TEST_OP_TRIGGER_IOPF: return iommufd_test_trigger_iopf(ucmd, cmd); + case IOMMU_TEST_OP_TRIGGER_VEVENT: + return iommufd_test_trigger_vevent(ucmd, cmd); + case IOMMU_TEST_OP_PASID_ATTACH: + return iommufd_test_pasid_attach(ucmd, cmd); + case IOMMU_TEST_OP_PASID_REPLACE: + return iommufd_test_pasid_replace(ucmd, cmd); + case IOMMU_TEST_OP_PASID_DETACH: + return iommufd_test_pasid_detach(ucmd, cmd); + case IOMMU_TEST_OP_PASID_CHECK_HWPT: + return iommufd_test_pasid_check_hwpt(ucmd, cmd); default: return -EOPNOTSUPP; } @@ -1726,8 +2146,8 @@ int __init iommufd_test_init(void) goto err_bus; rc = iommu_device_register_bus(&mock_iommu.iommu_dev, &mock_ops, - &iommufd_mock_bus_type.bus, - &iommufd_mock_bus_type.nb); + &iommufd_mock_bus_type.bus, + &iommufd_mock_bus_type.nb); if (rc) goto err_sysfs; @@ -1735,6 +2155,7 @@ int __init iommufd_test_init(void) init_completion(&mock_iommu.complete); mock_iommu_iopf_queue = iopf_queue_alloc("mock-iopfq"); + mock_iommu.iommu_dev.max_pasids = (1 << MOCK_PASID_WIDTH); return 0; diff --git a/drivers/iommu/iommufd/vfio_compat.c b/drivers/iommu/iommufd/vfio_compat.c index 514aacd64009..a258ee2f4579 100644 --- a/drivers/iommu/iommufd/vfio_compat.c +++ b/drivers/iommu/iommufd/vfio_compat.c @@ -44,7 +44,7 @@ int iommufd_vfio_compat_ioas_get_id(struct iommufd_ctx *ictx, u32 *out_ioas_id) iommufd_put_object(ictx, &ioas->obj); return 0; } -EXPORT_SYMBOL_NS_GPL(iommufd_vfio_compat_ioas_get_id, IOMMUFD_VFIO); +EXPORT_SYMBOL_NS_GPL(iommufd_vfio_compat_ioas_get_id, "IOMMUFD_VFIO"); /** * iommufd_vfio_compat_set_no_iommu - Called when a no-iommu device is attached @@ -66,7 +66,7 @@ int iommufd_vfio_compat_set_no_iommu(struct iommufd_ctx *ictx) xa_unlock(&ictx->objects); return ret; } -EXPORT_SYMBOL_NS_GPL(iommufd_vfio_compat_set_no_iommu, IOMMUFD_VFIO); +EXPORT_SYMBOL_NS_GPL(iommufd_vfio_compat_set_no_iommu, "IOMMUFD_VFIO"); /** * iommufd_vfio_compat_ioas_create - Ensure the compat IOAS is created @@ -118,7 +118,7 @@ out_abort: iommufd_object_abort(ictx, &ioas->obj); return ret; } -EXPORT_SYMBOL_NS_GPL(iommufd_vfio_compat_ioas_create, IOMMUFD_VFIO); +EXPORT_SYMBOL_NS_GPL(iommufd_vfio_compat_ioas_create, "IOMMUFD_VFIO"); int iommufd_vfio_ioas(struct iommufd_ucmd *ucmd) { diff --git a/drivers/iommu/iommufd/viommu.c b/drivers/iommu/iommufd/viommu.c index 69b88e8c7c26..462b457ffd0c 100644 --- a/drivers/iommu/iommufd/viommu.c +++ b/drivers/iommu/iommufd/viommu.c @@ -17,10 +17,16 @@ void iommufd_viommu_destroy(struct iommufd_object *obj) int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd) { struct iommu_viommu_alloc *cmd = ucmd->cmd; + const struct iommu_user_data user_data = { + .type = cmd->type, + .uptr = u64_to_user_ptr(cmd->data_uptr), + .len = 
cmd->data_len, + }; struct iommufd_hwpt_paging *hwpt_paging; struct iommufd_viommu *viommu; struct iommufd_device *idev; const struct iommu_ops *ops; + size_t viommu_size; int rc; if (cmd->flags || cmd->type == IOMMU_VIOMMU_TYPE_DEFAULT) @@ -31,7 +37,22 @@ int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd) return PTR_ERR(idev); ops = dev_iommu_ops(idev->dev); - if (!ops->viommu_alloc) { + if (!ops->get_viommu_size || !ops->viommu_init) { + rc = -EOPNOTSUPP; + goto out_put_idev; + } + + viommu_size = ops->get_viommu_size(idev->dev, cmd->type); + if (!viommu_size) { + rc = -EOPNOTSUPP; + goto out_put_idev; + } + + /* + * It is a driver bug for providing a viommu_size smaller than the core + * vIOMMU structure size + */ + if (WARN_ON_ONCE(viommu_size < sizeof(*viommu))) { rc = -EOPNOTSUPP; goto out_put_idev; } @@ -47,8 +68,8 @@ int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd) goto out_put_hwpt; } - viommu = ops->viommu_alloc(idev->dev, hwpt_paging->common.domain, - ucmd->ictx, cmd->type); + viommu = (struct iommufd_viommu *)_iommufd_object_alloc_ucmd( + ucmd, viommu_size, IOMMUFD_OBJ_VIOMMU); if (IS_ERR(viommu)) { rc = PTR_ERR(viommu); goto out_put_hwpt; @@ -59,6 +80,8 @@ int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd) viommu->ictx = ucmd->ictx; viommu->hwpt = hwpt_paging; refcount_inc(&viommu->hwpt->common.obj.users); + INIT_LIST_HEAD(&viommu->veventqs); + init_rwsem(&viommu->veventqs_rwsem); /* * It is the most likely case that a physical IOMMU is unpluggable. A * pluggable IOMMU instance (if exists) is responsible for refcounting @@ -66,15 +89,20 @@ int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd) */ viommu->iommu_dev = __iommu_get_iommu_dev(idev->dev); + rc = ops->viommu_init(viommu, hwpt_paging->common.domain, + user_data.len ? 
&user_data : NULL); + if (rc) + goto out_put_hwpt; + + /* It is a driver bug that viommu->ops isn't filled */ + if (WARN_ON_ONCE(!viommu->ops)) { + rc = -EOPNOTSUPP; + goto out_put_hwpt; + } + cmd->out_viommu_id = viommu->obj.id; rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); - if (rc) - goto out_abort; - iommufd_object_finalize(ucmd->ictx, &viommu->obj); - goto out_put_hwpt; -out_abort: - iommufd_object_abort_and_destroy(ucmd->ictx, &viommu->obj); out_put_hwpt: iommufd_put_object(ucmd->ictx, &hwpt_paging->common.obj); out_put_idev: @@ -82,22 +110,41 @@ out_put_idev: return rc; } -void iommufd_vdevice_destroy(struct iommufd_object *obj) +void iommufd_vdevice_abort(struct iommufd_object *obj) { struct iommufd_vdevice *vdev = container_of(obj, struct iommufd_vdevice, obj); struct iommufd_viommu *viommu = vdev->viommu; + struct iommufd_device *idev = vdev->idev; + + lockdep_assert_held(&idev->igroup->lock); + if (vdev->destroy) + vdev->destroy(vdev); /* xa_cmpxchg is okay to fail if alloc failed xa_cmpxchg previously */ - xa_cmpxchg(&viommu->vdevs, vdev->id, vdev, NULL, GFP_KERNEL); + xa_cmpxchg(&viommu->vdevs, vdev->virt_id, vdev, NULL, GFP_KERNEL); refcount_dec(&viommu->obj.users); - put_device(vdev->dev); + idev->vdev = NULL; +} + +void iommufd_vdevice_destroy(struct iommufd_object *obj) +{ + struct iommufd_vdevice *vdev = + container_of(obj, struct iommufd_vdevice, obj); + struct iommufd_device *idev = vdev->idev; + struct iommufd_ctx *ictx = idev->ictx; + + mutex_lock(&idev->igroup->lock); + iommufd_vdevice_abort(obj); + mutex_unlock(&idev->igroup->lock); + iommufd_put_object(ictx, &idev->obj); } int iommufd_vdevice_alloc_ioctl(struct iommufd_ucmd *ucmd) { struct iommu_vdevice_alloc *cmd = ucmd->cmd; struct iommufd_vdevice *vdev, *curr; + size_t vdev_size = sizeof(*vdev); struct iommufd_viommu *viommu; struct iommufd_device *idev; u64 virt_id = cmd->virt_id; @@ -122,17 +169,54 @@ int iommufd_vdevice_alloc_ioctl(struct iommufd_ucmd *ucmd) goto out_put_idev; } - vdev = iommufd_object_alloc(ucmd->ictx, vdev, IOMMUFD_OBJ_VDEVICE); + mutex_lock(&idev->igroup->lock); + if (idev->destroying) { + rc = -ENOENT; + goto out_unlock_igroup; + } + + if (idev->vdev) { + rc = -EEXIST; + goto out_unlock_igroup; + } + + if (viommu->ops && viommu->ops->vdevice_size) { + /* + * It is a driver bug for: + * - ops->vdevice_size smaller than the core structure size + * - not implementing a pairing ops->vdevice_init op + */ + if (WARN_ON_ONCE(viommu->ops->vdevice_size < vdev_size || + !viommu->ops->vdevice_init)) { + rc = -EOPNOTSUPP; + goto out_put_idev; + } + vdev_size = viommu->ops->vdevice_size; + } + + vdev = (struct iommufd_vdevice *)_iommufd_object_alloc( + ucmd->ictx, vdev_size, IOMMUFD_OBJ_VDEVICE); if (IS_ERR(vdev)) { rc = PTR_ERR(vdev); - goto out_put_idev; + goto out_unlock_igroup; } - vdev->id = virt_id; - vdev->dev = idev->dev; - get_device(idev->dev); + vdev->virt_id = virt_id; vdev->viommu = viommu; refcount_inc(&viommu->obj.users); + /* + * A wait_cnt reference is held on the idev so long as we have the + * pointer. iommufd_device_pre_destroy() will revoke it before the + * idev real destruction. + */ + vdev->idev = idev; + + /* + * iommufd_device_destroy() delays until idev->vdev is NULL before + * freeing the idev, which only happens once the vdev is finished + * destruction. 
+ */ + idev->vdev = vdev; curr = xa_cmpxchg(&viommu->vdevs, virt_id, NULL, vdev, GFP_KERNEL); if (curr) { @@ -140,17 +224,206 @@ int iommufd_vdevice_alloc_ioctl(struct iommufd_ucmd *ucmd) goto out_abort; } + if (viommu->ops && viommu->ops->vdevice_init) { + rc = viommu->ops->vdevice_init(vdev); + if (rc) + goto out_abort; + } + cmd->out_vdevice_id = vdev->obj.id; rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); if (rc) goto out_abort; iommufd_object_finalize(ucmd->ictx, &vdev->obj); - goto out_put_idev; + goto out_unlock_igroup; out_abort: iommufd_object_abort_and_destroy(ucmd->ictx, &vdev->obj); +out_unlock_igroup: + mutex_unlock(&idev->igroup->lock); out_put_idev: - iommufd_put_object(ucmd->ictx, &idev->obj); + if (rc) + iommufd_put_object(ucmd->ictx, &idev->obj); +out_put_viommu: + iommufd_put_object(ucmd->ictx, &viommu->obj); + return rc; +} + +static void iommufd_hw_queue_destroy_access(struct iommufd_ctx *ictx, + struct iommufd_access *access, + u64 base_iova, size_t length) +{ + u64 aligned_iova = PAGE_ALIGN_DOWN(base_iova); + u64 offset = base_iova - aligned_iova; + + iommufd_access_unpin_pages(access, aligned_iova, + PAGE_ALIGN(length + offset)); + iommufd_access_detach_internal(access); + iommufd_access_destroy_internal(ictx, access); +} + +void iommufd_hw_queue_destroy(struct iommufd_object *obj) +{ + struct iommufd_hw_queue *hw_queue = + container_of(obj, struct iommufd_hw_queue, obj); + + if (hw_queue->destroy) + hw_queue->destroy(hw_queue); + if (hw_queue->access) + iommufd_hw_queue_destroy_access(hw_queue->viommu->ictx, + hw_queue->access, + hw_queue->base_addr, + hw_queue->length); + if (hw_queue->viommu) + refcount_dec(&hw_queue->viommu->obj.users); +} + +/* + * When the HW accesses the guest queue via physical addresses, the underlying + * physical pages of the guest queue must be contiguous. Also, for the security + * concern that IOMMUFD_CMD_IOAS_UNMAP could potentially remove the mappings of + * the guest queue from the nesting parent iopt while the HW is still accessing + * the guest queue memory physically, such a HW queue must require an access to + * pin the underlying pages and prevent that from happening. + */ +static struct iommufd_access * +iommufd_hw_queue_alloc_phys(struct iommu_hw_queue_alloc *cmd, + struct iommufd_viommu *viommu, phys_addr_t *base_pa) +{ + u64 aligned_iova = PAGE_ALIGN_DOWN(cmd->nesting_parent_iova); + u64 offset = cmd->nesting_parent_iova - aligned_iova; + struct iommufd_access *access; + struct page **pages; + size_t max_npages; + size_t length; + size_t i; + int rc; + + /* max_npages = DIV_ROUND_UP(offset + cmd->length, PAGE_SIZE) */ + if (check_add_overflow(offset, cmd->length, &length)) + return ERR_PTR(-ERANGE); + if (check_add_overflow(length, PAGE_SIZE - 1, &length)) + return ERR_PTR(-ERANGE); + max_npages = length / PAGE_SIZE; + /* length needs to be page aligned too */ + length = max_npages * PAGE_SIZE; + + /* + * Use kvcalloc() to avoid memory fragmentation for a large page array. 
+ * Set __GFP_NOWARN to avoid syzkaller blowups + */ + pages = kvcalloc(max_npages, sizeof(*pages), GFP_KERNEL | __GFP_NOWARN); + if (!pages) + return ERR_PTR(-ENOMEM); + + access = iommufd_access_create_internal(viommu->ictx); + if (IS_ERR(access)) { + rc = PTR_ERR(access); + goto out_free; + } + + rc = iommufd_access_attach_internal(access, viommu->hwpt->ioas); + if (rc) + goto out_destroy; + + rc = iommufd_access_pin_pages(access, aligned_iova, length, pages, 0); + if (rc) + goto out_detach; + + /* Validate if the underlying physical pages are contiguous */ + for (i = 1; i < max_npages; i++) { + if (page_to_pfn(pages[i]) == page_to_pfn(pages[i - 1]) + 1) + continue; + rc = -EFAULT; + goto out_unpin; + } + + *base_pa = (page_to_pfn(pages[0]) << PAGE_SHIFT) + offset; + kvfree(pages); + return access; + +out_unpin: + iommufd_access_unpin_pages(access, aligned_iova, length); +out_detach: + iommufd_access_detach_internal(access); +out_destroy: + iommufd_access_destroy_internal(viommu->ictx, access); +out_free: + kvfree(pages); + return ERR_PTR(rc); +} + +int iommufd_hw_queue_alloc_ioctl(struct iommufd_ucmd *ucmd) +{ + struct iommu_hw_queue_alloc *cmd = ucmd->cmd; + struct iommufd_hw_queue *hw_queue; + struct iommufd_viommu *viommu; + struct iommufd_access *access; + size_t hw_queue_size; + phys_addr_t base_pa; + u64 last; + int rc; + + if (cmd->flags || cmd->type == IOMMU_HW_QUEUE_TYPE_DEFAULT) + return -EOPNOTSUPP; + if (!cmd->length) + return -EINVAL; + if (check_add_overflow(cmd->nesting_parent_iova, cmd->length - 1, + &last)) + return -EOVERFLOW; + + viommu = iommufd_get_viommu(ucmd, cmd->viommu_id); + if (IS_ERR(viommu)) + return PTR_ERR(viommu); + + if (!viommu->ops || !viommu->ops->get_hw_queue_size || + !viommu->ops->hw_queue_init_phys) { + rc = -EOPNOTSUPP; + goto out_put_viommu; + } + + hw_queue_size = viommu->ops->get_hw_queue_size(viommu, cmd->type); + if (!hw_queue_size) { + rc = -EOPNOTSUPP; + goto out_put_viommu; + } + + /* + * It is a driver bug for providing a hw_queue_size smaller than the + * core HW queue structure size + */ + if (WARN_ON_ONCE(hw_queue_size < sizeof(*hw_queue))) { + rc = -EOPNOTSUPP; + goto out_put_viommu; + } + + hw_queue = (struct iommufd_hw_queue *)_iommufd_object_alloc_ucmd( + ucmd, hw_queue_size, IOMMUFD_OBJ_HW_QUEUE); + if (IS_ERR(hw_queue)) { + rc = PTR_ERR(hw_queue); + goto out_put_viommu; + } + + access = iommufd_hw_queue_alloc_phys(cmd, viommu, &base_pa); + if (IS_ERR(access)) { + rc = PTR_ERR(access); + goto out_put_viommu; + } + + hw_queue->viommu = viommu; + refcount_inc(&viommu->obj.users); + hw_queue->access = access; + hw_queue->type = cmd->type; + hw_queue->length = cmd->length; + hw_queue->base_addr = cmd->nesting_parent_iova; + + rc = viommu->ops->hw_queue_init_phys(hw_queue, cmd->index, base_pa); + if (rc) + goto out_put_viommu; + + cmd->out_hw_queue_id = hw_queue->obj.id; + rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); + out_put_viommu: iommufd_put_object(ucmd->ictx, &viommu->obj); return rc; |
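iommufd_fops_mmap() and mock_viommu_init() together imply a small userspace contract: the IOMMU_VIOMMU_TYPE_SELFTEST allocation hands back out_mmap_offset and out_mmap_length, and an mmap() of the iommufd file descriptor must match them exactly, as MAP_SHARED and without PROT_EXEC. A minimal sketch of that call, assuming the surrounding ioctl handling has already run:

#include <stdint.h>
#include <sys/mman.h>

/*
 * Map the region a driver published via iommufd_viommu_alloc_mmap().
 * iommufd_fops_mmap() rejects anything but an exact offset/length match,
 * requires MAP_SHARED, and refuses PROT_EXEC.
 */
static void *map_viommu_region(int iommufd, uint64_t out_mmap_offset,
			       uint64_t out_mmap_length)
{
	void *va = mmap(NULL, out_mmap_length, PROT_READ | PROT_WRITE,
			MAP_SHARED, iommufd, out_mmap_offset);

	return va == MAP_FAILED ? NULL : va;
}

For the selftest page, reading a u32 through the returned pointer should observe the in_data value that mock_viommu_init() stored for the loopback test.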
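The vdevice_size/vdevice_init contract in iommufd_vdevice_alloc_ioctl() follows the same embed-the-core-object pattern as the mock's HW queue. A sketch of the driver side, with every my_* name invented: the core requires vdevice_size >= sizeof(struct iommufd_vdevice), a paired vdevice_init, and optionally a vdev->destroy hook that iommufd_vdevice_abort() will call.

/*
 * The core allocates ops->vdevice_size bytes and treats the start of the
 * allocation as struct iommufd_vdevice, so the core member must come first.
 */
struct my_vdev {
	struct iommufd_vdevice core;
	u32 hw_slot;			/* invented driver-private state */
};

static void my_vdev_destroy(struct iommufd_vdevice *vdev)
{
	/* undo my_vdev_init(); runs under the igroup lock via abort/destroy */
}

static int my_vdev_init(struct iommufd_vdevice *vdev)
{
	struct my_vdev *mv = container_of(vdev, struct my_vdev, core);

	mv->hw_slot = 0;		/* e.g. program HW with vdev->virt_id */
	vdev->destroy = &my_vdev_destroy;
	return 0;
}

static const struct iommufd_viommu_ops my_viommu_ops = {
	/* ... the other viommu ops ... */
	.vdevice_size = sizeof(struct my_vdev),
	.vdevice_init = my_vdev_init,
};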