From 86b0a96c2952fa07b782b37f6ec783ace63a01a6 Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Tue, 18 Jul 2023 03:55:36 -0700 Subject: iommufd: Add iommufd_ctx_has_group() This adds the helper to check if any device within the given iommu_group has been bound with the iommufd_ctx. This is helpful for the checking on device ownership for the devices which have not been bound but cannot be bound to any other iommufd_ctx as the iommu_group has been bound. Reviewed-by: Jason Gunthorpe Tested-by: Yanting Jiang Tested-by: Terrence Xu Tested-by: Zhenzhong Duan Signed-off-by: Yi Liu Link: https://lore.kernel.org/r/20230718105542.4138-5-yi.l.liu@intel.com Signed-off-by: Alex Williamson --- include/linux/iommufd.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index 1129a36a74c4..f241bafa03da 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -16,6 +16,7 @@ struct page; struct iommufd_ctx; struct iommufd_access; struct file; +struct iommu_group; struct iommufd_device *iommufd_device_bind(struct iommufd_ctx *ictx, struct device *dev, u32 *id); @@ -50,6 +51,7 @@ void iommufd_ctx_get(struct iommufd_ctx *ictx); #if IS_ENABLED(CONFIG_IOMMUFD) struct iommufd_ctx *iommufd_ctx_from_file(struct file *file); void iommufd_ctx_put(struct iommufd_ctx *ictx); +bool iommufd_ctx_has_group(struct iommufd_ctx *ictx, struct iommu_group *group); int iommufd_access_pin_pages(struct iommufd_access *access, unsigned long iova, unsigned long length, struct page **out_pages, -- cgit v1.2.3 From 78d3df457ae5eb53ef1f295a8a704691abea1b1d Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Tue, 18 Jul 2023 03:55:37 -0700 Subject: iommufd: Add helper to retrieve iommufd_ctx and devid This is needed by the vfio-pci driver to report affected devices in the hot-reset for a given device. Reviewed-by: Jason Gunthorpe Tested-by: Yanting Jiang Tested-by: Terrence Xu Tested-by: Zhenzhong Duan Signed-off-by: Yi Liu Link: https://lore.kernel.org/r/20230718105542.4138-6-yi.l.liu@intel.com Signed-off-by: Alex Williamson --- drivers/iommu/iommufd/device.c | 12 ++++++++++++ include/linux/iommufd.h | 3 +++ 2 files changed, 15 insertions(+) (limited to 'include/linux') diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c index 693c2155a5da..cd5d8ab907f9 100644 --- a/drivers/iommu/iommufd/device.c +++ b/drivers/iommu/iommufd/device.c @@ -146,6 +146,18 @@ void iommufd_device_unbind(struct iommufd_device *idev) } EXPORT_SYMBOL_NS_GPL(iommufd_device_unbind, IOMMUFD); +struct iommufd_ctx *iommufd_device_to_ictx(struct iommufd_device *idev) +{ + return idev->ictx; +} +EXPORT_SYMBOL_NS_GPL(iommufd_device_to_ictx, IOMMUFD); + +u32 iommufd_device_to_id(struct iommufd_device *idev) +{ + return idev->obj.id; +} +EXPORT_SYMBOL_NS_GPL(iommufd_device_to_id, IOMMUFD); + static int iommufd_device_setup_msi(struct iommufd_device *idev, struct iommufd_hw_pagetable *hwpt, phys_addr_t sw_msi_start) diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index f241bafa03da..68defed9ea48 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -25,6 +25,9 @@ void iommufd_device_unbind(struct iommufd_device *idev); int iommufd_device_attach(struct iommufd_device *idev, u32 *pt_id); void iommufd_device_detach(struct iommufd_device *idev); +struct iommufd_ctx *iommufd_device_to_ictx(struct iommufd_device *idev); +u32 iommufd_device_to_id(struct iommufd_device *idev); + struct iommufd_access_ops { u8 needs_pin_pages : 1; void (*unmap)(void *data, unsigned long iova, unsigned long length); -- cgit v1.2.3 From af949759bad27934b6f242e8a3b1c394e09fb4a3 Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Tue, 18 Jul 2023 03:55:38 -0700 Subject: vfio: Mark cdev usage in vfio_device This can be used to differentiate whether to report group_id or devid in the revised VFIO_DEVICE_GET_PCI_HOT_RESET_INFO ioctl. At this moment, no cdev path yet, so the vfio_device_cdev_opened() helper always returns false. Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Tested-by: Yanting Jiang Tested-by: Terrence Xu Tested-by: Zhenzhong Duan Signed-off-by: Yi Liu Link: https://lore.kernel.org/r/20230718105542.4138-7-yi.l.liu@intel.com Signed-off-by: Alex Williamson --- include/linux/vfio.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 2c137ea94a3e..2a45853773a6 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -139,6 +139,11 @@ int vfio_iommufd_emulated_attach_ioas(struct vfio_device *vdev, u32 *pt_id); ((int (*)(struct vfio_device *vdev, u32 *pt_id)) NULL) #endif +static inline bool vfio_device_cdev_opened(struct vfio_device *device) +{ + return false; +} + /** * struct vfio_migration_ops - VFIO bus device driver migration callbacks * -- cgit v1.2.3 From a80e1de93275fbfba0617e6bbea8522ea5329eb5 Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Tue, 18 Jul 2023 03:55:39 -0700 Subject: vfio: Add helper to search vfio_device in a dev_set There are drivers that need to search vfio_device within a given dev_set. e.g. vfio-pci. So add a helper. vfio_pci_is_device_in_set() now returns -EBUSY in commit a882c16a2b7e ("vfio/pci: Change vfio_pci_try_bus_reset() to use the dev_set") where it was trying to preserve the return of vfio_pci_try_zap_and_vma_lock_cb(). However, it makes more sense to return -ENODEV. Suggested-by: Alex Williamson Reviewed-by: Jason Gunthorpe Tested-by: Yanting Jiang Tested-by: Terrence Xu Tested-by: Zhenzhong Duan Signed-off-by: Yi Liu Link: https://lore.kernel.org/r/20230718105542.4138-8-yi.l.liu@intel.com Signed-off-by: Alex Williamson --- drivers/vfio/pci/vfio_pci_core.c | 6 +----- drivers/vfio/vfio_main.c | 15 +++++++++++++++ include/linux/vfio.h | 3 +++ 3 files changed, 19 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c index 3d595ad2ed0a..5b5316a5484a 100644 --- a/drivers/vfio/pci/vfio_pci_core.c +++ b/drivers/vfio/pci/vfio_pci_core.c @@ -2377,12 +2377,8 @@ static bool vfio_dev_in_groups(struct vfio_pci_core_device *vdev, static int vfio_pci_is_device_in_set(struct pci_dev *pdev, void *data) { struct vfio_device_set *dev_set = data; - struct vfio_device *cur; - list_for_each_entry(cur, &dev_set->device_list, dev_set_list) - if (cur->dev == &pdev->dev) - return 0; - return -EBUSY; + return vfio_find_device_in_devset(dev_set, &pdev->dev) ? 0 : -ENODEV; } /* diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c index f0ca33b2e1df..ab4f3a794f78 100644 --- a/drivers/vfio/vfio_main.c +++ b/drivers/vfio/vfio_main.c @@ -141,6 +141,21 @@ unsigned int vfio_device_set_open_count(struct vfio_device_set *dev_set) } EXPORT_SYMBOL_GPL(vfio_device_set_open_count); +struct vfio_device * +vfio_find_device_in_devset(struct vfio_device_set *dev_set, + struct device *dev) +{ + struct vfio_device *cur; + + lockdep_assert_held(&dev_set->lock); + + list_for_each_entry(cur, &dev_set->device_list, dev_set_list) + if (cur->dev == dev) + return cur; + return NULL; +} +EXPORT_SYMBOL_GPL(vfio_find_device_in_devset); + /* * Device objects - create, release, get, put, search */ diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 2a45853773a6..ee120d2d530b 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -244,6 +244,9 @@ void vfio_unregister_group_dev(struct vfio_device *device); int vfio_assign_device_set(struct vfio_device *device, void *set_id); unsigned int vfio_device_set_open_count(struct vfio_device_set *dev_set); +struct vfio_device * +vfio_find_device_in_devset(struct vfio_device_set *dev_set, + struct device *dev); int vfio_mig_get_next_state(struct vfio_device *device, enum vfio_device_mig_state cur_fsm, -- cgit v1.2.3 From 9062ff405b49769c04f00373de2c9cefab91b600 Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Tue, 18 Jul 2023 03:55:40 -0700 Subject: vfio/pci: Extend VFIO_DEVICE_GET_PCI_HOT_RESET_INFO for vfio device cdev This allows VFIO_DEVICE_GET_PCI_HOT_RESET_INFO ioctl use the iommufd_ctx of the cdev device to check the ownership of the other affected devices. When VFIO_DEVICE_GET_PCI_HOT_RESET_INFO is called on an IOMMUFD managed device, the new flag VFIO_PCI_HOT_RESET_FLAG_DEV_ID is reported to indicate the values returned are IOMMUFD devids rather than group IDs as used when accessing vfio devices through the conventional vfio group interface. Additionally the flag VFIO_PCI_HOT_RESET_FLAG_DEV_ID_OWNED will be reported in this mode if all of the devices affected by the hot-reset are owned by either virtue of being directly bound to the same iommufd context as the calling device, or implicitly owned via a shared IOMMU group. Suggested-by: Jason Gunthorpe Suggested-by: Alex Williamson Reviewed-by: Jason Gunthorpe Tested-by: Yanting Jiang Tested-by: Zhenzhong Duan Signed-off-by: Yi Liu Link: https://lore.kernel.org/r/20230718105542.4138-9-yi.l.liu@intel.com Signed-off-by: Alex Williamson --- drivers/vfio/iommufd.c | 44 ++++++++++++++++++++++++++++++++ drivers/vfio/pci/vfio_pci_core.c | 54 ++++++++++++++++++++++++++++++++++------ include/linux/vfio.h | 14 +++++++++++ include/uapi/linux/vfio.h | 50 ++++++++++++++++++++++++++++++++++++- 4 files changed, 154 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/vfio/iommufd.c b/drivers/vfio/iommufd.c index 88b00c501015..afda47ee9663 100644 --- a/drivers/vfio/iommufd.c +++ b/drivers/vfio/iommufd.c @@ -66,6 +66,50 @@ void vfio_iommufd_unbind(struct vfio_device *vdev) vdev->ops->unbind_iommufd(vdev); } +struct iommufd_ctx *vfio_iommufd_device_ictx(struct vfio_device *vdev) +{ + if (vdev->iommufd_device) + return iommufd_device_to_ictx(vdev->iommufd_device); + return NULL; +} +EXPORT_SYMBOL_GPL(vfio_iommufd_device_ictx); + +static int vfio_iommufd_device_id(struct vfio_device *vdev) +{ + if (vdev->iommufd_device) + return iommufd_device_to_id(vdev->iommufd_device); + return -EINVAL; +} + +/* + * Return devid for a device. + * valid ID for the device that is owned by the ictx + * -ENOENT = device is owned but there is no ID + * -ENODEV or other error = device is not owned + */ +int vfio_iommufd_get_dev_id(struct vfio_device *vdev, struct iommufd_ctx *ictx) +{ + struct iommu_group *group; + int devid; + + if (vfio_iommufd_device_ictx(vdev) == ictx) + return vfio_iommufd_device_id(vdev); + + group = iommu_group_get(vdev->dev); + if (!group) + return -ENODEV; + + if (iommufd_ctx_has_group(ictx, group)) + devid = -ENOENT; + else + devid = -ENODEV; + + iommu_group_put(group); + + return devid; +} +EXPORT_SYMBOL_GPL(vfio_iommufd_get_dev_id); + /* * The physical standard ops mean that the iommufd_device is bound to the * physical device vdev->dev that was provided to vfio_init_group_dev(). Drivers diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c index 5b5316a5484a..32506c5539b9 100644 --- a/drivers/vfio/pci/vfio_pci_core.c +++ b/drivers/vfio/pci/vfio_pci_core.c @@ -27,6 +27,7 @@ #include #include #include +#include #if IS_ENABLED(CONFIG_EEH) #include #endif @@ -779,26 +780,56 @@ struct vfio_pci_fill_info { int max; int cur; struct vfio_pci_dependent_device *devices; + struct vfio_device *vdev; + u32 flags; }; static int vfio_pci_fill_devs(struct pci_dev *pdev, void *data) { struct vfio_pci_fill_info *fill = data; - struct iommu_group *iommu_group; if (fill->cur == fill->max) return -EAGAIN; /* Something changed, try again */ - iommu_group = iommu_group_get(&pdev->dev); - if (!iommu_group) - return -EPERM; /* Cannot reset non-isolated devices */ + if (fill->flags & VFIO_PCI_HOT_RESET_FLAG_DEV_ID) { + struct iommufd_ctx *iommufd = vfio_iommufd_device_ictx(fill->vdev); + struct vfio_device_set *dev_set = fill->vdev->dev_set; + struct vfio_device *vdev; - fill->devices[fill->cur].group_id = iommu_group_id(iommu_group); + /* + * hot-reset requires all affected devices be represented in + * the dev_set. + */ + vdev = vfio_find_device_in_devset(dev_set, &pdev->dev); + if (!vdev) { + fill->devices[fill->cur].devid = VFIO_PCI_DEVID_NOT_OWNED; + } else { + int id = vfio_iommufd_get_dev_id(vdev, iommufd); + + if (id > 0) + fill->devices[fill->cur].devid = id; + else if (id == -ENOENT) + fill->devices[fill->cur].devid = VFIO_PCI_DEVID_OWNED; + else + fill->devices[fill->cur].devid = VFIO_PCI_DEVID_NOT_OWNED; + } + /* If devid is VFIO_PCI_DEVID_NOT_OWNED, clear owned flag. */ + if (fill->devices[fill->cur].devid == VFIO_PCI_DEVID_NOT_OWNED) + fill->flags &= ~VFIO_PCI_HOT_RESET_FLAG_DEV_ID_OWNED; + } else { + struct iommu_group *iommu_group; + + iommu_group = iommu_group_get(&pdev->dev); + if (!iommu_group) + return -EPERM; /* Cannot reset non-isolated devices */ + + fill->devices[fill->cur].group_id = iommu_group_id(iommu_group); + iommu_group_put(iommu_group); + } fill->devices[fill->cur].segment = pci_domain_nr(pdev->bus); fill->devices[fill->cur].bus = pdev->bus->number; fill->devices[fill->cur].devfn = pdev->devfn; fill->cur++; - iommu_group_put(iommu_group); return 0; } @@ -1270,17 +1301,26 @@ static int vfio_pci_ioctl_get_pci_hot_reset_info( return -ENOMEM; fill.devices = devices; + fill.vdev = &vdev->vdev; + if (vfio_device_cdev_opened(&vdev->vdev)) + fill.flags |= VFIO_PCI_HOT_RESET_FLAG_DEV_ID | + VFIO_PCI_HOT_RESET_FLAG_DEV_ID_OWNED; + + mutex_lock(&vdev->vdev.dev_set->lock); ret = vfio_pci_for_each_slot_or_bus(vdev->pdev, vfio_pci_fill_devs, &fill, slot); + mutex_unlock(&vdev->vdev.dev_set->lock); /* * If a device was removed between counting and filling, we may come up * short of fill.max. If a device was added, we'll have a return of * -EAGAIN above. */ - if (!ret) + if (!ret) { hdr.count = fill.cur; + hdr.flags = fill.flags; + } reset_info_exit: if (copy_to_user(arg, &hdr, minsz)) diff --git a/include/linux/vfio.h b/include/linux/vfio.h index ee120d2d530b..7079911edfb1 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -114,6 +114,8 @@ struct vfio_device_ops { }; #if IS_ENABLED(CONFIG_IOMMUFD) +struct iommufd_ctx *vfio_iommufd_device_ictx(struct vfio_device *vdev); +int vfio_iommufd_get_dev_id(struct vfio_device *vdev, struct iommufd_ctx *ictx); int vfio_iommufd_physical_bind(struct vfio_device *vdev, struct iommufd_ctx *ictx, u32 *out_device_id); void vfio_iommufd_physical_unbind(struct vfio_device *vdev); @@ -123,6 +125,18 @@ int vfio_iommufd_emulated_bind(struct vfio_device *vdev, void vfio_iommufd_emulated_unbind(struct vfio_device *vdev); int vfio_iommufd_emulated_attach_ioas(struct vfio_device *vdev, u32 *pt_id); #else +static inline struct iommufd_ctx * +vfio_iommufd_device_ictx(struct vfio_device *vdev) +{ + return NULL; +} + +static inline int +vfio_iommufd_get_dev_id(struct vfio_device *vdev, struct iommufd_ctx *ictx) +{ + return VFIO_PCI_DEVID_NOT_OWNED; +} + #define vfio_iommufd_physical_bind \ ((int (*)(struct vfio_device *vdev, struct iommufd_ctx *ictx, \ u32 *out_device_id)) NULL) diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index 20c804bdc09c..e680720ddddc 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -677,11 +677,57 @@ enum { * VFIO_DEVICE_GET_PCI_HOT_RESET_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 12, * struct vfio_pci_hot_reset_info) * + * This command is used to query the affected devices in the hot reset for + * a given device. + * + * This command always reports the segment, bus, and devfn information for + * each affected device, and selectively reports the group_id or devid per + * the way how the calling device is opened. + * + * - If the calling device is opened via the traditional group/container + * API, group_id is reported. User should check if it has owned all + * the affected devices and provides a set of group fds to prove the + * ownership in VFIO_DEVICE_PCI_HOT_RESET ioctl. + * + * - If the calling device is opened as a cdev, devid is reported. + * Flag VFIO_PCI_HOT_RESET_FLAG_DEV_ID is set to indicate this + * data type. All the affected devices should be represented in + * the dev_set, ex. bound to a vfio driver, and also be owned by + * this interface which is determined by the following conditions: + * 1) Has a valid devid within the iommufd_ctx of the calling device. + * Ownership cannot be determined across separate iommufd_ctx and + * the cdev calling conventions do not support a proof-of-ownership + * model as provided in the legacy group interface. In this case + * valid devid with value greater than zero is provided in the return + * structure. + * 2) Does not have a valid devid within the iommufd_ctx of the calling + * device, but belongs to the same IOMMU group as the calling device + * or another opened device that has a valid devid within the + * iommufd_ctx of the calling device. This provides implicit ownership + * for devices within the same DMA isolation context. In this case + * the devid value of VFIO_PCI_DEVID_OWNED is provided in the return + * structure. + * + * A devid value of VFIO_PCI_DEVID_NOT_OWNED is provided in the return + * structure for affected devices where device is NOT represented in the + * dev_set or ownership is not available. Such devices prevent the use + * of VFIO_DEVICE_PCI_HOT_RESET ioctl outside of the proof-of-ownership + * calling conventions (ie. via legacy group accessed devices). Flag + * VFIO_PCI_HOT_RESET_FLAG_DEV_ID_OWNED would be set when all the + * affected devices are represented in the dev_set and also owned by + * the user. This flag is available only when + * flag VFIO_PCI_HOT_RESET_FLAG_DEV_ID is set, otherwise reserved. + * * Return: 0 on success, -errno on failure: * -enospc = insufficient buffer, -enodev = unsupported for device. */ struct vfio_pci_dependent_device { - __u32 group_id; + union { + __u32 group_id; + __u32 devid; +#define VFIO_PCI_DEVID_OWNED 0 +#define VFIO_PCI_DEVID_NOT_OWNED -1 + }; __u16 segment; __u8 bus; __u8 devfn; /* Use PCI_SLOT/PCI_FUNC */ @@ -690,6 +736,8 @@ struct vfio_pci_dependent_device { struct vfio_pci_hot_reset_info { __u32 argsz; __u32 flags; +#define VFIO_PCI_HOT_RESET_FLAG_DEV_ID (1 << 0) +#define VFIO_PCI_HOT_RESET_FLAG_DEV_ID_OWNED (1 << 1) __u32 count; struct vfio_pci_dependent_device devices[]; }; -- cgit v1.2.3 From b1a59be8a2b64d00409dc7c9d523572ed32bcff8 Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Tue, 18 Jul 2023 06:55:27 -0700 Subject: vfio: Refine vfio file kAPIs for KVM This prepares for making the below kAPIs to accept both group file and device file instead of only vfio group file. bool vfio_file_enforced_coherent(struct file *file); void vfio_file_set_kvm(struct file *file, struct kvm *kvm); Reviewed-by: Kevin Tian Reviewed-by: Eric Auger Reviewed-by: Jason Gunthorpe Tested-by: Terrence Xu Tested-by: Nicolin Chen Tested-by: Matthew Rosato Tested-by: Yanting Jiang Tested-by: Shameer Kolothum Tested-by: Zhenzhong Duan Signed-off-by: Yi Liu Link: https://lore.kernel.org/r/20230718135551.6592-3-yi.l.liu@intel.com Signed-off-by: Alex Williamson --- drivers/vfio/group.c | 53 ++++++++++++++++-------------------------------- drivers/vfio/vfio.h | 3 +++ drivers/vfio/vfio_main.c | 49 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/vfio.h | 1 + virt/kvm/vfio.c | 10 ++++----- 5 files changed, 75 insertions(+), 41 deletions(-) (limited to 'include/linux') diff --git a/drivers/vfio/group.c b/drivers/vfio/group.c index fbba9fc15e57..b56e19d2a02d 100644 --- a/drivers/vfio/group.c +++ b/drivers/vfio/group.c @@ -754,6 +754,15 @@ bool vfio_device_has_container(struct vfio_device *device) return device->group->container; } +struct vfio_group *vfio_group_from_file(struct file *file) +{ + struct vfio_group *group = file->private_data; + + if (file->f_op != &vfio_group_fops) + return NULL; + return group; +} + /** * vfio_file_iommu_group - Return the struct iommu_group for the vfio group file * @file: VFIO group file @@ -764,13 +773,13 @@ bool vfio_device_has_container(struct vfio_device *device) */ struct iommu_group *vfio_file_iommu_group(struct file *file) { - struct vfio_group *group = file->private_data; + struct vfio_group *group = vfio_group_from_file(file); struct iommu_group *iommu_group = NULL; if (!IS_ENABLED(CONFIG_SPAPR_TCE_IOMMU)) return NULL; - if (!vfio_file_is_group(file)) + if (!group) return NULL; mutex_lock(&group->group_lock); @@ -784,33 +793,20 @@ struct iommu_group *vfio_file_iommu_group(struct file *file) EXPORT_SYMBOL_GPL(vfio_file_iommu_group); /** - * vfio_file_is_group - True if the file is usable with VFIO aPIS + * vfio_file_is_group - True if the file is a vfio group file * @file: VFIO group file */ bool vfio_file_is_group(struct file *file) { - return file->f_op == &vfio_group_fops; + return vfio_group_from_file(file); } EXPORT_SYMBOL_GPL(vfio_file_is_group); -/** - * vfio_file_enforced_coherent - True if the DMA associated with the VFIO file - * is always CPU cache coherent - * @file: VFIO group file - * - * Enforced coherency means that the IOMMU ignores things like the PCIe no-snoop - * bit in DMA transactions. A return of false indicates that the user has - * rights to access additional instructions such as wbinvd on x86. - */ -bool vfio_file_enforced_coherent(struct file *file) +bool vfio_group_enforced_coherent(struct vfio_group *group) { - struct vfio_group *group = file->private_data; struct vfio_device *device; bool ret = true; - if (!vfio_file_is_group(file)) - return true; - /* * If the device does not have IOMMU_CAP_ENFORCE_CACHE_COHERENCY then * any domain later attached to it will also not support it. If the cap @@ -828,28 +824,13 @@ bool vfio_file_enforced_coherent(struct file *file) mutex_unlock(&group->device_lock); return ret; } -EXPORT_SYMBOL_GPL(vfio_file_enforced_coherent); -/** - * vfio_file_set_kvm - Link a kvm with VFIO drivers - * @file: VFIO group file - * @kvm: KVM to link - * - * When a VFIO device is first opened the KVM will be available in - * device->kvm if one was associated with the group. - */ -void vfio_file_set_kvm(struct file *file, struct kvm *kvm) +void vfio_group_set_kvm(struct vfio_group *group, struct kvm *kvm) { - struct vfio_group *group = file->private_data; - - if (!vfio_file_is_group(file)) - return; - spin_lock(&group->kvm_ref_lock); group->kvm = kvm; spin_unlock(&group->kvm_ref_lock); } -EXPORT_SYMBOL_GPL(vfio_file_set_kvm); /** * vfio_file_has_dev - True if the VFIO file is a handle for device @@ -860,9 +841,9 @@ EXPORT_SYMBOL_GPL(vfio_file_set_kvm); */ bool vfio_file_has_dev(struct file *file, struct vfio_device *device) { - struct vfio_group *group = file->private_data; + struct vfio_group *group = vfio_group_from_file(file); - if (!vfio_file_is_group(file)) + if (!group) return false; return group == device->group; diff --git a/drivers/vfio/vfio.h b/drivers/vfio/vfio.h index 87d3dd6b9ef9..b1e327a85a32 100644 --- a/drivers/vfio/vfio.h +++ b/drivers/vfio/vfio.h @@ -90,6 +90,9 @@ void vfio_device_group_unregister(struct vfio_device *device); int vfio_device_group_use_iommu(struct vfio_device *device); void vfio_device_group_unuse_iommu(struct vfio_device *device); void vfio_device_group_close(struct vfio_device *device); +struct vfio_group *vfio_group_from_file(struct file *file); +bool vfio_group_enforced_coherent(struct vfio_group *group); +void vfio_group_set_kvm(struct vfio_group *group, struct kvm *kvm); bool vfio_device_has_container(struct vfio_device *device); int __init vfio_group_init(void); void vfio_group_cleanup(void); diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c index 39c1158ffef0..4665791aa2eb 100644 --- a/drivers/vfio/vfio_main.c +++ b/drivers/vfio/vfio_main.c @@ -1190,6 +1190,55 @@ const struct file_operations vfio_device_fops = { .mmap = vfio_device_fops_mmap, }; +/** + * vfio_file_is_valid - True if the file is valid vfio file + * @file: VFIO group file or VFIO device file + */ +bool vfio_file_is_valid(struct file *file) +{ + return vfio_group_from_file(file); +} +EXPORT_SYMBOL_GPL(vfio_file_is_valid); + +/** + * vfio_file_enforced_coherent - True if the DMA associated with the VFIO file + * is always CPU cache coherent + * @file: VFIO group file or VFIO device file + * + * Enforced coherency means that the IOMMU ignores things like the PCIe no-snoop + * bit in DMA transactions. A return of false indicates that the user has + * rights to access additional instructions such as wbinvd on x86. + */ +bool vfio_file_enforced_coherent(struct file *file) +{ + struct vfio_group *group; + + group = vfio_group_from_file(file); + if (group) + return vfio_group_enforced_coherent(group); + + return true; +} +EXPORT_SYMBOL_GPL(vfio_file_enforced_coherent); + +/** + * vfio_file_set_kvm - Link a kvm with VFIO drivers + * @file: VFIO group file or VFIO device file + * @kvm: KVM to link + * + * When a VFIO device is first opened the KVM will be available in + * device->kvm if one was associated with the file. + */ +void vfio_file_set_kvm(struct file *file, struct kvm *kvm) +{ + struct vfio_group *group; + + group = vfio_group_from_file(file); + if (group) + vfio_group_set_kvm(group, kvm); +} +EXPORT_SYMBOL_GPL(vfio_file_set_kvm); + /* * Sub-module support */ diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 7079911edfb1..06a5221949c5 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -272,6 +272,7 @@ int vfio_mig_get_next_state(struct vfio_device *device, */ struct iommu_group *vfio_file_iommu_group(struct file *file); bool vfio_file_is_group(struct file *file); +bool vfio_file_is_valid(struct file *file); bool vfio_file_enforced_coherent(struct file *file); void vfio_file_set_kvm(struct file *file, struct kvm *kvm); bool vfio_file_has_dev(struct file *file, struct vfio_device *device); diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c index 9584eb57e0ed..b33c7b8488b3 100644 --- a/virt/kvm/vfio.c +++ b/virt/kvm/vfio.c @@ -64,18 +64,18 @@ static bool kvm_vfio_file_enforced_coherent(struct file *file) return ret; } -static bool kvm_vfio_file_is_group(struct file *file) +static bool kvm_vfio_file_is_valid(struct file *file) { bool (*fn)(struct file *file); bool ret; - fn = symbol_get(vfio_file_is_group); + fn = symbol_get(vfio_file_is_valid); if (!fn) return false; ret = fn(file); - symbol_put(vfio_file_is_group); + symbol_put(vfio_file_is_valid); return ret; } @@ -154,8 +154,8 @@ static int kvm_vfio_group_add(struct kvm_device *dev, unsigned int fd) if (!filp) return -EBADF; - /* Ensure the FD is a vfio group FD.*/ - if (!kvm_vfio_file_is_group(filp)) { + /* Ensure the FD is a vfio FD. */ + if (!kvm_vfio_file_is_valid(filp)) { ret = -EINVAL; goto err_fput; } -- cgit v1.2.3 From 9048c7341c4df9cae04c154a8b0f556dbe913358 Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Tue, 18 Jul 2023 06:55:38 -0700 Subject: vfio-iommufd: Add detach_ioas support for physical VFIO devices This prepares for adding DETACH ioctl for physical VFIO devices. Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Tested-by: Terrence Xu Tested-by: Nicolin Chen Tested-by: Matthew Rosato Tested-by: Yanting Jiang Tested-by: Shameer Kolothum Tested-by: Zhenzhong Duan Signed-off-by: Yi Liu Link: https://lore.kernel.org/r/20230718135551.6592-14-yi.l.liu@intel.com Signed-off-by: Alex Williamson --- Documentation/driver-api/vfio.rst | 8 +++++--- drivers/vfio/fsl-mc/vfio_fsl_mc.c | 1 + drivers/vfio/iommufd.c | 20 ++++++++++++++++++++ drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c | 2 ++ drivers/vfio/pci/mlx5/main.c | 1 + drivers/vfio/pci/vfio_pci.c | 1 + drivers/vfio/platform/vfio_amba.c | 1 + drivers/vfio/platform/vfio_platform.c | 1 + drivers/vfio/vfio_main.c | 3 ++- include/linux/vfio.h | 8 +++++++- 10 files changed, 41 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/Documentation/driver-api/vfio.rst b/Documentation/driver-api/vfio.rst index 68abc089d6dd..363e12c90b87 100644 --- a/Documentation/driver-api/vfio.rst +++ b/Documentation/driver-api/vfio.rst @@ -279,6 +279,7 @@ similar to a file operations structure:: struct iommufd_ctx *ictx, u32 *out_device_id); void (*unbind_iommufd)(struct vfio_device *vdev); int (*attach_ioas)(struct vfio_device *vdev, u32 *pt_id); + void (*detach_ioas)(struct vfio_device *vdev); int (*open_device)(struct vfio_device *vdev); void (*close_device)(struct vfio_device *vdev); ssize_t (*read)(struct vfio_device *vdev, char __user *buf, @@ -315,9 +316,10 @@ container_of(). - The [un]bind_iommufd callbacks are issued when the device is bound to and unbound from iommufd. - - The attach_ioas callback is issued when the device is attached to an - IOAS managed by the bound iommufd. The attached IOAS is automatically - detached when the device is unbound from iommufd. + - The [de]attach_ioas callback is issued when the device is attached to + and detached from an IOAS managed by the bound iommufd. However, the + attached IOAS can also be automatically detached when the device is + unbound from iommufd. - The read/write/mmap callbacks implement the device region access defined by the device's own VFIO_DEVICE_GET_REGION_INFO ioctl. diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc.c b/drivers/vfio/fsl-mc/vfio_fsl_mc.c index f2140e94d41e..116358a8f1cf 100644 --- a/drivers/vfio/fsl-mc/vfio_fsl_mc.c +++ b/drivers/vfio/fsl-mc/vfio_fsl_mc.c @@ -593,6 +593,7 @@ static const struct vfio_device_ops vfio_fsl_mc_ops = { .bind_iommufd = vfio_iommufd_physical_bind, .unbind_iommufd = vfio_iommufd_physical_unbind, .attach_ioas = vfio_iommufd_physical_attach_ioas, + .detach_ioas = vfio_iommufd_physical_detach_ioas, }; static struct fsl_mc_driver vfio_fsl_mc_driver = { diff --git a/drivers/vfio/iommufd.c b/drivers/vfio/iommufd.c index 4fc674c01a05..86df5415759a 100644 --- a/drivers/vfio/iommufd.c +++ b/drivers/vfio/iommufd.c @@ -140,6 +140,14 @@ int vfio_iommufd_physical_attach_ioas(struct vfio_device *vdev, u32 *pt_id) { int rc; + lockdep_assert_held(&vdev->dev_set->lock); + + if (WARN_ON(!vdev->iommufd_device)) + return -EINVAL; + + if (vdev->iommufd_attached) + return -EBUSY; + rc = iommufd_device_attach(vdev->iommufd_device, pt_id); if (rc) return rc; @@ -148,6 +156,18 @@ int vfio_iommufd_physical_attach_ioas(struct vfio_device *vdev, u32 *pt_id) } EXPORT_SYMBOL_GPL(vfio_iommufd_physical_attach_ioas); +void vfio_iommufd_physical_detach_ioas(struct vfio_device *vdev) +{ + lockdep_assert_held(&vdev->dev_set->lock); + + if (WARN_ON(!vdev->iommufd_device) || !vdev->iommufd_attached) + return; + + iommufd_device_detach(vdev->iommufd_device); + vdev->iommufd_attached = false; +} +EXPORT_SYMBOL_GPL(vfio_iommufd_physical_detach_ioas); + /* * The emulated standard ops mean that vfio_device is going to use the * "mdev path" and will call vfio_pin_pages()/vfio_dma_rw(). Drivers using this diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c index a117eaf21c14..b2f9778c8366 100644 --- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c +++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c @@ -1373,6 +1373,7 @@ static const struct vfio_device_ops hisi_acc_vfio_pci_migrn_ops = { .bind_iommufd = vfio_iommufd_physical_bind, .unbind_iommufd = vfio_iommufd_physical_unbind, .attach_ioas = vfio_iommufd_physical_attach_ioas, + .detach_ioas = vfio_iommufd_physical_detach_ioas, }; static const struct vfio_device_ops hisi_acc_vfio_pci_ops = { @@ -1391,6 +1392,7 @@ static const struct vfio_device_ops hisi_acc_vfio_pci_ops = { .bind_iommufd = vfio_iommufd_physical_bind, .unbind_iommufd = vfio_iommufd_physical_unbind, .attach_ioas = vfio_iommufd_physical_attach_ioas, + .detach_ioas = vfio_iommufd_physical_detach_ioas, }; static int hisi_acc_vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) diff --git a/drivers/vfio/pci/mlx5/main.c b/drivers/vfio/pci/mlx5/main.c index d95fd382814c..42ec574a8622 100644 --- a/drivers/vfio/pci/mlx5/main.c +++ b/drivers/vfio/pci/mlx5/main.c @@ -1320,6 +1320,7 @@ static const struct vfio_device_ops mlx5vf_pci_ops = { .bind_iommufd = vfio_iommufd_physical_bind, .unbind_iommufd = vfio_iommufd_physical_unbind, .attach_ioas = vfio_iommufd_physical_attach_ioas, + .detach_ioas = vfio_iommufd_physical_detach_ioas, }; static int mlx5vf_pci_probe(struct pci_dev *pdev, diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 29091ee2e984..cb5b7f865d58 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -141,6 +141,7 @@ static const struct vfio_device_ops vfio_pci_ops = { .bind_iommufd = vfio_iommufd_physical_bind, .unbind_iommufd = vfio_iommufd_physical_unbind, .attach_ioas = vfio_iommufd_physical_attach_ioas, + .detach_ioas = vfio_iommufd_physical_detach_ioas, }; static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) diff --git a/drivers/vfio/platform/vfio_amba.c b/drivers/vfio/platform/vfio_amba.c index 83fe54015595..6464b3939ebc 100644 --- a/drivers/vfio/platform/vfio_amba.c +++ b/drivers/vfio/platform/vfio_amba.c @@ -119,6 +119,7 @@ static const struct vfio_device_ops vfio_amba_ops = { .bind_iommufd = vfio_iommufd_physical_bind, .unbind_iommufd = vfio_iommufd_physical_unbind, .attach_ioas = vfio_iommufd_physical_attach_ioas, + .detach_ioas = vfio_iommufd_physical_detach_ioas, }; static const struct amba_id pl330_ids[] = { diff --git a/drivers/vfio/platform/vfio_platform.c b/drivers/vfio/platform/vfio_platform.c index 22a1efca32a8..8cf22fa65baa 100644 --- a/drivers/vfio/platform/vfio_platform.c +++ b/drivers/vfio/platform/vfio_platform.c @@ -108,6 +108,7 @@ static const struct vfio_device_ops vfio_platform_ops = { .bind_iommufd = vfio_iommufd_physical_bind, .unbind_iommufd = vfio_iommufd_physical_unbind, .attach_ioas = vfio_iommufd_physical_attach_ioas, + .detach_ioas = vfio_iommufd_physical_detach_ioas, }; static struct platform_driver vfio_platform_driver = { diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c index 3a4b7eb128df..c71c0d1a079f 100644 --- a/drivers/vfio/vfio_main.c +++ b/drivers/vfio/vfio_main.c @@ -273,7 +273,8 @@ static int __vfio_register_dev(struct vfio_device *device, if (WARN_ON(IS_ENABLED(CONFIG_IOMMUFD) && (!device->ops->bind_iommufd || !device->ops->unbind_iommufd || - !device->ops->attach_ioas))) + !device->ops->attach_ioas || + !device->ops->detach_ioas))) return -EINVAL; /* diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 06a5221949c5..f2f02273ece1 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -73,7 +73,9 @@ struct vfio_device { * @bind_iommufd: Called when binding the device to an iommufd * @unbind_iommufd: Opposite of bind_iommufd * @attach_ioas: Called when attaching device to an IOAS/HWPT managed by the - * bound iommufd. Undo in unbind_iommufd. + * bound iommufd. Undo in unbind_iommufd if @detach_ioas is not + * called. + * @detach_ioas: Opposite of attach_ioas * @open_device: Called when the first file descriptor is opened for this device * @close_device: Opposite of open_device * @read: Perform read(2) on device file descriptor @@ -97,6 +99,7 @@ struct vfio_device_ops { struct iommufd_ctx *ictx, u32 *out_device_id); void (*unbind_iommufd)(struct vfio_device *vdev); int (*attach_ioas)(struct vfio_device *vdev, u32 *pt_id); + void (*detach_ioas)(struct vfio_device *vdev); int (*open_device)(struct vfio_device *vdev); void (*close_device)(struct vfio_device *vdev); ssize_t (*read)(struct vfio_device *vdev, char __user *buf, @@ -120,6 +123,7 @@ int vfio_iommufd_physical_bind(struct vfio_device *vdev, struct iommufd_ctx *ictx, u32 *out_device_id); void vfio_iommufd_physical_unbind(struct vfio_device *vdev); int vfio_iommufd_physical_attach_ioas(struct vfio_device *vdev, u32 *pt_id); +void vfio_iommufd_physical_detach_ioas(struct vfio_device *vdev); int vfio_iommufd_emulated_bind(struct vfio_device *vdev, struct iommufd_ctx *ictx, u32 *out_device_id); void vfio_iommufd_emulated_unbind(struct vfio_device *vdev); @@ -144,6 +148,8 @@ vfio_iommufd_get_dev_id(struct vfio_device *vdev, struct iommufd_ctx *ictx) ((void (*)(struct vfio_device *vdev)) NULL) #define vfio_iommufd_physical_attach_ioas \ ((int (*)(struct vfio_device *vdev, u32 *pt_id)) NULL) +#define vfio_iommufd_physical_detach_ioas \ + ((void (*)(struct vfio_device *vdev)) NULL) #define vfio_iommufd_emulated_bind \ ((int (*)(struct vfio_device *vdev, struct iommufd_ctx *ictx, \ u32 *out_device_id)) NULL) -- cgit v1.2.3 From e23a6217f3bb4f6f205d4517782ad49e3533fc1c Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 18 Jul 2023 06:55:39 -0700 Subject: iommufd/device: Add iommufd_access_detach() API Previously, the detach routine is only done by the destroy(). And it was called by vfio_iommufd_emulated_unbind() when the device runs close(), so all the mappings in iopt were cleaned in that setup, when the call trace reaches this detach() routine. Now, there's a need of a detach uAPI, meaning that it does not only need a new iommufd_access_detach() API, but also requires access->ops->unmap() call as a cleanup. So add one. However, leaving that unprotected can introduce some potential of a race condition during the pin_/unpin_pages() call, where access->ioas->iopt is getting referenced. So, add an ioas_lock to protect the context of iopt referencings. Also, to allow the iommufd_access_unpin_pages() callback to happen via this unmap() call, add an ioas_unpin pointer, so the unpin routine won't be affected by the "access->ioas = NULL" trick. Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Tested-by: Terrence Xu Tested-by: Nicolin Chen Tested-by: Matthew Rosato Tested-by: Yanting Jiang Tested-by: Shameer Kolothum Tested-by: Zhenzhong Duan Signed-off-by: Nicolin Chen Signed-off-by: Yi Liu Link: https://lore.kernel.org/r/20230718135551.6592-15-yi.l.liu@intel.com Signed-off-by: Alex Williamson --- drivers/iommu/iommufd/device.c | 74 ++++++++++++++++++++++++++++++--- drivers/iommu/iommufd/iommufd_private.h | 2 + include/linux/iommufd.h | 1 + 3 files changed, 72 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c index cd5d8ab907f9..59fec5783eb9 100644 --- a/drivers/iommu/iommufd/device.c +++ b/drivers/iommu/iommufd/device.c @@ -486,6 +486,7 @@ iommufd_access_create(struct iommufd_ctx *ictx, iommufd_ctx_get(ictx); iommufd_object_finalize(ictx, &access->obj); *id = access->obj.id; + mutex_init(&access->ioas_lock); return access; } EXPORT_SYMBOL_NS_GPL(iommufd_access_create, IOMMUFD); @@ -505,26 +506,60 @@ void iommufd_access_destroy(struct iommufd_access *access) } EXPORT_SYMBOL_NS_GPL(iommufd_access_destroy, IOMMUFD); +void iommufd_access_detach(struct iommufd_access *access) +{ + struct iommufd_ioas *cur_ioas = access->ioas; + + mutex_lock(&access->ioas_lock); + if (WARN_ON(!access->ioas)) + goto out; + /* + * Set ioas to NULL to block any further iommufd_access_pin_pages(). + * iommufd_access_unpin_pages() can continue using access->ioas_unpin. + */ + access->ioas = NULL; + + if (access->ops->unmap) { + mutex_unlock(&access->ioas_lock); + access->ops->unmap(access->data, 0, ULONG_MAX); + mutex_lock(&access->ioas_lock); + } + iopt_remove_access(&cur_ioas->iopt, access); + refcount_dec(&cur_ioas->obj.users); +out: + access->ioas_unpin = NULL; + mutex_unlock(&access->ioas_lock); +} +EXPORT_SYMBOL_NS_GPL(iommufd_access_detach, IOMMUFD); + int iommufd_access_attach(struct iommufd_access *access, u32 ioas_id) { struct iommufd_ioas *new_ioas; int rc = 0; - if (access->ioas) + mutex_lock(&access->ioas_lock); + if (WARN_ON(access->ioas || access->ioas_unpin)) { + mutex_unlock(&access->ioas_lock); return -EINVAL; + } new_ioas = iommufd_get_ioas(access->ictx, ioas_id); - if (IS_ERR(new_ioas)) + if (IS_ERR(new_ioas)) { + mutex_unlock(&access->ioas_lock); return PTR_ERR(new_ioas); + } rc = iopt_add_access(&new_ioas->iopt, access); if (rc) { + mutex_unlock(&access->ioas_lock); iommufd_put_object(&new_ioas->obj); return rc; } iommufd_ref_to_users(&new_ioas->obj); access->ioas = new_ioas; + access->ioas_unpin = new_ioas; + mutex_unlock(&access->ioas_lock); return 0; } EXPORT_SYMBOL_NS_GPL(iommufd_access_attach, IOMMUFD); @@ -579,8 +614,8 @@ void iommufd_access_notify_unmap(struct io_pagetable *iopt, unsigned long iova, void iommufd_access_unpin_pages(struct iommufd_access *access, unsigned long iova, unsigned long length) { - struct io_pagetable *iopt = &access->ioas->iopt; struct iopt_area_contig_iter iter; + struct io_pagetable *iopt; unsigned long last_iova; struct iopt_area *area; @@ -588,6 +623,17 @@ void iommufd_access_unpin_pages(struct iommufd_access *access, WARN_ON(check_add_overflow(iova, length - 1, &last_iova))) return; + mutex_lock(&access->ioas_lock); + /* + * The driver must be doing something wrong if it calls this before an + * iommufd_access_attach() or after an iommufd_access_detach(). + */ + if (WARN_ON(!access->ioas_unpin)) { + mutex_unlock(&access->ioas_lock); + return; + } + iopt = &access->ioas_unpin->iopt; + down_read(&iopt->iova_rwsem); iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova) iopt_area_remove_access( @@ -597,6 +643,7 @@ void iommufd_access_unpin_pages(struct iommufd_access *access, min(last_iova, iopt_area_last_iova(area)))); WARN_ON(!iopt_area_contig_done(&iter)); up_read(&iopt->iova_rwsem); + mutex_unlock(&access->ioas_lock); } EXPORT_SYMBOL_NS_GPL(iommufd_access_unpin_pages, IOMMUFD); @@ -642,8 +689,8 @@ int iommufd_access_pin_pages(struct iommufd_access *access, unsigned long iova, unsigned long length, struct page **out_pages, unsigned int flags) { - struct io_pagetable *iopt = &access->ioas->iopt; struct iopt_area_contig_iter iter; + struct io_pagetable *iopt; unsigned long last_iova; struct iopt_area *area; int rc; @@ -658,6 +705,13 @@ int iommufd_access_pin_pages(struct iommufd_access *access, unsigned long iova, if (check_add_overflow(iova, length - 1, &last_iova)) return -EOVERFLOW; + mutex_lock(&access->ioas_lock); + if (!access->ioas) { + mutex_unlock(&access->ioas_lock); + return -ENOENT; + } + iopt = &access->ioas->iopt; + down_read(&iopt->iova_rwsem); iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova) { unsigned long last = min(last_iova, iopt_area_last_iova(area)); @@ -688,6 +742,7 @@ int iommufd_access_pin_pages(struct iommufd_access *access, unsigned long iova, } up_read(&iopt->iova_rwsem); + mutex_unlock(&access->ioas_lock); return 0; err_remove: @@ -702,6 +757,7 @@ err_remove: iopt_area_last_iova(area)))); } up_read(&iopt->iova_rwsem); + mutex_unlock(&access->ioas_lock); return rc; } EXPORT_SYMBOL_NS_GPL(iommufd_access_pin_pages, IOMMUFD); @@ -721,8 +777,8 @@ EXPORT_SYMBOL_NS_GPL(iommufd_access_pin_pages, IOMMUFD); int iommufd_access_rw(struct iommufd_access *access, unsigned long iova, void *data, size_t length, unsigned int flags) { - struct io_pagetable *iopt = &access->ioas->iopt; struct iopt_area_contig_iter iter; + struct io_pagetable *iopt; struct iopt_area *area; unsigned long last_iova; int rc; @@ -732,6 +788,13 @@ int iommufd_access_rw(struct iommufd_access *access, unsigned long iova, if (check_add_overflow(iova, length - 1, &last_iova)) return -EOVERFLOW; + mutex_lock(&access->ioas_lock); + if (!access->ioas) { + mutex_unlock(&access->ioas_lock); + return -ENOENT; + } + iopt = &access->ioas->iopt; + down_read(&iopt->iova_rwsem); iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova) { unsigned long last = min(last_iova, iopt_area_last_iova(area)); @@ -758,6 +821,7 @@ int iommufd_access_rw(struct iommufd_access *access, unsigned long iova, rc = -ENOENT; err_out: up_read(&iopt->iova_rwsem); + mutex_unlock(&access->ioas_lock); return rc; } EXPORT_SYMBOL_NS_GPL(iommufd_access_rw, IOMMUFD); diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h index b38e67d1988b..3dcaf86aab97 100644 --- a/drivers/iommu/iommufd/iommufd_private.h +++ b/drivers/iommu/iommufd/iommufd_private.h @@ -285,6 +285,8 @@ struct iommufd_access { struct iommufd_object obj; struct iommufd_ctx *ictx; struct iommufd_ioas *ioas; + struct iommufd_ioas *ioas_unpin; + struct mutex ioas_lock; const struct iommufd_access_ops *ops; void *data; unsigned long iova_alignment; diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index 68defed9ea48..3a3216cb9482 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -48,6 +48,7 @@ iommufd_access_create(struct iommufd_ctx *ictx, const struct iommufd_access_ops *ops, void *data, u32 *id); void iommufd_access_destroy(struct iommufd_access *access); int iommufd_access_attach(struct iommufd_access *access, u32 ioas_id); +void iommufd_access_detach(struct iommufd_access *access); void iommufd_ctx_get(struct iommufd_ctx *ictx); -- cgit v1.2.3 From 8cfa71860233652f8566bcdf55e77aefe0017b4a Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Tue, 18 Jul 2023 06:55:40 -0700 Subject: vfio-iommufd: Add detach_ioas support for emulated VFIO devices This prepares for adding DETACH ioctl for emulated VFIO devices. Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Tested-by: Terrence Xu Tested-by: Nicolin Chen Tested-by: Matthew Rosato Tested-by: Yanting Jiang Tested-by: Shameer Kolothum Tested-by: Zhenzhong Duan Signed-off-by: Yi Liu Link: https://lore.kernel.org/r/20230718135551.6592-16-yi.l.liu@intel.com Signed-off-by: Alex Williamson --- drivers/gpu/drm/i915/gvt/kvmgt.c | 1 + drivers/s390/cio/vfio_ccw_ops.c | 1 + drivers/s390/crypto/vfio_ap_ops.c | 1 + drivers/vfio/iommufd.c | 13 +++++++++++++ include/linux/vfio.h | 3 +++ samples/vfio-mdev/mbochs.c | 1 + samples/vfio-mdev/mdpy.c | 1 + samples/vfio-mdev/mtty.c | 1 + 8 files changed, 22 insertions(+) (limited to 'include/linux') diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index de675d799c7d..9cd9e9da60dd 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -1474,6 +1474,7 @@ static const struct vfio_device_ops intel_vgpu_dev_ops = { .bind_iommufd = vfio_iommufd_emulated_bind, .unbind_iommufd = vfio_iommufd_emulated_unbind, .attach_ioas = vfio_iommufd_emulated_attach_ioas, + .detach_ioas = vfio_iommufd_emulated_detach_ioas, }; static int intel_vgpu_probe(struct mdev_device *mdev) diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c index 5b53b94f13c7..cba4971618ff 100644 --- a/drivers/s390/cio/vfio_ccw_ops.c +++ b/drivers/s390/cio/vfio_ccw_ops.c @@ -632,6 +632,7 @@ static const struct vfio_device_ops vfio_ccw_dev_ops = { .bind_iommufd = vfio_iommufd_emulated_bind, .unbind_iommufd = vfio_iommufd_emulated_unbind, .attach_ioas = vfio_iommufd_emulated_attach_ioas, + .detach_ioas = vfio_iommufd_emulated_detach_ioas, }; struct mdev_driver vfio_ccw_mdev_driver = { diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c index b441745b0418..2d3c3a79b687 100644 --- a/drivers/s390/crypto/vfio_ap_ops.c +++ b/drivers/s390/crypto/vfio_ap_ops.c @@ -1975,6 +1975,7 @@ static const struct vfio_device_ops vfio_ap_matrix_dev_ops = { .bind_iommufd = vfio_iommufd_emulated_bind, .unbind_iommufd = vfio_iommufd_emulated_unbind, .attach_ioas = vfio_iommufd_emulated_attach_ioas, + .detach_ioas = vfio_iommufd_emulated_detach_ioas, .request = vfio_ap_mdev_request }; diff --git a/drivers/vfio/iommufd.c b/drivers/vfio/iommufd.c index 86df5415759a..4d84904fd927 100644 --- a/drivers/vfio/iommufd.c +++ b/drivers/vfio/iommufd.c @@ -231,3 +231,16 @@ int vfio_iommufd_emulated_attach_ioas(struct vfio_device *vdev, u32 *pt_id) return 0; } EXPORT_SYMBOL_GPL(vfio_iommufd_emulated_attach_ioas); + +void vfio_iommufd_emulated_detach_ioas(struct vfio_device *vdev) +{ + lockdep_assert_held(&vdev->dev_set->lock); + + if (WARN_ON(!vdev->iommufd_access) || + !vdev->iommufd_attached) + return; + + iommufd_access_detach(vdev->iommufd_access); + vdev->iommufd_attached = false; +} +EXPORT_SYMBOL_GPL(vfio_iommufd_emulated_detach_ioas); diff --git a/include/linux/vfio.h b/include/linux/vfio.h index f2f02273ece1..24091a7c7bdb 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -128,6 +128,7 @@ int vfio_iommufd_emulated_bind(struct vfio_device *vdev, struct iommufd_ctx *ictx, u32 *out_device_id); void vfio_iommufd_emulated_unbind(struct vfio_device *vdev); int vfio_iommufd_emulated_attach_ioas(struct vfio_device *vdev, u32 *pt_id); +void vfio_iommufd_emulated_detach_ioas(struct vfio_device *vdev); #else static inline struct iommufd_ctx * vfio_iommufd_device_ictx(struct vfio_device *vdev) @@ -157,6 +158,8 @@ vfio_iommufd_get_dev_id(struct vfio_device *vdev, struct iommufd_ctx *ictx) ((void (*)(struct vfio_device *vdev)) NULL) #define vfio_iommufd_emulated_attach_ioas \ ((int (*)(struct vfio_device *vdev, u32 *pt_id)) NULL) +#define vfio_iommufd_emulated_detach_ioas \ + ((void (*)(struct vfio_device *vdev)) NULL) #endif static inline bool vfio_device_cdev_opened(struct vfio_device *device) diff --git a/samples/vfio-mdev/mbochs.c b/samples/vfio-mdev/mbochs.c index c6c6b5d26670..3764d1911b51 100644 --- a/samples/vfio-mdev/mbochs.c +++ b/samples/vfio-mdev/mbochs.c @@ -1377,6 +1377,7 @@ static const struct vfio_device_ops mbochs_dev_ops = { .bind_iommufd = vfio_iommufd_emulated_bind, .unbind_iommufd = vfio_iommufd_emulated_unbind, .attach_ioas = vfio_iommufd_emulated_attach_ioas, + .detach_ioas = vfio_iommufd_emulated_detach_ioas, }; static struct mdev_driver mbochs_driver = { diff --git a/samples/vfio-mdev/mdpy.c b/samples/vfio-mdev/mdpy.c index a62ea11e20ec..064e1c0a7aa8 100644 --- a/samples/vfio-mdev/mdpy.c +++ b/samples/vfio-mdev/mdpy.c @@ -666,6 +666,7 @@ static const struct vfio_device_ops mdpy_dev_ops = { .bind_iommufd = vfio_iommufd_emulated_bind, .unbind_iommufd = vfio_iommufd_emulated_unbind, .attach_ioas = vfio_iommufd_emulated_attach_ioas, + .detach_ioas = vfio_iommufd_emulated_detach_ioas, }; static struct mdev_driver mdpy_driver = { diff --git a/samples/vfio-mdev/mtty.c b/samples/vfio-mdev/mtty.c index a60801fb8660..5af00387c519 100644 --- a/samples/vfio-mdev/mtty.c +++ b/samples/vfio-mdev/mtty.c @@ -1272,6 +1272,7 @@ static const struct vfio_device_ops mtty_dev_ops = { .bind_iommufd = vfio_iommufd_emulated_bind, .unbind_iommufd = vfio_iommufd_emulated_unbind, .attach_ioas = vfio_iommufd_emulated_attach_ioas, + .detach_ioas = vfio_iommufd_emulated_detach_ioas, }; static struct mdev_driver mtty_driver = { -- cgit v1.2.3 From 8b6f173a4ce47ef0606124710315560c64f2344e Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Tue, 18 Jul 2023 06:55:43 -0700 Subject: vfio: Add cdev for vfio_device This adds cdev support for vfio_device. It allows the user to directly open a vfio device w/o using the legacy container/group interface, as a prerequisite for supporting new iommu features like nested translation and etc. The device fd opened in this manner doesn't have the capability to access the device as the fops open() doesn't open the device until the successful VFIO_DEVICE_BIND_IOMMUFD ioctl which will be added in a later patch. With this patch, devices registered to vfio core would have both the legacy group and the new device interfaces created. - group interface : /dev/vfio/$groupID - device interface: /dev/vfio/devices/vfioX - normal device ("X" is a unique number across vfio devices) For a given device, the user can identify the matching vfioX by searching the vfio-dev folder under the sysfs path of the device. Take PCI device (0000:6a:01.0) as an example, /sys/bus/pci/devices/0000\:6a\:01.0/vfio-dev/vfioX implies the matching vfioX under /dev/vfio/devices/, and vfio-dev/vfioX/dev contains the major:minor number of the matching /dev/vfio/devices/vfioX. The user can get device fd by opening the /dev/vfio/devices/vfioX. The vfio_device cdev logic in this patch: *) __vfio_register_dev() path ends up doing cdev_device_add() for each vfio_device if VFIO_DEVICE_CDEV configured. *) vfio_unregister_group_dev() path does cdev_device_del(); cdev interface does not support noiommu devices, so VFIO only creates the legacy group interface for the physical devices that do not have IOMMU. noiommu users should use the legacy group interface. Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Tested-by: Terrence Xu Tested-by: Nicolin Chen Tested-by: Matthew Rosato Tested-by: Yanting Jiang Tested-by: Shameer Kolothum Tested-by: Zhenzhong Duan Signed-off-by: Yi Liu Link: https://lore.kernel.org/r/20230718135551.6592-19-yi.l.liu@intel.com Signed-off-by: Alex Williamson --- drivers/vfio/Kconfig | 12 +++++++++ drivers/vfio/Makefile | 1 + drivers/vfio/device_cdev.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++ drivers/vfio/vfio.h | 54 +++++++++++++++++++++++++++++++++++++++ drivers/vfio/vfio_main.c | 21 +++++++++++++--- include/linux/vfio.h | 4 +++ 6 files changed, 151 insertions(+), 4 deletions(-) create mode 100644 drivers/vfio/device_cdev.c (limited to 'include/linux') diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig index aba36f5be4ec..26f18d92eb97 100644 --- a/drivers/vfio/Kconfig +++ b/drivers/vfio/Kconfig @@ -12,6 +12,18 @@ menuconfig VFIO If you don't know what to do here, say N. if VFIO +config VFIO_DEVICE_CDEV + bool "Support for the VFIO cdev /dev/vfio/devices/vfioX" + depends on IOMMUFD && !SPAPR_TCE_IOMMU + help + The VFIO device cdev is another way for userspace to get device + access. Userspace gets device fd by opening device cdev under + /dev/vfio/devices/vfioX, and then bind the device fd with an iommufd + to set up secure DMA context for device access. This interface does + not support noiommu. + + If you don't know what to do here, say N. + config VFIO_CONTAINER bool "Support for the VFIO container /dev/vfio/vfio" select VFIO_IOMMU_TYPE1 if MMU && (X86 || S390 || ARM || ARM64) diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile index 66f418aef5a9..87ccb16fdd77 100644 --- a/drivers/vfio/Makefile +++ b/drivers/vfio/Makefile @@ -4,6 +4,7 @@ obj-$(CONFIG_VFIO) += vfio.o vfio-y += vfio_main.o \ group.o \ iova_bitmap.o +vfio-$(CONFIG_VFIO_DEVICE_CDEV) += device_cdev.o vfio-$(CONFIG_IOMMUFD) += iommufd.o vfio-$(CONFIG_VFIO_CONTAINER) += container.o vfio-$(CONFIG_VFIO_VIRQFD) += virqfd.o diff --git a/drivers/vfio/device_cdev.c b/drivers/vfio/device_cdev.c new file mode 100644 index 000000000000..bf1032d00107 --- /dev/null +++ b/drivers/vfio/device_cdev.c @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2023 Intel Corporation. + */ +#include + +#include "vfio.h" + +static dev_t device_devt; + +void vfio_init_device_cdev(struct vfio_device *device) +{ + device->device.devt = MKDEV(MAJOR(device_devt), device->index); + cdev_init(&device->cdev, &vfio_device_fops); + device->cdev.owner = THIS_MODULE; +} + +/* + * device access via the fd opened by this function is blocked until + * .open_device() is called successfully during BIND_IOMMUFD. + */ +int vfio_device_fops_cdev_open(struct inode *inode, struct file *filep) +{ + struct vfio_device *device = container_of(inode->i_cdev, + struct vfio_device, cdev); + struct vfio_device_file *df; + int ret; + + /* Paired with the put in vfio_device_fops_release() */ + if (!vfio_device_try_get_registration(device)) + return -ENODEV; + + df = vfio_allocate_device_file(device); + if (IS_ERR(df)) { + ret = PTR_ERR(df); + goto err_put_registration; + } + + filep->private_data = df; + + return 0; + +err_put_registration: + vfio_device_put_registration(device); + return ret; +} + +static char *vfio_device_devnode(const struct device *dev, umode_t *mode) +{ + return kasprintf(GFP_KERNEL, "vfio/devices/%s", dev_name(dev)); +} + +int vfio_cdev_init(struct class *device_class) +{ + device_class->devnode = vfio_device_devnode; + return alloc_chrdev_region(&device_devt, 0, + MINORMASK + 1, "vfio-dev"); +} + +void vfio_cdev_cleanup(void) +{ + unregister_chrdev_region(device_devt, MINORMASK + 1); +} diff --git a/drivers/vfio/vfio.h b/drivers/vfio/vfio.h index 58801adc1a7e..fb8f2fac3d23 100644 --- a/drivers/vfio/vfio.h +++ b/drivers/vfio/vfio.h @@ -266,6 +266,60 @@ vfio_iommufd_compat_attach_ioas(struct vfio_device *device, } #endif +#if IS_ENABLED(CONFIG_VFIO_DEVICE_CDEV) +void vfio_init_device_cdev(struct vfio_device *device); + +static inline int vfio_device_add(struct vfio_device *device) +{ + /* cdev does not support noiommu device */ + if (vfio_device_is_noiommu(device)) + return device_add(&device->device); + vfio_init_device_cdev(device); + return cdev_device_add(&device->cdev, &device->device); +} + +static inline void vfio_device_del(struct vfio_device *device) +{ + if (vfio_device_is_noiommu(device)) + device_del(&device->device); + else + cdev_device_del(&device->cdev, &device->device); +} + +int vfio_device_fops_cdev_open(struct inode *inode, struct file *filep); +int vfio_cdev_init(struct class *device_class); +void vfio_cdev_cleanup(void); +#else +static inline void vfio_init_device_cdev(struct vfio_device *device) +{ +} + +static inline int vfio_device_add(struct vfio_device *device) +{ + return device_add(&device->device); +} + +static inline void vfio_device_del(struct vfio_device *device) +{ + device_del(&device->device); +} + +static inline int vfio_device_fops_cdev_open(struct inode *inode, + struct file *filep) +{ + return 0; +} + +static inline int vfio_cdev_init(struct class *device_class) +{ + return 0; +} + +static inline void vfio_cdev_cleanup(void) +{ +} +#endif /* CONFIG_VFIO_DEVICE_CDEV */ + #if IS_ENABLED(CONFIG_VFIO_VIRQFD) int __init vfio_virqfd_init(void); void vfio_virqfd_exit(void); diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c index 294bb5ecfc1c..8a9ebcc6980b 100644 --- a/drivers/vfio/vfio_main.c +++ b/drivers/vfio/vfio_main.c @@ -292,7 +292,7 @@ static int __vfio_register_dev(struct vfio_device *device, if (ret) return ret; - ret = device_add(&device->device); + ret = vfio_device_add(device); if (ret) goto err_out; @@ -338,8 +338,11 @@ void vfio_unregister_group_dev(struct vfio_device *device) */ vfio_device_group_unregister(device); - /* Balances device_add in register path */ - device_del(&device->device); + /* + * Balances vfio_device_add() in register path, also prevents + * new device opened by userspace in the cdev path. + */ + vfio_device_del(device); vfio_device_put_registration(device); rc = try_wait_for_completion(&device->comp); @@ -567,7 +570,8 @@ static int vfio_device_fops_release(struct inode *inode, struct file *filep) struct vfio_device_file *df = filep->private_data; struct vfio_device *device = df->device; - vfio_df_group_close(df); + if (df->group) + vfio_df_group_close(df); vfio_device_put_registration(device); @@ -1216,6 +1220,7 @@ static int vfio_device_fops_mmap(struct file *filep, struct vm_area_struct *vma) const struct file_operations vfio_device_fops = { .owner = THIS_MODULE, + .open = vfio_device_fops_cdev_open, .release = vfio_device_fops_release, .read = vfio_device_fops_read, .write = vfio_device_fops_write, @@ -1567,9 +1572,16 @@ static int __init vfio_init(void) goto err_dev_class; } + ret = vfio_cdev_init(vfio.device_class); + if (ret) + goto err_alloc_dev_chrdev; + pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n"); return 0; +err_alloc_dev_chrdev: + class_destroy(vfio.device_class); + vfio.device_class = NULL; err_dev_class: vfio_virqfd_exit(); err_virqfd: @@ -1580,6 +1592,7 @@ err_virqfd: static void __exit vfio_cleanup(void) { ida_destroy(&vfio.device_ida); + vfio_cdev_cleanup(); class_destroy(vfio.device_class); vfio.device_class = NULL; vfio_virqfd_exit(); diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 24091a7c7bdb..e0069f26488d 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -51,6 +52,9 @@ struct vfio_device { /* Members below here are private, not for driver use */ unsigned int index; struct device device; /* device.kref covers object life circle */ +#if IS_ENABLED(CONFIG_VFIO_DEVICE_CDEV) + struct cdev cdev; +#endif refcount_t refcount; /* user count on registered device*/ unsigned int open_count; struct completion comp; -- cgit v1.2.3 From 1c9dc07487cb0f246075b2d3b305bba91156d376 Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Tue, 18 Jul 2023 06:55:45 -0700 Subject: iommufd: Add iommufd_ctx_from_fd() It's common to get a reference to the iommufd context from a given file descriptor. So adds an API for it. Existing users of this API are compiled only when IOMMUFD is enabled, so no need to have a stub for the IOMMUFD disabled case. Tested-by: Yanting Jiang Reviewed-by: Jason Gunthorpe Signed-off-by: Yi Liu Link: https://lore.kernel.org/r/20230718135551.6592-21-yi.l.liu@intel.com Signed-off-by: Alex Williamson --- drivers/iommu/iommufd/main.c | 24 ++++++++++++++++++++++++ include/linux/iommufd.h | 1 + 2 files changed, 25 insertions(+) (limited to 'include/linux') diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c index 32ce7befc8dd..4bbb20dff430 100644 --- a/drivers/iommu/iommufd/main.c +++ b/drivers/iommu/iommufd/main.c @@ -377,6 +377,30 @@ struct iommufd_ctx *iommufd_ctx_from_file(struct file *file) } EXPORT_SYMBOL_NS_GPL(iommufd_ctx_from_file, IOMMUFD); +/** + * iommufd_ctx_from_fd - Acquires a reference to the iommufd context + * @fd: File descriptor to obtain the reference from + * + * Returns a pointer to the iommufd_ctx, otherwise ERR_PTR. On success + * the caller is responsible to call iommufd_ctx_put(). + */ +struct iommufd_ctx *iommufd_ctx_from_fd(int fd) +{ + struct file *file; + + file = fget(fd); + if (!file) + return ERR_PTR(-EBADF); + + if (file->f_op != &iommufd_fops) { + fput(file); + return ERR_PTR(-EBADFD); + } + /* fget is the same as iommufd_ctx_get() */ + return file->private_data; +} +EXPORT_SYMBOL_NS_GPL(iommufd_ctx_from_fd, IOMMUFD); + /** * iommufd_ctx_put - Put back a reference * @ictx: Context to put back diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index 3a3216cb9482..9657c58813dc 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -54,6 +54,7 @@ void iommufd_ctx_get(struct iommufd_ctx *ictx); #if IS_ENABLED(CONFIG_IOMMUFD) struct iommufd_ctx *iommufd_ctx_from_file(struct file *file); +struct iommufd_ctx *iommufd_ctx_from_fd(int fd); void iommufd_ctx_put(struct iommufd_ctx *ictx); bool iommufd_ctx_has_group(struct iommufd_ctx *ictx, struct iommu_group *group); -- cgit v1.2.3 From 5fcc26969a164e6a3154bb68badd6712716eb954 Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Tue, 18 Jul 2023 06:55:47 -0700 Subject: vfio: Add VFIO_DEVICE_BIND_IOMMUFD This adds ioctl for userspace to bind device cdev fd to iommufd. VFIO_DEVICE_BIND_IOMMUFD: bind device to an iommufd, hence gain DMA control provided by the iommufd. open_device op is called after bind_iommufd op. Tested-by: Nicolin Chen Tested-by: Matthew Rosato Tested-by: Yanting Jiang Tested-by: Shameer Kolothum Tested-by: Terrence Xu Tested-by: Zhenzhong Duan Signed-off-by: Yi Liu Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20230718135551.6592-23-yi.l.liu@intel.com Signed-off-by: Alex Williamson --- drivers/vfio/device_cdev.c | 107 +++++++++++++++++++++++++++++++++++++++++++++ drivers/vfio/vfio.h | 13 ++++++ drivers/vfio/vfio_main.c | 5 +++ include/linux/vfio.h | 5 ++- include/uapi/linux/vfio.h | 27 ++++++++++++ 5 files changed, 155 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/vfio/device_cdev.c b/drivers/vfio/device_cdev.c index bf1032d00107..f40784dd5561 100644 --- a/drivers/vfio/device_cdev.c +++ b/drivers/vfio/device_cdev.c @@ -3,6 +3,7 @@ * Copyright (c) 2023 Intel Corporation. */ #include +#include #include "vfio.h" @@ -45,6 +46,112 @@ err_put_registration: return ret; } +static void vfio_df_get_kvm_safe(struct vfio_device_file *df) +{ + spin_lock(&df->kvm_ref_lock); + vfio_device_get_kvm_safe(df->device, df->kvm); + spin_unlock(&df->kvm_ref_lock); +} + +long vfio_df_ioctl_bind_iommufd(struct vfio_device_file *df, + struct vfio_device_bind_iommufd __user *arg) +{ + struct vfio_device *device = df->device; + struct vfio_device_bind_iommufd bind; + unsigned long minsz; + int ret; + + static_assert(__same_type(arg->out_devid, df->devid)); + + minsz = offsetofend(struct vfio_device_bind_iommufd, out_devid); + + if (copy_from_user(&bind, arg, minsz)) + return -EFAULT; + + if (bind.argsz < minsz || bind.flags || bind.iommufd < 0) + return -EINVAL; + + /* BIND_IOMMUFD only allowed for cdev fds */ + if (df->group) + return -EINVAL; + + ret = vfio_device_block_group(device); + if (ret) + return ret; + + mutex_lock(&device->dev_set->lock); + /* one device cannot be bound twice */ + if (df->access_granted) { + ret = -EINVAL; + goto out_unlock; + } + + df->iommufd = iommufd_ctx_from_fd(bind.iommufd); + if (IS_ERR(df->iommufd)) { + ret = PTR_ERR(df->iommufd); + df->iommufd = NULL; + goto out_unlock; + } + + /* + * Before the device open, get the KVM pointer currently + * associated with the device file (if there is) and obtain + * a reference. This reference is held until device closed. + * Save the pointer in the device for use by drivers. + */ + vfio_df_get_kvm_safe(df); + + ret = vfio_df_open(df); + if (ret) + goto out_put_kvm; + + ret = copy_to_user(&arg->out_devid, &df->devid, + sizeof(df->devid)) ? -EFAULT : 0; + if (ret) + goto out_close_device; + + device->cdev_opened = true; + /* + * Paired with smp_load_acquire() in vfio_device_fops::ioctl/ + * read/write/mmap + */ + smp_store_release(&df->access_granted, true); + mutex_unlock(&device->dev_set->lock); + return 0; + +out_close_device: + vfio_df_close(df); +out_put_kvm: + vfio_device_put_kvm(device); + iommufd_ctx_put(df->iommufd); + df->iommufd = NULL; +out_unlock: + mutex_unlock(&device->dev_set->lock); + vfio_device_unblock_group(device); + return ret; +} + +void vfio_df_unbind_iommufd(struct vfio_device_file *df) +{ + struct vfio_device *device = df->device; + + /* + * In the time of close, there is no contention with another one + * changing this flag. So read df->access_granted without lock + * and no smp_load_acquire() is ok. + */ + if (!df->access_granted) + return; + + mutex_lock(&device->dev_set->lock); + vfio_df_close(df); + vfio_device_put_kvm(device); + iommufd_ctx_put(df->iommufd); + device->cdev_opened = false; + mutex_unlock(&device->dev_set->lock); + vfio_device_unblock_group(device); +} + static char *vfio_device_devnode(const struct device *dev, umode_t *mode) { return kasprintf(GFP_KERNEL, "vfio/devices/%s", dev_name(dev)); diff --git a/drivers/vfio/vfio.h b/drivers/vfio/vfio.h index c2aa65382592..b6d4ba1ef2b8 100644 --- a/drivers/vfio/vfio.h +++ b/drivers/vfio/vfio.h @@ -287,6 +287,9 @@ static inline void vfio_device_del(struct vfio_device *device) } int vfio_device_fops_cdev_open(struct inode *inode, struct file *filep); +long vfio_df_ioctl_bind_iommufd(struct vfio_device_file *df, + struct vfio_device_bind_iommufd __user *arg); +void vfio_df_unbind_iommufd(struct vfio_device_file *df); int vfio_cdev_init(struct class *device_class); void vfio_cdev_cleanup(void); #else @@ -310,6 +313,16 @@ static inline int vfio_device_fops_cdev_open(struct inode *inode, return 0; } +static inline long vfio_df_ioctl_bind_iommufd(struct vfio_device_file *df, + struct vfio_device_bind_iommufd __user *arg) +{ + return -ENOTTY; +} + +static inline void vfio_df_unbind_iommufd(struct vfio_device_file *df) +{ +} + static inline int vfio_cdev_init(struct class *device_class) { return 0; diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c index a2744cb64c6d..9fdf93ff17cf 100644 --- a/drivers/vfio/vfio_main.c +++ b/drivers/vfio/vfio_main.c @@ -575,6 +575,8 @@ static int vfio_device_fops_release(struct inode *inode, struct file *filep) if (df->group) vfio_df_group_close(df); + else + vfio_df_unbind_iommufd(df); vfio_device_put_registration(device); @@ -1149,6 +1151,9 @@ static long vfio_device_fops_unl_ioctl(struct file *filep, void __user *uptr = (void __user *)arg; int ret; + if (cmd == VFIO_DEVICE_BIND_IOMMUFD) + return vfio_df_ioctl_bind_iommufd(df, uptr); + /* Paired with smp_store_release() following vfio_df_open() */ if (!smp_load_acquire(&df->access_granted)) return -EINVAL; diff --git a/include/linux/vfio.h b/include/linux/vfio.h index e0069f26488d..d6228c839c44 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -64,8 +64,9 @@ struct vfio_device { void (*put_kvm)(struct kvm *kvm); #if IS_ENABLED(CONFIG_IOMMUFD) struct iommufd_device *iommufd_device; - bool iommufd_attached; + u8 iommufd_attached:1; #endif + u8 cdev_opened:1; }; /** @@ -168,7 +169,7 @@ vfio_iommufd_get_dev_id(struct vfio_device *vdev, struct iommufd_ctx *ictx) static inline bool vfio_device_cdev_opened(struct vfio_device *device) { - return false; + return device->cdev_opened; } /** diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index 4c3d548e9c96..098946b23e86 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -897,6 +897,33 @@ struct vfio_device_feature { #define VFIO_DEVICE_FEATURE _IO(VFIO_TYPE, VFIO_BASE + 17) +/* + * VFIO_DEVICE_BIND_IOMMUFD - _IOR(VFIO_TYPE, VFIO_BASE + 18, + * struct vfio_device_bind_iommufd) + * @argsz: User filled size of this data. + * @flags: Must be 0. + * @iommufd: iommufd to bind. + * @out_devid: The device id generated by this bind. devid is a handle for + * this device/iommufd bond and can be used in IOMMUFD commands. + * + * Bind a vfio_device to the specified iommufd. + * + * User is restricted from accessing the device before the binding operation + * is completed. Only allowed on cdev fds. + * + * Unbind is automatically conducted when device fd is closed. + * + * Return: 0 on success, -errno on failure. + */ +struct vfio_device_bind_iommufd { + __u32 argsz; + __u32 flags; + __s32 iommufd; + __u32 out_devid; +}; + +#define VFIO_DEVICE_BIND_IOMMUFD _IO(VFIO_TYPE, VFIO_BASE + 18) + /* * Provide support for setting a PCI VF Token, which is used as a shared * secret between PF and VF drivers. This feature may only be set on a -- cgit v1.2.3 From c1cce6d079b875396c9a7c6838fc5b024758e540 Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Tue, 18 Jul 2023 06:55:50 -0700 Subject: vfio: Compile vfio_group infrastructure optionally vfio_group is not needed for vfio device cdev, so with vfio device cdev introduced, the vfio_group infrastructures can be compiled out if only cdev is needed. Reviewed-by: Jason Gunthorpe Tested-by: Nicolin Chen Tested-by: Matthew Rosato Tested-by: Yanting Jiang Tested-by: Shameer Kolothum Tested-by: Terrence Xu Tested-by: Zhenzhong Duan Signed-off-by: Yi Liu Link: https://lore.kernel.org/r/20230718135551.6592-26-yi.l.liu@intel.com Signed-off-by: Alex Williamson --- drivers/iommu/iommufd/Kconfig | 4 +- drivers/vfio/Kconfig | 15 ++++++++ drivers/vfio/Makefile | 2 +- drivers/vfio/vfio.h | 89 ++++++++++++++++++++++++++++++++++++++++--- include/linux/vfio.h | 25 ++++++++++-- 5 files changed, 123 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/drivers/iommu/iommufd/Kconfig b/drivers/iommu/iommufd/Kconfig index ada693ea51a7..99d4b075df49 100644 --- a/drivers/iommu/iommufd/Kconfig +++ b/drivers/iommu/iommufd/Kconfig @@ -14,8 +14,8 @@ config IOMMUFD if IOMMUFD config IOMMUFD_VFIO_CONTAINER bool "IOMMUFD provides the VFIO container /dev/vfio/vfio" - depends on VFIO && !VFIO_CONTAINER - default VFIO && !VFIO_CONTAINER + depends on VFIO_GROUP && !VFIO_CONTAINER + default VFIO_GROUP && !VFIO_CONTAINER help IOMMUFD will provide /dev/vfio/vfio instead of VFIO. This relies on IOMMUFD providing compatibility emulation to give the same ioctls. diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig index 26f18d92eb97..6bda6dbb4878 100644 --- a/drivers/vfio/Kconfig +++ b/drivers/vfio/Kconfig @@ -4,6 +4,8 @@ menuconfig VFIO select IOMMU_API depends on IOMMUFD || !IOMMUFD select INTERVAL_TREE + select VFIO_GROUP if SPAPR_TCE_IOMMU || IOMMUFD=n + select VFIO_DEVICE_CDEV if !VFIO_GROUP select VFIO_CONTAINER if IOMMUFD=n help VFIO provides a framework for secure userspace device drivers. @@ -15,6 +17,7 @@ if VFIO config VFIO_DEVICE_CDEV bool "Support for the VFIO cdev /dev/vfio/devices/vfioX" depends on IOMMUFD && !SPAPR_TCE_IOMMU + default !VFIO_GROUP help The VFIO device cdev is another way for userspace to get device access. Userspace gets device fd by opening device cdev under @@ -24,9 +27,20 @@ config VFIO_DEVICE_CDEV If you don't know what to do here, say N. +config VFIO_GROUP + bool "Support for the VFIO group /dev/vfio/$group_id" + default y + help + VFIO group support provides the traditional model for accessing + devices through VFIO and is used by the majority of userspace + applications and drivers making use of VFIO. + + If you don't know what to do here, say Y. + config VFIO_CONTAINER bool "Support for the VFIO container /dev/vfio/vfio" select VFIO_IOMMU_TYPE1 if MMU && (X86 || S390 || ARM || ARM64) + depends on VFIO_GROUP default y help The VFIO container is the classic interface to VFIO for establishing @@ -48,6 +62,7 @@ endif config VFIO_NOIOMMU bool "VFIO No-IOMMU support" + depends on VFIO_GROUP help VFIO is built on the ability to isolate devices using the IOMMU. Only with an IOMMU can userspace access to DMA capable devices be diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile index 87ccb16fdd77..c82ea032d352 100644 --- a/drivers/vfio/Makefile +++ b/drivers/vfio/Makefile @@ -2,9 +2,9 @@ obj-$(CONFIG_VFIO) += vfio.o vfio-y += vfio_main.o \ - group.o \ iova_bitmap.o vfio-$(CONFIG_VFIO_DEVICE_CDEV) += device_cdev.o +vfio-$(CONFIG_VFIO_GROUP) += group.o vfio-$(CONFIG_IOMMUFD) += iommufd.o vfio-$(CONFIG_VFIO_CONTAINER) += container.o vfio-$(CONFIG_VFIO_VIRQFD) += virqfd.o diff --git a/drivers/vfio/vfio.h b/drivers/vfio/vfio.h index 8353774a5d07..307e3f29b527 100644 --- a/drivers/vfio/vfio.h +++ b/drivers/vfio/vfio.h @@ -36,6 +36,12 @@ vfio_allocate_device_file(struct vfio_device *device); extern const struct file_operations vfio_device_fops; +#ifdef CONFIG_VFIO_NOIOMMU +extern bool vfio_noiommu __read_mostly; +#else +enum { vfio_noiommu = false }; +#endif + enum vfio_group_type { /* * Physical device with IOMMU backing. @@ -60,6 +66,7 @@ enum vfio_group_type { VFIO_NO_IOMMU, }; +#if IS_ENABLED(CONFIG_VFIO_GROUP) struct vfio_group { struct device dev; struct cdev cdev; @@ -111,6 +118,82 @@ static inline bool vfio_device_is_noiommu(struct vfio_device *vdev) return IS_ENABLED(CONFIG_VFIO_NOIOMMU) && vdev->group->type == VFIO_NO_IOMMU; } +#else +struct vfio_group; + +static inline int vfio_device_block_group(struct vfio_device *device) +{ + return 0; +} + +static inline void vfio_device_unblock_group(struct vfio_device *device) +{ +} + +static inline int vfio_device_set_group(struct vfio_device *device, + enum vfio_group_type type) +{ + return 0; +} + +static inline void vfio_device_remove_group(struct vfio_device *device) +{ +} + +static inline void vfio_device_group_register(struct vfio_device *device) +{ +} + +static inline void vfio_device_group_unregister(struct vfio_device *device) +{ +} + +static inline int vfio_device_group_use_iommu(struct vfio_device *device) +{ + return -EOPNOTSUPP; +} + +static inline void vfio_device_group_unuse_iommu(struct vfio_device *device) +{ +} + +static inline void vfio_df_group_close(struct vfio_device_file *df) +{ +} + +static inline struct vfio_group *vfio_group_from_file(struct file *file) +{ + return NULL; +} + +static inline bool vfio_group_enforced_coherent(struct vfio_group *group) +{ + return true; +} + +static inline void vfio_group_set_kvm(struct vfio_group *group, struct kvm *kvm) +{ +} + +static inline bool vfio_device_has_container(struct vfio_device *device) +{ + return false; +} + +static inline int __init vfio_group_init(void) +{ + return 0; +} + +static inline void vfio_group_cleanup(void) +{ +} + +static inline bool vfio_device_is_noiommu(struct vfio_device *vdev) +{ + return false; +} +#endif /* CONFIG_VFIO_GROUP */ #if IS_ENABLED(CONFIG_VFIO_CONTAINER) /** @@ -351,12 +434,6 @@ static inline void vfio_virqfd_exit(void) } #endif -#ifdef CONFIG_VFIO_NOIOMMU -extern bool vfio_noiommu __read_mostly; -#else -enum { vfio_noiommu = false }; -#endif - #ifdef CONFIG_HAVE_KVM void vfio_device_get_kvm_safe(struct vfio_device *device, struct kvm *kvm); void vfio_device_put_kvm(struct vfio_device *device); diff --git a/include/linux/vfio.h b/include/linux/vfio.h index d6228c839c44..5a1dee983f17 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -43,7 +43,11 @@ struct vfio_device { */ const struct vfio_migration_ops *mig_ops; const struct vfio_log_ops *log_ops; +#if IS_ENABLED(CONFIG_VFIO_GROUP) struct vfio_group *group; + struct list_head group_next; + struct list_head iommu_entry; +#endif struct vfio_device_set *dev_set; struct list_head dev_set_list; unsigned int migration_flags; @@ -58,8 +62,6 @@ struct vfio_device { refcount_t refcount; /* user count on registered device*/ unsigned int open_count; struct completion comp; - struct list_head group_next; - struct list_head iommu_entry; struct iommufd_access *iommufd_access; void (*put_kvm)(struct kvm *kvm); #if IS_ENABLED(CONFIG_IOMMUFD) @@ -284,12 +286,29 @@ int vfio_mig_get_next_state(struct vfio_device *device, /* * External user API */ +#if IS_ENABLED(CONFIG_VFIO_GROUP) struct iommu_group *vfio_file_iommu_group(struct file *file); bool vfio_file_is_group(struct file *file); +bool vfio_file_has_dev(struct file *file, struct vfio_device *device); +#else +static inline struct iommu_group *vfio_file_iommu_group(struct file *file) +{ + return NULL; +} + +static inline bool vfio_file_is_group(struct file *file) +{ + return false; +} + +static inline bool vfio_file_has_dev(struct file *file, struct vfio_device *device) +{ + return false; +} +#endif bool vfio_file_is_valid(struct file *file); bool vfio_file_enforced_coherent(struct file *file); void vfio_file_set_kvm(struct file *file, struct kvm *kvm); -bool vfio_file_has_dev(struct file *file, struct vfio_device *device); #define VFIO_PIN_PAGES_MAX_ENTRIES (PAGE_SIZE/sizeof(unsigned long)) -- cgit v1.2.3