From e36ba5ab808ef6237c3148d469c8238674230e2b Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 11 Mar 2025 12:44:23 -0700 Subject: iommufd: Add IOMMUFD_OBJ_VEVENTQ and IOMMUFD_CMD_VEVENTQ_ALLOC Introduce a new IOMMUFD_OBJ_VEVENTQ object for vIOMMU Event Queue that provides user space (VMM) another FD to read the vIOMMU Events. Allow a vIOMMU object to allocate vEVENTQs, with a condition that each vIOMMU can only have one single vEVENTQ per type. Add iommufd_veventq_alloc() with iommufd_veventq_ops for the new ioctl. Link: https://patch.msgid.link/r/21acf0751dd5c93846935ee06f93b9c65eff5e04.1741719725.git.nicolinc@nvidia.com Reviewed-by: Lu Baolu Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- include/uapi/linux/iommufd.h | 82 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 82 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index 78747b24bd0f..dbb8787d9c63 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -55,6 +55,7 @@ enum { IOMMUFD_CMD_VIOMMU_ALLOC = 0x90, IOMMUFD_CMD_VDEVICE_ALLOC = 0x91, IOMMUFD_CMD_IOAS_CHANGE_PROCESS = 0x92, + IOMMUFD_CMD_VEVENTQ_ALLOC = 0x93, }; /** @@ -1014,4 +1015,85 @@ struct iommu_ioas_change_process { #define IOMMU_IOAS_CHANGE_PROCESS \ _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_CHANGE_PROCESS) +/** + * enum iommu_veventq_flag - flag for struct iommufd_vevent_header + * @IOMMU_VEVENTQ_FLAG_LOST_EVENTS: vEVENTQ has lost vEVENTs + */ +enum iommu_veventq_flag { + IOMMU_VEVENTQ_FLAG_LOST_EVENTS = (1U << 0), +}; + +/** + * struct iommufd_vevent_header - Virtual Event Header for a vEVENTQ Status + * @flags: Combination of enum iommu_veventq_flag + * @sequence: The sequence index of a vEVENT in the vEVENTQ, with a range of + * [0, INT_MAX] where the following index of INT_MAX is 0 + * + * Each iommufd_vevent_header reports a sequence index of the following vEVENT: + * ------------------------------------------------------------------------- + * | header0 {sequence=0} | data0 | header1 {sequence=1} | data1 |...| dataN | + * ------------------------------------------------------------------------- + * And this sequence index is expected to be monotonic to the sequence index of + * the previous vEVENT. If two adjacent sequence indexes has a delta larger than + * 1, it means that delta - 1 number of vEVENTs has lost, e.g. two lost vEVENTs: + * ------------------------------------------------------------------------- + * | ... | header3 {sequence=3} | data3 | header6 {sequence=6} | data6 | ... | + * ------------------------------------------------------------------------- + * If a vEVENT lost at the tail of the vEVENTQ and there is no following vEVENT + * providing the next sequence index, an IOMMU_VEVENTQ_FLAG_LOST_EVENTS header + * would be added to the tail, and no data would follow this header: + * --------------------------------------------------------------------------- + * |..| header3 {sequence=3} | data3 | header4 {flags=LOST_EVENTS, sequence=4} | + * --------------------------------------------------------------------------- + */ +struct iommufd_vevent_header { + __u32 flags; + __u32 sequence; +}; + +/** + * enum iommu_veventq_type - Virtual Event Queue Type + * @IOMMU_VEVENTQ_TYPE_DEFAULT: Reserved for future use + */ +enum iommu_veventq_type { + IOMMU_VEVENTQ_TYPE_DEFAULT = 0, +}; + +/** + * struct iommu_veventq_alloc - ioctl(IOMMU_VEVENTQ_ALLOC) + * @size: sizeof(struct iommu_veventq_alloc) + * @flags: Must be 0 + * @viommu_id: virtual IOMMU ID to associate the vEVENTQ with + * @type: Type of the vEVENTQ. Must be defined in enum iommu_veventq_type + * @veventq_depth: Maximum number of events in the vEVENTQ + * @out_veventq_id: The ID of the new vEVENTQ + * @out_veventq_fd: The fd of the new vEVENTQ. User space must close the + * successfully returned fd after using it + * @__reserved: Must be 0 + * + * Explicitly allocate a virtual event queue interface for a vIOMMU. A vIOMMU + * can have multiple FDs for different types, but is confined to one per @type. + * User space should open the @out_veventq_fd to read vEVENTs out of a vEVENTQ, + * if there are vEVENTs available. A vEVENTQ will lose events due to overflow, + * if the number of the vEVENTs hits @veventq_depth. + * + * Each vEVENT in a vEVENTQ encloses a struct iommufd_vevent_header followed by + * a type-specific data structure, in a normal case: + * ------------------------------------------------------------- + * || header0 | data0 | header1 | data1 | ... | headerN | dataN || + * ------------------------------------------------------------- + * unless a tailing IOMMU_VEVENTQ_FLAG_LOST_EVENTS header is logged (refer to + * struct iommufd_vevent_header). + */ +struct iommu_veventq_alloc { + __u32 size; + __u32 flags; + __u32 viommu_id; + __u32 type; + __u32 veventq_depth; + __u32 out_veventq_id; + __u32 out_veventq_fd; + __u32 __reserved; +}; +#define IOMMU_VEVENTQ_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VEVENTQ_ALLOC) #endif -- cgit v1.2.3 From e7d3fa3d29d5b2ed12d247cf57a0a34fffe89eb8 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 11 Mar 2025 12:44:31 -0700 Subject: iommu/arm-smmu-v3: Report events that belong to devices attached to vIOMMU Aside from the IOPF framework, iommufd provides an additional pathway to report hardware events, via the vEVENTQ of vIOMMU infrastructure. Define an iommu_vevent_arm_smmuv3 uAPI structure, and report stage-1 events in the threaded IRQ handler. Also, add another four event record types that can be forwarded to a VM. Link: https://patch.msgid.link/r/5cf6719682fdfdabffdb08374cdf31ad2466d75a.1741719725.git.nicolinc@nvidia.com Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Reviewed-by: Pranjal Shrivastava Acked-by: Will Deacon Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- .../iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c | 17 +++++++ drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 58 ++++++++++++---------- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 7 +++ include/uapi/linux/iommufd.h | 23 +++++++++ 4 files changed, 80 insertions(+), 25 deletions(-) (limited to 'include/uapi/linux') diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c index dfc80e1a8e2e..65adfed56969 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c @@ -433,4 +433,21 @@ struct iommufd_viommu *arm_vsmmu_alloc(struct device *dev, return &vsmmu->core; } +int arm_vmaster_report_event(struct arm_smmu_vmaster *vmaster, u64 *evt) +{ + struct iommu_vevent_arm_smmuv3 vevt; + int i; + + lockdep_assert_held(&vmaster->vsmmu->smmu->streams_mutex); + + vevt.evt[0] = cpu_to_le64((evt[0] & ~EVTQ_0_SID) | + FIELD_PREP(EVTQ_0_SID, vmaster->vsid)); + for (i = 1; i < EVTQ_ENT_DWORDS; i++) + vevt.evt[i] = cpu_to_le64(evt[i]); + + return iommufd_viommu_report_event(&vmaster->vsmmu->core, + IOMMU_VEVENTQ_TYPE_ARM_SMMUV3, &vevt, + sizeof(vevt)); +} + MODULE_IMPORT_NS("IOMMUFD"); diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 964d2cf27d3d..5fa817a8f5f1 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -1813,8 +1813,8 @@ static void arm_smmu_decode_event(struct arm_smmu_device *smmu, u64 *raw, mutex_unlock(&smmu->streams_mutex); } -static int arm_smmu_handle_event(struct arm_smmu_device *smmu, - struct arm_smmu_event *event) +static int arm_smmu_handle_event(struct arm_smmu_device *smmu, u64 *evt, + struct arm_smmu_event *event) { int ret = 0; u32 perm = 0; @@ -1823,6 +1823,10 @@ static int arm_smmu_handle_event(struct arm_smmu_device *smmu, struct iommu_fault *flt = &fault_evt.fault; switch (event->id) { + case EVT_ID_BAD_STE_CONFIG: + case EVT_ID_STREAM_DISABLED_FAULT: + case EVT_ID_BAD_SUBSTREAMID_CONFIG: + case EVT_ID_BAD_CD_CONFIG: case EVT_ID_TRANSLATION_FAULT: case EVT_ID_ADDR_SIZE_FAULT: case EVT_ID_ACCESS_FAULT: @@ -1832,31 +1836,30 @@ static int arm_smmu_handle_event(struct arm_smmu_device *smmu, return -EOPNOTSUPP; } - if (!event->stall) - return -EOPNOTSUPP; - - if (event->read) - perm |= IOMMU_FAULT_PERM_READ; - else - perm |= IOMMU_FAULT_PERM_WRITE; + if (event->stall) { + if (event->read) + perm |= IOMMU_FAULT_PERM_READ; + else + perm |= IOMMU_FAULT_PERM_WRITE; - if (event->instruction) - perm |= IOMMU_FAULT_PERM_EXEC; + if (event->instruction) + perm |= IOMMU_FAULT_PERM_EXEC; - if (event->privileged) - perm |= IOMMU_FAULT_PERM_PRIV; + if (event->privileged) + perm |= IOMMU_FAULT_PERM_PRIV; - flt->type = IOMMU_FAULT_PAGE_REQ; - flt->prm = (struct iommu_fault_page_request) { - .flags = IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE, - .grpid = event->stag, - .perm = perm, - .addr = event->iova, - }; + flt->type = IOMMU_FAULT_PAGE_REQ; + flt->prm = (struct iommu_fault_page_request){ + .flags = IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE, + .grpid = event->stag, + .perm = perm, + .addr = event->iova, + }; - if (event->ssv) { - flt->prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID; - flt->prm.pasid = event->ssid; + if (event->ssv) { + flt->prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID; + flt->prm.pasid = event->ssid; + } } mutex_lock(&smmu->streams_mutex); @@ -1866,7 +1869,12 @@ static int arm_smmu_handle_event(struct arm_smmu_device *smmu, goto out_unlock; } - ret = iommu_report_device_fault(master->dev, &fault_evt); + if (event->stall) + ret = iommu_report_device_fault(master->dev, &fault_evt); + else if (master->vmaster && !event->s2) + ret = arm_vmaster_report_event(master->vmaster, evt); + else + ret = -EOPNOTSUPP; /* Unhandled events should be pinned */ out_unlock: mutex_unlock(&smmu->streams_mutex); return ret; @@ -1944,7 +1952,7 @@ static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev) do { while (!queue_remove_raw(q, evt)) { arm_smmu_decode_event(smmu, evt, &event); - if (arm_smmu_handle_event(smmu, &event)) + if (arm_smmu_handle_event(smmu, evt, &event)) arm_smmu_dump_event(smmu, evt, &event, &rs); put_device(event.dev); diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index 557b300a3a0f..df06076a1698 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -1066,6 +1066,7 @@ int arm_smmu_attach_prepare_vmaster(struct arm_smmu_attach_state *state, struct arm_smmu_nested_domain *nested_domain); void arm_smmu_attach_commit_vmaster(struct arm_smmu_attach_state *state); void arm_smmu_master_clear_vmaster(struct arm_smmu_master *master); +int arm_vmaster_report_event(struct arm_smmu_vmaster *vmaster, u64 *evt); #else #define arm_smmu_hw_info NULL #define arm_vsmmu_alloc NULL @@ -1086,6 +1087,12 @@ static inline void arm_smmu_master_clear_vmaster(struct arm_smmu_master *master) { } + +static inline int arm_vmaster_report_event(struct arm_smmu_vmaster *vmaster, + u64 *evt) +{ + return -EOPNOTSUPP; +} #endif /* CONFIG_ARM_SMMU_V3_IOMMUFD */ #endif /* _ARM_SMMU_V3_H */ diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index dbb8787d9c63..8719d4f5d618 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -1054,9 +1054,32 @@ struct iommufd_vevent_header { /** * enum iommu_veventq_type - Virtual Event Queue Type * @IOMMU_VEVENTQ_TYPE_DEFAULT: Reserved for future use + * @IOMMU_VEVENTQ_TYPE_ARM_SMMUV3: ARM SMMUv3 Virtual Event Queue */ enum iommu_veventq_type { IOMMU_VEVENTQ_TYPE_DEFAULT = 0, + IOMMU_VEVENTQ_TYPE_ARM_SMMUV3 = 1, +}; + +/** + * struct iommu_vevent_arm_smmuv3 - ARM SMMUv3 Virtual Event + * (IOMMU_VEVENTQ_TYPE_ARM_SMMUV3) + * @evt: 256-bit ARM SMMUv3 Event record, little-endian. + * Reported event records: (Refer to "7.3 Event records" in SMMUv3 HW Spec) + * - 0x04 C_BAD_STE + * - 0x06 F_STREAM_DISABLED + * - 0x08 C_BAD_SUBSTREAMID + * - 0x0a C_BAD_CD + * - 0x10 F_TRANSLATION + * - 0x11 F_ADDR_SIZE + * - 0x12 F_ACCESS + * - 0x13 F_PERMISSION + * + * StreamID field reports a virtual device ID. To receive a virtual event for a + * device, a vDEVICE must be allocated via IOMMU_VDEVICE_ALLOC. + */ +struct iommu_vevent_arm_smmuv3 { + __aligned_le64 evt[4]; }; /** -- cgit v1.2.3 From dbc5f37b4f8ad833132f77c1f67e68bb11ca9b9e Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Fri, 21 Mar 2025 10:19:36 -0700 Subject: iommufd: Allow allocating PASID-compatible domain The underlying infrastructure has supported the PASID attach and related enforcement per the requirement of the IOMMU_HWPT_ALLOC_PASID flag. This extends iommufd to support PASID compatible domain requested by userspace. Link: https://patch.msgid.link/r/20250321171940.7213-15-yi.l.liu@intel.com Reviewed-by: Jason Gunthorpe Reviewed-by: Nicolin Chen Signed-off-by: Yi Liu Tested-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/hw_pagetable.c | 7 ++++--- include/uapi/linux/iommufd.h | 3 +++ 2 files changed, 7 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/drivers/iommu/iommufd/hw_pagetable.c b/drivers/iommu/iommufd/hw_pagetable.c index 3724533a23c9..487779470261 100644 --- a/drivers/iommu/iommufd/hw_pagetable.c +++ b/drivers/iommu/iommufd/hw_pagetable.c @@ -112,7 +112,8 @@ iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas, { const u32 valid_flags = IOMMU_HWPT_ALLOC_NEST_PARENT | IOMMU_HWPT_ALLOC_DIRTY_TRACKING | - IOMMU_HWPT_FAULT_ID_VALID; + IOMMU_HWPT_FAULT_ID_VALID | + IOMMU_HWPT_ALLOC_PASID; const struct iommu_ops *ops = dev_iommu_ops(idev->dev); struct iommufd_hwpt_paging *hwpt_paging; struct iommufd_hw_pagetable *hwpt; @@ -233,7 +234,7 @@ iommufd_hwpt_nested_alloc(struct iommufd_ctx *ictx, struct iommufd_hw_pagetable *hwpt; int rc; - if ((flags & ~IOMMU_HWPT_FAULT_ID_VALID) || + if ((flags & ~(IOMMU_HWPT_FAULT_ID_VALID | IOMMU_HWPT_ALLOC_PASID)) || !user_data->len || !ops->domain_alloc_nested) return ERR_PTR(-EOPNOTSUPP); if (parent->auto_domain || !parent->nest_parent || @@ -290,7 +291,7 @@ iommufd_viommu_alloc_hwpt_nested(struct iommufd_viommu *viommu, u32 flags, struct iommufd_hw_pagetable *hwpt; int rc; - if (flags & ~IOMMU_HWPT_FAULT_ID_VALID) + if (flags & ~(IOMMU_HWPT_FAULT_ID_VALID | IOMMU_HWPT_ALLOC_PASID)) return ERR_PTR(-EOPNOTSUPP); if (!user_data->len) return ERR_PTR(-EOPNOTSUPP); diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index 8719d4f5d618..6901804ec736 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -393,6 +393,9 @@ struct iommu_vfio_ioas { * Any domain attached to the non-PASID part of the * device must also be flagged, otherwise attaching a * PASID will blocked. + * For the user that wants to attach PASID, ioas is + * not recommended for both the non-PASID part + * and PASID part of the device. * If IOMMU does not support PASID it will return * error (-EOPNOTSUPP). */ -- cgit v1.2.3 From ad744ed5dd8b70e9256fc1ff18aaaffeedf5f21e Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Fri, 21 Mar 2025 11:01:41 -0700 Subject: vfio: VFIO_DEVICE_[AT|DE]TACH_IOMMUFD_PT support pasid This extends the VFIO_DEVICE_[AT|DE]TACH_IOMMUFD_PT ioctls to attach/detach a given pasid of a vfio device to/from an IOAS/HWPT. Link: https://patch.msgid.link/r/20250321180143.8468-4-yi.l.liu@intel.com Reviewed-by: Alex Williamson Reviewed-by: Kevin Tian Reviewed-by: Nicolin Chen Tested-by: Nicolin Chen Signed-off-by: Yi Liu Signed-off-by: Jason Gunthorpe --- drivers/vfio/device_cdev.c | 60 +++++++++++++++++++++++++++++++++++++++------- include/uapi/linux/vfio.h | 29 ++++++++++++++-------- 2 files changed, 71 insertions(+), 18 deletions(-) (limited to 'include/uapi/linux') diff --git a/drivers/vfio/device_cdev.c b/drivers/vfio/device_cdev.c index bb1817bd4ff3..281a8dc3ed49 100644 --- a/drivers/vfio/device_cdev.c +++ b/drivers/vfio/device_cdev.c @@ -162,9 +162,9 @@ void vfio_df_unbind_iommufd(struct vfio_device_file *df) int vfio_df_ioctl_attach_pt(struct vfio_device_file *df, struct vfio_device_attach_iommufd_pt __user *arg) { - struct vfio_device *device = df->device; struct vfio_device_attach_iommufd_pt attach; - unsigned long minsz; + struct vfio_device *device = df->device; + unsigned long minsz, xend = 0; int ret; minsz = offsetofend(struct vfio_device_attach_iommufd_pt, pt_id); @@ -172,11 +172,34 @@ int vfio_df_ioctl_attach_pt(struct vfio_device_file *df, if (copy_from_user(&attach, arg, minsz)) return -EFAULT; - if (attach.argsz < minsz || attach.flags) + if (attach.argsz < minsz) return -EINVAL; + if (attach.flags & ~VFIO_DEVICE_ATTACH_PASID) + return -EINVAL; + + if (attach.flags & VFIO_DEVICE_ATTACH_PASID) { + if (!device->ops->pasid_attach_ioas) + return -EOPNOTSUPP; + xend = offsetofend(struct vfio_device_attach_iommufd_pt, pasid); + } + + if (xend) { + if (attach.argsz < xend) + return -EINVAL; + + if (copy_from_user((void *)&attach + minsz, + (void __user *)arg + minsz, xend - minsz)) + return -EFAULT; + } + mutex_lock(&device->dev_set->lock); - ret = device->ops->attach_ioas(device, &attach.pt_id); + if (attach.flags & VFIO_DEVICE_ATTACH_PASID) + ret = device->ops->pasid_attach_ioas(device, + attach.pasid, + &attach.pt_id); + else + ret = device->ops->attach_ioas(device, &attach.pt_id); if (ret) goto out_unlock; @@ -198,20 +221,41 @@ out_unlock: int vfio_df_ioctl_detach_pt(struct vfio_device_file *df, struct vfio_device_detach_iommufd_pt __user *arg) { - struct vfio_device *device = df->device; struct vfio_device_detach_iommufd_pt detach; - unsigned long minsz; + struct vfio_device *device = df->device; + unsigned long minsz, xend = 0; minsz = offsetofend(struct vfio_device_detach_iommufd_pt, flags); if (copy_from_user(&detach, arg, minsz)) return -EFAULT; - if (detach.argsz < minsz || detach.flags) + if (detach.argsz < minsz) return -EINVAL; + if (detach.flags & ~VFIO_DEVICE_DETACH_PASID) + return -EINVAL; + + if (detach.flags & VFIO_DEVICE_DETACH_PASID) { + if (!device->ops->pasid_detach_ioas) + return -EOPNOTSUPP; + xend = offsetofend(struct vfio_device_detach_iommufd_pt, pasid); + } + + if (xend) { + if (detach.argsz < xend) + return -EINVAL; + + if (copy_from_user((void *)&detach + minsz, + (void __user *)arg + minsz, xend - minsz)) + return -EFAULT; + } + mutex_lock(&device->dev_set->lock); - device->ops->detach_ioas(device); + if (detach.flags & VFIO_DEVICE_DETACH_PASID) + device->ops->pasid_detach_ioas(device, detach.pasid); + else + device->ops->detach_ioas(device); mutex_unlock(&device->dev_set->lock); return 0; diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index c8dbf8219c4f..6899da70b929 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -931,29 +931,34 @@ struct vfio_device_bind_iommufd { * VFIO_DEVICE_ATTACH_IOMMUFD_PT - _IOW(VFIO_TYPE, VFIO_BASE + 19, * struct vfio_device_attach_iommufd_pt) * @argsz: User filled size of this data. - * @flags: Must be 0. + * @flags: Flags for attach. * @pt_id: Input the target id which can represent an ioas or a hwpt * allocated via iommufd subsystem. * Output the input ioas id or the attached hwpt id which could * be the specified hwpt itself or a hwpt automatically created * for the specified ioas by kernel during the attachment. + * @pasid: The pasid to be attached, only meaningful when + * VFIO_DEVICE_ATTACH_PASID is set in @flags * * Associate the device with an address space within the bound iommufd. * Undo by VFIO_DEVICE_DETACH_IOMMUFD_PT or device fd close. This is only * allowed on cdev fds. * - * If a vfio device is currently attached to a valid hw_pagetable, without doing - * a VFIO_DEVICE_DETACH_IOMMUFD_PT, a second VFIO_DEVICE_ATTACH_IOMMUFD_PT ioctl - * passing in another hw_pagetable (hwpt) id is allowed. This action, also known - * as a hw_pagetable replacement, will replace the device's currently attached - * hw_pagetable with a new hw_pagetable corresponding to the given pt_id. + * If a vfio device or a pasid of this device is currently attached to a valid + * hw_pagetable (hwpt), without doing a VFIO_DEVICE_DETACH_IOMMUFD_PT, a second + * VFIO_DEVICE_ATTACH_IOMMUFD_PT ioctl passing in another hwpt id is allowed. + * This action, also known as a hw_pagetable replacement, will replace the + * currently attached hwpt of the device or the pasid of this device with a new + * hwpt corresponding to the given pt_id. * * Return: 0 on success, -errno on failure. */ struct vfio_device_attach_iommufd_pt { __u32 argsz; __u32 flags; +#define VFIO_DEVICE_ATTACH_PASID (1 << 0) __u32 pt_id; + __u32 pasid; }; #define VFIO_DEVICE_ATTACH_IOMMUFD_PT _IO(VFIO_TYPE, VFIO_BASE + 19) @@ -962,17 +967,21 @@ struct vfio_device_attach_iommufd_pt { * VFIO_DEVICE_DETACH_IOMMUFD_PT - _IOW(VFIO_TYPE, VFIO_BASE + 20, * struct vfio_device_detach_iommufd_pt) * @argsz: User filled size of this data. - * @flags: Must be 0. + * @flags: Flags for detach. + * @pasid: The pasid to be detached, only meaningful when + * VFIO_DEVICE_DETACH_PASID is set in @flags * - * Remove the association of the device and its current associated address - * space. After it, the device should be in a blocking DMA state. This is only - * allowed on cdev fds. + * Remove the association of the device or a pasid of the device and its current + * associated address space. After it, the device or the pasid should be in a + * blocking DMA state. This is only allowed on cdev fds. * * Return: 0 on success, -errno on failure. */ struct vfio_device_detach_iommufd_pt { __u32 argsz; __u32 flags; +#define VFIO_DEVICE_DETACH_PASID (1 << 0) + __u32 pasid; }; #define VFIO_DEVICE_DETACH_IOMMUFD_PT _IO(VFIO_TYPE, VFIO_BASE + 20) -- cgit v1.2.3 From 803f97298e7de9242eb677a1351dcafbbcc9117e Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Fri, 21 Mar 2025 11:01:42 -0700 Subject: iommufd: Extend IOMMU_GET_HW_INFO to report PASID capability PASID usage requires PASID support in both device and IOMMU. Since the iommu drivers always enable the PASID capability for the device if it is supported, this extends the IOMMU_GET_HW_INFO to report the PASID capability to userspace. Also, enhances the selftest accordingly. Link: https://patch.msgid.link/r/20250321180143.8468-5-yi.l.liu@intel.com Cc: Bjorn Helgaas Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Tested-by: Zhangfei Gao #aarch64 platform Tested-by: Nicolin Chen Signed-off-by: Yi Liu Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/device.c | 34 +++++++++++++++++++++++++++++++++- drivers/pci/ats.c | 33 +++++++++++++++++++++++++++++++++ include/linux/pci-ats.h | 3 +++ include/uapi/linux/iommufd.h | 14 +++++++++++++- 4 files changed, 82 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c index 1605f6c0e1ee..2307daad65c0 100644 --- a/drivers/iommu/iommufd/device.c +++ b/drivers/iommu/iommufd/device.c @@ -3,6 +3,7 @@ */ #include #include +#include #include #include @@ -1455,7 +1456,8 @@ int iommufd_get_hw_info(struct iommufd_ucmd *ucmd) void *data; int rc; - if (cmd->flags || cmd->__reserved) + if (cmd->flags || cmd->__reserved[0] || cmd->__reserved[1] || + cmd->__reserved[2]) return -EOPNOTSUPP; idev = iommufd_get_device(ucmd, cmd->dev_id); @@ -1512,6 +1514,36 @@ int iommufd_get_hw_info(struct iommufd_ucmd *ucmd) if (device_iommu_capable(idev->dev, IOMMU_CAP_DIRTY_TRACKING)) cmd->out_capabilities |= IOMMU_HW_CAP_DIRTY_TRACKING; + cmd->out_max_pasid_log2 = 0; + /* + * Currently, all iommu drivers enable PASID in the probe_device() + * op if iommu and device supports it. So the max_pasids stored in + * dev->iommu indicates both PASID support and enable status. A + * non-zero dev->iommu->max_pasids means PASID is supported and + * enabled. The iommufd only reports PASID capability to userspace + * if it's enabled. + */ + if (idev->dev->iommu->max_pasids) { + cmd->out_max_pasid_log2 = ilog2(idev->dev->iommu->max_pasids); + + if (dev_is_pci(idev->dev)) { + struct pci_dev *pdev = to_pci_dev(idev->dev); + int ctrl; + + ctrl = pci_pasid_status(pdev); + + WARN_ON_ONCE(ctrl < 0 || + !(ctrl & PCI_PASID_CTRL_ENABLE)); + + if (ctrl & PCI_PASID_CTRL_EXEC) + cmd->out_capabilities |= + IOMMU_HW_CAP_PCI_PASID_EXEC; + if (ctrl & PCI_PASID_CTRL_PRIV) + cmd->out_capabilities |= + IOMMU_HW_CAP_PCI_PASID_PRIV; + } + } + rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); out_free: kfree(data); diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c index c6b266c772c8..ec6c8dbdc5e9 100644 --- a/drivers/pci/ats.c +++ b/drivers/pci/ats.c @@ -538,4 +538,37 @@ int pci_max_pasids(struct pci_dev *pdev) return (1 << FIELD_GET(PCI_PASID_CAP_WIDTH, supported)); } EXPORT_SYMBOL_GPL(pci_max_pasids); + +/** + * pci_pasid_status - Check the PASID status + * @pdev: PCI device structure + * + * Returns a negative value when no PASID capability is present. + * Otherwise the value of the control register is returned. + * Status reported are: + * + * PCI_PASID_CTRL_ENABLE - PASID enabled + * PCI_PASID_CTRL_EXEC - Execute permission enabled + * PCI_PASID_CTRL_PRIV - Privileged mode enabled + */ +int pci_pasid_status(struct pci_dev *pdev) +{ + int pasid; + u16 ctrl; + + if (pdev->is_virtfn) + pdev = pci_physfn(pdev); + + pasid = pdev->pasid_cap; + if (!pasid) + return -EINVAL; + + pci_read_config_word(pdev, pasid + PCI_PASID_CTRL, &ctrl); + + ctrl &= PCI_PASID_CTRL_ENABLE | PCI_PASID_CTRL_EXEC | + PCI_PASID_CTRL_PRIV; + + return ctrl; +} +EXPORT_SYMBOL_GPL(pci_pasid_status); #endif /* CONFIG_PCI_PASID */ diff --git a/include/linux/pci-ats.h b/include/linux/pci-ats.h index 0e8b74e63767..75c6c86cf09d 100644 --- a/include/linux/pci-ats.h +++ b/include/linux/pci-ats.h @@ -42,6 +42,7 @@ int pci_enable_pasid(struct pci_dev *pdev, int features); void pci_disable_pasid(struct pci_dev *pdev); int pci_pasid_features(struct pci_dev *pdev); int pci_max_pasids(struct pci_dev *pdev); +int pci_pasid_status(struct pci_dev *pdev); #else /* CONFIG_PCI_PASID */ static inline int pci_enable_pasid(struct pci_dev *pdev, int features) { return -EINVAL; } @@ -50,6 +51,8 @@ static inline int pci_pasid_features(struct pci_dev *pdev) { return -EINVAL; } static inline int pci_max_pasids(struct pci_dev *pdev) { return -EINVAL; } +static inline int pci_pasid_status(struct pci_dev *pdev) +{ return -EINVAL; } #endif /* CONFIG_PCI_PASID */ #endif /* LINUX_PCI_ATS_H */ diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index 6901804ec736..e2c04e58a997 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -612,9 +612,17 @@ enum iommu_hw_info_type { * IOMMU_HWPT_GET_DIRTY_BITMAP * IOMMU_HWPT_SET_DIRTY_TRACKING * + * @IOMMU_HW_CAP_PCI_PASID_EXEC: Execute Permission Supported, user ignores it + * when the struct + * iommu_hw_info::out_max_pasid_log2 is zero. + * @IOMMU_HW_CAP_PCI_PASID_PRIV: Privileged Mode Supported, user ignores it + * when the struct + * iommu_hw_info::out_max_pasid_log2 is zero. */ enum iommufd_hw_capabilities { IOMMU_HW_CAP_DIRTY_TRACKING = 1 << 0, + IOMMU_HW_CAP_PCI_PASID_EXEC = 1 << 1, + IOMMU_HW_CAP_PCI_PASID_PRIV = 1 << 2, }; /** @@ -630,6 +638,9 @@ enum iommufd_hw_capabilities { * iommu_hw_info_type. * @out_capabilities: Output the generic iommu capability info type as defined * in the enum iommu_hw_capabilities. + * @out_max_pasid_log2: Output the width of PASIDs. 0 means no PASID support. + * PCI devices turn to out_capabilities to check if the + * specific capabilities is supported or not. * @__reserved: Must be 0 * * Query an iommu type specific hardware information data from an iommu behind @@ -653,7 +664,8 @@ struct iommu_hw_info { __u32 data_len; __aligned_u64 data_uptr; __u32 out_data_type; - __u32 __reserved; + __u8 out_max_pasid_log2; + __u8 __reserved[3]; __aligned_u64 out_capabilities; }; #define IOMMU_GET_HW_INFO _IO(IOMMUFD_TYPE, IOMMUFD_CMD_GET_HW_INFO) -- cgit v1.2.3 From 858c9c10c123b7b04bba12c689db675c18d48bda Mon Sep 17 00:00:00 2001 From: Bagas Sanjaya Date: Fri, 28 Mar 2025 18:46:54 +0700 Subject: iommufd: Fix iommu_vevent_header tables markup Stephen Rothwell reports htmldocs warnings on iommufd_vevent_header tables: Documentation/userspace-api/iommufd:323: ./include/uapi/linux/iommufd.h:1048: CRITICAL: Unexpected section title or transition. ------------------------------------------------------------------------- [docutils] WARNING: kernel-doc './scripts/kernel-doc -rst -enable-lineno -sphinx-version 8.1.3 ./include/uapi/linux/iommufd.h' processing failed with: Documentation/userspace-api/iommufd:323: ./include/uapi/linux/iommufd.h:1048: (SEVERE/4) Unexpected section title or transition. ------------------------------------------------------------------------- These are because Sphinx confuses the tables for section headings. Fix the table markup to squash away above warnings. Fixes: e36ba5ab808e ("iommufd: Add IOMMUFD_OBJ_VEVENTQ and IOMMUFD_CMD_VEVENTQ_ALLOC") Link: https://patch.msgid.link/r/20250328114654.55840-1-bagasdotme@gmail.com Reported-by: Stephen Rothwell Closes: https://lore.kernel.org/linux-next/20250318213359.5dc56fd1@canb.auug.org.au/ Signed-off-by: Bagas Sanjaya Signed-off-by: Jason Gunthorpe --- include/uapi/linux/iommufd.h | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index e2c04e58a997..f29b6c44655e 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -1045,21 +1045,26 @@ enum iommu_veventq_flag { * [0, INT_MAX] where the following index of INT_MAX is 0 * * Each iommufd_vevent_header reports a sequence index of the following vEVENT: - * ------------------------------------------------------------------------- + * + * +----------------------+-------+----------------------+-------+---+-------+ * | header0 {sequence=0} | data0 | header1 {sequence=1} | data1 |...| dataN | - * ------------------------------------------------------------------------- + * +----------------------+-------+----------------------+-------+---+-------+ + * * And this sequence index is expected to be monotonic to the sequence index of * the previous vEVENT. If two adjacent sequence indexes has a delta larger than * 1, it means that delta - 1 number of vEVENTs has lost, e.g. two lost vEVENTs: - * ------------------------------------------------------------------------- + * + * +-----+----------------------+-------+----------------------+-------+-----+ * | ... | header3 {sequence=3} | data3 | header6 {sequence=6} | data6 | ... | - * ------------------------------------------------------------------------- + * +-----+----------------------+-------+----------------------+-------+-----+ + * * If a vEVENT lost at the tail of the vEVENTQ and there is no following vEVENT * providing the next sequence index, an IOMMU_VEVENTQ_FLAG_LOST_EVENTS header * would be added to the tail, and no data would follow this header: - * --------------------------------------------------------------------------- + * + * +--+----------------------+-------+-----------------------------------------+ * |..| header3 {sequence=3} | data3 | header4 {flags=LOST_EVENTS, sequence=4} | - * --------------------------------------------------------------------------- + * +--+----------------------+-------+-----------------------------------------+ */ struct iommufd_vevent_header { __u32 flags; @@ -1117,9 +1122,11 @@ struct iommu_vevent_arm_smmuv3 { * * Each vEVENT in a vEVENTQ encloses a struct iommufd_vevent_header followed by * a type-specific data structure, in a normal case: - * ------------------------------------------------------------- - * || header0 | data0 | header1 | data1 | ... | headerN | dataN || - * ------------------------------------------------------------- + * + * +-+---------+-------+---------+-------+-----+---------+-------+-+ + * | | header0 | data0 | header1 | data1 | ... | headerN | dataN | | + * +-+---------+-------+---------+-------+-----+---------+-------+-+ + * * unless a tailing IOMMU_VEVENTQ_FLAG_LOST_EVENTS header is logged (refer to * struct iommufd_vevent_header). */ -- cgit v1.2.3