summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorJason Gunthorpe <jgg@nvidia.com>2024-07-09 19:55:05 +0300
committerJason Gunthorpe <jgg@nvidia.com>2024-07-09 19:55:05 +0300
commit18dcca24963a3401c7194e259aec689181a5c130 (patch)
tree4315b7244e6dc5b810edc4e20c2b35d2c04cd35d /include
parent53e6b65693b68519dcfd384280bfc3d34c7398e2 (diff)
parentd1211768b62d02e27b46a3ff78f739c4776a0f03 (diff)
downloadlinux-18dcca24963a3401c7194e259aec689181a5c130.tar.xz
Merge branch 'iommufd_pri' into iommufd for-next
Lu Baolu says: ==================== This series implements the functionality of delivering IO page faults to user space through the IOMMUFD framework. One feasible use case is the nested translation. Nested translation is a hardware feature that supports two-stage translation tables for IOMMU. The second-stage translation table is managed by the host VMM, while the first-stage translation table is owned by user space. This allows user space to control the IOMMU mappings for its devices. When an IO page fault occurs on the first-stage translation table, the IOMMU hardware can deliver the page fault to user space through the IOMMUFD framework. User space can then handle the page fault and respond to the device top-down through the IOMMUFD. This allows user space to implement its own IO page fault handling policies. User space application that is capable of handling IO page faults should allocate a fault object, and bind the fault object to any domain that it is willing to handle the fault generated for them. On a successful return of fault object allocation, the user can retrieve and respond to page faults by reading or writing to the file descriptor (FD) returned. The iommu selftest framework has been updated to test the IO page fault delivery and response functionality. ==================== * iommufd_pri: iommufd/selftest: Add coverage for IOPF test iommufd/selftest: Add IOPF support for mock device iommufd: Associate fault object with iommufd_hw_pgtable iommufd: Fault-capable hwpt attach/detach/replace iommufd: Add iommufd fault object iommufd: Add fault and response message definitions iommu: Extend domain attach group with handle support iommu: Add attach handle to struct iopf_group iommu: Remove sva handle list iommu: Introduce domain attachment handle Link: https://lore.kernel.org/all/20240702063444.105814-1-baolu.lu@linux.intel.com Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Diffstat (limited to 'include')
-rw-r--r--include/linux/iommu.h41
-rw-r--r--include/uapi/linux/iommufd.h109
2 files changed, 134 insertions, 16 deletions
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 17b3f36ad843..73bc3aee95a1 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -124,12 +124,16 @@ struct iopf_fault {
struct iopf_group {
struct iopf_fault last_fault;
struct list_head faults;
+ size_t fault_count;
/* list node for iommu_fault_param::faults */
struct list_head pending_node;
struct work_struct work;
- struct iommu_domain *domain;
+ struct iommu_attach_handle *attach_handle;
/* The device's fault data parameter. */
struct iommu_fault_param *fault_param;
+ /* Used by handler provider to hook the group on its own lists. */
+ struct list_head node;
+ u32 cookie;
};
/**
@@ -547,6 +551,10 @@ static inline int __iommu_copy_struct_from_user_array(
* @default_domain: If not NULL this will always be set as the default domain.
* This should be an IDENTITY/BLOCKED/PLATFORM domain.
* Do not use in new drivers.
+ * @user_pasid_table: IOMMU driver supports user-managed PASID table. There is
+ * no user domain for each PASID and the I/O page faults are
+ * forwarded through the user domain attached to the device
+ * RID.
*/
struct iommu_ops {
bool (*capable)(struct device *dev, enum iommu_cap);
@@ -590,6 +598,7 @@ struct iommu_ops {
struct iommu_domain *blocked_domain;
struct iommu_domain *release_domain;
struct iommu_domain *default_domain;
+ u8 user_pasid_table:1;
};
/**
@@ -989,20 +998,28 @@ struct iommu_fwspec {
/* ATS is supported */
#define IOMMU_FWSPEC_PCI_RC_ATS (1 << 0)
+/*
+ * An iommu attach handle represents a relationship between an iommu domain
+ * and a PASID or RID of a device. It is allocated and managed by the component
+ * that manages the domain and is stored in the iommu group during the time the
+ * domain is attached.
+ */
+struct iommu_attach_handle {
+ struct iommu_domain *domain;
+};
+
/**
* struct iommu_sva - handle to a device-mm bond
*/
struct iommu_sva {
+ struct iommu_attach_handle handle;
struct device *dev;
- struct iommu_domain *domain;
- struct list_head handle_item;
refcount_t users;
};
struct iommu_mm_data {
u32 pasid;
struct list_head sva_domains;
- struct list_head sva_handles;
};
int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode,
@@ -1052,12 +1069,10 @@ int iommu_device_claim_dma_owner(struct device *dev, void *owner);
void iommu_device_release_dma_owner(struct device *dev);
int iommu_attach_device_pasid(struct iommu_domain *domain,
- struct device *dev, ioasid_t pasid);
+ struct device *dev, ioasid_t pasid,
+ struct iommu_attach_handle *handle);
void iommu_detach_device_pasid(struct iommu_domain *domain,
struct device *dev, ioasid_t pasid);
-struct iommu_domain *
-iommu_get_domain_for_dev_pasid(struct device *dev, ioasid_t pasid,
- unsigned int type);
ioasid_t iommu_alloc_global_pasid(struct device *dev);
void iommu_free_global_pasid(ioasid_t pasid);
#else /* CONFIG_IOMMU_API */
@@ -1388,7 +1403,8 @@ static inline int iommu_device_claim_dma_owner(struct device *dev, void *owner)
}
static inline int iommu_attach_device_pasid(struct iommu_domain *domain,
- struct device *dev, ioasid_t pasid)
+ struct device *dev, ioasid_t pasid,
+ struct iommu_attach_handle *handle)
{
return -ENODEV;
}
@@ -1398,13 +1414,6 @@ static inline void iommu_detach_device_pasid(struct iommu_domain *domain,
{
}
-static inline struct iommu_domain *
-iommu_get_domain_for_dev_pasid(struct device *dev, ioasid_t pasid,
- unsigned int type)
-{
- return NULL;
-}
-
static inline ioasid_t iommu_alloc_global_pasid(struct device *dev)
{
return IOMMU_PASID_INVALID;
diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h
index 1dfeaa2e649e..ede2b464a761 100644
--- a/include/uapi/linux/iommufd.h
+++ b/include/uapi/linux/iommufd.h
@@ -50,6 +50,7 @@ enum {
IOMMUFD_CMD_HWPT_SET_DIRTY_TRACKING,
IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP,
IOMMUFD_CMD_HWPT_INVALIDATE,
+ IOMMUFD_CMD_FAULT_QUEUE_ALLOC,
};
/**
@@ -356,10 +357,13 @@ struct iommu_vfio_ioas {
* the parent HWPT in a nesting configuration.
* @IOMMU_HWPT_ALLOC_DIRTY_TRACKING: Dirty tracking support for device IOMMU is
* enforced on device attachment
+ * @IOMMU_HWPT_FAULT_ID_VALID: The fault_id field of hwpt allocation data is
+ * valid.
*/
enum iommufd_hwpt_alloc_flags {
IOMMU_HWPT_ALLOC_NEST_PARENT = 1 << 0,
IOMMU_HWPT_ALLOC_DIRTY_TRACKING = 1 << 1,
+ IOMMU_HWPT_FAULT_ID_VALID = 1 << 2,
};
/**
@@ -411,6 +415,9 @@ enum iommu_hwpt_data_type {
* @data_type: One of enum iommu_hwpt_data_type
* @data_len: Length of the type specific data
* @data_uptr: User pointer to the type specific data
+ * @fault_id: The ID of IOMMUFD_FAULT object. Valid only if
+ * IOMMU_HWPT_FAULT_ID_VALID is set in the flags field.
+ * @__reserved2: Padding to 64-bit alignment. Must be 0.
*
* Explicitly allocate a hardware page table object. This is the same object
* type that is returned by iommufd_device_attach() and represents the
@@ -441,6 +448,8 @@ struct iommu_hwpt_alloc {
__u32 data_type;
__u32 data_len;
__aligned_u64 data_uptr;
+ __u32 fault_id;
+ __u32 __reserved2;
};
#define IOMMU_HWPT_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HWPT_ALLOC)
@@ -692,4 +701,104 @@ struct iommu_hwpt_invalidate {
__u32 __reserved;
};
#define IOMMU_HWPT_INVALIDATE _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HWPT_INVALIDATE)
+
+/**
+ * enum iommu_hwpt_pgfault_flags - flags for struct iommu_hwpt_pgfault
+ * @IOMMU_PGFAULT_FLAGS_PASID_VALID: The pasid field of the fault data is
+ * valid.
+ * @IOMMU_PGFAULT_FLAGS_LAST_PAGE: It's the last fault of a fault group.
+ */
+enum iommu_hwpt_pgfault_flags {
+ IOMMU_PGFAULT_FLAGS_PASID_VALID = (1 << 0),
+ IOMMU_PGFAULT_FLAGS_LAST_PAGE = (1 << 1),
+};
+
+/**
+ * enum iommu_hwpt_pgfault_perm - perm bits for struct iommu_hwpt_pgfault
+ * @IOMMU_PGFAULT_PERM_READ: request for read permission
+ * @IOMMU_PGFAULT_PERM_WRITE: request for write permission
+ * @IOMMU_PGFAULT_PERM_EXEC: (PCIE 10.4.1) request with a PASID that has the
+ * Execute Requested bit set in PASID TLP Prefix.
+ * @IOMMU_PGFAULT_PERM_PRIV: (PCIE 10.4.1) request with a PASID that has the
+ * Privileged Mode Requested bit set in PASID TLP
+ * Prefix.
+ */
+enum iommu_hwpt_pgfault_perm {
+ IOMMU_PGFAULT_PERM_READ = (1 << 0),
+ IOMMU_PGFAULT_PERM_WRITE = (1 << 1),
+ IOMMU_PGFAULT_PERM_EXEC = (1 << 2),
+ IOMMU_PGFAULT_PERM_PRIV = (1 << 3),
+};
+
+/**
+ * struct iommu_hwpt_pgfault - iommu page fault data
+ * @flags: Combination of enum iommu_hwpt_pgfault_flags
+ * @dev_id: ID of the originating device
+ * @pasid: Process Address Space ID
+ * @grpid: Page Request Group Index
+ * @perm: Combination of enum iommu_hwpt_pgfault_perm
+ * @addr: Fault address
+ * @length: a hint of how much data the requestor is expecting to fetch. For
+ * example, if the PRI initiator knows it is going to do a 10MB
+ * transfer, it could fill in 10MB and the OS could pre-fault in
+ * 10MB of IOVA. It defaults to 0 if there's no such hint.
+ * @cookie: kernel-managed cookie identifying a group of fault messages. The
+ * cookie number encoded in the last page fault of the group should
+ * be echoed back in the response message.
+ */
+struct iommu_hwpt_pgfault {
+ __u32 flags;
+ __u32 dev_id;
+ __u32 pasid;
+ __u32 grpid;
+ __u32 perm;
+ __aligned_u64 addr; /* forced 8-byte alignment: same offset on 32- and 64-bit ABIs */
+ __u32 length;
+ __u32 cookie;
+};
+
+/**
+ * enum iommufd_page_response_code - Return status of fault handlers
+ * @IOMMUFD_PAGE_RESP_SUCCESS: Fault has been handled and the page tables
+ * populated, retry the access. This is the
+ * "Success" defined in PCI 10.4.2.1.
+ * @IOMMUFD_PAGE_RESP_INVALID: Could not handle this fault, don't retry the
+ * access. This is the "Invalid Request" in PCI
+ * 10.4.2.1.
+ * @IOMMUFD_PAGE_RESP_FAILURE: General error. Drop all subsequent faults from
+ * this device if possible. This is the "Response
+ * Failure" in PCI 10.4.2.1.
+ */
+enum iommufd_page_response_code {
+ IOMMUFD_PAGE_RESP_SUCCESS = 0,
+ IOMMUFD_PAGE_RESP_INVALID,
+ IOMMUFD_PAGE_RESP_FAILURE,
+};
+
+/**
+ * struct iommu_hwpt_page_response - IOMMU page fault response
+ * @cookie: The kernel-managed cookie reported in the fault message.
+ * @code: One of the response codes in enum iommufd_page_response_code.
+ */
+struct iommu_hwpt_page_response {
+ __u32 cookie;
+ __u32 code;
+};
+
+/**
+ * struct iommu_fault_alloc - ioctl(IOMMU_FAULT_QUEUE_ALLOC)
+ * @size: sizeof(struct iommu_fault_alloc)
+ * @flags: Must be 0
+ * @out_fault_id: The ID of the new FAULT
+ * @out_fault_fd: The fd of the new FAULT
+ *
+ * Explicitly allocate a fault handling object.
+ */
+struct iommu_fault_alloc {
+ __u32 size;
+ __u32 flags;
+ __u32 out_fault_id;
+ __u32 out_fault_fd;
+};
+#define IOMMU_FAULT_QUEUE_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_FAULT_QUEUE_ALLOC)
#endif