From a2ad1b8101a36c927bcbd1317475b9576c352155 Mon Sep 17 00:00:00 2001 From: Steve Sistare Date: Fri, 25 Oct 2024 06:11:51 -0700 Subject: mm/gup: Add folio_add_pins() Export a function that adds pins to an already-pinned huge-page folio. This allows any range of small pages within the folio to be unpinned later. For example, pages pinned via memfd_pin_folios and modified by folio_add_pins could be unpinned via unpin_user_page(s). Link: https://patch.msgid.link/r/1729861919-234514-2-git-send-email-steven.sistare@oracle.com Suggested-by: Jason Gunthorpe Suggested-by: David Hildenbrand Signed-off-by: Steve Sistare Reviewed-by: Jason Gunthorpe Acked-by: David Hildenbrand Signed-off-by: Jason Gunthorpe --- include/linux/mm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index ecf63d2b0582..f9de33ac2add 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2524,6 +2524,7 @@ long pin_user_pages_unlocked(unsigned long start, unsigned long nr_pages, long memfd_pin_folios(struct file *memfd, loff_t start, loff_t end, struct folio **folios, unsigned int max_folios, pgoff_t *offset); +int folio_add_pins(struct folio *folio, unsigned int pins); int get_user_pages_fast(unsigned long start, int nr_pages, unsigned int gup_flags, struct page **pages); -- cgit v1.2.3 From 35890f85573c2ebbbf3491dc66f7ee2ad63055af Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 30 Oct 2024 21:20:45 -0300 Subject: vfio: Remove VFIO_TYPE1_NESTING_IOMMU This control causes the ARM SMMU drivers to choose a stage 2 implementation for the IO pagetable (vs the stage 1 usual default), however this choice has no significant visible impact to the VFIO user. Further qemu never implemented this and no other userspace user is known. The original description in commit f5c9ecebaf2a ("vfio/iommu_type1: add new VFIO_TYPE1_NESTING_IOMMU IOMMU type") suggested this was to "provide SMMU translation services to the guest operating system" however the rest of the API to set the guest table pointer for the stage 1 and manage invalidation was never completed, or at least never upstreamed, rendering this part useless dead code. Upstream has now settled on iommufd as the uAPI for controlling nested translation. Choosing the stage 2 implementation should be done by through the IOMMU_HWPT_ALLOC_NEST_PARENT flag during domain allocation. Remove VFIO_TYPE1_NESTING_IOMMU and everything under it including the enable_nesting iommu_domain_op. Just in-case there is some userspace using this continue to treat requesting it as a NOP, but do not advertise support any more. Acked-by: Alex Williamson Reviewed-by: Mostafa Saleh Reviewed-by: Kevin Tian Reviewed-by: Jerry Snitselaar Reviewed-by: Donald Dutile Tested-by: Nicolin Chen Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/1-v4-9e99b76f3518+3a8-smmuv3_nesting_jgg@nvidia.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 16 ---------------- drivers/iommu/arm/arm-smmu/arm-smmu.c | 16 ---------------- drivers/iommu/iommu.c | 10 ---------- drivers/iommu/iommufd/vfio_compat.c | 7 +------ drivers/vfio/vfio_iommu_type1.c | 12 +----------- include/linux/iommu.h | 3 --- include/uapi/linux/vfio.h | 2 +- 7 files changed, 3 insertions(+), 63 deletions(-) (limited to 'include/linux') diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 737c5b882355..acf250aeb18b 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -3378,21 +3378,6 @@ static struct iommu_group *arm_smmu_device_group(struct device *dev) return group; } -static int arm_smmu_enable_nesting(struct iommu_domain *domain) -{ - struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); - int ret = 0; - - mutex_lock(&smmu_domain->init_mutex); - if (smmu_domain->smmu) - ret = -EPERM; - else - smmu_domain->stage = ARM_SMMU_DOMAIN_S2; - mutex_unlock(&smmu_domain->init_mutex); - - return ret; -} - static int arm_smmu_of_xlate(struct device *dev, const struct of_phandle_args *args) { @@ -3514,7 +3499,6 @@ static struct iommu_ops arm_smmu_ops = { .flush_iotlb_all = arm_smmu_flush_iotlb_all, .iotlb_sync = arm_smmu_iotlb_sync, .iova_to_phys = arm_smmu_iova_to_phys, - .enable_nesting = arm_smmu_enable_nesting, .free = arm_smmu_domain_free_paging, } }; diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c index 8321962b3714..12b173eec454 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c @@ -1558,21 +1558,6 @@ static struct iommu_group *arm_smmu_device_group(struct device *dev) return group; } -static int arm_smmu_enable_nesting(struct iommu_domain *domain) -{ - struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); - int ret = 0; - - mutex_lock(&smmu_domain->init_mutex); - if (smmu_domain->smmu) - ret = -EPERM; - else - smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED; - mutex_unlock(&smmu_domain->init_mutex); - - return ret; -} - static int arm_smmu_set_pgtable_quirks(struct iommu_domain *domain, unsigned long quirks) { @@ -1656,7 +1641,6 @@ static struct iommu_ops arm_smmu_ops = { .flush_iotlb_all = arm_smmu_flush_iotlb_all, .iotlb_sync = arm_smmu_iotlb_sync, .iova_to_phys = arm_smmu_iova_to_phys, - .enable_nesting = arm_smmu_enable_nesting, .set_pgtable_quirks = arm_smmu_set_pgtable_quirks, .free = arm_smmu_domain_free, } diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 83c8e617a2c5..dbd70d5a4702 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -2723,16 +2723,6 @@ static int __init iommu_init(void) } core_initcall(iommu_init); -int iommu_enable_nesting(struct iommu_domain *domain) -{ - if (domain->type != IOMMU_DOMAIN_UNMANAGED) - return -EINVAL; - if (!domain->ops->enable_nesting) - return -EINVAL; - return domain->ops->enable_nesting(domain); -} -EXPORT_SYMBOL_GPL(iommu_enable_nesting); - int iommu_set_pgtable_quirks(struct iommu_domain *domain, unsigned long quirk) { diff --git a/drivers/iommu/iommufd/vfio_compat.c b/drivers/iommu/iommufd/vfio_compat.c index a3ad5f0b6c59..514aacd64009 100644 --- a/drivers/iommu/iommufd/vfio_compat.c +++ b/drivers/iommu/iommufd/vfio_compat.c @@ -291,12 +291,7 @@ static int iommufd_vfio_check_extension(struct iommufd_ctx *ictx, case VFIO_DMA_CC_IOMMU: return iommufd_vfio_cc_iommu(ictx); - /* - * This is obsolete, and to be removed from VFIO. It was an incomplete - * idea that got merged. - * https://lore.kernel.org/kvm/0-v1-0093c9b0e345+19-vfio_no_nesting_jgg@nvidia.com/ - */ - case VFIO_TYPE1_NESTING_IOMMU: + case __VFIO_RESERVED_TYPE1_NESTING_IOMMU: return 0; /* diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index bf391b40e576..50ebc9593c9d 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -72,7 +72,6 @@ struct vfio_iommu { uint64_t pgsize_bitmap; uint64_t num_non_pinned_groups; bool v2; - bool nesting; bool dirty_page_tracking; struct list_head emulated_iommu_groups; }; @@ -2195,12 +2194,6 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, goto out_free_domain; } - if (iommu->nesting) { - ret = iommu_enable_nesting(domain->domain); - if (ret) - goto out_domain; - } - ret = iommu_attach_group(domain->domain, group->iommu_group); if (ret) goto out_domain; @@ -2541,9 +2534,7 @@ static void *vfio_iommu_type1_open(unsigned long arg) switch (arg) { case VFIO_TYPE1_IOMMU: break; - case VFIO_TYPE1_NESTING_IOMMU: - iommu->nesting = true; - fallthrough; + case __VFIO_RESERVED_TYPE1_NESTING_IOMMU: case VFIO_TYPE1v2_IOMMU: iommu->v2 = true; break; @@ -2638,7 +2629,6 @@ static int vfio_iommu_type1_check_extension(struct vfio_iommu *iommu, switch (arg) { case VFIO_TYPE1_IOMMU: case VFIO_TYPE1v2_IOMMU: - case VFIO_TYPE1_NESTING_IOMMU: case VFIO_UNMAP_ALL: return 1; case VFIO_UPDATE_VADDR: diff --git a/include/linux/iommu.h b/include/linux/iommu.h index bd722f473635..c88d18d2c928 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -635,7 +635,6 @@ struct iommu_ops { * @enforce_cache_coherency: Prevent any kind of DMA from bypassing IOMMU_CACHE, * including no-snoop TLPs on PCIe or other platform * specific mechanisms. - * @enable_nesting: Enable nesting * @set_pgtable_quirks: Set io page table quirks (IO_PGTABLE_QUIRK_*) * @free: Release the domain after use. */ @@ -663,7 +662,6 @@ struct iommu_domain_ops { dma_addr_t iova); bool (*enforce_cache_coherency)(struct iommu_domain *domain); - int (*enable_nesting)(struct iommu_domain *domain); int (*set_pgtable_quirks)(struct iommu_domain *domain, unsigned long quirks); @@ -844,7 +842,6 @@ extern void iommu_group_put(struct iommu_group *group); extern int iommu_group_id(struct iommu_group *group); extern struct iommu_domain *iommu_group_default_domain(struct iommu_group *); -int iommu_enable_nesting(struct iommu_domain *domain); int iommu_set_pgtable_quirks(struct iommu_domain *domain, unsigned long quirks); diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index 2b68e6cdf190..c8dbf8219c4f 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -35,7 +35,7 @@ #define VFIO_EEH 5 /* Two-stage IOMMU */ -#define VFIO_TYPE1_NESTING_IOMMU 6 /* Implies v2 */ +#define __VFIO_RESERVED_TYPE1_NESTING_IOMMU 6 /* Implies v2 */ #define VFIO_SPAPR_TCE_v2_IOMMU 7 -- cgit v1.2.3 From 807404d66fcf898d4bcc6a3e3edb07ffd5b88400 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Wed, 30 Oct 2024 21:20:47 -0300 Subject: ACPI/IORT: Support CANWBS memory access flag The IORT spec, Issue E.f (April 2024), adds a new CANWBS bit to the Memory Access Flag field in the Memory Access Properties table, mainly for a PCI Root Complex. This CANWBS defines the coherency of memory accesses to be not marked IOWB cacheable/shareable. Its value further implies the coherency impact from a pair of mismatched memory attributes (e.g. in a nested translation case): 0x0: Use of mismatched memory attributes for accesses made by this device may lead to a loss of coherency. 0x1: Coherency of accesses made by this device to locations in Conventional memory are ensured as follows, even if the memory attributes for the accesses presented by the device or provided by the SMMU are different from Inner and Outer Write-back cacheable, Shareable. Note that the loss of coherency on a CANWBS-unsupported HW typically could occur to an SMMU that doesn't implement the S2FWB feature where additional cache flush operations would be required to prevent that from happening. Add a new ACPI_IORT_MF_CANWBS flag and set IOMMU_FWSPEC_PCI_RC_CANWBS upon the presence of this new flag. CANWBS and S2FWB are similar features, in that they both guarantee the VM can not violate coherency, however S2FWB can be bypassed by PCI No Snoop TLPs, while CANWBS cannot. Thus CANWBS meets the requirements to set IOMMU_CAP_ENFORCE_CACHE_COHERENCY. Architecturally ARM has expected that VFIO would disable No Snoop through PCI Config space, if this is done then the two would have the same protections. Tested-by: Nicolin Chen Signed-off-by: Nicolin Chen Reviewed-by: Kevin Tian Acked-by: Hanjun Guo Reviewed-by: Jerry Snitselaar Reviewed-by: Donald Dutile Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/3-v4-9e99b76f3518+3a8-smmuv3_nesting_jgg@nvidia.com Signed-off-by: Will Deacon --- drivers/acpi/arm64/iort.c | 13 +++++++++++++ include/linux/iommu.h | 2 ++ 2 files changed, 15 insertions(+) (limited to 'include/linux') diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index 4c745a26226b..1f7e4c691d9e 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -1218,6 +1218,17 @@ static bool iort_pci_rc_supports_ats(struct acpi_iort_node *node) return pci_rc->ats_attribute & ACPI_IORT_ATS_SUPPORTED; } +static bool iort_pci_rc_supports_canwbs(struct acpi_iort_node *node) +{ + struct acpi_iort_memory_access *memory_access; + struct acpi_iort_root_complex *pci_rc; + + pci_rc = (struct acpi_iort_root_complex *)node->node_data; + memory_access = + (struct acpi_iort_memory_access *)&pci_rc->memory_properties; + return memory_access->memory_flags & ACPI_IORT_MF_CANWBS; +} + static int iort_iommu_xlate(struct device *dev, struct acpi_iort_node *node, u32 streamid) { @@ -1335,6 +1346,8 @@ int iort_iommu_configure_id(struct device *dev, const u32 *id_in) fwspec = dev_iommu_fwspec_get(dev); if (fwspec && iort_pci_rc_supports_ats(node)) fwspec->flags |= IOMMU_FWSPEC_PCI_RC_ATS; + if (fwspec && iort_pci_rc_supports_canwbs(node)) + fwspec->flags |= IOMMU_FWSPEC_PCI_RC_CANWBS; } else { node = iort_scan_node(ACPI_IORT_NODE_NAMED_COMPONENT, iort_match_node_callback, dev); diff --git a/include/linux/iommu.h b/include/linux/iommu.h index c88d18d2c928..4ad9b9ec6c9b 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -991,6 +991,8 @@ struct iommu_fwspec { /* ATS is supported */ #define IOMMU_FWSPEC_PCI_RC_ATS (1 << 0) +/* CANWBS is supported */ +#define IOMMU_FWSPEC_PCI_RC_CANWBS (1 << 1) /* * An iommu attach handle represents a relationship between an iommu domain -- cgit v1.2.3 From d1b3dad9de7962f7bea861f27e352bac17b68bed Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 5 Nov 2024 12:04:17 -0800 Subject: iommufd: Move struct iommufd_object to public iommufd header Prepare for an embedded structure design for driver-level iommufd_viommu objects: // include/linux/iommufd.h struct iommufd_viommu { struct iommufd_object obj; .... }; // Some IOMMU driver struct iommu_driver_viommu { struct iommufd_viommu core; .... }; It has to expose struct iommufd_object and enum iommufd_object_type from the core-level private header to the public iommufd header. Link: https://patch.msgid.link/r/54a43b0768089d690104530754f499ca05ce0074.1730836219.git.nicolinc@nvidia.com Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/iommufd_private.h | 25 +------------------------ include/linux/iommufd.h | 24 ++++++++++++++++++++++++ 2 files changed, 25 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h index 8f3c21a664bd..94cfcab7e9de 100644 --- a/drivers/iommu/iommufd/iommufd_private.h +++ b/drivers/iommu/iommufd/iommufd_private.h @@ -5,8 +5,8 @@ #define __IOMMUFD_PRIVATE_H #include +#include #include -#include #include #include #include @@ -126,29 +126,6 @@ static inline int iommufd_ucmd_respond(struct iommufd_ucmd *ucmd, return 0; } -enum iommufd_object_type { - IOMMUFD_OBJ_NONE, - IOMMUFD_OBJ_ANY = IOMMUFD_OBJ_NONE, - IOMMUFD_OBJ_DEVICE, - IOMMUFD_OBJ_HWPT_PAGING, - IOMMUFD_OBJ_HWPT_NESTED, - IOMMUFD_OBJ_IOAS, - IOMMUFD_OBJ_ACCESS, - IOMMUFD_OBJ_FAULT, -#ifdef CONFIG_IOMMUFD_TEST - IOMMUFD_OBJ_SELFTEST, -#endif - IOMMUFD_OBJ_MAX, -}; - -/* Base struct for all objects with a userspace ID handle. */ -struct iommufd_object { - refcount_t shortterm_users; - refcount_t users; - enum iommufd_object_type type; - unsigned int id; -}; - static inline bool iommufd_lock_obj(struct iommufd_object *obj) { if (!refcount_inc_not_zero(&obj->users)) diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index 30f832a60ccb..22948dd03d67 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -8,6 +8,7 @@ #include #include +#include #include struct device; @@ -18,6 +19,29 @@ struct iommufd_ctx; struct iommufd_device; struct page; +enum iommufd_object_type { + IOMMUFD_OBJ_NONE, + IOMMUFD_OBJ_ANY = IOMMUFD_OBJ_NONE, + IOMMUFD_OBJ_DEVICE, + IOMMUFD_OBJ_HWPT_PAGING, + IOMMUFD_OBJ_HWPT_NESTED, + IOMMUFD_OBJ_IOAS, + IOMMUFD_OBJ_ACCESS, + IOMMUFD_OBJ_FAULT, +#ifdef CONFIG_IOMMUFD_TEST + IOMMUFD_OBJ_SELFTEST, +#endif + IOMMUFD_OBJ_MAX, +}; + +/* Base struct for all objects with a userspace ID handle. */ +struct iommufd_object { + refcount_t shortterm_users; + refcount_t users; + enum iommufd_object_type type; + unsigned int id; +}; + struct iommufd_device *iommufd_device_bind(struct iommufd_ctx *ictx, struct device *dev, u32 *id); void iommufd_device_unbind(struct iommufd_device *idev); -- cgit v1.2.3 From 7d4f46c2372d5a850e28f63001013de50843b6e6 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 5 Nov 2024 12:04:18 -0800 Subject: iommufd: Move _iommufd_object_alloc helper to a sharable file The following patch will add a new vIOMMU allocator that will require this _iommufd_object_alloc to be sharable with IOMMU drivers (and iommufd too). Add a new driver.c file that will be built with CONFIG_IOMMUFD_DRIVER_CORE selected by CONFIG_IOMMUFD, and put the CONFIG_DRIVER under that remaining to be selectable for drivers to build the existing iova_bitmap.c file. Link: https://patch.msgid.link/r/2f4f6e116dc49ffb67ff6c5e8a7a8e789ab9e98e.1730836219.git.nicolinc@nvidia.com Suggested-by: Jason Gunthorpe Reviewed-by: Jason Gunthorpe Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/Kconfig | 4 ++++ drivers/iommu/iommufd/Makefile | 3 +++ drivers/iommu/iommufd/driver.c | 40 +++++++++++++++++++++++++++++++++ drivers/iommu/iommufd/iommufd_private.h | 4 ---- drivers/iommu/iommufd/main.c | 32 -------------------------- include/linux/iommufd.h | 13 +++++++++++ 6 files changed, 60 insertions(+), 36 deletions(-) create mode 100644 drivers/iommu/iommufd/driver.c (limited to 'include/linux') diff --git a/drivers/iommu/iommufd/Kconfig b/drivers/iommu/iommufd/Kconfig index 76656fe0470d..0a07f9449fd9 100644 --- a/drivers/iommu/iommufd/Kconfig +++ b/drivers/iommu/iommufd/Kconfig @@ -1,4 +1,8 @@ # SPDX-License-Identifier: GPL-2.0-only +config IOMMUFD_DRIVER_CORE + tristate + default (IOMMUFD_DRIVER || IOMMUFD) if IOMMUFD!=n + config IOMMUFD tristate "IOMMU Userspace API" select INTERVAL_TREE diff --git a/drivers/iommu/iommufd/Makefile b/drivers/iommu/iommufd/Makefile index cf4605962bea..83df9077063e 100644 --- a/drivers/iommu/iommufd/Makefile +++ b/drivers/iommu/iommufd/Makefile @@ -13,3 +13,6 @@ iommufd-$(CONFIG_IOMMUFD_TEST) += selftest.o obj-$(CONFIG_IOMMUFD) += iommufd.o obj-$(CONFIG_IOMMUFD_DRIVER) += iova_bitmap.o + +iommufd_driver-y := driver.o +obj-$(CONFIG_IOMMUFD_DRIVER_CORE) += iommufd_driver.o diff --git a/drivers/iommu/iommufd/driver.c b/drivers/iommu/iommufd/driver.c new file mode 100644 index 000000000000..2bc47d92a0ab --- /dev/null +++ b/drivers/iommu/iommufd/driver.c @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES + */ +#include "iommufd_private.h" + +struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx, + size_t size, + enum iommufd_object_type type) +{ + struct iommufd_object *obj; + int rc; + + obj = kzalloc(size, GFP_KERNEL_ACCOUNT); + if (!obj) + return ERR_PTR(-ENOMEM); + obj->type = type; + /* Starts out bias'd by 1 until it is removed from the xarray */ + refcount_set(&obj->shortterm_users, 1); + refcount_set(&obj->users, 1); + + /* + * Reserve an ID in the xarray but do not publish the pointer yet since + * the caller hasn't initialized it yet. Once the pointer is published + * in the xarray and visible to other threads we can't reliably destroy + * it anymore, so the caller must complete all errorable operations + * before calling iommufd_object_finalize(). + */ + rc = xa_alloc(&ictx->objects, &obj->id, XA_ZERO_ENTRY, xa_limit_31b, + GFP_KERNEL_ACCOUNT); + if (rc) + goto out_free; + return obj; +out_free: + kfree(obj); + return ERR_PTR(rc); +} +EXPORT_SYMBOL_NS_GPL(_iommufd_object_alloc, IOMMUFD); + +MODULE_DESCRIPTION("iommufd code shared with builtin modules"); +MODULE_LICENSE("GPL"); diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h index 94cfcab7e9de..be347f726fda 100644 --- a/drivers/iommu/iommufd/iommufd_private.h +++ b/drivers/iommu/iommufd/iommufd_private.h @@ -206,10 +206,6 @@ iommufd_object_put_and_try_destroy(struct iommufd_ctx *ictx, iommufd_object_remove(ictx, obj, obj->id, 0); } -struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx, - size_t size, - enum iommufd_object_type type); - #define __iommufd_object_alloc(ictx, ptr, type, obj) \ container_of(_iommufd_object_alloc( \ ictx, \ diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c index 826a2b2be52f..3c32b440471b 100644 --- a/drivers/iommu/iommufd/main.c +++ b/drivers/iommu/iommufd/main.c @@ -29,38 +29,6 @@ struct iommufd_object_ops { static const struct iommufd_object_ops iommufd_object_ops[]; static struct miscdevice vfio_misc_dev; -struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx, - size_t size, - enum iommufd_object_type type) -{ - struct iommufd_object *obj; - int rc; - - obj = kzalloc(size, GFP_KERNEL_ACCOUNT); - if (!obj) - return ERR_PTR(-ENOMEM); - obj->type = type; - /* Starts out bias'd by 1 until it is removed from the xarray */ - refcount_set(&obj->shortterm_users, 1); - refcount_set(&obj->users, 1); - - /* - * Reserve an ID in the xarray but do not publish the pointer yet since - * the caller hasn't initialized it yet. Once the pointer is published - * in the xarray and visible to other threads we can't reliably destroy - * it anymore, so the caller must complete all errorable operations - * before calling iommufd_object_finalize(). - */ - rc = xa_alloc(&ictx->objects, &obj->id, XA_ZERO_ENTRY, - xa_limit_31b, GFP_KERNEL_ACCOUNT); - if (rc) - goto out_free; - return obj; -out_free: - kfree(obj); - return ERR_PTR(rc); -} - /* * Allow concurrent access to the object. * diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index 22948dd03d67..94522d4029ca 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -135,4 +135,17 @@ static inline int iommufd_vfio_compat_set_no_iommu(struct iommufd_ctx *ictx) return -EOPNOTSUPP; } #endif /* CONFIG_IOMMUFD */ + +#if IS_ENABLED(CONFIG_IOMMUFD_DRIVER_CORE) +struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx, + size_t size, + enum iommufd_object_type type); +#else /* !CONFIG_IOMMUFD_DRIVER_CORE */ +static inline struct iommufd_object * +_iommufd_object_alloc(struct iommufd_ctx *ictx, size_t size, + enum iommufd_object_type type) +{ + return ERR_PTR(-EOPNOTSUPP); +} +#endif /* CONFIG_IOMMUFD_DRIVER_CORE */ #endif -- cgit v1.2.3 From 6b22d562fcd6e3d1cc1c265b0596840946d16a09 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 5 Nov 2024 12:04:19 -0800 Subject: iommufd: Introduce IOMMUFD_OBJ_VIOMMU and its related struct Add a new IOMMUFD_OBJ_VIOMMU with an iommufd_viommu structure to represent a slice of physical IOMMU device passed to or shared with a user space VM. This slice, now a vIOMMU object, is a group of virtualization resources of a physical IOMMU's, such as: - Security namespace for guest owned ID, e.g. guest-controlled cache tags - Non-device-affiliated event reporting, e.g. invalidation queue errors - Access to a sharable nesting parent pagetable across physical IOMMUs - Virtualization of various platforms IDs, e.g. RIDs and others - Delivery of paravirtualized invalidation - Direct assigned invalidation queues - Direct assigned interrupts Add a new viommu_alloc op in iommu_ops, for drivers to allocate their own vIOMMU structures. And this allocation also needs a free(), so add struct iommufd_viommu_ops. To simplify a vIOMMU allocation, provide a iommufd_viommu_alloc() helper. It's suggested that a driver should embed a core-level viommu structure in its driver-level viommu struct and call the iommufd_viommu_alloc() helper, meanwhile the driver can also implement a viommu ops: struct my_driver_viommu { struct iommufd_viommu core; /* driver-owned properties/features */ .... }; static const struct iommufd_viommu_ops my_driver_viommu_ops = { .free = my_driver_viommu_free, /* future ops for virtualization features */ .... }; static struct iommufd_viommu my_driver_viommu_alloc(...) { struct my_driver_viommu *my_viommu = iommufd_viommu_alloc(ictx, my_driver_viommu, core, my_driver_viommu_ops); /* Init my_viommu and related HW feature */ .... return &my_viommu->core; } static struct iommu_domain_ops my_driver_domain_ops = { .... .viommu_alloc = my_driver_viommu_alloc, }; Link: https://patch.msgid.link/r/64685e2b79dea0f1dc56f6ede04809b72d578935.1730836219.git.nicolinc@nvidia.com Suggested-by: Jason Gunthorpe Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- include/linux/iommu.h | 14 ++++++++++++++ include/linux/iommufd.h | 40 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index bd722f473635..2574fc8abaf2 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -42,6 +42,8 @@ struct notifier_block; struct iommu_sva; struct iommu_dma_cookie; struct iommu_fault_param; +struct iommufd_ctx; +struct iommufd_viommu; #define IOMMU_FAULT_PERM_READ (1 << 0) /* read */ #define IOMMU_FAULT_PERM_WRITE (1 << 1) /* write */ @@ -542,6 +544,14 @@ static inline int __iommu_copy_struct_from_user_array( * @remove_dev_pasid: Remove any translation configurations of a specific * pasid, so that any DMA transactions with this pasid * will be blocked by the hardware. + * @viommu_alloc: Allocate an iommufd_viommu on a physical IOMMU instance behind + * the @dev, as the set of virtualization resources shared/passed + * to user space IOMMU instance. And associate it with a nesting + * @parent_domain. The @viommu_type must be defined in the header + * include/uapi/linux/iommufd.h + * It is required to call iommufd_viommu_alloc() helper for + * a bundled allocation of the core and the driver structures, + * using the given @ictx pointer. * @pgsize_bitmap: bitmap of all possible supported page sizes * @owner: Driver module providing these ops * @identity_domain: An always available, always attachable identity @@ -591,6 +601,10 @@ struct iommu_ops { void (*remove_dev_pasid)(struct device *dev, ioasid_t pasid, struct iommu_domain *domain); + struct iommufd_viommu *(*viommu_alloc)( + struct device *dev, struct iommu_domain *parent_domain, + struct iommufd_ctx *ictx, unsigned int viommu_type); + const struct iommu_domain_ops *default_domain_ops; unsigned long pgsize_bitmap; struct module *owner; diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index 94522d4029ca..4fc2ce332f10 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -17,6 +17,7 @@ struct iommu_group; struct iommufd_access; struct iommufd_ctx; struct iommufd_device; +struct iommufd_viommu_ops; struct page; enum iommufd_object_type { @@ -28,6 +29,7 @@ enum iommufd_object_type { IOMMUFD_OBJ_IOAS, IOMMUFD_OBJ_ACCESS, IOMMUFD_OBJ_FAULT, + IOMMUFD_OBJ_VIOMMU, #ifdef CONFIG_IOMMUFD_TEST IOMMUFD_OBJ_SELFTEST, #endif @@ -78,6 +80,26 @@ void iommufd_access_detach(struct iommufd_access *access); void iommufd_ctx_get(struct iommufd_ctx *ictx); +struct iommufd_viommu { + struct iommufd_object obj; + struct iommufd_ctx *ictx; + struct iommu_device *iommu_dev; + struct iommufd_hwpt_paging *hwpt; + + const struct iommufd_viommu_ops *ops; + + unsigned int type; +}; + +/** + * struct iommufd_viommu_ops - vIOMMU specific operations + * @destroy: Clean up all driver-specific parts of an iommufd_viommu. The memory + * of the vIOMMU will be free-ed by iommufd core after calling this op + */ +struct iommufd_viommu_ops { + void (*destroy)(struct iommufd_viommu *viommu); +}; + #if IS_ENABLED(CONFIG_IOMMUFD) struct iommufd_ctx *iommufd_ctx_from_file(struct file *file); struct iommufd_ctx *iommufd_ctx_from_fd(int fd); @@ -148,4 +170,22 @@ _iommufd_object_alloc(struct iommufd_ctx *ictx, size_t size, return ERR_PTR(-EOPNOTSUPP); } #endif /* CONFIG_IOMMUFD_DRIVER_CORE */ + +/* + * Helpers for IOMMU driver to allocate driver structures that will be freed by + * the iommufd core. The free op will be called prior to freeing the memory. + */ +#define iommufd_viommu_alloc(ictx, drv_struct, member, viommu_ops) \ + ({ \ + drv_struct *ret; \ + \ + static_assert(__same_type(struct iommufd_viommu, \ + ((drv_struct *)NULL)->member)); \ + static_assert(offsetof(drv_struct, member.obj) == 0); \ + ret = (drv_struct *)_iommufd_object_alloc( \ + ictx, sizeof(drv_struct), IOMMUFD_OBJ_VIOMMU); \ + if (!IS_ERR(ret)) \ + ret->member.ops = viommu_ops; \ + ret; \ + }) #endif -- cgit v1.2.3 From 69d2689e57f5cb235c0609dd2f8fa10c1a832f87 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 5 Nov 2024 12:04:22 -0800 Subject: iommufd: Add alloc_domain_nested op to iommufd_viommu_ops Allow IOMMU driver to use a vIOMMU object that holds a nesting parent hwpt/domain to allocate a nested domain. Link: https://patch.msgid.link/r/2dcdb5e405dc0deb68230564530d989d285d959c.1730836219.git.nicolinc@nvidia.com Suggested-by: Jason Gunthorpe Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- include/linux/iommufd.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index 4fc2ce332f10..de9b56265c9c 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -14,6 +14,7 @@ struct device; struct file; struct iommu_group; +struct iommu_user_data; struct iommufd_access; struct iommufd_ctx; struct iommufd_device; @@ -95,9 +96,17 @@ struct iommufd_viommu { * struct iommufd_viommu_ops - vIOMMU specific operations * @destroy: Clean up all driver-specific parts of an iommufd_viommu. The memory * of the vIOMMU will be free-ed by iommufd core after calling this op + * @alloc_domain_nested: Allocate a IOMMU_DOMAIN_NESTED on a vIOMMU that holds a + * nesting parent domain (IOMMU_DOMAIN_PAGING). @user_data + * must be defined in include/uapi/linux/iommufd.h. + * It must fully initialize the new iommu_domain before + * returning. Upon failure, ERR_PTR must be returned. */ struct iommufd_viommu_ops { void (*destroy)(struct iommufd_viommu *viommu); + struct iommu_domain *(*alloc_domain_nested)( + struct iommufd_viommu *viommu, u32 flags, + const struct iommu_user_data *user_data); }; #if IS_ENABLED(CONFIG_IOMMUFD) -- cgit v1.2.3 From 0ce5c2477af2e2284b9c70474e4dae85db211680 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 5 Nov 2024 12:05:09 -0800 Subject: iommufd/viommu: Add IOMMUFD_OBJ_VDEVICE and IOMMU_VDEVICE_ALLOC ioctl Introduce a new IOMMUFD_OBJ_VDEVICE to represent a physical device (struct device) against a vIOMMU (struct iommufd_viommu) object in a VM. This vDEVICE object (and its structure) holds all the infos and attributes in the VM, regarding the device related to the vIOMMU. As an initial patch, add a per-vIOMMU virtual ID. This can be: - Virtual StreamID on a nested ARM SMMUv3, an index to a Stream Table - Virtual DeviceID on a nested AMD IOMMU, an index to a Device Table - Virtual RID on a nested Intel VT-D IOMMU, an index to a Context Table Potentially, this vDEVICE structure would hold some vData for Confidential Compute Architecture (CCA). Use this virtual ID to index an "vdevs" xarray that belongs to a vIOMMU object. Add a new ioctl for vDEVICE allocations. Since a vDEVICE is a connection of a device object and an iommufd_viommu object, take two refcounts in the ioctl handler. Link: https://patch.msgid.link/r/cda8fd2263166e61b8191a3b3207e0d2b08545bf.1730836308.git.nicolinc@nvidia.com Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/iommufd_private.h | 18 ++++++++ drivers/iommu/iommufd/main.c | 6 +++ drivers/iommu/iommufd/viommu.c | 76 +++++++++++++++++++++++++++++++++ include/linux/iommufd.h | 4 ++ include/uapi/linux/iommufd.h | 22 ++++++++++ 5 files changed, 126 insertions(+) (limited to 'include/linux') diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h index e8f5ef550cc9..062656c19a07 100644 --- a/drivers/iommu/iommufd/iommufd_private.h +++ b/drivers/iommu/iommufd/iommufd_private.h @@ -507,8 +507,26 @@ static inline int iommufd_hwpt_replace_device(struct iommufd_device *idev, return iommu_group_replace_domain(idev->igroup->group, hwpt->domain); } +static inline struct iommufd_viommu * +iommufd_get_viommu(struct iommufd_ucmd *ucmd, u32 id) +{ + return container_of(iommufd_get_object(ucmd->ictx, id, + IOMMUFD_OBJ_VIOMMU), + struct iommufd_viommu, obj); +} + int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd); void iommufd_viommu_destroy(struct iommufd_object *obj); +int iommufd_vdevice_alloc_ioctl(struct iommufd_ucmd *ucmd); +void iommufd_vdevice_destroy(struct iommufd_object *obj); + +struct iommufd_vdevice { + struct iommufd_object obj; + struct iommufd_ctx *ictx; + struct iommufd_viommu *viommu; + struct device *dev; + u64 id; /* per-vIOMMU virtual ID */ +}; #ifdef CONFIG_IOMMUFD_TEST int iommufd_test(struct iommufd_ucmd *ucmd); diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c index cc514f9bc3e6..d735fe04197f 100644 --- a/drivers/iommu/iommufd/main.c +++ b/drivers/iommu/iommufd/main.c @@ -308,6 +308,7 @@ union ucmd_buffer { struct iommu_option option; struct iommu_vfio_ioas vfio_ioas; struct iommu_viommu_alloc viommu; + struct iommu_vdevice_alloc vdev; #ifdef CONFIG_IOMMUFD_TEST struct iommu_test_cmd test; #endif @@ -363,6 +364,8 @@ static const struct iommufd_ioctl_op iommufd_ioctl_ops[] = { __reserved), IOCTL_OP(IOMMU_VIOMMU_ALLOC, iommufd_viommu_alloc_ioctl, struct iommu_viommu_alloc, out_viommu_id), + IOCTL_OP(IOMMU_VDEVICE_ALLOC, iommufd_vdevice_alloc_ioctl, + struct iommu_vdevice_alloc, virt_id), #ifdef CONFIG_IOMMUFD_TEST IOCTL_OP(IOMMU_TEST_CMD, iommufd_test, struct iommu_test_cmd, last), #endif @@ -501,6 +504,9 @@ static const struct iommufd_object_ops iommufd_object_ops[] = { [IOMMUFD_OBJ_VIOMMU] = { .destroy = iommufd_viommu_destroy, }, + [IOMMUFD_OBJ_VDEVICE] = { + .destroy = iommufd_vdevice_destroy, + }, #ifdef CONFIG_IOMMUFD_TEST [IOMMUFD_OBJ_SELFTEST] = { .destroy = iommufd_selftest_destroy, diff --git a/drivers/iommu/iommufd/viommu.c b/drivers/iommu/iommufd/viommu.c index 888239b78667..69b88e8c7c26 100644 --- a/drivers/iommu/iommufd/viommu.c +++ b/drivers/iommu/iommufd/viommu.c @@ -11,6 +11,7 @@ void iommufd_viommu_destroy(struct iommufd_object *obj) if (viommu->ops && viommu->ops->destroy) viommu->ops->destroy(viommu); refcount_dec(&viommu->hwpt->common.obj.users); + xa_destroy(&viommu->vdevs); } int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd) @@ -53,6 +54,7 @@ int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd) goto out_put_hwpt; } + xa_init(&viommu->vdevs); viommu->type = cmd->type; viommu->ictx = ucmd->ictx; viommu->hwpt = hwpt_paging; @@ -79,3 +81,77 @@ out_put_idev: iommufd_put_object(ucmd->ictx, &idev->obj); return rc; } + +void iommufd_vdevice_destroy(struct iommufd_object *obj) +{ + struct iommufd_vdevice *vdev = + container_of(obj, struct iommufd_vdevice, obj); + struct iommufd_viommu *viommu = vdev->viommu; + + /* xa_cmpxchg is okay to fail if alloc failed xa_cmpxchg previously */ + xa_cmpxchg(&viommu->vdevs, vdev->id, vdev, NULL, GFP_KERNEL); + refcount_dec(&viommu->obj.users); + put_device(vdev->dev); +} + +int iommufd_vdevice_alloc_ioctl(struct iommufd_ucmd *ucmd) +{ + struct iommu_vdevice_alloc *cmd = ucmd->cmd; + struct iommufd_vdevice *vdev, *curr; + struct iommufd_viommu *viommu; + struct iommufd_device *idev; + u64 virt_id = cmd->virt_id; + int rc = 0; + + /* virt_id indexes an xarray */ + if (virt_id > ULONG_MAX) + return -EINVAL; + + viommu = iommufd_get_viommu(ucmd, cmd->viommu_id); + if (IS_ERR(viommu)) + return PTR_ERR(viommu); + + idev = iommufd_get_device(ucmd, cmd->dev_id); + if (IS_ERR(idev)) { + rc = PTR_ERR(idev); + goto out_put_viommu; + } + + if (viommu->iommu_dev != __iommu_get_iommu_dev(idev->dev)) { + rc = -EINVAL; + goto out_put_idev; + } + + vdev = iommufd_object_alloc(ucmd->ictx, vdev, IOMMUFD_OBJ_VDEVICE); + if (IS_ERR(vdev)) { + rc = PTR_ERR(vdev); + goto out_put_idev; + } + + vdev->id = virt_id; + vdev->dev = idev->dev; + get_device(idev->dev); + vdev->viommu = viommu; + refcount_inc(&viommu->obj.users); + + curr = xa_cmpxchg(&viommu->vdevs, virt_id, NULL, vdev, GFP_KERNEL); + if (curr) { + rc = xa_err(curr) ?: -EEXIST; + goto out_abort; + } + + cmd->out_vdevice_id = vdev->obj.id; + rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); + if (rc) + goto out_abort; + iommufd_object_finalize(ucmd->ictx, &vdev->obj); + goto out_put_idev; + +out_abort: + iommufd_object_abort_and_destroy(ucmd->ictx, &vdev->obj); +out_put_idev: + iommufd_put_object(ucmd->ictx, &idev->obj); +out_put_viommu: + iommufd_put_object(ucmd->ictx, &viommu->obj); + return rc; +} diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index de9b56265c9c..71fa1e343023 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -10,6 +10,7 @@ #include #include #include +#include struct device; struct file; @@ -31,6 +32,7 @@ enum iommufd_object_type { IOMMUFD_OBJ_ACCESS, IOMMUFD_OBJ_FAULT, IOMMUFD_OBJ_VIOMMU, + IOMMUFD_OBJ_VDEVICE, #ifdef CONFIG_IOMMUFD_TEST IOMMUFD_OBJ_SELFTEST, #endif @@ -89,6 +91,8 @@ struct iommufd_viommu { const struct iommufd_viommu_ops *ops; + struct xarray vdevs; + unsigned int type; }; diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index a498d4838f9a..9b5236004b8e 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -53,6 +53,7 @@ enum { IOMMUFD_CMD_FAULT_QUEUE_ALLOC = 0x8e, IOMMUFD_CMD_IOAS_MAP_FILE = 0x8f, IOMMUFD_CMD_VIOMMU_ALLOC = 0x90, + IOMMUFD_CMD_VDEVICE_ALLOC = 0x91, }; /** @@ -864,4 +865,25 @@ struct iommu_viommu_alloc { __u32 out_viommu_id; }; #define IOMMU_VIOMMU_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VIOMMU_ALLOC) + +/** + * struct iommu_vdevice_alloc - ioctl(IOMMU_VDEVICE_ALLOC) + * @size: sizeof(struct iommu_vdevice_alloc) + * @viommu_id: vIOMMU ID to associate with the virtual device + * @dev_id: The physical device to allocate a virtual instance on the vIOMMU + * @out_vdevice_id: Object handle for the vDevice. Pass to IOMMU_DESTORY + * @virt_id: Virtual device ID per vIOMMU, e.g. vSID of ARM SMMUv3, vDeviceID + * of AMD IOMMU, and vRID of a nested Intel VT-d to a Context Table + * + * Allocate a virtual device instance (for a physical device) against a vIOMMU. + * This instance holds the device's information (related to its vIOMMU) in a VM. + */ +struct iommu_vdevice_alloc { + __u32 size; + __u32 viommu_id; + __u32 dev_id; + __u32 out_vdevice_id; + __aligned_u64 virt_id; +}; +#define IOMMU_VDEVICE_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VDEVICE_ALLOC) #endif -- cgit v1.2.3 From 67db79dc1a411335f8a7e53a5e206d96b237d9a8 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 5 Nov 2024 12:05:11 -0800 Subject: iommu/viommu: Add cache_invalidate to iommufd_viommu_ops This per-vIOMMU cache_invalidate op is like the cache_invalidate_user op in struct iommu_domain_ops, but wider, supporting device cache (e.g. PCI ATC invaldiations). Link: https://patch.msgid.link/r/90138505850fa6b165135e78a87b4cc7022869a4.1730836308.git.nicolinc@nvidia.com Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- include/linux/iommufd.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index 71fa1e343023..2bc735ff9511 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -16,6 +16,7 @@ struct device; struct file; struct iommu_group; struct iommu_user_data; +struct iommu_user_data_array; struct iommufd_access; struct iommufd_ctx; struct iommufd_device; @@ -105,12 +106,21 @@ struct iommufd_viommu { * must be defined in include/uapi/linux/iommufd.h. * It must fully initialize the new iommu_domain before * returning. Upon failure, ERR_PTR must be returned. + * @cache_invalidate: Flush hardware cache used by a vIOMMU. It can be used for + * any IOMMU hardware specific cache: TLB and device cache. + * The @array passes in the cache invalidation requests, in + * form of a driver data structure. A driver must update the + * array->entry_num to report the number of handled requests. + * The data structure of the array entry must be defined in + * include/uapi/linux/iommufd.h */ struct iommufd_viommu_ops { void (*destroy)(struct iommufd_viommu *viommu); struct iommu_domain *(*alloc_domain_nested)( struct iommufd_viommu *viommu, u32 flags, const struct iommu_user_data *user_data); + int (*cache_invalidate)(struct iommufd_viommu *viommu, + struct iommu_user_data_array *array); }; #if IS_ENABLED(CONFIG_IOMMUFD) -- cgit v1.2.3 From 4f2e59ccb69866c5165525bd7aee47cd5cd00acc Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 5 Nov 2024 12:05:13 -0800 Subject: iommu: Add iommu_copy_struct_from_full_user_array helper The iommu_copy_struct_from_user_array helper can be used to copy a single entry from a user array which might not be efficient if the array is big. Add a new iommu_copy_struct_from_full_user_array to copy the entire user array at once. Update the existing iommu_copy_struct_from_user_array kdoc accordingly. Link: https://patch.msgid.link/r/5cd773d9c26920c5807d232b21d415ea79172e49.1730836308.git.nicolinc@nvidia.com Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- include/linux/iommu.h | 48 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 47 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 2574fc8abaf2..11de66237eaa 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -493,7 +493,9 @@ static inline int __iommu_copy_struct_from_user_array( * @index: Index to the location in the array to copy user data from * @min_last: The last member of the data structure @kdst points in the * initial version. - * Return 0 for success, otherwise -error. + * + * Copy a single entry from a user array. Return 0 for success, otherwise + * -error. */ #define iommu_copy_struct_from_user_array(kdst, user_array, data_type, index, \ min_last) \ @@ -501,6 +503,50 @@ static inline int __iommu_copy_struct_from_user_array( kdst, user_array, data_type, index, sizeof(*(kdst)), \ offsetofend(typeof(*(kdst)), min_last)) +/** + * iommu_copy_struct_from_full_user_array - Copy iommu driver specific user + * space data from an iommu_user_data_array + * @kdst: Pointer to an iommu driver specific user data that is defined in + * include/uapi/linux/iommufd.h + * @kdst_entry_size: sizeof(*kdst) + * @user_array: Pointer to a struct iommu_user_data_array for a user space + * array + * @data_type: The data type of the @kdst. Must match with @user_array->type + * + * Copy the entire user array. kdst must have room for kdst_entry_size * + * user_array->entry_num bytes. Return 0 for success, otherwise -error. + */ +static inline int +iommu_copy_struct_from_full_user_array(void *kdst, size_t kdst_entry_size, + struct iommu_user_data_array *user_array, + unsigned int data_type) +{ + unsigned int i; + int ret; + + if (user_array->type != data_type) + return -EINVAL; + if (!user_array->entry_num) + return -EINVAL; + if (likely(user_array->entry_len == kdst_entry_size)) { + if (copy_from_user(kdst, user_array->uptr, + user_array->entry_num * + user_array->entry_len)) + return -EFAULT; + } + + /* Copy item by item */ + for (i = 0; i != user_array->entry_num; i++) { + ret = copy_struct_from_user( + kdst + kdst_entry_size * i, kdst_entry_size, + user_array->uptr + user_array->entry_len * i, + user_array->entry_len); + if (ret) + return ret; + } + return 0; +} + /** * struct iommu_ops - iommu ops and capabilities * @capable: check capability -- cgit v1.2.3 From c747e67978ff473e6830afe0b1f3986dd1f444b5 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 5 Nov 2024 12:05:14 -0800 Subject: iommufd/viommu: Add iommufd_viommu_find_dev helper This avoids a bigger trouble of exposing struct iommufd_device and struct iommufd_vdevice in the public header. Link: https://patch.msgid.link/r/84fa7c624db4d4508067ccfdf42059533950180a.1730836308.git.nicolinc@nvidia.com Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/driver.c | 13 +++++++++++++ include/linux/iommufd.h | 8 ++++++++ 2 files changed, 21 insertions(+) (limited to 'include/linux') diff --git a/drivers/iommu/iommufd/driver.c b/drivers/iommu/iommufd/driver.c index 2bc47d92a0ab..7b67fdf44134 100644 --- a/drivers/iommu/iommufd/driver.c +++ b/drivers/iommu/iommufd/driver.c @@ -36,5 +36,18 @@ out_free: } EXPORT_SYMBOL_NS_GPL(_iommufd_object_alloc, IOMMUFD); +/* Caller should xa_lock(&viommu->vdevs) to protect the return value */ +struct device *iommufd_viommu_find_dev(struct iommufd_viommu *viommu, + unsigned long vdev_id) +{ + struct iommufd_vdevice *vdev; + + lockdep_assert_held(&viommu->vdevs.xa_lock); + + vdev = xa_load(&viommu->vdevs, vdev_id); + return vdev ? vdev->dev : NULL; +} +EXPORT_SYMBOL_NS_GPL(iommufd_viommu_find_dev, IOMMUFD); + MODULE_DESCRIPTION("iommufd code shared with builtin modules"); MODULE_LICENSE("GPL"); diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index 2bc735ff9511..11110c749200 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -185,6 +185,8 @@ static inline int iommufd_vfio_compat_set_no_iommu(struct iommufd_ctx *ictx) struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx, size_t size, enum iommufd_object_type type); +struct device *iommufd_viommu_find_dev(struct iommufd_viommu *viommu, + unsigned long vdev_id); #else /* !CONFIG_IOMMUFD_DRIVER_CORE */ static inline struct iommufd_object * _iommufd_object_alloc(struct iommufd_ctx *ictx, size_t size, @@ -192,6 +194,12 @@ _iommufd_object_alloc(struct iommufd_ctx *ictx, size_t size, { return ERR_PTR(-EOPNOTSUPP); } + +static inline struct device * +iommufd_viommu_find_dev(struct iommufd_viommu *viommu, unsigned long vdev_id) +{ + return NULL; +} #endif /* CONFIG_IOMMUFD_DRIVER_CORE */ /* -- cgit v1.2.3 From 67e4fe3985138325c9b21193be52266750616182 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 30 Oct 2024 21:20:54 -0300 Subject: iommu/arm-smmu-v3: Use S2FWB for NESTED domains Force Write Back (FWB) changes how the S2 IOPTE's MemAttr field works. When S2FWB is supported and enabled the IOPTE will force cachable access to IOMMU_CACHE memory when nesting with a S1 and deny cachable access when !IOMMU_CACHE. When using a single stage of translation, a simple S2 domain, it doesn't change things for PCI devices as it is just a different encoding for the existing mapping of the IOMMU protection flags to cachability attributes. For non-PCI it also changes the combining rules when incoming transactions have inconsistent attributes. However, when used with a nested S1, FWB has the effect of preventing the guest from choosing a MemAttr in it's S1 that would cause ordinary DMA to bypass the cache. Consistent with KVM we wish to deny the guest the ability to become incoherent with cached memory the hypervisor believes is cachable so we don't have to flush it. Allow NESTED domains to be created if the SMMU has S2FWB support and use S2FWB for NESTING_PARENTS. This is an additional option to CANWBS. Link: https://patch.msgid.link/r/10-v4-9e99b76f3518+3a8-smmuv3_nesting_jgg@nvidia.com Reviewed-by: Nicolin Chen Reviewed-by: Kevin Tian Reviewed-by: Jerry Snitselaar Reviewed-by: Donald Dutile Tested-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- .../iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c | 7 ++++-- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 8 ++++++- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 3 +++ drivers/iommu/io-pgtable-arm.c | 27 +++++++++++++++++----- include/linux/io-pgtable.h | 2 ++ 5 files changed, 38 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c index 91247a2a2d2c..a1c8fcd4797c 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c @@ -220,9 +220,12 @@ struct iommufd_viommu *arm_vsmmu_alloc(struct device *dev, * Must support some way to prevent the VM from bypassing the cache * because VFIO currently does not do any cache maintenance. canwbs * indicates the device is fully coherent and no cache maintenance is - * ever required, even for PCI No-Snoop. + * ever required, even for PCI No-Snoop. S2FWB means the S1 can't make + * things non-coherent using the memattr, but No-Snoop behavior is not + * effected. */ - if (!arm_smmu_master_canwbs(master)) + if (!arm_smmu_master_canwbs(master) && + !(smmu->features & ARM_SMMU_FEAT_S2FWB)) return ERR_PTR(-EOPNOTSUPP); vsmmu = iommufd_viommu_alloc(ictx, struct arm_vsmmu, core, diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 53f12b9d78ab..de598d66b5c2 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -1046,7 +1046,8 @@ void arm_smmu_get_ste_used(const __le64 *ent, __le64 *used_bits) /* S2 translates */ if (cfg & BIT(1)) { used_bits[1] |= - cpu_to_le64(STRTAB_STE_1_EATS | STRTAB_STE_1_SHCFG); + cpu_to_le64(STRTAB_STE_1_S2FWB | STRTAB_STE_1_EATS | + STRTAB_STE_1_SHCFG); used_bits[2] |= cpu_to_le64(STRTAB_STE_2_S2VMID | STRTAB_STE_2_VTCR | STRTAB_STE_2_S2AA64 | STRTAB_STE_2_S2ENDI | @@ -1654,6 +1655,8 @@ void arm_smmu_make_s2_domain_ste(struct arm_smmu_ste *target, FIELD_PREP(STRTAB_STE_1_EATS, ats_enabled ? STRTAB_STE_1_EATS_TRANS : 0)); + if (pgtbl_cfg->quirks & IO_PGTABLE_QUIRK_ARM_S2FWB) + target->data[1] |= cpu_to_le64(STRTAB_STE_1_S2FWB); if (smmu->features & ARM_SMMU_FEAT_ATTR_TYPES_OVR) target->data[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG, STRTAB_STE_1_SHCFG_INCOMING)); @@ -2472,6 +2475,9 @@ static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain, pgtbl_cfg.oas = smmu->oas; fmt = ARM_64_LPAE_S2; finalise_stage_fn = arm_smmu_domain_finalise_s2; + if ((smmu->features & ARM_SMMU_FEAT_S2FWB) && + (flags & IOMMU_HWPT_ALLOC_NEST_PARENT)) + pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_ARM_S2FWB; break; default: return -EINVAL; diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index 3fabe187ea78..5a025d310dbe 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -58,6 +58,7 @@ struct arm_smmu_device; #define IDR1_SIDSIZE GENMASK(5, 0) #define ARM_SMMU_IDR3 0xc +#define IDR3_FWB (1 << 8) #define IDR3_RIL (1 << 10) #define ARM_SMMU_IDR5 0x14 @@ -265,6 +266,7 @@ static inline u32 arm_smmu_strtab_l2_idx(u32 sid) #define STRTAB_STE_1_S1COR GENMASK_ULL(5, 4) #define STRTAB_STE_1_S1CSH GENMASK_ULL(7, 6) +#define STRTAB_STE_1_S2FWB (1UL << 25) #define STRTAB_STE_1_S1STALLD (1UL << 27) #define STRTAB_STE_1_EATS GENMASK_ULL(29, 28) @@ -740,6 +742,7 @@ struct arm_smmu_device { #define ARM_SMMU_FEAT_ATTR_TYPES_OVR (1 << 20) #define ARM_SMMU_FEAT_HA (1 << 21) #define ARM_SMMU_FEAT_HD (1 << 22) +#define ARM_SMMU_FEAT_S2FWB (1 << 23) u32 features; #define ARM_SMMU_OPT_SKIP_PREFETCH (1 << 0) diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index 0e67f1721a3d..74f58c6ac30c 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -106,6 +106,18 @@ #define ARM_LPAE_PTE_HAP_FAULT (((arm_lpae_iopte)0) << 6) #define ARM_LPAE_PTE_HAP_READ (((arm_lpae_iopte)1) << 6) #define ARM_LPAE_PTE_HAP_WRITE (((arm_lpae_iopte)2) << 6) +/* + * For !FWB these code to: + * 1111 = Normal outer write back cachable / Inner Write Back Cachable + * Permit S1 to override + * 0101 = Normal Non-cachable / Inner Non-cachable + * 0001 = Device / Device-nGnRE + * For S2FWB these code: + * 0110 Force Normal Write Back + * 0101 Normal* is forced Normal-NC, Device unchanged + * 0001 Force Device-nGnRE + */ +#define ARM_LPAE_PTE_MEMATTR_FWB_WB (((arm_lpae_iopte)0x6) << 2) #define ARM_LPAE_PTE_MEMATTR_OIWB (((arm_lpae_iopte)0xf) << 2) #define ARM_LPAE_PTE_MEMATTR_NC (((arm_lpae_iopte)0x5) << 2) #define ARM_LPAE_PTE_MEMATTR_DEV (((arm_lpae_iopte)0x1) << 2) @@ -458,12 +470,16 @@ static arm_lpae_iopte arm_lpae_prot_to_pte(struct arm_lpae_io_pgtable *data, */ if (data->iop.fmt == ARM_64_LPAE_S2 || data->iop.fmt == ARM_32_LPAE_S2) { - if (prot & IOMMU_MMIO) + if (prot & IOMMU_MMIO) { pte |= ARM_LPAE_PTE_MEMATTR_DEV; - else if (prot & IOMMU_CACHE) - pte |= ARM_LPAE_PTE_MEMATTR_OIWB; - else + } else if (prot & IOMMU_CACHE) { + if (data->iop.cfg.quirks & IO_PGTABLE_QUIRK_ARM_S2FWB) + pte |= ARM_LPAE_PTE_MEMATTR_FWB_WB; + else + pte |= ARM_LPAE_PTE_MEMATTR_OIWB; + } else { pte |= ARM_LPAE_PTE_MEMATTR_NC; + } } else { if (prot & IOMMU_MMIO) pte |= (ARM_LPAE_MAIR_ATTR_IDX_DEV @@ -1035,8 +1051,7 @@ arm_64_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie) struct arm_lpae_io_pgtable *data; typeof(&cfg->arm_lpae_s2_cfg.vtcr) vtcr = &cfg->arm_lpae_s2_cfg.vtcr; - /* The NS quirk doesn't apply at stage 2 */ - if (cfg->quirks) + if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_S2FWB)) return NULL; data = arm_lpae_alloc_pgtable(cfg); diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index b1ecfc3cd5bc..ce86b09ae80f 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -87,6 +87,7 @@ struct io_pgtable_cfg { * attributes set in the TCR for a non-coherent page-table walker. * * IO_PGTABLE_QUIRK_ARM_HD: Enables dirty tracking in stage 1 pagetable. + * IO_PGTABLE_QUIRK_ARM_S2FWB: Use the FWB format for the MemAttrs bits */ #define IO_PGTABLE_QUIRK_ARM_NS BIT(0) #define IO_PGTABLE_QUIRK_NO_PERMS BIT(1) @@ -95,6 +96,7 @@ struct io_pgtable_cfg { #define IO_PGTABLE_QUIRK_ARM_TTBR1 BIT(5) #define IO_PGTABLE_QUIRK_ARM_OUTER_WBWA BIT(6) #define IO_PGTABLE_QUIRK_ARM_HD BIT(7) + #define IO_PGTABLE_QUIRK_ARM_S2FWB BIT(8) unsigned long quirks; unsigned long pgsize_bitmap; unsigned int ias; -- cgit v1.2.3