From a9c362db39207c4934c9125e56ed730c5297c37c Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 21 Nov 2023 18:03:59 +0000 Subject: iommu: Validate that devices match domains Before we can allow drivers to coexist, we need to make sure that one driver's domain ops can't misinterpret another driver's dev_iommu_priv data. To that end, add a token to the domain so we can remember how it was allocated - for now this may as well be the device ops, since they still correlate 1:1 with drivers. We can trust ourselves for internal default domain attachment, so add checks to cover all the public attach interfaces. Reviewed-by: Lu Baolu Reviewed-by: Jason Gunthorpe Reviewed-by: Jerry Snitselaar Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/097c6f30480e4efe12195d00ba0e84ea4837fb4c.1700589539.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index ec289c1016f5..077bf8cae2f7 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -106,7 +106,7 @@ struct iommu_domain { unsigned type; const struct iommu_domain_ops *ops; const struct iommu_dirty_ops *dirty_ops; - + const struct iommu_ops *owner; /* Whose domain_alloc we came from */ unsigned long pgsize_bitmap; /* Bitmap of page sizes in use */ struct iommu_domain_geometry geometry; struct iommu_dma_cookie *iova_cookie; -- cgit v1.2.3 From 17de3f5fdd35676b0e3d41c7c9bf4e3032eb3673 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 21 Nov 2023 18:04:02 +0000 Subject: iommu: Retire bus ops With the rest of the API internals converted, it's time to finally tackle probe_device and how we bootstrap the per-device ops association to begin with. 
This ends up being disappointingly straightforward, since fwspec users are already doing it in order to find their of_xlate callback, and it works out that we can easily do the equivalent for other drivers too. Then shuffle the remaining awareness of iommu_ops into the couple of core headers that still need it, and breathe a sigh of relief. Ding dong the bus ops are gone! CC: Rafael J. Wysocki Acked-by: Christoph Hellwig Acked-by: Greg Kroah-Hartman Reviewed-by: Lu Baolu Reviewed-by: Jason Gunthorpe Reviewed-by: Jerry Snitselaar Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/a59011ef65b4b6657cb0b7a388d786b779b61305.1700589539.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 31 ++++++++++++++++++------------- include/acpi/acpi_bus.h | 2 ++ include/linux/device.h | 1 - include/linux/device/bus.h | 5 ----- include/linux/dma-map-ops.h | 1 + 5 files changed, 21 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 88aeae0acd9b..254f42b4220b 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -148,7 +148,7 @@ struct iommu_group_attribute iommu_group_attr_##_name = \ static LIST_HEAD(iommu_device_list); static DEFINE_SPINLOCK(iommu_device_lock); -static struct bus_type * const iommu_buses[] = { +static const struct bus_type * const iommu_buses[] = { &platform_bus_type, #ifdef CONFIG_PCI &pci_bus_type, @@ -257,13 +257,6 @@ int iommu_device_register(struct iommu_device *iommu, /* We need to be able to take module references appropriately */ if (WARN_ON(is_module_address((unsigned long)ops) && !ops->owner)) return -EINVAL; - /* - * Temporarily enforce global restriction to a single driver. This was - * already the de-facto behaviour, since any possible combination of - * existing drivers would compete for at least the PCI or platform bus. 
- */ - if (iommu_buses[0]->iommu_ops && iommu_buses[0]->iommu_ops != ops) - return -EBUSY; iommu->ops = ops; if (hwdev) @@ -273,10 +266,8 @@ int iommu_device_register(struct iommu_device *iommu, list_add_tail(&iommu->list, &iommu_device_list); spin_unlock(&iommu_device_lock); - for (int i = 0; i < ARRAY_SIZE(iommu_buses) && !err; i++) { - iommu_buses[i]->iommu_ops = ops; + for (int i = 0; i < ARRAY_SIZE(iommu_buses) && !err; i++) err = bus_iommu_probe(iommu_buses[i]); - } if (err) iommu_device_unregister(iommu); return err; @@ -329,7 +320,6 @@ int iommu_device_register_bus(struct iommu_device *iommu, list_add_tail(&iommu->list, &iommu_device_list); spin_unlock(&iommu_device_lock); - bus->iommu_ops = ops; err = bus_iommu_probe(bus); if (err) { iommu_device_unregister_bus(iommu, bus, nb); @@ -496,12 +486,27 @@ static void iommu_deinit_device(struct device *dev) static int __iommu_probe_device(struct device *dev, struct list_head *group_list) { - const struct iommu_ops *ops = dev->bus->iommu_ops; + const struct iommu_ops *ops; + struct iommu_fwspec *fwspec; struct iommu_group *group; static DEFINE_MUTEX(iommu_probe_device_lock); struct group_device *gdev; int ret; + /* + * For FDT-based systems and ACPI IORT/VIOT, drivers register IOMMU + * instances with non-NULL fwnodes, and client devices should have been + * identified with a fwspec by this point. Otherwise, we can currently + * assume that only one of Intel, AMD, s390, PAMU or legacy SMMUv2 can + * be present, and that any of their registered instances has suitable + * ops for probing, and thus cheekily co-opt the same mechanism. 
+ */ + fwspec = dev_iommu_fwspec_get(dev); + if (fwspec && fwspec->ops) + ops = fwspec->ops; + else + ops = iommu_ops_from_fwnode(NULL); + if (!ops) return -ENODEV; /* diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 1216d72c650f..89079787905d 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -627,6 +627,8 @@ struct acpi_pci_root { /* helper */ +struct iommu_ops; + bool acpi_dma_supported(const struct acpi_device *adev); enum dev_dma_attr acpi_get_dma_attr(struct acpi_device *adev); int acpi_iommu_fwspec_init(struct device *dev, u32 id, diff --git a/include/linux/device.h b/include/linux/device.h index d7a72a8749ea..0314dbbdb534 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -42,7 +42,6 @@ struct class; struct subsys_private; struct device_node; struct fwnode_handle; -struct iommu_ops; struct iommu_group; struct dev_pin_info; struct dev_iommu; diff --git a/include/linux/device/bus.h b/include/linux/device/bus.h index ae10c4322754..e25aab08f873 100644 --- a/include/linux/device/bus.h +++ b/include/linux/device/bus.h @@ -62,9 +62,6 @@ struct fwnode_handle; * this bus. * @pm: Power management operations of this bus, callback the specific * device driver's pm-ops. - * @iommu_ops: IOMMU specific operations for this bus, used to attach IOMMU - * driver implementations to a bus and allow the driver to do - * bus-specific setup * @need_parent_lock: When probing or removing a device on this bus, the * device core should lock the device's parent. 
* @@ -104,8 +101,6 @@ struct bus_type { const struct dev_pm_ops *pm; - const struct iommu_ops *iommu_ops; - bool need_parent_lock; }; diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h index f2fc203fb8a1..a52e508d1869 100644 --- a/include/linux/dma-map-ops.h +++ b/include/linux/dma-map-ops.h @@ -11,6 +11,7 @@ #include struct cma; +struct iommu_ops; /* * Values for struct dma_map_ops.flags: -- cgit v1.2.3 From 17b226dcf80ce79d02f4f0b08813d8848885b986 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Fri, 24 Nov 2023 15:24:33 +0100 Subject: iommu: Allow passing custom allocators to pgtable drivers This will be useful for GPU drivers who want to keep page tables in a pool so they can: - keep freed page tables in a free pool and speed-up upcoming page table allocations - batch page table allocation instead of allocating one page at a time - pre-reserve pages for page tables needed for map/unmap operations, to ensure map/unmap operations don't try to allocate memory in paths they're not allowed to block or fail It might also be valuable for other aspects of GPU and similar use-cases, like fine-grained memory accounting and resource limiting. We will extend the Arm LPAE format to support custom allocators in a separate commit. 
Signed-off-by: Boris Brezillon Reviewed-by: Steven Price Reviewed-by: Robin Murphy Link: https://lore.kernel.org/r/20231124142434.1577550-2-boris.brezillon@collabora.com Signed-off-by: Joerg Roedel --- drivers/iommu/io-pgtable.c | 23 +++++++++++++++++++++++ include/linux/io-pgtable.h | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+) (limited to 'include/linux') diff --git a/drivers/iommu/io-pgtable.c b/drivers/iommu/io-pgtable.c index b843fcd365d2..8841c1487f00 100644 --- a/drivers/iommu/io-pgtable.c +++ b/drivers/iommu/io-pgtable.c @@ -34,6 +34,26 @@ io_pgtable_init_table[IO_PGTABLE_NUM_FMTS] = { #endif }; +static int check_custom_allocator(enum io_pgtable_fmt fmt, + struct io_pgtable_cfg *cfg) +{ + /* No custom allocator, no need to check the format. */ + if (!cfg->alloc && !cfg->free) + return 0; + + /* When passing a custom allocator, both the alloc and free + * functions should be provided. + */ + if (!cfg->alloc || !cfg->free) + return -EINVAL; + + /* Make sure the format supports custom allocators. */ + if (io_pgtable_init_table[fmt]->caps & IO_PGTABLE_CAP_CUSTOM_ALLOCATOR) + return 0; + + return -EINVAL; +} + struct io_pgtable_ops *alloc_io_pgtable_ops(enum io_pgtable_fmt fmt, struct io_pgtable_cfg *cfg, void *cookie) @@ -44,6 +64,9 @@ struct io_pgtable_ops *alloc_io_pgtable_ops(enum io_pgtable_fmt fmt, if (fmt >= IO_PGTABLE_NUM_FMTS) return NULL; + if (check_custom_allocator(fmt, cfg)) + return NULL; + fns = io_pgtable_init_table[fmt]; if (!fns) return NULL; diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index 25142a0e2fc2..86cf1f7ae389 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -100,6 +100,30 @@ struct io_pgtable_cfg { const struct iommu_flush_ops *tlb; struct device *iommu_dev; + /** + * @alloc: Custom page allocator. + * + * Optional hook used to allocate page tables. If this function is NULL, + * @free must be NULL too. 
+ * + * Memory returned should be zeroed and suitable for dma_map_single() and + * virt_to_phys(). + * + * Not all formats support custom page allocators. Before considering + * passing a non-NULL value, make sure the chosen page format supports + * this feature. + */ + void *(*alloc)(void *cookie, size_t size, gfp_t gfp); + + /** + * @free: Custom page de-allocator. + * + * Optional hook used to free page tables allocated with the @alloc + * hook. Must be non-NULL if @alloc is not NULL, must be NULL + * otherwise. + */ + void (*free)(void *cookie, void *pages, size_t size); + /* Low-level data specific to the table format */ union { struct { @@ -241,16 +265,26 @@ io_pgtable_tlb_add_page(struct io_pgtable *iop, iop->cfg.tlb->tlb_add_page(gather, iova, granule, iop->cookie); } +/** + * enum io_pgtable_caps - IO page table backend capabilities. + */ +enum io_pgtable_caps { + /** @IO_PGTABLE_CAP_CUSTOM_ALLOCATOR: Backend accepts custom page table allocators. */ + IO_PGTABLE_CAP_CUSTOM_ALLOCATOR = BIT(0), +}; + /** * struct io_pgtable_init_fns - Alloc/free a set of page tables for a * particular format. * * @alloc: Allocate a set of page tables described by cfg. * @free: Free the page tables associated with iop. + * @caps: Combination of @io_pgtable_caps flags encoding the backend capabilities. 
*/ struct io_pgtable_init_fns { struct io_pgtable *(*alloc)(struct io_pgtable_cfg *cfg, void *cookie); void (*free)(struct io_pgtable *iop); + u32 caps; }; extern struct io_pgtable_init_fns io_pgtable_arm_32_lpae_s1_init_fns; -- cgit v1.2.3 From 8f23f5dba6b4693448144bde4dd6f537543442c2 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Fri, 27 Oct 2023 08:05:20 +0800 Subject: iommu: Change kconfig around IOMMU_SVA Linus suggested that the kconfig here is confusing: https://lore.kernel.org/all/CAHk-=wgUiAtiszwseM1p2fCJ+sC4XWQ+YN4TanFhUgvUqjr9Xw@mail.gmail.com/ Let's break it into three kconfigs controlling distinct things: - CONFIG_IOMMU_MM_DATA controls if the mm_struct has the additional fields for the IOMMU. Currently only PASID, but later patches store a struct iommu_mm_data * - CONFIG_ARCH_HAS_CPU_PASID controls if the arch needs the scheduling bit for keeping track of the ENQCMD instruction. x86 will select this if IOMMU_SVA is enabled - IOMMU_SVA controls if the IOMMU core compiles in the SVA support code for iommu driver use and the IOMMU exported API This way ARM will not enable CONFIG_ARCH_HAS_CPU_PASID Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20231027000525.1278806-2-tina.zhang@intel.com Signed-off-by: Joerg Roedel --- arch/Kconfig | 5 +++++ arch/x86/Kconfig | 1 + arch/x86/kernel/traps.c | 2 +- drivers/iommu/Kconfig | 1 + include/linux/iommu.h | 2 +- include/linux/mm_types.h | 2 +- include/linux/sched.h | 2 +- kernel/fork.c | 2 +- mm/Kconfig | 3 +++ mm/init-mm.c | 2 +- 10 files changed, 16 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/arch/Kconfig b/arch/Kconfig index f4b210ab0612..3e49f862670e 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -301,6 +301,11 @@ config ARCH_HAS_DMA_CLEAR_UNCACHED config ARCH_HAS_CPU_FINALIZE_INIT bool +# The architecture has a per-task state that includes the mm's PASID +config ARCH_HAS_CPU_PASID + bool + select IOMMU_MM_DATA + # Select if arch init_task must go in the 
__init_task_data section config ARCH_TASK_STRUCT_ON_STACK bool diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 3762f41bb092..68a2ec36a46e 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -71,6 +71,7 @@ config X86 select ARCH_HAS_CACHE_LINE_SIZE select ARCH_HAS_CPU_CACHE_INVALIDATE_MEMREGION select ARCH_HAS_CPU_FINALIZE_INIT + select ARCH_HAS_CPU_PASID if IOMMU_SVA select ARCH_HAS_CURRENT_STACK_POINTER select ARCH_HAS_DEBUG_VIRTUAL select ARCH_HAS_DEBUG_VM_PGTABLE if !X86_PAE diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index c876f1d36a81..2b62dbb3396a 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -565,7 +565,7 @@ static bool fixup_iopl_exception(struct pt_regs *regs) */ static bool try_fixup_enqcmd_gp(void) { -#ifdef CONFIG_IOMMU_SVA +#ifdef CONFIG_ARCH_HAS_CPU_PASID u32 pasid; /* diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index 7673bb82945b..9a29d742617e 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -160,6 +160,7 @@ config IOMMU_DMA # Shared Virtual Addressing config IOMMU_SVA + select IOMMU_MM_DATA bool config FSL_PAMU diff --git a/include/linux/iommu.h b/include/linux/iommu.h index c7394b39599c..cd3f398095bf 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -1337,7 +1337,7 @@ static inline bool tegra_dev_iommu_get_stream_id(struct device *dev, u32 *stream return false; } -#ifdef CONFIG_IOMMU_SVA +#ifdef CONFIG_IOMMU_MM_DATA static inline void mm_pasid_init(struct mm_struct *mm) { mm->pasid = IOMMU_PASID_INVALID; diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 957ce38768b2..41f248608dd9 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -938,7 +938,7 @@ struct mm_struct { #endif struct work_struct async_put_work; -#ifdef CONFIG_IOMMU_SVA +#ifdef CONFIG_IOMMU_MM_DATA u32 pasid; #endif #ifdef CONFIG_KSM diff --git a/include/linux/sched.h b/include/linux/sched.h index 292c31697248..70888a36677b 100644 --- 
a/include/linux/sched.h +++ b/include/linux/sched.h @@ -954,7 +954,7 @@ struct task_struct { /* Recursion prevention for eventfd_signal() */ unsigned in_eventfd:1; #endif -#ifdef CONFIG_IOMMU_SVA +#ifdef CONFIG_ARCH_HAS_CPU_PASID unsigned pasid_activated:1; #endif #ifdef CONFIG_CPU_SUP_INTEL diff --git a/kernel/fork.c b/kernel/fork.c index 10917c3e1f03..43fd9bc1a522 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1179,7 +1179,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node) tsk->use_memdelay = 0; #endif -#ifdef CONFIG_IOMMU_SVA +#ifdef CONFIG_ARCH_HAS_CPU_PASID tsk->pasid_activated = 0; #endif diff --git a/mm/Kconfig b/mm/Kconfig index 89971a894b60..0143f4d905c9 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -1270,6 +1270,9 @@ config LOCK_MM_AND_FIND_VMA bool depends on !STACK_GROWSUP +config IOMMU_MM_DATA + bool + source "mm/damon/Kconfig" endmenu diff --git a/mm/init-mm.c b/mm/init-mm.c index cfd367822cdd..c52dc2740a3d 100644 --- a/mm/init-mm.c +++ b/mm/init-mm.c @@ -44,7 +44,7 @@ struct mm_struct init_mm = { #endif .user_ns = &init_user_ns, .cpu_bitmap = CPU_BITS_NONE, -#ifdef CONFIG_IOMMU_SVA +#ifdef CONFIG_IOMMU_MM_DATA .pasid = IOMMU_PASID_INVALID, #endif INIT_MM_CONTEXT(init_mm) -- cgit v1.2.3 From 2396046d75d3c0b2cfead852a77efd023f8539dc Mon Sep 17 00:00:00 2001 From: Tina Zhang Date: Fri, 27 Oct 2023 08:05:22 +0800 Subject: iommu: Add mm_get_enqcmd_pasid() helper function mm_get_enqcmd_pasid() should be used by architecture code and closely related to learn the PASID value that the x86 ENQCMD operation should use for the mm. For the moment SMMUv3 uses this without any connection to ENQCMD, it will be cleaned up similar to how the prior patch made VT-d use the PASID argument of set_dev_pasid(). The motivation is to replace mm->pasid with an iommu private data structure that is introduced in a later patch. 
Reviewed-by: Lu Baolu Reviewed-by: Jason Gunthorpe Tested-by: Nicolin Chen Signed-off-by: Tina Zhang Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20231027000525.1278806-4-tina.zhang@intel.com Signed-off-by: Joerg Roedel --- arch/x86/kernel/traps.c | 2 +- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c | 23 +++++++++++++++-------- drivers/iommu/iommu-sva.c | 2 +- include/linux/iommu.h | 12 ++++++++++++ 4 files changed, 29 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 2b62dbb3396a..5944d759afe7 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -591,7 +591,7 @@ static bool try_fixup_enqcmd_gp(void) if (!mm_valid_pasid(current->mm)) return false; - pasid = current->mm->pasid; + pasid = mm_get_enqcmd_pasid(current->mm); /* * Did this thread already have its PASID activated? diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c index 353248ab18e7..05722121f00e 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c @@ -246,7 +246,8 @@ static void arm_smmu_mm_arch_invalidate_secondary_tlbs(struct mmu_notifier *mn, smmu_domain); } - arm_smmu_atc_inv_domain(smmu_domain, mm->pasid, start, size); + arm_smmu_atc_inv_domain(smmu_domain, mm_get_enqcmd_pasid(mm), start, + size); } static void arm_smmu_mm_release(struct mmu_notifier *mn, struct mm_struct *mm) @@ -264,10 +265,11 @@ static void arm_smmu_mm_release(struct mmu_notifier *mn, struct mm_struct *mm) * DMA may still be running. Keep the cd valid to avoid C_BAD_CD events, * but disable translation. 
*/ - arm_smmu_update_ctx_desc_devices(smmu_domain, mm->pasid, &quiet_cd); + arm_smmu_update_ctx_desc_devices(smmu_domain, mm_get_enqcmd_pasid(mm), + &quiet_cd); arm_smmu_tlb_inv_asid(smmu_domain->smmu, smmu_mn->cd->asid); - arm_smmu_atc_inv_domain(smmu_domain, mm->pasid, 0, 0); + arm_smmu_atc_inv_domain(smmu_domain, mm_get_enqcmd_pasid(mm), 0, 0); smmu_mn->cleared = true; mutex_unlock(&sva_lock); @@ -325,10 +327,13 @@ arm_smmu_mmu_notifier_get(struct arm_smmu_domain *smmu_domain, spin_lock_irqsave(&smmu_domain->devices_lock, flags); list_for_each_entry(master, &smmu_domain->devices, domain_head) { - ret = arm_smmu_write_ctx_desc(master, mm->pasid, cd); + ret = arm_smmu_write_ctx_desc(master, mm_get_enqcmd_pasid(mm), + cd); if (ret) { - list_for_each_entry_from_reverse(master, &smmu_domain->devices, domain_head) - arm_smmu_write_ctx_desc(master, mm->pasid, NULL); + list_for_each_entry_from_reverse( + master, &smmu_domain->devices, domain_head) + arm_smmu_write_ctx_desc( + master, mm_get_enqcmd_pasid(mm), NULL); break; } } @@ -358,7 +363,8 @@ static void arm_smmu_mmu_notifier_put(struct arm_smmu_mmu_notifier *smmu_mn) list_del(&smmu_mn->list); - arm_smmu_update_ctx_desc_devices(smmu_domain, mm->pasid, NULL); + arm_smmu_update_ctx_desc_devices(smmu_domain, mm_get_enqcmd_pasid(mm), + NULL); /* * If we went through clear(), we've already invalidated, and no @@ -366,7 +372,8 @@ static void arm_smmu_mmu_notifier_put(struct arm_smmu_mmu_notifier *smmu_mn) */ if (!smmu_mn->cleared) { arm_smmu_tlb_inv_asid(smmu_domain->smmu, cd->asid); - arm_smmu_atc_inv_domain(smmu_domain, mm->pasid, 0, 0); + arm_smmu_atc_inv_domain(smmu_domain, mm_get_enqcmd_pasid(mm), 0, + 0); } /* Frees smmu_mn */ diff --git a/drivers/iommu/iommu-sva.c b/drivers/iommu/iommu-sva.c index b78671a8a914..4a2f5699747f 100644 --- a/drivers/iommu/iommu-sva.c +++ b/drivers/iommu/iommu-sva.c @@ -141,7 +141,7 @@ u32 iommu_sva_get_pasid(struct iommu_sva *handle) { struct iommu_domain *domain = handle->domain; - 
return domain->mm->pasid; + return mm_get_enqcmd_pasid(domain->mm); } EXPORT_SYMBOL_GPL(iommu_sva_get_pasid); diff --git a/include/linux/iommu.h b/include/linux/iommu.h index cd3f398095bf..4fb239c6ca8d 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -1346,6 +1346,12 @@ static inline bool mm_valid_pasid(struct mm_struct *mm) { return mm->pasid != IOMMU_PASID_INVALID; } + +static inline u32 mm_get_enqcmd_pasid(struct mm_struct *mm) +{ + return mm->pasid; +} + void mm_pasid_drop(struct mm_struct *mm); struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm); @@ -1368,6 +1374,12 @@ static inline u32 iommu_sva_get_pasid(struct iommu_sva *handle) } static inline void mm_pasid_init(struct mm_struct *mm) {} static inline bool mm_valid_pasid(struct mm_struct *mm) { return false; } + +static inline u32 mm_get_enqcmd_pasid(struct mm_struct *mm) +{ + return IOMMU_PASID_INVALID; +} + static inline void mm_pasid_drop(struct mm_struct *mm) {} #endif /* CONFIG_IOMMU_SVA */ -- cgit v1.2.3 From 541a3e257d48c16b77d19f39ed939ef5832046df Mon Sep 17 00:00:00 2001 From: Tina Zhang Date: Fri, 27 Oct 2023 08:05:23 +0800 Subject: mm: Add structure to keep sva information Introduce iommu_mm_data structure to keep sva information (pasid and the related sva domains). Add iommu_mm pointer, pointing to an instance of iommu_mm_data structure, to mm. 
Reviewed-by: Vasant Hegde Reviewed-by: Lu Baolu Reviewed-by: Jason Gunthorpe Tested-by: Nicolin Chen Signed-off-by: Tina Zhang Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20231027000525.1278806-5-tina.zhang@intel.com Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 5 +++++ include/linux/mm_types.h | 2 ++ 2 files changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 4fb239c6ca8d..f7b1b469e98d 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -812,6 +812,11 @@ struct iommu_sva { struct iommu_domain *domain; }; +struct iommu_mm_data { + u32 pasid; + struct list_head sva_domains; +}; + int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode, const struct iommu_ops *ops); void iommu_fwspec_free(struct device *dev); diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 41f248608dd9..0b4314fab478 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -727,6 +727,7 @@ struct mm_cid { #endif struct kioctx_table; +struct iommu_mm_data; struct mm_struct { struct { /* @@ -940,6 +941,7 @@ struct mm_struct { #ifdef CONFIG_IOMMU_MM_DATA u32 pasid; + struct iommu_mm_data *iommu_mm; #endif #ifdef CONFIG_KSM /* -- cgit v1.2.3 From 092edaddb660376648acb97678570ed5d8299768 Mon Sep 17 00:00:00 2001 From: Tina Zhang Date: Fri, 27 Oct 2023 08:05:24 +0800 Subject: iommu: Support mm PASID 1:n with sva domains Each mm bound to devices gets a PASID and corresponding sva domains allocated in iommu_sva_bind_device(), which are referenced by iommu_mm field of the mm. The PASID is released in __mmdrop(), while a sva domain is released when no one is using it (the reference count is decremented in iommu_sva_unbind_device()). 
However, although sva domains and their PASID are separate objects such that their own life cycles could be handled independently, an enqcmd use case may require releasing the PASID in releasing the mm (i.e., once a PASID is allocated for a mm, it will be permanently used by the mm and won't be released until the end of mm) and only allows to drop the PASID after the sva domains are released. To this end, mmgrab() is called in iommu_sva_domain_alloc() to increment the mm reference count and mmdrop() is invoked in iommu_domain_free() to decrement the mm reference count. Since the required info of PASID and sva domains is kept in struct iommu_mm_data of a mm, use mm->iommu_mm field instead of the old pasid field in mm struct. The sva domain list is protected by iommu_sva_lock. Besides, this patch removes mm_pasid_init(), as with the introduced iommu_mm structure, initializing mm pasid in mm_init() is unnecessary. Reviewed-by: Lu Baolu Reviewed-by: Vasant Hegde Reviewed-by: Jason Gunthorpe Tested-by: Nicolin Chen Signed-off-by: Tina Zhang Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20231027000525.1278806-6-tina.zhang@intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/iommu-sva.c | 92 +++++++++++++++++++++++++++-------------------- include/linux/iommu.h | 23 ++++++++++-- 2 files changed, 74 insertions(+), 41 deletions(-) (limited to 'include/linux') diff --git a/drivers/iommu/iommu-sva.c b/drivers/iommu/iommu-sva.c index 4a2f5699747f..5175e8d85247 100644 --- a/drivers/iommu/iommu-sva.c +++ b/drivers/iommu/iommu-sva.c @@ -12,32 +12,42 @@ static DEFINE_MUTEX(iommu_sva_lock); /* Allocate a PASID for the mm within range (inclusive) */ -static int iommu_sva_alloc_pasid(struct mm_struct *mm, struct device *dev) +static struct iommu_mm_data *iommu_alloc_mm_data(struct mm_struct *mm, struct device *dev) { + struct iommu_mm_data *iommu_mm; ioasid_t pasid; - int ret = 0; + + lockdep_assert_held(&iommu_sva_lock); if (!arch_pgtable_dma_compat(mm)) - return 
-EBUSY; + return ERR_PTR(-EBUSY); - mutex_lock(&iommu_sva_lock); + iommu_mm = mm->iommu_mm; /* Is a PASID already associated with this mm? */ - if (mm_valid_pasid(mm)) { - if (mm->pasid >= dev->iommu->max_pasids) - ret = -EOVERFLOW; - goto out; + if (iommu_mm) { + if (iommu_mm->pasid >= dev->iommu->max_pasids) + return ERR_PTR(-EOVERFLOW); + return iommu_mm; } + iommu_mm = kzalloc(sizeof(struct iommu_mm_data), GFP_KERNEL); + if (!iommu_mm) + return ERR_PTR(-ENOMEM); + pasid = iommu_alloc_global_pasid(dev); if (pasid == IOMMU_PASID_INVALID) { - ret = -ENOSPC; - goto out; + kfree(iommu_mm); + return ERR_PTR(-ENOSPC); } - mm->pasid = pasid; - ret = 0; -out: - mutex_unlock(&iommu_sva_lock); - return ret; + iommu_mm->pasid = pasid; + INIT_LIST_HEAD(&iommu_mm->sva_domains); + /* + * Make sure the write to mm->iommu_mm is not reordered in front of + * initialization to iommu_mm fields. If it does, readers may see a + * valid iommu_mm with uninitialized values. + */ + smp_store_release(&mm->iommu_mm, iommu_mm); + return iommu_mm; } /** @@ -58,31 +68,33 @@ out: */ struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm) { + struct iommu_mm_data *iommu_mm; struct iommu_domain *domain; struct iommu_sva *handle; int ret; + mutex_lock(&iommu_sva_lock); + /* Allocate mm->pasid if necessary. */ - ret = iommu_sva_alloc_pasid(mm, dev); - if (ret) - return ERR_PTR(ret); + iommu_mm = iommu_alloc_mm_data(mm, dev); + if (IS_ERR(iommu_mm)) { + ret = PTR_ERR(iommu_mm); + goto out_unlock; + } handle = kzalloc(sizeof(*handle), GFP_KERNEL); - if (!handle) - return ERR_PTR(-ENOMEM); - - mutex_lock(&iommu_sva_lock); - /* Search for an existing domain. */ - domain = iommu_get_domain_for_dev_pasid(dev, mm->pasid, - IOMMU_DOMAIN_SVA); - if (IS_ERR(domain)) { - ret = PTR_ERR(domain); + if (!handle) { + ret = -ENOMEM; goto out_unlock; } - if (domain) { - domain->users++; - goto out; + /* Search for an existing domain. 
*/ + list_for_each_entry(domain, &mm->iommu_mm->sva_domains, next) { + ret = iommu_attach_device_pasid(domain, dev, iommu_mm->pasid); + if (!ret) { + domain->users++; + goto out; + } } /* Allocate a new domain and set it on device pasid. */ @@ -92,23 +104,23 @@ struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm goto out_unlock; } - ret = iommu_attach_device_pasid(domain, dev, mm->pasid); + ret = iommu_attach_device_pasid(domain, dev, iommu_mm->pasid); if (ret) goto out_free_domain; domain->users = 1; + list_add(&domain->next, &mm->iommu_mm->sva_domains); + out: mutex_unlock(&iommu_sva_lock); handle->dev = dev; handle->domain = domain; - return handle; out_free_domain: iommu_domain_free(domain); + kfree(handle); out_unlock: mutex_unlock(&iommu_sva_lock); - kfree(handle); - return ERR_PTR(ret); } EXPORT_SYMBOL_GPL(iommu_sva_bind_device); @@ -124,12 +136,13 @@ EXPORT_SYMBOL_GPL(iommu_sva_bind_device); void iommu_sva_unbind_device(struct iommu_sva *handle) { struct iommu_domain *domain = handle->domain; - ioasid_t pasid = domain->mm->pasid; + struct iommu_mm_data *iommu_mm = domain->mm->iommu_mm; struct device *dev = handle->dev; mutex_lock(&iommu_sva_lock); + iommu_detach_device_pasid(domain, dev, iommu_mm->pasid); if (--domain->users == 0) { - iommu_detach_device_pasid(domain, dev, pasid); + list_del(&domain->next); iommu_domain_free(domain); } mutex_unlock(&iommu_sva_lock); @@ -205,8 +218,11 @@ out_put_mm: void mm_pasid_drop(struct mm_struct *mm) { - if (likely(!mm_valid_pasid(mm))) + struct iommu_mm_data *iommu_mm = mm->iommu_mm; + + if (!iommu_mm) return; - iommu_free_global_pasid(mm->pasid); + iommu_free_global_pasid(iommu_mm->pasid); + kfree(iommu_mm); } diff --git a/include/linux/iommu.h b/include/linux/iommu.h index f7b1b469e98d..c6bbbe0901d0 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -121,6 +121,11 @@ struct iommu_domain { struct { /* IOMMU_DOMAIN_SVA */ struct mm_struct *mm; int users; + /* + * Next 
iommu_domain in mm->iommu_mm->sva-domains list + * protected by iommu_sva_lock. + */ + struct list_head next; }; }; }; @@ -1345,16 +1350,28 @@ static inline bool tegra_dev_iommu_get_stream_id(struct device *dev, u32 *stream #ifdef CONFIG_IOMMU_MM_DATA static inline void mm_pasid_init(struct mm_struct *mm) { - mm->pasid = IOMMU_PASID_INVALID; + /* + * During dup_mm(), a new mm will be memcpy'd from an old one and that makes + * the new mm and the old one point to a same iommu_mm instance. When either + * one of the two mms gets released, the iommu_mm instance is freed, leaving + * the other mm running into a use-after-free/double-free problem. To avoid + * the problem, zeroing the iommu_mm pointer of a new mm is needed here. + */ + mm->iommu_mm = NULL; } + static inline bool mm_valid_pasid(struct mm_struct *mm) { - return mm->pasid != IOMMU_PASID_INVALID; + return READ_ONCE(mm->iommu_mm); } static inline u32 mm_get_enqcmd_pasid(struct mm_struct *mm) { - return mm->pasid; + struct iommu_mm_data *iommu_mm = READ_ONCE(mm->iommu_mm); + + if (!iommu_mm) + return IOMMU_PASID_INVALID; + return iommu_mm->pasid; } void mm_pasid_drop(struct mm_struct *mm); -- cgit v1.2.3 From 1fa05c932dc71c474da38e4fd0456131128f8486 Mon Sep 17 00:00:00 2001 From: Tina Zhang Date: Fri, 27 Oct 2023 08:05:25 +0800 Subject: mm: Deprecate pasid field Drop the pasid field, as all the information needed for sva domain management has been moved to the newly added iommu_mm field. 
Reviewed-by: Lu Baolu Reviewed-by: Vasant Hegde Reviewed-by: Jason Gunthorpe Signed-off-by: Tina Zhang Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20231027000525.1278806-7-tina.zhang@intel.com Signed-off-by: Joerg Roedel --- include/linux/mm_types.h | 1 - mm/init-mm.c | 3 --- 2 files changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 0b4314fab478..ec71c91e210b 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -940,7 +940,6 @@ struct mm_struct { struct work_struct async_put_work; #ifdef CONFIG_IOMMU_MM_DATA - u32 pasid; struct iommu_mm_data *iommu_mm; #endif #ifdef CONFIG_KSM diff --git a/mm/init-mm.c b/mm/init-mm.c index c52dc2740a3d..24c809379274 100644 --- a/mm/init-mm.c +++ b/mm/init-mm.c @@ -44,9 +44,6 @@ struct mm_struct init_mm = { #endif .user_ns = &init_user_ns, .cpu_bitmap = CPU_BITS_NONE, -#ifdef CONFIG_IOMMU_MM_DATA - .pasid = IOMMU_PASID_INVALID, -#endif INIT_MM_CONTEXT(init_mm) }; -- cgit v1.2.3 From 4720287c7bf76e59d19d4dfbdc3f54eeea6fd46b Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 7 Dec 2023 14:03:08 -0400 Subject: iommu: Remove struct iommu_ops *iommu from arch_setup_dma_ops() This is not being used to pass ops, it is just a way to tell if an iommu driver was probed. These days this can be detected directly via device_iommu_mapped(). Call device_iommu_mapped() in the two places that need to check it and remove the iommu parameter everywhere. 
Reviewed-by: Jerry Snitselaar Reviewed-by: Lu Baolu Reviewed-by: Moritz Fischer Acked-by: Christoph Hellwig Acked-by: Rob Herring Tested-by: Hector Martin Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/1-v2-16e4def25ebb+820-iommu_fwspec_p1_jgg@nvidia.com Signed-off-by: Joerg Roedel --- arch/arc/mm/dma.c | 2 +- arch/arm/mm/dma-mapping-nommu.c | 2 +- arch/arm/mm/dma-mapping.c | 10 +++++----- arch/arm64/mm/dma-mapping.c | 4 ++-- arch/mips/mm/dma-noncoherent.c | 2 +- arch/riscv/mm/dma-noncoherent.c | 2 +- drivers/acpi/scan.c | 3 +-- drivers/hv/hv_common.c | 2 +- drivers/of/device.c | 2 +- include/linux/dma-map-ops.h | 4 ++-- 10 files changed, 16 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c index 2a7fbbb83b70..197707bc7658 100644 --- a/arch/arc/mm/dma.c +++ b/arch/arc/mm/dma.c @@ -91,7 +91,7 @@ void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size, * Plug in direct dma map ops. */ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, - const struct iommu_ops *iommu, bool coherent) + bool coherent) { /* * IOC hardware snoops all DMA traffic keeping the caches consistent diff --git a/arch/arm/mm/dma-mapping-nommu.c b/arch/arm/mm/dma-mapping-nommu.c index cfd9c933d2f0..b94850b57995 100644 --- a/arch/arm/mm/dma-mapping-nommu.c +++ b/arch/arm/mm/dma-mapping-nommu.c @@ -34,7 +34,7 @@ void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size, } void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, - const struct iommu_ops *iommu, bool coherent) + bool coherent) { if (IS_ENABLED(CONFIG_CPU_V7M)) { /* diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 5409225b4abc..6c359a3af8d9 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -1713,7 +1713,7 @@ void arm_iommu_detach_device(struct device *dev) EXPORT_SYMBOL_GPL(arm_iommu_detach_device); static void arm_setup_iommu_dma_ops(struct device *dev, u64 dma_base, u64 size, - 
const struct iommu_ops *iommu, bool coherent) + bool coherent) { struct dma_iommu_mapping *mapping; @@ -1748,7 +1748,7 @@ static void arm_teardown_iommu_dma_ops(struct device *dev) #else static void arm_setup_iommu_dma_ops(struct device *dev, u64 dma_base, u64 size, - const struct iommu_ops *iommu, bool coherent) + bool coherent) { } @@ -1757,7 +1757,7 @@ static void arm_teardown_iommu_dma_ops(struct device *dev) { } #endif /* CONFIG_ARM_DMA_USE_IOMMU */ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, - const struct iommu_ops *iommu, bool coherent) + bool coherent) { /* * Due to legacy code that sets the ->dma_coherent flag from a bus @@ -1776,8 +1776,8 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, if (dev->dma_ops) return; - if (iommu) - arm_setup_iommu_dma_ops(dev, dma_base, size, iommu, coherent); + if (device_iommu_mapped(dev)) + arm_setup_iommu_dma_ops(dev, dma_base, size, coherent); xen_setup_dma_ops(dev); dev->archdata.dma_ops_setup = true; diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 3cb101e8cb29..61886e43e3a1 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -47,7 +47,7 @@ void arch_teardown_dma_ops(struct device *dev) #endif void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, - const struct iommu_ops *iommu, bool coherent) + bool coherent) { int cls = cache_line_size_of_cpu(); @@ -58,7 +58,7 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, ARCH_DMA_MINALIGN, cls); dev->dma_coherent = coherent; - if (iommu) + if (device_iommu_mapped(dev)) iommu_setup_dma_ops(dev, dma_base, dma_base + size - 1); xen_setup_dma_ops(dev); diff --git a/arch/mips/mm/dma-noncoherent.c b/arch/mips/mm/dma-noncoherent.c index 3c4fc97b9f39..0f3cec663a12 100644 --- a/arch/mips/mm/dma-noncoherent.c +++ b/arch/mips/mm/dma-noncoherent.c @@ -138,7 +138,7 @@ void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size, #ifdef 
CONFIG_ARCH_HAS_SETUP_DMA_OPS void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, - const struct iommu_ops *iommu, bool coherent) + bool coherent) { dev->dma_coherent = coherent; } diff --git a/arch/riscv/mm/dma-noncoherent.c b/arch/riscv/mm/dma-noncoherent.c index 4e4e469b8dd6..843107f834b2 100644 --- a/arch/riscv/mm/dma-noncoherent.c +++ b/arch/riscv/mm/dma-noncoherent.c @@ -129,7 +129,7 @@ void arch_dma_prep_coherent(struct page *page, size_t size) } void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, - const struct iommu_ops *iommu, bool coherent) + bool coherent) { WARN_TAINT(!coherent && riscv_cbom_block_size > ARCH_DMA_MINALIGN, TAINT_CPU_OUT_OF_SPEC, diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 02bb2cce423f..444a0b3c72f2 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -1641,8 +1641,7 @@ int acpi_dma_configure_id(struct device *dev, enum dev_dma_attr attr, if (PTR_ERR(iommu) == -EPROBE_DEFER) return -EPROBE_DEFER; - arch_setup_dma_ops(dev, 0, U64_MAX, - iommu, attr == DEV_DMA_COHERENT); + arch_setup_dma_ops(dev, 0, U64_MAX, attr == DEV_DMA_COHERENT); return 0; } diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c index 4372f5d146ab..0285a74363b3 100644 --- a/drivers/hv/hv_common.c +++ b/drivers/hv/hv_common.c @@ -488,7 +488,7 @@ void hv_setup_dma_ops(struct device *dev, bool coherent) * Hyper-V does not offer a vIOMMU in the guest * VM, so pass 0/NULL for the IOMMU settings */ - arch_setup_dma_ops(dev, 0, 0, NULL, coherent); + arch_setup_dma_ops(dev, 0, 0, coherent); } EXPORT_SYMBOL_GPL(hv_setup_dma_ops); diff --git a/drivers/of/device.c b/drivers/of/device.c index 1ca42ad9dd15..65c71be71a8d 100644 --- a/drivers/of/device.c +++ b/drivers/of/device.c @@ -193,7 +193,7 @@ int of_dma_configure_id(struct device *dev, struct device_node *np, dev_dbg(dev, "device is%sbehind an iommu\n", iommu ?
" " : " not "); - arch_setup_dma_ops(dev, dma_start, size, iommu, coherent); + arch_setup_dma_ops(dev, dma_start, size, coherent); if (!iommu) of_dma_set_restricted_buffer(dev, np); diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h index a52e508d1869..e9cc317e9d7d 100644 --- a/include/linux/dma-map-ops.h +++ b/include/linux/dma-map-ops.h @@ -427,10 +427,10 @@ bool arch_dma_unmap_sg_direct(struct device *dev, struct scatterlist *sg, #ifdef CONFIG_ARCH_HAS_SETUP_DMA_OPS void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, - const struct iommu_ops *iommu, bool coherent); + bool coherent); #else static inline void arch_setup_dma_ops(struct device *dev, u64 dma_base, - u64 size, const struct iommu_ops *iommu, bool coherent) + u64 size, bool coherent) { } #endif /* CONFIG_ARCH_HAS_SETUP_DMA_OPS */ -- cgit v1.2.3 From 6ff6e184f1f4d4993d45ca3f934c8288890965fe Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 7 Dec 2023 14:03:09 -0400 Subject: iommu/of: Do not return struct iommu_ops from of_iommu_configure() Nothing needs this pointer. Return a normal error code with the usual IOMMU semantic that ENODEV means 'there is no IOMMU driver'.
Reviewed-by: Jerry Snitselaar Reviewed-by: Lu Baolu Acked-by: Rob Herring Tested-by: Hector Martin Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/2-v2-16e4def25ebb+820-iommu_fwspec_p1_jgg@nvidia.com Signed-off-by: Joerg Roedel --- drivers/iommu/of_iommu.c | 31 +++++++++++++++++++------------ drivers/of/device.c | 22 +++++++++++++++------- include/linux/of_iommu.h | 13 ++++++------- 3 files changed, 40 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c index 5ecca53847d3..c6510d7e7b24 100644 --- a/drivers/iommu/of_iommu.c +++ b/drivers/iommu/of_iommu.c @@ -107,16 +107,22 @@ static int of_iommu_configure_device(struct device_node *master_np, of_iommu_configure_dev(master_np, dev); } -const struct iommu_ops *of_iommu_configure(struct device *dev, - struct device_node *master_np, - const u32 *id) +/* + * Returns: + * 0 on success, an iommu was configured + * -ENODEV if the device does not have any IOMMU + * -EPROBE_DEFER if probing should be tried again + * -errno fatal errors + */ +int of_iommu_configure(struct device *dev, struct device_node *master_np, + const u32 *id) { const struct iommu_ops *ops = NULL; struct iommu_fwspec *fwspec; int err = NO_IOMMU; if (!master_np) - return NULL; + return -ENODEV; /* Serialise to make dev->iommu stable under our potential fwspec */ mutex_lock(&iommu_probe_device_lock); @@ -124,7 +130,7 @@ const struct iommu_ops *of_iommu_configure(struct device *dev, if (fwspec) { if (fwspec->ops) { mutex_unlock(&iommu_probe_device_lock); - return fwspec->ops; + return 0; } /* In the deferred case, start again from scratch */ iommu_fwspec_free(dev); @@ -169,14 +175,15 @@ const struct iommu_ops *of_iommu_configure(struct device *dev, err = iommu_probe_device(dev); /* Ignore all other errors apart from EPROBE_DEFER */ - if (err == -EPROBE_DEFER) { - ops = ERR_PTR(err); - } else if (err < 0) { - dev_dbg(dev, "Adding to IOMMU failed: %d\n", err); - ops = NULL;
+ if (err < 0) { + if (err == -EPROBE_DEFER) + return err; + dev_dbg(dev, "Adding to IOMMU failed: %pe\n", ERR_PTR(err)); + return err; } - - return ops; + if (!ops) + return -ENODEV; + return 0; } static enum iommu_resv_type __maybe_unused diff --git a/drivers/of/device.c b/drivers/of/device.c index 65c71be71a8d..873d933e8e6d 100644 --- a/drivers/of/device.c +++ b/drivers/of/device.c @@ -93,12 +93,12 @@ of_dma_set_restricted_buffer(struct device *dev, struct device_node *np) int of_dma_configure_id(struct device *dev, struct device_node *np, bool force_dma, const u32 *id) { - const struct iommu_ops *iommu; const struct bus_dma_region *map = NULL; struct device_node *bus_np; u64 dma_start = 0; u64 mask, end, size = 0; bool coherent; + int iommu_ret; int ret; if (np == dev->of_node) @@ -181,21 +181,29 @@ int of_dma_configure_id(struct device *dev, struct device_node *np, dev_dbg(dev, "device is%sdma coherent\n", coherent ? " " : " not "); - iommu = of_iommu_configure(dev, np, id); - if (PTR_ERR(iommu) == -EPROBE_DEFER) { + iommu_ret = of_iommu_configure(dev, np, id); + if (iommu_ret == -EPROBE_DEFER) { /* Don't touch range map if it wasn't set from a valid dma-ranges */ if (!ret) dev->dma_range_map = NULL; kfree(map); return -EPROBE_DEFER; - } + } else if (iommu_ret == -ENODEV) { + dev_dbg(dev, "device is not behind an iommu\n"); + } else if (iommu_ret) { + dev_err(dev, "iommu configuration for device failed with %pe\n", + ERR_PTR(iommu_ret)); - dev_dbg(dev, "device is%sbehind an iommu\n", - iommu ?
" " : " not "); + /* + * Historically this routine doesn't fail driver probing + * due to errors in of_iommu_configure() + */ + } else + dev_dbg(dev, "device is behind an iommu\n"); arch_setup_dma_ops(dev, dma_start, size, coherent); - if (!iommu) + if (iommu_ret) of_dma_set_restricted_buffer(dev, np); return 0; diff --git a/include/linux/of_iommu.h b/include/linux/of_iommu.h index 9a5e6b410dd2..e61cbbe12dac 100644 --- a/include/linux/of_iommu.h +++ b/include/linux/of_iommu.h @@ -8,20 +8,19 @@ struct iommu_ops; #ifdef CONFIG_OF_IOMMU -extern const struct iommu_ops *of_iommu_configure(struct device *dev, - struct device_node *master_np, - const u32 *id); +extern int of_iommu_configure(struct device *dev, struct device_node *master_np, + const u32 *id); extern void of_iommu_get_resv_regions(struct device *dev, struct list_head *list); #else -static inline const struct iommu_ops *of_iommu_configure(struct device *dev, - struct device_node *master_np, - const u32 *id) +static inline int of_iommu_configure(struct device *dev, + struct device_node *master_np, + const u32 *id) { - return NULL; + return -ENODEV; } static inline void of_iommu_get_resv_regions(struct device *dev, -- cgit v1.2.3 From eda1a94caf6b05482bbf57dc244e7a31a9dba77c Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 7 Dec 2023 14:03:12 -0400 Subject: iommu: Mark dev_iommu_priv_set() with a lockdep A perfect driver would only call dev_iommu_priv_set() from its probe callback. We've made it functionally correct to call it from the of_xlate by adding a lock around that call. Add a lockdep assertion that iommu_probe_device_lock is held to discourage misuse. Exclude PPC kernels with CONFIG_FSL_PAMU turned on because FSL_PAMU uses a global static for its priv and abuses priv for its domain. Remove the pointless stores of NULL, all these are on paths where the core code will free dev->iommu after the op returns.
Reviewed-by: Lu Baolu Reviewed-by: Jerry Snitselaar Tested-by: Hector Martin Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/5-v2-16e4def25ebb+820-iommu_fwspec_p1_jgg@nvidia.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/iommu.c | 2 -- drivers/iommu/apple-dart.c | 1 - drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 1 - drivers/iommu/arm/arm-smmu/arm-smmu.c | 1 - drivers/iommu/intel/iommu.c | 2 -- drivers/iommu/iommu.c | 9 +++++++++ drivers/iommu/omap-iommu.c | 1 - include/linux/iommu.h | 5 +---- 8 files changed, 10 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index fcc987f5d4ed..8199c678c2dc 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -551,8 +551,6 @@ static void amd_iommu_uninit_device(struct device *dev) if (dev_data->domain) detach_device(dev); - dev_iommu_priv_set(dev, NULL); - /* * We keep dev_data around for unplugged devices and reuse it when the * device is re-plugged - not doing so would introduce a ton of races. 
diff --git a/drivers/iommu/apple-dart.c b/drivers/iommu/apple-dart.c index ee05f4824bfa..56cfc33042e0 100644 --- a/drivers/iommu/apple-dart.c +++ b/drivers/iommu/apple-dart.c @@ -740,7 +740,6 @@ static void apple_dart_release_device(struct device *dev) { struct apple_dart_master_cfg *cfg = dev_iommu_priv_get(dev); - dev_iommu_priv_set(dev, NULL); kfree(cfg); } diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index fc4317c25b6d..1855d3892b15 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -2695,7 +2695,6 @@ static struct iommu_device *arm_smmu_probe_device(struct device *dev) err_free_master: kfree(master); - dev_iommu_priv_set(dev, NULL); return ERR_PTR(ret); } diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c index 4d09c0047892..adc7937fd8a3 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c @@ -1420,7 +1420,6 @@ static void arm_smmu_release_device(struct device *dev) arm_smmu_rpm_put(cfg->smmu); - dev_iommu_priv_set(dev, NULL); kfree(cfg); } diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 897159dba47d..511589341074 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -4461,7 +4461,6 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev) ret = intel_pasid_alloc_table(dev); if (ret) { dev_err(dev, "PASID table allocation failed\n"); - dev_iommu_priv_set(dev, NULL); kfree(info); return ERR_PTR(ret); } @@ -4479,7 +4478,6 @@ static void intel_iommu_release_device(struct device *dev) dmar_remove_one_dev_info(dev); intel_pasid_free_table(dev); intel_iommu_debugfs_remove_dev(info); - dev_iommu_priv_set(dev, NULL); kfree(info); set_dma_ops(dev, NULL); } diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index df58025c001b..68e648b55767 100644 --- a/drivers/iommu/iommu.c +++ 
b/drivers/iommu/iommu.c @@ -387,6 +387,15 @@ static u32 dev_iommu_get_max_pasids(struct device *dev) return min_t(u32, max_pasids, dev->iommu->iommu_dev->max_pasids); } +void dev_iommu_priv_set(struct device *dev, void *priv) +{ + /* FSL_PAMU does something weird */ + if (!IS_ENABLED(CONFIG_FSL_PAMU)) + lockdep_assert_held(&iommu_probe_device_lock); + dev->iommu->priv = priv; +} +EXPORT_SYMBOL_GPL(dev_iommu_priv_set); + /* * Init the dev->iommu and dev->iommu_group in the struct device and get the * driver probed diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c index c66b070841dd..c9528065a59a 100644 --- a/drivers/iommu/omap-iommu.c +++ b/drivers/iommu/omap-iommu.c @@ -1719,7 +1719,6 @@ static void omap_iommu_release_device(struct device *dev) if (!dev->of_node || !arch_data) return; - dev_iommu_priv_set(dev, NULL); kfree(arch_data); } diff --git a/include/linux/iommu.h b/include/linux/iommu.h index c6bbbe0901d0..3a556996fea7 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -850,10 +850,7 @@ static inline void *dev_iommu_priv_get(struct device *dev) return NULL; } -static inline void dev_iommu_priv_set(struct device *dev, void *priv) -{ - dev->iommu->priv = priv; -} +void dev_iommu_priv_set(struct device *dev, void *priv); extern struct mutex iommu_probe_device_lock; int iommu_probe_device(struct device *dev); -- cgit v1.2.3