From 3e8e986ce8a0d5fcf5479212d0c1ece4626c4a27 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 8 Apr 2025 13:53:54 -0300 Subject: iommu/pages: Remove iommu_free_page() Use iommu_free_pages() instead. Reviewed-by: Lu Baolu Reviewed-by: Mostafa Saleh Tested-by: Alejandro Jimenez Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/6-v4-c8663abbb606+3f7-iommu_pages_jgg@nvidia.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'drivers/iommu/intel/iommu.c') diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index b29da2d96d0b..e66fd919501f 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -571,17 +571,17 @@ static void free_context_table(struct intel_iommu *iommu) for (i = 0; i < ROOT_ENTRY_NR; i++) { context = iommu_context_addr(iommu, i, 0, 0); if (context) - iommu_free_page(context); + iommu_free_pages(context); if (!sm_supported(iommu)) continue; context = iommu_context_addr(iommu, i, 0x80, 0); if (context) - iommu_free_page(context); + iommu_free_pages(context); } - iommu_free_page(iommu->root_entry); + iommu_free_pages(iommu->root_entry); iommu->root_entry = NULL; } @@ -745,7 +745,7 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, tmp = 0ULL; if (!try_cmpxchg64(&pte->val, &tmp, pteval)) /* Someone else set it while we were thinking; use theirs. */ - iommu_free_page(tmp_page); + iommu_free_pages(tmp_page); else domain_flush_cache(domain, pte, sizeof(*pte)); } @@ -858,7 +858,7 @@ static void dma_pte_free_level(struct dmar_domain *domain, int level, last_pfn < level_pfn + level_size(level) - 1)) { dma_clear_pte(pte); domain_flush_cache(domain, pte, sizeof(*pte)); - iommu_free_page(level_pte); + iommu_free_pages(level_pte); } next: pfn += level_size(level); @@ -882,7 +882,7 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain, /* free pgd */ if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) { - iommu_free_page(domain->pgd); + iommu_free_pages(domain->pgd); domain->pgd = NULL; } } -- cgit v1.2.3 From 868240c34eb113e4121d4ad8ad7458d8caad87d3 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 8 Apr 2025 13:53:59 -0300 Subject: iommu: Change iommu_iotlb_gather to use iommu_pages_list This converts the remaining places using a list of pages to the new API. The Intel free path was shared with its gather path, so it is converted at the same time.
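For orientation, a minimal usage sketch of the new list type, assembled from the calls visible in the hunks below; the variable name pte_page and the flush step are illustrative placeholders, not code from this patch:

    struct iommu_pages_list freelist = IOMMU_PAGES_LIST_INIT(freelist);

    /* Queue a page-table page by its virtual address. */
    iommu_pages_list_add(&freelist, pte_page);

    /* Pages may only be freed once the IOTLB flush has completed. */
    if (!iommu_pages_list_empty(&freelist))
            flush_iotlb(domain);            /* hypothetical flush hook */
    iommu_put_pages_list(&freelist);        /* frees everything queued */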
Reviewed-by: Lu Baolu Tested-by: Nicolin Chen Tested-by: Alejandro Jimenez Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/11-v4-c8663abbb606+3f7-iommu_pages_jgg@nvidia.com Signed-off-by: Joerg Roedel --- drivers/iommu/dma-iommu.c | 9 +++++---- drivers/iommu/intel/iommu.c | 24 ++++++++++++------------ include/linux/iommu.h | 4 ++-- 3 files changed, 19 insertions(+), 18 deletions(-) (limited to 'drivers/iommu/intel/iommu.c') diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index a775e4dbe06f..0af1ab36283c 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -105,7 +105,7 @@ early_param("iommu.forcedac", iommu_dma_forcedac_setup); struct iova_fq_entry { unsigned long iova_pfn; unsigned long pages; - struct list_head freelist; + struct iommu_pages_list freelist; u64 counter; /* Flush counter when this entry was added */ }; @@ -192,7 +192,7 @@ static void fq_flush_timeout(struct timer_list *t) static void queue_iova(struct iommu_dma_cookie *cookie, unsigned long pfn, unsigned long pages, - struct list_head *freelist) + struct iommu_pages_list *freelist) { struct iova_fq *fq; unsigned long flags; @@ -231,7 +231,7 @@ static void queue_iova(struct iommu_dma_cookie *cookie, fq->entries[idx].iova_pfn = pfn; fq->entries[idx].pages = pages; fq->entries[idx].counter = atomic64_read(&cookie->fq_flush_start_cnt); - list_splice(freelist, &fq->entries[idx].freelist); + iommu_pages_list_splice(freelist, &fq->entries[idx].freelist); spin_unlock_irqrestore(&fq->lock, flags); @@ -289,7 +289,8 @@ static void iommu_dma_init_one_fq(struct iova_fq *fq, size_t fq_size) spin_lock_init(&fq->lock); for (i = 0; i < fq_size; i++) - INIT_LIST_HEAD(&fq->entries[i].freelist); + fq->entries[i].freelist = + IOMMU_PAGES_LIST_INIT(fq->entries[i].freelist); } static int iommu_dma_init_fq_single(struct iommu_dma_cookie *cookie) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index e66fd919501f..0de9580372a2 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -894,18 +894,16 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain, The 'pte' argument is the *parent* PTE, pointing to the page that is to be freed. */ static void dma_pte_list_pagetables(struct dmar_domain *domain, - int level, struct dma_pte *pte, - struct list_head *freelist) + int level, struct dma_pte *parent_pte, + struct iommu_pages_list *freelist) { - struct page *pg; + struct dma_pte *pte = phys_to_virt(dma_pte_addr(parent_pte)); - pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT); - list_add_tail(&pg->lru, freelist); + iommu_pages_list_add(freelist, pte); if (level == 1) return; - pte = page_address(pg); do { if (dma_pte_present(pte) && !dma_pte_superpage(pte)) dma_pte_list_pagetables(domain, level - 1, pte, freelist); @@ -916,7 +914,7 @@ static void dma_pte_list_pagetables(struct dmar_domain *domain, static void dma_pte_clear_level(struct dmar_domain *domain, int level, struct dma_pte *pte, unsigned long pfn, unsigned long start_pfn, unsigned long last_pfn, - struct list_head *freelist) + struct iommu_pages_list *freelist) { struct dma_pte *first_pte = NULL, *last_pte = NULL; @@ -961,7 +959,8 @@ next: the page tables, and may have cached the intermediate levels. The pages can only be freed after the IOTLB flush has been done. 
*/ static void domain_unmap(struct dmar_domain *domain, unsigned long start_pfn, - unsigned long last_pfn, struct list_head *freelist) + unsigned long last_pfn, + struct iommu_pages_list *freelist) { if (WARN_ON(!domain_pfn_supported(domain, last_pfn)) || WARN_ON(start_pfn > last_pfn)) @@ -973,8 +972,7 @@ static void domain_unmap(struct dmar_domain *domain, unsigned long start_pfn, /* free pgd */ if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) { - struct page *pgd_page = virt_to_page(domain->pgd); - list_add_tail(&pgd_page->lru, freelist); + iommu_pages_list_add(freelist, domain->pgd); domain->pgd = NULL; } } @@ -1449,7 +1447,8 @@ void domain_detach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu) static void domain_exit(struct dmar_domain *domain) { if (domain->pgd) { - LIST_HEAD(freelist); + struct iommu_pages_list freelist = + IOMMU_PAGES_LIST_INIT(freelist); domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw), &freelist); iommu_put_pages_list(&freelist); @@ -3603,7 +3602,8 @@ static void intel_iommu_tlb_sync(struct iommu_domain *domain, struct iommu_iotlb_gather *gather) { cache_tag_flush_range(to_dmar_domain(domain), gather->start, - gather->end, list_empty(&gather->freelist)); + gather->end, + iommu_pages_list_empty(&gather->freelist)); iommu_put_pages_list(&gather->freelist); } diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 6dd3a221c5bc..3fb62165db19 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -375,7 +375,7 @@ struct iommu_iotlb_gather { unsigned long start; unsigned long end; size_t pgsize; - struct list_head freelist; + struct iommu_pages_list freelist; bool queued; }; @@ -864,7 +864,7 @@ static inline void iommu_iotlb_gather_init(struct iommu_iotlb_gather *gather) { *gather = (struct iommu_iotlb_gather) { .start = ULONG_MAX, - .freelist = LIST_HEAD_INIT(gather->freelist), + .freelist = IOMMU_PAGES_LIST_INIT(gather->freelist), }; } -- cgit v1.2.3 From 5087f663c21e8ac2af7bd178a9b8da50ff7752ac Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 8 Apr 2025 13:54:09 -0300 Subject: iommu/pages: Remove iommu_alloc_page_node() Use iommu_alloc_pages_node_sz() instead. AMD and Intel are both using 4K pages for these structures since those drivers only work on 4K PAGE_SIZE. riscv is also spec'd to use SZ_4K. 
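The conversion is mechanical; a before/after sketch of the pattern repeated across the hunks below:

    /* Before: the helper hard-coded a PAGE_SIZE allocation. */
    pte = iommu_alloc_page_node(domain->nid, gfp);

    /* After: the caller states the table size explicitly. */
    pte = iommu_alloc_pages_node_sz(domain->nid, gfp, SZ_4K);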
Reviewed-by: Lu Baolu Tested-by: Alejandro Jimenez Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/21-v4-c8663abbb606+3f7-iommu_pages_jgg@nvidia.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/io_pgtable.c | 8 +++++--- drivers/iommu/amd/io_pgtable_v2.c | 4 ++-- drivers/iommu/amd/iommu.c | 2 +- drivers/iommu/intel/iommu.c | 13 ++++++++----- drivers/iommu/intel/pasid.c | 3 ++- drivers/iommu/iommu-pages.h | 13 ------------- drivers/iommu/riscv/iommu.c | 7 ++++--- 7 files changed, 22 insertions(+), 28 deletions(-) (limited to 'drivers/iommu/intel/iommu.c') diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c index 97ddadbcfb54..4d308c071134 100644 --- a/drivers/iommu/amd/io_pgtable.c +++ b/drivers/iommu/amd/io_pgtable.c @@ -114,7 +114,7 @@ static bool increase_address_space(struct amd_io_pgtable *pgtable, bool ret = true; u64 *pte; - pte = iommu_alloc_page_node(cfg->amd.nid, gfp); + pte = iommu_alloc_pages_node_sz(cfg->amd.nid, gfp, SZ_4K); if (!pte) return false; @@ -206,7 +206,8 @@ static u64 *alloc_pte(struct amd_io_pgtable *pgtable, if (!IOMMU_PTE_PRESENT(__pte) || pte_level == PAGE_MODE_NONE) { - page = iommu_alloc_page_node(cfg->amd.nid, gfp); + page = iommu_alloc_pages_node_sz(cfg->amd.nid, gfp, + SZ_4K); if (!page) return NULL; @@ -535,7 +536,8 @@ static struct io_pgtable *v1_alloc_pgtable(struct io_pgtable_cfg *cfg, void *coo { struct amd_io_pgtable *pgtable = io_pgtable_cfg_to_data(cfg); - pgtable->root = iommu_alloc_page_node(cfg->amd.nid, GFP_KERNEL); + pgtable->root = + iommu_alloc_pages_node_sz(cfg->amd.nid, GFP_KERNEL, SZ_4K); if (!pgtable->root) return NULL; pgtable->mode = PAGE_MODE_3_LEVEL; diff --git a/drivers/iommu/amd/io_pgtable_v2.c b/drivers/iommu/amd/io_pgtable_v2.c index f5f2f5327378..b47941353ccb 100644 --- a/drivers/iommu/amd/io_pgtable_v2.c +++ b/drivers/iommu/amd/io_pgtable_v2.c @@ -152,7 +152,7 @@ static u64 *v2_alloc_pte(int nid, u64 *pgd, unsigned long iova, } if (!IOMMU_PTE_PRESENT(__pte)) { - page = iommu_alloc_page_node(nid, gfp); + page = iommu_alloc_pages_node_sz(nid, gfp, SZ_4K); if (!page) return NULL; @@ -346,7 +346,7 @@ static struct io_pgtable *v2_alloc_pgtable(struct io_pgtable_cfg *cfg, void *coo struct amd_io_pgtable *pgtable = io_pgtable_cfg_to_data(cfg); int ias = IOMMU_IN_ADDR_BIT_SIZE; - pgtable->pgd = iommu_alloc_page_node(cfg->amd.nid, GFP_KERNEL); + pgtable->pgd = iommu_alloc_pages_node_sz(cfg->amd.nid, GFP_KERNEL, SZ_4K); if (!pgtable->pgd) return NULL; diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index fe31166ced94..ed04d1471c24 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -1884,7 +1884,7 @@ static int setup_gcr3_table(struct gcr3_tbl_info *gcr3_info, return -ENOSPC; gcr3_info->domid = domid; - gcr3_info->gcr3_tbl = iommu_alloc_page_node(nid, GFP_ATOMIC); + gcr3_info->gcr3_tbl = iommu_alloc_pages_node_sz(nid, GFP_ATOMIC, SZ_4K); if (gcr3_info->gcr3_tbl == NULL) { pdom_id_free(domid); return -ENOMEM; diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 0de9580372a2..5d5291887516 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -397,7 +397,8 @@ struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus, if (!alloc) return NULL; - context = iommu_alloc_page_node(iommu->node, GFP_ATOMIC); + context = iommu_alloc_pages_node_sz(iommu->node, GFP_ATOMIC, + SZ_4K); if (!context) return NULL; @@ -731,7 +732,8 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, if 
(!dma_pte_present(pte)) { uint64_t pteval, tmp; - tmp_page = iommu_alloc_page_node(domain->nid, gfp); + tmp_page = iommu_alloc_pages_node_sz(domain->nid, gfp, + SZ_4K); if (!tmp_page) return NULL; @@ -982,7 +984,7 @@ static int iommu_alloc_root_entry(struct intel_iommu *iommu) { struct root_entry *root; - root = iommu_alloc_page_node(iommu->node, GFP_ATOMIC); + root = iommu_alloc_pages_node_sz(iommu->node, GFP_ATOMIC, SZ_4K); if (!root) { pr_err("Allocating root entry for %s failed\n", iommu->name); @@ -2026,7 +2028,8 @@ static int copy_context_table(struct intel_iommu *iommu, if (!old_ce) goto out; - new_ce = iommu_alloc_page_node(iommu->node, GFP_KERNEL); + new_ce = iommu_alloc_pages_node_sz(iommu->node, + GFP_KERNEL, SZ_4K); if (!new_ce) goto out_unmap; @@ -3359,7 +3362,7 @@ static struct dmar_domain *paging_domain_alloc(struct device *dev, bool first_st domain->domain.geometry.aperture_end = __DOMAIN_MAX_ADDR(domain->gaw); /* always allocate the top pgd */ - domain->pgd = iommu_alloc_page_node(domain->nid, GFP_KERNEL); + domain->pgd = iommu_alloc_pages_node_sz(domain->nid, GFP_KERNEL, SZ_4K); if (!domain->pgd) { kfree(domain); return ERR_PTR(-ENOMEM); diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c index b616aaad03ba..728da85a9100 100644 --- a/drivers/iommu/intel/pasid.c +++ b/drivers/iommu/intel/pasid.c @@ -147,7 +147,8 @@ retry: if (!entries) { u64 tmp; - entries = iommu_alloc_page_node(info->iommu->node, GFP_ATOMIC); + entries = iommu_alloc_pages_node_sz(info->iommu->node, + GFP_ATOMIC, SZ_4K); if (!entries) return NULL; diff --git a/drivers/iommu/iommu-pages.h b/drivers/iommu/iommu-pages.h index 4513fbc76260..7ece83bb0f54 100644 --- a/drivers/iommu/iommu-pages.h +++ b/drivers/iommu/iommu-pages.h @@ -114,17 +114,4 @@ static inline void *iommu_alloc_pages_sz(gfp_t gfp, size_t size) return iommu_alloc_pages_node_sz(NUMA_NO_NODE, gfp, size); } -/** - * iommu_alloc_page_node - allocate a zeroed page at specific NUMA node. - * @nid: memory NUMA node id - * @gfp: buddy allocator flags - * - * returns the virtual address of the allocated page - * Prefer to use iommu_alloc_pages_node_lg2() - */ -static inline void *iommu_alloc_page_node(int nid, gfp_t gfp) -{ - return iommu_alloc_pages_node_sz(nid, gfp, PAGE_SIZE); -} - #endif /* __IOMMU_PAGES_H */ diff --git a/drivers/iommu/riscv/iommu.c b/drivers/iommu/riscv/iommu.c index 8835c82f118d..bb57092ca901 100644 --- a/drivers/iommu/riscv/iommu.c +++ b/drivers/iommu/riscv/iommu.c @@ -1144,7 +1144,8 @@ pte_retry: * page table. This might race with other mappings, retry. 
*/ if (_io_pte_none(pte)) { - addr = iommu_alloc_page_node(domain->numa_node, gfp); + addr = iommu_alloc_pages_node_sz(domain->numa_node, gfp, + SZ_4K); if (!addr) return NULL; old = pte; @@ -1385,8 +1386,8 @@ static struct iommu_domain *riscv_iommu_alloc_paging_domain(struct device *dev) domain->numa_node = dev_to_node(iommu->dev); domain->amo_enabled = !!(iommu->caps & RISCV_IOMMU_CAPABILITIES_AMO_HWAD); domain->pgd_mode = pgd_mode; - domain->pgd_root = iommu_alloc_page_node(domain->numa_node, - GFP_KERNEL_ACCOUNT); + domain->pgd_root = iommu_alloc_pages_node_sz(domain->numa_node, + GFP_KERNEL_ACCOUNT, SZ_4K); if (!domain->pgd_root) { kfree(domain); return ERR_PTR(-ENOMEM); -- cgit v1.2.3 From 4f1492efb495bcef34c9ee8a94af81e6cea5abf4 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Wed, 16 Apr 2025 15:36:08 +0800 Subject: iommu/vt-d: Revert ATS timing change to fix boot failure Commit <5518f239aff1> ("iommu/vt-d: Move scalable mode ATS enablement to probe path") changed the PCI ATS enablement logic to run earlier, specifically before the default domain attachment. On some client platforms, this change resulted in boot failures, causing the kernel to panic with the following message and call trace: Kernel panic - not syncing: DMAR hardware is malfunctioning CPU: 0 UID: 0 PID: 1 Comm: swapper/0 Not tainted 6.14.0-rc3+ #175 Call Trace: dump_stack_lvl+0x6f/0xb0 dump_stack+0x10/0x16 panic+0x10a/0x2b7 iommu_enable_translation.cold+0xc/0xc intel_iommu_init+0xe39/0xec0 ? trace_hardirqs_on+0x1e/0xd0 ? __pfx_pci_iommu_init+0x10/0x10 pci_iommu_init+0xd/0x40 do_one_initcall+0x5b/0x390 kernel_init_freeable+0x26d/0x2b0 ? __pfx_kernel_init+0x10/0x10 kernel_init+0x15/0x120 ret_from_fork+0x35/0x60 ? __pfx_kernel_init+0x10/0x10 ret_from_fork_asm+0x1a/0x30 RIP: 1f0f:0x0 Code: Unable to access opcode bytes at 0xffffffffffffffd6. RSP: 0000:0000000000000000 EFLAGS: 841f0f2e66 ORIG_RAX: 1f0f2e6600000000 RAX: 0000000000000000 RBX: 1f0f2e6600000000 RCX: 2e66000000000084 RDX: 0000000000841f0f RSI: 000000841f0f2e66 RDI: 00841f0f2e660000 RBP: 00841f0f2e660000 R08: 00841f0f2e660000 R09: 000000841f0f2e66 R10: 0000000000841f0f R11: 2e66000000000084 R12: 000000841f0f2e66 R13: 0000000000841f0f R14: 2e66000000000084 R15: 1f0f2e6600000000 ---[ end Kernel panic - not syncing: DMAR hardware is malfunctioning ]--- Fix this by reverting the timing change for ATS enablement introduced by the offending commit and restoring the previous behavior. 
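The restored ordering, sketched for clarity (flow taken from the diff below):

    /*
     * intel_iommu_probe_device(dev);
     *         - sets up per-device IOMMU state; ATS is no longer
     *           enabled at this point.
     * (the IOMMU core attaches the default domain)
     * intel_iommu_probe_finalize(dev);
     *         - enables PASID first (per the PCIe spec, device
     *           behaviour is undefined if PASID is enabled after
     *           ATS), then iommu_enable_pci_ats() and
     *           iommu_enable_pci_pri().
     */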
Fixes: 5518f239aff1 ("iommu/vt-d: Move scalable mode ATS enablement to probe path") Reported-by: Jarkko Nikula Closes: https://lore.kernel.org/linux-iommu/01b9c72f-460d-4f77-b696-54c6825babc9@linux.intel.com/ Signed-off-by: Lu Baolu Tested-by: Jarkko Nikula Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20250416073608.1799578-1-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) (limited to 'drivers/iommu/intel/iommu.c') diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index b29da2d96d0b..e60b699e7b8c 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -3785,6 +3785,22 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev) intel_iommu_debugfs_create_dev(info); + return &iommu->iommu; +free_table: + intel_pasid_free_table(dev); +clear_rbtree: + device_rbtree_remove(info); +free: + kfree(info); + + return ERR_PTR(ret); +} + +static void intel_iommu_probe_finalize(struct device *dev) +{ + struct device_domain_info *info = dev_iommu_priv_get(dev); + struct intel_iommu *iommu = info->iommu; + /* * The PCIe spec, in its wisdom, declares that the behaviour of the * device is undefined if you enable PASID support after ATS support. @@ -3792,22 +3808,12 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev) * we can't yet know if we're ever going to use it. */ if (info->pasid_supported && - !pci_enable_pasid(pdev, info->pasid_supported & ~1)) + !pci_enable_pasid(to_pci_dev(dev), info->pasid_supported & ~1)) info->pasid_enabled = 1; - if (sm_supported(iommu)) + if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) iommu_enable_pci_ats(info); iommu_enable_pci_pri(info); - - return &iommu->iommu; -free_table: - intel_pasid_free_table(dev); -clear_rbtree: - device_rbtree_remove(info); -free: - kfree(info); - - return ERR_PTR(ret); } static void intel_iommu_release_device(struct device *dev) @@ -4391,6 +4397,7 @@ const struct iommu_ops intel_iommu_ops = { .domain_alloc_sva = intel_svm_domain_alloc, .domain_alloc_nested = intel_iommu_domain_alloc_nested, .probe_device = intel_iommu_probe_device, + .probe_finalize = intel_iommu_probe_finalize, .release_device = intel_iommu_release_device, .get_resv_regions = intel_iommu_get_resv_regions, .device_group = intel_iommu_device_group, -- cgit v1.2.3 From 2c8a7c66c90832432496616a9a3c07293f1364f3 Mon Sep 17 00:00:00 2001 From: Mingcong Bai Date: Fri, 18 Apr 2025 11:16:42 +0800 Subject: iommu/vt-d: Apply quirk_iommu_igfx for 8086:0044 (QM57/QS57) On the Lenovo ThinkPad X201, when Intel VT-d is enabled in the BIOS, the kernel boots with errors related to DMAR, the graphical interface appeared quite choppy, and the system resets erratically within a minute after it booted: DMAR: DRHD: handling fault status reg 3 DMAR: [DMA Write NO_PASID] Request device [00:02.0] fault addr 0xb97ff000 [fault reason 0x05] PTE Write access is not set Upon comparing boot logs with VT-d on/off, I found that the Intel Calpella quirk (`quirk_calpella_no_shadow_gtt()') correctly applied the igfx IOMMU disable/quirk correctly: pci 0000:00:00.0: DMAR: BIOS has allocated no shadow GTT; disabling IOMMU for graphics Whereas with VT-d on, it went into the "else" branch, which then triggered the DMAR handling fault above: ... 
else if (!disable_igfx_iommu) { /* we have to ensure the gfx device is idle before we flush */ pci_info(dev, "Disabling batched IOTLB flush on Ironlake\n"); iommu_set_dma_strict(); } Now, this is not exactly scientific, but moving 0x0044 to quirk_iommu_igfx seems to have fixed the aforementioned issue. Running `git blame' on the function a few times, I found that the quirk was originally introduced as a fix specific to the ThinkPad X201: commit 9eecabcb9a92 ("intel-iommu: Abort IOMMU setup for igfx if BIOS gave no shadow GTT space"), which was later revised twice into the "else" branch we saw above: - 2011: commit 6fbcfb3e467a ("intel-iommu: Workaround IOTLB hang on Ironlake GPU") - 2024: commit ba00196ca41c ("iommu/vt-d: Decouple igfx_off from graphic identity mapping") I'm uncertain whether further testing on this particular laptop was done in 2011 and (honestly I'm not sure) 2024, but I would be happy to do some distro-specific testing if that's what would be required to verify this patch. P.S., I also see IDs 0x0040, 0x0062, and 0x006a listed under the same `quirk_calpella_no_shadow_gtt()' quirk, but I'm not sure how similar these chipsets are (if they share the same issue with VT-d or even, indeed, if this issue is specific to a bug in the Lenovo BIOS). With regards to 0x0062, it seems to be a Centrino wireless card, but not a chipset? I have also listed a couple of (distro and kernel) bug reports below as references (some of them are from 7-8 years ago!), as they seem to be similar issues found on different Westmere/Ironlake, Haswell, and Broadwell hardware setups. Cc: stable@vger.kernel.org Fixes: 6fbcfb3e467a ("intel-iommu: Workaround IOTLB hang on Ironlake GPU") Fixes: ba00196ca41c ("iommu/vt-d: Decouple igfx_off from graphic identity mapping") Link: https://groups.google.com/g/qubes-users/c/4NP4goUds2c?pli=1 Link: https://bugs.archlinux.org/task/65362 Link: https://bbs.archlinux.org/viewtopic.php?id=230323 Reported-by: Wenhao Sun Closes: https://bugzilla.kernel.org/show_bug.cgi?id=197029 Signed-off-by: Mingcong Bai Link: https://lore.kernel.org/r/20250415133330.12528-1-jeffbai@aosc.io Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/iommu/intel/iommu.c') diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index e60b699e7b8c..cb0b993bebb4 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -4439,6 +4439,9 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_igfx); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_igfx); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_igfx); +/* QM57/QS57 integrated gfx malfunctions with dmar */ +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_iommu_igfx); + /* Broadwell igfx malfunctions with dmar */ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1606, quirk_iommu_igfx); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160B, quirk_iommu_igfx); @@ -4516,7 +4519,6 @@ static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev) } } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt); -- cgit v1.2.3 From 7c8896dd4a2a27c84b04dcf0990e6f6b118cb6b2 Mon Sep 17
00:00:00 2001 From: Jason Gunthorpe Date: Fri, 18 Apr 2025 16:01:24 +0800 Subject: iommu: Remove IOMMU_DEV_FEAT_SVA None of the drivers implement anything here anymore, remove the dead code. Signed-off-by: Jason Gunthorpe Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Reviewed-by: Yi Liu Tested-by: Zhangfei Gao Link: https://lore.kernel.org/r/20250418080130.1844424-3-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/accel/amdxdna/aie2_pci.c | 13 ++----------- drivers/dma/idxd/init.c | 8 +------- drivers/iommu/amd/iommu.c | 2 -- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 2 -- drivers/iommu/intel/iommu.c | 6 ------ drivers/iommu/iommu-sva.c | 3 --- drivers/misc/uacce/uacce.c | 9 --------- include/linux/iommu.h | 9 +-------- 8 files changed, 4 insertions(+), 48 deletions(-) (limited to 'drivers/iommu/intel/iommu.c') diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/amdxdna/aie2_pci.c index 5a058e565b01..c6cf7068d23c 100644 --- a/drivers/accel/amdxdna/aie2_pci.c +++ b/drivers/accel/amdxdna/aie2_pci.c @@ -512,12 +512,6 @@ static int aie2_init(struct amdxdna_dev *xdna) goto release_fw; } - ret = iommu_dev_enable_feature(&pdev->dev, IOMMU_DEV_FEAT_SVA); - if (ret) { - XDNA_ERR(xdna, "Enable PASID failed, ret %d", ret); - goto free_irq; - } - psp_conf.fw_size = fw->size; psp_conf.fw_buf = fw->data; for (i = 0; i < PSP_MAX_REGS; i++) @@ -526,14 +520,14 @@ static int aie2_init(struct amdxdna_dev *xdna) if (!ndev->psp_hdl) { XDNA_ERR(xdna, "failed to create psp"); ret = -ENOMEM; - goto disable_sva; + goto free_irq; } xdna->dev_handle = ndev; ret = aie2_hw_start(xdna); if (ret) { XDNA_ERR(xdna, "start npu failed, ret %d", ret); - goto disable_sva; + goto free_irq; } ret = aie2_mgmt_fw_query(ndev); @@ -584,8 +578,6 @@ async_event_free: aie2_error_async_events_free(ndev); stop_hw: aie2_hw_stop(xdna); -disable_sva: - iommu_dev_disable_feature(&pdev->dev, IOMMU_DEV_FEAT_SVA); free_irq: pci_free_irq_vectors(pdev); release_fw: @@ -601,7 +593,6 @@ static void aie2_fini(struct amdxdna_dev *xdna) aie2_hw_stop(xdna); aie2_error_async_events_free(ndev); - iommu_dev_disable_feature(&pdev->dev, IOMMU_DEV_FEAT_SVA); pci_free_irq_vectors(pdev); } diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index fca1d2924999..2d3d580b9987 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -633,17 +633,11 @@ static int idxd_enable_sva(struct pci_dev *pdev) ret = iommu_dev_enable_feature(&pdev->dev, IOMMU_DEV_FEAT_IOPF); if (ret) return ret; - - ret = iommu_dev_enable_feature(&pdev->dev, IOMMU_DEV_FEAT_SVA); - if (ret) - iommu_dev_disable_feature(&pdev->dev, IOMMU_DEV_FEAT_IOPF); - - return ret; + return 0; } static void idxd_disable_sva(struct pci_dev *pdev) { - iommu_dev_disable_feature(&pdev->dev, IOMMU_DEV_FEAT_SVA); iommu_dev_disable_feature(&pdev->dev, IOMMU_DEV_FEAT_IOPF); } diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index dea0fed7abb0..17aab6d04a13 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2991,7 +2991,6 @@ static int amd_iommu_dev_enable_feature(struct device *dev, switch (feat) { case IOMMU_DEV_FEAT_IOPF: - case IOMMU_DEV_FEAT_SVA: break; default: ret = -EINVAL; @@ -3007,7 +3006,6 @@ static int amd_iommu_dev_disable_feature(struct device *dev, switch (feat) { case IOMMU_DEV_FEAT_IOPF: - case IOMMU_DEV_FEAT_SVA: break; default: ret = -EINVAL; diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 4c08560409cc..cddd7442661f 100644 --- 
a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -3653,7 +3653,6 @@ static int arm_smmu_dev_enable_feature(struct device *dev, switch (feat) { case IOMMU_DEV_FEAT_IOPF: - case IOMMU_DEV_FEAT_SVA: return 0; default: return -EINVAL; @@ -3670,7 +3669,6 @@ static int arm_smmu_dev_disable_feature(struct device *dev, switch (feat) { case IOMMU_DEV_FEAT_IOPF: - case IOMMU_DEV_FEAT_SVA: return 0; default: return -EINVAL; diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 5d5291887516..37f2403ed677 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -3950,9 +3950,6 @@ intel_iommu_dev_enable_feat(struct device *dev, enum iommu_dev_features feat) case IOMMU_DEV_FEAT_IOPF: return intel_iommu_enable_iopf(dev); - case IOMMU_DEV_FEAT_SVA: - return 0; - default: return -ENODEV; } @@ -3966,9 +3963,6 @@ intel_iommu_dev_disable_feat(struct device *dev, enum iommu_dev_features feat) intel_iommu_disable_iopf(dev); return 0; - case IOMMU_DEV_FEAT_SVA: - return 0; - default: return -ENODEV; } diff --git a/drivers/iommu/iommu-sva.c b/drivers/iommu/iommu-sva.c index ab18bc494eef..944daa0dabd6 100644 --- a/drivers/iommu/iommu-sva.c +++ b/drivers/iommu/iommu-sva.c @@ -63,9 +63,6 @@ static struct iommu_mm_data *iommu_alloc_mm_data(struct mm_struct *mm, struct de * reference is taken. Caller must call iommu_sva_unbind_device() * to release each reference. * - * iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_SVA) must be called first, to - * initialize the required SVA features. - * * On error, returns an ERR_PTR value. */ struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm) diff --git a/drivers/misc/uacce/uacce.c b/drivers/misc/uacce/uacce.c index bdc2e6fda782..2a1db2abeeca 100644 --- a/drivers/misc/uacce/uacce.c +++ b/drivers/misc/uacce/uacce.c @@ -479,14 +479,6 @@ static unsigned int uacce_enable_sva(struct device *parent, unsigned int flags) dev_err(parent, "failed to enable IOPF feature! ret = %pe\n", ERR_PTR(ret)); return flags; } - - ret = iommu_dev_enable_feature(parent, IOMMU_DEV_FEAT_SVA); - if (ret) { - dev_err(parent, "failed to enable SVA feature! ret = %pe\n", ERR_PTR(ret)); - iommu_dev_disable_feature(parent, IOMMU_DEV_FEAT_IOPF); - return flags; - } - return flags | UACCE_DEV_SVA; } @@ -495,7 +487,6 @@ static void uacce_disable_sva(struct uacce_device *uacce) if (!(uacce->flags & UACCE_DEV_SVA)) return; - iommu_dev_disable_feature(uacce->parent, IOMMU_DEV_FEAT_SVA); iommu_dev_disable_feature(uacce->parent, IOMMU_DEV_FEAT_IOPF); } diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 062818b51822..f39af28acaeb 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -318,18 +318,11 @@ struct iommu_iort_rmr_data { /** * enum iommu_dev_features - Per device IOMMU features - * @IOMMU_DEV_FEAT_SVA: Shared Virtual Addresses - * @IOMMU_DEV_FEAT_IOPF: I/O Page Faults such as PRI or Stall. Generally - * enabling %IOMMU_DEV_FEAT_SVA requires - * %IOMMU_DEV_FEAT_IOPF, but some devices manage I/O Page - * Faults themselves instead of relying on the IOMMU. When - * supported, this feature must be enabled before and - * disabled after %IOMMU_DEV_FEAT_SVA. + * @IOMMU_DEV_FEAT_IOPF: I/O Page Faults such as PRI or Stall. * * Device drivers enable a feature using iommu_dev_enable_feature(). 
*/ enum iommu_dev_features { - IOMMU_DEV_FEAT_SVA, IOMMU_DEV_FEAT_IOPF, }; -- cgit v1.2.3 From 17fce9d2336d952b95474248303e5e7d9777f2e0 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Fri, 18 Apr 2025 16:01:25 +0800 Subject: iommu/vt-d: Put iopf enablement in domain attach path Update iopf enablement in the driver to use the new method, similar to the arm-smmu-v3 driver. Enable iopf support when any domain with an iopf_handler is attached, and disable it when the domain is removed. Place all the logic for controlling the PRI and iopf queue in the domain set/remove/replace paths. Keep track of the number of domains set to the device and PASIDs that require iopf. When the first domain requiring iopf is attached, add the device to the iopf queue and enable PRI. When the last domain is removed, remove it from the iopf queue and disable PRI. Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Reviewed-by: Yi Liu Tested-by: Zhangfei Gao Link: https://lore.kernel.org/r/20250418080130.1844424-4-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 42 ++++++++++++++++++++++++++++++++++++------ drivers/iommu/intel/iommu.h | 33 +++++++++++++++++++++++++++++++++ drivers/iommu/intel/nested.c | 16 ++++++++++++++-- drivers/iommu/intel/svm.c | 9 +++++++-- 4 files changed, 90 insertions(+), 10 deletions(-) (limited to 'drivers/iommu/intel/iommu.c') diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 37f2403ed677..a18e7504c7d1 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -3284,6 +3284,9 @@ void device_block_translation(struct device *dev) static int blocking_domain_attach_dev(struct iommu_domain *domain, struct device *dev) { + struct device_domain_info *info = dev_iommu_priv_get(dev); + + iopf_for_domain_remove(info->domain ? &info->domain->domain : NULL, dev); device_block_translation(dev); return 0; } @@ -3494,7 +3497,15 @@ static int intel_iommu_attach_device(struct iommu_domain *domain, if (ret) return ret; - return dmar_domain_attach_device(to_dmar_domain(domain), dev); + ret = iopf_for_domain_set(domain, dev); + if (ret) + return ret; + + ret = dmar_domain_attach_device(to_dmar_domain(domain), dev); + if (ret) + iopf_for_domain_remove(domain, dev); + + return ret; } static int intel_iommu_map(struct iommu_domain *domain, @@ -3915,6 +3926,8 @@ int intel_iommu_enable_iopf(struct device *dev) if (!info->pri_enabled) return -ENODEV; + /* pri_enabled is protected by the group mutex. 
*/ + iommu_group_mutex_assert(dev); if (info->iopf_refcount) { info->iopf_refcount++; return 0; @@ -3937,6 +3950,7 @@ void intel_iommu_disable_iopf(struct device *dev) if (WARN_ON(!info->pri_enabled || !info->iopf_refcount)) return; + iommu_group_mutex_assert(dev); if (--info->iopf_refcount) return; @@ -3948,8 +3962,7 @@ intel_iommu_dev_enable_feat(struct device *dev, enum iommu_dev_features feat) { switch (feat) { case IOMMU_DEV_FEAT_IOPF: - return intel_iommu_enable_iopf(dev); - + return 0; default: return -ENODEV; } @@ -3960,7 +3973,6 @@ intel_iommu_dev_disable_feat(struct device *dev, enum iommu_dev_features feat) { switch (feat) { case IOMMU_DEV_FEAT_IOPF: - intel_iommu_disable_iopf(dev); return 0; default: @@ -4041,6 +4053,7 @@ static int blocking_domain_set_dev_pasid(struct iommu_domain *domain, { struct device_domain_info *info = dev_iommu_priv_get(dev); + iopf_for_domain_remove(old, dev); intel_pasid_tear_down_entry(info->iommu, dev, pasid, false); domain_remove_dev_pasid(old, dev, pasid); @@ -4114,6 +4127,10 @@ static int intel_iommu_set_dev_pasid(struct iommu_domain *domain, if (IS_ERR(dev_pasid)) return PTR_ERR(dev_pasid); + ret = iopf_for_domain_replace(domain, old, dev); + if (ret) + goto out_remove_dev_pasid; + if (dmar_domain->use_first_level) ret = domain_setup_first_level(iommu, dmar_domain, dev, pasid, old); @@ -4121,7 +4138,7 @@ static int intel_iommu_set_dev_pasid(struct iommu_domain *domain, ret = domain_setup_second_level(iommu, dmar_domain, dev, pasid, old); if (ret) - goto out_remove_dev_pasid; + goto out_unwind_iopf; domain_remove_dev_pasid(old, dev, pasid); @@ -4129,6 +4146,8 @@ static int intel_iommu_set_dev_pasid(struct iommu_domain *domain, return 0; +out_unwind_iopf: + iopf_for_domain_replace(old, domain, dev); out_remove_dev_pasid: domain_remove_dev_pasid(domain, dev, pasid); return ret; @@ -4343,6 +4362,11 @@ static int identity_domain_attach_dev(struct iommu_domain *domain, struct device if (dev_is_real_dma_subdevice(dev)) return 0; + /* + * No PRI support with the global identity domain. No need to enable or + * disable PRI in this path as the iommu has been put in the blocking + * state. 
+ */ if (sm_supported(iommu)) ret = intel_pasid_setup_pass_through(iommu, dev, IOMMU_NO_PASID); else @@ -4362,10 +4386,16 @@ static int identity_domain_set_dev_pasid(struct iommu_domain *domain, if (!pasid_supported(iommu) || dev_is_real_dma_subdevice(dev)) return -EOPNOTSUPP; - ret = domain_setup_passthrough(iommu, dev, pasid, old); + ret = iopf_for_domain_replace(domain, old, dev); if (ret) return ret; + ret = domain_setup_passthrough(iommu, dev, pasid, old); + if (ret) { + iopf_for_domain_replace(old, domain, dev); + return ret; + } + domain_remove_dev_pasid(old, dev, pasid); return 0; } diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index 8d5d85bf0080..f9dcf0ab93d1 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -1297,6 +1297,39 @@ void intel_iommu_drain_pasid_prq(struct device *dev, u32 pasid); int intel_iommu_enable_iopf(struct device *dev); void intel_iommu_disable_iopf(struct device *dev); +static inline int iopf_for_domain_set(struct iommu_domain *domain, + struct device *dev) +{ + if (!domain || !domain->iopf_handler) + return 0; + + return intel_iommu_enable_iopf(dev); +} + +static inline void iopf_for_domain_remove(struct iommu_domain *domain, + struct device *dev) +{ + if (!domain || !domain->iopf_handler) + return; + + intel_iommu_disable_iopf(dev); +} + +static inline int iopf_for_domain_replace(struct iommu_domain *new, + struct iommu_domain *old, + struct device *dev) +{ + int ret; + + ret = iopf_for_domain_set(new, dev); + if (ret) + return ret; + + iopf_for_domain_remove(old, dev); + + return 0; +} + #ifdef CONFIG_INTEL_IOMMU_SVM void intel_svm_check(struct intel_iommu *iommu); struct iommu_domain *intel_svm_domain_alloc(struct device *dev, diff --git a/drivers/iommu/intel/nested.c b/drivers/iommu/intel/nested.c index 6ac5c534bef4..d2a94025d0a0 100644 --- a/drivers/iommu/intel/nested.c +++ b/drivers/iommu/intel/nested.c @@ -56,10 +56,14 @@ static int intel_nested_attach_dev(struct iommu_domain *domain, if (ret) goto detach_iommu; + ret = iopf_for_domain_set(domain, dev); + if (ret) + goto unassign_tag; + ret = intel_pasid_setup_nested(iommu, dev, IOMMU_NO_PASID, dmar_domain); if (ret) - goto unassign_tag; + goto disable_iopf; info->domain = dmar_domain; spin_lock_irqsave(&dmar_domain->lock, flags); @@ -67,6 +71,8 @@ static int intel_nested_attach_dev(struct iommu_domain *domain, spin_unlock_irqrestore(&dmar_domain->lock, flags); return 0; +disable_iopf: + iopf_for_domain_remove(domain, dev); unassign_tag: cache_tag_unassign_domain(dmar_domain, dev, IOMMU_NO_PASID); detach_iommu: @@ -166,14 +172,20 @@ static int intel_nested_set_dev_pasid(struct iommu_domain *domain, if (IS_ERR(dev_pasid)) return PTR_ERR(dev_pasid); - ret = domain_setup_nested(iommu, dmar_domain, dev, pasid, old); + ret = iopf_for_domain_replace(domain, old, dev); if (ret) goto out_remove_dev_pasid; + ret = domain_setup_nested(iommu, dmar_domain, dev, pasid, old); + if (ret) + goto out_unwind_iopf; + domain_remove_dev_pasid(old, dev, pasid); return 0; +out_unwind_iopf: + iopf_for_domain_replace(old, domain, dev); out_remove_dev_pasid: domain_remove_dev_pasid(domain, dev, pasid); return ret; diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index ba93123cb4eb..f3da596410b5 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -164,18 +164,23 @@ static int intel_svm_set_dev_pasid(struct iommu_domain *domain, if (IS_ERR(dev_pasid)) return PTR_ERR(dev_pasid); + ret = iopf_for_domain_replace(domain, old, dev); + if (ret) 
+ goto out_remove_dev_pasid; + /* Setup the pasid table: */ sflags = cpu_feature_enabled(X86_FEATURE_LA57) ? PASID_FLAG_FL5LP : 0; ret = __domain_setup_first_level(iommu, dev, pasid, FLPT_DEFAULT_DID, mm->pgd, sflags, old); if (ret) - goto out_remove_dev_pasid; + goto out_unwind_iopf; domain_remove_dev_pasid(old, dev, pasid); return 0; - +out_unwind_iopf: + iopf_for_domain_replace(old, domain, dev); out_remove_dev_pasid: domain_remove_dev_pasid(domain, dev, pasid); return ret; -- cgit v1.2.3 From f984fb09e60e0e2db5ad5be9d4cfcefb2008fda1 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Fri, 18 Apr 2025 16:01:30 +0800 Subject: iommu: Remove iommu_dev_enable/disable_feature() No external drivers use these interfaces anymore. Furthermore, no existing iommu drivers implement anything in the callbacks. Remove them to avoid dead code. Signed-off-by: Lu Baolu Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Tested-by: Zhangfei Gao Reviewed-by: Nicolin Chen Link: https://lore.kernel.org/r/20250418080130.1844424-9-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/iommu.c | 32 --------------------------- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 34 ----------------------------- drivers/iommu/intel/iommu.c | 25 --------------------- drivers/iommu/iommu.c | 32 --------------------------- include/linux/iommu.h | 28 ------------------------ 5 files changed, 151 deletions(-) (limited to 'drivers/iommu/intel/iommu.c') diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 17aab6d04a13..a9d539fce982 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2984,36 +2984,6 @@ static const struct iommu_dirty_ops amd_dirty_ops = { .read_and_clear_dirty = amd_iommu_read_and_clear_dirty, }; -static int amd_iommu_dev_enable_feature(struct device *dev, - enum iommu_dev_features feat) -{ - int ret = 0; - - switch (feat) { - case IOMMU_DEV_FEAT_IOPF: - break; - default: - ret = -EINVAL; - break; - } - return ret; -} - -static int amd_iommu_dev_disable_feature(struct device *dev, - enum iommu_dev_features feat) -{ - int ret = 0; - - switch (feat) { - case IOMMU_DEV_FEAT_IOPF: - break; - default: - ret = -EINVAL; - break; - } - return ret; -} - const struct iommu_ops amd_iommu_ops = { .capable = amd_iommu_capable, .blocked_domain = &blocked_domain, @@ -3027,8 +2997,6 @@ const struct iommu_ops amd_iommu_ops = { .get_resv_regions = amd_iommu_get_resv_regions, .is_attach_deferred = amd_iommu_is_attach_deferred, .def_domain_type = amd_iommu_def_domain_type, - .dev_enable_feat = amd_iommu_dev_enable_feature, - .dev_disable_feat = amd_iommu_dev_disable_feature, .page_response = amd_iommu_page_response, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = amd_iommu_attach_device, diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index cddd7442661f..09d51efd9095 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -3643,38 +3643,6 @@ static void arm_smmu_get_resv_regions(struct device *dev, iommu_dma_get_resv_regions(dev, head); } -static int arm_smmu_dev_enable_feature(struct device *dev, - enum iommu_dev_features feat) -{ - struct arm_smmu_master *master = dev_iommu_priv_get(dev); - - if (!master) - return -ENODEV; - - switch (feat) { - case IOMMU_DEV_FEAT_IOPF: - return 0; - default: - return -EINVAL; - } -} - -static int arm_smmu_dev_disable_feature(struct device *dev, - enum iommu_dev_features feat) -{ - struct arm_smmu_master 
*master = dev_iommu_priv_get(dev); - - if (!master) - return -EINVAL; - - switch (feat) { - case IOMMU_DEV_FEAT_IOPF: - return 0; - default: - return -EINVAL; - } -} - /* * HiSilicon PCIe tune and trace device can be used to trace TLP headers on the * PCIe link and save the data to memory by DMA. The hardware is restricted to @@ -3707,8 +3675,6 @@ static struct iommu_ops arm_smmu_ops = { .device_group = arm_smmu_device_group, .of_xlate = arm_smmu_of_xlate, .get_resv_regions = arm_smmu_get_resv_regions, - .dev_enable_feat = arm_smmu_dev_enable_feature, - .dev_disable_feat = arm_smmu_dev_disable_feature, .page_response = arm_smmu_page_response, .def_domain_type = arm_smmu_def_domain_type, .viommu_alloc = arm_vsmmu_alloc, diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index a18e7504c7d1..e9ffe0a95944 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -3957,29 +3957,6 @@ void intel_iommu_disable_iopf(struct device *dev) iopf_queue_remove_device(iommu->iopf_queue, dev); } -static int -intel_iommu_dev_enable_feat(struct device *dev, enum iommu_dev_features feat) -{ - switch (feat) { - case IOMMU_DEV_FEAT_IOPF: - return 0; - default: - return -ENODEV; - } -} - -static int -intel_iommu_dev_disable_feat(struct device *dev, enum iommu_dev_features feat) -{ - switch (feat) { - case IOMMU_DEV_FEAT_IOPF: - return 0; - - default: - return -ENODEV; - } -} - static bool intel_iommu_is_attach_deferred(struct device *dev) { struct device_domain_info *info = dev_iommu_priv_get(dev); @@ -4421,8 +4398,6 @@ const struct iommu_ops intel_iommu_ops = { .release_device = intel_iommu_release_device, .get_resv_regions = intel_iommu_get_resv_regions, .device_group = intel_iommu_device_group, - .dev_enable_feat = intel_iommu_dev_enable_feat, - .dev_disable_feat = intel_iommu_dev_disable_feat, .is_attach_deferred = intel_iommu_is_attach_deferred, .def_domain_type = device_def_domain_type, .pgsize_bitmap = SZ_4K, diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 264383b507bc..df283bddf6cd 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -2910,38 +2910,6 @@ int iommu_fwspec_add_ids(struct device *dev, const u32 *ids, int num_ids) } EXPORT_SYMBOL_GPL(iommu_fwspec_add_ids); -/* - * Per device IOMMU features. - */ -int iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features feat) -{ - if (dev_has_iommu(dev)) { - const struct iommu_ops *ops = dev_iommu_ops(dev); - - if (ops->dev_enable_feat) - return ops->dev_enable_feat(dev, feat); - } - - return -ENODEV; -} -EXPORT_SYMBOL_GPL(iommu_dev_enable_feature); - -/* - * The device drivers should do the necessary cleanups before calling this. - */ -int iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features feat) -{ - if (dev_has_iommu(dev)) { - const struct iommu_ops *ops = dev_iommu_ops(dev); - - if (ops->dev_disable_feat) - return ops->dev_disable_feat(dev, feat); - } - - return -EBUSY; -} -EXPORT_SYMBOL_GPL(iommu_dev_disable_feature); - /** * iommu_setup_default_domain - Set the default_domain for the group * @group: Group to change diff --git a/include/linux/iommu.h b/include/linux/iommu.h index f39af28acaeb..8d9119b000fe 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -316,16 +316,6 @@ struct iommu_iort_rmr_data { u32 num_sids; }; -/** - * enum iommu_dev_features - Per device IOMMU features - * @IOMMU_DEV_FEAT_IOPF: I/O Page Faults such as PRI or Stall. - * - * Device drivers enable a feature using iommu_dev_enable_feature(). 
- */ -enum iommu_dev_features { - IOMMU_DEV_FEAT_IOPF, -}; - #define IOMMU_NO_PASID (0U) /* Reserved for DMA w/o PASID */ #define IOMMU_FIRST_GLOBAL_PASID (1U) /*starting range for allocation */ #define IOMMU_PASID_INVALID (-1U) @@ -657,9 +647,6 @@ struct iommu_ops { bool (*is_attach_deferred)(struct device *dev); /* Per device IOMMU features */ - int (*dev_enable_feat)(struct device *dev, enum iommu_dev_features f); - int (*dev_disable_feat)(struct device *dev, enum iommu_dev_features f); - void (*page_response)(struct device *dev, struct iopf_fault *evt, struct iommu_page_response *msg); @@ -1128,9 +1115,6 @@ void dev_iommu_priv_set(struct device *dev, void *priv); extern struct mutex iommu_probe_device_lock; int iommu_probe_device(struct device *dev); -int iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features f); -int iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features f); - int iommu_device_use_default_domain(struct device *dev); void iommu_device_unuse_default_domain(struct device *dev); @@ -1415,18 +1399,6 @@ static inline int iommu_fwspec_add_ids(struct device *dev, u32 *ids, return -ENODEV; } -static inline int -iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features feat) -{ - return -ENODEV; -} - -static inline int -iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features feat) -{ - return -ENODEV; -} - static inline struct iommu_fwspec *dev_iommu_fwspec_get(struct device *dev) { return NULL; -- cgit v1.2.3 From 6f5dc7658094610debe2fad1b09a2034e14857b1 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 13 May 2025 11:07:35 +0800 Subject: iommu/vt-d: Restore WO permissions on second-level paging entries VT-D HW can do WO permissions on the second-stage but not the first-stage page table formats. The commit eea53c581688 ("iommu/vt-d: Remove WO permissions on second-level paging entries") wanted to make this uniform for VT-D by disabling the support for WO permissions in the second-stage. This isn't consistent with how other drivers are working. Instead if the underlying HW can support WO, it should. For instance AMD already supports WO on its second stage (v1) format and not its first (v2). If WO support needs to be discoverable it should be done through an iommu_domain capability flag. Signed-off-by: Jason Gunthorpe Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/0-v1-c26553717e90+65f-iommu_vtd_ss_wo_jgg@nvidia.com Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/iommu/intel/iommu.c') diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index b29da2d96d0b..bdf771bc4a26 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -1681,9 +1681,8 @@ __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, } attr = prot & (DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP); - attr |= DMA_FL_PTE_PRESENT; if (domain->use_first_level) { - attr |= DMA_FL_PTE_US | DMA_FL_PTE_ACCESS; + attr |= DMA_FL_PTE_PRESENT | DMA_FL_PTE_US | DMA_FL_PTE_ACCESS; if (prot & DMA_PTE_WRITE) attr |= DMA_FL_PTE_DIRTY; } -- cgit v1.2.3 From f93b4ac5929a5754fc4edba26b43d0c9fa57d576 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 13 May 2025 11:07:36 +0800 Subject: iommu/vt-d: Use ida to manage domain id Switch the intel iommu driver to use the ida mechanism for managing domain IDs, replacing the previous fixed-size bitmap. 
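A condensed sketch of the allocator lifecycle this patch adopts, assembled from the hunks below:

    ida_init(&iommu->domain_ida);           /* when the iommu is allocated */

    num = ida_alloc_range(&iommu->domain_ida, IDA_START_DID,
                          cap_ndoms(iommu->cap) - 1, GFP_ATOMIC);
    if (num < 0)
            return -ENOSPC;                 /* no free domain ids */

    ida_free(&iommu->domain_ida, num);      /* when the domain detaches */
    ida_destroy(&iommu->domain_ida);        /* when the iommu is freed */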
The previous approach allocated a bitmap large enough to cover the maximum number of domain IDs supported by the hardware, regardless of the actual number of domains in use. This led to unnecessary memory consumption, especially on systems supporting a large number of iommu units but only utilizing a small number of domain IDs. The ida allocator dynamically manages the allocation and freeing of integer IDs, only consuming memory for the IDs that are currently in use. This significantly optimizes memory usage compared to the fixed-size bitmap. Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20250430021135.2370244-2-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/dmar.c | 3 ++ drivers/iommu/intel/iommu.c | 80 +++++++++------------------------------------ drivers/iommu/intel/iommu.h | 19 ++++++++--- 3 files changed, 34 insertions(+), 68 deletions(-) (limited to 'drivers/iommu/intel/iommu.c') diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index e540092d664d..0e35969c026b 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -1099,6 +1099,8 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd) spin_lock_init(&iommu->device_rbtree_lock); mutex_init(&iommu->iopf_lock); iommu->node = NUMA_NO_NODE; + spin_lock_init(&iommu->lock); + ida_init(&iommu->domain_ida); ver = readl(iommu->reg + DMAR_VER_REG); pr_info("%s: reg_base_addr %llx ver %d:%d cap %llx ecap %llx\n", @@ -1195,6 +1197,7 @@ static void free_iommu(struct intel_iommu *iommu) if (iommu->reg) unmap_iommu(iommu); + ida_destroy(&iommu->domain_ida); ida_free(&dmar_seq_ids, iommu->seq_id); kfree(iommu); } diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index bdf771bc4a26..023105234139 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -1289,52 +1289,13 @@ static void iommu_disable_translation(struct intel_iommu *iommu) raw_spin_unlock_irqrestore(&iommu->register_lock, flag); } -static int iommu_init_domains(struct intel_iommu *iommu) -{ - u32 ndomains; - - ndomains = cap_ndoms(iommu->cap); - pr_debug("%s: Number of Domains supported <%d>\n", - iommu->name, ndomains); - - spin_lock_init(&iommu->lock); - - iommu->domain_ids = bitmap_zalloc(ndomains, GFP_KERNEL); - if (!iommu->domain_ids) - return -ENOMEM; - - /* - * If Caching mode is set, then invalid translations are tagged - * with domain-id 0, hence we need to pre-allocate it. We also - * use domain-id 0 as a marker for non-allocated domain-id, so - * make sure it is not used for a real domain. - */ - set_bit(0, iommu->domain_ids); - - /* - * Vt-d spec rev3.0 (section 6.2.3.1) requires that each pasid - * entry for first-level or pass-through translation modes should - * be programmed with a domain id different from those used for - * second-level or nested translation. We reserve a domain id for - * this purpose. This domain id is also used for identity domain - * in legacy mode. - */ - set_bit(FLPT_DEFAULT_DID, iommu->domain_ids); - - return 0; -} - static void disable_dmar_iommu(struct intel_iommu *iommu) { - if (!iommu->domain_ids) - return; - /* * All iommu domains must have been detached from the devices, * hence there should be no domain IDs in use. 
*/ - if (WARN_ON(bitmap_weight(iommu->domain_ids, cap_ndoms(iommu->cap)) - > NUM_RESERVED_DID)) + if (WARN_ON(!ida_is_empty(&iommu->domain_ida))) return; if (iommu->gcmd & DMA_GCMD_TE) @@ -1343,11 +1304,6 @@ static void disable_dmar_iommu(struct intel_iommu *iommu) static void free_dmar_iommu(struct intel_iommu *iommu) { - if (iommu->domain_ids) { - bitmap_free(iommu->domain_ids); - iommu->domain_ids = NULL; - } - if (iommu->copied_tables) { bitmap_free(iommu->copied_tables); iommu->copied_tables = NULL; @@ -1380,7 +1336,6 @@ static bool first_level_by_default(struct intel_iommu *iommu) int domain_attach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu) { struct iommu_domain_info *info, *curr; - unsigned long ndomains; int num, ret = -ENOSPC; if (domain->domain.type == IOMMU_DOMAIN_SVA) @@ -1399,14 +1354,13 @@ int domain_attach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu) return 0; } - ndomains = cap_ndoms(iommu->cap); - num = find_first_zero_bit(iommu->domain_ids, ndomains); - if (num >= ndomains) { + num = ida_alloc_range(&iommu->domain_ida, IDA_START_DID, + cap_ndoms(iommu->cap) - 1, GFP_ATOMIC); + if (num < 0) { pr_err("%s: No free domain ids\n", iommu->name); goto err_unlock; } - set_bit(num, iommu->domain_ids); info->refcnt = 1; info->did = num; info->iommu = iommu; @@ -1421,7 +1375,7 @@ int domain_attach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu) return 0; err_clear: - clear_bit(info->did, iommu->domain_ids); + ida_free(&iommu->domain_ida, info->did); err_unlock: spin_unlock(&iommu->lock); kfree(info); @@ -1438,7 +1392,7 @@ void domain_detach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu) spin_lock(&iommu->lock); info = xa_load(&domain->iommu_array, iommu->seq_id); if (--info->refcnt == 0) { - clear_bit(info->did, iommu->domain_ids); + ida_free(&iommu->domain_ida, info->did); xa_erase(&domain->iommu_array, iommu->seq_id); domain->nid = NUMA_NO_NODE; kfree(info); @@ -2041,7 +1995,7 @@ static int copy_context_table(struct intel_iommu *iommu, did = context_domain_id(&ce); if (did >= 0 && did < cap_ndoms(iommu->cap)) - set_bit(did, iommu->domain_ids); + ida_alloc_range(&iommu->domain_ida, did, did, GFP_KERNEL); set_context_copied(iommu, bus, devfn); new_ce[idx] = ce; @@ -2168,11 +2122,6 @@ static int __init init_dmars(void) } intel_iommu_init_qi(iommu); - - ret = iommu_init_domains(iommu); - if (ret) - goto free_iommu; - init_translation_status(iommu); if (translation_pre_enabled(iommu) && !is_kdump_kernel()) { @@ -2650,9 +2599,7 @@ static int intel_iommu_add(struct dmar_drhd_unit *dmaru) if (iommu->gcmd & DMA_GCMD_TE) iommu_disable_translation(iommu); - ret = iommu_init_domains(iommu); - if (ret == 0) - ret = iommu_alloc_root_entry(iommu); + ret = iommu_alloc_root_entry(iommu); if (ret) goto out; @@ -2971,9 +2918,14 @@ static ssize_t domains_used_show(struct device *dev, struct device_attribute *attr, char *buf) { struct intel_iommu *iommu = dev_to_intel_iommu(dev); - return sysfs_emit(buf, "%d\n", - bitmap_weight(iommu->domain_ids, - cap_ndoms(iommu->cap))); + unsigned int count = 0; + int id; + + for (id = 0; id < cap_ndoms(iommu->cap); id++) + if (ida_exists(&iommu->domain_ida, id)) + count++; + + return sysfs_emit(buf, "%d\n", count); } static DEVICE_ATTR_RO(domains_used); diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index c4916886da5a..25faf3aadd24 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -722,7 +722,7 @@ struct intel_iommu { unsigned char name[16]; /* Device 
Name */ #ifdef CONFIG_INTEL_IOMMU - unsigned long *domain_ids; /* bitmap of domains */ + struct ida domain_ida; /* domain id allocator */ unsigned long *copied_tables; /* bitmap of copied tables */ spinlock_t lock; /* protect context, domain ids */ struct root_entry *root_entry; /* virtual address */ @@ -809,11 +809,22 @@ static inline struct dmar_domain *to_dmar_domain(struct iommu_domain *dom) } /* - * Domain ID reserved for pasid entries programmed for first-level - * only and pass-through transfer modes. + * Domain ID 0 and 1 are reserved: + * + * If Caching mode is set, then invalid translations are tagged + * with domain-id 0, hence we need to pre-allocate it. We also + * use domain-id 0 as a marker for non-allocated domain-id, so + * make sure it is not used for a real domain. + * + * Vt-d spec rev3.0 (section 6.2.3.1) requires that each pasid + * entry for first-level or pass-through translation modes should + * be programmed with a domain id different from those used for + * second-level or nested translation. We reserve a domain id for + * this purpose. This domain id is also used for identity domain + * in legacy mode. */ #define FLPT_DEFAULT_DID 1 -#define NUM_RESERVED_DID 2 +#define IDA_START_DID 2 /* Retrieve the domain ID which has allocated to the domain */ static inline u16 -- cgit v1.2.3 From 868720fe15d1a6a1bddc28ad7b7e4b62448ee36b Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 13 May 2025 11:07:37 +0800 Subject: iommu/vt-d: Replace spin_lock with mutex to protect domain ida The domain ID allocator is currently protected by a spin_lock. However, ida_alloc_range can potentially block if it needs to allocate memory to grow its internal structures. Replace the spin_lock with a mutex which allows sleep on block. Thus, the memory allocation flags can be updated from GFP_ATOMIC to GFP_KERNEL to allow blocking memory allocations if necessary. Introduce a new mutex, did_lock, specifically for protecting the domain ida. The existing spinlock will remain for protecting other intel_iommu fields. 
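For readers unfamiliar with the scope-based cleanup helpers, guard(mutex)() acquires the lock and releases it automatically when the enclosing scope is left, which is why the explicit unlock calls disappear in the diff below. A reduced sketch of the attach path, with the bookkeeping elided:

    int domain_attach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu)
    {
            int num;

            guard(mutex)(&iommu->did_lock); /* dropped on every return path */

            num = ida_alloc_range(&iommu->domain_ida, IDA_START_DID,
                                  cap_ndoms(iommu->cap) - 1, GFP_KERNEL);
            if (num < 0)
                    return num;             /* no mutex_unlock() needed */

            /* ... record the new domain ID ... */
            return 0;
    }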
Signed-off-by: Lu Baolu
Reviewed-by: Kevin Tian
Reviewed-by: Jason Gunthorpe
Link: https://lore.kernel.org/r/20250430021135.2370244-3-baolu.lu@linux.intel.com
Signed-off-by: Joerg Roedel
---
 drivers/iommu/intel/dmar.c  |  1 +
 drivers/iommu/intel/iommu.c | 12 ++++--------
 drivers/iommu/intel/iommu.h |  2 ++
 3 files changed, 7 insertions(+), 8 deletions(-)

(limited to 'drivers/iommu/intel/iommu.c')

diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c
index 0e35969c026b..9e17e8e56308 100644
--- a/drivers/iommu/intel/dmar.c
+++ b/drivers/iommu/intel/dmar.c
@@ -1101,6 +1101,7 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd)
         iommu->node = NUMA_NO_NODE;
         spin_lock_init(&iommu->lock);
         ida_init(&iommu->domain_ida);
+        mutex_init(&iommu->did_lock);
 
         ver = readl(iommu->reg + DMAR_VER_REG);
         pr_info("%s: reg_base_addr %llx ver %d:%d cap %llx ecap %llx\n",
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 023105234139..0f2845147058 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -1345,17 +1345,16 @@ int domain_attach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu)
         if (!info)
                 return -ENOMEM;
 
-        spin_lock(&iommu->lock);
+        guard(mutex)(&iommu->did_lock);
         curr = xa_load(&domain->iommu_array, iommu->seq_id);
         if (curr) {
                 curr->refcnt++;
-                spin_unlock(&iommu->lock);
                 kfree(info);
                 return 0;
         }
 
         num = ida_alloc_range(&iommu->domain_ida, IDA_START_DID,
-                              cap_ndoms(iommu->cap) - 1, GFP_ATOMIC);
+                              cap_ndoms(iommu->cap) - 1, GFP_KERNEL);
         if (num < 0) {
                 pr_err("%s: No free domain ids\n", iommu->name);
                 goto err_unlock;
@@ -1365,19 +1364,17 @@ int domain_attach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu)
         info->did = num;
         info->iommu = iommu;
         curr = xa_cmpxchg(&domain->iommu_array, iommu->seq_id,
-                          NULL, info, GFP_ATOMIC);
+                          NULL, info, GFP_KERNEL);
         if (curr) {
                 ret = xa_err(curr) ? : -EBUSY;
                 goto err_clear;
         }
 
-        spin_unlock(&iommu->lock);
         return 0;
 
 err_clear:
         ida_free(&iommu->domain_ida, info->did);
 err_unlock:
-        spin_unlock(&iommu->lock);
         kfree(info);
         return ret;
 }
@@ -1389,7 +1386,7 @@ void domain_detach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu)
         if (domain->domain.type == IOMMU_DOMAIN_SVA)
                 return;
 
-        spin_lock(&iommu->lock);
+        guard(mutex)(&iommu->did_lock);
         info = xa_load(&domain->iommu_array, iommu->seq_id);
         if (--info->refcnt == 0) {
                 ida_free(&iommu->domain_ida, info->did);
@@ -1397,7 +1394,6 @@ void domain_detach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu)
                 domain->nid = NUMA_NO_NODE;
                 kfree(info);
         }
-        spin_unlock(&iommu->lock);
 }
 
 static void domain_exit(struct dmar_domain *domain)
diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index 25faf3aadd24..5f140892fae0 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -722,6 +722,8 @@ struct intel_iommu {
         unsigned char name[16];    /* Device Name */
 
 #ifdef CONFIG_INTEL_IOMMU
+        /* mutex to protect domain_ida */
+        struct mutex did_lock;
         struct ida domain_ida;         /* domain id allocator */
         unsigned long *copied_tables;  /* bitmap of copied tables */
         spinlock_t lock;               /* protect context, domain ids */
--
cgit v1.2.3


From 3df2ebcee9fb3bd2597d1758795471a3a3b2203c Mon Sep 17 00:00:00 2001
From: Wei Wang
Date: Tue, 13 May 2025 11:07:38 +0800
Subject: iommu/vt-d: Eliminate pci_physfn() in dmar_find_matched_satc_unit()

The function dmar_find_matched_satc_unit() contains a duplicate call to
pci_physfn(). This call is unnecessary as pci_physfn() has already been
invoked by the caller.
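For context, pci_physfn() resolves an SR-IOV virtual function to its owning
physical function and returns the device unchanged for anything else, so the
call is idempotent. A minimal sketch (hypothetical helper, not kernel code)
of why the inner call was dead weight:

  #include <linux/pci.h>

  static void demo_lookup(struct pci_dev *dev)
  {
          struct pci_dev *pf = pci_physfn(dev); /* VF -> owning PF; PF -> itself */

          /*
           * Idempotent: resolving an already-resolved device is a no-op,
           * which is exactly what the second call inside
           * dmar_find_matched_satc_unit() amounted to.
           */
          WARN_ON(pci_physfn(pf) != pf);
  }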
Removing the redundant call simplifies the code and slightly improves
efficiency.

Signed-off-by: Wei Wang
Link: https://lore.kernel.org/r/20250509140021.4029303-2-wei.w.wang@intel.com
Signed-off-by: Lu Baolu
Signed-off-by: Joerg Roedel
---
 drivers/iommu/intel/iommu.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'drivers/iommu/intel/iommu.c')

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 0f2845147058..d48ff945832d 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -2686,7 +2686,6 @@ static struct dmar_satc_unit *dmar_find_matched_satc_unit(struct pci_dev *dev)
         struct device *tmp;
         int i;
 
-        dev = pci_physfn(dev);
         rcu_read_lock();
         list_for_each_entry_rcu(satcu, &dmar_satc_units, list) {
--
cgit v1.2.3


From f3fe7e1830c54abb24fcb0d6d98dd403e3e5fafd Mon Sep 17 00:00:00 2001
From: Wei Wang
Date: Tue, 13 May 2025 11:07:39 +0800
Subject: iommu/vt-d: Change dmar_ats_supported() to return boolean

According to "Function return values and names" in coding-style.rst, the
dmar_ats_supported() function should return a boolean instead of an
integer. Also, rename "ret" to "supported" to make its meaning more
straightforward.

Signed-off-by: Wei Wang
Reviewed-by: Yi Liu
Link: https://lore.kernel.org/r/20250509140021.4029303-3-wei.w.wang@intel.com
Signed-off-by: Lu Baolu
Signed-off-by: Joerg Roedel
---
 drivers/iommu/intel/iommu.c | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

(limited to 'drivers/iommu/intel/iommu.c')

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index d48ff945832d..bb871674d8ac 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -2702,15 +2702,16 @@ out:
         return satcu;
 }
 
-static int dmar_ats_supported(struct pci_dev *dev, struct intel_iommu *iommu)
+static bool dmar_ats_supported(struct pci_dev *dev, struct intel_iommu *iommu)
 {
-        int i, ret = 1;
-        struct pci_bus *bus;
         struct pci_dev *bridge = NULL;
-        struct device *tmp;
-        struct acpi_dmar_atsr *atsr;
         struct dmar_atsr_unit *atsru;
         struct dmar_satc_unit *satcu;
+        struct acpi_dmar_atsr *atsr;
+        bool supported = true;
+        struct pci_bus *bus;
+        struct device *tmp;
+        int i;
 
         dev = pci_physfn(dev);
         satcu = dmar_find_matched_satc_unit(dev);
@@ -2728,11 +2729,11 @@ static int dmar_ats_supported(struct pci_dev *dev, struct intel_iommu *iommu)
                 bridge = bus->self;
                 /* If it's an integrated device, allow ATS */
                 if (!bridge)
-                        return 1;
+                        return true;
                 /* Connected via non-PCIe: no ATS */
                 if (!pci_is_pcie(bridge) ||
                     pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
-                        return 0;
+                        return false;
                 /* If we found the root port, look it up in the ATSR */
                 if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
                         break;
@@ -2751,11 +2752,11 @@ static int dmar_ats_supported(struct pci_dev *dev, struct intel_iommu *iommu)
                         if (atsru->include_all)
                                 goto out;
                 }
-                ret = 0;
+                supported = false;
         }
 out:
         rcu_read_unlock();
 
-        return ret;
+        return supported;
 }
 
 int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
--
cgit v1.2.3


From 320302baed05c6456164652541f23d2a96522c06 Mon Sep 17 00:00:00 2001
From: Lu Baolu
Date: Tue, 20 May 2025 15:58:49 +0800
Subject: iommu/vt-d: Restore context entry setup order for aliased devices

Commit 2031c469f816 ("iommu/vt-d: Add support for static identity domain")
changed the context entry setup during domain attachment from a
set-and-check policy to a clear-and-reset approach. This inadvertently
introduced a regression affecting PCI aliased devices behind PCIe-to-PCI
bridges.
Specifically, the keyboard and touchpad stopped working on several Apple
MacBooks, with the following messages:

 kernel: platform pxa2xx-spi.3: Adding to iommu group 20
 kernel: input: Apple SPI Keyboard as /devices/pci0000:00/0000:00:1e.3/pxa2xx-spi.3/spi_master/spi2/spi-APP000D:00/input/input0
 kernel: DMAR: DRHD: handling fault status reg 3
 kernel: DMAR: [DMA Read NO_PASID] Request device [00:1e.3] fault addr 0xffffa000 [fault reason 0x06] PTE Read access is not set
 kernel: DMAR: DRHD: handling fault status reg 3
 kernel: DMAR: [DMA Read NO_PASID] Request device [00:1e.3] fault addr 0xffffa000 [fault reason 0x06] PTE Read access is not set
 kernel: applespi spi-APP000D:00: Error writing to device: 01 0e 00 00
 kernel: DMAR: DRHD: handling fault status reg 3
 kernel: DMAR: [DMA Read NO_PASID] Request device [00:1e.3] fault addr 0xffffa000 [fault reason 0x06] PTE Read access is not set
 kernel: DMAR: DRHD: handling fault status reg 3
 kernel: applespi spi-APP000D:00: Error writing to device: 01 0e 00 00

Fix this by restoring the previous context setup order.

Fixes: 2031c469f816 ("iommu/vt-d: Add support for static identity domain")
Closes: https://lore.kernel.org/all/4dada48a-c5dd-4c30-9c85-5b03b0aa01f0@bfh.ch/
Cc: stable@vger.kernel.org
Signed-off-by: Lu Baolu
Reviewed-by: Kevin Tian
Reviewed-by: Yi Liu
Link: https://lore.kernel.org/r/20250514060523.2862195-1-baolu.lu@linux.intel.com
Link: https://lore.kernel.org/r/20250520075849.755012-2-baolu.lu@linux.intel.com
Signed-off-by: Joerg Roedel
---
 drivers/iommu/intel/iommu.c  | 11 +++++++++++
 drivers/iommu/intel/iommu.h  |  1 +
 drivers/iommu/intel/nested.c |  4 ++--
 3 files changed, 14 insertions(+), 2 deletions(-)

(limited to 'drivers/iommu/intel/iommu.c')

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index bb871674d8ac..226e174577ff 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -1808,6 +1808,7 @@ static int dmar_domain_attach_device(struct dmar_domain *domain,
                 return ret;
 
         info->domain = domain;
+        info->domain_attached = true;
         spin_lock_irqsave(&domain->lock, flags);
         list_add(&info->link, &domain->devices);
         spin_unlock_irqrestore(&domain->lock, flags);
@@ -3204,6 +3205,10 @@ void device_block_translation(struct device *dev)
         struct intel_iommu *iommu = info->iommu;
         unsigned long flags;
 
+        /* Device in DMA blocking state. Nothing to do. */
+        if (!info->domain_attached)
+                return;
+
         if (info->domain)
                 cache_tag_unassign_domain(info->domain, dev, IOMMU_NO_PASID);
 
@@ -3215,6 +3220,9 @@ void device_block_translation(struct device *dev)
                         domain_context_clear(info);
         }
 
+        /* Device now in DMA blocking state. */
+        info->domain_attached = false;
+
         if (!info->domain)
                 return;
 
@@ -4298,6 +4306,9 @@ static int identity_domain_attach_dev(struct iommu_domain *domain, struct device
         else
                 ret = device_setup_pass_through(dev);
 
+        if (!ret)
+                info->domain_attached = true;
+
         return ret;
 }
 
diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index 5f140892fae0..fc12de633e28 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -775,6 +775,7 @@ struct device_domain_info {
         u8 ats_supported:1;
         u8 ats_enabled:1;
         u8 dtlb_extra_inval:1;  /* Quirk for devices need extra flush */
+        u8 domain_attached:1;   /* Device has domain attached */
         u8 ats_qdep;
         unsigned int iopf_refcount;
         struct device *dev; /* it's NULL for PCIe-to-PCI bridge */
diff --git a/drivers/iommu/intel/nested.c b/drivers/iommu/intel/nested.c
index 6ac5c534bef4..1e149169ee77 100644
--- a/drivers/iommu/intel/nested.c
+++ b/drivers/iommu/intel/nested.c
@@ -27,8 +27,7 @@ static int intel_nested_attach_dev(struct iommu_domain *domain,
         unsigned long flags;
         int ret = 0;
 
-        if (info->domain)
-                device_block_translation(dev);
+        device_block_translation(dev);
 
         if (iommu->agaw < dmar_domain->s2_domain->agaw) {
                 dev_err_ratelimited(dev, "Adjusted guest address width not compatible\n");
@@ -62,6 +61,7 @@ static int intel_nested_attach_dev(struct iommu_domain *domain,
                 goto unassign_tag;
 
         info->domain = dmar_domain;
+        info->domain_attached = true;
         spin_lock_irqsave(&dmar_domain->lock, flags);
         list_add(&info->link, &dmar_domain->devices);
         spin_unlock_irqrestore(&dmar_domain->lock, flags);
--
cgit v1.2.3
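Taken together, the hunks above form a simple idempotency guard: every
successful attach path sets info->domain_attached, and
device_block_translation() both checks and clears it, so the now
unconditional blocking call in intel_nested_attach_dev() is harmless when
nothing is attached. A condensed sketch of the pattern with hypothetical
names (the real functions also tear down context and PASID entries):

  /* Mirrors the new domain_attached bitfield in struct device_domain_info. */
  struct demo_dev_info {
          bool domain_attached;
  };

  static int demo_setup_translation(struct demo_dev_info *info)
  {
          return 0; /* stand-in for context/PASID entry setup */
  }

  static int demo_attach(struct demo_dev_info *info)
  {
          int ret = demo_setup_translation(info);

          if (!ret)
                  info->domain_attached = true; /* mark attached only on success */
          return ret;
  }

  static void demo_block_translation(struct demo_dev_info *info)
  {
          /* Device already in the DMA blocking state: nothing to tear down. */
          if (!info->domain_attached)
                  return;

          /* ... clear context/PASID entries here ... */

          info->domain_attached = false; /* now in the DMA blocking state */
  }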