diff options
author | Jason Gunthorpe <jgg@nvidia.com> | 2023-01-30 20:53:04 +0300 |
---|---|---|
committer | Jason Gunthorpe <jgg@nvidia.com> | 2023-01-30 20:54:35 +0300 |
commit | fd9f2a912255106d3b53163d816a4bd99ba29664 (patch) | |
tree | 892f38b617b534c93dc5c0b1f1efc3950812e828 | |
parent | 84798f2849942bb5e8817417adfdfa6241df2835 (diff) | |
parent | 429f27e36874e34727a1f8495be2ea3a7060732c (diff) | |
download | linux-fd9f2a912255106d3b53163d816a4bd99ba29664.tar.xz |
Merge branch 'iommu-memory-accounting' of ssh://gitolite.kernel.org/pub/scm/linux/kernel/git/joro/iommu intoiommufd/for-next
Jason Gunthorpe says:
====================
iommufd follows the same design as KVM and uses memory cgroups to limit
the amount of kernel memory a iommufd file descriptor can pin down. The
various internal data structures already use GFP_KERNEL_ACCOUNT to charge
its own memory.
However, one of the biggest consumers of kernel memory is the IOPTEs
stored under the iommu_domain and these allocations are not tracked.
This series is the first step in fixing it.
The iommu driver contract already includes a 'gfp' argument to the
map_pages op, allowing iommufd to specify GFP_KERNEL_ACCOUNT and then
having the driver allocate the IOPTE tables with that flag will capture a
significant amount of the allocations.
Update the iommu_map() API to pass in the GFP argument, and fix all call
sites. Replace iommu_map_atomic().
Audit the "enterprise" iommu drivers to make sure they do the right thing.
Intel and S390 ignore the GFP argument and always use GFP_ATOMIC. This is
problematic for iommufd anyhow, so fix it. AMD and ARM SMMUv2/3 are
already correct.
A follow up series will be needed to capture the allocations made when the
iommu_domain itself is allocated, which will complete the job.
====================
* 'iommu-memory-accounting' of ssh://gitolite.kernel.org/pub/scm/linux/kernel/git/joro/iommu:
iommu/s390: Use GFP_KERNEL in sleepable contexts
iommu/s390: Push the gfp parameter to the kmem_cache_alloc()'s
iommu/intel: Use GFP_KERNEL in sleepable contexts
iommu/intel: Support the gfp argument to the map_pages op
iommu/intel: Add a gfp parameter to alloc_pgtable_page()
iommufd: Use GFP_KERNEL_ACCOUNT for iommu_map()
iommu/dma: Use the gfp parameter in __iommu_dma_alloc_noncontiguous()
iommu: Add a gfp parameter to iommu_map_sg()
iommu: Remove iommu_map_atomic()
iommu: Add a gfp parameter to iommu_map()
Link: https://lore.kernel.org/linux-iommu/0-v3-76b587fe28df+6e3-iommu_map_gfp_jgg@nvidia.com
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
-rw-r--r-- | arch/arm/mm/dma-mapping.c | 11 | ||||
-rw-r--r-- | arch/s390/include/asm/pci_dma.h | 5 | ||||
-rw-r--r-- | arch/s390/pci/pci_dma.c | 31 | ||||
-rw-r--r-- | drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c | 3 | ||||
-rw-r--r-- | drivers/gpu/drm/tegra/drm.c | 2 | ||||
-rw-r--r-- | drivers/gpu/host1x/cdma.c | 2 | ||||
-rw-r--r-- | drivers/infiniband/hw/usnic/usnic_uiom.c | 4 | ||||
-rw-r--r-- | drivers/iommu/dma-iommu.c | 18 | ||||
-rw-r--r-- | drivers/iommu/intel/iommu.c | 36 | ||||
-rw-r--r-- | drivers/iommu/intel/iommu.h | 2 | ||||
-rw-r--r-- | drivers/iommu/intel/pasid.c | 2 | ||||
-rw-r--r-- | drivers/iommu/iommu.c | 53 | ||||
-rw-r--r-- | drivers/iommu/iommufd/pages.c | 6 | ||||
-rw-r--r-- | drivers/iommu/s390-iommu.c | 15 | ||||
-rw-r--r-- | drivers/media/platform/qcom/venus/firmware.c | 2 | ||||
-rw-r--r-- | drivers/net/ipa/ipa_mem.c | 6 | ||||
-rw-r--r-- | drivers/net/wireless/ath/ath10k/snoc.c | 2 | ||||
-rw-r--r-- | drivers/net/wireless/ath/ath11k/ahb.c | 4 | ||||
-rw-r--r-- | drivers/remoteproc/remoteproc_core.c | 5 | ||||
-rw-r--r-- | drivers/vfio/vfio_iommu_type1.c | 9 | ||||
-rw-r--r-- | drivers/vhost/vdpa.c | 2 | ||||
-rw-r--r-- | include/linux/iommu.h | 31 |
22 files changed, 126 insertions, 125 deletions
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index c135f6e37a00..8bc01071474a 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -984,7 +984,8 @@ __iommu_create_mapping(struct device *dev, struct page **pages, size_t size, len = (j - i) << PAGE_SHIFT; ret = iommu_map(mapping->domain, iova, phys, len, - __dma_info_to_prot(DMA_BIDIRECTIONAL, attrs)); + __dma_info_to_prot(DMA_BIDIRECTIONAL, attrs), + GFP_KERNEL); if (ret < 0) goto fail; iova += len; @@ -1207,7 +1208,8 @@ static int __map_sg_chunk(struct device *dev, struct scatterlist *sg, prot = __dma_info_to_prot(dir, attrs); - ret = iommu_map(mapping->domain, iova, phys, len, prot); + ret = iommu_map(mapping->domain, iova, phys, len, prot, + GFP_KERNEL); if (ret < 0) goto fail; count += len >> PAGE_SHIFT; @@ -1379,7 +1381,8 @@ static dma_addr_t arm_iommu_map_page(struct device *dev, struct page *page, prot = __dma_info_to_prot(dir, attrs); - ret = iommu_map(mapping->domain, dma_addr, page_to_phys(page), len, prot); + ret = iommu_map(mapping->domain, dma_addr, page_to_phys(page), len, + prot, GFP_KERNEL); if (ret < 0) goto fail; @@ -1443,7 +1446,7 @@ static dma_addr_t arm_iommu_map_resource(struct device *dev, prot = __dma_info_to_prot(dir, attrs) | IOMMU_MMIO; - ret = iommu_map(mapping->domain, dma_addr, addr, len, prot); + ret = iommu_map(mapping->domain, dma_addr, addr, len, prot, GFP_KERNEL); if (ret < 0) goto fail; diff --git a/arch/s390/include/asm/pci_dma.h b/arch/s390/include/asm/pci_dma.h index 91e63426bdc5..7119c04c51c5 100644 --- a/arch/s390/include/asm/pci_dma.h +++ b/arch/s390/include/asm/pci_dma.h @@ -186,9 +186,10 @@ static inline unsigned long *get_st_pto(unsigned long entry) /* Prototypes */ void dma_free_seg_table(unsigned long); -unsigned long *dma_alloc_cpu_table(void); +unsigned long *dma_alloc_cpu_table(gfp_t gfp); void dma_cleanup_tables(unsigned long *); -unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr); +unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr, + gfp_t gfp); void dma_update_cpu_trans(unsigned long *entry, phys_addr_t page_addr, int flags); extern const struct dma_map_ops s390_pci_dma_ops; diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c index ea478d11fbd1..2d9b01d7ca4c 100644 --- a/arch/s390/pci/pci_dma.c +++ b/arch/s390/pci/pci_dma.c @@ -27,11 +27,11 @@ static int zpci_refresh_global(struct zpci_dev *zdev) zdev->iommu_pages * PAGE_SIZE); } -unsigned long *dma_alloc_cpu_table(void) +unsigned long *dma_alloc_cpu_table(gfp_t gfp) { unsigned long *table, *entry; - table = kmem_cache_alloc(dma_region_table_cache, GFP_ATOMIC); + table = kmem_cache_alloc(dma_region_table_cache, gfp); if (!table) return NULL; @@ -45,11 +45,11 @@ static void dma_free_cpu_table(void *table) kmem_cache_free(dma_region_table_cache, table); } -static unsigned long *dma_alloc_page_table(void) +static unsigned long *dma_alloc_page_table(gfp_t gfp) { unsigned long *table, *entry; - table = kmem_cache_alloc(dma_page_table_cache, GFP_ATOMIC); + table = kmem_cache_alloc(dma_page_table_cache, gfp); if (!table) return NULL; @@ -63,7 +63,7 @@ static void dma_free_page_table(void *table) kmem_cache_free(dma_page_table_cache, table); } -static unsigned long *dma_get_seg_table_origin(unsigned long *rtep) +static unsigned long *dma_get_seg_table_origin(unsigned long *rtep, gfp_t gfp) { unsigned long old_rte, rte; unsigned long *sto; @@ -72,7 +72,7 @@ static unsigned long *dma_get_seg_table_origin(unsigned long *rtep) if (reg_entry_isvalid(rte)) { sto = get_rt_sto(rte); } else { - sto = dma_alloc_cpu_table(); + sto = dma_alloc_cpu_table(gfp); if (!sto) return NULL; @@ -90,7 +90,7 @@ static unsigned long *dma_get_seg_table_origin(unsigned long *rtep) return sto; } -static unsigned long *dma_get_page_table_origin(unsigned long *step) +static unsigned long *dma_get_page_table_origin(unsigned long *step, gfp_t gfp) { unsigned long old_ste, ste; unsigned long *pto; @@ -99,7 +99,7 @@ static unsigned long *dma_get_page_table_origin(unsigned long *step) if (reg_entry_isvalid(ste)) { pto = get_st_pto(ste); } else { - pto = dma_alloc_page_table(); + pto = dma_alloc_page_table(gfp); if (!pto) return NULL; set_st_pto(&ste, virt_to_phys(pto)); @@ -116,18 +116,19 @@ static unsigned long *dma_get_page_table_origin(unsigned long *step) return pto; } -unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr) +unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr, + gfp_t gfp) { unsigned long *sto, *pto; unsigned int rtx, sx, px; rtx = calc_rtx(dma_addr); - sto = dma_get_seg_table_origin(&rto[rtx]); + sto = dma_get_seg_table_origin(&rto[rtx], gfp); if (!sto) return NULL; sx = calc_sx(dma_addr); - pto = dma_get_page_table_origin(&sto[sx]); + pto = dma_get_page_table_origin(&sto[sx], gfp); if (!pto) return NULL; @@ -170,7 +171,8 @@ static int __dma_update_trans(struct zpci_dev *zdev, phys_addr_t pa, return -EINVAL; for (i = 0; i < nr_pages; i++) { - entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr); + entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr, + GFP_ATOMIC); if (!entry) { rc = -ENOMEM; goto undo_cpu_trans; @@ -186,7 +188,8 @@ undo_cpu_trans: while (i-- > 0) { page_addr -= PAGE_SIZE; dma_addr -= PAGE_SIZE; - entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr); + entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr, + GFP_ATOMIC); if (!entry) break; dma_update_cpu_trans(entry, page_addr, flags); @@ -576,7 +579,7 @@ int zpci_dma_init_device(struct zpci_dev *zdev) spin_lock_init(&zdev->iommu_bitmap_lock); - zdev->dma_table = dma_alloc_cpu_table(); + zdev->dma_table = dma_alloc_cpu_table(GFP_KERNEL); if (!zdev->dma_table) { rc = -ENOMEM; goto out; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c index 648ecf5a8fbc..a4ac94a2ab57 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c @@ -475,7 +475,8 @@ gk20a_instobj_ctor_iommu(struct gk20a_instmem *imem, u32 npages, u32 align, u32 offset = (r->offset + i) << imem->iommu_pgshift; ret = iommu_map(imem->domain, offset, node->dma_addrs[i], - PAGE_SIZE, IOMMU_READ | IOMMU_WRITE); + PAGE_SIZE, IOMMU_READ | IOMMU_WRITE, + GFP_KERNEL); if (ret < 0) { nvkm_error(subdev, "IOMMU mapping failure: %d\n", ret); diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c index 7bd2e65c2a16..6ca9f396e55b 100644 --- a/drivers/gpu/drm/tegra/drm.c +++ b/drivers/gpu/drm/tegra/drm.c @@ -1057,7 +1057,7 @@ void *tegra_drm_alloc(struct tegra_drm *tegra, size_t size, dma_addr_t *dma) *dma = iova_dma_addr(&tegra->carveout.domain, alloc); err = iommu_map(tegra->domain, *dma, virt_to_phys(virt), - size, IOMMU_READ | IOMMU_WRITE); + size, IOMMU_READ | IOMMU_WRITE, GFP_KERNEL); if (err < 0) goto free_iova; diff --git a/drivers/gpu/host1x/cdma.c b/drivers/gpu/host1x/cdma.c index 103fda055394..4ddfcd2138c9 100644 --- a/drivers/gpu/host1x/cdma.c +++ b/drivers/gpu/host1x/cdma.c @@ -105,7 +105,7 @@ static int host1x_pushbuffer_init(struct push_buffer *pb) pb->dma = iova_dma_addr(&host1x->iova, alloc); err = iommu_map(host1x->domain, pb->dma, pb->phys, size, - IOMMU_READ); + IOMMU_READ, GFP_KERNEL); if (err) goto iommu_free_iova; } else { diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c index c301b3be9f30..aeeaca65ace9 100644 --- a/drivers/infiniband/hw/usnic/usnic_uiom.c +++ b/drivers/infiniband/hw/usnic/usnic_uiom.c @@ -277,7 +277,7 @@ iter_chunk: usnic_dbg("va 0x%lx pa %pa size 0x%zx flags 0x%x", va_start, &pa_start, size, flags); err = iommu_map(pd->domain, va_start, pa_start, - size, flags); + size, flags, GFP_KERNEL); if (err) { usnic_err("Failed to map va 0x%lx pa %pa size 0x%zx with err %d\n", va_start, &pa_start, size, err); @@ -294,7 +294,7 @@ iter_chunk: usnic_dbg("va 0x%lx pa %pa size 0x%zx flags 0x%x\n", va_start, &pa_start, size, flags); err = iommu_map(pd->domain, va_start, pa_start, - size, flags); + size, flags, GFP_KERNEL); if (err) { usnic_err("Failed to map va 0x%lx pa %pa size 0x%zx with err %d\n", va_start, &pa_start, size, err); diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index f798c44e0903..c99e4bc55d8c 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -713,7 +713,7 @@ static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys, if (!iova) return DMA_MAPPING_ERROR; - if (iommu_map_atomic(domain, iova, phys - iova_off, size, prot)) { + if (iommu_map(domain, iova, phys - iova_off, size, prot, GFP_ATOMIC)) { iommu_dma_free_iova(cookie, iova, size, NULL); return DMA_MAPPING_ERROR; } @@ -822,7 +822,14 @@ static struct page **__iommu_dma_alloc_noncontiguous(struct device *dev, if (!iova) goto out_free_pages; - if (sg_alloc_table_from_pages(sgt, pages, count, 0, size, GFP_KERNEL)) + /* + * Remove the zone/policy flags from the GFP - these are applied to the + * __iommu_dma_alloc_pages() but are not used for the supporting + * internal allocations that follow. + */ + gfp &= ~(__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM | __GFP_COMP); + + if (sg_alloc_table_from_pages(sgt, pages, count, 0, size, gfp)) goto out_free_iova; if (!(ioprot & IOMMU_CACHE)) { @@ -833,7 +840,8 @@ static struct page **__iommu_dma_alloc_noncontiguous(struct device *dev, arch_dma_prep_coherent(sg_page(sg), sg->length); } - ret = iommu_map_sg_atomic(domain, iova, sgt->sgl, sgt->orig_nents, ioprot); + ret = iommu_map_sg(domain, iova, sgt->sgl, sgt->orig_nents, ioprot, + gfp); if (ret < 0 || ret < size) goto out_free_sg; @@ -1281,7 +1289,7 @@ static int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, * We'll leave any physical concatenation to the IOMMU driver's * implementation - it knows better than we do. */ - ret = iommu_map_sg_atomic(domain, iova, sg, nents, prot); + ret = iommu_map_sg(domain, iova, sg, nents, prot, GFP_ATOMIC); if (ret < 0 || ret < iova_len) goto out_free_iova; @@ -1615,7 +1623,7 @@ static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev, if (!iova) goto out_free_page; - if (iommu_map(domain, iova, msi_addr, size, prot)) + if (iommu_map(domain, iova, msi_addr, size, prot, GFP_KERNEL)) goto out_free_iova; INIT_LIST_HEAD(&msi_page->list); diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 7cfab5fd5e59..6fd223cf639f 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -362,12 +362,12 @@ static int __init intel_iommu_setup(char *str) } __setup("intel_iommu=", intel_iommu_setup); -void *alloc_pgtable_page(int node) +void *alloc_pgtable_page(int node, gfp_t gfp) { struct page *page; void *vaddr = NULL; - page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0); + page = alloc_pages_node(node, gfp | __GFP_ZERO, 0); if (page) vaddr = page_address(page); return vaddr; @@ -612,7 +612,7 @@ struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus, if (!alloc) return NULL; - context = alloc_pgtable_page(iommu->node); + context = alloc_pgtable_page(iommu->node, GFP_ATOMIC); if (!context) return NULL; @@ -908,7 +908,8 @@ pgtable_walk: #endif static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, - unsigned long pfn, int *target_level) + unsigned long pfn, int *target_level, + gfp_t gfp) { struct dma_pte *parent, *pte; int level = agaw_to_level(domain->agaw); @@ -935,7 +936,7 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, if (!dma_pte_present(pte)) { uint64_t pteval; - tmp_page = alloc_pgtable_page(domain->nid); + tmp_page = alloc_pgtable_page(domain->nid, gfp); if (!tmp_page) return NULL; @@ -1186,7 +1187,7 @@ static int iommu_alloc_root_entry(struct intel_iommu *iommu) { struct root_entry *root; - root = (struct root_entry *)alloc_pgtable_page(iommu->node); + root = (struct root_entry *)alloc_pgtable_page(iommu->node, GFP_ATOMIC); if (!root) { pr_err("Allocating root entry for %s failed\n", iommu->name); @@ -2150,7 +2151,8 @@ static void switch_to_super_page(struct dmar_domain *domain, while (start_pfn <= end_pfn) { if (!pte) - pte = pfn_to_dma_pte(domain, start_pfn, &level); + pte = pfn_to_dma_pte(domain, start_pfn, &level, + GFP_ATOMIC); if (dma_pte_present(pte)) { dma_pte_free_pagetable(domain, start_pfn, @@ -2172,7 +2174,8 @@ static void switch_to_super_page(struct dmar_domain *domain, static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, - unsigned long phys_pfn, unsigned long nr_pages, int prot) + unsigned long phys_pfn, unsigned long nr_pages, int prot, + gfp_t gfp) { struct dma_pte *first_pte = NULL, *pte = NULL; unsigned int largepage_lvl = 0; @@ -2202,7 +2205,8 @@ __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, nr_pages); - pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl); + pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl, + gfp); if (!pte) return -ENOMEM; first_pte = pte; @@ -2368,7 +2372,7 @@ static int iommu_domain_identity_map(struct dmar_domain *domain, return __domain_mapping(domain, first_vpfn, first_vpfn, last_vpfn - first_vpfn + 1, - DMA_PTE_READ|DMA_PTE_WRITE); + DMA_PTE_READ|DMA_PTE_WRITE, GFP_KERNEL); } static int md_domain_init(struct dmar_domain *domain, int guest_width); @@ -2676,7 +2680,7 @@ static int copy_context_table(struct intel_iommu *iommu, if (!old_ce) goto out; - new_ce = alloc_pgtable_page(iommu->node); + new_ce = alloc_pgtable_page(iommu->node, GFP_KERNEL); if (!new_ce) goto out_unmap; @@ -4136,7 +4140,7 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width) domain->max_addr = 0; /* always allocate the top pgd */ - domain->pgd = alloc_pgtable_page(domain->nid); + domain->pgd = alloc_pgtable_page(domain->nid, GFP_ATOMIC); if (!domain->pgd) return -ENOMEM; domain_flush_cache(domain, domain->pgd, PAGE_SIZE); @@ -4298,7 +4302,7 @@ static int intel_iommu_map(struct iommu_domain *domain, the low bits of hpa would take us onto the next page */ size = aligned_nrpages(hpa, size); return __domain_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT, - hpa >> VTD_PAGE_SHIFT, size, prot); + hpa >> VTD_PAGE_SHIFT, size, prot, gfp); } static int intel_iommu_map_pages(struct iommu_domain *domain, @@ -4333,7 +4337,8 @@ static size_t intel_iommu_unmap(struct iommu_domain *domain, /* Cope with horrid API which requires us to unmap more than the size argument if it happens to be a large-page mapping. */ - BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level)); + BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level, + GFP_ATOMIC)); if (size < VTD_PAGE_SIZE << level_to_offset_bits(level)) size = VTD_PAGE_SIZE << level_to_offset_bits(level); @@ -4392,7 +4397,8 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain, int level = 0; u64 phys = 0; - pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level); + pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level, + GFP_ATOMIC); if (pte && dma_pte_present(pte)) phys = dma_pte_addr(pte) + (iova & (BIT_MASK(level_to_offset_bits(level) + diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index 06e61e474856..ca9a035e0110 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -737,7 +737,7 @@ int qi_submit_sync(struct intel_iommu *iommu, struct qi_desc *desc, extern int dmar_ir_support(void); -void *alloc_pgtable_page(int node); +void *alloc_pgtable_page(int node, gfp_t gfp); void free_pgtable_page(void *vaddr); void iommu_flush_write_buffer(struct intel_iommu *iommu); struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn); diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c index fb3c7020028d..c5bf74e9372d 100644 --- a/drivers/iommu/intel/pasid.c +++ b/drivers/iommu/intel/pasid.c @@ -200,7 +200,7 @@ static struct pasid_entry *intel_pasid_get_entry(struct device *dev, u32 pasid) retry: entries = get_pasid_table_from_pde(&dir[dir_index]); if (!entries) { - entries = alloc_pgtable_page(info->iommu->node); + entries = alloc_pgtable_page(info->iommu->node, GFP_ATOMIC); if (!entries) return NULL; diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 834e6ecf3e51..dd9b60897c86 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -931,7 +931,7 @@ map_end: if (map_size) { ret = iommu_map(domain, addr - map_size, addr - map_size, map_size, - entry->prot); + entry->prot, GFP_KERNEL); if (ret) goto out; map_size = 0; @@ -2384,34 +2384,27 @@ static int __iommu_map(struct iommu_domain *domain, unsigned long iova, return ret; } -static int _iommu_map(struct iommu_domain *domain, unsigned long iova, - phys_addr_t paddr, size_t size, int prot, gfp_t gfp) +int iommu_map(struct iommu_domain *domain, unsigned long iova, + phys_addr_t paddr, size_t size, int prot, gfp_t gfp) { const struct iommu_domain_ops *ops = domain->ops; int ret; + might_sleep_if(gfpflags_allow_blocking(gfp)); + + /* Discourage passing strange GFP flags */ + if (WARN_ON_ONCE(gfp & (__GFP_COMP | __GFP_DMA | __GFP_DMA32 | + __GFP_HIGHMEM))) + return -EINVAL; + ret = __iommu_map(domain, iova, paddr, size, prot, gfp); if (ret == 0 && ops->iotlb_sync_map) ops->iotlb_sync_map(domain, iova, size); return ret; } - -int iommu_map(struct iommu_domain *domain, unsigned long iova, - phys_addr_t paddr, size_t size, int prot) -{ - might_sleep(); - return _iommu_map(domain, iova, paddr, size, prot, GFP_KERNEL); -} EXPORT_SYMBOL_GPL(iommu_map); -int iommu_map_atomic(struct iommu_domain *domain, unsigned long iova, - phys_addr_t paddr, size_t size, int prot) -{ - return _iommu_map(domain, iova, paddr, size, prot, GFP_ATOMIC); -} -EXPORT_SYMBOL_GPL(iommu_map_atomic); - static size_t __iommu_unmap_pages(struct iommu_domain *domain, unsigned long iova, size_t size, struct iommu_iotlb_gather *iotlb_gather) @@ -2501,9 +2494,9 @@ size_t iommu_unmap_fast(struct iommu_domain *domain, } EXPORT_SYMBOL_GPL(iommu_unmap_fast); -static ssize_t __iommu_map_sg(struct iommu_domain *domain, unsigned long iova, - struct scatterlist *sg, unsigned int nents, int prot, - gfp_t gfp) +ssize_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova, + struct scatterlist *sg, unsigned int nents, int prot, + gfp_t gfp) { const struct iommu_domain_ops *ops = domain->ops; size_t len = 0, mapped = 0; @@ -2511,6 +2504,13 @@ static ssize_t __iommu_map_sg(struct iommu_domain *domain, unsigned long iova, unsigned int i = 0; int ret; + might_sleep_if(gfpflags_allow_blocking(gfp)); + + /* Discourage passing strange GFP flags */ + if (WARN_ON_ONCE(gfp & (__GFP_COMP | __GFP_DMA | __GFP_DMA32 | + __GFP_HIGHMEM))) + return -EINVAL; + while (i <= nents) { phys_addr_t s_phys = sg_phys(sg); @@ -2550,21 +2550,8 @@ out_err: return ret; } - -ssize_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova, - struct scatterlist *sg, unsigned int nents, int prot) -{ - might_sleep(); - return __iommu_map_sg(domain, iova, sg, nents, prot, GFP_KERNEL); -} EXPORT_SYMBOL_GPL(iommu_map_sg); -ssize_t iommu_map_sg_atomic(struct iommu_domain *domain, unsigned long iova, - struct scatterlist *sg, unsigned int nents, int prot) -{ - return __iommu_map_sg(domain, iova, sg, nents, prot, GFP_ATOMIC); -} - /** * report_iommu_fault() - report about an IOMMU fault to the IOMMU framework * @domain: the iommu domain where the fault has happened diff --git a/drivers/iommu/iommufd/pages.c b/drivers/iommu/iommufd/pages.c index 1e1d3509efae..f8d92c9bb65b 100644 --- a/drivers/iommu/iommufd/pages.c +++ b/drivers/iommu/iommufd/pages.c @@ -456,7 +456,8 @@ static int batch_iommu_map_small(struct iommu_domain *domain, size % PAGE_SIZE); while (size) { - rc = iommu_map(domain, iova, paddr, PAGE_SIZE, prot); + rc = iommu_map(domain, iova, paddr, PAGE_SIZE, prot, + GFP_KERNEL_ACCOUNT); if (rc) goto err_unmap; iova += PAGE_SIZE; @@ -500,7 +501,8 @@ static int batch_to_domain(struct pfn_batch *batch, struct iommu_domain *domain, else rc = iommu_map(domain, iova, PFN_PHYS(batch->pfns[cur]) + page_offset, - next_iova - iova, area->iommu_prot); + next_iova - iova, area->iommu_prot, + GFP_KERNEL_ACCOUNT); if (rc) goto err_unmap; iova = next_iova; diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c index bb00580a30d8..c489714ddc4d 100644 --- a/drivers/iommu/s390-iommu.c +++ b/drivers/iommu/s390-iommu.c @@ -50,7 +50,7 @@ static struct iommu_domain *s390_domain_alloc(unsigned domain_type) if (!s390_domain) return NULL; - s390_domain->dma_table = dma_alloc_cpu_table(); + s390_domain->dma_table = dma_alloc_cpu_table(GFP_KERNEL); if (!s390_domain->dma_table) { kfree(s390_domain); return NULL; @@ -258,7 +258,8 @@ static void s390_iommu_iotlb_sync_map(struct iommu_domain *domain, static int s390_iommu_validate_trans(struct s390_domain *s390_domain, phys_addr_t pa, dma_addr_t dma_addr, - unsigned long nr_pages, int flags) + unsigned long nr_pages, int flags, + gfp_t gfp) { phys_addr_t page_addr = pa & PAGE_MASK; unsigned long *entry; @@ -266,7 +267,8 @@ static int s390_iommu_validate_trans(struct s390_domain *s390_domain, int rc; for (i = 0; i < nr_pages; i++) { - entry = dma_walk_cpu_trans(s390_domain->dma_table, dma_addr); + entry = dma_walk_cpu_trans(s390_domain->dma_table, dma_addr, + gfp); if (unlikely(!entry)) { rc = -ENOMEM; goto undo_cpu_trans; @@ -282,7 +284,7 @@ undo_cpu_trans: while (i-- > 0) { dma_addr -= PAGE_SIZE; entry = dma_walk_cpu_trans(s390_domain->dma_table, - dma_addr); + dma_addr, gfp); if (!entry) break; dma_update_cpu_trans(entry, 0, ZPCI_PTE_INVALID); @@ -299,7 +301,8 @@ static int s390_iommu_invalidate_trans(struct s390_domain *s390_domain, int rc = 0; for (i = 0; i < nr_pages; i++) { - entry = dma_walk_cpu_trans(s390_domain->dma_table, dma_addr); + entry = dma_walk_cpu_trans(s390_domain->dma_table, dma_addr, + GFP_ATOMIC); if (unlikely(!entry)) { rc = -EINVAL; break; @@ -337,7 +340,7 @@ static int s390_iommu_map_pages(struct iommu_domain *domain, flags |= ZPCI_TABLE_PROTECTED; rc = s390_iommu_validate_trans(s390_domain, paddr, iova, - pgcount, flags); + pgcount, flags, gfp); if (!rc) *mapped = size; diff --git a/drivers/media/platform/qcom/venus/firmware.c b/drivers/media/platform/qcom/venus/firmware.c index 142d4c74017c..07d4dceb5e72 100644 --- a/drivers/media/platform/qcom/venus/firmware.c +++ b/drivers/media/platform/qcom/venus/firmware.c @@ -158,7 +158,7 @@ static int venus_boot_no_tz(struct venus_core *core, phys_addr_t mem_phys, core->fw.mapped_mem_size = mem_size; ret = iommu_map(iommu, VENUS_FW_START_ADDR, mem_phys, mem_size, - IOMMU_READ | IOMMU_WRITE | IOMMU_PRIV); + IOMMU_READ | IOMMU_WRITE | IOMMU_PRIV, GFP_KERNEL); if (ret) { dev_err(dev, "could not map video firmware region\n"); return ret; diff --git a/drivers/net/ipa/ipa_mem.c b/drivers/net/ipa/ipa_mem.c index 9ec5af323f73..991a7d39f066 100644 --- a/drivers/net/ipa/ipa_mem.c +++ b/drivers/net/ipa/ipa_mem.c @@ -466,7 +466,8 @@ static int ipa_imem_init(struct ipa *ipa, unsigned long addr, size_t size) size = PAGE_ALIGN(size + addr - phys); iova = phys; /* We just want a direct mapping */ - ret = iommu_map(domain, iova, phys, size, IOMMU_READ | IOMMU_WRITE); + ret = iommu_map(domain, iova, phys, size, IOMMU_READ | IOMMU_WRITE, + GFP_KERNEL); if (ret) return ret; @@ -574,7 +575,8 @@ static int ipa_smem_init(struct ipa *ipa, u32 item, size_t size) size = PAGE_ALIGN(size + addr - phys); iova = phys; /* We just want a direct mapping */ - ret = iommu_map(domain, iova, phys, size, IOMMU_READ | IOMMU_WRITE); + ret = iommu_map(domain, iova, phys, size, IOMMU_READ | IOMMU_WRITE, + GFP_KERNEL); if (ret) return ret; diff --git a/drivers/net/wireless/ath/ath10k/snoc.c b/drivers/net/wireless/ath/ath10k/snoc.c index cfcb759a87de..9a82f0336d95 100644 --- a/drivers/net/wireless/ath/ath10k/snoc.c +++ b/drivers/net/wireless/ath/ath10k/snoc.c @@ -1639,7 +1639,7 @@ static int ath10k_fw_init(struct ath10k *ar) ret = iommu_map(iommu_dom, ar_snoc->fw.fw_start_addr, ar->msa.paddr, ar->msa.mem_size, - IOMMU_READ | IOMMU_WRITE); + IOMMU_READ | IOMMU_WRITE, GFP_KERNEL); if (ret) { ath10k_err(ar, "failed to map firmware region: %d\n", ret); goto err_iommu_detach; diff --git a/drivers/net/wireless/ath/ath11k/ahb.c b/drivers/net/wireless/ath/ath11k/ahb.c index d34a4d6325b2..df8fdc7067f9 100644 --- a/drivers/net/wireless/ath/ath11k/ahb.c +++ b/drivers/net/wireless/ath/ath11k/ahb.c @@ -1021,7 +1021,7 @@ static int ath11k_ahb_fw_resources_init(struct ath11k_base *ab) ret = iommu_map(iommu_dom, ab_ahb->fw.msa_paddr, ab_ahb->fw.msa_paddr, ab_ahb->fw.msa_size, - IOMMU_READ | IOMMU_WRITE); + IOMMU_READ | IOMMU_WRITE, GFP_KERNEL); if (ret) { ath11k_err(ab, "failed to map firmware region: %d\n", ret); goto err_iommu_detach; @@ -1029,7 +1029,7 @@ static int ath11k_ahb_fw_resources_init(struct ath11k_base *ab) ret = iommu_map(iommu_dom, ab_ahb->fw.ce_paddr, ab_ahb->fw.ce_paddr, ab_ahb->fw.ce_size, - IOMMU_READ | IOMMU_WRITE); + IOMMU_READ | IOMMU_WRITE, GFP_KERNEL); if (ret) { ath11k_err(ab, "failed to map firmware CE region: %d\n", ret); goto err_iommu_unmap; diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c index 1cd4815a6dd1..80072b6b6283 100644 --- a/drivers/remoteproc/remoteproc_core.c +++ b/drivers/remoteproc/remoteproc_core.c @@ -643,7 +643,8 @@ static int rproc_handle_devmem(struct rproc *rproc, void *ptr, if (!mapping) return -ENOMEM; - ret = iommu_map(rproc->domain, rsc->da, rsc->pa, rsc->len, rsc->flags); + ret = iommu_map(rproc->domain, rsc->da, rsc->pa, rsc->len, rsc->flags, + GFP_KERNEL); if (ret) { dev_err(dev, "failed to map devmem: %d\n", ret); goto out; @@ -737,7 +738,7 @@ static int rproc_alloc_carveout(struct rproc *rproc, } ret = iommu_map(rproc->domain, mem->da, dma, mem->len, - mem->flags); + mem->flags, GFP_KERNEL); if (ret) { dev_err(dev, "iommu_map failed: %d\n", ret); goto free_mapping; diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 393b27a3bd87..7bea5594bc45 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -1479,7 +1479,8 @@ static int vfio_iommu_map(struct vfio_iommu *iommu, dma_addr_t iova, list_for_each_entry(d, &iommu->domain_list, next) { ret = iommu_map(d->domain, iova, (phys_addr_t)pfn << PAGE_SHIFT, - npage << PAGE_SHIFT, prot | IOMMU_CACHE); + npage << PAGE_SHIFT, prot | IOMMU_CACHE, + GFP_KERNEL); if (ret) goto unwind; @@ -1776,8 +1777,8 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu, size = npage << PAGE_SHIFT; } - ret = iommu_map(domain->domain, iova, phys, - size, dma->prot | IOMMU_CACHE); + ret = iommu_map(domain->domain, iova, phys, size, + dma->prot | IOMMU_CACHE, GFP_KERNEL); if (ret) { if (!dma->iommu_mapped) { vfio_unpin_pages_remote(dma, iova, @@ -1865,7 +1866,7 @@ static void vfio_test_domain_fgsp(struct vfio_domain *domain) return; ret = iommu_map(domain->domain, 0, page_to_phys(pages), PAGE_SIZE * 2, - IOMMU_READ | IOMMU_WRITE | IOMMU_CACHE); + IOMMU_READ | IOMMU_WRITE | IOMMU_CACHE, GFP_KERNEL); if (!ret) { size_t unmapped = iommu_unmap(domain->domain, 0, PAGE_SIZE); diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index ec32f785dfde..fd1536de5b1d 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -792,7 +792,7 @@ static int vhost_vdpa_map(struct vhost_vdpa *v, struct vhost_iotlb *iotlb, r = ops->set_map(vdpa, asid, iotlb); } else { r = iommu_map(v->domain, iova, pa, size, - perm_to_iommu_flags(perm)); + perm_to_iommu_flags(perm), GFP_KERNEL); } if (r) { vhost_iotlb_del_range(iotlb, iova, iova + size - 1); diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 933cc57bfc48..35d5d50e6d12 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -467,19 +467,15 @@ extern int iommu_sva_unbind_gpasid(struct iommu_domain *domain, extern struct iommu_domain *iommu_get_domain_for_dev(struct device *dev); extern struct iommu_domain *iommu_get_dma_domain(struct device *dev); extern int iommu_map(struct iommu_domain *domain, unsigned long iova, - phys_addr_t paddr, size_t size, int prot); -extern int iommu_map_atomic(struct iommu_domain *domain, unsigned long iova, - phys_addr_t paddr, size_t size, int prot); + phys_addr_t paddr, size_t size, int prot, gfp_t gfp); extern size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size); extern size_t iommu_unmap_fast(struct iommu_domain *domain, unsigned long iova, size_t size, struct iommu_iotlb_gather *iotlb_gather); extern ssize_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova, - struct scatterlist *sg, unsigned int nents, int prot); -extern ssize_t iommu_map_sg_atomic(struct iommu_domain *domain, - unsigned long iova, struct scatterlist *sg, - unsigned int nents, int prot); + struct scatterlist *sg, unsigned int nents, + int prot, gfp_t gfp); extern phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova); extern void iommu_set_fault_handler(struct iommu_domain *domain, iommu_fault_handler_t handler, void *token); @@ -773,14 +769,7 @@ static inline struct iommu_domain *iommu_get_domain_for_dev(struct device *dev) } static inline int iommu_map(struct iommu_domain *domain, unsigned long iova, - phys_addr_t paddr, size_t size, int prot) -{ - return -ENODEV; -} - -static inline int iommu_map_atomic(struct iommu_domain *domain, - unsigned long iova, phys_addr_t paddr, - size_t size, int prot) + phys_addr_t paddr, size_t size, int prot, gfp_t gfp) { return -ENODEV; } @@ -800,14 +789,7 @@ static inline size_t iommu_unmap_fast(struct iommu_domain *domain, static inline ssize_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova, struct scatterlist *sg, - unsigned int nents, int prot) -{ - return -ENODEV; -} - -static inline ssize_t iommu_map_sg_atomic(struct iommu_domain *domain, - unsigned long iova, struct scatterlist *sg, - unsigned int nents, int prot) + unsigned int nents, int prot, gfp_t gfp) { return -ENODEV; } @@ -1118,7 +1100,8 @@ iommu_get_domain_for_dev_pasid(struct device *dev, ioasid_t pasid, static inline size_t iommu_map_sgtable(struct iommu_domain *domain, unsigned long iova, struct sg_table *sgt, int prot) { - return iommu_map_sg(domain, iova, sgt->sgl, sgt->orig_nents, prot); + return iommu_map_sg(domain, iova, sgt->sgl, sgt->orig_nents, prot, + GFP_KERNEL); } #ifdef CONFIG_IOMMU_DEBUGFS |