24 files changed, 367 insertions, 243 deletions
diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
index 03f2b2d4db30..f66a3effba73 100644
--- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
+++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
@@ -33,10 +33,12 @@ properties:
       - description: Qcom SoCs implementing "arm,mmu-500"
         items:
           - enum:
+              - qcom,qcm2290-smmu-500
               - qcom,sc7180-smmu-500
               - qcom,sc7280-smmu-500
               - qcom,sc8180x-smmu-500
               - qcom,sdm845-smmu-500
+              - qcom,sm6350-smmu-500
               - qcom,sm8150-smmu-500
               - qcom,sm8250-smmu-500
               - qcom,sm8350-smmu-500
diff --git a/Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.yaml b/Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.yaml
index 02c69a95c332..ce0c715205c6 100644
--- a/Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.yaml
+++ b/Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.yaml
@@ -43,6 +43,7 @@ properties:
           - renesas,ipmmu-r8a77980 # R-Car V3H
           - renesas,ipmmu-r8a77990 # R-Car E3
           - renesas,ipmmu-r8a77995 # R-Car D3
+          - renesas,ipmmu-r8a779a0 # R-Car V3U
 
   reg:
     maxItems: 1
diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h
index 23bef08a8388..ca4d0dee1ecd 100644
--- a/arch/x86/include/asm/fpu/api.h
+++ b/arch/x86/include/asm/fpu/api.h
@@ -106,6 +106,4 @@ extern int cpu_has_xfeatures(u64 xfeatures_mask, const char **feature_name);
  */
 #define PASID_DISABLED	0
 
-static inline void update_pasid(void) { }
-
 #endif /* _ASM_X86_FPU_API_H */
diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h
index 8dbe61e2b3c1..867535eb0ce9 100644
--- a/drivers/iommu/amd/amd_iommu_types.h
+++ b/drivers/iommu/amd/amd_iommu_types.h
@@ -138,6 +138,8 @@
 #define EVENT_DOMID_MASK_HI	0xf0000
 #define EVENT_FLAGS_MASK	0xfff
 #define EVENT_FLAGS_SHIFT	0x10
+#define EVENT_FLAG_RW		0x020
+#define EVENT_FLAG_I		0x008
 
 /* feature control bits */
 #define CONTROL_IOMMU_EN	0x00ULL
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 1722bb161841..beadcffcc223 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -473,6 +473,12 @@ static void amd_iommu_report_rmp_fault(volatile u32 *event)
 	pci_dev_put(pdev);
 }
 
+#define IS_IOMMU_MEM_TRANSACTION(flags)		\
+	(((flags) & EVENT_FLAG_I) == 0)
+
+#define IS_WRITE_REQUEST(flags)			\
+	((flags) & EVENT_FLAG_RW)
+
 static void amd_iommu_report_page_fault(u16 devid, u16 domain_id,
 					u64 address, int flags)
 {
@@ -485,6 +491,20 @@ static void amd_iommu_report_page_fault(u16 devid, u16 domain_id,
 		dev_data = dev_iommu_priv_get(&pdev->dev);
 
 	if (dev_data) {
+		/*
+		 * If this is a DMA fault (for which the I(nterrupt)
+		 * bit will be unset), allow report_iommu_fault() to
+		 * prevent logging it.
+		 */
+		if (IS_IOMMU_MEM_TRANSACTION(flags)) {
+			if (!report_iommu_fault(&dev_data->domain->domain,
+						&pdev->dev, address,
+						IS_WRITE_REQUEST(flags) ?
+							IOMMU_FAULT_WRITE :
+							IOMMU_FAULT_READ))
+				goto out;
+		}
+
 		if (__ratelimit(&dev_data->rs)) {
 			pci_err(pdev, "Event logged [IO_PAGE_FAULT domain=0x%04x address=0x%llx flags=0x%04x]\n",
 				domain_id, address, flags);
@@ -495,6 +515,7 @@ static void amd_iommu_report_page_fault(u16 devid, u16 domain_id,
 			domain_id, address, flags);
 	}
 
+out:
 	if (pdev)
 		pci_dev_put(pdev);
 }
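The amd/iommu.c change above routes DMA page faults through the generic report_iommu_fault() path before the rate-limited pci_err() log. A minimal sketch of the consumer side follows: the registration call is the real iommu_set_fault_handler() API, while the handler body and the my_priv/expects_faults names are hypothetical.

	/* Sketch only: a domain fault handler that claims expected faults. */
	static int my_fault_handler(struct iommu_domain *domain,
				    struct device *dev, unsigned long iova,
				    int flags, void *token)
	{
		struct my_priv *priv = token;	/* hypothetical driver state */

		dev_dbg(dev, "%s fault at IOVA %#lx\n",
			(flags & IOMMU_FAULT_WRITE) ? "write" : "read", iova);

		/* Returning 0 means "handled": amd_iommu_report_page_fault()
		 * then takes the new "goto out" path instead of logging. */
		return priv->expects_faults ? 0 : -ENOSYS;
	}

	iommu_set_fault_handler(domain, my_fault_handler, priv);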
diff --git a/drivers/iommu/apple-dart.c b/drivers/iommu/apple-dart.c
index fdfa39ec2a4d..96d4a1f8de79 100644
--- a/drivers/iommu/apple-dart.c
+++ b/drivers/iommu/apple-dart.c
@@ -15,7 +15,6 @@
 #include <linux/bitfield.h>
 #include <linux/clk.h>
 #include <linux/dev_printk.h>
-#include <linux/dma-iommu.h>
 #include <linux/dma-mapping.h>
 #include <linux/err.h>
 #include <linux/interrupt.h>
@@ -70,6 +69,8 @@
 #define DART_ERROR_ADDR_HI 0x54
 #define DART_ERROR_ADDR_LO 0x50
 
+#define DART_STREAMS_ENABLE 0xfc
+
 #define DART_TCR(sid) (0x100 + 4 * (sid))
 #define DART_TCR_TRANSLATE_ENABLE BIT(7)
 #define DART_TCR_BYPASS0_ENABLE BIT(8)
@@ -301,6 +302,9 @@ static int apple_dart_hw_reset(struct apple_dart *dart)
 	apple_dart_hw_disable_dma(&stream_map);
 	apple_dart_hw_clear_all_ttbrs(&stream_map);
 
+	/* enable all streams globally since TCR is used to control isolation */
+	writel(DART_STREAM_ALL, dart->regs + DART_STREAMS_ENABLE);
+
 	/* clear any pending errors before the interrupt is unmasked */
 	writel(readl(dart->regs + DART_ERROR), dart->regs + DART_ERROR);
 
@@ -578,7 +582,6 @@ static struct iommu_domain *apple_dart_domain_alloc(unsigned int type)
 	if (!dart_domain)
 		return NULL;
 
-	iommu_get_dma_cookie(&dart_domain->domain);
 	mutex_init(&dart_domain->init_lock);
 
 	/* no need to allocate pgtbl_ops or do any other finalization steps */
@@ -702,13 +705,12 @@ static struct iommu_group *apple_dart_device_group(struct device *dev)
 	if (!group)
 		goto out;
 
-	group_master_cfg = kzalloc(sizeof(*group_master_cfg), GFP_KERNEL);
+	group_master_cfg = kmemdup(cfg, sizeof(*group_master_cfg), GFP_KERNEL);
 	if (!group_master_cfg) {
 		iommu_group_put(group);
 		goto out;
 	}
 
-	memcpy(group_master_cfg, cfg, sizeof(*group_master_cfg));
 	iommu_group_set_iommudata(group, group_master_cfg,
 				  apple_dart_release_group);
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index a388e318f86e..f5848b351b19 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -409,10 +409,7 @@ static void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
 		dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
 
 	/* Convert the erroneous command into a CMD_SYNC */
-	if (arm_smmu_cmdq_build_cmd(cmd, &cmd_sync)) {
-		dev_err(smmu->dev, "failed to convert to CMD_SYNC\n");
-		return;
-	}
+	arm_smmu_cmdq_build_cmd(cmd, &cmd_sync);
 
 	queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
 }
@@ -860,7 +857,7 @@ static int __arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
 {
 	u64 cmd[CMDQ_ENT_DWORDS];
 
-	if (arm_smmu_cmdq_build_cmd(cmd, ent)) {
+	if (unlikely(arm_smmu_cmdq_build_cmd(cmd, ent))) {
 		dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
 			 ent->opcode);
 		return -EINVAL;
@@ -885,11 +882,20 @@ static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
 				    struct arm_smmu_cmdq_batch *cmds,
 				    struct arm_smmu_cmdq_ent *cmd)
 {
+	int index;
+
 	if (cmds->num == CMDQ_BATCH_ENTRIES) {
 		arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, false);
 		cmds->num = 0;
 	}
-	arm_smmu_cmdq_build_cmd(&cmds->cmds[cmds->num * CMDQ_ENT_DWORDS], cmd);
+
+	index = cmds->num * CMDQ_ENT_DWORDS;
+	if (unlikely(arm_smmu_cmdq_build_cmd(&cmds->cmds[index], cmd))) {
+		dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
+			 cmd->opcode);
+		return;
+	}
+
 	cmds->num++;
 }
 
@@ -1764,10 +1770,11 @@ static int arm_smmu_atc_inv_master(struct arm_smmu_master *master)
 {
 	int i;
 	struct arm_smmu_cmdq_ent cmd;
-	struct arm_smmu_cmdq_batch cmds = {};
+	struct arm_smmu_cmdq_batch cmds;
 
 	arm_smmu_atc_inv_to_cmd(0, 0, 0, &cmd);
 
+	cmds.num = 0;
 	for (i = 0; i < master->num_streams; i++) {
 		cmd.atc.sid = master->streams[i].id;
 		arm_smmu_cmdq_batch_add(master->smmu, &cmds, &cmd);
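Two notes on the hunks above. In apple-dart.c, kmemdup() replaces the kzalloc()+memcpy() pair, the idiomatic one-step copy. In arm-smmu-v3.c, arm_smmu_cmdq_batch_add() can now skip a command the build step rejects, and batches are no longer zero-initialised wholesale; a hedged sketch of the resulting usage pattern, where sids[] and n are hypothetical:

	struct arm_smmu_cmdq_batch cmds;
	struct arm_smmu_cmdq_ent cmd;
	int i;

	arm_smmu_atc_inv_to_cmd(0, 0, 0, &cmd);

	cmds.num = 0;	/* only .num needs init; zeroing the whole
			 * CMDQ_BATCH_ENTRIES-sized cmds array with "= {}"
			 * was wasted work on this hot invalidation path */
	for (i = 0; i < n; i++) {
		cmd.atc.sid = sids[i];
		/* silently drops commands the SMMU cannot encode */
		arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
	}
	arm_smmu_cmdq_batch_submit(smmu, &cmds);	/* flush what was queued */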
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
index 55690af1b25d..ca736b065dd0 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
@@ -231,6 +231,7 @@ static const struct of_device_id qcom_smmu_client_of_match[] __maybe_unused = {
 	{ .compatible = "qcom,sc7180-mdss" },
 	{ .compatible = "qcom,sc7180-mss-pil" },
 	{ .compatible = "qcom,sc7280-mdss" },
+	{ .compatible = "qcom,sc7280-mss-pil" },
 	{ .compatible = "qcom,sc8180x-mdss" },
 	{ .compatible = "qcom,sdm845-mdss" },
 	{ .compatible = "qcom,sdm845-mss-pil" },
@@ -403,12 +404,14 @@ static struct arm_smmu_device *qcom_smmu_create(struct arm_smmu_device *smmu,
 
 static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = {
 	{ .compatible = "qcom,msm8998-smmu-v2" },
+	{ .compatible = "qcom,qcm2290-smmu-500" },
 	{ .compatible = "qcom,sc7180-smmu-500" },
 	{ .compatible = "qcom,sc7280-smmu-500" },
 	{ .compatible = "qcom,sc8180x-smmu-500" },
 	{ .compatible = "qcom,sdm630-smmu-v2" },
 	{ .compatible = "qcom,sdm845-smmu-500" },
 	{ .compatible = "qcom,sm6125-smmu-500" },
+	{ .compatible = "qcom,sm6350-smmu-500" },
 	{ .compatible = "qcom,sm8150-smmu-500" },
 	{ .compatible = "qcom,sm8250-smmu-500" },
 	{ .compatible = "qcom,sm8350-smmu-500" },
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 896bea04c347..b42e38a0dbe2 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -98,9 +98,6 @@ static struct iommu_dma_cookie *cookie_alloc(enum iommu_dma_cookie_type type)
 /**
  * iommu_get_dma_cookie - Acquire DMA-API resources for a domain
  * @domain: IOMMU domain to prepare for DMA-API usage
- *
- * IOMMU drivers should normally call this from their domain_alloc
- * callback when domain->type == IOMMU_DOMAIN_DMA.
  */
 int iommu_get_dma_cookie(struct iommu_domain *domain)
 {
@@ -113,7 +110,6 @@ int iommu_get_dma_cookie(struct iommu_domain *domain)
 
 	return 0;
 }
-EXPORT_SYMBOL(iommu_get_dma_cookie);
 
 /**
  * iommu_get_msi_cookie - Acquire just MSI remapping resources
@@ -151,8 +147,6 @@ EXPORT_SYMBOL(iommu_get_msi_cookie);
  * iommu_put_dma_cookie - Release a domain's DMA mapping resources
  * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie() or
  *          iommu_get_msi_cookie()
- *
- * IOMMU drivers should normally call this from their domain_free callback.
  */
 void iommu_put_dma_cookie(struct iommu_domain *domain)
 {
@@ -172,7 +166,6 @@ void iommu_put_dma_cookie(struct iommu_domain *domain)
 	kfree(cookie);
 	domain->iova_cookie = NULL;
 }
-EXPORT_SYMBOL(iommu_put_dma_cookie);
 
 /**
  * iommu_dma_get_resv_regions - Reserved region driver helper
@@ -317,6 +310,11 @@ static bool dev_is_untrusted(struct device *dev)
 	return dev_is_pci(dev) && to_pci_dev(dev)->untrusted;
 }
 
+static bool dev_use_swiotlb(struct device *dev)
+{
+	return IS_ENABLED(CONFIG_SWIOTLB) && dev_is_untrusted(dev);
+}
+
 /* sysfs updates are serialised by the mutex of the group owning @domain */
 int iommu_dma_init_fq(struct iommu_domain *domain)
 {
@@ -510,23 +508,6 @@ static void __iommu_dma_unmap(struct device *dev, dma_addr_t dma_addr,
 	iommu_dma_free_iova(cookie, dma_addr, size, &iotlb_gather);
 }
 
-static void __iommu_dma_unmap_swiotlb(struct device *dev, dma_addr_t dma_addr,
-		size_t size, enum dma_data_direction dir,
-		unsigned long attrs)
-{
-	struct iommu_domain *domain = iommu_get_dma_domain(dev);
-	phys_addr_t phys;
-
-	phys = iommu_iova_to_phys(domain, dma_addr);
-	if (WARN_ON(!phys))
-		return;
-
-	__iommu_dma_unmap(dev, dma_addr, size);
-
-	if (unlikely(is_swiotlb_buffer(dev, phys)))
-		swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs);
-}
-
 static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys,
 		size_t size, int prot, u64 dma_mask)
 {
@@ -553,52 +534,6 @@ static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys,
 	return iova + iova_off;
 }
 
-static dma_addr_t __iommu_dma_map_swiotlb(struct device *dev, phys_addr_t phys,
-		size_t org_size, dma_addr_t dma_mask, bool coherent,
-		enum dma_data_direction dir, unsigned long attrs)
-{
-	int prot = dma_info_to_prot(dir, coherent, attrs);
-	struct iommu_domain *domain = iommu_get_dma_domain(dev);
-	struct iommu_dma_cookie *cookie = domain->iova_cookie;
-	struct iova_domain *iovad = &cookie->iovad;
-	size_t aligned_size = org_size;
-	void *padding_start;
-	size_t padding_size;
-	dma_addr_t iova;
-
-	/*
-	 * If both the physical buffer start address and size are
-	 * page aligned, we don't need to use a bounce page.
-	 */
-	if (IS_ENABLED(CONFIG_SWIOTLB) && dev_is_untrusted(dev) &&
-	    iova_offset(iovad, phys | org_size)) {
-		aligned_size = iova_align(iovad, org_size);
-		phys = swiotlb_tbl_map_single(dev, phys, org_size,
-					      aligned_size, dir, attrs);
-
-		if (phys == DMA_MAPPING_ERROR)
-			return DMA_MAPPING_ERROR;
-
-		/* Cleanup the padding area. */
-		padding_start = phys_to_virt(phys);
-		padding_size = aligned_size;
-
-		if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
-		    (dir == DMA_TO_DEVICE ||
-		     dir == DMA_BIDIRECTIONAL)) {
-			padding_start += org_size;
-			padding_size -= org_size;
-		}
-
-		memset(padding_start, 0, padding_size);
-	}
-
-	iova = __iommu_dma_map(dev, phys, aligned_size, prot, dma_mask);
-	if (iova == DMA_MAPPING_ERROR && is_swiotlb_buffer(dev, phys))
-		swiotlb_tbl_unmap_single(dev, phys, org_size, dir, attrs);
-	return iova;
-}
-
 static void __iommu_dma_free_pages(struct page **pages, int count)
 {
 	while (count--)
@@ -616,7 +551,7 @@ static struct page **__iommu_dma_alloc_pages(struct device *dev,
 	if (!order_mask)
 		return NULL;
 
-	pages = kvzalloc(count * sizeof(*pages), GFP_KERNEL);
+	pages = kvcalloc(count, sizeof(*pages), GFP_KERNEL);
 	if (!pages)
 		return NULL;
 
@@ -794,7 +729,7 @@ static void iommu_dma_sync_single_for_cpu(struct device *dev,
 {
 	phys_addr_t phys;
 
-	if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
+	if (dev_is_dma_coherent(dev) && !dev_use_swiotlb(dev))
 		return;
 
 	phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
@@ -810,7 +745,7 @@ static void iommu_dma_sync_single_for_device(struct device *dev,
 {
 	phys_addr_t phys;
 
-	if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
+	if (dev_is_dma_coherent(dev) && !dev_use_swiotlb(dev))
 		return;
 
 	phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
@@ -828,17 +763,13 @@ static void iommu_dma_sync_sg_for_cpu(struct device *dev,
 	struct scatterlist *sg;
 	int i;
 
-	if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
-		return;
-
-	for_each_sg(sgl, sg, nelems, i) {
-		if (!dev_is_dma_coherent(dev))
+	if (dev_use_swiotlb(dev))
+		for_each_sg(sgl, sg, nelems, i)
+			iommu_dma_sync_single_for_cpu(dev, sg_dma_address(sg),
+						      sg->length, dir);
+	else if (!dev_is_dma_coherent(dev))
+		for_each_sg(sgl, sg, nelems, i)
 			arch_sync_dma_for_cpu(sg_phys(sg), sg->length, dir);
-
-		if (is_swiotlb_buffer(dev, sg_phys(sg)))
-			swiotlb_sync_single_for_cpu(dev, sg_phys(sg),
-						    sg->length, dir);
-	}
 }
 
 static void iommu_dma_sync_sg_for_device(struct device *dev,
@@ -848,17 +779,14 @@ static void iommu_dma_sync_sg_for_device(struct device *dev,
 	struct scatterlist *sg;
 	int i;
 
-	if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
-		return;
-
-	for_each_sg(sgl, sg, nelems, i) {
-		if (is_swiotlb_buffer(dev, sg_phys(sg)))
-			swiotlb_sync_single_for_device(dev, sg_phys(sg),
-						       sg->length, dir);
-
-		if (!dev_is_dma_coherent(dev))
+	if (dev_use_swiotlb(dev))
+		for_each_sg(sgl, sg, nelems, i)
+			iommu_dma_sync_single_for_device(dev,
+							 sg_dma_address(sg),
+							 sg->length, dir);
+	else if (!dev_is_dma_coherent(dev))
+		for_each_sg(sgl, sg, nelems, i)
 			arch_sync_dma_for_device(sg_phys(sg), sg->length, dir);
-	}
 }
 
 static dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page,
@@ -867,22 +795,66 @@ static dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page,
 {
 	phys_addr_t phys = page_to_phys(page) + offset;
 	bool coherent = dev_is_dma_coherent(dev);
-	dma_addr_t dma_handle;
+	int prot = dma_info_to_prot(dir, coherent, attrs);
+	struct iommu_domain *domain = iommu_get_dma_domain(dev);
+	struct iommu_dma_cookie *cookie = domain->iova_cookie;
+	struct iova_domain *iovad = &cookie->iovad;
+	dma_addr_t iova, dma_mask = dma_get_mask(dev);
+
+	/*
+	 * If both the physical buffer start address and size are
+	 * page aligned, we don't need to use a bounce page.
+	 */
+	if (dev_use_swiotlb(dev) && iova_offset(iovad, phys | size)) {
+		void *padding_start;
+		size_t padding_size, aligned_size;
+
+		aligned_size = iova_align(iovad, size);
+		phys = swiotlb_tbl_map_single(dev, phys, size, aligned_size,
+					      iova_mask(iovad), dir, attrs);
+
+		if (phys == DMA_MAPPING_ERROR)
+			return DMA_MAPPING_ERROR;
 
-	dma_handle = __iommu_dma_map_swiotlb(dev, phys, size, dma_get_mask(dev),
-			coherent, dir, attrs);
-	if (!coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
-	    dma_handle != DMA_MAPPING_ERROR)
+		/* Cleanup the padding area. */
+		padding_start = phys_to_virt(phys);
+		padding_size = aligned_size;
+
+		if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
+		    (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)) {
+			padding_start += size;
+			padding_size -= size;
+		}
+
+		memset(padding_start, 0, padding_size);
+	}
+
+	if (!coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
 		arch_sync_dma_for_device(phys, size, dir);
-	return dma_handle;
+
+	iova = __iommu_dma_map(dev, phys, size, prot, dma_mask);
+	if (iova == DMA_MAPPING_ERROR && is_swiotlb_buffer(dev, phys))
+		swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs);
+	return iova;
 }
 
 static void iommu_dma_unmap_page(struct device *dev, dma_addr_t dma_handle,
 		size_t size, enum dma_data_direction dir, unsigned long attrs)
 {
-	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
-		iommu_dma_sync_single_for_cpu(dev, dma_handle, size, dir);
-	__iommu_dma_unmap_swiotlb(dev, dma_handle, size, dir, attrs);
+	struct iommu_domain *domain = iommu_get_dma_domain(dev);
+	phys_addr_t phys;
+
+	phys = iommu_iova_to_phys(domain, dma_handle);
+	if (WARN_ON(!phys))
+		return;
+
+	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && !dev_is_dma_coherent(dev))
+		arch_sync_dma_for_cpu(phys, size, dir);
+
+	__iommu_dma_unmap(dev, dma_handle, size);
+
+	if (unlikely(is_swiotlb_buffer(dev, phys)))
+		swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs);
 }
 
 /*
@@ -967,7 +939,7 @@ static void iommu_dma_unmap_sg_swiotlb(struct device *dev, struct scatterlist *sg,
 	int i;
 
 	for_each_sg(sg, s, nents, i)
-		__iommu_dma_unmap_swiotlb(dev, sg_dma_address(s),
+		iommu_dma_unmap_page(dev, sg_dma_address(s),
 				sg_dma_len(s), dir, attrs);
 }
 
@@ -978,9 +950,8 @@ static int iommu_dma_map_sg_swiotlb(struct device *dev, struct scatterlist *sg,
 	int i;
 
 	for_each_sg(sg, s, nents, i) {
-		sg_dma_address(s) = __iommu_dma_map_swiotlb(dev, sg_phys(s),
-				s->length, dma_get_mask(dev),
-				dev_is_dma_coherent(dev), dir, attrs);
+		sg_dma_address(s) = iommu_dma_map_page(dev, sg_page(s),
+				s->offset, s->length, dir, attrs);
 		if (sg_dma_address(s) == DMA_MAPPING_ERROR)
 			goto out_unmap;
 		sg_dma_len(s) = s->length;
@@ -1016,15 +987,16 @@ static int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg,
 
 	if (static_branch_unlikely(&iommu_deferred_attach_enabled)) {
 		ret = iommu_deferred_attach(dev, domain);
-		goto out;
+		if (ret)
+			goto out;
 	}
 
+	if (dev_use_swiotlb(dev))
+		return iommu_dma_map_sg_swiotlb(dev, sg, nents, dir, attrs);
+
 	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
 		iommu_dma_sync_sg_for_device(dev, sg, nents, dir);
 
-	if (dev_is_untrusted(dev))
-		return iommu_dma_map_sg_swiotlb(dev, sg, nents, dir, attrs);
-
 	/*
 	 * Work out how much IOVA space we need, and align the segments to
 	 * IOVA granules for the IOMMU driver to handle. With some clever
@@ -1097,14 +1069,14 @@ static void iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
 	struct scatterlist *tmp;
 	int i;
 
-	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
-		iommu_dma_sync_sg_for_cpu(dev, sg, nents, dir);
-
-	if (dev_is_untrusted(dev)) {
+	if (dev_use_swiotlb(dev)) {
 		iommu_dma_unmap_sg_swiotlb(dev, sg, nents, dir, attrs);
 		return;
 	}
 
+	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+		iommu_dma_sync_sg_for_cpu(dev, sg, nents, dir);
+
 	/*
 	 * The scatterlist segments are mapped into a single
 	 * contiguous IOVA allocation, so this is incredibly easy.
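The dma-iommu.c rework above folds the swiotlb helpers into iommu_dma_map_page()/iommu_dma_unmap_page() and gates all bounce-buffering on the new dev_use_swiotlb() predicate. The bounce decision itself is pure alignment arithmetic; a self-contained sketch, assuming a 4 KiB IOVA granule:

	/* Mirrors iova_offset(iovad, phys | size) != 0 in the real code. */
	static bool needs_bounce(phys_addr_t phys, size_t size, size_t granule)
	{
		return (phys | size) & (granule - 1);
	}

	/* e.g. needs_bounce(0x12340200, 0x600, SZ_4K) is true: the buffer is
	 * bounced, iova_align() rounds the allocation up to 0x1000, and the
	 * 0xa00 padding bytes are memset() to zero so an untrusted device can
	 * never read stale kernel data sitting beside the real payload. */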
diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig
index 0ddb77115be7..247d0f2d5fdf 100644
--- a/drivers/iommu/intel/Kconfig
+++ b/drivers/iommu/intel/Kconfig
@@ -6,6 +6,9 @@ config DMAR_TABLE
 config DMAR_PERF
 	bool
 
+config DMAR_DEBUG
+	bool
+
 config INTEL_IOMMU
 	bool "Support for Intel IOMMU using DMA Remapping Devices"
 	depends on PCI_MSI && ACPI && (X86 || IA64)
@@ -31,6 +34,7 @@ config INTEL_IOMMU_DEBUGFS
 	bool "Export Intel IOMMU internals in Debugfs"
 	depends on IOMMU_DEBUGFS
 	select DMAR_PERF
+	select DMAR_DEBUG
 	help
 	  !!!WARNING!!!
diff --git a/drivers/iommu/intel/cap_audit.c b/drivers/iommu/intel/cap_audit.c
index b12e421a2f1a..b39d223926a4 100644
--- a/drivers/iommu/intel/cap_audit.c
+++ b/drivers/iommu/intel/cap_audit.c
@@ -163,6 +163,14 @@ static int cap_audit_static(struct intel_iommu *iommu, enum cap_audit_type type)
 		check_irq_capabilities(iommu, i);
 	}
 
+	/*
+	 * If the system is sane to support scalable mode, either SL or FL
+	 * should be sane.
+	 */
+	if (intel_cap_smts_sanity() &&
+	    !intel_cap_flts_sanity() && !intel_cap_slts_sanity())
+		return -EOPNOTSUPP;
+
 out:
 	rcu_read_unlock();
 	return 0;
@@ -203,3 +211,8 @@ bool intel_cap_flts_sanity(void)
 {
 	return ecap_flts(intel_iommu_ecap_sanity);
 }
+
+bool intel_cap_slts_sanity(void)
+{
+	return ecap_slts(intel_iommu_ecap_sanity);
+}
diff --git a/drivers/iommu/intel/cap_audit.h b/drivers/iommu/intel/cap_audit.h
index 74cfccae0e81..d07b75938961 100644
--- a/drivers/iommu/intel/cap_audit.h
+++ b/drivers/iommu/intel/cap_audit.h
@@ -111,6 +111,7 @@ bool intel_cap_smts_sanity(void);
 bool intel_cap_pasid_sanity(void);
 bool intel_cap_nest_sanity(void);
 bool intel_cap_flts_sanity(void);
+bool intel_cap_slts_sanity(void);
 
 static inline bool scalable_mode_support(void)
 {
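cap_audit.c now cross-checks the translation stages: advertising scalable mode is only acceptable if first-level or second-level translation is uniformly sane across all IOMMU units. An illustrative restatement (the accessors are the real ones; the wrapper function is not kernel code):

	static int check_scalable_mode_levels(void)
	{
		if (!intel_cap_smts_sanity())
			return 0;	/* legacy mode: SL is always used */

		/* SM advertised, but neither stage usable everywhere */
		if (!intel_cap_flts_sanity() && !intel_cap_slts_sanity())
			return -EOPNOTSUPP;

		return 0;
	}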
"DMA Read" : "DMA Write", source_id >> 8, PCI_SLOT(source_id & 0xFF), @@ -1959,6 +1963,8 @@ static int dmar_fault_do_one(struct intel_iommu *iommu, int type, PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason); + dmar_fault_dump_ptes(iommu, source_id, addr, pasid); + return 0; } diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index d75f59ae28e6..0bde0c8b4126 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -156,6 +156,8 @@ static struct intel_iommu **g_iommus; static void __init check_tylersburg_isoch(void); static int rwbf_quirk; +static inline struct device_domain_info * +dmar_search_domain_by_dev_info(int segment, int bus, int devfn); /* * set to 1 to panic kernel if can't successfully enable VT-d @@ -412,6 +414,7 @@ static int __init intel_iommu_setup(char *str) { if (!str) return -EINVAL; + while (*str) { if (!strncmp(str, "on", 2)) { dmar_disabled = 0; @@ -441,13 +444,16 @@ static int __init intel_iommu_setup(char *str) } else if (!strncmp(str, "tboot_noforce", 13)) { pr_info("Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n"); intel_iommu_tboot_noforce = 1; + } else { + pr_notice("Unknown option - '%s'\n", str); } str += strcspn(str, ","); while (*str == ',') str++; } - return 0; + + return 1; } __setup("intel_iommu=", intel_iommu_setup); @@ -522,7 +528,7 @@ static inline void free_devinfo_mem(void *vaddr) static inline int domain_type_is_si(struct dmar_domain *domain) { - return domain->flags & DOMAIN_FLAG_STATIC_IDENTITY; + return domain->domain.type == IOMMU_DOMAIN_IDENTITY; } static inline bool domain_use_first_level(struct dmar_domain *domain) @@ -992,6 +998,117 @@ out: spin_unlock_irqrestore(&iommu->lock, flags); } +#ifdef CONFIG_DMAR_DEBUG +static void pgtable_walk(struct intel_iommu *iommu, unsigned long pfn, u8 bus, u8 devfn) +{ + struct device_domain_info *info; + struct dma_pte *parent, *pte; + struct dmar_domain *domain; + int offset, level; + + info = dmar_search_domain_by_dev_info(iommu->segment, bus, devfn); + if (!info || !info->domain) { + pr_info("device [%02x:%02x.%d] not probed\n", + bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); + return; + } + + domain = info->domain; + level = agaw_to_level(domain->agaw); + parent = domain->pgd; + if (!parent) { + pr_info("no page table setup\n"); + return; + } + + while (1) { + offset = pfn_level_offset(pfn, level); + pte = &parent[offset]; + if (!pte || (dma_pte_superpage(pte) || !dma_pte_present(pte))) { + pr_info("PTE not present at level %d\n", level); + break; + } + + pr_info("pte level: %d, pte value: 0x%016llx\n", level, pte->val); + + if (level == 1) + break; + + parent = phys_to_virt(dma_pte_addr(pte)); + level--; + } +} + +void dmar_fault_dump_ptes(struct intel_iommu *iommu, u16 source_id, + unsigned long long addr, u32 pasid) +{ + struct pasid_dir_entry *dir, *pde; + struct pasid_entry *entries, *pte; + struct context_entry *ctx_entry; + struct root_entry *rt_entry; + u8 devfn = source_id & 0xff; + u8 bus = source_id >> 8; + int i, dir_index, index; + + pr_info("Dump %s table entries for IOVA 0x%llx\n", iommu->name, addr); + + /* root entry dump */ + rt_entry = &iommu->root_entry[bus]; + if (!rt_entry) { + pr_info("root table entry is not present\n"); + return; + } + + if (sm_supported(iommu)) + pr_info("scalable mode root entry: hi 0x%016llx, low 0x%016llx\n", + rt_entry->hi, rt_entry->lo); + else + pr_info("root entry: 0x%016llx", rt_entry->lo); + + /* context entry dump */ + ctx_entry = iommu_context_addr(iommu, bus, devfn, 0); 
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index d75f59ae28e6..0bde0c8b4126 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -156,6 +156,8 @@ static struct intel_iommu **g_iommus;
 
 static void __init check_tylersburg_isoch(void);
 static int rwbf_quirk;
+static inline struct device_domain_info *
+dmar_search_domain_by_dev_info(int segment, int bus, int devfn);
 
 /*
  * set to 1 to panic kernel if can't successfully enable VT-d
@@ -412,6 +414,7 @@ static int __init intel_iommu_setup(char *str)
 {
 	if (!str)
 		return -EINVAL;
+
 	while (*str) {
 		if (!strncmp(str, "on", 2)) {
 			dmar_disabled = 0;
@@ -441,13 +444,16 @@ static int __init intel_iommu_setup(char *str)
 		} else if (!strncmp(str, "tboot_noforce", 13)) {
 			pr_info("Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n");
 			intel_iommu_tboot_noforce = 1;
+		} else {
+			pr_notice("Unknown option - '%s'\n", str);
 		}
 
 		str += strcspn(str, ",");
 		while (*str == ',')
 			str++;
 	}
-	return 0;
+
+	return 1;
 }
 __setup("intel_iommu=", intel_iommu_setup);
 
@@ -522,7 +528,7 @@ static inline void free_devinfo_mem(void *vaddr)
 
 static inline int domain_type_is_si(struct dmar_domain *domain)
 {
-	return domain->flags & DOMAIN_FLAG_STATIC_IDENTITY;
+	return domain->domain.type == IOMMU_DOMAIN_IDENTITY;
 }
 
 static inline bool domain_use_first_level(struct dmar_domain *domain)
@@ -992,6 +998,117 @@ out:
 	spin_unlock_irqrestore(&iommu->lock, flags);
 }
 
+#ifdef CONFIG_DMAR_DEBUG
+static void pgtable_walk(struct intel_iommu *iommu, unsigned long pfn, u8 bus, u8 devfn)
+{
+	struct device_domain_info *info;
+	struct dma_pte *parent, *pte;
+	struct dmar_domain *domain;
+	int offset, level;
+
+	info = dmar_search_domain_by_dev_info(iommu->segment, bus, devfn);
+	if (!info || !info->domain) {
+		pr_info("device [%02x:%02x.%d] not probed\n",
+			bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
+		return;
+	}
+
+	domain = info->domain;
+	level = agaw_to_level(domain->agaw);
+	parent = domain->pgd;
+	if (!parent) {
+		pr_info("no page table setup\n");
+		return;
+	}
+
+	while (1) {
+		offset = pfn_level_offset(pfn, level);
+		pte = &parent[offset];
+		if (!pte || (dma_pte_superpage(pte) || !dma_pte_present(pte))) {
+			pr_info("PTE not present at level %d\n", level);
+			break;
+		}
+
+		pr_info("pte level: %d, pte value: 0x%016llx\n", level, pte->val);
+
+		if (level == 1)
+			break;
+
+		parent = phys_to_virt(dma_pte_addr(pte));
+		level--;
+	}
+}
+
+void dmar_fault_dump_ptes(struct intel_iommu *iommu, u16 source_id,
+			  unsigned long long addr, u32 pasid)
+{
+	struct pasid_dir_entry *dir, *pde;
+	struct pasid_entry *entries, *pte;
+	struct context_entry *ctx_entry;
+	struct root_entry *rt_entry;
+	u8 devfn = source_id & 0xff;
+	u8 bus = source_id >> 8;
+	int i, dir_index, index;
+
+	pr_info("Dump %s table entries for IOVA 0x%llx\n", iommu->name, addr);
+
+	/* root entry dump */
+	rt_entry = &iommu->root_entry[bus];
+	if (!rt_entry) {
+		pr_info("root table entry is not present\n");
+		return;
+	}
+
+	if (sm_supported(iommu))
+		pr_info("scalable mode root entry: hi 0x%016llx, low 0x%016llx\n",
+			rt_entry->hi, rt_entry->lo);
+	else
+		pr_info("root entry: 0x%016llx", rt_entry->lo);
+
+	/* context entry dump */
+	ctx_entry = iommu_context_addr(iommu, bus, devfn, 0);
+	if (!ctx_entry) {
+		pr_info("context table entry is not present\n");
+		return;
+	}
+
+	pr_info("context entry: hi 0x%016llx, low 0x%016llx\n",
+		ctx_entry->hi, ctx_entry->lo);
+
+	/* legacy mode does not require PASID entries */
+	if (!sm_supported(iommu))
+		goto pgtable_walk;
+
+	/* get the pointer to pasid directory entry */
+	dir = phys_to_virt(ctx_entry->lo & VTD_PAGE_MASK);
+	if (!dir) {
+		pr_info("pasid directory entry is not present\n");
+		return;
+	}
+	/* For request-without-pasid, get the pasid from context entry */
+	if (intel_iommu_sm && pasid == INVALID_IOASID)
+		pasid = PASID_RID2PASID;
+
+	dir_index = pasid >> PASID_PDE_SHIFT;
+	pde = &dir[dir_index];
+	pr_info("pasid dir entry: 0x%016llx\n", pde->val);
+
+	/* get the pointer to the pasid table entry */
+	entries = get_pasid_table_from_pde(pde);
+	if (!entries) {
+		pr_info("pasid table entry is not present\n");
+		return;
+	}
+	index = pasid & PASID_PTE_MASK;
+	pte = &entries[index];
+	for (i = 0; i < ARRAY_SIZE(pte->val); i++)
+		pr_info("pasid table entry[%d]: 0x%016llx\n", i, pte->val[i]);
+
+pgtable_walk:
+	pgtable_walk(iommu, addr >> VTD_PAGE_SHIFT, bus, devfn);
+}
+#endif
+
 static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
 				      unsigned long pfn, int *target_level)
 {
@@ -1874,12 +1991,21 @@ static void free_dmar_iommu(struct intel_iommu *iommu)
 * Check and return whether first level is used by default for
 * DMA translation.
 */
-static bool first_level_by_default(void)
+static bool first_level_by_default(unsigned int type)
 {
-	return scalable_mode_support() && intel_cap_flts_sanity();
+	/* Only SL is available in legacy mode */
+	if (!scalable_mode_support())
+		return false;
+
+	/* Only level (either FL or SL) is available, just use it */
+	if (intel_cap_flts_sanity() ^ intel_cap_slts_sanity())
+		return intel_cap_flts_sanity();
+
+	/* Both levels are available, decide it based on domain type */
+	return type != IOMMU_DOMAIN_UNMANAGED;
 }
 
-static struct dmar_domain *alloc_domain(int flags)
+static struct dmar_domain *alloc_domain(unsigned int type)
 {
 	struct dmar_domain *domain;
 
@@ -1889,8 +2015,7 @@ static struct dmar_domain *alloc_domain(int flags)
 	memset(domain, 0, sizeof(*domain));
 	domain->nid = NUMA_NO_NODE;
-	domain->flags = flags;
-	if (first_level_by_default())
+	if (first_level_by_default(type))
 		domain->flags |= DOMAIN_FLAG_USE_FIRST_LEVEL;
 	domain->has_iotlb_device = false;
 	INIT_LIST_HEAD(&domain->devices);
@@ -2354,12 +2479,17 @@ __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
 				return -ENOMEM;
 			first_pte = pte;
 
+			lvl_pages = lvl_to_nr_pages(largepage_lvl);
+
 			/* It is large page*/
 			if (largepage_lvl > 1) {
 				unsigned long end_pfn;
+				unsigned long pages_to_remove;
 
 				pteval |= DMA_PTE_LARGE_PAGE;
-				end_pfn = ((iov_pfn + nr_pages) & level_mask(largepage_lvl)) - 1;
+				pages_to_remove = min_t(unsigned long, nr_pages,
+							nr_pte_to_next_page(pte) * lvl_pages);
+				end_pfn = iov_pfn + pages_to_remove - 1;
 				switch_to_super_page(domain, iov_pfn, end_pfn, largepage_lvl);
 			} else {
 				pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
@@ -2381,10 +2511,6 @@ __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
 			WARN_ON(1);
 		}
 
-		lvl_pages = lvl_to_nr_pages(largepage_lvl);
-
-		BUG_ON(nr_pages < lvl_pages);
-
 		nr_pages -= lvl_pages;
 		iov_pfn += lvl_pages;
 		phys_pfn += lvl_pages;
@@ -2708,7 +2834,7 @@ static int __init si_domain_init(int hw)
 	struct device *dev;
 	int i, nid, ret;
 
-	si_domain = alloc_domain(DOMAIN_FLAG_STATIC_IDENTITY);
+	si_domain = alloc_domain(IOMMU_DOMAIN_IDENTITY);
 	if (!si_domain)
 		return -EFAULT;
@@ -4517,7 +4643,7 @@ static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
 	case IOMMU_DOMAIN_DMA:
 	case IOMMU_DOMAIN_DMA_FQ:
 	case IOMMU_DOMAIN_UNMANAGED:
-		dmar_domain = alloc_domain(0);
+		dmar_domain = alloc_domain(type);
 		if (!dmar_domain) {
 			pr_err("Can't allocate dmar_domain\n");
 			return NULL;
@@ -5386,62 +5512,14 @@ static int intel_iommu_disable_sva(struct device *dev)
 	return ret;
 }
 
-/*
- * A PCI express designated vendor specific extended capability is defined
- * in the section 3.7 of Intel scalable I/O virtualization technical spec
- * for system software and tools to detect endpoint devices supporting the
- * Intel scalable IO virtualization without host driver dependency.
- *
- * Returns the address of the matching extended capability structure within
- * the device's PCI configuration space or 0 if the device does not support
- * it.
- */
-static int siov_find_pci_dvsec(struct pci_dev *pdev)
-{
-	int pos;
-	u16 vendor, id;
-
-	pos = pci_find_next_ext_capability(pdev, 0, 0x23);
-	while (pos) {
-		pci_read_config_word(pdev, pos + 4, &vendor);
-		pci_read_config_word(pdev, pos + 8, &id);
-		if (vendor == PCI_VENDOR_ID_INTEL && id == 5)
-			return pos;
-
-		pos = pci_find_next_ext_capability(pdev, pos, 0x23);
-	}
-
-	return 0;
-}
-
-static bool
-intel_iommu_dev_has_feat(struct device *dev, enum iommu_dev_features feat)
+static int intel_iommu_enable_iopf(struct device *dev)
 {
 	struct device_domain_info *info = get_domain_info(dev);
 
-	if (feat == IOMMU_DEV_FEAT_AUX) {
-		int ret;
-
-		if (!dev_is_pci(dev) || dmar_disabled ||
-		    !scalable_mode_support() || !pasid_mode_support())
-			return false;
-
-		ret = pci_pasid_features(to_pci_dev(dev));
-		if (ret < 0)
-			return false;
-
-		return !!siov_find_pci_dvsec(to_pci_dev(dev));
-	}
-
-	if (feat == IOMMU_DEV_FEAT_IOPF)
-		return info && info->pri_supported;
-
-	if (feat == IOMMU_DEV_FEAT_SVA)
-		return info && (info->iommu->flags & VTD_FLAG_SVM_CAPABLE) &&
-			info->pasid_supported && info->pri_supported &&
-			info->ats_supported;
+	if (info && info->pri_supported)
+		return 0;
 
-	return false;
+	return -ENODEV;
 }
 
 static int
@@ -5452,7 +5530,7 @@ intel_iommu_dev_enable_feat(struct device *dev, enum iommu_dev_features feat)
 		return intel_iommu_enable_auxd(dev);
 
 	case IOMMU_DEV_FEAT_IOPF:
-		return intel_iommu_dev_has_feat(dev, feat) ? 0 : -ENODEV;
+		return intel_iommu_enable_iopf(dev);
 
 	case IOMMU_DEV_FEAT_SVA:
 		return intel_iommu_enable_sva(dev);
@@ -5578,7 +5656,6 @@ const struct iommu_ops intel_iommu_ops = {
 	.get_resv_regions	= intel_iommu_get_resv_regions,
 	.put_resv_regions	= generic_iommu_put_resv_regions,
 	.device_group		= intel_iommu_device_group,
-	.dev_has_feat		= intel_iommu_dev_has_feat,
 	.dev_feat_enabled	= intel_iommu_dev_feat_enabled,
 	.dev_enable_feat	= intel_iommu_dev_enable_feat,
 	.dev_disable_feat	= intel_iommu_dev_disable_feat,
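The intel_iommu_setup() fix above (return 1 instead of 0) matters because of the __setup() contract: a zero return tells the kernel the option was not consumed, so "intel_iommu=..." would be reported as an unknown parameter and passed along to init. A minimal sketch of a conforming handler, with example_param and its flag being hypothetical:

	static bool example_enabled;		/* hypothetical flag */

	static int __init example_setup(char *str)
	{
		if (!str)
			return -EINVAL;

		if (!strcmp(str, "on"))
			example_enabled = true;
		else
			pr_notice("Unknown option - '%s'\n", str);

		return 1;	/* consumed: don't hand the option to init */
	}
	__setup("example_param=", example_setup);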
diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index 0c228787704f..5b5d69b04fcc 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -505,21 +505,6 @@ out:
 	return ret;
 }
 
-static void _load_pasid(void *unused)
-{
-	update_pasid();
-}
-
-static void load_pasid(struct mm_struct *mm, u32 pasid)
-{
-	mutex_lock(&mm->context.lock);
-
-	/* Update PASID MSR on all CPUs running the mm's tasks. */
-	on_each_cpu_mask(mm_cpumask(mm), _load_pasid, NULL, true);
-
-	mutex_unlock(&mm->context.lock);
-}
-
 static int intel_svm_alloc_pasid(struct device *dev, struct mm_struct *mm,
 				 unsigned int flags)
 {
@@ -614,10 +599,6 @@ static struct iommu_sva *intel_svm_bind_mm(struct intel_iommu *iommu,
 	if (ret)
 		goto free_sdev;
 
-	/* The newly allocated pasid is loaded to the mm. */
-	if (!(flags & SVM_FLAG_SUPERVISOR_MODE) && list_empty(&svm->devs))
-		load_pasid(mm, svm->pasid);
-
 	list_add_rcu(&sdev->list, &svm->devs);
 success:
 	return &sdev->sva;
@@ -670,11 +651,8 @@ static int intel_svm_unbind_mm(struct device *dev, u32 pasid)
 			kfree_rcu(sdev, rcu);
 
 			if (list_empty(&svm->devs)) {
-				if (svm->notifier.ops) {
+				if (svm->notifier.ops)
 					mmu_notifier_unregister(&svm->notifier, mm);
-					/* Clear mm's pasid. */
-					load_pasid(mm, PASID_DISABLED);
-				}
 				pasid_private_remove(svm->pasid);
 				/* We mandate that no page faults may be outstanding
 				 * for the PASID when intel_svm_unbind_mm() is called.
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 3303d707bab4..a80f13867bed 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -1953,8 +1953,7 @@ static struct iommu_domain *__iommu_domain_alloc(struct bus_type *bus,
 	/* Assume all sizes by default; the driver may override this later */
 	domain->pgsize_bitmap = bus->iommu_ops->pgsize_bitmap;
 
-	/* Temporarily avoid -EEXIST while drivers still get their own cookies */
-	if (iommu_is_dma_domain(domain) && !domain->iova_cookie && iommu_get_dma_cookie(domain)) {
+	if (iommu_is_dma_domain(domain) && iommu_get_dma_cookie(domain)) {
 		iommu_domain_free(domain);
 		domain = NULL;
 	}
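With iommu_get_dma_cookie()/iommu_put_dma_cookie() unexported (see the dma-iommu.c hunks earlier), DMA cookie lifetime is now owned entirely by the IOMMU core: __iommu_domain_alloc() acquires it and iommu_domain_free() releases it. A driver's domain_alloc callback shrinks accordingly; a hedged sketch using a hypothetical my_domain type:

	static struct iommu_domain *my_domain_alloc(unsigned int type)
	{
		struct my_domain *dom;

		if (type != IOMMU_DOMAIN_DMA && type != IOMMU_DOMAIN_UNMANAGED)
			return NULL;

		dom = kzalloc(sizeof(*dom), GFP_KERNEL);
		if (!dom)
			return NULL;

		/* no iommu_get_dma_cookie() here any more -- the core
		 * attaches the cookie for IOMMU_DOMAIN_DMA domains */
		return &dom->domain;
	}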
diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c
index d38ff29a76e8..ca752bdc710f 100644
--- a/drivers/iommu/ipmmu-vmsa.c
+++ b/drivers/iommu/ipmmu-vmsa.c
@@ -33,10 +33,10 @@
 #define arm_iommu_detach_device(...)	do {} while (0)
 #endif
 
-#define IPMMU_CTX_MAX		8U
+#define IPMMU_CTX_MAX		16U
 #define IPMMU_CTX_INVALID	-1
 
-#define IPMMU_UTLB_MAX		48U
+#define IPMMU_UTLB_MAX		64U
 
 struct ipmmu_features {
 	bool use_ns_alias_offset;
@@ -189,8 +189,12 @@ static void ipmmu_write(struct ipmmu_vmsa_device *mmu, unsigned int offset,
 static unsigned int ipmmu_ctx_reg(struct ipmmu_vmsa_device *mmu,
 				  unsigned int context_id, unsigned int reg)
 {
-	return mmu->features->ctx_offset_base +
-	       context_id * mmu->features->ctx_offset_stride + reg;
+	unsigned int base = mmu->features->ctx_offset_base;
+
+	if (context_id > 7)
+		base += 0x800 - 8 * 0x40;
+
+	return base + context_id * mmu->features->ctx_offset_stride + reg;
 }
 
 static u32 ipmmu_ctx_read(struct ipmmu_vmsa_device *mmu,
@@ -922,6 +926,20 @@ static const struct ipmmu_features ipmmu_features_rcar_gen3 = {
 	.utlb_offset_base = 0,
 };
 
+static const struct ipmmu_features ipmmu_features_r8a779a0 = {
+	.use_ns_alias_offset = false,
+	.has_cache_leaf_nodes = true,
+	.number_of_contexts = 16,
+	.num_utlbs = 64,
+	.setup_imbuscr = false,
+	.twobit_imttbcr_sl0 = true,
+	.reserved_context = true,
+	.cache_snoop = false,
+	.ctx_offset_base = 0x10000,
+	.ctx_offset_stride = 0x1040,
+	.utlb_offset_base = 0x3000,
+};
+
 static const struct of_device_id ipmmu_of_ids[] = {
 	{
 		.compatible = "renesas,ipmmu-vmsa",
@@ -954,12 +972,18 @@ static const struct of_device_id ipmmu_of_ids[] = {
 		.compatible = "renesas,ipmmu-r8a77970",
 		.data = &ipmmu_features_rcar_gen3,
 	}, {
+		.compatible = "renesas,ipmmu-r8a77980",
+		.data = &ipmmu_features_rcar_gen3,
+	}, {
 		.compatible = "renesas,ipmmu-r8a77990",
 		.data = &ipmmu_features_rcar_gen3,
 	}, {
 		.compatible = "renesas,ipmmu-r8a77995",
 		.data = &ipmmu_features_rcar_gen3,
 	}, {
+		.compatible = "renesas,ipmmu-r8a779a0",
+		.data = &ipmmu_features_r8a779a0,
+	}, {
 		/* Terminator */
 	},
 };
diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index d837adfd1da5..25b834104790 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -550,7 +550,9 @@ static phys_addr_t mtk_iommu_iova_to_phys(struct iommu_domain *domain,
 	phys_addr_t pa;
 
 	pa = dom->iop->iova_to_phys(dom->iop, iova);
-	if (dom->data->enable_4GB && pa >= MTK_IOMMU_4GB_MODE_REMAP_BASE)
+	if (IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT) &&
+	    dom->data->enable_4GB &&
+	    pa >= MTK_IOMMU_4GB_MODE_REMAP_BASE)
 		pa &= ~BIT_ULL(32);
 
 	return pa;
diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c
index 0a281833f611..e900e3c46903 100644
--- a/drivers/iommu/tegra-smmu.c
+++ b/drivers/iommu/tegra-smmu.c
@@ -1079,7 +1079,6 @@ struct tegra_smmu *tegra_smmu_probe(struct device *dev,
 				    struct tegra_mc *mc)
 {
 	struct tegra_smmu *smmu;
-	size_t size;
 	u32 value;
 	int err;
 
@@ -1097,9 +1096,7 @@ struct tegra_smmu *tegra_smmu_probe(struct device *dev,
 	 */
 	mc->smmu = smmu;
 
-	size = BITS_TO_LONGS(soc->num_asids) * sizeof(long);
-
-	smmu->asids = devm_kzalloc(dev, size, GFP_KERNEL);
+	smmu->asids = devm_bitmap_zalloc(dev, soc->num_asids, GFP_KERNEL);
 	if (!smmu->asids)
 		return ERR_PTR(-ENOMEM);
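tegra-smmu now allocates its ASID bitmap with devm_bitmap_zalloc(), which takes its size in bits and is freed automatically on driver detach, replacing the open-coded BITS_TO_LONGS()/devm_kzalloc() pair removed above. Typical usage (num_asids stands in for the SoC value):

	unsigned long *asids;

	asids = devm_bitmap_zalloc(dev, num_asids, GFP_KERNEL);
	if (!asids)
		return -ENOMEM;

	set_bit(0, asids);	/* e.g. reserve ASID 0 */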
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index e56a5faac395..cbdff8979980 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -380,7 +380,7 @@ static dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page,
 	 */
 	trace_swiotlb_bounced(dev, dev_addr, size, swiotlb_force);
 
-	map = swiotlb_tbl_map_single(dev, phys, size, size, dir, attrs);
+	map = swiotlb_tbl_map_single(dev, phys, size, size, 0, dir, attrs);
 	if (map == (phys_addr_t)DMA_MAPPING_ERROR)
 		return DMA_MAPPING_ERROR;
diff --git a/include/linux/dmar.h b/include/linux/dmar.h
index e04436a7ff27..45e903d84733 100644
--- a/include/linux/dmar.h
+++ b/include/linux/dmar.h
@@ -131,6 +131,14 @@ static inline int dmar_res_noop(struct acpi_dmar_header *hdr, void *arg)
 	return 0;
 }
 
+#ifdef CONFIG_DMAR_DEBUG
+void dmar_fault_dump_ptes(struct intel_iommu *iommu, u16 source_id,
+			  unsigned long long addr, u32 pasid);
+#else
+static inline void dmar_fault_dump_ptes(struct intel_iommu *iommu, u16 source_id,
+					unsigned long long addr, u32 pasid) {}
+#endif
+
 #ifdef CONFIG_INTEL_IOMMU
 extern int iommu_detected, no_iommu;
 extern int intel_iommu_init(void);
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 05a65eb155f7..69230fd695ea 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -517,9 +517,6 @@ struct context_entry {
 	u64 hi;
 };
 
-/* si_domain contains mulitple devices */
-#define DOMAIN_FLAG_STATIC_IDENTITY	BIT(0)
-
 /*
 * When VT-d works in the scalable mode, it allows DMA translation to
 * happen through either first level or second level page table. This
@@ -708,9 +705,15 @@ static inline bool dma_pte_superpage(struct dma_pte *pte)
 	return (pte->val & DMA_PTE_LARGE_PAGE);
 }
 
-static inline int first_pte_in_page(struct dma_pte *pte)
+static inline bool first_pte_in_page(struct dma_pte *pte)
+{
+	return IS_ALIGNED((unsigned long)pte, VTD_PAGE_SIZE);
+}
+
+static inline int nr_pte_to_next_page(struct dma_pte *pte)
 {
-	return !((unsigned long)pte & ~VTD_PAGE_MASK);
+	return first_pte_in_page(pte) ? BIT_ULL(VTD_STRIDE_SHIFT) :
+		(struct dma_pte *)ALIGN((unsigned long)pte, VTD_PAGE_SIZE) - pte;
 }
 
 extern struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev *dev);
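nr_pte_to_next_page() above counts how many PTE slots remain before the current page-table page ends, which lets __domain_mapping() clamp a superpage conversion so it never walks past the end of a table. With 4 KiB tables and 8-byte PTEs (512 entries per table), a hedged walk-through:

	/* pte at table + 0x000 -> page-aligned, returns 512 (whole table)
	 * pte at table + 0xff0 -> (0x1000 - 0xff0) / 8 = 2 slots left
	 *
	 * which bounds the __domain_mapping() hunk shown earlier:
	 */
	pages_to_remove = min_t(unsigned long, nr_pages,
				nr_pte_to_next_page(pte) * lvl_pages);
	end_pfn = iov_pfn + pages_to_remove - 1;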
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index b0cb2a9973f4..569272871375 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -45,7 +45,8 @@ extern void __init swiotlb_update_mem_attributes(void);
 
 phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t phys,
 		size_t mapping_size, size_t alloc_size,
-		enum dma_data_direction dir, unsigned long attrs);
+		unsigned int alloc_aligned_mask, enum dma_data_direction dir,
+		unsigned long attrs);
 
 extern void swiotlb_tbl_unmap_single(struct device *hwdev,
 				     phys_addr_t tlb_addr,
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 87c40517e822..019672b3da1d 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -459,7 +459,7 @@ static unsigned int wrap_index(struct io_tlb_mem *mem, unsigned int index)
 * allocate a buffer from that IO TLB pool.
 */
 static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr,
-		size_t alloc_size)
+		size_t alloc_size, unsigned int alloc_align_mask)
 {
 	struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
 	unsigned long boundary_mask = dma_get_seg_boundary(dev);
@@ -483,6 +483,7 @@ static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr,
 	stride = (iotlb_align_mask >> IO_TLB_SHIFT) + 1;
 	if (alloc_size >= PAGE_SIZE)
 		stride = max(stride, stride << (PAGE_SHIFT - IO_TLB_SHIFT));
+	stride = max(stride, (alloc_align_mask >> IO_TLB_SHIFT) + 1);
 
 	spin_lock_irqsave(&mem->lock, flags);
 	if (unlikely(nslots > mem->nslabs - mem->used))
@@ -541,7 +542,8 @@ found:
 
 phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
 		size_t mapping_size, size_t alloc_size,
-		enum dma_data_direction dir, unsigned long attrs)
+		unsigned int alloc_align_mask, enum dma_data_direction dir,
+		unsigned long attrs)
 {
 	struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
 	unsigned int offset = swiotlb_align_offset(dev, orig_addr);
@@ -561,7 +563,8 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
 		return (phys_addr_t)DMA_MAPPING_ERROR;
 	}
 
-	index = swiotlb_find_slots(dev, orig_addr, alloc_size + offset);
+	index = swiotlb_find_slots(dev, orig_addr,
+				   alloc_size + offset, alloc_align_mask);
 	if (index == -1) {
 		if (!(attrs & DMA_ATTR_NO_WARN))
 			dev_warn_ratelimited(dev,
@@ -675,7 +678,7 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t paddr, size_t size,
 	trace_swiotlb_bounced(dev, phys_to_dma(dev, paddr), size,
 			      swiotlb_force);
 
-	swiotlb_addr = swiotlb_tbl_map_single(dev, paddr, size, size, dir,
+	swiotlb_addr = swiotlb_tbl_map_single(dev, paddr, size, size, 0, dir,
 			attrs);
 	if (swiotlb_addr == (phys_addr_t)DMA_MAPPING_ERROR)
 		return DMA_MAPPING_ERROR;
@@ -759,7 +762,7 @@ struct page *swiotlb_alloc(struct device *dev, size_t size)
 	if (!mem)
 		return NULL;
 
-	index = swiotlb_find_slots(dev, 0, size);
+	index = swiotlb_find_slots(dev, 0, size, 0);
 	if (index == -1)
 		return NULL;
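The new alloc_align_mask parameter lets swiotlb callers demand a minimum alignment for the bounce slot itself, independent of the allocation size. With IO_TLB_SHIFT = 11 (2 KiB slots), dma-iommu passes iova_mask(iovad), e.g. 0xfff for a 4 KiB granule, so the slot-search stride becomes (0xfff >> 11) + 1 = 2 and only 4 KiB-aligned slots are considered. Callers with no such constraint pass 0 and keep the old behaviour; a sketch of both call styles:

	/* bounce buffer aligned to the IOVA granule (dma-iommu path above) */
	phys = swiotlb_tbl_map_single(dev, phys, size, aligned_size,
				      iova_mask(iovad), dir, attrs);

	/* legacy call sites: no extra alignment constraint */
	map = swiotlb_tbl_map_single(dev, phys, size, size, 0, dir, attrs);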