diff options
Diffstat (limited to 'drivers/iommu')
31 files changed, 521 insertions, 206 deletions
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index b3aa1f5d5321..1469ad0794f2 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -199,7 +199,6 @@ source "drivers/iommu/iommufd/Kconfig" config IRQ_REMAP bool "Support for Interrupt Remapping" depends on X86_64 && X86_IO_APIC && PCI_MSI && ACPI - select DMAR_TABLE if INTEL_IOMMU help Supports Interrupt remapping for IO-APIC and MSI devices. To use x2apic mode in the CPU's which support x2APIC enhancements or diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 6386fa4556d9..6fac9ee8dd3e 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -87,7 +87,6 @@ int amd_iommu_complete_ppr(struct device *dev, u32 pasid, int status, int tag); */ void amd_iommu_flush_all_caches(struct amd_iommu *iommu); void amd_iommu_update_and_flush_device_table(struct protection_domain *domain); -void amd_iommu_domain_update(struct protection_domain *domain); void amd_iommu_domain_flush_pages(struct protection_domain *domain, u64 address, size_t size); void amd_iommu_dev_flush_pasid_pages(struct iommu_dev_data *dev_data, diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index 601fb4ee6900..6fb2f2919ab1 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -175,6 +175,7 @@ #define CONTROL_GAM_EN 25 #define CONTROL_GALOG_EN 28 #define CONTROL_GAINT_EN 29 +#define CONTROL_EPH_EN 45 #define CONTROL_XT_EN 50 #define CONTROL_INTCAPXT_EN 51 #define CONTROL_IRTCACHEDIS 59 diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 43131c3a2172..ff11cd7e5c06 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -2647,6 +2647,10 @@ static void iommu_init_flags(struct amd_iommu *iommu) /* Set IOTLB invalidation timeout to 1s */ iommu_set_inv_tlb_timeout(iommu, CTRL_INV_TO_1S); + + /* Enable Enhanced Peripheral Page Request Handling */ + if (check_feature(FEATURE_EPHSUP)) + iommu_feature_enable(iommu, CONTROL_EPH_EN); } static void iommu_apply_resume_quirks(struct amd_iommu *iommu) @@ -3651,6 +3655,14 @@ found: while (*uid == '0' && *(uid + 1)) uid++; + if (strlen(hid) >= ACPIHID_HID_LEN) { + pr_err("Invalid command line: hid is too long\n"); + return 1; + } else if (strlen(uid) >= ACPIHID_UID_LEN) { + pr_err("Invalid command line: uid is too long\n"); + return 1; + } + i = early_acpihid_map_size++; memcpy(early_acpihid_map[i].hid, hid, strlen(hid)); memcpy(early_acpihid_map[i].uid, uid, strlen(uid)); diff --git a/drivers/iommu/amd/io_pgtable_v2.c b/drivers/iommu/amd/io_pgtable_v2.c index c616de2c5926..a56a27396305 100644 --- a/drivers/iommu/amd/io_pgtable_v2.c +++ b/drivers/iommu/amd/io_pgtable_v2.c @@ -254,7 +254,7 @@ static int iommu_v2_map_pages(struct io_pgtable_ops *ops, unsigned long iova, pte = v2_alloc_pte(cfg->amd.nid, pgtable->pgd, iova, map_size, gfp, &updated); if (!pte) { - ret = -EINVAL; + ret = -ENOMEM; goto out; } diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 8364cd6fa47d..6a019670efc7 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -107,7 +107,9 @@ static inline int get_acpihid_device_id(struct device *dev, struct acpihid_map_entry **entry) { struct acpi_device *adev = ACPI_COMPANION(dev); - struct acpihid_map_entry *p; + struct acpihid_map_entry *p, *p1 = NULL; + int hid_count = 0; + bool fw_bug; if (!adev) return -ENODEV; @@ -115,12 +117,33 @@ static inline int get_acpihid_device_id(struct device *dev, list_for_each_entry(p, &acpihid_map, list) { if (acpi_dev_hid_uid_match(adev, p->hid, p->uid[0] ? p->uid : NULL)) { - if (entry) - *entry = p; - return p->devid; + p1 = p; + fw_bug = false; + hid_count = 1; + break; + } + + /* + * Count HID matches w/o UID, raise FW_BUG but allow exactly one match + */ + if (acpi_dev_hid_match(adev, p->hid)) { + p1 = p; + hid_count++; + fw_bug = true; } } - return -EINVAL; + + if (!p1) + return -EINVAL; + if (fw_bug) + dev_err_once(dev, FW_BUG "No ACPI device matched UID, but %d device%s matched HID.\n", + hid_count, hid_count > 1 ? "s" : ""); + if (hid_count > 1) + return -EINVAL; + if (entry) + *entry = p1; + + return p1->devid; } static inline int get_device_sbdf_id(struct device *dev) @@ -460,8 +483,8 @@ static inline void pdev_disable_cap_pasid(struct pci_dev *pdev) static void pdev_enable_caps(struct pci_dev *pdev) { - pdev_enable_cap_ats(pdev); pdev_enable_cap_pasid(pdev); + pdev_enable_cap_ats(pdev); pdev_enable_cap_pri(pdev); } @@ -838,6 +861,14 @@ int amd_iommu_register_ga_log_notifier(int (*notifier)(u32)) { iommu_ga_log_notifier = notifier; + /* + * Ensure all in-flight IRQ handlers run to completion before returning + * to the caller, e.g. to ensure module code isn't unloaded while it's + * being executed in the IRQ handler. + */ + if (!notifier) + synchronize_rcu(); + return 0; } EXPORT_SYMBOL(amd_iommu_register_ga_log_notifier); @@ -1606,15 +1637,6 @@ void amd_iommu_update_and_flush_device_table(struct protection_domain *domain) domain_flush_complete(domain); } -void amd_iommu_domain_update(struct protection_domain *domain) -{ - /* Update device table */ - amd_iommu_update_and_flush_device_table(domain); - - /* Flush domain TLB(s) and wait for completion */ - amd_iommu_domain_flush_all(domain); -} - int amd_iommu_complete_ppr(struct device *dev, u32 pasid, int status, int tag) { struct iommu_dev_data *dev_data; @@ -2330,8 +2352,21 @@ static inline u64 dma_max_address(void) if (amd_iommu_pgtable == AMD_IOMMU_V1) return ~0ULL; - /* V2 with 4/5 level page table */ - return ((1ULL << PM_LEVEL_SHIFT(amd_iommu_gpt_level)) - 1); + /* + * V2 with 4/5 level page table. Note that "2.2.6.5 AMD64 4-Kbyte Page + * Translation" shows that the V2 table sign extends the top of the + * address space creating a reserved region in the middle of the + * translation, just like the CPU does. Further Vasant says the docs are + * incomplete and this only applies to non-zero PASIDs. If the AMDv2 + * page table is assigned to the 0 PASID then there is no sign extension + * check. + * + * Since the IOMMU must have a fixed geometry, and the core code does + * not understand sign extended addressing, we have to chop off the high + * bit to get consistent behavior with attachments of the domain to any + * PASID. + */ + return ((1ULL << (PM_LEVEL_SHIFT(amd_iommu_gpt_level) - 1)) - 1); } static bool amd_iommu_hd_support(struct amd_iommu *iommu) @@ -3669,7 +3704,7 @@ static int amd_ir_set_vcpu_affinity(struct irq_data *data, void *vcpu_info) * we should not modify the IRTE */ if (!dev_data || !dev_data->use_vapic) - return 0; + return -EINVAL; ir_data->cfg = irqd_cfg(data); pi_data->ir_data = ir_data; diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c index a7c36654dee5..32f3e91a7d7f 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c @@ -397,6 +397,12 @@ struct iommu_domain *arm_smmu_sva_domain_alloc(struct device *dev, return ERR_CAST(smmu_domain); smmu_domain->domain.type = IOMMU_DOMAIN_SVA; smmu_domain->domain.ops = &arm_smmu_sva_domain_ops; + + /* + * Choose page_size as the leaf page size for invalidation when + * ARM_SMMU_FEAT_RANGE_INV is present + */ + smmu_domain->domain.pgsize_bitmap = PAGE_SIZE; smmu_domain->smmu = smmu; ret = xa_alloc(&arm_smmu_asid_xa, &asid, smmu_domain, diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 353fea58cd31..780e2d9e4ea8 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -2702,9 +2702,14 @@ static int arm_smmu_attach_prepare(struct arm_smmu_attach_state *state, * Translation Requests and Translated transactions are denied * as though ATS is disabled for the stream (STE.EATS == 0b00), * causing F_BAD_ATS_TREQ and F_TRANSL_FORBIDDEN events - * (IHI0070Ea 5.2 Stream Table Entry). Thus ATS can only be - * enabled if we have arm_smmu_domain, those always have page - * tables. + * (IHI0070Ea 5.2 Stream Table Entry). + * + * However, if we have installed a CD table and are using S1DSS + * then ATS will work in S1DSS bypass. See "13.6.4 Full ATS + * skipping stage 1". + * + * Disable ATS if we are going to create a normal 0b100 bypass + * STE. */ state->ats_enabled = arm_smmu_ats_supported(master); } @@ -3017,8 +3022,10 @@ static void arm_smmu_attach_dev_ste(struct iommu_domain *domain, if (arm_smmu_ssids_in_use(&master->cd_table)) { /* * If a CD table has to be present then we need to run with ATS - * on even though the RID will fail ATS queries with UR. This is - * because we have no idea what the PASID's need. + * on because we have to assume a PASID is using ATS. For + * IDENTITY this will setup things so that S1DSS=bypass which + * follows the explanation in "13.6.4 Full ATS skipping stage 1" + * and allows for ATS on the RID to work. */ state.cd_needs_ats = true; arm_smmu_attach_prepare(&state, domain); @@ -3213,6 +3220,7 @@ static int arm_smmu_insert_master(struct arm_smmu_device *smmu, mutex_lock(&smmu->streams_mutex); for (i = 0; i < fwspec->num_ids; i++) { struct arm_smmu_stream *new_stream = &master->streams[i]; + struct rb_node *existing; u32 sid = fwspec->ids[i]; new_stream->id = sid; @@ -3223,10 +3231,20 @@ static int arm_smmu_insert_master(struct arm_smmu_device *smmu, break; /* Insert into SID tree */ - if (rb_find_add(&new_stream->node, &smmu->streams, - arm_smmu_streams_cmp_node)) { - dev_warn(master->dev, "stream %u already in tree\n", - sid); + existing = rb_find_add(&new_stream->node, &smmu->streams, + arm_smmu_streams_cmp_node); + if (existing) { + struct arm_smmu_master *existing_master = + rb_entry(existing, struct arm_smmu_stream, node) + ->master; + + /* Bridged PCI devices may end up with duplicated IDs */ + if (existing_master == master) + continue; + + dev_warn(master->dev, + "stream %u already in tree from dev %s\n", sid, + dev_name(existing_master->dev)); ret = -EINVAL; break; } @@ -4609,7 +4627,7 @@ static int arm_smmu_device_probe(struct platform_device *pdev) /* Initialise in-memory data structures */ ret = arm_smmu_init_structures(smmu); if (ret) - return ret; + goto err_free_iopf; /* Record our private device structure */ platform_set_drvdata(pdev, smmu); @@ -4620,22 +4638,29 @@ static int arm_smmu_device_probe(struct platform_device *pdev) /* Reset the device */ ret = arm_smmu_device_reset(smmu); if (ret) - return ret; + goto err_disable; /* And we're up. Go go go! */ ret = iommu_device_sysfs_add(&smmu->iommu, dev, NULL, "smmu3.%pa", &ioaddr); if (ret) - return ret; + goto err_disable; ret = iommu_device_register(&smmu->iommu, &arm_smmu_ops, dev); if (ret) { dev_err(dev, "Failed to register iommu\n"); - iommu_device_sysfs_remove(&smmu->iommu); - return ret; + goto err_free_sysfs; } return 0; + +err_free_sysfs: + iommu_device_sysfs_remove(&smmu->iommu); +err_disable: + arm_smmu_device_disable(smmu); +err_free_iopf: + iopf_queue_free(smmu->evtq.iopf); + return ret; } static void arm_smmu_device_remove(struct platform_device *pdev) diff --git a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c index 6e41ddaa24d6..dd7d030d2e89 100644 --- a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c +++ b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c @@ -79,7 +79,6 @@ #define TEGRA241_VCMDQ_PAGE1(q) (TEGRA241_VCMDQ_PAGE1_BASE + 0x80*(q)) #define VCMDQ_ADDR GENMASK(47, 5) #define VCMDQ_LOG2SIZE GENMASK(4, 0) -#define VCMDQ_LOG2SIZE_MAX 19 #define TEGRA241_VCMDQ_BASE 0x00000 #define TEGRA241_VCMDQ_CONS_INDX_BASE 0x00008 @@ -488,29 +487,21 @@ static int tegra241_cmdqv_hw_reset(struct arm_smmu_device *smmu) /* VCMDQ Resource Helpers */ -static void tegra241_vcmdq_free_smmu_cmdq(struct tegra241_vcmdq *vcmdq) -{ - struct arm_smmu_queue *q = &vcmdq->cmdq.q; - size_t nents = 1 << q->llq.max_n_shift; - size_t qsz = nents << CMDQ_ENT_SZ_SHIFT; - - if (!q->base) - return; - dmam_free_coherent(vcmdq->cmdqv->smmu.dev, qsz, q->base, q->base_dma); -} - static int tegra241_vcmdq_alloc_smmu_cmdq(struct tegra241_vcmdq *vcmdq) { struct arm_smmu_device *smmu = &vcmdq->cmdqv->smmu; struct arm_smmu_cmdq *cmdq = &vcmdq->cmdq; struct arm_smmu_queue *q = &cmdq->q; char name[16]; + u32 regval; int ret; snprintf(name, 16, "vcmdq%u", vcmdq->idx); - /* Queue size, capped to ensure natural alignment */ - q->llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT, VCMDQ_LOG2SIZE_MAX); + /* Cap queue size to SMMU's IDR1.CMDQS and ensure natural alignment */ + regval = readl_relaxed(smmu->base + ARM_SMMU_IDR1); + q->llq.max_n_shift = + min_t(u32, CMDQ_MAX_SZ_SHIFT, FIELD_GET(IDR1_CMDQS, regval)); /* Use the common helper to init the VCMDQ, and then... */ ret = arm_smmu_init_one_queue(smmu, q, vcmdq->page0, @@ -558,7 +549,8 @@ static void tegra241_vintf_free_lvcmdq(struct tegra241_vintf *vintf, u16 lidx) struct tegra241_vcmdq *vcmdq = vintf->lvcmdqs[lidx]; char header[64]; - tegra241_vcmdq_free_smmu_cmdq(vcmdq); + /* Note that the lvcmdq queue memory space is managed by devres */ + tegra241_vintf_deinit_lvcmdq(vintf, lidx); dev_dbg(vintf->cmdqv->dev, @@ -766,13 +758,13 @@ static int tegra241_cmdqv_init_structures(struct arm_smmu_device *smmu) vintf = kzalloc(sizeof(*vintf), GFP_KERNEL); if (!vintf) - goto out_fallback; + return -ENOMEM; /* Init VINTF0 for in-kernel use */ ret = tegra241_cmdqv_init_vintf(cmdqv, 0, vintf); if (ret) { dev_err(cmdqv->dev, "failed to init vintf0: %d\n", ret); - goto free_vintf; + return ret; } /* Preallocate logical VCMDQs to VINTF0 */ @@ -781,24 +773,12 @@ static int tegra241_cmdqv_init_structures(struct arm_smmu_device *smmu) vcmdq = tegra241_vintf_alloc_lvcmdq(vintf, lidx); if (IS_ERR(vcmdq)) - goto free_lvcmdq; + return PTR_ERR(vcmdq); } /* Now, we are ready to run all the impl ops */ smmu->impl_ops = &tegra241_cmdqv_impl_ops; return 0; - -free_lvcmdq: - for (lidx--; lidx >= 0; lidx--) - tegra241_vintf_free_lvcmdq(vintf, lidx); - tegra241_cmdqv_deinit_vintf(cmdqv, vintf->idx); -free_vintf: - kfree(vintf); -out_fallback: - dev_info(smmu->impl_dev, "Falling back to standard SMMU CMDQ\n"); - smmu->options &= ~ARM_SMMU_OPT_TEGRA241_CMDQV; - tegra241_cmdqv_remove(smmu); - return 0; } #ifdef CONFIG_IOMMU_DEBUGFS diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c index 6372f3e25c4b..0c35a235ab6d 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c @@ -258,6 +258,7 @@ static const struct of_device_id qcom_smmu_client_of_match[] __maybe_unused = { { .compatible = "qcom,sdm670-mdss" }, { .compatible = "qcom,sdm845-mdss" }, { .compatible = "qcom,sdm845-mss-pil" }, + { .compatible = "qcom,sm6115-mdss" }, { .compatible = "qcom,sm6350-mdss" }, { .compatible = "qcom,sm6375-mdss" }, { .compatible = "qcom,sm8150-mdss" }, @@ -567,6 +568,7 @@ static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = { { .compatible = "qcom,sc8180x-smmu-500", .data = &qcom_smmu_500_impl0_data }, { .compatible = "qcom,sc8280xp-smmu-500", .data = &qcom_smmu_500_impl0_data }, { .compatible = "qcom,sdm630-smmu-v2", .data = &qcom_smmu_v2_data }, + { .compatible = "qcom,sdm670-smmu-v2", .data = &qcom_smmu_v2_data }, { .compatible = "qcom,sdm845-smmu-v2", .data = &qcom_smmu_v2_data }, { .compatible = "qcom,sdm845-smmu-500", .data = &sdm845_smmu_500_data }, { .compatible = "qcom,sm6115-smmu-500", .data = &qcom_smmu_500_impl0_data}, diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 2a9fa0c8cc00..0f0caf59023c 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -1815,7 +1815,7 @@ int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr) static DEFINE_MUTEX(msi_prepare_lock); /* see below */ if (!domain || !domain->iova_cookie) { - desc->iommu_cookie = NULL; + msi_desc_set_iommu_msi_iova(desc, 0, 0); return 0; } @@ -1827,11 +1827,12 @@ int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr) mutex_lock(&msi_prepare_lock); msi_page = iommu_dma_get_msi_page(dev, msi_addr, domain); mutex_unlock(&msi_prepare_lock); - - msi_desc_set_iommu_cookie(desc, msi_page); - if (!msi_page) return -ENOMEM; + + msi_desc_set_iommu_msi_iova( + desc, msi_page->iova, + ilog2(cookie_msi_granule(domain->iova_cookie))); return 0; } @@ -1842,18 +1843,15 @@ int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr) */ void iommu_dma_compose_msi_msg(struct msi_desc *desc, struct msi_msg *msg) { - struct device *dev = msi_desc_to_dev(desc); - const struct iommu_domain *domain = iommu_get_domain_for_dev(dev); - const struct iommu_dma_msi_page *msi_page; +#ifdef CONFIG_IRQ_MSI_IOMMU + if (desc->iommu_msi_shift) { + u64 msi_iova = desc->iommu_msi_iova << desc->iommu_msi_shift; - msi_page = msi_desc_get_iommu_cookie(desc); - - if (!domain || !domain->iova_cookie || WARN_ON(!msi_page)) - return; - - msg->address_hi = upper_32_bits(msi_page->iova); - msg->address_lo &= cookie_msi_granule(domain->iova_cookie) - 1; - msg->address_lo += lower_32_bits(msi_page->iova); + msg->address_hi = upper_32_bits(msi_iova); + msg->address_lo = lower_32_bits(msi_iova) | + (msg->address_lo & ((1 << desc->iommu_msi_shift) - 1)); + } +#endif } static int iommu_dma_init(void) diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index c666ecab955d..7465dbb6fa80 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -832,7 +832,7 @@ static int __maybe_unused exynos_sysmmu_suspend(struct device *dev) struct exynos_iommu_owner *owner = dev_iommu_priv_get(master); mutex_lock(&owner->rpm_lock); - if (&data->domain->domain != &exynos_identity_domain) { + if (data->domain) { dev_dbg(data->sysmmu, "saving state\n"); __sysmmu_disable(data); } @@ -850,7 +850,7 @@ static int __maybe_unused exynos_sysmmu_resume(struct device *dev) struct exynos_iommu_owner *owner = dev_iommu_priv_get(master); mutex_lock(&owner->rpm_lock); - if (&data->domain->domain != &exynos_identity_domain) { + if (data->domain) { dev_dbg(data->sysmmu, "restoring state\n"); __sysmmu_enable(data); } diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index eaf862e8dea1..7f553f7aa3cb 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -2056,6 +2056,7 @@ int enable_drhd_fault_handling(unsigned int cpu) /* * Enable fault control interrupt. */ + guard(rwsem_read)(&dmar_global_lock); for_each_iommu(iommu, drhd) { u32 fault_status; int ret; diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index cc23cfcdeb2d..b300f72cf01e 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -1957,6 +1957,18 @@ static bool dev_is_real_dma_subdevice(struct device *dev) pci_real_dma_dev(to_pci_dev(dev)) != to_pci_dev(dev); } +static bool domain_need_iotlb_sync_map(struct dmar_domain *domain, + struct intel_iommu *iommu) +{ + if (cap_caching_mode(iommu->cap) && !domain->use_first_level) + return true; + + if (rwbf_quirk || cap_rwbf(iommu->cap)) + return true; + + return false; +} + static int dmar_domain_attach_device(struct dmar_domain *domain, struct device *dev) { @@ -1970,6 +1982,7 @@ static int dmar_domain_attach_device(struct dmar_domain *domain, return ret; info->domain = domain; + info->domain_attached = true; spin_lock_irqsave(&domain->lock, flags); list_add(&info->link, &domain->devices); spin_unlock_irqrestore(&domain->lock, flags); @@ -1993,6 +2006,8 @@ static int dmar_domain_attach_device(struct dmar_domain *domain, if (ret) goto out_block_translation; + domain->iotlb_sync_map |= domain_need_iotlb_sync_map(domain, iommu); + return 0; out_block_translation: @@ -3174,6 +3189,7 @@ static int __init probe_acpi_namespace_devices(void) if (dev->bus != &acpi_bus_type) continue; + up_read(&dmar_global_lock); adev = to_acpi_device(dev); mutex_lock(&adev->physical_node_lock); list_for_each_entry(pn, @@ -3183,6 +3199,7 @@ static int __init probe_acpi_namespace_devices(void) break; } mutex_unlock(&adev->physical_node_lock); + down_read(&dmar_global_lock); if (ret) return ret; @@ -3307,7 +3324,14 @@ int __init intel_iommu_init(void) iommu_device_sysfs_add(&iommu->iommu, NULL, intel_iommu_groups, "%s", iommu->name); + /* + * The iommu device probe is protected by the iommu_probe_device_lock. + * Release the dmar_global_lock before entering the device probe path + * to avoid unnecessary lock order splat. + */ + up_read(&dmar_global_lock); iommu_device_register(&iommu->iommu, &intel_iommu_ops, NULL); + down_read(&dmar_global_lock); iommu_pmu_register(iommu); } @@ -3372,6 +3396,10 @@ void device_block_translation(struct device *dev) struct intel_iommu *iommu = info->iommu; unsigned long flags; + /* Device in DMA blocking state. Noting to do. */ + if (!info->domain_attached) + return; + if (info->domain) cache_tag_unassign_domain(info->domain, dev, IOMMU_NO_PASID); @@ -3384,6 +3412,9 @@ void device_block_translation(struct device *dev) domain_context_clear(info); } + /* Device now in DMA blocking state. */ + info->domain_attached = false; + if (!info->domain) return; @@ -4261,7 +4292,10 @@ static bool risky_device(struct pci_dev *pdev) static int intel_iommu_iotlb_sync_map(struct iommu_domain *domain, unsigned long iova, size_t size) { - cache_tag_flush_range_np(to_dmar_domain(domain), iova, iova + size - 1); + struct dmar_domain *dmar_domain = to_dmar_domain(domain); + + if (dmar_domain->iotlb_sync_map) + cache_tag_flush_range_np(dmar_domain, iova, iova + size - 1); return 0; } @@ -4547,9 +4581,6 @@ static int context_setup_pass_through_cb(struct pci_dev *pdev, u16 alias, void * { struct device *dev = data; - if (dev != &pdev->dev) - return 0; - return context_setup_pass_through(dev, PCI_BUS_NUM(alias), alias & 0xff); } @@ -4583,6 +4614,9 @@ static int identity_domain_attach_dev(struct iommu_domain *domain, struct device ret = device_setup_pass_through(dev); } + if (!ret) + info->domain_attached = true; + return ret; } @@ -4660,6 +4694,9 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_igfx); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_igfx); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_igfx); +/* QM57/QS57 integrated gfx malfunctions with dmar */ +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_iommu_igfx); + /* Broadwell igfx malfunctions with dmar */ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1606, quirk_iommu_igfx); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160B, quirk_iommu_igfx); @@ -4737,7 +4774,6 @@ static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev) } } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt); diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index 1497f3112b12..f521155fb793 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -614,6 +614,9 @@ struct dmar_domain { u8 has_mappings:1; /* Has mappings configured through * iommu_map() interface. */ + u8 iotlb_sync_map:1; /* Need to flush IOTLB cache or write + * buffer when creating mappings. + */ spinlock_t lock; /* Protect device tracking lists */ struct list_head devices; /* all devices' list */ @@ -776,6 +779,7 @@ struct device_domain_info { u8 ats_supported:1; u8 ats_enabled:1; u8 dtlb_extra_inval:1; /* Quirk for devices need extra flush */ + u8 domain_attached:1; /* Device has domain attached */ u8 ats_qdep; struct device *dev; /* it's NULL for PCIe-to-PCI bridge */ struct intel_iommu *iommu; /* IOMMU used by this device */ diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c index 7a6d188e3bea..71b3383b7115 100644 --- a/drivers/iommu/intel/irq_remapping.c +++ b/drivers/iommu/intel/irq_remapping.c @@ -26,11 +26,6 @@ #include "../iommu-pages.h" #include "cap_audit.h" -enum irq_mode { - IRQ_REMAPPING, - IRQ_POSTING, -}; - struct ioapic_scope { struct intel_iommu *iommu; unsigned int id; @@ -50,8 +45,8 @@ struct irq_2_iommu { u16 irte_index; u16 sub_handle; u8 irte_mask; - enum irq_mode mode; bool posted_msi; + bool posted_vcpu; }; struct intel_ir_data { @@ -139,7 +134,6 @@ static int alloc_irte(struct intel_iommu *iommu, irq_iommu->irte_index = index; irq_iommu->sub_handle = 0; irq_iommu->irte_mask = mask; - irq_iommu->mode = IRQ_REMAPPING; } raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags); @@ -194,8 +188,6 @@ static int modify_irte(struct irq_2_iommu *irq_iommu, rc = qi_flush_iec(iommu, index, 0); - /* Update iommu mode according to the IRTE mode */ - irq_iommu->mode = irte->pst ? IRQ_POSTING : IRQ_REMAPPING; raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags); return rc; @@ -1173,7 +1165,26 @@ static void intel_ir_reconfigure_irte_posted(struct irq_data *irqd) static inline void intel_ir_reconfigure_irte_posted(struct irq_data *irqd) {} #endif -static void intel_ir_reconfigure_irte(struct irq_data *irqd, bool force) +static void __intel_ir_reconfigure_irte(struct irq_data *irqd, bool force_host) +{ + struct intel_ir_data *ir_data = irqd->chip_data; + + /* + * Don't modify IRTEs for IRQs that are being posted to vCPUs if the + * host CPU affinity changes. + */ + if (ir_data->irq_2_iommu.posted_vcpu && !force_host) + return; + + ir_data->irq_2_iommu.posted_vcpu = false; + + if (ir_data->irq_2_iommu.posted_msi) + intel_ir_reconfigure_irte_posted(irqd); + else + modify_irte(&ir_data->irq_2_iommu, &ir_data->irte_entry); +} + +static void intel_ir_reconfigure_irte(struct irq_data *irqd, bool force_host) { struct intel_ir_data *ir_data = irqd->chip_data; struct irte *irte = &ir_data->irte_entry; @@ -1186,10 +1197,7 @@ static void intel_ir_reconfigure_irte(struct irq_data *irqd, bool force) irte->vector = cfg->vector; irte->dest_id = IRTE_DEST(cfg->dest_apicid); - if (ir_data->irq_2_iommu.posted_msi) - intel_ir_reconfigure_irte_posted(irqd); - else if (force || ir_data->irq_2_iommu.mode == IRQ_REMAPPING) - modify_irte(&ir_data->irq_2_iommu, irte); + __intel_ir_reconfigure_irte(irqd, force_host); } /* @@ -1244,7 +1252,7 @@ static int intel_ir_set_vcpu_affinity(struct irq_data *data, void *info) /* stop posting interrupts, back to the default mode */ if (!vcpu_pi_info) { - modify_irte(&ir_data->irq_2_iommu, &ir_data->irte_entry); + __intel_ir_reconfigure_irte(data, true); } else { struct irte irte_pi; @@ -1267,6 +1275,7 @@ static int intel_ir_set_vcpu_affinity(struct irq_data *data, void *info) irte_pi.pda_h = (vcpu_pi_info->pi_desc_addr >> 32) & ~(-1UL << PDA_HIGH_BIT); + ir_data->irq_2_iommu.posted_vcpu = true; modify_irte(&ir_data->irq_2_iommu, &irte_pi); } @@ -1282,43 +1291,44 @@ static struct irq_chip intel_ir_chip = { }; /* - * With posted MSIs, all vectors are multiplexed into a single notification - * vector. Devices MSIs are then dispatched in a demux loop where - * EOIs can be coalesced as well. + * With posted MSIs, the MSI vectors are multiplexed into a single notification + * vector, and only the notification vector is sent to the APIC IRR. Device + * MSIs are then dispatched in a demux loop that harvests the MSIs from the + * CPU's Posted Interrupt Request bitmap. I.e. Posted MSIs never get sent to + * the APIC IRR, and thus do not need an EOI. The notification handler instead + * performs a single EOI after processing the PIR. * - * "INTEL-IR-POST" IRQ chip does not do EOI on ACK, thus the dummy irq_ack() - * function. Instead EOI is performed by the posted interrupt notification - * handler. + * Note! Pending SMP/CPU affinity changes, which are per MSI, must still be + * honored, only the APIC EOI is omitted. * * For the example below, 3 MSIs are coalesced into one CPU notification. Only - * one apic_eoi() is needed. + * one apic_eoi() is needed, but each MSI needs to process pending changes to + * its CPU affinity. * * __sysvec_posted_msi_notification() * irq_enter(); * handle_edge_irq() * irq_chip_ack_parent() - * dummy(); // No EOI + * irq_move_irq(); // No EOI * handle_irq_event() * driver_handler() * handle_edge_irq() * irq_chip_ack_parent() - * dummy(); // No EOI + * irq_move_irq(); // No EOI * handle_irq_event() * driver_handler() * handle_edge_irq() * irq_chip_ack_parent() - * dummy(); // No EOI + * irq_move_irq(); // No EOI * handle_irq_event() * driver_handler() * apic_eoi() * irq_exit() + * */ - -static void dummy_ack(struct irq_data *d) { } - static struct irq_chip intel_ir_chip_post_msi = { .name = "INTEL-IR-POST", - .irq_ack = dummy_ack, + .irq_ack = irq_move_irq, .irq_set_affinity = intel_ir_set_affinity, .irq_compose_msi_msg = intel_ir_compose_msi_msg, .irq_set_vcpu_affinity = intel_ir_set_vcpu_affinity, @@ -1494,6 +1504,9 @@ static void intel_irq_remapping_deactivate(struct irq_domain *domain, struct intel_ir_data *data = irq_data->chip_data; struct irte entry; + WARN_ON_ONCE(data->irq_2_iommu.posted_vcpu); + data->irq_2_iommu.posted_vcpu = false; + memset(&entry, 0, sizeof(entry)); modify_irte(&data->irq_2_iommu, &entry); } diff --git a/drivers/iommu/intel/nested.c b/drivers/iommu/intel/nested.c index 433c58944401..3b5251034a87 100644 --- a/drivers/iommu/intel/nested.c +++ b/drivers/iommu/intel/nested.c @@ -27,8 +27,7 @@ static int intel_nested_attach_dev(struct iommu_domain *domain, unsigned long flags; int ret = 0; - if (info->domain) - device_block_translation(dev); + device_block_translation(dev); if (iommu->agaw < dmar_domain->s2_domain->agaw) { dev_err_ratelimited(dev, "Adjusted guest address width not compatible\n"); @@ -62,6 +61,7 @@ static int intel_nested_attach_dev(struct iommu_domain *domain, goto unassign_tag; info->domain = dmar_domain; + info->domain_attached = true; spin_lock_irqsave(&dmar_domain->lock, flags); list_add(&info->link, &dmar_domain->devices); spin_unlock_irqrestore(&dmar_domain->lock, flags); diff --git a/drivers/iommu/io-pgfault.c b/drivers/iommu/io-pgfault.c index 4674e618797c..8b5926c1452e 100644 --- a/drivers/iommu/io-pgfault.c +++ b/drivers/iommu/io-pgfault.c @@ -478,6 +478,7 @@ void iopf_queue_remove_device(struct iopf_queue *queue, struct device *dev) ops->page_response(dev, iopf, &resp); list_del_init(&group->pending_node); + iopf_free_group(group); } mutex_unlock(&fault_param->lock); diff --git a/drivers/iommu/iommu-priv.h b/drivers/iommu/iommu-priv.h index de5b54eaa8bf..a5913c0b02a0 100644 --- a/drivers/iommu/iommu-priv.h +++ b/drivers/iommu/iommu-priv.h @@ -17,6 +17,8 @@ static inline const struct iommu_ops *dev_iommu_ops(struct device *dev) return dev->iommu->iommu_dev->ops; } +void dev_iommu_free(struct device *dev); + const struct iommu_ops *iommu_ops_from_fwnode(const struct fwnode_handle *fwnode); static inline const struct iommu_ops *iommu_fwspec_ops(struct iommu_fwspec *fwspec) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 83c8e617a2c5..0ad55649e2d0 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -347,7 +347,7 @@ static struct dev_iommu *dev_iommu_get(struct device *dev) return param; } -static void dev_iommu_free(struct device *dev) +void dev_iommu_free(struct device *dev) { struct dev_iommu *param = dev->iommu; @@ -503,6 +503,9 @@ static void iommu_deinit_device(struct device *dev) dev->iommu_group = NULL; module_put(ops->owner); dev_iommu_free(dev); +#ifdef CONFIG_IOMMU_DMA + dev->dma_iommu = false; +#endif } DEFINE_MUTEX(iommu_probe_device_lock); @@ -2391,6 +2394,7 @@ static size_t iommu_pgsize(struct iommu_domain *domain, unsigned long iova, unsigned int pgsize_idx, pgsize_idx_next; unsigned long pgsizes; size_t offset, pgsize, pgsize_next; + size_t offset_end; unsigned long addr_merge = paddr | iova; /* Page sizes supported by the hardware and small enough for @size */ @@ -2431,7 +2435,8 @@ static size_t iommu_pgsize(struct iommu_domain *domain, unsigned long iova, * If size is big enough to accommodate the larger page, reduce * the number of smaller pages. */ - if (offset + pgsize_next <= size) + if (!check_add_overflow(offset, pgsize_next, &offset_end) && + offset_end <= size) size = offset; out_set_count: @@ -3112,6 +3117,11 @@ int iommu_device_use_default_domain(struct device *dev) return 0; mutex_lock(&group->mutex); + /* We may race against bus_iommu_probe() finalising groups here */ + if (!group->default_domain) { + ret = -EPROBE_DEFER; + goto unlock_out; + } if (group->owner_cnt) { if (group->domain != group->default_domain || group->owner || !xa_empty(&group->pasid_array)) { diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c index 5fd3dd420290..74480ae6bfc0 100644 --- a/drivers/iommu/iommufd/device.c +++ b/drivers/iommu/iommufd/device.c @@ -3,6 +3,7 @@ */ #include <linux/iommu.h> #include <linux/iommufd.h> +#include <linux/pci-ats.h> #include <linux/slab.h> #include <uapi/linux/iommufd.h> @@ -352,6 +353,122 @@ iommufd_device_attach_reserved_iova(struct iommufd_device *idev, return 0; } +/* The device attach/detach/replace helpers for attach_handle */ + +/* Check if idev is attached to igroup->hwpt */ +static bool iommufd_device_is_attached(struct iommufd_device *idev) +{ + struct iommufd_device *cur; + + list_for_each_entry(cur, &idev->igroup->device_list, group_item) + if (cur == idev) + return true; + return false; +} + +static int iommufd_hwpt_attach_device(struct iommufd_hw_pagetable *hwpt, + struct iommufd_device *idev) +{ + struct iommufd_attach_handle *handle; + int rc; + + lockdep_assert_held(&idev->igroup->lock); + + handle = kzalloc(sizeof(*handle), GFP_KERNEL); + if (!handle) + return -ENOMEM; + + if (hwpt->fault) { + rc = iommufd_fault_iopf_enable(idev); + if (rc) + goto out_free_handle; + } + + handle->idev = idev; + rc = iommu_attach_group_handle(hwpt->domain, idev->igroup->group, + &handle->handle); + if (rc) + goto out_disable_iopf; + + return 0; + +out_disable_iopf: + if (hwpt->fault) + iommufd_fault_iopf_disable(idev); +out_free_handle: + kfree(handle); + return rc; +} + +static struct iommufd_attach_handle * +iommufd_device_get_attach_handle(struct iommufd_device *idev) +{ + struct iommu_attach_handle *handle; + + lockdep_assert_held(&idev->igroup->lock); + + handle = + iommu_attach_handle_get(idev->igroup->group, IOMMU_NO_PASID, 0); + if (IS_ERR(handle)) + return NULL; + return to_iommufd_handle(handle); +} + +static void iommufd_hwpt_detach_device(struct iommufd_hw_pagetable *hwpt, + struct iommufd_device *idev) +{ + struct iommufd_attach_handle *handle; + + handle = iommufd_device_get_attach_handle(idev); + iommu_detach_group_handle(hwpt->domain, idev->igroup->group); + if (hwpt->fault) { + iommufd_auto_response_faults(hwpt, handle); + iommufd_fault_iopf_disable(idev); + } + kfree(handle); +} + +static int iommufd_hwpt_replace_device(struct iommufd_device *idev, + struct iommufd_hw_pagetable *hwpt, + struct iommufd_hw_pagetable *old) +{ + struct iommufd_attach_handle *handle, *old_handle = + iommufd_device_get_attach_handle(idev); + int rc; + + handle = kzalloc(sizeof(*handle), GFP_KERNEL); + if (!handle) + return -ENOMEM; + + if (hwpt->fault && !old->fault) { + rc = iommufd_fault_iopf_enable(idev); + if (rc) + goto out_free_handle; + } + + handle->idev = idev; + rc = iommu_replace_group_handle(idev->igroup->group, hwpt->domain, + &handle->handle); + if (rc) + goto out_disable_iopf; + + if (old->fault) { + iommufd_auto_response_faults(hwpt, old_handle); + if (!hwpt->fault) + iommufd_fault_iopf_disable(idev); + } + kfree(old_handle); + + return 0; + +out_disable_iopf: + if (hwpt->fault && !old->fault) + iommufd_fault_iopf_disable(idev); +out_free_handle: + kfree(handle); + return rc; +} + int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt, struct iommufd_device *idev) { @@ -488,6 +605,11 @@ iommufd_device_do_replace(struct iommufd_device *idev, goto err_unlock; } + if (!iommufd_device_is_attached(idev)) { + rc = -EINVAL; + goto err_unlock; + } + if (hwpt == igroup->hwpt) { mutex_unlock(&idev->igroup->lock); return NULL; @@ -1127,7 +1249,7 @@ int iommufd_access_rw(struct iommufd_access *access, unsigned long iova, struct io_pagetable *iopt; struct iopt_area *area; unsigned long last_iova; - int rc; + int rc = -EINVAL; if (!length) return -EINVAL; @@ -1183,7 +1305,8 @@ int iommufd_get_hw_info(struct iommufd_ucmd *ucmd) void *data; int rc; - if (cmd->flags || cmd->__reserved) + if (cmd->flags || cmd->__reserved[0] || cmd->__reserved[1] || + cmd->__reserved[2]) return -EOPNOTSUPP; idev = iommufd_get_device(ucmd, cmd->dev_id); @@ -1240,6 +1363,36 @@ int iommufd_get_hw_info(struct iommufd_ucmd *ucmd) if (device_iommu_capable(idev->dev, IOMMU_CAP_DIRTY_TRACKING)) cmd->out_capabilities |= IOMMU_HW_CAP_DIRTY_TRACKING; + cmd->out_max_pasid_log2 = 0; + /* + * Currently, all iommu drivers enable PASID in the probe_device() + * op if iommu and device supports it. So the max_pasids stored in + * dev->iommu indicates both PASID support and enable status. A + * non-zero dev->iommu->max_pasids means PASID is supported and + * enabled. The iommufd only reports PASID capability to userspace + * if it's enabled. + */ + if (idev->dev->iommu->max_pasids) { + cmd->out_max_pasid_log2 = ilog2(idev->dev->iommu->max_pasids); + + if (dev_is_pci(idev->dev)) { + struct pci_dev *pdev = to_pci_dev(idev->dev); + int ctrl; + + ctrl = pci_pasid_status(pdev); + + WARN_ON_ONCE(ctrl < 0 || + !(ctrl & PCI_PASID_CTRL_ENABLE)); + + if (ctrl & PCI_PASID_CTRL_EXEC) + cmd->out_capabilities |= + IOMMU_HW_CAP_PCI_PASID_EXEC; + if (ctrl & PCI_PASID_CTRL_PRIV) + cmd->out_capabilities |= + IOMMU_HW_CAP_PCI_PASID_PRIV; + } + } + rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); out_free: kfree(data); diff --git a/drivers/iommu/iommufd/fault.c b/drivers/iommu/iommufd/fault.c index b8393a8c0753..1b0812f8bf84 100644 --- a/drivers/iommu/iommufd/fault.c +++ b/drivers/iommu/iommufd/fault.c @@ -16,7 +16,7 @@ #include "../iommu-priv.h" #include "iommufd_private.h" -static int iommufd_fault_iopf_enable(struct iommufd_device *idev) +int iommufd_fault_iopf_enable(struct iommufd_device *idev) { struct device *dev = idev->dev; int ret; @@ -45,7 +45,7 @@ static int iommufd_fault_iopf_enable(struct iommufd_device *idev) return ret; } -static void iommufd_fault_iopf_disable(struct iommufd_device *idev) +void iommufd_fault_iopf_disable(struct iommufd_device *idev) { mutex_lock(&idev->iopf_lock); if (!WARN_ON(idev->iopf_enabled == 0)) { @@ -93,20 +93,28 @@ int iommufd_fault_domain_attach_dev(struct iommufd_hw_pagetable *hwpt, return ret; } -static void iommufd_auto_response_faults(struct iommufd_hw_pagetable *hwpt, - struct iommufd_attach_handle *handle) +void iommufd_auto_response_faults(struct iommufd_hw_pagetable *hwpt, + struct iommufd_attach_handle *handle) { struct iommufd_fault *fault = hwpt->fault; struct iopf_group *group, *next; + struct list_head free_list; unsigned long index; if (!fault) return; + INIT_LIST_HEAD(&free_list); mutex_lock(&fault->mutex); + spin_lock(&fault->lock); list_for_each_entry_safe(group, next, &fault->deliver, node) { if (group->attach_handle != &handle->handle) continue; + list_move(&group->node, &free_list); + } + spin_unlock(&fault->lock); + + list_for_each_entry_safe(group, next, &free_list, node) { list_del(&group->node); iopf_group_response(group, IOMMU_PAGE_RESP_INVALID); iopf_free_group(group); @@ -208,6 +216,7 @@ void iommufd_fault_destroy(struct iommufd_object *obj) { struct iommufd_fault *fault = container_of(obj, struct iommufd_fault, obj); struct iopf_group *group, *next; + unsigned long index; /* * The iommufd object's reference count is zero at this point. @@ -220,6 +229,13 @@ void iommufd_fault_destroy(struct iommufd_object *obj) iopf_group_response(group, IOMMU_PAGE_RESP_INVALID); iopf_free_group(group); } + xa_for_each(&fault->response, index, group) { + xa_erase(&fault->response, index); + iopf_group_response(group, IOMMU_PAGE_RESP_INVALID); + iopf_free_group(group); + } + xa_destroy(&fault->response); + mutex_destroy(&fault->mutex); } static void iommufd_compose_fault_message(struct iommu_fault *fault, @@ -242,7 +258,7 @@ static ssize_t iommufd_fault_fops_read(struct file *filep, char __user *buf, { size_t fault_size = sizeof(struct iommu_hwpt_pgfault); struct iommufd_fault *fault = filep->private_data; - struct iommu_hwpt_pgfault data; + struct iommu_hwpt_pgfault data = {}; struct iommufd_device *idev; struct iopf_group *group; struct iopf_fault *iopf; @@ -253,17 +269,19 @@ static ssize_t iommufd_fault_fops_read(struct file *filep, char __user *buf, return -ESPIPE; mutex_lock(&fault->mutex); - while (!list_empty(&fault->deliver) && count > done) { - group = list_first_entry(&fault->deliver, - struct iopf_group, node); - - if (group->fault_count * fault_size > count - done) + while ((group = iommufd_fault_deliver_fetch(fault))) { + if (done >= count || + group->fault_count * fault_size > count - done) { + iommufd_fault_deliver_restore(fault, group); break; + } rc = xa_alloc(&fault->response, &group->cookie, group, xa_limit_32b, GFP_KERNEL); - if (rc) + if (rc) { + iommufd_fault_deliver_restore(fault, group); break; + } idev = to_iommufd_handle(group->attach_handle)->idev; list_for_each_entry(iopf, &group->faults, list) { @@ -272,13 +290,12 @@ static ssize_t iommufd_fault_fops_read(struct file *filep, char __user *buf, group->cookie); if (copy_to_user(buf + done, &data, fault_size)) { xa_erase(&fault->response, group->cookie); + iommufd_fault_deliver_restore(fault, group); rc = -EFAULT; break; } done += fault_size; } - - list_del(&group->node); } mutex_unlock(&fault->mutex); @@ -336,10 +353,10 @@ static __poll_t iommufd_fault_fops_poll(struct file *filep, __poll_t pollflags = EPOLLOUT; poll_wait(filep, &fault->wait_queue, wait); - mutex_lock(&fault->mutex); + spin_lock(&fault->lock); if (!list_empty(&fault->deliver)) pollflags |= EPOLLIN | EPOLLRDNORM; - mutex_unlock(&fault->mutex); + spin_unlock(&fault->lock); return pollflags; } @@ -381,6 +398,7 @@ int iommufd_fault_alloc(struct iommufd_ucmd *ucmd) INIT_LIST_HEAD(&fault->deliver); xa_init_flags(&fault->response, XA_FLAGS_ALLOC1); mutex_init(&fault->mutex); + spin_lock_init(&fault->lock); init_waitqueue_head(&fault->wait_queue); filep = anon_inode_getfile("[iommufd-pgfault]", &iommufd_fault_fops, @@ -429,9 +447,9 @@ int iommufd_fault_iopf_handler(struct iopf_group *group) hwpt = group->attach_handle->domain->fault_data; fault = hwpt->fault; - mutex_lock(&fault->mutex); + spin_lock(&fault->lock); list_add_tail(&group->node, &fault->deliver); - mutex_unlock(&fault->mutex); + spin_unlock(&fault->lock); wake_up_interruptible(&fault->wait_queue); diff --git a/drivers/iommu/iommufd/hw_pagetable.c b/drivers/iommu/iommufd/hw_pagetable.c index d06bf6e6c19f..2454627a8b61 100644 --- a/drivers/iommu/iommufd/hw_pagetable.c +++ b/drivers/iommu/iommufd/hw_pagetable.c @@ -122,6 +122,9 @@ iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas, if ((flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING) && !device_iommu_capable(idev->dev, IOMMU_CAP_DIRTY_TRACKING)) return ERR_PTR(-EOPNOTSUPP); + if ((flags & IOMMU_HWPT_FAULT_ID_VALID) && + (flags & IOMMU_HWPT_ALLOC_NEST_PARENT)) + return ERR_PTR(-EOPNOTSUPP); hwpt_paging = __iommufd_object_alloc( ictx, hwpt_paging, IOMMUFD_OBJ_HWPT_PAGING, common.obj); diff --git a/drivers/iommu/iommufd/io_pagetable.c b/drivers/iommu/iommufd/io_pagetable.c index 4bf7ccd39d46..067222b238b7 100644 --- a/drivers/iommu/iommufd/io_pagetable.c +++ b/drivers/iommu/iommufd/io_pagetable.c @@ -70,36 +70,45 @@ struct iopt_area *iopt_area_contig_next(struct iopt_area_contig_iter *iter) return iter->area; } -static bool __alloc_iova_check_hole(struct interval_tree_double_span_iter *span, - unsigned long length, - unsigned long iova_alignment, - unsigned long page_offset) +static bool __alloc_iova_check_range(unsigned long *start, unsigned long last, + unsigned long length, + unsigned long iova_alignment, + unsigned long page_offset) { - if (span->is_used || span->last_hole - span->start_hole < length - 1) + unsigned long aligned_start; + + /* ALIGN_UP() */ + if (check_add_overflow(*start, iova_alignment - 1, &aligned_start)) return false; + aligned_start &= ~(iova_alignment - 1); + aligned_start |= page_offset; - span->start_hole = ALIGN(span->start_hole, iova_alignment) | - page_offset; - if (span->start_hole > span->last_hole || - span->last_hole - span->start_hole < length - 1) + if (aligned_start >= last || last - aligned_start < length - 1) return false; + *start = aligned_start; return true; } -static bool __alloc_iova_check_used(struct interval_tree_span_iter *span, +static bool __alloc_iova_check_hole(struct interval_tree_double_span_iter *span, unsigned long length, unsigned long iova_alignment, unsigned long page_offset) { - if (span->is_hole || span->last_used - span->start_used < length - 1) + if (span->is_used) return false; + return __alloc_iova_check_range(&span->start_hole, span->last_hole, + length, iova_alignment, page_offset); +} - span->start_used = ALIGN(span->start_used, iova_alignment) | - page_offset; - if (span->start_used > span->last_used || - span->last_used - span->start_used < length - 1) +static bool __alloc_iova_check_used(struct interval_tree_span_iter *span, + unsigned long length, + unsigned long iova_alignment, + unsigned long page_offset) +{ + if (span->is_hole) return false; - return true; + return __alloc_iova_check_range(&span->start_used, span->last_used, + length, iova_alignment, page_offset); } /* @@ -696,8 +705,10 @@ again: iommufd_access_notify_unmap(iopt, area_first, length); /* Something is not responding to unmap requests. */ tries++; - if (WARN_ON(tries > 100)) - return -EDEADLOCK; + if (WARN_ON(tries > 100)) { + rc = -EDEADLOCK; + goto out_unmapped; + } goto again; } @@ -719,6 +730,7 @@ again: out_unlock_iova: up_write(&iopt->iova_rwsem); up_read(&iopt->domains_rwsem); +out_unmapped: if (unmapped) *unmapped = unmapped_bytes; return rc; diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h index f1d865e6fab6..18cdf1391a03 100644 --- a/drivers/iommu/iommufd/iommufd_private.h +++ b/drivers/iommu/iommufd/iommufd_private.h @@ -462,14 +462,39 @@ struct iommufd_fault { struct iommufd_ctx *ictx; struct file *filep; - /* The lists of outstanding faults protected by below mutex. */ - struct mutex mutex; + spinlock_t lock; /* protects the deliver list */ struct list_head deliver; + struct mutex mutex; /* serializes response flows */ struct xarray response; struct wait_queue_head wait_queue; }; +/* Fetch the first node out of the fault->deliver list */ +static inline struct iopf_group * +iommufd_fault_deliver_fetch(struct iommufd_fault *fault) +{ + struct list_head *list = &fault->deliver; + struct iopf_group *group = NULL; + + spin_lock(&fault->lock); + if (!list_empty(list)) { + group = list_first_entry(list, struct iopf_group, node); + list_del(&group->node); + } + spin_unlock(&fault->lock); + return group; +} + +/* Restore a node back to the head of the fault->deliver list */ +static inline void iommufd_fault_deliver_restore(struct iommufd_fault *fault, + struct iopf_group *group) +{ + spin_lock(&fault->lock); + list_add(&group->node, &fault->deliver); + spin_unlock(&fault->lock); +} + struct iommufd_attach_handle { struct iommu_attach_handle handle; struct iommufd_device *idev; @@ -498,35 +523,10 @@ int iommufd_fault_domain_replace_dev(struct iommufd_device *idev, struct iommufd_hw_pagetable *hwpt, struct iommufd_hw_pagetable *old); -static inline int iommufd_hwpt_attach_device(struct iommufd_hw_pagetable *hwpt, - struct iommufd_device *idev) -{ - if (hwpt->fault) - return iommufd_fault_domain_attach_dev(hwpt, idev); - - return iommu_attach_group(hwpt->domain, idev->igroup->group); -} - -static inline void iommufd_hwpt_detach_device(struct iommufd_hw_pagetable *hwpt, - struct iommufd_device *idev) -{ - if (hwpt->fault) { - iommufd_fault_domain_detach_dev(hwpt, idev); - return; - } - - iommu_detach_group(hwpt->domain, idev->igroup->group); -} - -static inline int iommufd_hwpt_replace_device(struct iommufd_device *idev, - struct iommufd_hw_pagetable *hwpt, - struct iommufd_hw_pagetable *old) -{ - if (old->fault || hwpt->fault) - return iommufd_fault_domain_replace_dev(idev, hwpt, old); - - return iommu_group_replace_domain(idev->igroup->group, hwpt->domain); -} +int iommufd_fault_iopf_enable(struct iommufd_device *idev); +void iommufd_fault_iopf_disable(struct iommufd_device *idev); +void iommufd_auto_response_faults(struct iommufd_hw_pagetable *hwpt, + struct iommufd_attach_handle *handle); #ifdef CONFIG_IOMMUFD_TEST int iommufd_test(struct iommufd_ucmd *ucmd); diff --git a/drivers/iommu/iommufd/iova_bitmap.c b/drivers/iommu/iommufd/iova_bitmap.c index d90b9e253412..2cdc4f542df4 100644 --- a/drivers/iommu/iommufd/iova_bitmap.c +++ b/drivers/iommu/iommufd/iova_bitmap.c @@ -130,7 +130,7 @@ struct iova_bitmap { static unsigned long iova_bitmap_offset_to_index(struct iova_bitmap *bitmap, unsigned long iova) { - unsigned long pgsize = 1 << bitmap->mapped.pgshift; + unsigned long pgsize = 1UL << bitmap->mapped.pgshift; return iova / (BITS_PER_TYPE(*bitmap->bitmap) * pgsize); } diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c index b5f5d27ee963..649fe79d0f0c 100644 --- a/drivers/iommu/iommufd/main.c +++ b/drivers/iommu/iommufd/main.c @@ -130,7 +130,7 @@ static int iommufd_object_dec_wait_shortterm(struct iommufd_ctx *ictx, if (wait_event_timeout(ictx->destroy_wait, refcount_read(&to_destroy->shortterm_users) == 0, - msecs_to_jiffies(10000))) + msecs_to_jiffies(60000))) return 0; pr_crit("Time out waiting for iommufd object to become free\n"); diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index ff55b8c30712..ae69691471e9 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -1087,7 +1087,7 @@ static int ipmmu_probe(struct platform_device *pdev) * - R-Car Gen3 IPMMU (leaf devices only - skip root IPMMU-MM device) */ if (!mmu->features->has_cache_leaf_nodes || !ipmmu_is_root(mmu)) { - ret = iommu_device_sysfs_add(&mmu->iommu, &pdev->dev, NULL, + ret = iommu_device_sysfs_add(&mmu->iommu, &pdev->dev, NULL, "%s", dev_name(&pdev->dev)); if (ret) return ret; diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 6a2707fe7a78..32deab732209 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -1371,15 +1371,6 @@ static int mtk_iommu_probe(struct platform_device *pdev) platform_set_drvdata(pdev, data); mutex_init(&data->mutex); - ret = iommu_device_sysfs_add(&data->iommu, dev, NULL, - "mtk-iommu.%pa", &ioaddr); - if (ret) - goto out_link_remove; - - ret = iommu_device_register(&data->iommu, &mtk_iommu_ops, dev); - if (ret) - goto out_sysfs_remove; - if (MTK_IOMMU_HAS_FLAG(data->plat_data, SHARE_PGTABLE)) { list_add_tail(&data->list, data->plat_data->hw_list); data->hw_list = data->plat_data->hw_list; @@ -1389,19 +1380,28 @@ static int mtk_iommu_probe(struct platform_device *pdev) data->hw_list = &data->hw_list_head; } + ret = iommu_device_sysfs_add(&data->iommu, dev, NULL, + "mtk-iommu.%pa", &ioaddr); + if (ret) + goto out_list_del; + + ret = iommu_device_register(&data->iommu, &mtk_iommu_ops, dev); + if (ret) + goto out_sysfs_remove; + if (MTK_IOMMU_IS_TYPE(data->plat_data, MTK_IOMMU_TYPE_MM)) { ret = component_master_add_with_match(dev, &mtk_iommu_com_ops, match); if (ret) - goto out_list_del; + goto out_device_unregister; } return ret; -out_list_del: - list_del(&data->list); +out_device_unregister: iommu_device_unregister(&data->iommu); out_sysfs_remove: iommu_device_sysfs_remove(&data->iommu); -out_link_remove: +out_list_del: + list_del(&data->list); if (MTK_IOMMU_IS_TYPE(data->plat_data, MTK_IOMMU_TYPE_MM)) device_link_remove(data->smicomm_dev, dev); out_runtime_disable: diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c index e7a6a1611d19..e3fcab925a54 100644 --- a/drivers/iommu/of_iommu.c +++ b/drivers/iommu/of_iommu.c @@ -118,6 +118,7 @@ static void of_pci_check_device_ats(struct device *dev, struct device_node *np) int of_iommu_configure(struct device *dev, struct device_node *master_np, const u32 *id) { + bool dev_iommu_present; int err; if (!master_np) @@ -129,6 +130,7 @@ int of_iommu_configure(struct device *dev, struct device_node *master_np, mutex_unlock(&iommu_probe_device_lock); return 0; } + dev_iommu_present = dev->iommu; /* * We don't currently walk up the tree looking for a parent IOMMU. @@ -149,8 +151,10 @@ int of_iommu_configure(struct device *dev, struct device_node *master_np, err = of_iommu_configure_device(master_np, dev, id); } - if (err) + if (err && dev_iommu_present) iommu_fwspec_free(dev); + else if (err && dev->iommu) + dev_iommu_free(dev); mutex_unlock(&iommu_probe_device_lock); if (!err && dev->bus) diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c index 4b369419b32c..4c7f470a4752 100644 --- a/drivers/iommu/rockchip-iommu.c +++ b/drivers/iommu/rockchip-iommu.c @@ -1154,7 +1154,6 @@ static int rk_iommu_of_xlate(struct device *dev, iommu_dev = of_find_device_by_node(args->np); data->iommu = platform_get_drvdata(iommu_dev); - data->iommu->domain = &rk_identity_domain; dev_iommu_priv_set(dev, data); platform_device_put(iommu_dev); @@ -1192,6 +1191,8 @@ static int rk_iommu_probe(struct platform_device *pdev) if (!iommu) return -ENOMEM; + iommu->domain = &rk_identity_domain; + platform_set_drvdata(pdev, iommu); iommu->dev = dev; iommu->num_mmu = 0; |