summaryrefslogtreecommitdiff
path: root/drivers/iommu
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/iommu')
-rw-r--r--drivers/iommu/Kconfig1
-rw-r--r--drivers/iommu/amd/amd_iommu.h1
-rw-r--r--drivers/iommu/amd/amd_iommu_types.h1
-rw-r--r--drivers/iommu/amd/init.c12
-rw-r--r--drivers/iommu/amd/io_pgtable_v2.c2
-rw-r--r--drivers/iommu/amd/iommu.c71
-rw-r--r--drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c6
-rw-r--r--drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c53
-rw-r--r--drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c40
-rw-r--r--drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c2
-rw-r--r--drivers/iommu/dma-iommu.c28
-rw-r--r--drivers/iommu/exynos-iommu.c4
-rw-r--r--drivers/iommu/intel/dmar.c1
-rw-r--r--drivers/iommu/intel/iommu.c46
-rw-r--r--drivers/iommu/intel/iommu.h4
-rw-r--r--drivers/iommu/intel/irq_remapping.c71
-rw-r--r--drivers/iommu/intel/nested.c4
-rw-r--r--drivers/iommu/io-pgfault.c1
-rw-r--r--drivers/iommu/iommu-priv.h2
-rw-r--r--drivers/iommu/iommu.c14
-rw-r--r--drivers/iommu/iommufd/device.c157
-rw-r--r--drivers/iommu/iommufd/fault.c52
-rw-r--r--drivers/iommu/iommufd/hw_pagetable.c3
-rw-r--r--drivers/iommu/iommufd/io_pagetable.c48
-rw-r--r--drivers/iommu/iommufd/iommufd_private.h62
-rw-r--r--drivers/iommu/iommufd/iova_bitmap.c2
-rw-r--r--drivers/iommu/iommufd/main.c2
-rw-r--r--drivers/iommu/ipmmu-vmsa.c2
-rw-r--r--drivers/iommu/mtk_iommu.c26
-rw-r--r--drivers/iommu/of_iommu.c6
-rw-r--r--drivers/iommu/rockchip-iommu.c3
31 files changed, 521 insertions, 206 deletions
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index b3aa1f5d5321..1469ad0794f2 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -199,7 +199,6 @@ source "drivers/iommu/iommufd/Kconfig"
config IRQ_REMAP
bool "Support for Interrupt Remapping"
depends on X86_64 && X86_IO_APIC && PCI_MSI && ACPI
- select DMAR_TABLE if INTEL_IOMMU
help
Supports Interrupt remapping for IO-APIC and MSI devices.
To use x2apic mode in the CPU's which support x2APIC enhancements or
diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
index 6386fa4556d9..6fac9ee8dd3e 100644
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -87,7 +87,6 @@ int amd_iommu_complete_ppr(struct device *dev, u32 pasid, int status, int tag);
*/
void amd_iommu_flush_all_caches(struct amd_iommu *iommu);
void amd_iommu_update_and_flush_device_table(struct protection_domain *domain);
-void amd_iommu_domain_update(struct protection_domain *domain);
void amd_iommu_domain_flush_pages(struct protection_domain *domain,
u64 address, size_t size);
void amd_iommu_dev_flush_pasid_pages(struct iommu_dev_data *dev_data,
diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h
index 601fb4ee6900..6fb2f2919ab1 100644
--- a/drivers/iommu/amd/amd_iommu_types.h
+++ b/drivers/iommu/amd/amd_iommu_types.h
@@ -175,6 +175,7 @@
#define CONTROL_GAM_EN 25
#define CONTROL_GALOG_EN 28
#define CONTROL_GAINT_EN 29
+#define CONTROL_EPH_EN 45
#define CONTROL_XT_EN 50
#define CONTROL_INTCAPXT_EN 51
#define CONTROL_IRTCACHEDIS 59
diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index 43131c3a2172..ff11cd7e5c06 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -2647,6 +2647,10 @@ static void iommu_init_flags(struct amd_iommu *iommu)
/* Set IOTLB invalidation timeout to 1s */
iommu_set_inv_tlb_timeout(iommu, CTRL_INV_TO_1S);
+
+ /* Enable Enhanced Peripheral Page Request Handling */
+ if (check_feature(FEATURE_EPHSUP))
+ iommu_feature_enable(iommu, CONTROL_EPH_EN);
}
static void iommu_apply_resume_quirks(struct amd_iommu *iommu)
@@ -3651,6 +3655,14 @@ found:
while (*uid == '0' && *(uid + 1))
uid++;
+ if (strlen(hid) >= ACPIHID_HID_LEN) {
+ pr_err("Invalid command line: hid is too long\n");
+ return 1;
+ } else if (strlen(uid) >= ACPIHID_UID_LEN) {
+ pr_err("Invalid command line: uid is too long\n");
+ return 1;
+ }
+
i = early_acpihid_map_size++;
memcpy(early_acpihid_map[i].hid, hid, strlen(hid));
memcpy(early_acpihid_map[i].uid, uid, strlen(uid));
diff --git a/drivers/iommu/amd/io_pgtable_v2.c b/drivers/iommu/amd/io_pgtable_v2.c
index c616de2c5926..a56a27396305 100644
--- a/drivers/iommu/amd/io_pgtable_v2.c
+++ b/drivers/iommu/amd/io_pgtable_v2.c
@@ -254,7 +254,7 @@ static int iommu_v2_map_pages(struct io_pgtable_ops *ops, unsigned long iova,
pte = v2_alloc_pte(cfg->amd.nid, pgtable->pgd,
iova, map_size, gfp, &updated);
if (!pte) {
- ret = -EINVAL;
+ ret = -ENOMEM;
goto out;
}
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 8364cd6fa47d..6a019670efc7 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -107,7 +107,9 @@ static inline int get_acpihid_device_id(struct device *dev,
struct acpihid_map_entry **entry)
{
struct acpi_device *adev = ACPI_COMPANION(dev);
- struct acpihid_map_entry *p;
+ struct acpihid_map_entry *p, *p1 = NULL;
+ int hid_count = 0;
+ bool fw_bug;
if (!adev)
return -ENODEV;
@@ -115,12 +117,33 @@ static inline int get_acpihid_device_id(struct device *dev,
list_for_each_entry(p, &acpihid_map, list) {
if (acpi_dev_hid_uid_match(adev, p->hid,
p->uid[0] ? p->uid : NULL)) {
- if (entry)
- *entry = p;
- return p->devid;
+ p1 = p;
+ fw_bug = false;
+ hid_count = 1;
+ break;
+ }
+
+ /*
+ * Count HID matches w/o UID, raise FW_BUG but allow exactly one match
+ */
+ if (acpi_dev_hid_match(adev, p->hid)) {
+ p1 = p;
+ hid_count++;
+ fw_bug = true;
}
}
- return -EINVAL;
+
+ if (!p1)
+ return -EINVAL;
+ if (fw_bug)
+ dev_err_once(dev, FW_BUG "No ACPI device matched UID, but %d device%s matched HID.\n",
+ hid_count, hid_count > 1 ? "s" : "");
+ if (hid_count > 1)
+ return -EINVAL;
+ if (entry)
+ *entry = p1;
+
+ return p1->devid;
}
static inline int get_device_sbdf_id(struct device *dev)
@@ -460,8 +483,8 @@ static inline void pdev_disable_cap_pasid(struct pci_dev *pdev)
static void pdev_enable_caps(struct pci_dev *pdev)
{
- pdev_enable_cap_ats(pdev);
pdev_enable_cap_pasid(pdev);
+ pdev_enable_cap_ats(pdev);
pdev_enable_cap_pri(pdev);
}
@@ -838,6 +861,14 @@ int amd_iommu_register_ga_log_notifier(int (*notifier)(u32))
{
iommu_ga_log_notifier = notifier;
+ /*
+ * Ensure all in-flight IRQ handlers run to completion before returning
+ * to the caller, e.g. to ensure module code isn't unloaded while it's
+ * being executed in the IRQ handler.
+ */
+ if (!notifier)
+ synchronize_rcu();
+
return 0;
}
EXPORT_SYMBOL(amd_iommu_register_ga_log_notifier);
@@ -1606,15 +1637,6 @@ void amd_iommu_update_and_flush_device_table(struct protection_domain *domain)
domain_flush_complete(domain);
}
-void amd_iommu_domain_update(struct protection_domain *domain)
-{
- /* Update device table */
- amd_iommu_update_and_flush_device_table(domain);
-
- /* Flush domain TLB(s) and wait for completion */
- amd_iommu_domain_flush_all(domain);
-}
-
int amd_iommu_complete_ppr(struct device *dev, u32 pasid, int status, int tag)
{
struct iommu_dev_data *dev_data;
@@ -2330,8 +2352,21 @@ static inline u64 dma_max_address(void)
if (amd_iommu_pgtable == AMD_IOMMU_V1)
return ~0ULL;
- /* V2 with 4/5 level page table */
- return ((1ULL << PM_LEVEL_SHIFT(amd_iommu_gpt_level)) - 1);
+ /*
+ * V2 with 4/5 level page table. Note that "2.2.6.5 AMD64 4-Kbyte Page
+ * Translation" shows that the V2 table sign extends the top of the
+ * address space creating a reserved region in the middle of the
+ * translation, just like the CPU does. Further Vasant says the docs are
+ * incomplete and this only applies to non-zero PASIDs. If the AMDv2
+ * page table is assigned to the 0 PASID then there is no sign extension
+ * check.
+ *
+ * Since the IOMMU must have a fixed geometry, and the core code does
+ * not understand sign extended addressing, we have to chop off the high
+ * bit to get consistent behavior with attachments of the domain to any
+ * PASID.
+ */
+ return ((1ULL << (PM_LEVEL_SHIFT(amd_iommu_gpt_level) - 1)) - 1);
}
static bool amd_iommu_hd_support(struct amd_iommu *iommu)
@@ -3669,7 +3704,7 @@ static int amd_ir_set_vcpu_affinity(struct irq_data *data, void *vcpu_info)
* we should not modify the IRTE
*/
if (!dev_data || !dev_data->use_vapic)
- return 0;
+ return -EINVAL;
ir_data->cfg = irqd_cfg(data);
pi_data->ir_data = ir_data;
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
index a7c36654dee5..32f3e91a7d7f 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
@@ -397,6 +397,12 @@ struct iommu_domain *arm_smmu_sva_domain_alloc(struct device *dev,
return ERR_CAST(smmu_domain);
smmu_domain->domain.type = IOMMU_DOMAIN_SVA;
smmu_domain->domain.ops = &arm_smmu_sva_domain_ops;
+
+ /*
+ * Choose page_size as the leaf page size for invalidation when
+ * ARM_SMMU_FEAT_RANGE_INV is present
+ */
+ smmu_domain->domain.pgsize_bitmap = PAGE_SIZE;
smmu_domain->smmu = smmu;
ret = xa_alloc(&arm_smmu_asid_xa, &asid, smmu_domain,
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index 353fea58cd31..780e2d9e4ea8 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -2702,9 +2702,14 @@ static int arm_smmu_attach_prepare(struct arm_smmu_attach_state *state,
* Translation Requests and Translated transactions are denied
* as though ATS is disabled for the stream (STE.EATS == 0b00),
* causing F_BAD_ATS_TREQ and F_TRANSL_FORBIDDEN events
- * (IHI0070Ea 5.2 Stream Table Entry). Thus ATS can only be
- * enabled if we have arm_smmu_domain, those always have page
- * tables.
+ * (IHI0070Ea 5.2 Stream Table Entry).
+ *
+ * However, if we have installed a CD table and are using S1DSS
+ * then ATS will work in S1DSS bypass. See "13.6.4 Full ATS
+ * skipping stage 1".
+ *
+ * Disable ATS if we are going to create a normal 0b100 bypass
+ * STE.
*/
state->ats_enabled = arm_smmu_ats_supported(master);
}
@@ -3017,8 +3022,10 @@ static void arm_smmu_attach_dev_ste(struct iommu_domain *domain,
if (arm_smmu_ssids_in_use(&master->cd_table)) {
/*
* If a CD table has to be present then we need to run with ATS
- * on even though the RID will fail ATS queries with UR. This is
- * because we have no idea what the PASID's need.
+ * on because we have to assume a PASID is using ATS. For
+ * IDENTITY this will setup things so that S1DSS=bypass which
+ * follows the explanation in "13.6.4 Full ATS skipping stage 1"
+ * and allows for ATS on the RID to work.
*/
state.cd_needs_ats = true;
arm_smmu_attach_prepare(&state, domain);
@@ -3213,6 +3220,7 @@ static int arm_smmu_insert_master(struct arm_smmu_device *smmu,
mutex_lock(&smmu->streams_mutex);
for (i = 0; i < fwspec->num_ids; i++) {
struct arm_smmu_stream *new_stream = &master->streams[i];
+ struct rb_node *existing;
u32 sid = fwspec->ids[i];
new_stream->id = sid;
@@ -3223,10 +3231,20 @@ static int arm_smmu_insert_master(struct arm_smmu_device *smmu,
break;
/* Insert into SID tree */
- if (rb_find_add(&new_stream->node, &smmu->streams,
- arm_smmu_streams_cmp_node)) {
- dev_warn(master->dev, "stream %u already in tree\n",
- sid);
+ existing = rb_find_add(&new_stream->node, &smmu->streams,
+ arm_smmu_streams_cmp_node);
+ if (existing) {
+ struct arm_smmu_master *existing_master =
+ rb_entry(existing, struct arm_smmu_stream, node)
+ ->master;
+
+ /* Bridged PCI devices may end up with duplicated IDs */
+ if (existing_master == master)
+ continue;
+
+ dev_warn(master->dev,
+ "stream %u already in tree from dev %s\n", sid,
+ dev_name(existing_master->dev));
ret = -EINVAL;
break;
}
@@ -4609,7 +4627,7 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
/* Initialise in-memory data structures */
ret = arm_smmu_init_structures(smmu);
if (ret)
- return ret;
+ goto err_free_iopf;
/* Record our private device structure */
platform_set_drvdata(pdev, smmu);
@@ -4620,22 +4638,29 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
/* Reset the device */
ret = arm_smmu_device_reset(smmu);
if (ret)
- return ret;
+ goto err_disable;
/* And we're up. Go go go! */
ret = iommu_device_sysfs_add(&smmu->iommu, dev, NULL,
"smmu3.%pa", &ioaddr);
if (ret)
- return ret;
+ goto err_disable;
ret = iommu_device_register(&smmu->iommu, &arm_smmu_ops, dev);
if (ret) {
dev_err(dev, "Failed to register iommu\n");
- iommu_device_sysfs_remove(&smmu->iommu);
- return ret;
+ goto err_free_sysfs;
}
return 0;
+
+err_free_sysfs:
+ iommu_device_sysfs_remove(&smmu->iommu);
+err_disable:
+ arm_smmu_device_disable(smmu);
+err_free_iopf:
+ iopf_queue_free(smmu->evtq.iopf);
+ return ret;
}
static void arm_smmu_device_remove(struct platform_device *pdev)
diff --git a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c
index 6e41ddaa24d6..dd7d030d2e89 100644
--- a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c
+++ b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c
@@ -79,7 +79,6 @@
#define TEGRA241_VCMDQ_PAGE1(q) (TEGRA241_VCMDQ_PAGE1_BASE + 0x80*(q))
#define VCMDQ_ADDR GENMASK(47, 5)
#define VCMDQ_LOG2SIZE GENMASK(4, 0)
-#define VCMDQ_LOG2SIZE_MAX 19
#define TEGRA241_VCMDQ_BASE 0x00000
#define TEGRA241_VCMDQ_CONS_INDX_BASE 0x00008
@@ -488,29 +487,21 @@ static int tegra241_cmdqv_hw_reset(struct arm_smmu_device *smmu)
/* VCMDQ Resource Helpers */
-static void tegra241_vcmdq_free_smmu_cmdq(struct tegra241_vcmdq *vcmdq)
-{
- struct arm_smmu_queue *q = &vcmdq->cmdq.q;
- size_t nents = 1 << q->llq.max_n_shift;
- size_t qsz = nents << CMDQ_ENT_SZ_SHIFT;
-
- if (!q->base)
- return;
- dmam_free_coherent(vcmdq->cmdqv->smmu.dev, qsz, q->base, q->base_dma);
-}
-
static int tegra241_vcmdq_alloc_smmu_cmdq(struct tegra241_vcmdq *vcmdq)
{
struct arm_smmu_device *smmu = &vcmdq->cmdqv->smmu;
struct arm_smmu_cmdq *cmdq = &vcmdq->cmdq;
struct arm_smmu_queue *q = &cmdq->q;
char name[16];
+ u32 regval;
int ret;
snprintf(name, 16, "vcmdq%u", vcmdq->idx);
- /* Queue size, capped to ensure natural alignment */
- q->llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT, VCMDQ_LOG2SIZE_MAX);
+ /* Cap queue size to SMMU's IDR1.CMDQS and ensure natural alignment */
+ regval = readl_relaxed(smmu->base + ARM_SMMU_IDR1);
+ q->llq.max_n_shift =
+ min_t(u32, CMDQ_MAX_SZ_SHIFT, FIELD_GET(IDR1_CMDQS, regval));
/* Use the common helper to init the VCMDQ, and then... */
ret = arm_smmu_init_one_queue(smmu, q, vcmdq->page0,
@@ -558,7 +549,8 @@ static void tegra241_vintf_free_lvcmdq(struct tegra241_vintf *vintf, u16 lidx)
struct tegra241_vcmdq *vcmdq = vintf->lvcmdqs[lidx];
char header[64];
- tegra241_vcmdq_free_smmu_cmdq(vcmdq);
+ /* Note that the lvcmdq queue memory space is managed by devres */
+
tegra241_vintf_deinit_lvcmdq(vintf, lidx);
dev_dbg(vintf->cmdqv->dev,
@@ -766,13 +758,13 @@ static int tegra241_cmdqv_init_structures(struct arm_smmu_device *smmu)
vintf = kzalloc(sizeof(*vintf), GFP_KERNEL);
if (!vintf)
- goto out_fallback;
+ return -ENOMEM;
/* Init VINTF0 for in-kernel use */
ret = tegra241_cmdqv_init_vintf(cmdqv, 0, vintf);
if (ret) {
dev_err(cmdqv->dev, "failed to init vintf0: %d\n", ret);
- goto free_vintf;
+ return ret;
}
/* Preallocate logical VCMDQs to VINTF0 */
@@ -781,24 +773,12 @@ static int tegra241_cmdqv_init_structures(struct arm_smmu_device *smmu)
vcmdq = tegra241_vintf_alloc_lvcmdq(vintf, lidx);
if (IS_ERR(vcmdq))
- goto free_lvcmdq;
+ return PTR_ERR(vcmdq);
}
/* Now, we are ready to run all the impl ops */
smmu->impl_ops = &tegra241_cmdqv_impl_ops;
return 0;
-
-free_lvcmdq:
- for (lidx--; lidx >= 0; lidx--)
- tegra241_vintf_free_lvcmdq(vintf, lidx);
- tegra241_cmdqv_deinit_vintf(cmdqv, vintf->idx);
-free_vintf:
- kfree(vintf);
-out_fallback:
- dev_info(smmu->impl_dev, "Falling back to standard SMMU CMDQ\n");
- smmu->options &= ~ARM_SMMU_OPT_TEGRA241_CMDQV;
- tegra241_cmdqv_remove(smmu);
- return 0;
}
#ifdef CONFIG_IOMMU_DEBUGFS
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
index 6372f3e25c4b..0c35a235ab6d 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
@@ -258,6 +258,7 @@ static const struct of_device_id qcom_smmu_client_of_match[] __maybe_unused = {
{ .compatible = "qcom,sdm670-mdss" },
{ .compatible = "qcom,sdm845-mdss" },
{ .compatible = "qcom,sdm845-mss-pil" },
+ { .compatible = "qcom,sm6115-mdss" },
{ .compatible = "qcom,sm6350-mdss" },
{ .compatible = "qcom,sm6375-mdss" },
{ .compatible = "qcom,sm8150-mdss" },
@@ -567,6 +568,7 @@ static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = {
{ .compatible = "qcom,sc8180x-smmu-500", .data = &qcom_smmu_500_impl0_data },
{ .compatible = "qcom,sc8280xp-smmu-500", .data = &qcom_smmu_500_impl0_data },
{ .compatible = "qcom,sdm630-smmu-v2", .data = &qcom_smmu_v2_data },
+ { .compatible = "qcom,sdm670-smmu-v2", .data = &qcom_smmu_v2_data },
{ .compatible = "qcom,sdm845-smmu-v2", .data = &qcom_smmu_v2_data },
{ .compatible = "qcom,sdm845-smmu-500", .data = &sdm845_smmu_500_data },
{ .compatible = "qcom,sm6115-smmu-500", .data = &qcom_smmu_500_impl0_data},
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 2a9fa0c8cc00..0f0caf59023c 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -1815,7 +1815,7 @@ int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr)
static DEFINE_MUTEX(msi_prepare_lock); /* see below */
if (!domain || !domain->iova_cookie) {
- desc->iommu_cookie = NULL;
+ msi_desc_set_iommu_msi_iova(desc, 0, 0);
return 0;
}
@@ -1827,11 +1827,12 @@ int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr)
mutex_lock(&msi_prepare_lock);
msi_page = iommu_dma_get_msi_page(dev, msi_addr, domain);
mutex_unlock(&msi_prepare_lock);
-
- msi_desc_set_iommu_cookie(desc, msi_page);
-
if (!msi_page)
return -ENOMEM;
+
+ msi_desc_set_iommu_msi_iova(
+ desc, msi_page->iova,
+ ilog2(cookie_msi_granule(domain->iova_cookie)));
return 0;
}
@@ -1842,18 +1843,15 @@ int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr)
*/
void iommu_dma_compose_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
{
- struct device *dev = msi_desc_to_dev(desc);
- const struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
- const struct iommu_dma_msi_page *msi_page;
+#ifdef CONFIG_IRQ_MSI_IOMMU
+ if (desc->iommu_msi_shift) {
+ u64 msi_iova = desc->iommu_msi_iova << desc->iommu_msi_shift;
- msi_page = msi_desc_get_iommu_cookie(desc);
-
- if (!domain || !domain->iova_cookie || WARN_ON(!msi_page))
- return;
-
- msg->address_hi = upper_32_bits(msi_page->iova);
- msg->address_lo &= cookie_msi_granule(domain->iova_cookie) - 1;
- msg->address_lo += lower_32_bits(msi_page->iova);
+ msg->address_hi = upper_32_bits(msi_iova);
+ msg->address_lo = lower_32_bits(msi_iova) |
+ (msg->address_lo & ((1 << desc->iommu_msi_shift) - 1));
+ }
+#endif
}
static int iommu_dma_init(void)
diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c
index c666ecab955d..7465dbb6fa80 100644
--- a/drivers/iommu/exynos-iommu.c
+++ b/drivers/iommu/exynos-iommu.c
@@ -832,7 +832,7 @@ static int __maybe_unused exynos_sysmmu_suspend(struct device *dev)
struct exynos_iommu_owner *owner = dev_iommu_priv_get(master);
mutex_lock(&owner->rpm_lock);
- if (&data->domain->domain != &exynos_identity_domain) {
+ if (data->domain) {
dev_dbg(data->sysmmu, "saving state\n");
__sysmmu_disable(data);
}
@@ -850,7 +850,7 @@ static int __maybe_unused exynos_sysmmu_resume(struct device *dev)
struct exynos_iommu_owner *owner = dev_iommu_priv_get(master);
mutex_lock(&owner->rpm_lock);
- if (&data->domain->domain != &exynos_identity_domain) {
+ if (data->domain) {
dev_dbg(data->sysmmu, "restoring state\n");
__sysmmu_enable(data);
}
diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c
index eaf862e8dea1..7f553f7aa3cb 100644
--- a/drivers/iommu/intel/dmar.c
+++ b/drivers/iommu/intel/dmar.c
@@ -2056,6 +2056,7 @@ int enable_drhd_fault_handling(unsigned int cpu)
/*
* Enable fault control interrupt.
*/
+ guard(rwsem_read)(&dmar_global_lock);
for_each_iommu(iommu, drhd) {
u32 fault_status;
int ret;
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index cc23cfcdeb2d..b300f72cf01e 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -1957,6 +1957,18 @@ static bool dev_is_real_dma_subdevice(struct device *dev)
pci_real_dma_dev(to_pci_dev(dev)) != to_pci_dev(dev);
}
+static bool domain_need_iotlb_sync_map(struct dmar_domain *domain,
+ struct intel_iommu *iommu)
+{
+ if (cap_caching_mode(iommu->cap) && !domain->use_first_level)
+ return true;
+
+ if (rwbf_quirk || cap_rwbf(iommu->cap))
+ return true;
+
+ return false;
+}
+
static int dmar_domain_attach_device(struct dmar_domain *domain,
struct device *dev)
{
@@ -1970,6 +1982,7 @@ static int dmar_domain_attach_device(struct dmar_domain *domain,
return ret;
info->domain = domain;
+ info->domain_attached = true;
spin_lock_irqsave(&domain->lock, flags);
list_add(&info->link, &domain->devices);
spin_unlock_irqrestore(&domain->lock, flags);
@@ -1993,6 +2006,8 @@ static int dmar_domain_attach_device(struct dmar_domain *domain,
if (ret)
goto out_block_translation;
+ domain->iotlb_sync_map |= domain_need_iotlb_sync_map(domain, iommu);
+
return 0;
out_block_translation:
@@ -3174,6 +3189,7 @@ static int __init probe_acpi_namespace_devices(void)
if (dev->bus != &acpi_bus_type)
continue;
+ up_read(&dmar_global_lock);
adev = to_acpi_device(dev);
mutex_lock(&adev->physical_node_lock);
list_for_each_entry(pn,
@@ -3183,6 +3199,7 @@ static int __init probe_acpi_namespace_devices(void)
break;
}
mutex_unlock(&adev->physical_node_lock);
+ down_read(&dmar_global_lock);
if (ret)
return ret;
@@ -3307,7 +3324,14 @@ int __init intel_iommu_init(void)
iommu_device_sysfs_add(&iommu->iommu, NULL,
intel_iommu_groups,
"%s", iommu->name);
+ /*
+ * The iommu device probe is protected by the iommu_probe_device_lock.
+ * Release the dmar_global_lock before entering the device probe path
+ * to avoid unnecessary lock order splat.
+ */
+ up_read(&dmar_global_lock);
iommu_device_register(&iommu->iommu, &intel_iommu_ops, NULL);
+ down_read(&dmar_global_lock);
iommu_pmu_register(iommu);
}
@@ -3372,6 +3396,10 @@ void device_block_translation(struct device *dev)
struct intel_iommu *iommu = info->iommu;
unsigned long flags;
+ /* Device in DMA blocking state. Noting to do. */
+ if (!info->domain_attached)
+ return;
+
if (info->domain)
cache_tag_unassign_domain(info->domain, dev, IOMMU_NO_PASID);
@@ -3384,6 +3412,9 @@ void device_block_translation(struct device *dev)
domain_context_clear(info);
}
+ /* Device now in DMA blocking state. */
+ info->domain_attached = false;
+
if (!info->domain)
return;
@@ -4261,7 +4292,10 @@ static bool risky_device(struct pci_dev *pdev)
static int intel_iommu_iotlb_sync_map(struct iommu_domain *domain,
unsigned long iova, size_t size)
{
- cache_tag_flush_range_np(to_dmar_domain(domain), iova, iova + size - 1);
+ struct dmar_domain *dmar_domain = to_dmar_domain(domain);
+
+ if (dmar_domain->iotlb_sync_map)
+ cache_tag_flush_range_np(dmar_domain, iova, iova + size - 1);
return 0;
}
@@ -4547,9 +4581,6 @@ static int context_setup_pass_through_cb(struct pci_dev *pdev, u16 alias, void *
{
struct device *dev = data;
- if (dev != &pdev->dev)
- return 0;
-
return context_setup_pass_through(dev, PCI_BUS_NUM(alias), alias & 0xff);
}
@@ -4583,6 +4614,9 @@ static int identity_domain_attach_dev(struct iommu_domain *domain, struct device
ret = device_setup_pass_through(dev);
}
+ if (!ret)
+ info->domain_attached = true;
+
return ret;
}
@@ -4660,6 +4694,9 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_igfx);
+/* QM57/QS57 integrated gfx malfunctions with dmar */
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_iommu_igfx);
+
/* Broadwell igfx malfunctions with dmar */
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1606, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160B, quirk_iommu_igfx);
@@ -4737,7 +4774,6 @@ static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
}
}
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index 1497f3112b12..f521155fb793 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -614,6 +614,9 @@ struct dmar_domain {
u8 has_mappings:1; /* Has mappings configured through
* iommu_map() interface.
*/
+ u8 iotlb_sync_map:1; /* Need to flush IOTLB cache or write
+ * buffer when creating mappings.
+ */
spinlock_t lock; /* Protect device tracking lists */
struct list_head devices; /* all devices' list */
@@ -776,6 +779,7 @@ struct device_domain_info {
u8 ats_supported:1;
u8 ats_enabled:1;
u8 dtlb_extra_inval:1; /* Quirk for devices need extra flush */
+ u8 domain_attached:1; /* Device has domain attached */
u8 ats_qdep;
struct device *dev; /* it's NULL for PCIe-to-PCI bridge */
struct intel_iommu *iommu; /* IOMMU used by this device */
diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c
index 7a6d188e3bea..71b3383b7115 100644
--- a/drivers/iommu/intel/irq_remapping.c
+++ b/drivers/iommu/intel/irq_remapping.c
@@ -26,11 +26,6 @@
#include "../iommu-pages.h"
#include "cap_audit.h"
-enum irq_mode {
- IRQ_REMAPPING,
- IRQ_POSTING,
-};
-
struct ioapic_scope {
struct intel_iommu *iommu;
unsigned int id;
@@ -50,8 +45,8 @@ struct irq_2_iommu {
u16 irte_index;
u16 sub_handle;
u8 irte_mask;
- enum irq_mode mode;
bool posted_msi;
+ bool posted_vcpu;
};
struct intel_ir_data {
@@ -139,7 +134,6 @@ static int alloc_irte(struct intel_iommu *iommu,
irq_iommu->irte_index = index;
irq_iommu->sub_handle = 0;
irq_iommu->irte_mask = mask;
- irq_iommu->mode = IRQ_REMAPPING;
}
raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags);
@@ -194,8 +188,6 @@ static int modify_irte(struct irq_2_iommu *irq_iommu,
rc = qi_flush_iec(iommu, index, 0);
- /* Update iommu mode according to the IRTE mode */
- irq_iommu->mode = irte->pst ? IRQ_POSTING : IRQ_REMAPPING;
raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags);
return rc;
@@ -1173,7 +1165,26 @@ static void intel_ir_reconfigure_irte_posted(struct irq_data *irqd)
static inline void intel_ir_reconfigure_irte_posted(struct irq_data *irqd) {}
#endif
-static void intel_ir_reconfigure_irte(struct irq_data *irqd, bool force)
+static void __intel_ir_reconfigure_irte(struct irq_data *irqd, bool force_host)
+{
+ struct intel_ir_data *ir_data = irqd->chip_data;
+
+ /*
+ * Don't modify IRTEs for IRQs that are being posted to vCPUs if the
+ * host CPU affinity changes.
+ */
+ if (ir_data->irq_2_iommu.posted_vcpu && !force_host)
+ return;
+
+ ir_data->irq_2_iommu.posted_vcpu = false;
+
+ if (ir_data->irq_2_iommu.posted_msi)
+ intel_ir_reconfigure_irte_posted(irqd);
+ else
+ modify_irte(&ir_data->irq_2_iommu, &ir_data->irte_entry);
+}
+
+static void intel_ir_reconfigure_irte(struct irq_data *irqd, bool force_host)
{
struct intel_ir_data *ir_data = irqd->chip_data;
struct irte *irte = &ir_data->irte_entry;
@@ -1186,10 +1197,7 @@ static void intel_ir_reconfigure_irte(struct irq_data *irqd, bool force)
irte->vector = cfg->vector;
irte->dest_id = IRTE_DEST(cfg->dest_apicid);
- if (ir_data->irq_2_iommu.posted_msi)
- intel_ir_reconfigure_irte_posted(irqd);
- else if (force || ir_data->irq_2_iommu.mode == IRQ_REMAPPING)
- modify_irte(&ir_data->irq_2_iommu, irte);
+ __intel_ir_reconfigure_irte(irqd, force_host);
}
/*
@@ -1244,7 +1252,7 @@ static int intel_ir_set_vcpu_affinity(struct irq_data *data, void *info)
/* stop posting interrupts, back to the default mode */
if (!vcpu_pi_info) {
- modify_irte(&ir_data->irq_2_iommu, &ir_data->irte_entry);
+ __intel_ir_reconfigure_irte(data, true);
} else {
struct irte irte_pi;
@@ -1267,6 +1275,7 @@ static int intel_ir_set_vcpu_affinity(struct irq_data *data, void *info)
irte_pi.pda_h = (vcpu_pi_info->pi_desc_addr >> 32) &
~(-1UL << PDA_HIGH_BIT);
+ ir_data->irq_2_iommu.posted_vcpu = true;
modify_irte(&ir_data->irq_2_iommu, &irte_pi);
}
@@ -1282,43 +1291,44 @@ static struct irq_chip intel_ir_chip = {
};
/*
- * With posted MSIs, all vectors are multiplexed into a single notification
- * vector. Devices MSIs are then dispatched in a demux loop where
- * EOIs can be coalesced as well.
+ * With posted MSIs, the MSI vectors are multiplexed into a single notification
+ * vector, and only the notification vector is sent to the APIC IRR. Device
+ * MSIs are then dispatched in a demux loop that harvests the MSIs from the
+ * CPU's Posted Interrupt Request bitmap. I.e. Posted MSIs never get sent to
+ * the APIC IRR, and thus do not need an EOI. The notification handler instead
+ * performs a single EOI after processing the PIR.
*
- * "INTEL-IR-POST" IRQ chip does not do EOI on ACK, thus the dummy irq_ack()
- * function. Instead EOI is performed by the posted interrupt notification
- * handler.
+ * Note! Pending SMP/CPU affinity changes, which are per MSI, must still be
+ * honored, only the APIC EOI is omitted.
*
* For the example below, 3 MSIs are coalesced into one CPU notification. Only
- * one apic_eoi() is needed.
+ * one apic_eoi() is needed, but each MSI needs to process pending changes to
+ * its CPU affinity.
*
* __sysvec_posted_msi_notification()
* irq_enter();
* handle_edge_irq()
* irq_chip_ack_parent()
- * dummy(); // No EOI
+ * irq_move_irq(); // No EOI
* handle_irq_event()
* driver_handler()
* handle_edge_irq()
* irq_chip_ack_parent()
- * dummy(); // No EOI
+ * irq_move_irq(); // No EOI
* handle_irq_event()
* driver_handler()
* handle_edge_irq()
* irq_chip_ack_parent()
- * dummy(); // No EOI
+ * irq_move_irq(); // No EOI
* handle_irq_event()
* driver_handler()
* apic_eoi()
* irq_exit()
+ *
*/
-
-static void dummy_ack(struct irq_data *d) { }
-
static struct irq_chip intel_ir_chip_post_msi = {
.name = "INTEL-IR-POST",
- .irq_ack = dummy_ack,
+ .irq_ack = irq_move_irq,
.irq_set_affinity = intel_ir_set_affinity,
.irq_compose_msi_msg = intel_ir_compose_msi_msg,
.irq_set_vcpu_affinity = intel_ir_set_vcpu_affinity,
@@ -1494,6 +1504,9 @@ static void intel_irq_remapping_deactivate(struct irq_domain *domain,
struct intel_ir_data *data = irq_data->chip_data;
struct irte entry;
+ WARN_ON_ONCE(data->irq_2_iommu.posted_vcpu);
+ data->irq_2_iommu.posted_vcpu = false;
+
memset(&entry, 0, sizeof(entry));
modify_irte(&data->irq_2_iommu, &entry);
}
diff --git a/drivers/iommu/intel/nested.c b/drivers/iommu/intel/nested.c
index 433c58944401..3b5251034a87 100644
--- a/drivers/iommu/intel/nested.c
+++ b/drivers/iommu/intel/nested.c
@@ -27,8 +27,7 @@ static int intel_nested_attach_dev(struct iommu_domain *domain,
unsigned long flags;
int ret = 0;
- if (info->domain)
- device_block_translation(dev);
+ device_block_translation(dev);
if (iommu->agaw < dmar_domain->s2_domain->agaw) {
dev_err_ratelimited(dev, "Adjusted guest address width not compatible\n");
@@ -62,6 +61,7 @@ static int intel_nested_attach_dev(struct iommu_domain *domain,
goto unassign_tag;
info->domain = dmar_domain;
+ info->domain_attached = true;
spin_lock_irqsave(&dmar_domain->lock, flags);
list_add(&info->link, &dmar_domain->devices);
spin_unlock_irqrestore(&dmar_domain->lock, flags);
diff --git a/drivers/iommu/io-pgfault.c b/drivers/iommu/io-pgfault.c
index 4674e618797c..8b5926c1452e 100644
--- a/drivers/iommu/io-pgfault.c
+++ b/drivers/iommu/io-pgfault.c
@@ -478,6 +478,7 @@ void iopf_queue_remove_device(struct iopf_queue *queue, struct device *dev)
ops->page_response(dev, iopf, &resp);
list_del_init(&group->pending_node);
+ iopf_free_group(group);
}
mutex_unlock(&fault_param->lock);
diff --git a/drivers/iommu/iommu-priv.h b/drivers/iommu/iommu-priv.h
index de5b54eaa8bf..a5913c0b02a0 100644
--- a/drivers/iommu/iommu-priv.h
+++ b/drivers/iommu/iommu-priv.h
@@ -17,6 +17,8 @@ static inline const struct iommu_ops *dev_iommu_ops(struct device *dev)
return dev->iommu->iommu_dev->ops;
}
+void dev_iommu_free(struct device *dev);
+
const struct iommu_ops *iommu_ops_from_fwnode(const struct fwnode_handle *fwnode);
static inline const struct iommu_ops *iommu_fwspec_ops(struct iommu_fwspec *fwspec)
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 83c8e617a2c5..0ad55649e2d0 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -347,7 +347,7 @@ static struct dev_iommu *dev_iommu_get(struct device *dev)
return param;
}
-static void dev_iommu_free(struct device *dev)
+void dev_iommu_free(struct device *dev)
{
struct dev_iommu *param = dev->iommu;
@@ -503,6 +503,9 @@ static void iommu_deinit_device(struct device *dev)
dev->iommu_group = NULL;
module_put(ops->owner);
dev_iommu_free(dev);
+#ifdef CONFIG_IOMMU_DMA
+ dev->dma_iommu = false;
+#endif
}
DEFINE_MUTEX(iommu_probe_device_lock);
@@ -2391,6 +2394,7 @@ static size_t iommu_pgsize(struct iommu_domain *domain, unsigned long iova,
unsigned int pgsize_idx, pgsize_idx_next;
unsigned long pgsizes;
size_t offset, pgsize, pgsize_next;
+ size_t offset_end;
unsigned long addr_merge = paddr | iova;
/* Page sizes supported by the hardware and small enough for @size */
@@ -2431,7 +2435,8 @@ static size_t iommu_pgsize(struct iommu_domain *domain, unsigned long iova,
* If size is big enough to accommodate the larger page, reduce
* the number of smaller pages.
*/
- if (offset + pgsize_next <= size)
+ if (!check_add_overflow(offset, pgsize_next, &offset_end) &&
+ offset_end <= size)
size = offset;
out_set_count:
@@ -3112,6 +3117,11 @@ int iommu_device_use_default_domain(struct device *dev)
return 0;
mutex_lock(&group->mutex);
+ /* We may race against bus_iommu_probe() finalising groups here */
+ if (!group->default_domain) {
+ ret = -EPROBE_DEFER;
+ goto unlock_out;
+ }
if (group->owner_cnt) {
if (group->domain != group->default_domain || group->owner ||
!xa_empty(&group->pasid_array)) {
diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c
index 5fd3dd420290..74480ae6bfc0 100644
--- a/drivers/iommu/iommufd/device.c
+++ b/drivers/iommu/iommufd/device.c
@@ -3,6 +3,7 @@
*/
#include <linux/iommu.h>
#include <linux/iommufd.h>
+#include <linux/pci-ats.h>
#include <linux/slab.h>
#include <uapi/linux/iommufd.h>
@@ -352,6 +353,122 @@ iommufd_device_attach_reserved_iova(struct iommufd_device *idev,
return 0;
}
+/* The device attach/detach/replace helpers for attach_handle */
+
+/* Check if idev is attached to igroup->hwpt */
+static bool iommufd_device_is_attached(struct iommufd_device *idev)
+{
+ struct iommufd_device *cur;
+
+ list_for_each_entry(cur, &idev->igroup->device_list, group_item)
+ if (cur == idev)
+ return true;
+ return false;
+}
+
+static int iommufd_hwpt_attach_device(struct iommufd_hw_pagetable *hwpt,
+ struct iommufd_device *idev)
+{
+ struct iommufd_attach_handle *handle;
+ int rc;
+
+ lockdep_assert_held(&idev->igroup->lock);
+
+ handle = kzalloc(sizeof(*handle), GFP_KERNEL);
+ if (!handle)
+ return -ENOMEM;
+
+ if (hwpt->fault) {
+ rc = iommufd_fault_iopf_enable(idev);
+ if (rc)
+ goto out_free_handle;
+ }
+
+ handle->idev = idev;
+ rc = iommu_attach_group_handle(hwpt->domain, idev->igroup->group,
+ &handle->handle);
+ if (rc)
+ goto out_disable_iopf;
+
+ return 0;
+
+out_disable_iopf:
+ if (hwpt->fault)
+ iommufd_fault_iopf_disable(idev);
+out_free_handle:
+ kfree(handle);
+ return rc;
+}
+
+static struct iommufd_attach_handle *
+iommufd_device_get_attach_handle(struct iommufd_device *idev)
+{
+ struct iommu_attach_handle *handle;
+
+ lockdep_assert_held(&idev->igroup->lock);
+
+ handle =
+ iommu_attach_handle_get(idev->igroup->group, IOMMU_NO_PASID, 0);
+ if (IS_ERR(handle))
+ return NULL;
+ return to_iommufd_handle(handle);
+}
+
+static void iommufd_hwpt_detach_device(struct iommufd_hw_pagetable *hwpt,
+ struct iommufd_device *idev)
+{
+ struct iommufd_attach_handle *handle;
+
+ handle = iommufd_device_get_attach_handle(idev);
+ iommu_detach_group_handle(hwpt->domain, idev->igroup->group);
+ if (hwpt->fault) {
+ iommufd_auto_response_faults(hwpt, handle);
+ iommufd_fault_iopf_disable(idev);
+ }
+ kfree(handle);
+}
+
+static int iommufd_hwpt_replace_device(struct iommufd_device *idev,
+ struct iommufd_hw_pagetable *hwpt,
+ struct iommufd_hw_pagetable *old)
+{
+ struct iommufd_attach_handle *handle, *old_handle =
+ iommufd_device_get_attach_handle(idev);
+ int rc;
+
+ handle = kzalloc(sizeof(*handle), GFP_KERNEL);
+ if (!handle)
+ return -ENOMEM;
+
+ if (hwpt->fault && !old->fault) {
+ rc = iommufd_fault_iopf_enable(idev);
+ if (rc)
+ goto out_free_handle;
+ }
+
+ handle->idev = idev;
+ rc = iommu_replace_group_handle(idev->igroup->group, hwpt->domain,
+ &handle->handle);
+ if (rc)
+ goto out_disable_iopf;
+
+ if (old->fault) {
+ iommufd_auto_response_faults(hwpt, old_handle);
+ if (!hwpt->fault)
+ iommufd_fault_iopf_disable(idev);
+ }
+ kfree(old_handle);
+
+ return 0;
+
+out_disable_iopf:
+ if (hwpt->fault && !old->fault)
+ iommufd_fault_iopf_disable(idev);
+out_free_handle:
+ kfree(handle);
+ return rc;
+}
+
int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt,
struct iommufd_device *idev)
{
@@ -488,6 +605,11 @@ iommufd_device_do_replace(struct iommufd_device *idev,
goto err_unlock;
}
+ if (!iommufd_device_is_attached(idev)) {
+ rc = -EINVAL;
+ goto err_unlock;
+ }
+
if (hwpt == igroup->hwpt) {
mutex_unlock(&idev->igroup->lock);
return NULL;
@@ -1127,7 +1249,7 @@ int iommufd_access_rw(struct iommufd_access *access, unsigned long iova,
struct io_pagetable *iopt;
struct iopt_area *area;
unsigned long last_iova;
- int rc;
+ int rc = -EINVAL;
if (!length)
return -EINVAL;
@@ -1183,7 +1305,8 @@ int iommufd_get_hw_info(struct iommufd_ucmd *ucmd)
void *data;
int rc;
- if (cmd->flags || cmd->__reserved)
+ if (cmd->flags || cmd->__reserved[0] || cmd->__reserved[1] ||
+ cmd->__reserved[2])
return -EOPNOTSUPP;
idev = iommufd_get_device(ucmd, cmd->dev_id);
@@ -1240,6 +1363,36 @@ int iommufd_get_hw_info(struct iommufd_ucmd *ucmd)
if (device_iommu_capable(idev->dev, IOMMU_CAP_DIRTY_TRACKING))
cmd->out_capabilities |= IOMMU_HW_CAP_DIRTY_TRACKING;
+ cmd->out_max_pasid_log2 = 0;
+ /*
+ * Currently, all iommu drivers enable PASID in the probe_device()
+ * op if iommu and device supports it. So the max_pasids stored in
+ * dev->iommu indicates both PASID support and enable status. A
+ * non-zero dev->iommu->max_pasids means PASID is supported and
+ * enabled. The iommufd only reports PASID capability to userspace
+ * if it's enabled.
+ */
+ if (idev->dev->iommu->max_pasids) {
+ cmd->out_max_pasid_log2 = ilog2(idev->dev->iommu->max_pasids);
+
+ if (dev_is_pci(idev->dev)) {
+ struct pci_dev *pdev = to_pci_dev(idev->dev);
+ int ctrl;
+
+ ctrl = pci_pasid_status(pdev);
+
+ WARN_ON_ONCE(ctrl < 0 ||
+ !(ctrl & PCI_PASID_CTRL_ENABLE));
+
+ if (ctrl & PCI_PASID_CTRL_EXEC)
+ cmd->out_capabilities |=
+ IOMMU_HW_CAP_PCI_PASID_EXEC;
+ if (ctrl & PCI_PASID_CTRL_PRIV)
+ cmd->out_capabilities |=
+ IOMMU_HW_CAP_PCI_PASID_PRIV;
+ }
+ }
+
rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
out_free:
kfree(data);
diff --git a/drivers/iommu/iommufd/fault.c b/drivers/iommu/iommufd/fault.c
index b8393a8c0753..1b0812f8bf84 100644
--- a/drivers/iommu/iommufd/fault.c
+++ b/drivers/iommu/iommufd/fault.c
@@ -16,7 +16,7 @@
#include "../iommu-priv.h"
#include "iommufd_private.h"
-static int iommufd_fault_iopf_enable(struct iommufd_device *idev)
+int iommufd_fault_iopf_enable(struct iommufd_device *idev)
{
struct device *dev = idev->dev;
int ret;
@@ -45,7 +45,7 @@ static int iommufd_fault_iopf_enable(struct iommufd_device *idev)
return ret;
}
-static void iommufd_fault_iopf_disable(struct iommufd_device *idev)
+void iommufd_fault_iopf_disable(struct iommufd_device *idev)
{
mutex_lock(&idev->iopf_lock);
if (!WARN_ON(idev->iopf_enabled == 0)) {
@@ -93,20 +93,28 @@ int iommufd_fault_domain_attach_dev(struct iommufd_hw_pagetable *hwpt,
return ret;
}
-static void iommufd_auto_response_faults(struct iommufd_hw_pagetable *hwpt,
- struct iommufd_attach_handle *handle)
+void iommufd_auto_response_faults(struct iommufd_hw_pagetable *hwpt,
+ struct iommufd_attach_handle *handle)
{
struct iommufd_fault *fault = hwpt->fault;
struct iopf_group *group, *next;
+ struct list_head free_list;
unsigned long index;
if (!fault)
return;
+ INIT_LIST_HEAD(&free_list);
mutex_lock(&fault->mutex);
+ spin_lock(&fault->lock);
list_for_each_entry_safe(group, next, &fault->deliver, node) {
if (group->attach_handle != &handle->handle)
continue;
+ list_move(&group->node, &free_list);
+ }
+ spin_unlock(&fault->lock);
+
+ list_for_each_entry_safe(group, next, &free_list, node) {
list_del(&group->node);
iopf_group_response(group, IOMMU_PAGE_RESP_INVALID);
iopf_free_group(group);
@@ -208,6 +216,7 @@ void iommufd_fault_destroy(struct iommufd_object *obj)
{
struct iommufd_fault *fault = container_of(obj, struct iommufd_fault, obj);
struct iopf_group *group, *next;
+ unsigned long index;
/*
* The iommufd object's reference count is zero at this point.
@@ -220,6 +229,13 @@ void iommufd_fault_destroy(struct iommufd_object *obj)
iopf_group_response(group, IOMMU_PAGE_RESP_INVALID);
iopf_free_group(group);
}
+ xa_for_each(&fault->response, index, group) {
+ xa_erase(&fault->response, index);
+ iopf_group_response(group, IOMMU_PAGE_RESP_INVALID);
+ iopf_free_group(group);
+ }
+ xa_destroy(&fault->response);
+ mutex_destroy(&fault->mutex);
}
static void iommufd_compose_fault_message(struct iommu_fault *fault,
@@ -242,7 +258,7 @@ static ssize_t iommufd_fault_fops_read(struct file *filep, char __user *buf,
{
size_t fault_size = sizeof(struct iommu_hwpt_pgfault);
struct iommufd_fault *fault = filep->private_data;
- struct iommu_hwpt_pgfault data;
+ struct iommu_hwpt_pgfault data = {};
struct iommufd_device *idev;
struct iopf_group *group;
struct iopf_fault *iopf;
@@ -253,17 +269,19 @@ static ssize_t iommufd_fault_fops_read(struct file *filep, char __user *buf,
return -ESPIPE;
mutex_lock(&fault->mutex);
- while (!list_empty(&fault->deliver) && count > done) {
- group = list_first_entry(&fault->deliver,
- struct iopf_group, node);
-
- if (group->fault_count * fault_size > count - done)
+ while ((group = iommufd_fault_deliver_fetch(fault))) {
+ if (done >= count ||
+ group->fault_count * fault_size > count - done) {
+ iommufd_fault_deliver_restore(fault, group);
break;
+ }
rc = xa_alloc(&fault->response, &group->cookie, group,
xa_limit_32b, GFP_KERNEL);
- if (rc)
+ if (rc) {
+ iommufd_fault_deliver_restore(fault, group);
break;
+ }
idev = to_iommufd_handle(group->attach_handle)->idev;
list_for_each_entry(iopf, &group->faults, list) {
@@ -272,13 +290,12 @@ static ssize_t iommufd_fault_fops_read(struct file *filep, char __user *buf,
group->cookie);
if (copy_to_user(buf + done, &data, fault_size)) {
xa_erase(&fault->response, group->cookie);
+ iommufd_fault_deliver_restore(fault, group);
rc = -EFAULT;
break;
}
done += fault_size;
}
-
- list_del(&group->node);
}
mutex_unlock(&fault->mutex);
@@ -336,10 +353,10 @@ static __poll_t iommufd_fault_fops_poll(struct file *filep,
__poll_t pollflags = EPOLLOUT;
poll_wait(filep, &fault->wait_queue, wait);
- mutex_lock(&fault->mutex);
+ spin_lock(&fault->lock);
if (!list_empty(&fault->deliver))
pollflags |= EPOLLIN | EPOLLRDNORM;
- mutex_unlock(&fault->mutex);
+ spin_unlock(&fault->lock);
return pollflags;
}
@@ -381,6 +398,7 @@ int iommufd_fault_alloc(struct iommufd_ucmd *ucmd)
INIT_LIST_HEAD(&fault->deliver);
xa_init_flags(&fault->response, XA_FLAGS_ALLOC1);
mutex_init(&fault->mutex);
+ spin_lock_init(&fault->lock);
init_waitqueue_head(&fault->wait_queue);
filep = anon_inode_getfile("[iommufd-pgfault]", &iommufd_fault_fops,
@@ -429,9 +447,9 @@ int iommufd_fault_iopf_handler(struct iopf_group *group)
hwpt = group->attach_handle->domain->fault_data;
fault = hwpt->fault;
- mutex_lock(&fault->mutex);
+ spin_lock(&fault->lock);
list_add_tail(&group->node, &fault->deliver);
- mutex_unlock(&fault->mutex);
+ spin_unlock(&fault->lock);
wake_up_interruptible(&fault->wait_queue);
diff --git a/drivers/iommu/iommufd/hw_pagetable.c b/drivers/iommu/iommufd/hw_pagetable.c
index d06bf6e6c19f..2454627a8b61 100644
--- a/drivers/iommu/iommufd/hw_pagetable.c
+++ b/drivers/iommu/iommufd/hw_pagetable.c
@@ -122,6 +122,9 @@ iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas,
if ((flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING) &&
!device_iommu_capable(idev->dev, IOMMU_CAP_DIRTY_TRACKING))
return ERR_PTR(-EOPNOTSUPP);
+ if ((flags & IOMMU_HWPT_FAULT_ID_VALID) &&
+ (flags & IOMMU_HWPT_ALLOC_NEST_PARENT))
+ return ERR_PTR(-EOPNOTSUPP);
hwpt_paging = __iommufd_object_alloc(
ictx, hwpt_paging, IOMMUFD_OBJ_HWPT_PAGING, common.obj);
diff --git a/drivers/iommu/iommufd/io_pagetable.c b/drivers/iommu/iommufd/io_pagetable.c
index 4bf7ccd39d46..067222b238b7 100644
--- a/drivers/iommu/iommufd/io_pagetable.c
+++ b/drivers/iommu/iommufd/io_pagetable.c
@@ -70,36 +70,45 @@ struct iopt_area *iopt_area_contig_next(struct iopt_area_contig_iter *iter)
return iter->area;
}
-static bool __alloc_iova_check_hole(struct interval_tree_double_span_iter *span,
- unsigned long length,
- unsigned long iova_alignment,
- unsigned long page_offset)
+static bool __alloc_iova_check_range(unsigned long *start, unsigned long last,
+ unsigned long length,
+ unsigned long iova_alignment,
+ unsigned long page_offset)
{
- if (span->is_used || span->last_hole - span->start_hole < length - 1)
+ unsigned long aligned_start;
+
+ /* ALIGN_UP() */
+ if (check_add_overflow(*start, iova_alignment - 1, &aligned_start))
return false;
+ aligned_start &= ~(iova_alignment - 1);
+ aligned_start |= page_offset;
- span->start_hole = ALIGN(span->start_hole, iova_alignment) |
- page_offset;
- if (span->start_hole > span->last_hole ||
- span->last_hole - span->start_hole < length - 1)
+ if (aligned_start >= last || last - aligned_start < length - 1)
return false;
+ *start = aligned_start;
return true;
}
-static bool __alloc_iova_check_used(struct interval_tree_span_iter *span,
+static bool __alloc_iova_check_hole(struct interval_tree_double_span_iter *span,
unsigned long length,
unsigned long iova_alignment,
unsigned long page_offset)
{
- if (span->is_hole || span->last_used - span->start_used < length - 1)
+ if (span->is_used)
return false;
+ return __alloc_iova_check_range(&span->start_hole, span->last_hole,
+ length, iova_alignment, page_offset);
+}
- span->start_used = ALIGN(span->start_used, iova_alignment) |
- page_offset;
- if (span->start_used > span->last_used ||
- span->last_used - span->start_used < length - 1)
+static bool __alloc_iova_check_used(struct interval_tree_span_iter *span,
+ unsigned long length,
+ unsigned long iova_alignment,
+ unsigned long page_offset)
+{
+ if (span->is_hole)
return false;
- return true;
+ return __alloc_iova_check_range(&span->start_used, span->last_used,
+ length, iova_alignment, page_offset);
}
/*
@@ -696,8 +705,10 @@ again:
iommufd_access_notify_unmap(iopt, area_first, length);
/* Something is not responding to unmap requests. */
tries++;
- if (WARN_ON(tries > 100))
- return -EDEADLOCK;
+ if (WARN_ON(tries > 100)) {
+ rc = -EDEADLOCK;
+ goto out_unmapped;
+ }
goto again;
}
@@ -719,6 +730,7 @@ again:
out_unlock_iova:
up_write(&iopt->iova_rwsem);
up_read(&iopt->domains_rwsem);
+out_unmapped:
if (unmapped)
*unmapped = unmapped_bytes;
return rc;
diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
index f1d865e6fab6..18cdf1391a03 100644
--- a/drivers/iommu/iommufd/iommufd_private.h
+++ b/drivers/iommu/iommufd/iommufd_private.h
@@ -462,14 +462,39 @@ struct iommufd_fault {
struct iommufd_ctx *ictx;
struct file *filep;
- /* The lists of outstanding faults protected by below mutex. */
- struct mutex mutex;
+ spinlock_t lock; /* protects the deliver list */
struct list_head deliver;
+ struct mutex mutex; /* serializes response flows */
struct xarray response;
struct wait_queue_head wait_queue;
};
+/* Fetch the first node out of the fault->deliver list */
+static inline struct iopf_group *
+iommufd_fault_deliver_fetch(struct iommufd_fault *fault)
+{
+ struct list_head *list = &fault->deliver;
+ struct iopf_group *group = NULL;
+
+ spin_lock(&fault->lock);
+ if (!list_empty(list)) {
+ group = list_first_entry(list, struct iopf_group, node);
+ list_del(&group->node);
+ }
+ spin_unlock(&fault->lock);
+ return group;
+}
+
+/* Restore a node back to the head of the fault->deliver list */
+static inline void iommufd_fault_deliver_restore(struct iommufd_fault *fault,
+ struct iopf_group *group)
+{
+ spin_lock(&fault->lock);
+ list_add(&group->node, &fault->deliver);
+ spin_unlock(&fault->lock);
+}
+
struct iommufd_attach_handle {
struct iommu_attach_handle handle;
struct iommufd_device *idev;
@@ -498,35 +523,10 @@ int iommufd_fault_domain_replace_dev(struct iommufd_device *idev,
struct iommufd_hw_pagetable *hwpt,
struct iommufd_hw_pagetable *old);
-static inline int iommufd_hwpt_attach_device(struct iommufd_hw_pagetable *hwpt,
- struct iommufd_device *idev)
-{
- if (hwpt->fault)
- return iommufd_fault_domain_attach_dev(hwpt, idev);
-
- return iommu_attach_group(hwpt->domain, idev->igroup->group);
-}
-
-static inline void iommufd_hwpt_detach_device(struct iommufd_hw_pagetable *hwpt,
- struct iommufd_device *idev)
-{
- if (hwpt->fault) {
- iommufd_fault_domain_detach_dev(hwpt, idev);
- return;
- }
-
- iommu_detach_group(hwpt->domain, idev->igroup->group);
-}
-
-static inline int iommufd_hwpt_replace_device(struct iommufd_device *idev,
- struct iommufd_hw_pagetable *hwpt,
- struct iommufd_hw_pagetable *old)
-{
- if (old->fault || hwpt->fault)
- return iommufd_fault_domain_replace_dev(idev, hwpt, old);
-
- return iommu_group_replace_domain(idev->igroup->group, hwpt->domain);
-}
+int iommufd_fault_iopf_enable(struct iommufd_device *idev);
+void iommufd_fault_iopf_disable(struct iommufd_device *idev);
+void iommufd_auto_response_faults(struct iommufd_hw_pagetable *hwpt,
+ struct iommufd_attach_handle *handle);
#ifdef CONFIG_IOMMUFD_TEST
int iommufd_test(struct iommufd_ucmd *ucmd);
diff --git a/drivers/iommu/iommufd/iova_bitmap.c b/drivers/iommu/iommufd/iova_bitmap.c
index d90b9e253412..2cdc4f542df4 100644
--- a/drivers/iommu/iommufd/iova_bitmap.c
+++ b/drivers/iommu/iommufd/iova_bitmap.c
@@ -130,7 +130,7 @@ struct iova_bitmap {
static unsigned long iova_bitmap_offset_to_index(struct iova_bitmap *bitmap,
unsigned long iova)
{
- unsigned long pgsize = 1 << bitmap->mapped.pgshift;
+ unsigned long pgsize = 1UL << bitmap->mapped.pgshift;
return iova / (BITS_PER_TYPE(*bitmap->bitmap) * pgsize);
}
diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c
index b5f5d27ee963..649fe79d0f0c 100644
--- a/drivers/iommu/iommufd/main.c
+++ b/drivers/iommu/iommufd/main.c
@@ -130,7 +130,7 @@ static int iommufd_object_dec_wait_shortterm(struct iommufd_ctx *ictx,
if (wait_event_timeout(ictx->destroy_wait,
refcount_read(&to_destroy->shortterm_users) ==
0,
- msecs_to_jiffies(10000)))
+ msecs_to_jiffies(60000)))
return 0;
pr_crit("Time out waiting for iommufd object to become free\n");
diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c
index ff55b8c30712..ae69691471e9 100644
--- a/drivers/iommu/ipmmu-vmsa.c
+++ b/drivers/iommu/ipmmu-vmsa.c
@@ -1087,7 +1087,7 @@ static int ipmmu_probe(struct platform_device *pdev)
* - R-Car Gen3 IPMMU (leaf devices only - skip root IPMMU-MM device)
*/
if (!mmu->features->has_cache_leaf_nodes || !ipmmu_is_root(mmu)) {
- ret = iommu_device_sysfs_add(&mmu->iommu, &pdev->dev, NULL,
+ ret = iommu_device_sysfs_add(&mmu->iommu, &pdev->dev, NULL, "%s",
dev_name(&pdev->dev));
if (ret)
return ret;
diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index 6a2707fe7a78..32deab732209 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -1371,15 +1371,6 @@ static int mtk_iommu_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, data);
mutex_init(&data->mutex);
- ret = iommu_device_sysfs_add(&data->iommu, dev, NULL,
- "mtk-iommu.%pa", &ioaddr);
- if (ret)
- goto out_link_remove;
-
- ret = iommu_device_register(&data->iommu, &mtk_iommu_ops, dev);
- if (ret)
- goto out_sysfs_remove;
-
if (MTK_IOMMU_HAS_FLAG(data->plat_data, SHARE_PGTABLE)) {
list_add_tail(&data->list, data->plat_data->hw_list);
data->hw_list = data->plat_data->hw_list;
@@ -1389,19 +1380,28 @@ static int mtk_iommu_probe(struct platform_device *pdev)
data->hw_list = &data->hw_list_head;
}
+ ret = iommu_device_sysfs_add(&data->iommu, dev, NULL,
+ "mtk-iommu.%pa", &ioaddr);
+ if (ret)
+ goto out_list_del;
+
+ ret = iommu_device_register(&data->iommu, &mtk_iommu_ops, dev);
+ if (ret)
+ goto out_sysfs_remove;
+
if (MTK_IOMMU_IS_TYPE(data->plat_data, MTK_IOMMU_TYPE_MM)) {
ret = component_master_add_with_match(dev, &mtk_iommu_com_ops, match);
if (ret)
- goto out_list_del;
+ goto out_device_unregister;
}
return ret;
-out_list_del:
- list_del(&data->list);
+out_device_unregister:
iommu_device_unregister(&data->iommu);
out_sysfs_remove:
iommu_device_sysfs_remove(&data->iommu);
-out_link_remove:
+out_list_del:
+ list_del(&data->list);
if (MTK_IOMMU_IS_TYPE(data->plat_data, MTK_IOMMU_TYPE_MM))
device_link_remove(data->smicomm_dev, dev);
out_runtime_disable:
diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c
index e7a6a1611d19..e3fcab925a54 100644
--- a/drivers/iommu/of_iommu.c
+++ b/drivers/iommu/of_iommu.c
@@ -118,6 +118,7 @@ static void of_pci_check_device_ats(struct device *dev, struct device_node *np)
int of_iommu_configure(struct device *dev, struct device_node *master_np,
const u32 *id)
{
+ bool dev_iommu_present;
int err;
if (!master_np)
@@ -129,6 +130,7 @@ int of_iommu_configure(struct device *dev, struct device_node *master_np,
mutex_unlock(&iommu_probe_device_lock);
return 0;
}
+ dev_iommu_present = dev->iommu;
/*
* We don't currently walk up the tree looking for a parent IOMMU.
@@ -149,8 +151,10 @@ int of_iommu_configure(struct device *dev, struct device_node *master_np,
err = of_iommu_configure_device(master_np, dev, id);
}
- if (err)
+ if (err && dev_iommu_present)
iommu_fwspec_free(dev);
+ else if (err && dev->iommu)
+ dev_iommu_free(dev);
mutex_unlock(&iommu_probe_device_lock);
if (!err && dev->bus)
diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c
index 4b369419b32c..4c7f470a4752 100644
--- a/drivers/iommu/rockchip-iommu.c
+++ b/drivers/iommu/rockchip-iommu.c
@@ -1154,7 +1154,6 @@ static int rk_iommu_of_xlate(struct device *dev,
iommu_dev = of_find_device_by_node(args->np);
data->iommu = platform_get_drvdata(iommu_dev);
- data->iommu->domain = &rk_identity_domain;
dev_iommu_priv_set(dev, data);
platform_device_put(iommu_dev);
@@ -1192,6 +1191,8 @@ static int rk_iommu_probe(struct platform_device *pdev)
if (!iommu)
return -ENOMEM;
+ iommu->domain = &rk_identity_domain;
+
platform_set_drvdata(pdev, iommu);
iommu->dev = dev;
iommu->num_mmu = 0;