summaryrefslogtreecommitdiff
path: root/drivers/iommu
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/iommu')
-rw-r--r--drivers/iommu/amd/amd_iommu.h7
-rw-r--r--drivers/iommu/amd/amd_iommu_types.h22
-rw-r--r--drivers/iommu/amd/init.c131
-rw-r--r--drivers/iommu/amd/iommu.c88
-rw-r--r--drivers/iommu/amd/iommu_v2.c17
-rw-r--r--drivers/iommu/apple-dart.c2
-rw-r--r--drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c31
-rw-r--r--drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c45
-rw-r--r--drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h2
-rw-r--r--drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c2
-rw-r--r--drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c7
-rw-r--r--drivers/iommu/arm/arm-smmu/arm-smmu.c1
-rw-r--r--drivers/iommu/arm/arm-smmu/qcom_iommu.c71
-rw-r--r--drivers/iommu/dma-iommu.c26
-rw-r--r--drivers/iommu/dma-iommu.h8
-rw-r--r--drivers/iommu/hyperv-iommu.c4
-rw-r--r--drivers/iommu/intel/iommu.c263
-rw-r--r--drivers/iommu/intel/iommu.h9
-rw-r--r--drivers/iommu/intel/irq_remapping.c2
-rw-r--r--drivers/iommu/intel/pasid.c4
-rw-r--r--drivers/iommu/intel/pasid.h2
-rw-r--r--drivers/iommu/intel/svm.c70
-rw-r--r--drivers/iommu/iommu-priv.h30
-rw-r--r--drivers/iommu/iommu-sva.c29
-rw-r--r--drivers/iommu/iommu-sysfs.c8
-rw-r--r--drivers/iommu/iommu.c564
-rw-r--r--drivers/iommu/iommufd/Kconfig4
-rw-r--r--drivers/iommu/iommufd/device.c801
-rw-r--r--drivers/iommu/iommufd/hw_pagetable.c112
-rw-r--r--drivers/iommu/iommufd/io_pagetable.c36
-rw-r--r--drivers/iommu/iommufd/iommufd_private.h86
-rw-r--r--drivers/iommu/iommufd/iommufd_test.h19
-rw-r--r--drivers/iommu/iommufd/main.c61
-rw-r--r--drivers/iommu/iommufd/selftest.c213
-rw-r--r--drivers/iommu/iommufd/vfio_compat.c2
-rw-r--r--drivers/iommu/ipmmu-vmsa.c21
-rw-r--r--drivers/iommu/mtk_iommu.c151
-rw-r--r--drivers/iommu/of_iommu.c2
-rw-r--r--drivers/iommu/rockchip-iommu.c50
-rw-r--r--drivers/iommu/sprd-iommu.c2
-rw-r--r--drivers/iommu/tegra-smmu.c2
-rw-r--r--drivers/iommu/virtio-iommu.c2
42 files changed, 2063 insertions, 946 deletions
diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
index 0c35018239ce..e2857109e966 100644
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -12,13 +12,14 @@
#include "amd_iommu_types.h"
irqreturn_t amd_iommu_int_thread(int irq, void *data);
+irqreturn_t amd_iommu_int_thread_evtlog(int irq, void *data);
+irqreturn_t amd_iommu_int_thread_pprlog(int irq, void *data);
+irqreturn_t amd_iommu_int_thread_galog(int irq, void *data);
irqreturn_t amd_iommu_int_handler(int irq, void *data);
void amd_iommu_apply_erratum_63(struct amd_iommu *iommu, u16 devid);
void amd_iommu_restart_event_logging(struct amd_iommu *iommu);
void amd_iommu_restart_ga_log(struct amd_iommu *iommu);
-int amd_iommu_init_devices(void);
-void amd_iommu_uninit_devices(void);
-void amd_iommu_init_notifier(void);
+void amd_iommu_restart_ppr_log(struct amd_iommu *iommu);
void amd_iommu_set_rlookup_table(struct amd_iommu *iommu, u16 devid);
#ifdef CONFIG_AMD_IOMMU_DEBUGFS
diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h
index dc1db6167927..7dc30c2b56b3 100644
--- a/drivers/iommu/amd/amd_iommu_types.h
+++ b/drivers/iommu/amd/amd_iommu_types.h
@@ -120,10 +120,13 @@
#define PASID_MASK 0x0000ffff
/* MMIO status bits */
-#define MMIO_STATUS_EVT_OVERFLOW_INT_MASK BIT(0)
+#define MMIO_STATUS_EVT_OVERFLOW_MASK BIT(0)
#define MMIO_STATUS_EVT_INT_MASK BIT(1)
#define MMIO_STATUS_COM_WAIT_INT_MASK BIT(2)
+#define MMIO_STATUS_EVT_RUN_MASK BIT(3)
+#define MMIO_STATUS_PPR_OVERFLOW_MASK BIT(5)
#define MMIO_STATUS_PPR_INT_MASK BIT(6)
+#define MMIO_STATUS_PPR_RUN_MASK BIT(7)
#define MMIO_STATUS_GALOG_RUN_MASK BIT(8)
#define MMIO_STATUS_GALOG_OVERFLOW_MASK BIT(9)
#define MMIO_STATUS_GALOG_INT_MASK BIT(10)
@@ -381,15 +384,15 @@
*/
#define DTE_FLAG_V BIT_ULL(0)
#define DTE_FLAG_TV BIT_ULL(1)
+#define DTE_FLAG_GIOV BIT_ULL(54)
+#define DTE_FLAG_GV BIT_ULL(55)
+#define DTE_GLX_SHIFT (56)
+#define DTE_GLX_MASK (3)
#define DTE_FLAG_IR BIT_ULL(61)
#define DTE_FLAG_IW BIT_ULL(62)
#define DTE_FLAG_IOTLB BIT_ULL(32)
-#define DTE_FLAG_GIOV BIT_ULL(54)
-#define DTE_FLAG_GV BIT_ULL(55)
#define DTE_FLAG_MASK (0x3ffULL << 32)
-#define DTE_GLX_SHIFT (56)
-#define DTE_GLX_MASK (3)
#define DEV_DOMID_MASK 0xffffULL
#define DTE_GCR3_VAL_A(x) (((x) >> 12) & 0x00007ULL)
@@ -702,12 +705,21 @@ struct amd_iommu {
/* event buffer virtual address */
u8 *evt_buf;
+ /* Name for event log interrupt */
+ unsigned char evt_irq_name[16];
+
/* Base of the PPR log, if present */
u8 *ppr_log;
+ /* Name for PPR log interrupt */
+ unsigned char ppr_irq_name[16];
+
/* Base of the GA log, if present */
u8 *ga_log;
+ /* Name for GA log interrupt */
+ unsigned char ga_irq_name[16];
+
/* Tail of the GA log, if present */
u8 *ga_log_tail;
diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index ea0f1ab94178..45efb7e5d725 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -483,6 +483,10 @@ static void iommu_disable(struct amd_iommu *iommu)
iommu_feature_disable(iommu, CONTROL_GALOG_EN);
iommu_feature_disable(iommu, CONTROL_GAINT_EN);
+ /* Disable IOMMU PPR logging */
+ iommu_feature_disable(iommu, CONTROL_PPRLOG_EN);
+ iommu_feature_disable(iommu, CONTROL_PPRINT_EN);
+
/* Disable IOMMU hardware itself */
iommu_feature_disable(iommu, CONTROL_IOMMU_EN);
@@ -753,37 +757,61 @@ static int __init alloc_command_buffer(struct amd_iommu *iommu)
}
/*
+ * Interrupt handler has processed all pending events and adjusted head
+ * and tail pointer. Reset overflow mask and restart logging again.
+ */
+static void amd_iommu_restart_log(struct amd_iommu *iommu, const char *evt_type,
+ u8 cntrl_intr, u8 cntrl_log,
+ u32 status_run_mask, u32 status_overflow_mask)
+{
+ u32 status;
+
+ status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
+ if (status & status_run_mask)
+ return;
+
+ pr_info_ratelimited("IOMMU %s log restarting\n", evt_type);
+
+ iommu_feature_disable(iommu, cntrl_log);
+ iommu_feature_disable(iommu, cntrl_intr);
+
+ writel(status_overflow_mask, iommu->mmio_base + MMIO_STATUS_OFFSET);
+
+ iommu_feature_enable(iommu, cntrl_intr);
+ iommu_feature_enable(iommu, cntrl_log);
+}
+
+/*
* This function restarts event logging in case the IOMMU experienced
* an event log buffer overflow.
*/
void amd_iommu_restart_event_logging(struct amd_iommu *iommu)
{
- iommu_feature_disable(iommu, CONTROL_EVT_LOG_EN);
- iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN);
+ amd_iommu_restart_log(iommu, "Event", CONTROL_EVT_INT_EN,
+ CONTROL_EVT_LOG_EN, MMIO_STATUS_EVT_RUN_MASK,
+ MMIO_STATUS_EVT_OVERFLOW_MASK);
}
/*
* This function restarts event logging in case the IOMMU experienced
- * an GA log overflow.
+ * GA log overflow.
*/
void amd_iommu_restart_ga_log(struct amd_iommu *iommu)
{
- u32 status;
-
- status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
- if (status & MMIO_STATUS_GALOG_RUN_MASK)
- return;
-
- pr_info_ratelimited("IOMMU GA Log restarting\n");
-
- iommu_feature_disable(iommu, CONTROL_GALOG_EN);
- iommu_feature_disable(iommu, CONTROL_GAINT_EN);
-
- writel(MMIO_STATUS_GALOG_OVERFLOW_MASK,
- iommu->mmio_base + MMIO_STATUS_OFFSET);
+ amd_iommu_restart_log(iommu, "GA", CONTROL_GAINT_EN,
+ CONTROL_GALOG_EN, MMIO_STATUS_GALOG_RUN_MASK,
+ MMIO_STATUS_GALOG_OVERFLOW_MASK);
+}
- iommu_feature_enable(iommu, CONTROL_GAINT_EN);
- iommu_feature_enable(iommu, CONTROL_GALOG_EN);
+/*
+ * This function restarts ppr logging in case the IOMMU experienced
+ * PPR log overflow.
+ */
+void amd_iommu_restart_ppr_log(struct amd_iommu *iommu)
+{
+ amd_iommu_restart_log(iommu, "PPR", CONTROL_PPRINT_EN,
+ CONTROL_PPRLOG_EN, MMIO_STATUS_PPR_RUN_MASK,
+ MMIO_STATUS_PPR_OVERFLOW_MASK);
}
/*
@@ -906,6 +934,8 @@ static void iommu_enable_ppr_log(struct amd_iommu *iommu)
if (iommu->ppr_log == NULL)
return;
+ iommu_feature_enable(iommu, CONTROL_PPR_EN);
+
entry = iommu_virt_to_phys(iommu->ppr_log) | PPR_LOG_SIZE_512;
memcpy_toio(iommu->mmio_base + MMIO_PPR_LOG_OFFSET,
@@ -916,7 +946,7 @@ static void iommu_enable_ppr_log(struct amd_iommu *iommu)
writel(0x00, iommu->mmio_base + MMIO_PPR_TAIL_OFFSET);
iommu_feature_enable(iommu, CONTROL_PPRLOG_EN);
- iommu_feature_enable(iommu, CONTROL_PPR_EN);
+ iommu_feature_enable(iommu, CONTROL_PPRINT_EN);
}
static void __init free_ppr_log(struct amd_iommu *iommu)
@@ -2311,6 +2341,7 @@ static int intcapxt_irqdomain_alloc(struct irq_domain *domain, unsigned int virq
struct irq_data *irqd = irq_domain_get_irq_data(domain, i);
irqd->chip = &intcapxt_controller;
+ irqd->hwirq = info->hwirq;
irqd->chip_data = info->data;
__irq_set_handler(i, handle_edge_irq, 0, "edge");
}
@@ -2337,22 +2368,14 @@ static void intcapxt_unmask_irq(struct irq_data *irqd)
xt.destid_0_23 = cfg->dest_apicid & GENMASK(23, 0);
xt.destid_24_31 = cfg->dest_apicid >> 24;
- /**
- * Current IOMMU implementation uses the same IRQ for all
- * 3 IOMMU interrupts.
- */
- writeq(xt.capxt, iommu->mmio_base + MMIO_INTCAPXT_EVT_OFFSET);
- writeq(xt.capxt, iommu->mmio_base + MMIO_INTCAPXT_PPR_OFFSET);
- writeq(xt.capxt, iommu->mmio_base + MMIO_INTCAPXT_GALOG_OFFSET);
+ writeq(xt.capxt, iommu->mmio_base + irqd->hwirq);
}
static void intcapxt_mask_irq(struct irq_data *irqd)
{
struct amd_iommu *iommu = irqd->chip_data;
- writeq(0, iommu->mmio_base + MMIO_INTCAPXT_EVT_OFFSET);
- writeq(0, iommu->mmio_base + MMIO_INTCAPXT_PPR_OFFSET);
- writeq(0, iommu->mmio_base + MMIO_INTCAPXT_GALOG_OFFSET);
+ writeq(0, iommu->mmio_base + irqd->hwirq);
}
@@ -2415,7 +2438,8 @@ static struct irq_domain *iommu_get_irqdomain(void)
return iommu_irqdomain;
}
-static int iommu_setup_intcapxt(struct amd_iommu *iommu)
+static int __iommu_setup_intcapxt(struct amd_iommu *iommu, const char *devname,
+ int hwirq, irq_handler_t thread_fn)
{
struct irq_domain *domain;
struct irq_alloc_info info;
@@ -2429,6 +2453,7 @@ static int iommu_setup_intcapxt(struct amd_iommu *iommu)
init_irq_alloc_info(&info, NULL);
info.type = X86_IRQ_ALLOC_TYPE_AMDVI;
info.data = iommu;
+ info.hwirq = hwirq;
irq = irq_domain_alloc_irqs(domain, 1, node, &info);
if (irq < 0) {
@@ -2437,7 +2462,7 @@ static int iommu_setup_intcapxt(struct amd_iommu *iommu)
}
ret = request_threaded_irq(irq, amd_iommu_int_handler,
- amd_iommu_int_thread, 0, "AMD-Vi", iommu);
+ thread_fn, 0, devname, iommu);
if (ret) {
irq_domain_free_irqs(irq, 1);
irq_domain_remove(domain);
@@ -2447,6 +2472,37 @@ static int iommu_setup_intcapxt(struct amd_iommu *iommu)
return 0;
}
+static int iommu_setup_intcapxt(struct amd_iommu *iommu)
+{
+ int ret;
+
+ snprintf(iommu->evt_irq_name, sizeof(iommu->evt_irq_name),
+ "AMD-Vi%d-Evt", iommu->index);
+ ret = __iommu_setup_intcapxt(iommu, iommu->evt_irq_name,
+ MMIO_INTCAPXT_EVT_OFFSET,
+ amd_iommu_int_thread_evtlog);
+ if (ret)
+ return ret;
+
+ snprintf(iommu->ppr_irq_name, sizeof(iommu->ppr_irq_name),
+ "AMD-Vi%d-PPR", iommu->index);
+ ret = __iommu_setup_intcapxt(iommu, iommu->ppr_irq_name,
+ MMIO_INTCAPXT_PPR_OFFSET,
+ amd_iommu_int_thread_pprlog);
+ if (ret)
+ return ret;
+
+#ifdef CONFIG_IRQ_REMAP
+ snprintf(iommu->ga_irq_name, sizeof(iommu->ga_irq_name),
+ "AMD-Vi%d-GA", iommu->index);
+ ret = __iommu_setup_intcapxt(iommu, iommu->ga_irq_name,
+ MMIO_INTCAPXT_GALOG_OFFSET,
+ amd_iommu_int_thread_galog);
+#endif
+
+ return ret;
+}
+
static int iommu_init_irq(struct amd_iommu *iommu)
{
int ret;
@@ -2472,8 +2528,6 @@ enable_faults:
iommu_feature_enable(iommu, CONTROL_EVT_INT_EN);
- if (iommu->ppr_log != NULL)
- iommu_feature_enable(iommu, CONTROL_PPRINT_EN);
return 0;
}
@@ -2889,8 +2943,6 @@ static void enable_iommus_vapic(void)
static void enable_iommus(void)
{
early_enable_iommus();
- enable_iommus_vapic();
- enable_iommus_v2();
}
static void disable_iommus(void)
@@ -3154,6 +3206,13 @@ static int amd_iommu_enable_interrupts(void)
goto out;
}
+ /*
+ * Interrupt handler is ready to process interrupts. Enable
+ * PPR and GA log interrupt for all IOMMUs.
+ */
+ enable_iommus_vapic();
+ enable_iommus_v2();
+
out:
return ret;
}
@@ -3233,8 +3292,6 @@ static int __init state_next(void)
register_syscore_ops(&amd_iommu_syscore_ops);
ret = amd_iommu_init_pci();
init_state = ret ? IOMMU_INIT_ERROR : IOMMU_PCI_INIT;
- enable_iommus_vapic();
- enable_iommus_v2();
break;
case IOMMU_PCI_INIT:
ret = amd_iommu_enable_interrupts();
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index c3b58a8389b9..95bd7c25ba6f 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -841,50 +841,27 @@ static inline void
amd_iommu_set_pci_msi_domain(struct device *dev, struct amd_iommu *iommu) { }
#endif /* !CONFIG_IRQ_REMAP */
-#define AMD_IOMMU_INT_MASK \
- (MMIO_STATUS_EVT_OVERFLOW_INT_MASK | \
- MMIO_STATUS_EVT_INT_MASK | \
- MMIO_STATUS_PPR_INT_MASK | \
- MMIO_STATUS_GALOG_OVERFLOW_MASK | \
- MMIO_STATUS_GALOG_INT_MASK)
-
-irqreturn_t amd_iommu_int_thread(int irq, void *data)
+static void amd_iommu_handle_irq(void *data, const char *evt_type,
+ u32 int_mask, u32 overflow_mask,
+ void (*int_handler)(struct amd_iommu *),
+ void (*overflow_handler)(struct amd_iommu *))
{
struct amd_iommu *iommu = (struct amd_iommu *) data;
u32 status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
+ u32 mask = int_mask | overflow_mask;
- while (status & AMD_IOMMU_INT_MASK) {
+ while (status & mask) {
/* Enable interrupt sources again */
- writel(AMD_IOMMU_INT_MASK,
- iommu->mmio_base + MMIO_STATUS_OFFSET);
+ writel(mask, iommu->mmio_base + MMIO_STATUS_OFFSET);
- if (status & MMIO_STATUS_EVT_INT_MASK) {
- pr_devel("Processing IOMMU Event Log\n");
- iommu_poll_events(iommu);
+ if (int_handler) {
+ pr_devel("Processing IOMMU (ivhd%d) %s Log\n",
+ iommu->index, evt_type);
+ int_handler(iommu);
}
- if (status & MMIO_STATUS_PPR_INT_MASK) {
- pr_devel("Processing IOMMU PPR Log\n");
- iommu_poll_ppr_log(iommu);
- }
-
-#ifdef CONFIG_IRQ_REMAP
- if (status & (MMIO_STATUS_GALOG_INT_MASK |
- MMIO_STATUS_GALOG_OVERFLOW_MASK)) {
- pr_devel("Processing IOMMU GA Log\n");
- iommu_poll_ga_log(iommu);
- }
-
- if (status & MMIO_STATUS_GALOG_OVERFLOW_MASK) {
- pr_info_ratelimited("IOMMU GA Log overflow\n");
- amd_iommu_restart_ga_log(iommu);
- }
-#endif
-
- if (status & MMIO_STATUS_EVT_OVERFLOW_INT_MASK) {
- pr_info_ratelimited("IOMMU event log overflow\n");
- amd_iommu_restart_event_logging(iommu);
- }
+ if ((status & overflow_mask) && overflow_handler)
+ overflow_handler(iommu);
/*
* Hardware bug: ERBT1312
@@ -901,6 +878,43 @@ irqreturn_t amd_iommu_int_thread(int irq, void *data)
*/
status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
}
+}
+
+irqreturn_t amd_iommu_int_thread_evtlog(int irq, void *data)
+{
+ amd_iommu_handle_irq(data, "Evt", MMIO_STATUS_EVT_INT_MASK,
+ MMIO_STATUS_EVT_OVERFLOW_MASK,
+ iommu_poll_events, amd_iommu_restart_event_logging);
+
+ return IRQ_HANDLED;
+}
+
+irqreturn_t amd_iommu_int_thread_pprlog(int irq, void *data)
+{
+ amd_iommu_handle_irq(data, "PPR", MMIO_STATUS_PPR_INT_MASK,
+ MMIO_STATUS_PPR_OVERFLOW_MASK,
+ iommu_poll_ppr_log, amd_iommu_restart_ppr_log);
+
+ return IRQ_HANDLED;
+}
+
+irqreturn_t amd_iommu_int_thread_galog(int irq, void *data)
+{
+#ifdef CONFIG_IRQ_REMAP
+ amd_iommu_handle_irq(data, "GA", MMIO_STATUS_GALOG_INT_MASK,
+ MMIO_STATUS_GALOG_OVERFLOW_MASK,
+ iommu_poll_ga_log, amd_iommu_restart_ga_log);
+#endif
+
+ return IRQ_HANDLED;
+}
+
+irqreturn_t amd_iommu_int_thread(int irq, void *data)
+{
+ amd_iommu_int_thread_evtlog(irq, data);
+ amd_iommu_int_thread_pprlog(irq, data);
+ amd_iommu_int_thread_galog(irq, data);
+
return IRQ_HANDLED;
}
@@ -3681,7 +3695,7 @@ static int amd_ir_set_affinity(struct irq_data *data,
* at the new destination. So, time to cleanup the previous
* vector allocation.
*/
- send_cleanup_vector(cfg);
+ vector_schedule_cleanup(cfg);
return IRQ_SET_MASK_OK_DONE;
}
diff --git a/drivers/iommu/amd/iommu_v2.c b/drivers/iommu/amd/iommu_v2.c
index 261352a23271..57c2fb1146e2 100644
--- a/drivers/iommu/amd/iommu_v2.c
+++ b/drivers/iommu/amd/iommu_v2.c
@@ -262,8 +262,8 @@ static void put_pasid_state(struct pasid_state *pasid_state)
static void put_pasid_state_wait(struct pasid_state *pasid_state)
{
- refcount_dec(&pasid_state->count);
- wait_event(pasid_state->wq, !refcount_read(&pasid_state->count));
+ if (!refcount_dec_and_test(&pasid_state->count))
+ wait_event(pasid_state->wq, !refcount_read(&pasid_state->count));
free_pasid_state(pasid_state);
}
@@ -327,6 +327,9 @@ static void free_pasid_states(struct device_state *dev_state)
put_pasid_state(pasid_state);
+ /* Clear the pasid state so that the pasid can be re-used */
+ clear_pasid_state(dev_state, pasid_state->pasid);
+
/*
* This will call the mn_release function and
* unbind the PASID
@@ -355,9 +358,9 @@ static struct pasid_state *mn_to_state(struct mmu_notifier *mn)
return container_of(mn, struct pasid_state, mn);
}
-static void mn_invalidate_range(struct mmu_notifier *mn,
- struct mm_struct *mm,
- unsigned long start, unsigned long end)
+static void mn_arch_invalidate_secondary_tlbs(struct mmu_notifier *mn,
+ struct mm_struct *mm,
+ unsigned long start, unsigned long end)
{
struct pasid_state *pasid_state;
struct device_state *dev_state;
@@ -391,8 +394,8 @@ static void mn_release(struct mmu_notifier *mn, struct mm_struct *mm)
}
static const struct mmu_notifier_ops iommu_mn = {
- .release = mn_release,
- .invalidate_range = mn_invalidate_range,
+ .release = mn_release,
+ .arch_invalidate_secondary_tlbs = mn_arch_invalidate_secondary_tlbs,
};
static void set_pri_tag_status(struct pasid_state *pasid_state,
diff --git a/drivers/iommu/apple-dart.c b/drivers/iommu/apple-dart.c
index 8af64b57f048..2082081402d3 100644
--- a/drivers/iommu/apple-dart.c
+++ b/drivers/iommu/apple-dart.c
@@ -1276,7 +1276,7 @@ static __maybe_unused int apple_dart_resume(struct device *dev)
return 0;
}
-DEFINE_SIMPLE_DEV_PM_OPS(apple_dart_pm_ops, apple_dart_suspend, apple_dart_resume);
+static DEFINE_SIMPLE_DEV_PM_OPS(apple_dart_pm_ops, apple_dart_suspend, apple_dart_resume);
static const struct of_device_id apple_dart_of_match[] = {
{ .compatible = "apple,t8103-dart", .data = &apple_dart_hw_t8103 },
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
index a5a63b1c947e..4d83edc2be99 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
@@ -80,7 +80,7 @@ arm_smmu_share_asid(struct mm_struct *mm, u16 asid)
* be some overlap between use of both ASIDs, until we invalidate the
* TLB.
*/
- arm_smmu_write_ctx_desc(smmu_domain, 0, cd);
+ arm_smmu_write_ctx_desc(smmu_domain, IOMMU_NO_PASID, cd);
/* Invalidate TLB entries previously associated with that context */
arm_smmu_tlb_inv_asid(smmu, asid);
@@ -186,9 +186,10 @@ static void arm_smmu_free_shared_cd(struct arm_smmu_ctx_desc *cd)
}
}
-static void arm_smmu_mm_invalidate_range(struct mmu_notifier *mn,
- struct mm_struct *mm,
- unsigned long start, unsigned long end)
+static void arm_smmu_mm_arch_invalidate_secondary_tlbs(struct mmu_notifier *mn,
+ struct mm_struct *mm,
+ unsigned long start,
+ unsigned long end)
{
struct arm_smmu_mmu_notifier *smmu_mn = mn_to_smmu(mn);
struct arm_smmu_domain *smmu_domain = smmu_mn->domain;
@@ -200,10 +201,20 @@ static void arm_smmu_mm_invalidate_range(struct mmu_notifier *mn,
* range. So do a simple translation here by calculating size correctly.
*/
size = end - start;
+ if (size == ULONG_MAX)
+ size = 0;
+
+ if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_BTM)) {
+ if (!size)
+ arm_smmu_tlb_inv_asid(smmu_domain->smmu,
+ smmu_mn->cd->asid);
+ else
+ arm_smmu_tlb_inv_range_asid(start, size,
+ smmu_mn->cd->asid,
+ PAGE_SIZE, false,
+ smmu_domain);
+ }
- if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_BTM))
- arm_smmu_tlb_inv_range_asid(start, size, smmu_mn->cd->asid,
- PAGE_SIZE, false, smmu_domain);
arm_smmu_atc_inv_domain(smmu_domain, mm->pasid, start, size);
}
@@ -237,9 +248,9 @@ static void arm_smmu_mmu_notifier_free(struct mmu_notifier *mn)
}
static const struct mmu_notifier_ops arm_smmu_mmu_notifier_ops = {
- .invalidate_range = arm_smmu_mm_invalidate_range,
- .release = arm_smmu_mm_release,
- .free_notifier = arm_smmu_mmu_notifier_free,
+ .arch_invalidate_secondary_tlbs = arm_smmu_mm_arch_invalidate_secondary_tlbs,
+ .release = arm_smmu_mm_release,
+ .free_notifier = arm_smmu_mmu_notifier_free,
};
/* Allocate or get existing MMU notifier for this {domain, mm} pair */
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index 9b0dc3505601..e82bf1c449a3 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -1059,7 +1059,7 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
/*
* This function handles the following cases:
*
- * (1) Install primary CD, for normal DMA traffic (SSID = 0).
+ * (1) Install primary CD, for normal DMA traffic (SSID = IOMMU_NO_PASID = 0).
* (2) Install a secondary CD, for SID+SSID traffic.
* (3) Update ASID of a CD. Atomically write the first 64 bits of the
* CD, then invalidate the old entry and mappings.
@@ -1607,7 +1607,7 @@ static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
sid = FIELD_GET(PRIQ_0_SID, evt[0]);
ssv = FIELD_GET(PRIQ_0_SSID_V, evt[0]);
- ssid = ssv ? FIELD_GET(PRIQ_0_SSID, evt[0]) : 0;
+ ssid = ssv ? FIELD_GET(PRIQ_0_SSID, evt[0]) : IOMMU_NO_PASID;
last = FIELD_GET(PRIQ_0_PRG_LAST, evt[0]);
grpid = FIELD_GET(PRIQ_1_PRG_IDX, evt[1]);
@@ -1748,7 +1748,7 @@ arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size,
*/
*cmd = (struct arm_smmu_cmdq_ent) {
.opcode = CMDQ_OP_ATC_INV,
- .substream_valid = !!ssid,
+ .substream_valid = (ssid != IOMMU_NO_PASID),
.atc.ssid = ssid,
};
@@ -1795,7 +1795,7 @@ static int arm_smmu_atc_inv_master(struct arm_smmu_master *master)
struct arm_smmu_cmdq_ent cmd;
struct arm_smmu_cmdq_batch cmds;
- arm_smmu_atc_inv_to_cmd(0, 0, 0, &cmd);
+ arm_smmu_atc_inv_to_cmd(IOMMU_NO_PASID, 0, 0, &cmd);
cmds.num = 0;
for (i = 0; i < master->num_streams; i++) {
@@ -1875,7 +1875,7 @@ static void arm_smmu_tlb_inv_context(void *cookie)
cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid;
arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
}
- arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
+ arm_smmu_atc_inv_domain(smmu_domain, IOMMU_NO_PASID, 0, 0);
}
static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd,
@@ -1968,7 +1968,7 @@ static void arm_smmu_tlb_inv_range_domain(unsigned long iova, size_t size,
* Unfortunately, this can't be leaf-only since we may have
* zapped an entire table.
*/
- arm_smmu_atc_inv_domain(smmu_domain, 0, iova, size);
+ arm_smmu_atc_inv_domain(smmu_domain, IOMMU_NO_PASID, iova, size);
}
void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid,
@@ -2055,24 +2055,6 @@ static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
return &smmu_domain->domain;
}
-static int arm_smmu_bitmap_alloc(unsigned long *map, int span)
-{
- int idx, size = 1 << span;
-
- do {
- idx = find_first_zero_bit(map, size);
- if (idx == size)
- return -ENOSPC;
- } while (test_and_set_bit(idx, map));
-
- return idx;
-}
-
-static void arm_smmu_bitmap_free(unsigned long *map, int idx)
-{
- clear_bit(idx, map);
-}
-
static void arm_smmu_domain_free(struct iommu_domain *domain)
{
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
@@ -2093,7 +2075,7 @@ static void arm_smmu_domain_free(struct iommu_domain *domain)
} else {
struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
if (cfg->vmid)
- arm_smmu_bitmap_free(smmu->vmid_map, cfg->vmid);
+ ida_free(&smmu->vmid_map, cfg->vmid);
}
kfree(smmu_domain);
@@ -2142,7 +2124,7 @@ static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
* the master has been added to the devices list for this domain.
* This isn't an issue because the STE hasn't been installed yet.
*/
- ret = arm_smmu_write_ctx_desc(smmu_domain, 0, &cfg->cd);
+ ret = arm_smmu_write_ctx_desc(smmu_domain, IOMMU_NO_PASID, &cfg->cd);
if (ret)
goto out_free_cd_tables;
@@ -2167,7 +2149,9 @@ static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
typeof(&pgtbl_cfg->arm_lpae_s2_cfg.vtcr) vtcr;
- vmid = arm_smmu_bitmap_alloc(smmu->vmid_map, smmu->vmid_bits);
+ /* Reserve VMID 0 for stage-2 bypass STEs */
+ vmid = ida_alloc_range(&smmu->vmid_map, 1, (1 << smmu->vmid_bits) - 1,
+ GFP_KERNEL);
if (vmid < 0)
return vmid;
@@ -2328,7 +2312,7 @@ static void arm_smmu_enable_ats(struct arm_smmu_master *master)
pdev = to_pci_dev(master->dev);
atomic_inc(&smmu_domain->nr_ats_masters);
- arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
+ arm_smmu_atc_inv_domain(smmu_domain, IOMMU_NO_PASID, 0, 0);
if (pci_enable_ats(pdev, stu))
dev_err(master->dev, "Failed to enable ATS (STU %zu)\n", stu);
}
@@ -3098,8 +3082,8 @@ static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
reg |= STRTAB_BASE_RA;
smmu->strtab_cfg.strtab_base = reg;
- /* Allocate the first VMID for stage-2 bypass STEs */
- set_bit(0, smmu->vmid_map);
+ ida_init(&smmu->vmid_map);
+
return 0;
}
@@ -3923,6 +3907,7 @@ static void arm_smmu_device_remove(struct platform_device *pdev)
iommu_device_sysfs_remove(&smmu->iommu);
arm_smmu_device_disable(smmu);
iopf_queue_free(smmu->evtq.iopf);
+ ida_destroy(&smmu->vmid_map);
}
static void arm_smmu_device_shutdown(struct platform_device *pdev)
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index dcab85698a4e..9915850dd4db 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -670,7 +670,7 @@ struct arm_smmu_device {
#define ARM_SMMU_MAX_VMIDS (1 << 16)
unsigned int vmid_bits;
- DECLARE_BITMAP(vmid_map, ARM_SMMU_MAX_VMIDS);
+ struct ida vmid_map;
unsigned int ssid_bits;
unsigned int sid_bits;
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c
index b5b14108e086..bb89d49adf8d 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c
@@ -3,7 +3,7 @@
* Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved.
*/
-#include <linux/of_device.h>
+#include <linux/device.h>
#include <linux/firmware/qcom/qcom_scm.h>
#include <linux/ratelimit.h>
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
index c71afda79d64..7f52ac67495f 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
@@ -251,10 +251,12 @@ static const struct of_device_id qcom_smmu_client_of_match[] __maybe_unused = {
{ .compatible = "qcom,sc7280-mss-pil" },
{ .compatible = "qcom,sc8180x-mdss" },
{ .compatible = "qcom,sc8280xp-mdss" },
- { .compatible = "qcom,sm8150-mdss" },
- { .compatible = "qcom,sm8250-mdss" },
{ .compatible = "qcom,sdm845-mdss" },
{ .compatible = "qcom,sdm845-mss-pil" },
+ { .compatible = "qcom,sm6350-mdss" },
+ { .compatible = "qcom,sm6375-mdss" },
+ { .compatible = "qcom,sm8150-mdss" },
+ { .compatible = "qcom,sm8250-mdss" },
{ }
};
@@ -528,6 +530,7 @@ static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = {
{ .compatible = "qcom,sm6125-smmu-500", .data = &qcom_smmu_500_impl0_data },
{ .compatible = "qcom,sm6350-smmu-v2", .data = &qcom_smmu_v2_data },
{ .compatible = "qcom,sm6350-smmu-500", .data = &qcom_smmu_500_impl0_data },
+ { .compatible = "qcom,sm6375-smmu-v2", .data = &qcom_smmu_v2_data },
{ .compatible = "qcom,sm6375-smmu-500", .data = &qcom_smmu_500_impl0_data },
{ .compatible = "qcom,sm8150-smmu-500", .data = &qcom_smmu_500_impl0_data },
{ .compatible = "qcom,sm8250-smmu-500", .data = &qcom_smmu_500_impl0_data },
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c
index a86acd76c1df..d6d1a2a55cc0 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
@@ -29,7 +29,6 @@
#include <linux/module.h>
#include <linux/of.h>
#include <linux/of_address.h>
-#include <linux/of_device.h>
#include <linux/pci.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
diff --git a/drivers/iommu/arm/arm-smmu/qcom_iommu.c b/drivers/iommu/arm/arm-smmu/qcom_iommu.c
index a503ed758ec3..775a3cbaff4e 100644
--- a/drivers/iommu/arm/arm-smmu/qcom_iommu.c
+++ b/drivers/iommu/arm/arm-smmu/qcom_iommu.c
@@ -22,8 +22,7 @@
#include <linux/init.h>
#include <linux/mutex.h>
#include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/of_device.h>
+#include <linux/of_platform.h>
#include <linux/platform_device.h>
#include <linux/pm.h>
#include <linux/pm_runtime.h>
@@ -51,14 +50,15 @@ struct qcom_iommu_dev {
struct clk_bulk_data clks[CLK_NUM];
void __iomem *local_base;
u32 sec_id;
- u8 num_ctxs;
- struct qcom_iommu_ctx *ctxs[]; /* indexed by asid-1 */
+ u8 max_asid;
+ struct qcom_iommu_ctx *ctxs[]; /* indexed by asid */
};
struct qcom_iommu_ctx {
struct device *dev;
void __iomem *base;
bool secure_init;
+ bool secured_ctx;
u8 asid; /* asid and ctx bank # are 1:1 */
struct iommu_domain *domain;
};
@@ -94,7 +94,7 @@ static struct qcom_iommu_ctx * to_ctx(struct qcom_iommu_domain *d, unsigned asid
struct qcom_iommu_dev *qcom_iommu = d->iommu;
if (!qcom_iommu)
return NULL;
- return qcom_iommu->ctxs[asid - 1];
+ return qcom_iommu->ctxs[asid];
}
static inline void
@@ -273,6 +273,19 @@ static int qcom_iommu_init_domain(struct iommu_domain *domain,
ctx->secure_init = true;
}
+ /* Secured QSMMU-500/QSMMU-v2 contexts cannot be programmed */
+ if (ctx->secured_ctx) {
+ ctx->domain = domain;
+ continue;
+ }
+
+ /* Disable context bank before programming */
+ iommu_writel(ctx, ARM_SMMU_CB_SCTLR, 0);
+
+ /* Clear context bank fault address fault status registers */
+ iommu_writel(ctx, ARM_SMMU_CB_FAR, 0);
+ iommu_writel(ctx, ARM_SMMU_CB_FSR, ARM_SMMU_FSR_FAULT);
+
/* TTBRs */
iommu_writeq(ctx, ARM_SMMU_CB_TTBR0,
pgtbl_cfg.arm_lpae_s1_cfg.ttbr |
@@ -527,11 +540,10 @@ static int qcom_iommu_of_xlate(struct device *dev, struct of_phandle_args *args)
qcom_iommu = platform_get_drvdata(iommu_pdev);
/* make sure the asid specified in dt is valid, so we don't have
- * to sanity check this elsewhere, since 'asid - 1' is used to
- * index into qcom_iommu->ctxs:
+ * to sanity check this elsewhere:
*/
- if (WARN_ON(asid < 1) ||
- WARN_ON(asid > qcom_iommu->num_ctxs)) {
+ if (WARN_ON(asid > qcom_iommu->max_asid) ||
+ WARN_ON(qcom_iommu->ctxs[asid] == NULL)) {
put_device(&iommu_pdev->dev);
return -EINVAL;
}
@@ -617,7 +629,8 @@ free_mem:
static int get_asid(const struct device_node *np)
{
- u32 reg;
+ u32 reg, val;
+ int asid;
/* read the "reg" property directly to get the relative address
* of the context bank, and calculate the asid from that:
@@ -625,7 +638,17 @@ static int get_asid(const struct device_node *np)
if (of_property_read_u32_index(np, "reg", 0, &reg))
return -ENODEV;
- return reg / 0x1000; /* context banks are 0x1000 apart */
+ /*
+ * Context banks are 0x1000 apart but, in some cases, the ASID
+ * number doesn't match to this logic and needs to be passed
+ * from the DT configuration explicitly.
+ */
+ if (!of_property_read_u32(np, "qcom,ctx-asid", &val))
+ asid = val;
+ else
+ asid = reg / 0x1000;
+
+ return asid;
}
static int qcom_iommu_ctx_probe(struct platform_device *pdev)
@@ -633,7 +656,6 @@ static int qcom_iommu_ctx_probe(struct platform_device *pdev)
struct qcom_iommu_ctx *ctx;
struct device *dev = &pdev->dev;
struct qcom_iommu_dev *qcom_iommu = dev_get_drvdata(dev->parent);
- struct resource *res;
int ret, irq;
ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL);
@@ -643,19 +665,22 @@ static int qcom_iommu_ctx_probe(struct platform_device *pdev)
ctx->dev = dev;
platform_set_drvdata(pdev, ctx);
- res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- ctx->base = devm_ioremap_resource(dev, res);
+ ctx->base = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(ctx->base))
return PTR_ERR(ctx->base);
irq = platform_get_irq(pdev, 0);
if (irq < 0)
- return -ENODEV;
+ return irq;
+
+ if (of_device_is_compatible(dev->of_node, "qcom,msm-iommu-v2-sec"))
+ ctx->secured_ctx = true;
/* clear IRQs before registering fault handler, just in case the
* boot-loader left us a surprise:
*/
- iommu_writel(ctx, ARM_SMMU_CB_FSR, iommu_readl(ctx, ARM_SMMU_CB_FSR));
+ if (!ctx->secured_ctx)
+ iommu_writel(ctx, ARM_SMMU_CB_FSR, iommu_readl(ctx, ARM_SMMU_CB_FSR));
ret = devm_request_irq(dev, irq,
qcom_iommu_fault,
@@ -677,7 +702,7 @@ static int qcom_iommu_ctx_probe(struct platform_device *pdev)
dev_dbg(dev, "found asid %u\n", ctx->asid);
- qcom_iommu->ctxs[ctx->asid - 1] = ctx;
+ qcom_iommu->ctxs[ctx->asid] = ctx;
return 0;
}
@@ -689,12 +714,14 @@ static void qcom_iommu_ctx_remove(struct platform_device *pdev)
platform_set_drvdata(pdev, NULL);
- qcom_iommu->ctxs[ctx->asid - 1] = NULL;
+ qcom_iommu->ctxs[ctx->asid] = NULL;
}
static const struct of_device_id ctx_of_match[] = {
{ .compatible = "qcom,msm-iommu-v1-ns" },
{ .compatible = "qcom,msm-iommu-v1-sec" },
+ { .compatible = "qcom,msm-iommu-v2-ns" },
+ { .compatible = "qcom,msm-iommu-v2-sec" },
{ /* sentinel */ }
};
@@ -712,7 +739,8 @@ static bool qcom_iommu_has_secure_context(struct qcom_iommu_dev *qcom_iommu)
struct device_node *child;
for_each_child_of_node(qcom_iommu->dev->of_node, child) {
- if (of_device_is_compatible(child, "qcom,msm-iommu-v1-sec")) {
+ if (of_device_is_compatible(child, "qcom,msm-iommu-v1-sec") ||
+ of_device_is_compatible(child, "qcom,msm-iommu-v2-sec")) {
of_node_put(child);
return true;
}
@@ -736,11 +764,11 @@ static int qcom_iommu_device_probe(struct platform_device *pdev)
for_each_child_of_node(dev->of_node, child)
max_asid = max(max_asid, get_asid(child));
- qcom_iommu = devm_kzalloc(dev, struct_size(qcom_iommu, ctxs, max_asid),
+ qcom_iommu = devm_kzalloc(dev, struct_size(qcom_iommu, ctxs, max_asid + 1),
GFP_KERNEL);
if (!qcom_iommu)
return -ENOMEM;
- qcom_iommu->num_ctxs = max_asid;
+ qcom_iommu->max_asid = max_asid;
qcom_iommu->dev = dev;
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
@@ -856,6 +884,7 @@ static const struct dev_pm_ops qcom_iommu_pm_ops = {
static const struct of_device_id qcom_iommu_of_match[] = {
{ .compatible = "qcom,msm-iommu-v1" },
+ { .compatible = "qcom,msm-iommu-v2" },
{ /* sentinel */ }
};
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index e57724163835..4b1a88f514c9 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -660,7 +660,7 @@ static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain,
{
struct iommu_dma_cookie *cookie = domain->iova_cookie;
struct iova_domain *iovad = &cookie->iovad;
- unsigned long shift, iova_len, iova = 0;
+ unsigned long shift, iova_len, iova;
if (cookie->type == IOMMU_DMA_MSI_COOKIE) {
cookie->msi_iova += size;
@@ -675,15 +675,29 @@ static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain,
if (domain->geometry.force_aperture)
dma_limit = min(dma_limit, (u64)domain->geometry.aperture_end);
- /* Try to get PCI devices a SAC address */
- if (dma_limit > DMA_BIT_MASK(32) && !iommu_dma_forcedac && dev_is_pci(dev))
+ /*
+ * Try to use all the 32-bit PCI addresses first. The original SAC vs.
+ * DAC reasoning loses relevance with PCIe, but enough hardware and
+ * firmware bugs are still lurking out there that it's safest not to
+ * venture into the 64-bit space until necessary.
+ *
+ * If your device goes wrong after seeing the notice then likely either
+ * its driver is not setting DMA masks accurately, the hardware has
+ * some inherent bug in handling >32-bit addresses, or not all the
+ * expected address bits are wired up between the device and the IOMMU.
+ */
+ if (dma_limit > DMA_BIT_MASK(32) && dev->iommu->pci_32bit_workaround) {
iova = alloc_iova_fast(iovad, iova_len,
DMA_BIT_MASK(32) >> shift, false);
+ if (iova)
+ goto done;
- if (!iova)
- iova = alloc_iova_fast(iovad, iova_len, dma_limit >> shift,
- true);
+ dev->iommu->pci_32bit_workaround = false;
+ dev_notice(dev, "Using %d-bit DMA addresses\n", bits_per(dma_limit));
+ }
+ iova = alloc_iova_fast(iovad, iova_len, dma_limit >> shift, true);
+done:
return (dma_addr_t)iova << shift;
}
diff --git a/drivers/iommu/dma-iommu.h b/drivers/iommu/dma-iommu.h
index 942790009292..c829f1f82a99 100644
--- a/drivers/iommu/dma-iommu.h
+++ b/drivers/iommu/dma-iommu.h
@@ -17,6 +17,10 @@ int iommu_dma_init_fq(struct iommu_domain *domain);
void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list);
extern bool iommu_dma_forcedac;
+static inline void iommu_dma_set_pci_32bit_workaround(struct device *dev)
+{
+ dev->iommu->pci_32bit_workaround = !iommu_dma_forcedac;
+}
#else /* CONFIG_IOMMU_DMA */
@@ -38,5 +42,9 @@ static inline void iommu_dma_get_resv_regions(struct device *dev, struct list_he
{
}
+static inline void iommu_dma_set_pci_32bit_workaround(struct device *dev)
+{
+}
+
#endif /* CONFIG_IOMMU_DMA */
#endif /* __DMA_IOMMU_H */
diff --git a/drivers/iommu/hyperv-iommu.c b/drivers/iommu/hyperv-iommu.c
index 8302db7f783e..8a5c17b97310 100644
--- a/drivers/iommu/hyperv-iommu.c
+++ b/drivers/iommu/hyperv-iommu.c
@@ -51,7 +51,7 @@ static int hyperv_ir_set_affinity(struct irq_data *data,
if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE)
return ret;
- send_cleanup_vector(cfg);
+ vector_schedule_cleanup(cfg);
return 0;
}
@@ -257,7 +257,7 @@ static int hyperv_root_ir_set_affinity(struct irq_data *data,
if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE)
return ret;
- send_cleanup_vector(cfg);
+ vector_schedule_cleanup(cfg);
return 0;
}
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 5c8c5cdc36cf..5db283c17e0d 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -22,6 +22,7 @@
#include <linux/spinlock.h>
#include <linux/syscore_ops.h>
#include <linux/tboot.h>
+#include <uapi/linux/iommufd.h>
#include "iommu.h"
#include "../dma-iommu.h"
@@ -113,13 +114,17 @@ static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
/* VT-d pages must always be _smaller_ than MM pages. Otherwise things
are never going to work. */
-static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
+static inline unsigned long mm_to_dma_pfn_start(unsigned long mm_pfn)
{
return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
}
+static inline unsigned long mm_to_dma_pfn_end(unsigned long mm_pfn)
+{
+ return ((mm_pfn + 1) << (PAGE_SHIFT - VTD_PAGE_SHIFT)) - 1;
+}
static inline unsigned long page_to_dma_pfn(struct page *pg)
{
- return mm_to_dma_pfn(page_to_pfn(pg));
+ return mm_to_dma_pfn_start(page_to_pfn(pg));
}
static inline unsigned long virt_to_dma_pfn(void *p)
{
@@ -877,7 +882,7 @@ void dmar_fault_dump_ptes(struct intel_iommu *iommu, u16 source_id,
}
/* For request-without-pasid, get the pasid from context entry */
if (intel_iommu_sm && pasid == IOMMU_PASID_INVALID)
- pasid = PASID_RID2PASID;
+ pasid = IOMMU_NO_PASID;
dir_index = pasid >> PASID_PDE_SHIFT;
pde = &dir[dir_index];
@@ -1359,6 +1364,7 @@ domain_lookup_dev_info(struct dmar_domain *domain,
static void domain_update_iotlb(struct dmar_domain *domain)
{
+ struct dev_pasid_info *dev_pasid;
struct device_domain_info *info;
bool has_iotlb_device = false;
unsigned long flags;
@@ -1370,6 +1376,14 @@ static void domain_update_iotlb(struct dmar_domain *domain)
break;
}
}
+
+ list_for_each_entry(dev_pasid, &domain->dev_pasids, link_domain) {
+ info = dev_iommu_priv_get(dev_pasid->dev);
+ if (info->ats_enabled) {
+ has_iotlb_device = true;
+ break;
+ }
+ }
domain->has_iotlb_device = has_iotlb_device;
spin_unlock_irqrestore(&domain->lock, flags);
}
@@ -1449,12 +1463,13 @@ static void __iommu_flush_dev_iotlb(struct device_domain_info *info,
qdep = info->ats_qdep;
qi_flush_dev_iotlb(info->iommu, sid, info->pfsid,
qdep, addr, mask);
- quirk_extra_dev_tlb_flush(info, addr, mask, PASID_RID2PASID, qdep);
+ quirk_extra_dev_tlb_flush(info, addr, mask, IOMMU_NO_PASID, qdep);
}
static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
u64 addr, unsigned mask)
{
+ struct dev_pasid_info *dev_pasid;
struct device_domain_info *info;
unsigned long flags;
@@ -1464,6 +1479,36 @@ static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
spin_lock_irqsave(&domain->lock, flags);
list_for_each_entry(info, &domain->devices, link)
__iommu_flush_dev_iotlb(info, addr, mask);
+
+ list_for_each_entry(dev_pasid, &domain->dev_pasids, link_domain) {
+ info = dev_iommu_priv_get(dev_pasid->dev);
+
+ if (!info->ats_enabled)
+ continue;
+
+ qi_flush_dev_iotlb_pasid(info->iommu,
+ PCI_DEVID(info->bus, info->devfn),
+ info->pfsid, dev_pasid->pasid,
+ info->ats_qdep, addr,
+ mask);
+ }
+ spin_unlock_irqrestore(&domain->lock, flags);
+}
+
+static void domain_flush_pasid_iotlb(struct intel_iommu *iommu,
+ struct dmar_domain *domain, u64 addr,
+ unsigned long npages, bool ih)
+{
+ u16 did = domain_id_iommu(domain, iommu);
+ struct dev_pasid_info *dev_pasid;
+ unsigned long flags;
+
+ spin_lock_irqsave(&domain->lock, flags);
+ list_for_each_entry(dev_pasid, &domain->dev_pasids, link_domain)
+ qi_flush_piotlb(iommu, did, dev_pasid->pasid, addr, npages, ih);
+
+ if (!list_empty(&domain->devices))
+ qi_flush_piotlb(iommu, did, IOMMU_NO_PASID, addr, npages, ih);
spin_unlock_irqrestore(&domain->lock, flags);
}
@@ -1484,7 +1529,7 @@ static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
ih = 1 << 6;
if (domain->use_first_level) {
- qi_flush_piotlb(iommu, did, PASID_RID2PASID, addr, pages, ih);
+ domain_flush_pasid_iotlb(iommu, domain, addr, pages, ih);
} else {
unsigned long bitmask = aligned_pages - 1;
@@ -1554,7 +1599,7 @@ static void intel_flush_iotlb_all(struct iommu_domain *domain)
u16 did = domain_id_iommu(dmar_domain, iommu);
if (dmar_domain->use_first_level)
- qi_flush_piotlb(iommu, did, PASID_RID2PASID, 0, -1, 0);
+ domain_flush_pasid_iotlb(iommu, dmar_domain, 0, -1, 0);
else
iommu->flush.flush_iotlb(iommu, did, 0, 0,
DMA_TLB_DSI_FLUSH);
@@ -1726,6 +1771,7 @@ static struct dmar_domain *alloc_domain(unsigned int type)
domain->use_first_level = true;
domain->has_iotlb_device = false;
INIT_LIST_HEAD(&domain->devices);
+ INIT_LIST_HEAD(&domain->dev_pasids);
spin_lock_init(&domain->lock);
xa_init(&domain->iommu_array);
@@ -1940,7 +1986,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
context_pdts(pds);
/* Setup the RID_PASID field: */
- context_set_sm_rid2pasid(context, PASID_RID2PASID);
+ context_set_sm_rid2pasid(context, IOMMU_NO_PASID);
/*
* Setup the Device-TLB enable bit and Page request
@@ -2362,8 +2408,8 @@ static int __init si_domain_init(int hw)
for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
ret = iommu_domain_identity_map(si_domain,
- mm_to_dma_pfn(start_pfn),
- mm_to_dma_pfn(end_pfn));
+ mm_to_dma_pfn_start(start_pfn),
+ mm_to_dma_pfn_end(end_pfn));
if (ret)
return ret;
}
@@ -2384,8 +2430,8 @@ static int __init si_domain_init(int hw)
continue;
ret = iommu_domain_identity_map(si_domain,
- mm_to_dma_pfn(start >> PAGE_SHIFT),
- mm_to_dma_pfn(end >> PAGE_SHIFT));
+ mm_to_dma_pfn_start(start >> PAGE_SHIFT),
+ mm_to_dma_pfn_end(end >> PAGE_SHIFT));
if (ret)
return ret;
}
@@ -2420,13 +2466,13 @@ static int dmar_domain_attach_device(struct dmar_domain *domain,
/* Setup the PASID entry for requests without PASID: */
if (hw_pass_through && domain_type_is_si(domain))
ret = intel_pasid_setup_pass_through(iommu, domain,
- dev, PASID_RID2PASID);
+ dev, IOMMU_NO_PASID);
else if (domain->use_first_level)
ret = domain_setup_first_level(iommu, domain, dev,
- PASID_RID2PASID);
+ IOMMU_NO_PASID);
else
ret = intel_pasid_setup_second_level(iommu, domain,
- dev, PASID_RID2PASID);
+ dev, IOMMU_NO_PASID);
if (ret) {
dev_err(dev, "Setup RID2PASID failed\n");
device_block_translation(dev);
@@ -2446,30 +2492,6 @@ static int dmar_domain_attach_device(struct dmar_domain *domain,
return 0;
}
-static bool device_has_rmrr(struct device *dev)
-{
- struct dmar_rmrr_unit *rmrr;
- struct device *tmp;
- int i;
-
- rcu_read_lock();
- for_each_rmrr_units(rmrr) {
- /*
- * Return TRUE if this RMRR contains the device that
- * is passed in.
- */
- for_each_active_dev_scope(rmrr->devices,
- rmrr->devices_cnt, i, tmp)
- if (tmp == dev ||
- is_downstream_to_pci_bridge(dev, tmp)) {
- rcu_read_unlock();
- return true;
- }
- }
- rcu_read_unlock();
- return false;
-}
-
/**
* device_rmrr_is_relaxable - Test whether the RMRR of this device
* is relaxable (ie. is allowed to be not enforced under some conditions)
@@ -2500,34 +2522,6 @@ static bool device_rmrr_is_relaxable(struct device *dev)
}
/*
- * There are a couple cases where we need to restrict the functionality of
- * devices associated with RMRRs. The first is when evaluating a device for
- * identity mapping because problems exist when devices are moved in and out
- * of domains and their respective RMRR information is lost. This means that
- * a device with associated RMRRs will never be in a "passthrough" domain.
- * The second is use of the device through the IOMMU API. This interface
- * expects to have full control of the IOVA space for the device. We cannot
- * satisfy both the requirement that RMRR access is maintained and have an
- * unencumbered IOVA space. We also have no ability to quiesce the device's
- * use of the RMRR space or even inform the IOMMU API user of the restriction.
- * We therefore prevent devices associated with an RMRR from participating in
- * the IOMMU API, which eliminates them from device assignment.
- *
- * In both cases, devices which have relaxable RMRRs are not concerned by this
- * restriction. See device_rmrr_is_relaxable comment.
- */
-static bool device_is_rmrr_locked(struct device *dev)
-{
- if (!device_has_rmrr(dev))
- return false;
-
- if (device_rmrr_is_relaxable(dev))
- return false;
-
- return true;
-}
-
-/*
* Return the required default domain type for a specific device.
*
* @dev: the device in query
@@ -3560,8 +3554,8 @@ static int intel_iommu_memory_notifier(struct notifier_block *nb,
unsigned long val, void *v)
{
struct memory_notify *mhp = v;
- unsigned long start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
- unsigned long last_vpfn = mm_to_dma_pfn(mhp->start_pfn +
+ unsigned long start_vpfn = mm_to_dma_pfn_start(mhp->start_pfn);
+ unsigned long last_vpfn = mm_to_dma_pfn_end(mhp->start_pfn +
mhp->nr_pages - 1);
switch (val) {
@@ -3756,7 +3750,6 @@ static int __init probe_acpi_namespace_devices(void)
for_each_active_dev_scope(drhd->devices,
drhd->devices_cnt, i, dev) {
struct acpi_device_physical_node *pn;
- struct iommu_group *group;
struct acpi_device *adev;
if (dev->bus != &acpi_bus_type)
@@ -3766,12 +3759,6 @@ static int __init probe_acpi_namespace_devices(void)
mutex_lock(&adev->physical_node_lock);
list_for_each_entry(pn,
&adev->physical_node_list, node) {
- group = iommu_group_get(pn->dev);
- if (group) {
- iommu_group_put(group);
- continue;
- }
-
ret = iommu_probe_device(pn->dev);
if (ret)
break;
@@ -3968,7 +3955,7 @@ static void dmar_remove_one_dev_info(struct device *dev)
if (!dev_is_real_dma_subdevice(info->dev)) {
if (dev_is_pci(info->dev) && sm_supported(iommu))
intel_pasid_tear_down_entry(iommu, info->dev,
- PASID_RID2PASID, false);
+ IOMMU_NO_PASID, false);
iommu_disable_pci_caps(info);
domain_context_clear(info);
@@ -3997,7 +3984,7 @@ static void device_block_translation(struct device *dev)
if (!dev_is_real_dma_subdevice(dev)) {
if (sm_supported(iommu))
intel_pasid_tear_down_entry(iommu, dev,
- PASID_RID2PASID, false);
+ IOMMU_NO_PASID, false);
else
domain_context_clear(info);
}
@@ -4139,12 +4126,6 @@ static int intel_iommu_attach_device(struct iommu_domain *domain,
struct device_domain_info *info = dev_iommu_priv_get(dev);
int ret;
- if (domain->type == IOMMU_DOMAIN_UNMANAGED &&
- device_is_rmrr_locked(dev)) {
- dev_warn(dev, "Device is ineligible for IOMMU domain attach due to platform RMRR requirement. Contact your platform vendor.\n");
- return -EPERM;
- }
-
if (info->domain)
device_block_translation(dev);
@@ -4271,7 +4252,7 @@ static void intel_iommu_tlb_sync(struct iommu_domain *domain,
unsigned long i;
nrpages = aligned_nrpages(gather->start, size);
- start_pfn = mm_to_dma_pfn(iova_pfn);
+ start_pfn = mm_to_dma_pfn_start(iova_pfn);
xa_for_each(&dmar_domain->iommu_array, i, info)
iommu_flush_iotlb_psi(info->iommu, dmar_domain,
@@ -4331,7 +4312,7 @@ static void domain_set_force_snooping(struct dmar_domain *domain)
list_for_each_entry(info, &domain->devices, link)
intel_pasid_setup_page_snoop_control(info->iommu, info->dev,
- PASID_RID2PASID);
+ IOMMU_NO_PASID);
}
static bool intel_iommu_enforce_cache_coherency(struct iommu_domain *domain)
@@ -4713,27 +4694,118 @@ static void intel_iommu_iotlb_sync_map(struct iommu_domain *domain,
static void intel_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid)
{
struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL);
+ struct dev_pasid_info *curr, *dev_pasid = NULL;
+ struct dmar_domain *dmar_domain;
struct iommu_domain *domain;
+ unsigned long flags;
- /* Domain type specific cleanup: */
domain = iommu_get_domain_for_dev_pasid(dev, pasid, 0);
- if (domain) {
- switch (domain->type) {
- case IOMMU_DOMAIN_SVA:
- intel_svm_remove_dev_pasid(dev, pasid);
- break;
- default:
- /* should never reach here */
- WARN_ON(1);
+ if (WARN_ON_ONCE(!domain))
+ goto out_tear_down;
+
+ /*
+ * The SVA implementation needs to handle its own stuffs like the mm
+ * notification. Before consolidating that code into iommu core, let
+ * the intel sva code handle it.
+ */
+ if (domain->type == IOMMU_DOMAIN_SVA) {
+ intel_svm_remove_dev_pasid(dev, pasid);
+ goto out_tear_down;
+ }
+
+ dmar_domain = to_dmar_domain(domain);
+ spin_lock_irqsave(&dmar_domain->lock, flags);
+ list_for_each_entry(curr, &dmar_domain->dev_pasids, link_domain) {
+ if (curr->dev == dev && curr->pasid == pasid) {
+ list_del(&curr->link_domain);
+ dev_pasid = curr;
break;
}
}
+ WARN_ON_ONCE(!dev_pasid);
+ spin_unlock_irqrestore(&dmar_domain->lock, flags);
+ domain_detach_iommu(dmar_domain, iommu);
+ kfree(dev_pasid);
+out_tear_down:
intel_pasid_tear_down_entry(iommu, dev, pasid, false);
+ intel_drain_pasid_prq(dev, pasid);
+}
+
+static int intel_iommu_set_dev_pasid(struct iommu_domain *domain,
+ struct device *dev, ioasid_t pasid)
+{
+ struct device_domain_info *info = dev_iommu_priv_get(dev);
+ struct dmar_domain *dmar_domain = to_dmar_domain(domain);
+ struct intel_iommu *iommu = info->iommu;
+ struct dev_pasid_info *dev_pasid;
+ unsigned long flags;
+ int ret;
+
+ if (!pasid_supported(iommu) || dev_is_real_dma_subdevice(dev))
+ return -EOPNOTSUPP;
+
+ if (context_copied(iommu, info->bus, info->devfn))
+ return -EBUSY;
+
+ ret = prepare_domain_attach_device(domain, dev);
+ if (ret)
+ return ret;
+
+ dev_pasid = kzalloc(sizeof(*dev_pasid), GFP_KERNEL);
+ if (!dev_pasid)
+ return -ENOMEM;
+
+ ret = domain_attach_iommu(dmar_domain, iommu);
+ if (ret)
+ goto out_free;
+
+ if (domain_type_is_si(dmar_domain))
+ ret = intel_pasid_setup_pass_through(iommu, dmar_domain,
+ dev, pasid);
+ else if (dmar_domain->use_first_level)
+ ret = domain_setup_first_level(iommu, dmar_domain,
+ dev, pasid);
+ else
+ ret = intel_pasid_setup_second_level(iommu, dmar_domain,
+ dev, pasid);
+ if (ret)
+ goto out_detach_iommu;
+
+ dev_pasid->dev = dev;
+ dev_pasid->pasid = pasid;
+ spin_lock_irqsave(&dmar_domain->lock, flags);
+ list_add(&dev_pasid->link_domain, &dmar_domain->dev_pasids);
+ spin_unlock_irqrestore(&dmar_domain->lock, flags);
+
+ return 0;
+out_detach_iommu:
+ domain_detach_iommu(dmar_domain, iommu);
+out_free:
+ kfree(dev_pasid);
+ return ret;
+}
+
+static void *intel_iommu_hw_info(struct device *dev, u32 *length, u32 *type)
+{
+ struct device_domain_info *info = dev_iommu_priv_get(dev);
+ struct intel_iommu *iommu = info->iommu;
+ struct iommu_hw_info_vtd *vtd;
+
+ vtd = kzalloc(sizeof(*vtd), GFP_KERNEL);
+ if (!vtd)
+ return ERR_PTR(-ENOMEM);
+
+ vtd->cap_reg = iommu->cap;
+ vtd->ecap_reg = iommu->ecap;
+ *length = sizeof(*vtd);
+ *type = IOMMU_HW_INFO_TYPE_INTEL_VTD;
+ return vtd;
}
const struct iommu_ops intel_iommu_ops = {
.capable = intel_iommu_capable,
+ .hw_info = intel_iommu_hw_info,
.domain_alloc = intel_iommu_domain_alloc,
.probe_device = intel_iommu_probe_device,
.probe_finalize = intel_iommu_probe_finalize,
@@ -4751,6 +4823,7 @@ const struct iommu_ops intel_iommu_ops = {
#endif
.default_domain_ops = &(const struct iommu_domain_ops) {
.attach_dev = intel_iommu_attach_device,
+ .set_dev_pasid = intel_iommu_set_dev_pasid,
.map_pages = intel_iommu_map_pages,
.unmap_pages = intel_iommu_unmap_pages,
.iotlb_sync_map = intel_iommu_iotlb_sync_map,
@@ -4987,7 +5060,7 @@ void quirk_extra_dev_tlb_flush(struct device_domain_info *info,
return;
sid = PCI_DEVID(info->bus, info->devfn);
- if (pasid == PASID_RID2PASID) {
+ if (pasid == IOMMU_NO_PASID) {
qi_flush_dev_iotlb(info->iommu, sid, info->pfsid,
qdep, address, mask);
} else {
diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index 1c5e1d88862b..c18fb699c87a 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -595,6 +595,7 @@ struct dmar_domain {
spinlock_t lock; /* Protect device tracking lists */
struct list_head devices; /* all devices' list */
+ struct list_head dev_pasids; /* all attached pasids */
struct dma_pte *pgd; /* virtual address */
int gaw; /* max guest address width */
@@ -717,6 +718,12 @@ struct device_domain_info {
struct pasid_table *pasid_table; /* pasid table */
};
+struct dev_pasid_info {
+ struct list_head link_domain; /* link to domain siblings */
+ struct device *dev;
+ ioasid_t pasid;
+};
+
static inline void __iommu_flush_cache(
struct intel_iommu *iommu, void *addr, int size)
{
@@ -844,6 +851,7 @@ int intel_svm_page_response(struct device *dev, struct iommu_fault_event *evt,
struct iommu_page_response *msg);
struct iommu_domain *intel_svm_domain_alloc(void);
void intel_svm_remove_dev_pasid(struct device *dev, ioasid_t pasid);
+void intel_drain_pasid_prq(struct device *dev, u32 pasid);
struct intel_svm_dev {
struct list_head list;
@@ -862,6 +870,7 @@ struct intel_svm {
};
#else
static inline void intel_svm_check(struct intel_iommu *iommu) {}
+static inline void intel_drain_pasid_prq(struct device *dev, u32 pasid) {}
static inline struct iommu_domain *intel_svm_domain_alloc(void)
{
return NULL;
diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c
index 08f56326e2f8..29b9e55dcf26 100644
--- a/drivers/iommu/intel/irq_remapping.c
+++ b/drivers/iommu/intel/irq_remapping.c
@@ -1176,7 +1176,7 @@ intel_ir_set_affinity(struct irq_data *data, const struct cpumask *mask,
* at the new destination. So, time to cleanup the previous
* vector allocation.
*/
- send_cleanup_vector(cfg);
+ vector_schedule_cleanup(cfg);
return IRQ_SET_MASK_OK_DONE;
}
diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c
index c5d479770e12..8f92b92f3d2a 100644
--- a/drivers/iommu/intel/pasid.c
+++ b/drivers/iommu/intel/pasid.c
@@ -129,7 +129,7 @@ int intel_pasid_alloc_table(struct device *dev)
info->pasid_table = pasid_table;
if (!ecap_coherent(info->iommu->ecap))
- clflush_cache_range(pasid_table->table, size);
+ clflush_cache_range(pasid_table->table, (1 << order) * PAGE_SIZE);
return 0;
}
@@ -438,7 +438,7 @@ devtlb_invalidation_with_pasid(struct intel_iommu *iommu,
* SVA usage, device could do DMA with multiple PASIDs. It is more
* efficient to flush devTLB specific to the PASID.
*/
- if (pasid == PASID_RID2PASID)
+ if (pasid == IOMMU_NO_PASID)
qi_flush_dev_iotlb(iommu, sid, pfsid, qdep, 0, 64 - VTD_PAGE_SHIFT);
else
qi_flush_dev_iotlb_pasid(iommu, sid, pfsid, pasid, qdep, 0, 64 - VTD_PAGE_SHIFT);
diff --git a/drivers/iommu/intel/pasid.h b/drivers/iommu/intel/pasid.h
index d6b7d21244b1..4e9e68c3c388 100644
--- a/drivers/iommu/intel/pasid.h
+++ b/drivers/iommu/intel/pasid.h
@@ -10,8 +10,6 @@
#ifndef __INTEL_PASID_H
#define __INTEL_PASID_H
-#define PASID_RID2PASID 0x0
-#define PASID_MIN 0x1
#define PASID_MAX 0x100000
#define PASID_PTE_MASK 0x3F
#define PASID_PTE_PRESENT 1
diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index e95b339e9cdc..50a481c895b8 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -26,8 +26,6 @@
#include "trace.h"
static irqreturn_t prq_event_thread(int irq, void *d);
-static void intel_svm_drain_prq(struct device *dev, u32 pasid);
-#define to_intel_svm_dev(handle) container_of(handle, struct intel_svm_dev, sva)
static DEFINE_XARRAY_ALLOC(pasid_private_array);
static int pasid_private_add(ioasid_t pasid, void *priv)
@@ -219,9 +217,9 @@ static void intel_flush_svm_range(struct intel_svm *svm, unsigned long address,
}
/* Pages have been freed at this point */
-static void intel_invalidate_range(struct mmu_notifier *mn,
- struct mm_struct *mm,
- unsigned long start, unsigned long end)
+static void intel_arch_invalidate_secondary_tlbs(struct mmu_notifier *mn,
+ struct mm_struct *mm,
+ unsigned long start, unsigned long end)
{
struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);
@@ -256,11 +254,9 @@ static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
static const struct mmu_notifier_ops intel_mmuops = {
.release = intel_mm_release,
- .invalidate_range = intel_invalidate_range,
+ .arch_invalidate_secondary_tlbs = intel_arch_invalidate_secondary_tlbs,
};
-static DEFINE_MUTEX(pasid_mutex);
-
static int pasid_to_svm_sdev(struct device *dev, unsigned int pasid,
struct intel_svm **rsvm,
struct intel_svm_dev **rsdev)
@@ -268,10 +264,6 @@ static int pasid_to_svm_sdev(struct device *dev, unsigned int pasid,
struct intel_svm_dev *sdev = NULL;
struct intel_svm *svm;
- /* The caller should hold the pasid_mutex lock */
- if (WARN_ON(!mutex_is_locked(&pasid_mutex)))
- return -EINVAL;
-
if (pasid == IOMMU_PASID_INVALID || pasid >= PASID_MAX)
return -EINVAL;
@@ -371,37 +363,23 @@ free_svm:
return ret;
}
-/* Caller must hold pasid_mutex */
-static int intel_svm_unbind_mm(struct device *dev, u32 pasid)
+void intel_svm_remove_dev_pasid(struct device *dev, u32 pasid)
{
struct intel_svm_dev *sdev;
struct intel_iommu *iommu;
struct intel_svm *svm;
struct mm_struct *mm;
- int ret = -EINVAL;
iommu = device_to_iommu(dev, NULL, NULL);
if (!iommu)
- goto out;
+ return;
- ret = pasid_to_svm_sdev(dev, pasid, &svm, &sdev);
- if (ret)
- goto out;
+ if (pasid_to_svm_sdev(dev, pasid, &svm, &sdev))
+ return;
mm = svm->mm;
if (sdev) {
list_del_rcu(&sdev->list);
- /*
- * Flush the PASID cache and IOTLB for this device.
- * Note that we do depend on the hardware *not* using
- * the PASID any more. Just as we depend on other
- * devices never using PASIDs that they have no right
- * to use. We have a *shared* PASID table, because it's
- * large and has to be physically contiguous. So it's
- * hard to be as defensive as we might like.
- */
- intel_pasid_tear_down_entry(iommu, dev, svm->pasid, false);
- intel_svm_drain_prq(dev, svm->pasid);
kfree_rcu(sdev, rcu);
if (list_empty(&svm->devs)) {
@@ -418,8 +396,6 @@ static int intel_svm_unbind_mm(struct device *dev, u32 pasid)
kfree(svm);
}
}
-out:
- return ret;
}
/* Page request queue descriptor */
@@ -460,7 +436,7 @@ static bool is_canonical_address(u64 addr)
}
/**
- * intel_svm_drain_prq - Drain page requests and responses for a pasid
+ * intel_drain_pasid_prq - Drain page requests and responses for a pasid
* @dev: target device
* @pasid: pasid for draining
*
@@ -474,7 +450,7 @@ static bool is_canonical_address(u64 addr)
* described in VT-d spec CH7.10 to drain all page requests and page
* responses pending in the hardware.
*/
-static void intel_svm_drain_prq(struct device *dev, u32 pasid)
+void intel_drain_pasid_prq(struct device *dev, u32 pasid)
{
struct device_domain_info *info;
struct dmar_domain *domain;
@@ -520,19 +496,7 @@ prq_retry:
goto prq_retry;
}
- /*
- * A work in IO page fault workqueue may try to lock pasid_mutex now.
- * Holding pasid_mutex while waiting in iopf_queue_flush_dev() for
- * all works in the workqueue to finish may cause deadlock.
- *
- * It's unnecessary to hold pasid_mutex in iopf_queue_flush_dev().
- * Unlock it to allow the works to be handled while waiting for
- * them to finish.
- */
- lockdep_assert_held(&pasid_mutex);
- mutex_unlock(&pasid_mutex);
iopf_queue_flush_dev(dev);
- mutex_lock(&pasid_mutex);
/*
* Perform steps described in VT-d spec CH7.10 to drain page
@@ -827,26 +791,14 @@ out:
return ret;
}
-void intel_svm_remove_dev_pasid(struct device *dev, ioasid_t pasid)
-{
- mutex_lock(&pasid_mutex);
- intel_svm_unbind_mm(dev, pasid);
- mutex_unlock(&pasid_mutex);
-}
-
static int intel_svm_set_dev_pasid(struct iommu_domain *domain,
struct device *dev, ioasid_t pasid)
{
struct device_domain_info *info = dev_iommu_priv_get(dev);
struct intel_iommu *iommu = info->iommu;
struct mm_struct *mm = domain->mm;
- int ret;
- mutex_lock(&pasid_mutex);
- ret = intel_svm_bind_mm(iommu, dev, mm);
- mutex_unlock(&pasid_mutex);
-
- return ret;
+ return intel_svm_bind_mm(iommu, dev, mm);
}
static void intel_svm_domain_free(struct iommu_domain *domain)
diff --git a/drivers/iommu/iommu-priv.h b/drivers/iommu/iommu-priv.h
new file mode 100644
index 000000000000..2024a2313348
--- /dev/null
+++ b/drivers/iommu/iommu-priv.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES.
+ */
+#ifndef __LINUX_IOMMU_PRIV_H
+#define __LINUX_IOMMU_PRIV_H
+
+#include <linux/iommu.h>
+
+static inline const struct iommu_ops *dev_iommu_ops(struct device *dev)
+{
+ /*
+ * Assume that valid ops must be installed if iommu_probe_device()
+ * has succeeded. The device ops are essentially for internal use
+ * within the IOMMU subsystem itself, so we should be able to trust
+ * ourselves not to misuse the helper.
+ */
+ return dev->iommu->iommu_dev->ops;
+}
+
+int iommu_group_replace_domain(struct iommu_group *group,
+ struct iommu_domain *new_domain);
+
+int iommu_device_register_bus(struct iommu_device *iommu,
+ const struct iommu_ops *ops, struct bus_type *bus,
+ struct notifier_block *nb);
+void iommu_device_unregister_bus(struct iommu_device *iommu,
+ struct bus_type *bus,
+ struct notifier_block *nb);
+
+#endif /* __LINUX_IOMMU_PRIV_H */
diff --git a/drivers/iommu/iommu-sva.c b/drivers/iommu/iommu-sva.c
index 05c0fb2acbc4..b78671a8a914 100644
--- a/drivers/iommu/iommu-sva.c
+++ b/drivers/iommu/iommu-sva.c
@@ -10,34 +10,30 @@
#include "iommu-sva.h"
static DEFINE_MUTEX(iommu_sva_lock);
-static DEFINE_IDA(iommu_global_pasid_ida);
/* Allocate a PASID for the mm within range (inclusive) */
-static int iommu_sva_alloc_pasid(struct mm_struct *mm, ioasid_t min, ioasid_t max)
+static int iommu_sva_alloc_pasid(struct mm_struct *mm, struct device *dev)
{
+ ioasid_t pasid;
int ret = 0;
- if (min == IOMMU_PASID_INVALID ||
- max == IOMMU_PASID_INVALID ||
- min == 0 || max < min)
- return -EINVAL;
-
if (!arch_pgtable_dma_compat(mm))
return -EBUSY;
mutex_lock(&iommu_sva_lock);
/* Is a PASID already associated with this mm? */
if (mm_valid_pasid(mm)) {
- if (mm->pasid < min || mm->pasid > max)
+ if (mm->pasid >= dev->iommu->max_pasids)
ret = -EOVERFLOW;
goto out;
}
- ret = ida_alloc_range(&iommu_global_pasid_ida, min, max, GFP_KERNEL);
- if (ret < 0)
+ pasid = iommu_alloc_global_pasid(dev);
+ if (pasid == IOMMU_PASID_INVALID) {
+ ret = -ENOSPC;
goto out;
-
- mm->pasid = ret;
+ }
+ mm->pasid = pasid;
ret = 0;
out:
mutex_unlock(&iommu_sva_lock);
@@ -64,15 +60,10 @@ struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm
{
struct iommu_domain *domain;
struct iommu_sva *handle;
- ioasid_t max_pasids;
int ret;
- max_pasids = dev->iommu->max_pasids;
- if (!max_pasids)
- return ERR_PTR(-EOPNOTSUPP);
-
/* Allocate mm->pasid if necessary. */
- ret = iommu_sva_alloc_pasid(mm, 1, max_pasids - 1);
+ ret = iommu_sva_alloc_pasid(mm, dev);
if (ret)
return ERR_PTR(ret);
@@ -217,5 +208,5 @@ void mm_pasid_drop(struct mm_struct *mm)
if (likely(!mm_valid_pasid(mm)))
return;
- ida_free(&iommu_global_pasid_ida, mm->pasid);
+ iommu_free_global_pasid(mm->pasid);
}
diff --git a/drivers/iommu/iommu-sysfs.c b/drivers/iommu/iommu-sysfs.c
index 99869217fbec..cbe378c34ba3 100644
--- a/drivers/iommu/iommu-sysfs.c
+++ b/drivers/iommu/iommu-sysfs.c
@@ -107,9 +107,6 @@ int iommu_device_link(struct iommu_device *iommu, struct device *link)
{
int ret;
- if (!iommu || IS_ERR(iommu))
- return -ENODEV;
-
ret = sysfs_add_link_to_group(&iommu->dev->kobj, "devices",
&link->kobj, dev_name(link));
if (ret)
@@ -122,14 +119,9 @@ int iommu_device_link(struct iommu_device *iommu, struct device *link)
return ret;
}
-EXPORT_SYMBOL_GPL(iommu_device_link);
void iommu_device_unlink(struct iommu_device *iommu, struct device *link)
{
- if (!iommu || IS_ERR(iommu))
- return;
-
sysfs_remove_link(&link->kobj, "iommu");
sysfs_remove_link_from_group(&iommu->dev->kobj, "devices", dev_name(link));
}
-EXPORT_SYMBOL_GPL(iommu_device_unlink);
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index caaf563d38ae..3bfc56df4f78 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -34,11 +34,14 @@
#include <linux/msi.h>
#include "dma-iommu.h"
+#include "iommu-priv.h"
#include "iommu-sva.h"
+#include "iommu-priv.h"
static struct kset *iommu_group_kset;
static DEFINE_IDA(iommu_group_ida);
+static DEFINE_IDA(iommu_global_pasid_ida);
static unsigned int iommu_def_domain_type __read_mostly;
static bool iommu_dma_strict __read_mostly = IS_ENABLED(CONFIG_IOMMU_DEFAULT_DMA_STRICT);
@@ -127,9 +130,12 @@ static int iommu_setup_default_domain(struct iommu_group *group,
int target_type);
static int iommu_create_device_direct_mappings(struct iommu_domain *domain,
struct device *dev);
-static struct iommu_group *iommu_group_get_for_dev(struct device *dev);
static ssize_t iommu_group_store_type(struct iommu_group *group,
const char *buf, size_t count);
+static struct group_device *iommu_group_alloc_device(struct iommu_group *group,
+ struct device *dev);
+static void __iommu_group_free_device(struct iommu_group *group,
+ struct group_device *grp_dev);
#define IOMMU_GROUP_ATTR(_name, _mode, _show, _store) \
struct iommu_group_attribute iommu_group_attr_##_name = \
@@ -287,6 +293,48 @@ void iommu_device_unregister(struct iommu_device *iommu)
}
EXPORT_SYMBOL_GPL(iommu_device_unregister);
+#if IS_ENABLED(CONFIG_IOMMUFD_TEST)
+void iommu_device_unregister_bus(struct iommu_device *iommu,
+ struct bus_type *bus,
+ struct notifier_block *nb)
+{
+ bus_unregister_notifier(bus, nb);
+ iommu_device_unregister(iommu);
+}
+EXPORT_SYMBOL_GPL(iommu_device_unregister_bus);
+
+/*
+ * Register an iommu driver against a single bus. This is only used by iommufd
+ * selftest to create a mock iommu driver. The caller must provide
+ * some memory to hold a notifier_block.
+ */
+int iommu_device_register_bus(struct iommu_device *iommu,
+ const struct iommu_ops *ops, struct bus_type *bus,
+ struct notifier_block *nb)
+{
+ int err;
+
+ iommu->ops = ops;
+ nb->notifier_call = iommu_bus_notifier;
+ err = bus_register_notifier(bus, nb);
+ if (err)
+ return err;
+
+ spin_lock(&iommu_device_lock);
+ list_add_tail(&iommu->list, &iommu_device_list);
+ spin_unlock(&iommu_device_lock);
+
+ bus->iommu_ops = ops;
+ err = bus_iommu_probe(bus);
+ if (err) {
+ iommu_device_unregister_bus(iommu, bus, nb);
+ return err;
+ }
+ return 0;
+}
+EXPORT_SYMBOL_GPL(iommu_device_register_bus);
+#endif
+
static struct dev_iommu *dev_iommu_get(struct device *dev)
{
struct dev_iommu *param = dev->iommu;
@@ -333,28 +381,18 @@ static u32 dev_iommu_get_max_pasids(struct device *dev)
return min_t(u32, max_pasids, dev->iommu->iommu_dev->max_pasids);
}
-static int __iommu_probe_device(struct device *dev, struct list_head *group_list)
+/*
+ * Init the dev->iommu and dev->iommu_group in the struct device and get the
+ * driver probed
+ */
+static int iommu_init_device(struct device *dev, const struct iommu_ops *ops)
{
- const struct iommu_ops *ops = dev->bus->iommu_ops;
struct iommu_device *iommu_dev;
struct iommu_group *group;
- static DEFINE_MUTEX(iommu_probe_device_lock);
int ret;
- if (!ops)
- return -ENODEV;
- /*
- * Serialise to avoid races between IOMMU drivers registering in
- * parallel and/or the "replay" calls from ACPI/OF code via client
- * driver probe. Once the latter have been cleaned up we should
- * probably be able to use device_lock() here to minimise the scope,
- * but for now enforcing a simple global ordering is fine.
- */
- mutex_lock(&iommu_probe_device_lock);
- if (!dev_iommu_get(dev)) {
- ret = -ENOMEM;
- goto err_unlock;
- }
+ if (!dev_iommu_get(dev))
+ return -ENOMEM;
if (!try_module_get(ops->owner)) {
ret = -EINVAL;
@@ -364,124 +402,184 @@ static int __iommu_probe_device(struct device *dev, struct list_head *group_list
iommu_dev = ops->probe_device(dev);
if (IS_ERR(iommu_dev)) {
ret = PTR_ERR(iommu_dev);
- goto out_module_put;
+ goto err_module_put;
}
- dev->iommu->iommu_dev = iommu_dev;
- dev->iommu->max_pasids = dev_iommu_get_max_pasids(dev);
- if (ops->is_attach_deferred)
- dev->iommu->attach_deferred = ops->is_attach_deferred(dev);
+ ret = iommu_device_link(iommu_dev, dev);
+ if (ret)
+ goto err_release;
- group = iommu_group_get_for_dev(dev);
+ group = ops->device_group(dev);
+ if (WARN_ON_ONCE(group == NULL))
+ group = ERR_PTR(-EINVAL);
if (IS_ERR(group)) {
ret = PTR_ERR(group);
- goto out_release;
+ goto err_unlink;
}
+ dev->iommu_group = group;
- mutex_lock(&group->mutex);
- if (group_list && !group->default_domain && list_empty(&group->entry))
- list_add_tail(&group->entry, group_list);
- mutex_unlock(&group->mutex);
- iommu_group_put(group);
-
- mutex_unlock(&iommu_probe_device_lock);
- iommu_device_link(iommu_dev, dev);
-
+ dev->iommu->iommu_dev = iommu_dev;
+ dev->iommu->max_pasids = dev_iommu_get_max_pasids(dev);
+ if (ops->is_attach_deferred)
+ dev->iommu->attach_deferred = ops->is_attach_deferred(dev);
return 0;
-out_release:
+err_unlink:
+ iommu_device_unlink(iommu_dev, dev);
+err_release:
if (ops->release_device)
ops->release_device(dev);
-
-out_module_put:
+err_module_put:
module_put(ops->owner);
-
err_free:
dev_iommu_free(dev);
+ return ret;
+}
-err_unlock:
- mutex_unlock(&iommu_probe_device_lock);
+static void iommu_deinit_device(struct device *dev)
+{
+ struct iommu_group *group = dev->iommu_group;
+ const struct iommu_ops *ops = dev_iommu_ops(dev);
- return ret;
+ lockdep_assert_held(&group->mutex);
+
+ iommu_device_unlink(dev->iommu->iommu_dev, dev);
+
+ /*
+ * release_device() must stop using any attached domain on the device.
+ * If there are still other devices in the group they are not effected
+ * by this callback.
+ *
+ * The IOMMU driver must set the device to either an identity or
+ * blocking translation and stop using any domain pointer, as it is
+ * going to be freed.
+ */
+ if (ops->release_device)
+ ops->release_device(dev);
+
+ /*
+ * If this is the last driver to use the group then we must free the
+ * domains before we do the module_put().
+ */
+ if (list_empty(&group->devices)) {
+ if (group->default_domain) {
+ iommu_domain_free(group->default_domain);
+ group->default_domain = NULL;
+ }
+ if (group->blocking_domain) {
+ iommu_domain_free(group->blocking_domain);
+ group->blocking_domain = NULL;
+ }
+ group->domain = NULL;
+ }
+
+ /* Caller must put iommu_group */
+ dev->iommu_group = NULL;
+ module_put(ops->owner);
+ dev_iommu_free(dev);
}
-int iommu_probe_device(struct device *dev)
+static int __iommu_probe_device(struct device *dev, struct list_head *group_list)
{
- const struct iommu_ops *ops;
+ const struct iommu_ops *ops = dev->bus->iommu_ops;
struct iommu_group *group;
+ static DEFINE_MUTEX(iommu_probe_device_lock);
+ struct group_device *gdev;
int ret;
- ret = __iommu_probe_device(dev, NULL);
- if (ret)
- goto err_out;
+ if (!ops)
+ return -ENODEV;
+ /*
+ * Serialise to avoid races between IOMMU drivers registering in
+ * parallel and/or the "replay" calls from ACPI/OF code via client
+ * driver probe. Once the latter have been cleaned up we should
+ * probably be able to use device_lock() here to minimise the scope,
+ * but for now enforcing a simple global ordering is fine.
+ */
+ mutex_lock(&iommu_probe_device_lock);
- group = iommu_group_get(dev);
- if (!group) {
- ret = -ENODEV;
- goto err_release;
+ /* Device is probed already if in a group */
+ if (dev->iommu_group) {
+ ret = 0;
+ goto out_unlock;
}
+ ret = iommu_init_device(dev, ops);
+ if (ret)
+ goto out_unlock;
+
+ group = dev->iommu_group;
+ gdev = iommu_group_alloc_device(group, dev);
mutex_lock(&group->mutex);
+ if (IS_ERR(gdev)) {
+ ret = PTR_ERR(gdev);
+ goto err_put_group;
+ }
+ /*
+ * The gdev must be in the list before calling
+ * iommu_setup_default_domain()
+ */
+ list_add_tail(&gdev->list, &group->devices);
+ WARN_ON(group->default_domain && !group->domain);
if (group->default_domain)
iommu_create_device_direct_mappings(group->default_domain, dev);
-
if (group->domain) {
ret = __iommu_device_set_domain(group, dev, group->domain, 0);
if (ret)
- goto err_unlock;
- } else if (!group->default_domain) {
+ goto err_remove_gdev;
+ } else if (!group->default_domain && !group_list) {
ret = iommu_setup_default_domain(group, 0);
if (ret)
- goto err_unlock;
+ goto err_remove_gdev;
+ } else if (!group->default_domain) {
+ /*
+ * With a group_list argument we defer the default_domain setup
+ * to the caller by providing a de-duplicated list of groups
+ * that need further setup.
+ */
+ if (list_empty(&group->entry))
+ list_add_tail(&group->entry, group_list);
}
-
mutex_unlock(&group->mutex);
- iommu_group_put(group);
+ mutex_unlock(&iommu_probe_device_lock);
- ops = dev_iommu_ops(dev);
- if (ops->probe_finalize)
- ops->probe_finalize(dev);
+ if (dev_is_pci(dev))
+ iommu_dma_set_pci_32bit_workaround(dev);
return 0;
-err_unlock:
+err_remove_gdev:
+ list_del(&gdev->list);
+ __iommu_group_free_device(group, gdev);
+err_put_group:
+ iommu_deinit_device(dev);
mutex_unlock(&group->mutex);
iommu_group_put(group);
-err_release:
- iommu_release_device(dev);
+out_unlock:
+ mutex_unlock(&iommu_probe_device_lock);
-err_out:
return ret;
-
}
-/*
- * Remove a device from a group's device list and return the group device
- * if successful.
- */
-static struct group_device *
-__iommu_group_remove_device(struct iommu_group *group, struct device *dev)
+int iommu_probe_device(struct device *dev)
{
- struct group_device *device;
+ const struct iommu_ops *ops;
+ int ret;
- lockdep_assert_held(&group->mutex);
- for_each_group_device(group, device) {
- if (device->dev == dev) {
- list_del(&device->list);
- return device;
- }
- }
+ ret = __iommu_probe_device(dev, NULL);
+ if (ret)
+ return ret;
- return NULL;
+ ops = dev_iommu_ops(dev);
+ if (ops->probe_finalize)
+ ops->probe_finalize(dev);
+
+ return 0;
}
-/*
- * Release a device from its group and decrements the iommu group reference
- * count.
- */
-static void __iommu_group_release_device(struct iommu_group *group,
- struct group_device *grp_dev)
+static void __iommu_group_free_device(struct iommu_group *group,
+ struct group_device *grp_dev)
{
struct device *dev = grp_dev->dev;
@@ -490,54 +588,57 @@ static void __iommu_group_release_device(struct iommu_group *group,
trace_remove_device_from_group(group->id, dev);
+ /*
+ * If the group has become empty then ownership must have been
+ * released, and the current domain must be set back to NULL or
+ * the default domain.
+ */
+ if (list_empty(&group->devices))
+ WARN_ON(group->owner_cnt ||
+ group->domain != group->default_domain);
+
kfree(grp_dev->name);
kfree(grp_dev);
- dev->iommu_group = NULL;
- kobject_put(group->devices_kobj);
}
-static void iommu_release_device(struct device *dev)
+/* Remove the iommu_group from the struct device. */
+static void __iommu_group_remove_device(struct device *dev)
{
struct iommu_group *group = dev->iommu_group;
struct group_device *device;
- const struct iommu_ops *ops;
-
- if (!dev->iommu || !group)
- return;
-
- iommu_device_unlink(dev->iommu->iommu_dev, dev);
mutex_lock(&group->mutex);
- device = __iommu_group_remove_device(group, dev);
+ for_each_group_device(group, device) {
+ if (device->dev != dev)
+ continue;
- /*
- * If the group has become empty then ownership must have been released,
- * and the current domain must be set back to NULL or the default
- * domain.
- */
- if (list_empty(&group->devices))
- WARN_ON(group->owner_cnt ||
- group->domain != group->default_domain);
+ list_del(&device->list);
+ __iommu_group_free_device(group, device);
+ if (dev->iommu && dev->iommu->iommu_dev)
+ iommu_deinit_device(dev);
+ else
+ dev->iommu_group = NULL;
+ break;
+ }
+ mutex_unlock(&group->mutex);
/*
- * release_device() must stop using any attached domain on the device.
- * If there are still other devices in the group they are not effected
- * by this callback.
- *
- * The IOMMU driver must set the device to either an identity or
- * blocking translation and stop using any domain pointer, as it is
- * going to be freed.
+ * Pairs with the get in iommu_init_device() or
+ * iommu_group_add_device()
*/
- ops = dev_iommu_ops(dev);
- if (ops->release_device)
- ops->release_device(dev);
- mutex_unlock(&group->mutex);
+ iommu_group_put(group);
+}
- if (device)
- __iommu_group_release_device(group, device);
+static void iommu_release_device(struct device *dev)
+{
+ struct iommu_group *group = dev->iommu_group;
- module_put(ops->owner);
- dev_iommu_free(dev);
+ if (group)
+ __iommu_group_remove_device(dev);
+
+ /* Free any fwspec if no iommu_driver was ever attached */
+ if (dev->iommu)
+ dev_iommu_free(dev);
}
static int __init iommu_set_def_domain_type(char *str)
@@ -798,10 +899,9 @@ static void iommu_group_release(struct kobject *kobj)
ida_free(&iommu_group_ida, group->id);
- if (group->default_domain)
- iommu_domain_free(group->default_domain);
- if (group->blocking_domain)
- iommu_domain_free(group->blocking_domain);
+ /* Domains are free'd by iommu_deinit_device() */
+ WARN_ON(group->default_domain);
+ WARN_ON(group->blocking_domain);
kfree(group->name);
kfree(group);
@@ -959,14 +1059,12 @@ static int iommu_create_device_direct_mappings(struct iommu_domain *domain,
unsigned long pg_size;
int ret = 0;
- if (!iommu_is_dma_domain(domain))
- return 0;
-
- BUG_ON(!domain->pgsize_bitmap);
-
- pg_size = 1UL << __ffs(domain->pgsize_bitmap);
+ pg_size = domain->pgsize_bitmap ? 1UL << __ffs(domain->pgsize_bitmap) : 0;
INIT_LIST_HEAD(&mappings);
+ if (WARN_ON_ONCE(iommu_is_dma_domain(domain) && !pg_size))
+ return -EINVAL;
+
iommu_get_resv_regions(dev, &mappings);
/* We need to consider overlapping regions for different devices */
@@ -974,13 +1072,17 @@ static int iommu_create_device_direct_mappings(struct iommu_domain *domain,
dma_addr_t start, end, addr;
size_t map_size = 0;
- start = ALIGN(entry->start, pg_size);
- end = ALIGN(entry->start + entry->length, pg_size);
+ if (entry->type == IOMMU_RESV_DIRECT)
+ dev->iommu->require_direct = 1;
- if (entry->type != IOMMU_RESV_DIRECT &&
- entry->type != IOMMU_RESV_DIRECT_RELAXABLE)
+ if ((entry->type != IOMMU_RESV_DIRECT &&
+ entry->type != IOMMU_RESV_DIRECT_RELAXABLE) ||
+ !iommu_is_dma_domain(domain))
continue;
+ start = ALIGN(entry->start, pg_size);
+ end = ALIGN(entry->start + entry->length, pg_size);
+
for (addr = start; addr <= end; addr += pg_size) {
phys_addr_t phys_addr;
@@ -1014,22 +1116,16 @@ out:
return ret;
}
-/**
- * iommu_group_add_device - add a device to an iommu group
- * @group: the group into which to add the device (reference should be held)
- * @dev: the device
- *
- * This function is called by an iommu driver to add a device into a
- * group. Adding a device increments the group reference count.
- */
-int iommu_group_add_device(struct iommu_group *group, struct device *dev)
+/* This is undone by __iommu_group_free_device() */
+static struct group_device *iommu_group_alloc_device(struct iommu_group *group,
+ struct device *dev)
{
int ret, i = 0;
struct group_device *device;
device = kzalloc(sizeof(*device), GFP_KERNEL);
if (!device)
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
device->dev = dev;
@@ -1060,18 +1156,11 @@ rename:
goto err_free_name;
}
- kobject_get(group->devices_kobj);
-
- dev->iommu_group = group;
-
- mutex_lock(&group->mutex);
- list_add_tail(&device->list, &group->devices);
- mutex_unlock(&group->mutex);
trace_add_device_to_group(group->id, dev);
dev_info(dev, "Adding to iommu group %d\n", group->id);
- return 0;
+ return device;
err_free_name:
kfree(device->name);
@@ -1080,7 +1169,32 @@ err_remove_link:
err_free_device:
kfree(device);
dev_err(dev, "Failed to add to iommu group %d: %d\n", group->id, ret);
- return ret;
+ return ERR_PTR(ret);
+}
+
+/**
+ * iommu_group_add_device - add a device to an iommu group
+ * @group: the group into which to add the device (reference should be held)
+ * @dev: the device
+ *
+ * This function is called by an iommu driver to add a device into a
+ * group. Adding a device increments the group reference count.
+ */
+int iommu_group_add_device(struct iommu_group *group, struct device *dev)
+{
+ struct group_device *gdev;
+
+ gdev = iommu_group_alloc_device(group, dev);
+ if (IS_ERR(gdev))
+ return PTR_ERR(gdev);
+
+ iommu_group_ref_get(group);
+ dev->iommu_group = group;
+
+ mutex_lock(&group->mutex);
+ list_add_tail(&gdev->list, &group->devices);
+ mutex_unlock(&group->mutex);
+ return 0;
}
EXPORT_SYMBOL_GPL(iommu_group_add_device);
@@ -1094,19 +1208,13 @@ EXPORT_SYMBOL_GPL(iommu_group_add_device);
void iommu_group_remove_device(struct device *dev)
{
struct iommu_group *group = dev->iommu_group;
- struct group_device *device;
if (!group)
return;
dev_info(dev, "Removing from iommu group %d\n", group->id);
- mutex_lock(&group->mutex);
- device = __iommu_group_remove_device(group, dev);
- mutex_unlock(&group->mutex);
-
- if (device)
- __iommu_group_release_device(group, device);
+ __iommu_group_remove_device(dev);
}
EXPORT_SYMBOL_GPL(iommu_group_remove_device);
@@ -1664,45 +1772,6 @@ iommu_group_alloc_default_domain(struct iommu_group *group, int req_type)
return dom;
}
-/**
- * iommu_group_get_for_dev - Find or create the IOMMU group for a device
- * @dev: target device
- *
- * This function is intended to be called by IOMMU drivers and extended to
- * support common, bus-defined algorithms when determining or creating the
- * IOMMU group for a device. On success, the caller will hold a reference
- * to the returned IOMMU group, which will already include the provided
- * device. The reference should be released with iommu_group_put().
- */
-static struct iommu_group *iommu_group_get_for_dev(struct device *dev)
-{
- const struct iommu_ops *ops = dev_iommu_ops(dev);
- struct iommu_group *group;
- int ret;
-
- group = iommu_group_get(dev);
- if (group)
- return group;
-
- group = ops->device_group(dev);
- if (WARN_ON_ONCE(group == NULL))
- return ERR_PTR(-EINVAL);
-
- if (IS_ERR(group))
- return group;
-
- ret = iommu_group_add_device(group, dev);
- if (ret)
- goto out_put_group;
-
- return group;
-
-out_put_group:
- iommu_group_put(group);
-
- return ERR_PTR(ret);
-}
-
struct iommu_domain *iommu_group_default_domain(struct iommu_group *group)
{
return group->default_domain;
@@ -1711,16 +1780,8 @@ struct iommu_domain *iommu_group_default_domain(struct iommu_group *group)
static int probe_iommu_group(struct device *dev, void *data)
{
struct list_head *group_list = data;
- struct iommu_group *group;
int ret;
- /* Device is probed already if in a group */
- group = iommu_group_get(dev);
- if (group) {
- iommu_group_put(group);
- return 0;
- }
-
ret = __iommu_probe_device(dev, group_list);
if (ret == -ENODEV)
ret = 0;
@@ -1796,11 +1857,6 @@ int bus_iommu_probe(const struct bus_type *bus)
LIST_HEAD(group_list);
int ret;
- /*
- * This code-path does not allocate the default domain when
- * creating the iommu group, so do it after the groups are
- * created.
- */
ret = bus_for_each_dev(bus, NULL, &group_list, probe_iommu_group);
if (ret)
return ret;
@@ -1813,6 +1869,11 @@ int bus_iommu_probe(const struct bus_type *bus)
/* Remove item from the list */
list_del_init(&group->entry);
+ /*
+ * We go to the trouble of deferred default domain creation so
+ * that the cross-group default domain type and the setup of the
+ * IOMMU_RESV_DIRECT will work correctly in non-hotpug scenarios.
+ */
ret = iommu_setup_default_domain(group, 0);
if (ret) {
mutex_unlock(&group->mutex);
@@ -2114,6 +2175,32 @@ int iommu_attach_group(struct iommu_domain *domain, struct iommu_group *group)
}
EXPORT_SYMBOL_GPL(iommu_attach_group);
+/**
+ * iommu_group_replace_domain - replace the domain that a group is attached to
+ * @new_domain: new IOMMU domain to replace with
+ * @group: IOMMU group that will be attached to the new domain
+ *
+ * This API allows the group to switch domains without being forced to go to
+ * the blocking domain in-between.
+ *
+ * If the currently attached domain is a core domain (e.g. a default_domain),
+ * it will act just like the iommu_attach_group().
+ */
+int iommu_group_replace_domain(struct iommu_group *group,
+ struct iommu_domain *new_domain)
+{
+ int ret;
+
+ if (!new_domain)
+ return -EINVAL;
+
+ mutex_lock(&group->mutex);
+ ret = __iommu_group_set_domain(group, new_domain);
+ mutex_unlock(&group->mutex);
+ return ret;
+}
+EXPORT_SYMBOL_NS_GPL(iommu_group_replace_domain, IOMMUFD_INTERNAL);
+
static int __iommu_device_set_domain(struct iommu_group *group,
struct device *dev,
struct iommu_domain *new_domain,
@@ -2121,6 +2208,21 @@ static int __iommu_device_set_domain(struct iommu_group *group,
{
int ret;
+ /*
+ * If the device requires IOMMU_RESV_DIRECT then we cannot allow
+ * the blocking domain to be attached as it does not contain the
+ * required 1:1 mapping. This test effectively excludes the device
+ * being used with iommu_group_claim_dma_owner() which will block
+ * vfio and iommufd as well.
+ */
+ if (dev->iommu->require_direct &&
+ (new_domain->type == IOMMU_DOMAIN_BLOCKED ||
+ new_domain == group->blocking_domain)) {
+ dev_warn(dev,
+ "Firmware has requested this device have a 1:1 IOMMU mapping, rejecting configuring the device without a 1:1 mapping. Contact your platform vendor.\n");
+ return -EINVAL;
+ }
+
if (dev->iommu->attach_deferred) {
if (new_domain == group->default_domain)
return 0;
@@ -2642,6 +2744,14 @@ int iommu_set_pgtable_quirks(struct iommu_domain *domain,
}
EXPORT_SYMBOL_GPL(iommu_set_pgtable_quirks);
+/**
+ * iommu_get_resv_regions - get reserved regions
+ * @dev: device for which to get reserved regions
+ * @list: reserved region list for device
+ *
+ * This returns a list of reserved IOVA regions specific to this device.
+ * A domain user should not map IOVA in these ranges.
+ */
void iommu_get_resv_regions(struct device *dev, struct list_head *list)
{
const struct iommu_ops *ops = dev_iommu_ops(dev);
@@ -2649,9 +2759,10 @@ void iommu_get_resv_regions(struct device *dev, struct list_head *list)
if (ops->get_resv_regions)
ops->get_resv_regions(dev, list);
}
+EXPORT_SYMBOL_GPL(iommu_get_resv_regions);
/**
- * iommu_put_resv_regions - release resered regions
+ * iommu_put_resv_regions - release reserved regions
* @dev: device for which to free reserved regions
* @list: reserved region list for device
*
@@ -3203,7 +3314,7 @@ static void __iommu_release_dma_ownership(struct iommu_group *group)
/**
* iommu_group_release_dma_owner() - Release DMA ownership of a group
- * @dev: The device
+ * @group: The group
*
* Release the DMA ownership claimed by iommu_group_claim_dma_owner().
*/
@@ -3217,7 +3328,7 @@ EXPORT_SYMBOL_GPL(iommu_group_release_dma_owner);
/**
* iommu_device_release_dma_owner() - Release DMA ownership of a device
- * @group: The device.
+ * @dev: The device.
*
* Release the DMA ownership claimed by iommu_device_claim_dma_owner().
*/
@@ -3400,3 +3511,30 @@ struct iommu_domain *iommu_sva_domain_alloc(struct device *dev,
return domain;
}
+
+ioasid_t iommu_alloc_global_pasid(struct device *dev)
+{
+ int ret;
+
+ /* max_pasids == 0 means that the device does not support PASID */
+ if (!dev->iommu->max_pasids)
+ return IOMMU_PASID_INVALID;
+
+ /*
+ * max_pasids is set up by vendor driver based on number of PASID bits
+ * supported but the IDA allocation is inclusive.
+ */
+ ret = ida_alloc_range(&iommu_global_pasid_ida, IOMMU_FIRST_GLOBAL_PASID,
+ dev->iommu->max_pasids - 1, GFP_KERNEL);
+ return ret < 0 ? IOMMU_PASID_INVALID : ret;
+}
+EXPORT_SYMBOL_GPL(iommu_alloc_global_pasid);
+
+void iommu_free_global_pasid(ioasid_t pasid)
+{
+ if (WARN_ON(pasid == IOMMU_PASID_INVALID))
+ return;
+
+ ida_free(&iommu_global_pasid_ida, pasid);
+}
+EXPORT_SYMBOL_GPL(iommu_free_global_pasid);
diff --git a/drivers/iommu/iommufd/Kconfig b/drivers/iommu/iommufd/Kconfig
index ada693ea51a7..99d4b075df49 100644
--- a/drivers/iommu/iommufd/Kconfig
+++ b/drivers/iommu/iommufd/Kconfig
@@ -14,8 +14,8 @@ config IOMMUFD
if IOMMUFD
config IOMMUFD_VFIO_CONTAINER
bool "IOMMUFD provides the VFIO container /dev/vfio/vfio"
- depends on VFIO && !VFIO_CONTAINER
- default VFIO && !VFIO_CONTAINER
+ depends on VFIO_GROUP && !VFIO_CONTAINER
+ default VFIO_GROUP && !VFIO_CONTAINER
help
IOMMUFD will provide /dev/vfio/vfio instead of VFIO. This relies on
IOMMUFD providing compatibility emulation to give the same ioctls.
diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c
index ed2937a4e196..ce78c3671539 100644
--- a/drivers/iommu/iommufd/device.c
+++ b/drivers/iommu/iommufd/device.c
@@ -4,6 +4,8 @@
#include <linux/iommufd.h>
#include <linux/slab.h>
#include <linux/iommu.h>
+#include <uapi/linux/iommufd.h>
+#include "../iommu-priv.h"
#include "io_pagetable.h"
#include "iommufd_private.h"
@@ -15,13 +17,127 @@ MODULE_PARM_DESC(
"Allow IOMMUFD to bind to devices even if the platform cannot isolate "
"the MSI interrupt window. Enabling this is a security weakness.");
+static void iommufd_group_release(struct kref *kref)
+{
+ struct iommufd_group *igroup =
+ container_of(kref, struct iommufd_group, ref);
+
+ WARN_ON(igroup->hwpt || !list_empty(&igroup->device_list));
+
+ xa_cmpxchg(&igroup->ictx->groups, iommu_group_id(igroup->group), igroup,
+ NULL, GFP_KERNEL);
+ iommu_group_put(igroup->group);
+ mutex_destroy(&igroup->lock);
+ kfree(igroup);
+}
+
+static void iommufd_put_group(struct iommufd_group *group)
+{
+ kref_put(&group->ref, iommufd_group_release);
+}
+
+static bool iommufd_group_try_get(struct iommufd_group *igroup,
+ struct iommu_group *group)
+{
+ if (!igroup)
+ return false;
+ /*
+ * group ID's cannot be re-used until the group is put back which does
+ * not happen if we could get an igroup pointer under the xa_lock.
+ */
+ if (WARN_ON(igroup->group != group))
+ return false;
+ return kref_get_unless_zero(&igroup->ref);
+}
+
+/*
+ * iommufd needs to store some more data for each iommu_group, we keep a
+ * parallel xarray indexed by iommu_group id to hold this instead of putting it
+ * in the core structure. To keep things simple the iommufd_group memory is
+ * unique within the iommufd_ctx. This makes it easy to check there are no
+ * memory leaks.
+ */
+static struct iommufd_group *iommufd_get_group(struct iommufd_ctx *ictx,
+ struct device *dev)
+{
+ struct iommufd_group *new_igroup;
+ struct iommufd_group *cur_igroup;
+ struct iommufd_group *igroup;
+ struct iommu_group *group;
+ unsigned int id;
+
+ group = iommu_group_get(dev);
+ if (!group)
+ return ERR_PTR(-ENODEV);
+
+ id = iommu_group_id(group);
+
+ xa_lock(&ictx->groups);
+ igroup = xa_load(&ictx->groups, id);
+ if (iommufd_group_try_get(igroup, group)) {
+ xa_unlock(&ictx->groups);
+ iommu_group_put(group);
+ return igroup;
+ }
+ xa_unlock(&ictx->groups);
+
+ new_igroup = kzalloc(sizeof(*new_igroup), GFP_KERNEL);
+ if (!new_igroup) {
+ iommu_group_put(group);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ kref_init(&new_igroup->ref);
+ mutex_init(&new_igroup->lock);
+ INIT_LIST_HEAD(&new_igroup->device_list);
+ new_igroup->sw_msi_start = PHYS_ADDR_MAX;
+ /* group reference moves into new_igroup */
+ new_igroup->group = group;
+
+ /*
+ * The ictx is not additionally refcounted here becase all objects using
+ * an igroup must put it before their destroy completes.
+ */
+ new_igroup->ictx = ictx;
+
+ /*
+ * We dropped the lock so igroup is invalid. NULL is a safe and likely
+ * value to assume for the xa_cmpxchg algorithm.
+ */
+ cur_igroup = NULL;
+ xa_lock(&ictx->groups);
+ while (true) {
+ igroup = __xa_cmpxchg(&ictx->groups, id, cur_igroup, new_igroup,
+ GFP_KERNEL);
+ if (xa_is_err(igroup)) {
+ xa_unlock(&ictx->groups);
+ iommufd_put_group(new_igroup);
+ return ERR_PTR(xa_err(igroup));
+ }
+
+ /* new_group was successfully installed */
+ if (cur_igroup == igroup) {
+ xa_unlock(&ictx->groups);
+ return new_igroup;
+ }
+
+ /* Check again if the current group is any good */
+ if (iommufd_group_try_get(igroup, group)) {
+ xa_unlock(&ictx->groups);
+ iommufd_put_group(new_igroup);
+ return igroup;
+ }
+ cur_igroup = igroup;
+ }
+}
+
void iommufd_device_destroy(struct iommufd_object *obj)
{
struct iommufd_device *idev =
container_of(obj, struct iommufd_device, obj);
iommu_device_release_dma_owner(idev->dev);
- iommu_group_put(idev->group);
+ iommufd_put_group(idev->igroup);
if (!iommufd_selftest_is_mock_dev(idev->dev))
iommufd_ctx_put(idev->ictx);
}
@@ -46,7 +162,7 @@ struct iommufd_device *iommufd_device_bind(struct iommufd_ctx *ictx,
struct device *dev, u32 *id)
{
struct iommufd_device *idev;
- struct iommu_group *group;
+ struct iommufd_group *igroup;
int rc;
/*
@@ -56,9 +172,29 @@ struct iommufd_device *iommufd_device_bind(struct iommufd_ctx *ictx,
if (!device_iommu_capable(dev, IOMMU_CAP_CACHE_COHERENCY))
return ERR_PTR(-EINVAL);
- group = iommu_group_get(dev);
- if (!group)
- return ERR_PTR(-ENODEV);
+ igroup = iommufd_get_group(ictx, dev);
+ if (IS_ERR(igroup))
+ return ERR_CAST(igroup);
+
+ /*
+ * For historical compat with VFIO the insecure interrupt path is
+ * allowed if the module parameter is set. Secure/Isolated means that a
+ * MemWr operation from the device (eg a simple DMA) cannot trigger an
+ * interrupt outside this iommufd context.
+ */
+ if (!iommufd_selftest_is_mock_dev(dev) &&
+ !iommu_group_has_isolated_msi(igroup->group)) {
+ if (!allow_unsafe_interrupts) {
+ rc = -EPERM;
+ goto out_group_put;
+ }
+
+ dev_warn(
+ dev,
+ "MSI interrupts are not secure, they cannot be isolated by the platform. "
+ "Check that platform features like interrupt remapping are enabled. "
+ "Use the \"allow_unsafe_interrupts\" module parameter to override\n");
+ }
rc = iommu_device_claim_dma_owner(dev, ictx);
if (rc)
@@ -77,8 +213,8 @@ struct iommufd_device *iommufd_device_bind(struct iommufd_ctx *ictx,
device_iommu_capable(dev, IOMMU_CAP_ENFORCE_CACHE_COHERENCY);
/* The calling driver is a user until iommufd_device_unbind() */
refcount_inc(&idev->obj.users);
- /* group refcount moves into iommufd_device */
- idev->group = group;
+ /* igroup refcount moves into iommufd_device */
+ idev->igroup = igroup;
/*
* If the caller fails after this success it must call
@@ -93,12 +229,43 @@ struct iommufd_device *iommufd_device_bind(struct iommufd_ctx *ictx,
out_release_owner:
iommu_device_release_dma_owner(dev);
out_group_put:
- iommu_group_put(group);
+ iommufd_put_group(igroup);
return ERR_PTR(rc);
}
EXPORT_SYMBOL_NS_GPL(iommufd_device_bind, IOMMUFD);
/**
+ * iommufd_ctx_has_group - True if any device within the group is bound
+ * to the ictx
+ * @ictx: iommufd file descriptor
+ * @group: Pointer to a physical iommu_group struct
+ *
+ * True if any device within the group has been bound to this ictx, ex. via
+ * iommufd_device_bind(), therefore implying ictx ownership of the group.
+ */
+bool iommufd_ctx_has_group(struct iommufd_ctx *ictx, struct iommu_group *group)
+{
+ struct iommufd_object *obj;
+ unsigned long index;
+
+ if (!ictx || !group)
+ return false;
+
+ xa_lock(&ictx->objects);
+ xa_for_each(&ictx->objects, index, obj) {
+ if (obj->type == IOMMUFD_OBJ_DEVICE &&
+ container_of(obj, struct iommufd_device, obj)
+ ->igroup->group == group) {
+ xa_unlock(&ictx->objects);
+ return true;
+ }
+ }
+ xa_unlock(&ictx->objects);
+ return false;
+}
+EXPORT_SYMBOL_NS_GPL(iommufd_ctx_has_group, IOMMUFD);
+
+/**
* iommufd_device_unbind - Undo iommufd_device_bind()
* @idev: Device returned by iommufd_device_bind()
*
@@ -113,10 +280,22 @@ void iommufd_device_unbind(struct iommufd_device *idev)
}
EXPORT_SYMBOL_NS_GPL(iommufd_device_unbind, IOMMUFD);
-static int iommufd_device_setup_msi(struct iommufd_device *idev,
- struct iommufd_hw_pagetable *hwpt,
- phys_addr_t sw_msi_start)
+struct iommufd_ctx *iommufd_device_to_ictx(struct iommufd_device *idev)
+{
+ return idev->ictx;
+}
+EXPORT_SYMBOL_NS_GPL(iommufd_device_to_ictx, IOMMUFD);
+
+u32 iommufd_device_to_id(struct iommufd_device *idev)
{
+ return idev->obj.id;
+}
+EXPORT_SYMBOL_NS_GPL(iommufd_device_to_id, IOMMUFD);
+
+static int iommufd_group_setup_msi(struct iommufd_group *igroup,
+ struct iommufd_hw_pagetable *hwpt)
+{
+ phys_addr_t sw_msi_start = igroup->sw_msi_start;
int rc;
/*
@@ -143,128 +322,192 @@ static int iommufd_device_setup_msi(struct iommufd_device *idev,
*/
hwpt->msi_cookie = true;
}
-
- /*
- * For historical compat with VFIO the insecure interrupt path is
- * allowed if the module parameter is set. Insecure means that a MemWr
- * operation from the device (eg a simple DMA) cannot trigger an
- * interrupt outside this iommufd context.
- */
- if (!iommufd_selftest_is_mock_dev(idev->dev) &&
- !iommu_group_has_isolated_msi(idev->group)) {
- if (!allow_unsafe_interrupts)
- return -EPERM;
-
- dev_warn(
- idev->dev,
- "MSI interrupts are not secure, they cannot be isolated by the platform. "
- "Check that platform features like interrupt remapping are enabled. "
- "Use the \"allow_unsafe_interrupts\" module parameter to override\n");
- }
return 0;
}
-static bool iommufd_hw_pagetable_has_group(struct iommufd_hw_pagetable *hwpt,
- struct iommu_group *group)
-{
- struct iommufd_device *cur_dev;
-
- lockdep_assert_held(&hwpt->devices_lock);
-
- list_for_each_entry(cur_dev, &hwpt->devices, devices_item)
- if (cur_dev->group == group)
- return true;
- return false;
-}
-
int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt,
struct iommufd_device *idev)
{
- phys_addr_t sw_msi_start = PHYS_ADDR_MAX;
int rc;
- lockdep_assert_held(&hwpt->devices_lock);
-
- if (WARN_ON(idev->hwpt))
- return -EINVAL;
+ mutex_lock(&idev->igroup->lock);
- /*
- * Try to upgrade the domain we have, it is an iommu driver bug to
- * report IOMMU_CAP_ENFORCE_CACHE_COHERENCY but fail
- * enforce_cache_coherency when there are no devices attached to the
- * domain.
- */
- if (idev->enforce_cache_coherency && !hwpt->enforce_cache_coherency) {
- if (hwpt->domain->ops->enforce_cache_coherency)
- hwpt->enforce_cache_coherency =
- hwpt->domain->ops->enforce_cache_coherency(
- hwpt->domain);
- if (!hwpt->enforce_cache_coherency) {
- WARN_ON(list_empty(&hwpt->devices));
- return -EINVAL;
- }
+ if (idev->igroup->hwpt != NULL && idev->igroup->hwpt != hwpt) {
+ rc = -EINVAL;
+ goto err_unlock;
}
- rc = iopt_table_enforce_group_resv_regions(&hwpt->ioas->iopt, idev->dev,
- idev->group, &sw_msi_start);
- if (rc)
- return rc;
+ /* Try to upgrade the domain we have */
+ if (idev->enforce_cache_coherency) {
+ rc = iommufd_hw_pagetable_enforce_cc(hwpt);
+ if (rc)
+ goto err_unlock;
+ }
- rc = iommufd_device_setup_msi(idev, hwpt, sw_msi_start);
+ rc = iopt_table_enforce_dev_resv_regions(&hwpt->ioas->iopt, idev->dev,
+ &idev->igroup->sw_msi_start);
if (rc)
- goto err_unresv;
+ goto err_unlock;
/*
- * FIXME: Hack around missing a device-centric iommu api, only attach to
- * the group once for the first device that is in the group.
+ * Only attach to the group once for the first device that is in the
+ * group. All the other devices will follow this attachment. The user
+ * should attach every device individually to the hwpt as the per-device
+ * reserved regions are only updated during individual device
+ * attachment.
*/
- if (!iommufd_hw_pagetable_has_group(hwpt, idev->group)) {
- rc = iommu_attach_group(hwpt->domain, idev->group);
+ if (list_empty(&idev->igroup->device_list)) {
+ rc = iommufd_group_setup_msi(idev->igroup, hwpt);
if (rc)
goto err_unresv;
+
+ rc = iommu_attach_group(hwpt->domain, idev->igroup->group);
+ if (rc)
+ goto err_unresv;
+ idev->igroup->hwpt = hwpt;
}
+ refcount_inc(&hwpt->obj.users);
+ list_add_tail(&idev->group_item, &idev->igroup->device_list);
+ mutex_unlock(&idev->igroup->lock);
return 0;
err_unresv:
iopt_remove_reserved_iova(&hwpt->ioas->iopt, idev->dev);
+err_unlock:
+ mutex_unlock(&idev->igroup->lock);
return rc;
}
-void iommufd_hw_pagetable_detach(struct iommufd_hw_pagetable *hwpt,
- struct iommufd_device *idev)
+struct iommufd_hw_pagetable *
+iommufd_hw_pagetable_detach(struct iommufd_device *idev)
{
- if (!iommufd_hw_pagetable_has_group(hwpt, idev->group))
- iommu_detach_group(hwpt->domain, idev->group);
+ struct iommufd_hw_pagetable *hwpt = idev->igroup->hwpt;
+
+ mutex_lock(&idev->igroup->lock);
+ list_del(&idev->group_item);
+ if (list_empty(&idev->igroup->device_list)) {
+ iommu_detach_group(hwpt->domain, idev->igroup->group);
+ idev->igroup->hwpt = NULL;
+ }
iopt_remove_reserved_iova(&hwpt->ioas->iopt, idev->dev);
+ mutex_unlock(&idev->igroup->lock);
+
+ /* Caller must destroy hwpt */
+ return hwpt;
}
-static int iommufd_device_do_attach(struct iommufd_device *idev,
- struct iommufd_hw_pagetable *hwpt)
+static struct iommufd_hw_pagetable *
+iommufd_device_do_attach(struct iommufd_device *idev,
+ struct iommufd_hw_pagetable *hwpt)
{
int rc;
- mutex_lock(&hwpt->devices_lock);
rc = iommufd_hw_pagetable_attach(hwpt, idev);
if (rc)
- goto out_unlock;
+ return ERR_PTR(rc);
+ return NULL;
+}
- idev->hwpt = hwpt;
- refcount_inc(&hwpt->obj.users);
- list_add(&idev->devices_item, &hwpt->devices);
-out_unlock:
- mutex_unlock(&hwpt->devices_lock);
- return rc;
+static struct iommufd_hw_pagetable *
+iommufd_device_do_replace(struct iommufd_device *idev,
+ struct iommufd_hw_pagetable *hwpt)
+{
+ struct iommufd_group *igroup = idev->igroup;
+ struct iommufd_hw_pagetable *old_hwpt;
+ unsigned int num_devices = 0;
+ struct iommufd_device *cur;
+ int rc;
+
+ mutex_lock(&idev->igroup->lock);
+
+ if (igroup->hwpt == NULL) {
+ rc = -EINVAL;
+ goto err_unlock;
+ }
+
+ if (hwpt == igroup->hwpt) {
+ mutex_unlock(&idev->igroup->lock);
+ return NULL;
+ }
+
+ /* Try to upgrade the domain we have */
+ list_for_each_entry(cur, &igroup->device_list, group_item) {
+ num_devices++;
+ if (cur->enforce_cache_coherency) {
+ rc = iommufd_hw_pagetable_enforce_cc(hwpt);
+ if (rc)
+ goto err_unlock;
+ }
+ }
+
+ old_hwpt = igroup->hwpt;
+ if (hwpt->ioas != old_hwpt->ioas) {
+ list_for_each_entry(cur, &igroup->device_list, group_item) {
+ rc = iopt_table_enforce_dev_resv_regions(
+ &hwpt->ioas->iopt, cur->dev, NULL);
+ if (rc)
+ goto err_unresv;
+ }
+ }
+
+ rc = iommufd_group_setup_msi(idev->igroup, hwpt);
+ if (rc)
+ goto err_unresv;
+
+ rc = iommu_group_replace_domain(igroup->group, hwpt->domain);
+ if (rc)
+ goto err_unresv;
+
+ if (hwpt->ioas != old_hwpt->ioas) {
+ list_for_each_entry(cur, &igroup->device_list, group_item)
+ iopt_remove_reserved_iova(&old_hwpt->ioas->iopt,
+ cur->dev);
+ }
+
+ igroup->hwpt = hwpt;
+
+ /*
+ * Move the refcounts held by the device_list to the new hwpt. Retain a
+ * refcount for this thread as the caller will free it.
+ */
+ refcount_add(num_devices, &hwpt->obj.users);
+ if (num_devices > 1)
+ WARN_ON(refcount_sub_and_test(num_devices - 1,
+ &old_hwpt->obj.users));
+ mutex_unlock(&idev->igroup->lock);
+
+ /* Caller must destroy old_hwpt */
+ return old_hwpt;
+err_unresv:
+ list_for_each_entry(cur, &igroup->device_list, group_item)
+ iopt_remove_reserved_iova(&hwpt->ioas->iopt, cur->dev);
+err_unlock:
+ mutex_unlock(&idev->igroup->lock);
+ return ERR_PTR(rc);
}
+typedef struct iommufd_hw_pagetable *(*attach_fn)(
+ struct iommufd_device *idev, struct iommufd_hw_pagetable *hwpt);
+
/*
* When automatically managing the domains we search for a compatible domain in
* the iopt and if one is found use it, otherwise create a new domain.
* Automatic domain selection will never pick a manually created domain.
*/
-static int iommufd_device_auto_get_domain(struct iommufd_device *idev,
- struct iommufd_ioas *ioas)
+static struct iommufd_hw_pagetable *
+iommufd_device_auto_get_domain(struct iommufd_device *idev,
+ struct iommufd_ioas *ioas, u32 *pt_id,
+ attach_fn do_attach)
{
+ /*
+ * iommufd_hw_pagetable_attach() is called by
+ * iommufd_hw_pagetable_alloc() in immediate attachment mode, same as
+ * iommufd_device_do_attach(). So if we are in this mode then we prefer
+ * to use the immediate_attach path as it supports drivers that can't
+ * directly allocate a domain.
+ */
+ bool immediate_attach = do_attach == iommufd_device_do_attach;
+ struct iommufd_hw_pagetable *destroy_hwpt;
struct iommufd_hw_pagetable *hwpt;
- int rc;
/*
* There is no differentiation when domains are allocated, so any domain
@@ -278,50 +521,58 @@ static int iommufd_device_auto_get_domain(struct iommufd_device *idev,
if (!iommufd_lock_obj(&hwpt->obj))
continue;
- rc = iommufd_device_do_attach(idev, hwpt);
+ destroy_hwpt = (*do_attach)(idev, hwpt);
+ if (IS_ERR(destroy_hwpt)) {
+ iommufd_put_object(&hwpt->obj);
+ /*
+ * -EINVAL means the domain is incompatible with the
+ * device. Other error codes should propagate to
+ * userspace as failure. Success means the domain is
+ * attached.
+ */
+ if (PTR_ERR(destroy_hwpt) == -EINVAL)
+ continue;
+ goto out_unlock;
+ }
+ *pt_id = hwpt->obj.id;
iommufd_put_object(&hwpt->obj);
-
- /*
- * -EINVAL means the domain is incompatible with the device.
- * Other error codes should propagate to userspace as failure.
- * Success means the domain is attached.
- */
- if (rc == -EINVAL)
- continue;
goto out_unlock;
}
- hwpt = iommufd_hw_pagetable_alloc(idev->ictx, ioas, idev, true);
+ hwpt = iommufd_hw_pagetable_alloc(idev->ictx, ioas, idev,
+ immediate_attach);
if (IS_ERR(hwpt)) {
- rc = PTR_ERR(hwpt);
+ destroy_hwpt = ERR_CAST(hwpt);
goto out_unlock;
}
+
+ if (!immediate_attach) {
+ destroy_hwpt = (*do_attach)(idev, hwpt);
+ if (IS_ERR(destroy_hwpt))
+ goto out_abort;
+ } else {
+ destroy_hwpt = NULL;
+ }
+
hwpt->auto_domain = true;
+ *pt_id = hwpt->obj.id;
- mutex_unlock(&ioas->mutex);
iommufd_object_finalize(idev->ictx, &hwpt->obj);
- return 0;
+ mutex_unlock(&ioas->mutex);
+ return destroy_hwpt;
+
+out_abort:
+ iommufd_object_abort_and_destroy(idev->ictx, &hwpt->obj);
out_unlock:
mutex_unlock(&ioas->mutex);
- return rc;
+ return destroy_hwpt;
}
-/**
- * iommufd_device_attach - Connect a device from an iommu_domain
- * @idev: device to attach
- * @pt_id: Input a IOMMUFD_OBJ_IOAS, or IOMMUFD_OBJ_HW_PAGETABLE
- * Output the IOMMUFD_OBJ_HW_PAGETABLE ID
- *
- * This connects the device to an iommu_domain, either automatically or manually
- * selected. Once this completes the device could do DMA.
- *
- * The caller should return the resulting pt_id back to userspace.
- * This function is undone by calling iommufd_device_detach().
- */
-int iommufd_device_attach(struct iommufd_device *idev, u32 *pt_id)
+static int iommufd_device_change_pt(struct iommufd_device *idev, u32 *pt_id,
+ attach_fn do_attach)
{
+ struct iommufd_hw_pagetable *destroy_hwpt;
struct iommufd_object *pt_obj;
- int rc;
pt_obj = iommufd_get_object(idev->ictx, *pt_id, IOMMUFD_OBJ_ANY);
if (IS_ERR(pt_obj))
@@ -332,8 +583,8 @@ int iommufd_device_attach(struct iommufd_device *idev, u32 *pt_id)
struct iommufd_hw_pagetable *hwpt =
container_of(pt_obj, struct iommufd_hw_pagetable, obj);
- rc = iommufd_device_do_attach(idev, hwpt);
- if (rc)
+ destroy_hwpt = (*do_attach)(idev, hwpt);
+ if (IS_ERR(destroy_hwpt))
goto out_put_pt_obj;
break;
}
@@ -341,27 +592,80 @@ int iommufd_device_attach(struct iommufd_device *idev, u32 *pt_id)
struct iommufd_ioas *ioas =
container_of(pt_obj, struct iommufd_ioas, obj);
- rc = iommufd_device_auto_get_domain(idev, ioas);
- if (rc)
+ destroy_hwpt = iommufd_device_auto_get_domain(idev, ioas, pt_id,
+ do_attach);
+ if (IS_ERR(destroy_hwpt))
goto out_put_pt_obj;
break;
}
default:
- rc = -EINVAL;
+ destroy_hwpt = ERR_PTR(-EINVAL);
goto out_put_pt_obj;
}
+ iommufd_put_object(pt_obj);
- refcount_inc(&idev->obj.users);
- *pt_id = idev->hwpt->obj.id;
- rc = 0;
+ /* This destruction has to be after we unlock everything */
+ if (destroy_hwpt)
+ iommufd_hw_pagetable_put(idev->ictx, destroy_hwpt);
+ return 0;
out_put_pt_obj:
iommufd_put_object(pt_obj);
- return rc;
+ return PTR_ERR(destroy_hwpt);
+}
+
+/**
+ * iommufd_device_attach - Connect a device to an iommu_domain
+ * @idev: device to attach
+ * @pt_id: Input a IOMMUFD_OBJ_IOAS, or IOMMUFD_OBJ_HW_PAGETABLE
+ * Output the IOMMUFD_OBJ_HW_PAGETABLE ID
+ *
+ * This connects the device to an iommu_domain, either automatically or manually
+ * selected. Once this completes the device could do DMA.
+ *
+ * The caller should return the resulting pt_id back to userspace.
+ * This function is undone by calling iommufd_device_detach().
+ */
+int iommufd_device_attach(struct iommufd_device *idev, u32 *pt_id)
+{
+ int rc;
+
+ rc = iommufd_device_change_pt(idev, pt_id, &iommufd_device_do_attach);
+ if (rc)
+ return rc;
+
+ /*
+ * Pairs with iommufd_device_detach() - catches caller bugs attempting
+ * to destroy a device with an attachment.
+ */
+ refcount_inc(&idev->obj.users);
+ return 0;
}
EXPORT_SYMBOL_NS_GPL(iommufd_device_attach, IOMMUFD);
/**
+ * iommufd_device_replace - Change the device's iommu_domain
+ * @idev: device to change
+ * @pt_id: Input a IOMMUFD_OBJ_IOAS, or IOMMUFD_OBJ_HW_PAGETABLE
+ * Output the IOMMUFD_OBJ_HW_PAGETABLE ID
+ *
+ * This is the same as::
+ *
+ * iommufd_device_detach();
+ * iommufd_device_attach();
+ *
+ * If it fails then no change is made to the attachment. The iommu driver may
+ * implement this so there is no disruption in translation. This can only be
+ * called if iommufd_device_attach() has already succeeded.
+ */
+int iommufd_device_replace(struct iommufd_device *idev, u32 *pt_id)
+{
+ return iommufd_device_change_pt(idev, pt_id,
+ &iommufd_device_do_replace);
+}
+EXPORT_SYMBOL_NS_GPL(iommufd_device_replace, IOMMUFD);
+
+/**
* iommufd_device_detach - Disconnect a device to an iommu_domain
* @idev: device to detach
*
@@ -370,33 +674,87 @@ EXPORT_SYMBOL_NS_GPL(iommufd_device_attach, IOMMUFD);
*/
void iommufd_device_detach(struct iommufd_device *idev)
{
- struct iommufd_hw_pagetable *hwpt = idev->hwpt;
-
- mutex_lock(&hwpt->devices_lock);
- list_del(&idev->devices_item);
- idev->hwpt = NULL;
- iommufd_hw_pagetable_detach(hwpt, idev);
- mutex_unlock(&hwpt->devices_lock);
-
- if (hwpt->auto_domain)
- iommufd_object_deref_user(idev->ictx, &hwpt->obj);
- else
- refcount_dec(&hwpt->obj.users);
+ struct iommufd_hw_pagetable *hwpt;
+ hwpt = iommufd_hw_pagetable_detach(idev);
+ iommufd_hw_pagetable_put(idev->ictx, hwpt);
refcount_dec(&idev->obj.users);
}
EXPORT_SYMBOL_NS_GPL(iommufd_device_detach, IOMMUFD);
+/*
+ * On success, it will refcount_inc() at a valid new_ioas and refcount_dec() at
+ * a valid cur_ioas (access->ioas). A caller passing in a valid new_ioas should
+ * call iommufd_put_object() if it does an iommufd_get_object() for a new_ioas.
+ */
+static int iommufd_access_change_ioas(struct iommufd_access *access,
+ struct iommufd_ioas *new_ioas)
+{
+ u32 iopt_access_list_id = access->iopt_access_list_id;
+ struct iommufd_ioas *cur_ioas = access->ioas;
+ int rc;
+
+ lockdep_assert_held(&access->ioas_lock);
+
+ /* We are racing with a concurrent detach, bail */
+ if (cur_ioas != access->ioas_unpin)
+ return -EBUSY;
+
+ if (cur_ioas == new_ioas)
+ return 0;
+
+ /*
+ * Set ioas to NULL to block any further iommufd_access_pin_pages().
+ * iommufd_access_unpin_pages() can continue using access->ioas_unpin.
+ */
+ access->ioas = NULL;
+
+ if (new_ioas) {
+ rc = iopt_add_access(&new_ioas->iopt, access);
+ if (rc) {
+ access->ioas = cur_ioas;
+ return rc;
+ }
+ refcount_inc(&new_ioas->obj.users);
+ }
+
+ if (cur_ioas) {
+ if (access->ops->unmap) {
+ mutex_unlock(&access->ioas_lock);
+ access->ops->unmap(access->data, 0, ULONG_MAX);
+ mutex_lock(&access->ioas_lock);
+ }
+ iopt_remove_access(&cur_ioas->iopt, access, iopt_access_list_id);
+ refcount_dec(&cur_ioas->obj.users);
+ }
+
+ access->ioas = new_ioas;
+ access->ioas_unpin = new_ioas;
+
+ return 0;
+}
+
+static int iommufd_access_change_ioas_id(struct iommufd_access *access, u32 id)
+{
+ struct iommufd_ioas *ioas = iommufd_get_ioas(access->ictx, id);
+ int rc;
+
+ if (IS_ERR(ioas))
+ return PTR_ERR(ioas);
+ rc = iommufd_access_change_ioas(access, ioas);
+ iommufd_put_object(&ioas->obj);
+ return rc;
+}
+
void iommufd_access_destroy_object(struct iommufd_object *obj)
{
struct iommufd_access *access =
container_of(obj, struct iommufd_access, obj);
- if (access->ioas) {
- iopt_remove_access(&access->ioas->iopt, access);
- refcount_dec(&access->ioas->obj.users);
- access->ioas = NULL;
- }
+ mutex_lock(&access->ioas_lock);
+ if (access->ioas)
+ WARN_ON(iommufd_access_change_ioas(access, NULL));
+ mutex_unlock(&access->ioas_lock);
iommufd_ctx_put(access->ictx);
}
@@ -441,6 +799,7 @@ iommufd_access_create(struct iommufd_ctx *ictx,
iommufd_ctx_get(ictx);
iommufd_object_finalize(ictx, &access->obj);
*id = access->obj.id;
+ mutex_init(&access->ioas_lock);
return access;
}
EXPORT_SYMBOL_NS_GPL(iommufd_access_create, IOMMUFD);
@@ -457,30 +816,49 @@ void iommufd_access_destroy(struct iommufd_access *access)
}
EXPORT_SYMBOL_NS_GPL(iommufd_access_destroy, IOMMUFD);
+void iommufd_access_detach(struct iommufd_access *access)
+{
+ mutex_lock(&access->ioas_lock);
+ if (WARN_ON(!access->ioas)) {
+ mutex_unlock(&access->ioas_lock);
+ return;
+ }
+ WARN_ON(iommufd_access_change_ioas(access, NULL));
+ mutex_unlock(&access->ioas_lock);
+}
+EXPORT_SYMBOL_NS_GPL(iommufd_access_detach, IOMMUFD);
+
int iommufd_access_attach(struct iommufd_access *access, u32 ioas_id)
{
- struct iommufd_ioas *new_ioas;
- int rc = 0;
+ int rc;
- if (access->ioas)
+ mutex_lock(&access->ioas_lock);
+ if (WARN_ON(access->ioas)) {
+ mutex_unlock(&access->ioas_lock);
return -EINVAL;
-
- new_ioas = iommufd_get_ioas(access->ictx, ioas_id);
- if (IS_ERR(new_ioas))
- return PTR_ERR(new_ioas);
-
- rc = iopt_add_access(&new_ioas->iopt, access);
- if (rc) {
- iommufd_put_object(&new_ioas->obj);
- return rc;
}
- iommufd_ref_to_users(&new_ioas->obj);
- access->ioas = new_ioas;
- return 0;
+ rc = iommufd_access_change_ioas_id(access, ioas_id);
+ mutex_unlock(&access->ioas_lock);
+ return rc;
}
EXPORT_SYMBOL_NS_GPL(iommufd_access_attach, IOMMUFD);
+int iommufd_access_replace(struct iommufd_access *access, u32 ioas_id)
+{
+ int rc;
+
+ mutex_lock(&access->ioas_lock);
+ if (!access->ioas) {
+ mutex_unlock(&access->ioas_lock);
+ return -ENOENT;
+ }
+ rc = iommufd_access_change_ioas_id(access, ioas_id);
+ mutex_unlock(&access->ioas_lock);
+ return rc;
+}
+EXPORT_SYMBOL_NS_GPL(iommufd_access_replace, IOMMUFD);
+
/**
* iommufd_access_notify_unmap - Notify users of an iopt to stop using it
* @iopt: iopt to work on
@@ -531,8 +909,8 @@ void iommufd_access_notify_unmap(struct io_pagetable *iopt, unsigned long iova,
void iommufd_access_unpin_pages(struct iommufd_access *access,
unsigned long iova, unsigned long length)
{
- struct io_pagetable *iopt = &access->ioas->iopt;
struct iopt_area_contig_iter iter;
+ struct io_pagetable *iopt;
unsigned long last_iova;
struct iopt_area *area;
@@ -540,6 +918,17 @@ void iommufd_access_unpin_pages(struct iommufd_access *access,
WARN_ON(check_add_overflow(iova, length - 1, &last_iova)))
return;
+ mutex_lock(&access->ioas_lock);
+ /*
+ * The driver must be doing something wrong if it calls this before an
+ * iommufd_access_attach() or after an iommufd_access_detach().
+ */
+ if (WARN_ON(!access->ioas_unpin)) {
+ mutex_unlock(&access->ioas_lock);
+ return;
+ }
+ iopt = &access->ioas_unpin->iopt;
+
down_read(&iopt->iova_rwsem);
iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova)
iopt_area_remove_access(
@@ -549,6 +938,7 @@ void iommufd_access_unpin_pages(struct iommufd_access *access,
min(last_iova, iopt_area_last_iova(area))));
WARN_ON(!iopt_area_contig_done(&iter));
up_read(&iopt->iova_rwsem);
+ mutex_unlock(&access->ioas_lock);
}
EXPORT_SYMBOL_NS_GPL(iommufd_access_unpin_pages, IOMMUFD);
@@ -594,8 +984,8 @@ int iommufd_access_pin_pages(struct iommufd_access *access, unsigned long iova,
unsigned long length, struct page **out_pages,
unsigned int flags)
{
- struct io_pagetable *iopt = &access->ioas->iopt;
struct iopt_area_contig_iter iter;
+ struct io_pagetable *iopt;
unsigned long last_iova;
struct iopt_area *area;
int rc;
@@ -610,6 +1000,13 @@ int iommufd_access_pin_pages(struct iommufd_access *access, unsigned long iova,
if (check_add_overflow(iova, length - 1, &last_iova))
return -EOVERFLOW;
+ mutex_lock(&access->ioas_lock);
+ if (!access->ioas) {
+ mutex_unlock(&access->ioas_lock);
+ return -ENOENT;
+ }
+ iopt = &access->ioas->iopt;
+
down_read(&iopt->iova_rwsem);
iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova) {
unsigned long last = min(last_iova, iopt_area_last_iova(area));
@@ -640,6 +1037,7 @@ int iommufd_access_pin_pages(struct iommufd_access *access, unsigned long iova,
}
up_read(&iopt->iova_rwsem);
+ mutex_unlock(&access->ioas_lock);
return 0;
err_remove:
@@ -654,6 +1052,7 @@ err_remove:
iopt_area_last_iova(area))));
}
up_read(&iopt->iova_rwsem);
+ mutex_unlock(&access->ioas_lock);
return rc;
}
EXPORT_SYMBOL_NS_GPL(iommufd_access_pin_pages, IOMMUFD);
@@ -673,8 +1072,8 @@ EXPORT_SYMBOL_NS_GPL(iommufd_access_pin_pages, IOMMUFD);
int iommufd_access_rw(struct iommufd_access *access, unsigned long iova,
void *data, size_t length, unsigned int flags)
{
- struct io_pagetable *iopt = &access->ioas->iopt;
struct iopt_area_contig_iter iter;
+ struct io_pagetable *iopt;
struct iopt_area *area;
unsigned long last_iova;
int rc;
@@ -684,6 +1083,13 @@ int iommufd_access_rw(struct iommufd_access *access, unsigned long iova,
if (check_add_overflow(iova, length - 1, &last_iova))
return -EOVERFLOW;
+ mutex_lock(&access->ioas_lock);
+ if (!access->ioas) {
+ mutex_unlock(&access->ioas_lock);
+ return -ENOENT;
+ }
+ iopt = &access->ioas->iopt;
+
down_read(&iopt->iova_rwsem);
iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova) {
unsigned long last = min(last_iova, iopt_area_last_iova(area));
@@ -710,6 +1116,79 @@ int iommufd_access_rw(struct iommufd_access *access, unsigned long iova,
rc = -ENOENT;
err_out:
up_read(&iopt->iova_rwsem);
+ mutex_unlock(&access->ioas_lock);
return rc;
}
EXPORT_SYMBOL_NS_GPL(iommufd_access_rw, IOMMUFD);
+
+int iommufd_get_hw_info(struct iommufd_ucmd *ucmd)
+{
+ struct iommu_hw_info *cmd = ucmd->cmd;
+ void __user *user_ptr = u64_to_user_ptr(cmd->data_uptr);
+ const struct iommu_ops *ops;
+ struct iommufd_device *idev;
+ unsigned int data_len;
+ unsigned int copy_len;
+ void *data;
+ int rc;
+
+ if (cmd->flags || cmd->__reserved)
+ return -EOPNOTSUPP;
+
+ idev = iommufd_get_device(ucmd, cmd->dev_id);
+ if (IS_ERR(idev))
+ return PTR_ERR(idev);
+
+ ops = dev_iommu_ops(idev->dev);
+ if (ops->hw_info) {
+ data = ops->hw_info(idev->dev, &data_len, &cmd->out_data_type);
+ if (IS_ERR(data)) {
+ rc = PTR_ERR(data);
+ goto out_put;
+ }
+
+ /*
+ * drivers that have hw_info callback should have a unique
+ * iommu_hw_info_type.
+ */
+ if (WARN_ON_ONCE(cmd->out_data_type ==
+ IOMMU_HW_INFO_TYPE_NONE)) {
+ rc = -ENODEV;
+ goto out_free;
+ }
+ } else {
+ cmd->out_data_type = IOMMU_HW_INFO_TYPE_NONE;
+ data_len = 0;
+ data = NULL;
+ }
+
+ copy_len = min(cmd->data_len, data_len);
+ if (copy_to_user(user_ptr, data, copy_len)) {
+ rc = -EFAULT;
+ goto out_free;
+ }
+
+ /*
+ * Zero the trailing bytes if the user buffer is bigger than the
+ * data size kernel actually has.
+ */
+ if (copy_len < cmd->data_len) {
+ if (clear_user(user_ptr + copy_len, cmd->data_len - copy_len)) {
+ rc = -EFAULT;
+ goto out_free;
+ }
+ }
+
+ /*
+ * We return the length the kernel supports so userspace may know what
+ * the kernel capability is. It could be larger than the input buffer.
+ */
+ cmd->data_len = data_len;
+
+ rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
+out_free:
+ kfree(data);
+out_put:
+ iommufd_put_object(&idev->obj);
+ return rc;
+}
diff --git a/drivers/iommu/iommufd/hw_pagetable.c b/drivers/iommu/iommufd/hw_pagetable.c
index 6cdb6749d359..cf2c1504e20d 100644
--- a/drivers/iommu/iommufd/hw_pagetable.c
+++ b/drivers/iommu/iommufd/hw_pagetable.c
@@ -3,6 +3,7 @@
* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES
*/
#include <linux/iommu.h>
+#include <uapi/linux/iommufd.h>
#include "iommufd_private.h"
@@ -11,8 +12,6 @@ void iommufd_hw_pagetable_destroy(struct iommufd_object *obj)
struct iommufd_hw_pagetable *hwpt =
container_of(obj, struct iommufd_hw_pagetable, obj);
- WARN_ON(!list_empty(&hwpt->devices));
-
if (!list_empty(&hwpt->hwpt_item)) {
mutex_lock(&hwpt->ioas->mutex);
list_del(&hwpt->hwpt_item);
@@ -25,7 +24,35 @@ void iommufd_hw_pagetable_destroy(struct iommufd_object *obj)
iommu_domain_free(hwpt->domain);
refcount_dec(&hwpt->ioas->obj.users);
- mutex_destroy(&hwpt->devices_lock);
+}
+
+void iommufd_hw_pagetable_abort(struct iommufd_object *obj)
+{
+ struct iommufd_hw_pagetable *hwpt =
+ container_of(obj, struct iommufd_hw_pagetable, obj);
+
+ /* The ioas->mutex must be held until finalize is called. */
+ lockdep_assert_held(&hwpt->ioas->mutex);
+
+ if (!list_empty(&hwpt->hwpt_item)) {
+ list_del_init(&hwpt->hwpt_item);
+ iopt_table_remove_domain(&hwpt->ioas->iopt, hwpt->domain);
+ }
+ iommufd_hw_pagetable_destroy(obj);
+}
+
+int iommufd_hw_pagetable_enforce_cc(struct iommufd_hw_pagetable *hwpt)
+{
+ if (hwpt->enforce_cache_coherency)
+ return 0;
+
+ if (hwpt->domain->ops->enforce_cache_coherency)
+ hwpt->enforce_cache_coherency =
+ hwpt->domain->ops->enforce_cache_coherency(
+ hwpt->domain);
+ if (!hwpt->enforce_cache_coherency)
+ return -EINVAL;
+ return 0;
}
/**
@@ -38,6 +65,10 @@ void iommufd_hw_pagetable_destroy(struct iommufd_object *obj)
* Allocate a new iommu_domain and return it as a hw_pagetable. The HWPT
* will be linked to the given ioas and upon return the underlying iommu_domain
* is fully popoulated.
+ *
+ * The caller must hold the ioas->mutex until after
+ * iommufd_object_abort_and_destroy() or iommufd_object_finalize() is called on
+ * the returned hwpt.
*/
struct iommufd_hw_pagetable *
iommufd_hw_pagetable_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas,
@@ -52,9 +83,7 @@ iommufd_hw_pagetable_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas,
if (IS_ERR(hwpt))
return hwpt;
- INIT_LIST_HEAD(&hwpt->devices);
INIT_LIST_HEAD(&hwpt->hwpt_item);
- mutex_init(&hwpt->devices_lock);
/* Pairs with iommufd_hw_pagetable_destroy() */
refcount_inc(&ioas->obj.users);
hwpt->ioas = ioas;
@@ -65,7 +94,18 @@ iommufd_hw_pagetable_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas,
goto out_abort;
}
- mutex_lock(&hwpt->devices_lock);
+ /*
+ * Set the coherency mode before we do iopt_table_add_domain() as some
+ * iommus have a per-PTE bit that controls it and need to decide before
+ * doing any maps. It is an iommu driver bug to report
+ * IOMMU_CAP_ENFORCE_CACHE_COHERENCY but fail enforce_cache_coherency on
+ * a new domain.
+ */
+ if (idev->enforce_cache_coherency) {
+ rc = iommufd_hw_pagetable_enforce_cc(hwpt);
+ if (WARN_ON(rc))
+ goto out_abort;
+ }
/*
* immediate_attach exists only to accommodate iommu drivers that cannot
@@ -76,30 +116,64 @@ iommufd_hw_pagetable_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas,
if (immediate_attach) {
rc = iommufd_hw_pagetable_attach(hwpt, idev);
if (rc)
- goto out_unlock;
+ goto out_abort;
}
rc = iopt_table_add_domain(&hwpt->ioas->iopt, hwpt->domain);
if (rc)
goto out_detach;
list_add_tail(&hwpt->hwpt_item, &hwpt->ioas->hwpt_list);
-
- if (immediate_attach) {
- /* See iommufd_device_do_attach() */
- refcount_inc(&hwpt->obj.users);
- idev->hwpt = hwpt;
- list_add(&idev->devices_item, &hwpt->devices);
- }
-
- mutex_unlock(&hwpt->devices_lock);
return hwpt;
out_detach:
if (immediate_attach)
- iommufd_hw_pagetable_detach(hwpt, idev);
-out_unlock:
- mutex_unlock(&hwpt->devices_lock);
+ iommufd_hw_pagetable_detach(idev);
out_abort:
iommufd_object_abort_and_destroy(ictx, &hwpt->obj);
return ERR_PTR(rc);
}
+
+int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd)
+{
+ struct iommu_hwpt_alloc *cmd = ucmd->cmd;
+ struct iommufd_hw_pagetable *hwpt;
+ struct iommufd_device *idev;
+ struct iommufd_ioas *ioas;
+ int rc;
+
+ if (cmd->flags || cmd->__reserved)
+ return -EOPNOTSUPP;
+
+ idev = iommufd_get_device(ucmd, cmd->dev_id);
+ if (IS_ERR(idev))
+ return PTR_ERR(idev);
+
+ ioas = iommufd_get_ioas(ucmd->ictx, cmd->pt_id);
+ if (IS_ERR(ioas)) {
+ rc = PTR_ERR(ioas);
+ goto out_put_idev;
+ }
+
+ mutex_lock(&ioas->mutex);
+ hwpt = iommufd_hw_pagetable_alloc(ucmd->ictx, ioas, idev, false);
+ if (IS_ERR(hwpt)) {
+ rc = PTR_ERR(hwpt);
+ goto out_unlock;
+ }
+
+ cmd->out_hwpt_id = hwpt->obj.id;
+ rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
+ if (rc)
+ goto out_hwpt;
+ iommufd_object_finalize(ucmd->ictx, &hwpt->obj);
+ goto out_unlock;
+
+out_hwpt:
+ iommufd_object_abort_and_destroy(ucmd->ictx, &hwpt->obj);
+out_unlock:
+ mutex_unlock(&ioas->mutex);
+ iommufd_put_object(&ioas->obj);
+out_put_idev:
+ iommufd_put_object(&idev->obj);
+ return rc;
+}
diff --git a/drivers/iommu/iommufd/io_pagetable.c b/drivers/iommu/iommufd/io_pagetable.c
index 724c4c574241..3a598182b761 100644
--- a/drivers/iommu/iommufd/io_pagetable.c
+++ b/drivers/iommu/iommufd/io_pagetable.c
@@ -1158,36 +1158,36 @@ out_unlock:
}
void iopt_remove_access(struct io_pagetable *iopt,
- struct iommufd_access *access)
+ struct iommufd_access *access,
+ u32 iopt_access_list_id)
{
down_write(&iopt->domains_rwsem);
down_write(&iopt->iova_rwsem);
- WARN_ON(xa_erase(&iopt->access_list, access->iopt_access_list_id) !=
- access);
+ WARN_ON(xa_erase(&iopt->access_list, iopt_access_list_id) != access);
WARN_ON(iopt_calculate_iova_alignment(iopt));
up_write(&iopt->iova_rwsem);
up_write(&iopt->domains_rwsem);
}
-/* Narrow the valid_iova_itree to include reserved ranges from a group. */
-int iopt_table_enforce_group_resv_regions(struct io_pagetable *iopt,
- struct device *device,
- struct iommu_group *group,
- phys_addr_t *sw_msi_start)
+/* Narrow the valid_iova_itree to include reserved ranges from a device. */
+int iopt_table_enforce_dev_resv_regions(struct io_pagetable *iopt,
+ struct device *dev,
+ phys_addr_t *sw_msi_start)
{
struct iommu_resv_region *resv;
- struct iommu_resv_region *tmp;
- LIST_HEAD(group_resv_regions);
+ LIST_HEAD(resv_regions);
unsigned int num_hw_msi = 0;
unsigned int num_sw_msi = 0;
int rc;
+ if (iommufd_should_fail())
+ return -EINVAL;
+
down_write(&iopt->iova_rwsem);
- rc = iommu_get_group_resv_regions(group, &group_resv_regions);
- if (rc)
- goto out_unlock;
+ /* FIXME: drivers allocate memory but there is no failure propogated */
+ iommu_get_resv_regions(dev, &resv_regions);
- list_for_each_entry(resv, &group_resv_regions, list) {
+ list_for_each_entry(resv, &resv_regions, list) {
if (resv->type == IOMMU_RESV_DIRECT_RELAXABLE)
continue;
@@ -1199,7 +1199,7 @@ int iopt_table_enforce_group_resv_regions(struct io_pagetable *iopt,
}
rc = iopt_reserve_iova(iopt, resv->start,
- resv->length - 1 + resv->start, device);
+ resv->length - 1 + resv->start, dev);
if (rc)
goto out_reserved;
}
@@ -1214,11 +1214,9 @@ int iopt_table_enforce_group_resv_regions(struct io_pagetable *iopt,
goto out_free_resv;
out_reserved:
- __iopt_remove_reserved_iova(iopt, device);
+ __iopt_remove_reserved_iova(iopt, dev);
out_free_resv:
- list_for_each_entry_safe(resv, tmp, &group_resv_regions, list)
- kfree(resv);
-out_unlock:
+ iommu_put_resv_regions(dev, &resv_regions);
up_write(&iopt->iova_rwsem);
return rc;
}
diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
index f9790983699c..2c58670011fe 100644
--- a/drivers/iommu/iommufd/iommufd_private.h
+++ b/drivers/iommu/iommufd/iommufd_private.h
@@ -17,6 +17,7 @@ struct iommufd_device;
struct iommufd_ctx {
struct file *file;
struct xarray objects;
+ struct xarray groups;
u8 account_mode;
/* Compatibility with VFIO no iommu */
@@ -75,10 +76,9 @@ int iopt_table_add_domain(struct io_pagetable *iopt,
struct iommu_domain *domain);
void iopt_table_remove_domain(struct io_pagetable *iopt,
struct iommu_domain *domain);
-int iopt_table_enforce_group_resv_regions(struct io_pagetable *iopt,
- struct device *device,
- struct iommu_group *group,
- phys_addr_t *sw_msi_start);
+int iopt_table_enforce_dev_resv_regions(struct io_pagetable *iopt,
+ struct device *dev,
+ phys_addr_t *sw_msi_start);
int iopt_set_allow_iova(struct io_pagetable *iopt,
struct rb_root_cached *allowed_iova);
int iopt_reserve_iova(struct io_pagetable *iopt, unsigned long start,
@@ -119,6 +119,7 @@ enum iommufd_object_type {
#ifdef CONFIG_IOMMUFD_TEST
IOMMUFD_OBJ_SELFTEST,
#endif
+ IOMMUFD_OBJ_MAX,
};
/* Base struct for all objects with a userspace ID handle. */
@@ -148,29 +149,6 @@ static inline void iommufd_put_object(struct iommufd_object *obj)
up_read(&obj->destroy_rwsem);
}
-/**
- * iommufd_ref_to_users() - Switch from destroy_rwsem to users refcount
- * protection
- * @obj - Object to release
- *
- * Objects have two refcount protections (destroy_rwsem and the refcount_t
- * users). Holding either of these will prevent the object from being destroyed.
- *
- * Depending on the use case, one protection or the other is appropriate. In
- * most cases references are being protected by the destroy_rwsem. This allows
- * orderly destruction of the object because iommufd_object_destroy_user() will
- * wait for it to become unlocked. However, as a rwsem, it cannot be held across
- * a system call return. So cases that have longer term needs must switch
- * to the weaker users refcount_t.
- *
- * With users protection iommufd_object_destroy_user() will return false,
- * refusing to destroy the object, causing -EBUSY to userspace.
- */
-static inline void iommufd_ref_to_users(struct iommufd_object *obj)
-{
- up_read(&obj->destroy_rwsem);
- /* iommufd_lock_obj() obtains users as well */
-}
void iommufd_object_abort(struct iommufd_ctx *ictx, struct iommufd_object *obj);
void iommufd_object_abort_and_destroy(struct iommufd_ctx *ictx,
struct iommufd_object *obj);
@@ -260,18 +238,39 @@ struct iommufd_hw_pagetable {
bool msi_cookie : 1;
/* Head at iommufd_ioas::hwpt_list */
struct list_head hwpt_item;
- struct mutex devices_lock;
- struct list_head devices;
};
struct iommufd_hw_pagetable *
iommufd_hw_pagetable_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas,
struct iommufd_device *idev, bool immediate_attach);
+int iommufd_hw_pagetable_enforce_cc(struct iommufd_hw_pagetable *hwpt);
int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt,
struct iommufd_device *idev);
-void iommufd_hw_pagetable_detach(struct iommufd_hw_pagetable *hwpt,
- struct iommufd_device *idev);
+struct iommufd_hw_pagetable *
+iommufd_hw_pagetable_detach(struct iommufd_device *idev);
void iommufd_hw_pagetable_destroy(struct iommufd_object *obj);
+void iommufd_hw_pagetable_abort(struct iommufd_object *obj);
+int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd);
+
+static inline void iommufd_hw_pagetable_put(struct iommufd_ctx *ictx,
+ struct iommufd_hw_pagetable *hwpt)
+{
+ lockdep_assert_not_held(&hwpt->ioas->mutex);
+ if (hwpt->auto_domain)
+ iommufd_object_deref_user(ictx, &hwpt->obj);
+ else
+ refcount_dec(&hwpt->obj.users);
+}
+
+struct iommufd_group {
+ struct kref ref;
+ struct mutex lock;
+ struct iommufd_ctx *ictx;
+ struct iommu_group *group;
+ struct iommufd_hw_pagetable *hwpt;
+ struct list_head device_list;
+ phys_addr_t sw_msi_start;
+};
/*
* A iommufd_device object represents the binding relationship between a
@@ -281,21 +280,30 @@ void iommufd_hw_pagetable_destroy(struct iommufd_object *obj);
struct iommufd_device {
struct iommufd_object obj;
struct iommufd_ctx *ictx;
- struct iommufd_hw_pagetable *hwpt;
- /* Head at iommufd_hw_pagetable::devices */
- struct list_head devices_item;
+ struct iommufd_group *igroup;
+ struct list_head group_item;
/* always the physical device */
struct device *dev;
- struct iommu_group *group;
bool enforce_cache_coherency;
};
+static inline struct iommufd_device *
+iommufd_get_device(struct iommufd_ucmd *ucmd, u32 id)
+{
+ return container_of(iommufd_get_object(ucmd->ictx, id,
+ IOMMUFD_OBJ_DEVICE),
+ struct iommufd_device, obj);
+}
+
void iommufd_device_destroy(struct iommufd_object *obj);
+int iommufd_get_hw_info(struct iommufd_ucmd *ucmd);
struct iommufd_access {
struct iommufd_object obj;
struct iommufd_ctx *ictx;
struct iommufd_ioas *ioas;
+ struct iommufd_ioas *ioas_unpin;
+ struct mutex ioas_lock;
const struct iommufd_access_ops *ops;
void *data;
unsigned long iova_alignment;
@@ -304,7 +312,8 @@ struct iommufd_access {
int iopt_add_access(struct io_pagetable *iopt, struct iommufd_access *access);
void iopt_remove_access(struct io_pagetable *iopt,
- struct iommufd_access *access);
+ struct iommufd_access *access,
+ u32 iopt_access_list_id);
void iommufd_access_destroy_object(struct iommufd_object *obj);
#ifdef CONFIG_IOMMUFD_TEST
@@ -314,7 +323,7 @@ extern size_t iommufd_test_memory_limit;
void iommufd_test_syz_conv_iova_id(struct iommufd_ucmd *ucmd,
unsigned int ioas_id, u64 *iova, u32 *flags);
bool iommufd_should_fail(void);
-void __init iommufd_test_init(void);
+int __init iommufd_test_init(void);
void iommufd_test_exit(void);
bool iommufd_selftest_is_mock_dev(struct device *dev);
#else
@@ -327,8 +336,9 @@ static inline bool iommufd_should_fail(void)
{
return false;
}
-static inline void __init iommufd_test_init(void)
+static inline int __init iommufd_test_init(void)
{
+ return 0;
}
static inline void iommufd_test_exit(void)
{
diff --git a/drivers/iommu/iommufd/iommufd_test.h b/drivers/iommu/iommufd/iommufd_test.h
index b3d69cca7729..3f3644375bf1 100644
--- a/drivers/iommu/iommufd/iommufd_test.h
+++ b/drivers/iommu/iommufd/iommufd_test.h
@@ -17,6 +17,8 @@ enum {
IOMMU_TEST_OP_ACCESS_PAGES,
IOMMU_TEST_OP_ACCESS_RW,
IOMMU_TEST_OP_SET_TEMP_MEMORY_LIMIT,
+ IOMMU_TEST_OP_MOCK_DOMAIN_REPLACE,
+ IOMMU_TEST_OP_ACCESS_REPLACE_IOAS,
};
enum {
@@ -51,8 +53,13 @@ struct iommu_test_cmd {
struct {
__u32 out_stdev_id;
__u32 out_hwpt_id;
+ /* out_idev_id is the standard iommufd_bind object */
+ __u32 out_idev_id;
} mock_domain;
struct {
+ __u32 pt_id;
+ } mock_domain_replace;
+ struct {
__aligned_u64 iova;
__aligned_u64 length;
__aligned_u64 uptr;
@@ -85,9 +92,21 @@ struct iommu_test_cmd {
struct {
__u32 limit;
} memory_limit;
+ struct {
+ __u32 ioas_id;
+ } access_replace_ioas;
};
__u32 last;
};
#define IOMMU_TEST_CMD _IO(IOMMUFD_TYPE, IOMMUFD_CMD_BASE + 32)
+/* Mock structs for IOMMU_DEVICE_GET_HW_INFO ioctl */
+#define IOMMU_HW_INFO_TYPE_SELFTEST 0xfeedbeef
+#define IOMMU_HW_INFO_SELFTEST_REGVAL 0xdeadbeef
+
+struct iommu_test_hw_info {
+ __u32 flags;
+ __u32 test_reg;
+};
+
#endif
diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c
index 4cf5f73f2708..e71523cbd0de 100644
--- a/drivers/iommu/iommufd/main.c
+++ b/drivers/iommu/iommufd/main.c
@@ -24,6 +24,7 @@
struct iommufd_object_ops {
void (*destroy)(struct iommufd_object *obj);
+ void (*abort)(struct iommufd_object *obj);
};
static const struct iommufd_object_ops iommufd_object_ops[];
static struct miscdevice vfio_misc_dev;
@@ -32,6 +33,7 @@ struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx,
size_t size,
enum iommufd_object_type type)
{
+ static struct lock_class_key obj_keys[IOMMUFD_OBJ_MAX];
struct iommufd_object *obj;
int rc;
@@ -39,7 +41,15 @@ struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx,
if (!obj)
return ERR_PTR(-ENOMEM);
obj->type = type;
- init_rwsem(&obj->destroy_rwsem);
+ /*
+ * In most cases the destroy_rwsem is obtained with try so it doesn't
+ * interact with lockdep, however on destroy we have to sleep. This
+ * means if we have to destroy an object while holding a get on another
+ * object it triggers lockdep. Using one locking class per object type
+ * is a simple and reasonable way to avoid this.
+ */
+ __init_rwsem(&obj->destroy_rwsem, "iommufd_object::destroy_rwsem",
+ &obj_keys[type]);
refcount_set(&obj->users, 1);
/*
@@ -50,7 +60,7 @@ struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx,
* before calling iommufd_object_finalize().
*/
rc = xa_alloc(&ictx->objects, &obj->id, XA_ZERO_ENTRY,
- xa_limit_32b, GFP_KERNEL_ACCOUNT);
+ xa_limit_31b, GFP_KERNEL_ACCOUNT);
if (rc)
goto out_free;
return obj;
@@ -95,7 +105,10 @@ void iommufd_object_abort(struct iommufd_ctx *ictx, struct iommufd_object *obj)
void iommufd_object_abort_and_destroy(struct iommufd_ctx *ictx,
struct iommufd_object *obj)
{
- iommufd_object_ops[obj->type].destroy(obj);
+ if (iommufd_object_ops[obj->type].abort)
+ iommufd_object_ops[obj->type].abort(obj);
+ else
+ iommufd_object_ops[obj->type].destroy(obj);
iommufd_object_abort(ictx, obj);
}
@@ -223,6 +236,7 @@ static int iommufd_fops_open(struct inode *inode, struct file *filp)
}
xa_init_flags(&ictx->objects, XA_FLAGS_ALLOC1 | XA_FLAGS_ACCOUNT);
+ xa_init(&ictx->groups);
ictx->file = filp;
filp->private_data = ictx;
return 0;
@@ -258,6 +272,7 @@ static int iommufd_fops_release(struct inode *inode, struct file *filp)
if (WARN_ON(!destroyed))
break;
}
+ WARN_ON(!xa_empty(&ictx->groups));
kfree(ictx);
return 0;
}
@@ -290,6 +305,8 @@ static int iommufd_option(struct iommufd_ucmd *ucmd)
union ucmd_buffer {
struct iommu_destroy destroy;
+ struct iommu_hw_info info;
+ struct iommu_hwpt_alloc hwpt;
struct iommu_ioas_alloc alloc;
struct iommu_ioas_allow_iovas allow_iovas;
struct iommu_ioas_copy ioas_copy;
@@ -321,6 +338,10 @@ struct iommufd_ioctl_op {
}
static const struct iommufd_ioctl_op iommufd_ioctl_ops[] = {
IOCTL_OP(IOMMU_DESTROY, iommufd_destroy, struct iommu_destroy, id),
+ IOCTL_OP(IOMMU_GET_HW_INFO, iommufd_get_hw_info, struct iommu_hw_info,
+ __reserved),
+ IOCTL_OP(IOMMU_HWPT_ALLOC, iommufd_hwpt_alloc, struct iommu_hwpt_alloc,
+ __reserved),
IOCTL_OP(IOMMU_IOAS_ALLOC, iommufd_ioas_alloc_ioctl,
struct iommu_ioas_alloc, out_ioas_id),
IOCTL_OP(IOMMU_IOAS_ALLOW_IOVAS, iommufd_ioas_allow_iovas,
@@ -418,6 +439,30 @@ struct iommufd_ctx *iommufd_ctx_from_file(struct file *file)
EXPORT_SYMBOL_NS_GPL(iommufd_ctx_from_file, IOMMUFD);
/**
+ * iommufd_ctx_from_fd - Acquires a reference to the iommufd context
+ * @fd: File descriptor to obtain the reference from
+ *
+ * Returns a pointer to the iommufd_ctx, otherwise ERR_PTR. On success
+ * the caller is responsible to call iommufd_ctx_put().
+ */
+struct iommufd_ctx *iommufd_ctx_from_fd(int fd)
+{
+ struct file *file;
+
+ file = fget(fd);
+ if (!file)
+ return ERR_PTR(-EBADF);
+
+ if (file->f_op != &iommufd_fops) {
+ fput(file);
+ return ERR_PTR(-EBADFD);
+ }
+ /* fget is the same as iommufd_ctx_get() */
+ return file->private_data;
+}
+EXPORT_SYMBOL_NS_GPL(iommufd_ctx_from_fd, IOMMUFD);
+
+/**
* iommufd_ctx_put - Put back a reference
* @ictx: Context to put back
*/
@@ -439,6 +484,7 @@ static const struct iommufd_object_ops iommufd_object_ops[] = {
},
[IOMMUFD_OBJ_HW_PAGETABLE] = {
.destroy = iommufd_hw_pagetable_destroy,
+ .abort = iommufd_hw_pagetable_abort,
},
#ifdef CONFIG_IOMMUFD_TEST
[IOMMUFD_OBJ_SELFTEST] = {
@@ -477,8 +523,14 @@ static int __init iommufd_init(void)
if (ret)
goto err_misc;
}
- iommufd_test_init();
+ ret = iommufd_test_init();
+ if (ret)
+ goto err_vfio_misc;
return 0;
+
+err_vfio_misc:
+ if (IS_ENABLED(CONFIG_IOMMUFD_VFIO_CONTAINER))
+ misc_deregister(&vfio_misc_dev);
err_misc:
misc_deregister(&iommu_misc_dev);
return ret;
@@ -499,5 +551,6 @@ module_exit(iommufd_exit);
MODULE_ALIAS_MISCDEV(VFIO_MINOR);
MODULE_ALIAS("devname:vfio/vfio");
#endif
+MODULE_IMPORT_NS(IOMMUFD_INTERNAL);
MODULE_DESCRIPTION("I/O Address Space Management for passthrough devices");
MODULE_LICENSE("GPL");
diff --git a/drivers/iommu/iommufd/selftest.c b/drivers/iommu/iommufd/selftest.c
index 74c2076105d4..56506d5753f1 100644
--- a/drivers/iommu/iommufd/selftest.c
+++ b/drivers/iommu/iommufd/selftest.c
@@ -9,14 +9,17 @@
#include <linux/file.h>
#include <linux/anon_inodes.h>
#include <linux/fault-inject.h>
+#include <linux/platform_device.h>
#include <uapi/linux/iommufd.h>
+#include "../iommu-priv.h"
#include "io_pagetable.h"
#include "iommufd_private.h"
#include "iommufd_test.h"
static DECLARE_FAULT_ATTR(fail_iommufd);
static struct dentry *dbgfs_root;
+static struct platform_device *selftest_iommu_dev;
size_t iommufd_test_memory_limit = 65536;
@@ -128,6 +131,21 @@ static struct iommu_domain mock_blocking_domain = {
.ops = &mock_blocking_ops,
};
+static void *mock_domain_hw_info(struct device *dev, u32 *length, u32 *type)
+{
+ struct iommu_test_hw_info *info;
+
+ info = kzalloc(sizeof(*info), GFP_KERNEL);
+ if (!info)
+ return ERR_PTR(-ENOMEM);
+
+ info->test_reg = IOMMU_HW_INFO_SELFTEST_REGVAL;
+ *length = sizeof(*info);
+ *type = IOMMU_HW_INFO_TYPE_SELFTEST;
+
+ return info;
+}
+
static struct iommu_domain *mock_domain_alloc(unsigned int iommu_domain_type)
{
struct mock_iommu_domain *mock;
@@ -135,7 +153,7 @@ static struct iommu_domain *mock_domain_alloc(unsigned int iommu_domain_type)
if (iommu_domain_type == IOMMU_DOMAIN_BLOCKED)
return &mock_blocking_domain;
- if (WARN_ON(iommu_domain_type != IOMMU_DOMAIN_UNMANAGED))
+ if (iommu_domain_type != IOMMU_DOMAIN_UNMANAGED)
return NULL;
mock = kzalloc(sizeof(*mock), GFP_KERNEL);
@@ -276,12 +294,23 @@ static void mock_domain_set_plaform_dma_ops(struct device *dev)
*/
}
+static struct iommu_device mock_iommu_device = {
+};
+
+static struct iommu_device *mock_probe_device(struct device *dev)
+{
+ return &mock_iommu_device;
+}
+
static const struct iommu_ops mock_ops = {
.owner = THIS_MODULE,
.pgsize_bitmap = MOCK_IO_PAGE_SIZE,
+ .hw_info = mock_domain_hw_info,
.domain_alloc = mock_domain_alloc,
.capable = mock_domain_capable,
.set_platform_dma_ops = mock_domain_set_plaform_dma_ops,
+ .device_group = generic_device_group,
+ .probe_device = mock_probe_device,
.default_domain_ops =
&(struct iommu_domain_ops){
.free = mock_domain_free,
@@ -292,10 +321,6 @@ static const struct iommu_ops mock_ops = {
},
};
-static struct iommu_device mock_iommu_device = {
- .ops = &mock_ops,
-};
-
static inline struct iommufd_hw_pagetable *
get_md_pagetable(struct iommufd_ucmd *ucmd, u32 mockpt_id,
struct mock_iommu_domain **mock)
@@ -316,22 +341,29 @@ get_md_pagetable(struct iommufd_ucmd *ucmd, u32 mockpt_id,
return hwpt;
}
-static struct bus_type iommufd_mock_bus_type = {
- .name = "iommufd_mock",
- .iommu_ops = &mock_ops,
+struct mock_bus_type {
+ struct bus_type bus;
+ struct notifier_block nb;
+};
+
+static struct mock_bus_type iommufd_mock_bus_type = {
+ .bus = {
+ .name = "iommufd_mock",
+ },
};
+static atomic_t mock_dev_num;
+
static void mock_dev_release(struct device *dev)
{
struct mock_dev *mdev = container_of(dev, struct mock_dev, dev);
+ atomic_dec(&mock_dev_num);
kfree(mdev);
}
static struct mock_dev *mock_dev_create(void)
{
- struct iommu_group *iommu_group;
- struct dev_iommu *dev_iommu;
struct mock_dev *mdev;
int rc;
@@ -341,51 +373,18 @@ static struct mock_dev *mock_dev_create(void)
device_initialize(&mdev->dev);
mdev->dev.release = mock_dev_release;
- mdev->dev.bus = &iommufd_mock_bus_type;
-
- iommu_group = iommu_group_alloc();
- if (IS_ERR(iommu_group)) {
- rc = PTR_ERR(iommu_group);
- goto err_put;
- }
+ mdev->dev.bus = &iommufd_mock_bus_type.bus;
rc = dev_set_name(&mdev->dev, "iommufd_mock%u",
- iommu_group_id(iommu_group));
+ atomic_inc_return(&mock_dev_num));
if (rc)
- goto err_group;
-
- /*
- * The iommu core has no way to associate a single device with an iommu
- * driver (heck currently it can't even support two iommu_drivers
- * registering). Hack it together with an open coded dev_iommu_get().
- * Notice that the normal notifier triggered iommu release process also
- * does not work here because this bus is not in iommu_buses.
- */
- mdev->dev.iommu = kzalloc(sizeof(*dev_iommu), GFP_KERNEL);
- if (!mdev->dev.iommu) {
- rc = -ENOMEM;
- goto err_group;
- }
- mutex_init(&mdev->dev.iommu->lock);
- mdev->dev.iommu->iommu_dev = &mock_iommu_device;
+ goto err_put;
rc = device_add(&mdev->dev);
if (rc)
- goto err_dev_iommu;
-
- rc = iommu_group_add_device(iommu_group, &mdev->dev);
- if (rc)
- goto err_del;
- iommu_group_put(iommu_group);
+ goto err_put;
return mdev;
-err_del:
- device_del(&mdev->dev);
-err_dev_iommu:
- kfree(mdev->dev.iommu);
- mdev->dev.iommu = NULL;
-err_group:
- iommu_group_put(iommu_group);
err_put:
put_device(&mdev->dev);
return ERR_PTR(rc);
@@ -393,11 +392,7 @@ err_put:
static void mock_dev_destroy(struct mock_dev *mdev)
{
- iommu_group_remove_device(&mdev->dev);
- device_del(&mdev->dev);
- kfree(mdev->dev.iommu);
- mdev->dev.iommu = NULL;
- put_device(&mdev->dev);
+ device_unregister(&mdev->dev);
}
bool iommufd_selftest_is_mock_dev(struct device *dev)
@@ -443,9 +438,15 @@ static int iommufd_test_mock_domain(struct iommufd_ucmd *ucmd,
/* Userspace must destroy the device_id to destroy the object */
cmd->mock_domain.out_hwpt_id = pt_id;
cmd->mock_domain.out_stdev_id = sobj->obj.id;
+ cmd->mock_domain.out_idev_id = idev_id;
+ rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
+ if (rc)
+ goto out_detach;
iommufd_object_finalize(ucmd->ictx, &sobj->obj);
- return iommufd_ucmd_respond(ucmd, sizeof(*cmd));
+ return 0;
+out_detach:
+ iommufd_device_detach(idev);
out_unbind:
iommufd_device_unbind(idev);
out_mdev:
@@ -455,6 +456,42 @@ out_sobj:
return rc;
}
+/* Replace the mock domain with a manually allocated hw_pagetable */
+static int iommufd_test_mock_domain_replace(struct iommufd_ucmd *ucmd,
+ unsigned int device_id, u32 pt_id,
+ struct iommu_test_cmd *cmd)
+{
+ struct iommufd_object *dev_obj;
+ struct selftest_obj *sobj;
+ int rc;
+
+ /*
+ * Prefer to use the OBJ_SELFTEST because the destroy_rwsem will ensure
+ * it doesn't race with detach, which is not allowed.
+ */
+ dev_obj =
+ iommufd_get_object(ucmd->ictx, device_id, IOMMUFD_OBJ_SELFTEST);
+ if (IS_ERR(dev_obj))
+ return PTR_ERR(dev_obj);
+
+ sobj = container_of(dev_obj, struct selftest_obj, obj);
+ if (sobj->type != TYPE_IDEV) {
+ rc = -EINVAL;
+ goto out_dev_obj;
+ }
+
+ rc = iommufd_device_replace(sobj->idev.idev, &pt_id);
+ if (rc)
+ goto out_dev_obj;
+
+ cmd->mock_domain_replace.pt_id = pt_id;
+ rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
+
+out_dev_obj:
+ iommufd_put_object(dev_obj);
+ return rc;
+}
+
/* Add an additional reserved IOVA to the IOAS */
static int iommufd_test_add_reserved(struct iommufd_ucmd *ucmd,
unsigned int mockpt_id,
@@ -748,6 +785,22 @@ out_free_staccess:
return rc;
}
+static int iommufd_test_access_replace_ioas(struct iommufd_ucmd *ucmd,
+ unsigned int access_id,
+ unsigned int ioas_id)
+{
+ struct selftest_access *staccess;
+ int rc;
+
+ staccess = iommufd_access_get(access_id);
+ if (IS_ERR(staccess))
+ return PTR_ERR(staccess);
+
+ rc = iommufd_access_replace(staccess->access, ioas_id);
+ fput(staccess->file);
+ return rc;
+}
+
/* Check that the pages in a page array match the pages in the user VA */
static int iommufd_test_check_pages(void __user *uptr, struct page **pages,
size_t npages)
@@ -948,6 +1001,9 @@ int iommufd_test(struct iommufd_ucmd *ucmd)
cmd->add_reserved.length);
case IOMMU_TEST_OP_MOCK_DOMAIN:
return iommufd_test_mock_domain(ucmd, cmd);
+ case IOMMU_TEST_OP_MOCK_DOMAIN_REPLACE:
+ return iommufd_test_mock_domain_replace(
+ ucmd, cmd->id, cmd->mock_domain_replace.pt_id, cmd);
case IOMMU_TEST_OP_MD_CHECK_MAP:
return iommufd_test_md_check_pa(
ucmd, cmd->id, cmd->check_map.iova,
@@ -960,6 +1016,9 @@ int iommufd_test(struct iommufd_ucmd *ucmd)
case IOMMU_TEST_OP_CREATE_ACCESS:
return iommufd_test_create_access(ucmd, cmd->id,
cmd->create_access.flags);
+ case IOMMU_TEST_OP_ACCESS_REPLACE_IOAS:
+ return iommufd_test_access_replace_ioas(
+ ucmd, cmd->id, cmd->access_replace_ioas.ioas_id);
case IOMMU_TEST_OP_ACCESS_PAGES:
return iommufd_test_access_pages(
ucmd, cmd->id, cmd->access_pages.iova,
@@ -992,15 +1051,57 @@ bool iommufd_should_fail(void)
return should_fail(&fail_iommufd, 1);
}
-void __init iommufd_test_init(void)
+int __init iommufd_test_init(void)
{
+ struct platform_device_info pdevinfo = {
+ .name = "iommufd_selftest_iommu",
+ };
+ int rc;
+
dbgfs_root =
fault_create_debugfs_attr("fail_iommufd", NULL, &fail_iommufd);
- WARN_ON(bus_register(&iommufd_mock_bus_type));
+
+ selftest_iommu_dev = platform_device_register_full(&pdevinfo);
+ if (IS_ERR(selftest_iommu_dev)) {
+ rc = PTR_ERR(selftest_iommu_dev);
+ goto err_dbgfs;
+ }
+
+ rc = bus_register(&iommufd_mock_bus_type.bus);
+ if (rc)
+ goto err_platform;
+
+ rc = iommu_device_sysfs_add(&mock_iommu_device,
+ &selftest_iommu_dev->dev, NULL, "%s",
+ dev_name(&selftest_iommu_dev->dev));
+ if (rc)
+ goto err_bus;
+
+ rc = iommu_device_register_bus(&mock_iommu_device, &mock_ops,
+ &iommufd_mock_bus_type.bus,
+ &iommufd_mock_bus_type.nb);
+ if (rc)
+ goto err_sysfs;
+ return 0;
+
+err_sysfs:
+ iommu_device_sysfs_remove(&mock_iommu_device);
+err_bus:
+ bus_unregister(&iommufd_mock_bus_type.bus);
+err_platform:
+ platform_device_unregister(selftest_iommu_dev);
+err_dbgfs:
+ debugfs_remove_recursive(dbgfs_root);
+ return rc;
}
void iommufd_test_exit(void)
{
+ iommu_device_sysfs_remove(&mock_iommu_device);
+ iommu_device_unregister_bus(&mock_iommu_device,
+ &iommufd_mock_bus_type.bus,
+ &iommufd_mock_bus_type.nb);
+ bus_unregister(&iommufd_mock_bus_type.bus);
+ platform_device_unregister(selftest_iommu_dev);
debugfs_remove_recursive(dbgfs_root);
- bus_unregister(&iommufd_mock_bus_type);
}
diff --git a/drivers/iommu/iommufd/vfio_compat.c b/drivers/iommu/iommufd/vfio_compat.c
index fe02517c73cc..6c810bf80f99 100644
--- a/drivers/iommu/iommufd/vfio_compat.c
+++ b/drivers/iommu/iommufd/vfio_compat.c
@@ -483,6 +483,8 @@ static int iommufd_vfio_iommu_get_info(struct iommufd_ctx *ictx,
rc = cap_size;
goto out_put;
}
+ cap_size = ALIGN(cap_size, sizeof(u64));
+
if (last_cap && info.argsz >= total_cap_size &&
put_user(total_cap_size, &last_cap->next)) {
rc = -EFAULT;
diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c
index 9f64c5c9f5b9..65ff69477c43 100644
--- a/drivers/iommu/ipmmu-vmsa.c
+++ b/drivers/iommu/ipmmu-vmsa.c
@@ -14,11 +14,12 @@
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/io.h>
+#include <linux/iopoll.h>
#include <linux/io-pgtable.h>
#include <linux/iommu.h>
#include <linux/of.h>
-#include <linux/of_device.h>
#include <linux/of_platform.h>
+#include <linux/pci.h>
#include <linux/platform_device.h>
#include <linux/sizes.h>
#include <linux/slab.h>
@@ -253,17 +254,13 @@ static void ipmmu_imuctr_write(struct ipmmu_vmsa_device *mmu,
/* Wait for any pending TLB invalidations to complete */
static void ipmmu_tlb_sync(struct ipmmu_vmsa_domain *domain)
{
- unsigned int count = 0;
+ u32 val;
- while (ipmmu_ctx_read_root(domain, IMCTR) & IMCTR_FLUSH) {
- cpu_relax();
- if (++count == TLB_LOOP_TIMEOUT) {
- dev_err_ratelimited(domain->mmu->dev,
+ if (read_poll_timeout_atomic(ipmmu_ctx_read_root, val,
+ !(val & IMCTR_FLUSH), 1, TLB_LOOP_TIMEOUT,
+ false, domain, IMCTR))
+ dev_err_ratelimited(domain->mmu->dev,
"TLB sync timed out -- MMU may be deadlocked\n");
- return;
- }
- udelay(1);
- }
}
static void ipmmu_tlb_invalidate(struct ipmmu_vmsa_domain *domain)
@@ -723,6 +720,10 @@ static bool ipmmu_device_is_allowed(struct device *dev)
if (soc_device_match(soc_denylist))
return false;
+ /* Check whether this device is a PCI device */
+ if (dev_is_pci(dev))
+ return true;
+
/* Check whether this device can work with the IPMMU */
for (i = 0; i < ARRAY_SIZE(devices_allowlist); i++) {
if (!strcmp(dev_name(dev), devices_allowlist[i]))
diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index e93906d6e112..640275873a27 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -3,6 +3,7 @@
* Copyright (c) 2015-2016 MediaTek Inc.
* Author: Yong Wu <yong.wu@mediatek.com>
*/
+#include <linux/arm-smccc.h>
#include <linux/bitfield.h>
#include <linux/bug.h>
#include <linux/clk.h>
@@ -27,6 +28,7 @@
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/soc/mediatek/infracfg.h>
+#include <linux/soc/mediatek/mtk_sip_svc.h>
#include <asm/barrier.h>
#include <soc/mediatek/smi.h>
@@ -143,6 +145,7 @@
#define PGTABLE_PA_35_EN BIT(17)
#define TF_PORT_TO_ADDR_MT8173 BIT(18)
#define INT_ID_PORT_WIDTH_6 BIT(19)
+#define CFG_IFA_MASTER_IN_ATF BIT(20)
#define MTK_IOMMU_HAS_FLAG_MASK(pdata, _x, mask) \
((((pdata)->flags) & (mask)) == (_x))
@@ -167,6 +170,7 @@ enum mtk_iommu_plat {
M4U_MT8173,
M4U_MT8183,
M4U_MT8186,
+ M4U_MT8188,
M4U_MT8192,
M4U_MT8195,
M4U_MT8365,
@@ -258,6 +262,8 @@ struct mtk_iommu_data {
struct device *smicomm_dev;
struct mtk_iommu_bank_data *bank;
+ struct mtk_iommu_domain *share_dom; /* For 2 HWs share pgtable */
+
struct regmap *pericfg;
struct mutex mutex; /* Protect m4u_group/m4u_dom above */
@@ -577,41 +583,55 @@ static int mtk_iommu_config(struct mtk_iommu_data *data, struct device *dev,
unsigned int larbid, portid;
struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
const struct mtk_iommu_iova_region *region;
- u32 peri_mmuen, peri_mmuen_msk;
+ unsigned long portid_msk = 0;
+ struct arm_smccc_res res;
int i, ret = 0;
for (i = 0; i < fwspec->num_ids; ++i) {
- larbid = MTK_M4U_TO_LARB(fwspec->ids[i]);
portid = MTK_M4U_TO_PORT(fwspec->ids[i]);
+ portid_msk |= BIT(portid);
+ }
- if (MTK_IOMMU_IS_TYPE(data->plat_data, MTK_IOMMU_TYPE_MM)) {
- larb_mmu = &data->larb_imu[larbid];
+ if (MTK_IOMMU_IS_TYPE(data->plat_data, MTK_IOMMU_TYPE_MM)) {
+ /* All ports should be in the same larb. just use 0 here */
+ larbid = MTK_M4U_TO_LARB(fwspec->ids[0]);
+ larb_mmu = &data->larb_imu[larbid];
+ region = data->plat_data->iova_region + regionid;
- region = data->plat_data->iova_region + regionid;
+ for_each_set_bit(portid, &portid_msk, 32)
larb_mmu->bank[portid] = upper_32_bits(region->iova_base);
- dev_dbg(dev, "%s iommu for larb(%s) port %d region %d rgn-bank %d.\n",
- enable ? "enable" : "disable", dev_name(larb_mmu->dev),
- portid, regionid, larb_mmu->bank[portid]);
+ dev_dbg(dev, "%s iommu for larb(%s) port 0x%lx region %d rgn-bank %d.\n",
+ enable ? "enable" : "disable", dev_name(larb_mmu->dev),
+ portid_msk, regionid, upper_32_bits(region->iova_base));
- if (enable)
- larb_mmu->mmu |= MTK_SMI_MMU_EN(portid);
- else
- larb_mmu->mmu &= ~MTK_SMI_MMU_EN(portid);
- } else if (MTK_IOMMU_IS_TYPE(data->plat_data, MTK_IOMMU_TYPE_INFRA)) {
- peri_mmuen_msk = BIT(portid);
+ if (enable)
+ larb_mmu->mmu |= portid_msk;
+ else
+ larb_mmu->mmu &= ~portid_msk;
+ } else if (MTK_IOMMU_IS_TYPE(data->plat_data, MTK_IOMMU_TYPE_INFRA)) {
+ if (MTK_IOMMU_HAS_FLAG(data->plat_data, CFG_IFA_MASTER_IN_ATF)) {
+ arm_smccc_smc(MTK_SIP_KERNEL_IOMMU_CONTROL,
+ IOMMU_ATF_CMD_CONFIG_INFRA_IOMMU,
+ portid_msk, enable, 0, 0, 0, 0, &res);
+ ret = res.a0;
+ } else {
/* PCI dev has only one output id, enable the next writing bit for PCIe */
- if (dev_is_pci(dev))
- peri_mmuen_msk |= BIT(portid + 1);
+ if (dev_is_pci(dev)) {
+ if (fwspec->num_ids != 1) {
+ dev_err(dev, "PCI dev can only have one port.\n");
+ return -ENODEV;
+ }
+ portid_msk |= BIT(portid + 1);
+ }
- peri_mmuen = enable ? peri_mmuen_msk : 0;
ret = regmap_update_bits(data->pericfg, PERICFG_IOMMU_1,
- peri_mmuen_msk, peri_mmuen);
- if (ret)
- dev_err(dev, "%s iommu(%s) inframaster 0x%x fail(%d).\n",
- enable ? "enable" : "disable",
- dev_name(data->dev), peri_mmuen_msk, ret);
+ (u32)portid_msk, enable ? (u32)portid_msk : 0);
}
+ if (ret)
+ dev_err(dev, "%s iommu(%s) inframaster 0x%lx fail(%d).\n",
+ enable ? "enable" : "disable",
+ dev_name(data->dev), portid_msk, ret);
}
return ret;
}
@@ -620,15 +640,14 @@ static int mtk_iommu_domain_finalise(struct mtk_iommu_domain *dom,
struct mtk_iommu_data *data,
unsigned int region_id)
{
+ struct mtk_iommu_domain *share_dom = data->share_dom;
const struct mtk_iommu_iova_region *region;
- struct mtk_iommu_domain *m4u_dom;
-
- /* Always use bank0 in sharing pgtable case */
- m4u_dom = data->bank[0].m4u_dom;
- if (m4u_dom) {
- dom->iop = m4u_dom->iop;
- dom->cfg = m4u_dom->cfg;
- dom->domain.pgsize_bitmap = m4u_dom->cfg.pgsize_bitmap;
+
+ /* Always use share domain in sharing pgtable case */
+ if (MTK_IOMMU_HAS_FLAG(data->plat_data, SHARE_PGTABLE) && share_dom) {
+ dom->iop = share_dom->iop;
+ dom->cfg = share_dom->cfg;
+ dom->domain.pgsize_bitmap = share_dom->cfg.pgsize_bitmap;
goto update_iova_region;
}
@@ -658,6 +677,9 @@ static int mtk_iommu_domain_finalise(struct mtk_iommu_domain *dom,
/* Update our support page sizes bitmap */
dom->domain.pgsize_bitmap = dom->cfg.pgsize_bitmap;
+ if (MTK_IOMMU_HAS_FLAG(data->plat_data, SHARE_PGTABLE))
+ data->share_dom = dom;
+
update_iova_region:
/* Update the iova region for this domain */
region = data->plat_data->iova_region + region_id;
@@ -708,7 +730,9 @@ static int mtk_iommu_attach_device(struct iommu_domain *domain,
/* Data is in the frstdata in sharing pgtable case. */
frstdata = mtk_iommu_get_frst_data(hw_list);
+ mutex_lock(&frstdata->mutex);
ret = mtk_iommu_domain_finalise(dom, frstdata, region_id);
+ mutex_unlock(&frstdata->mutex);
if (ret) {
mutex_unlock(&dom->mutex);
return ret;
@@ -1318,7 +1342,8 @@ static int mtk_iommu_probe(struct platform_device *pdev)
dev_err_probe(dev, ret, "mm dts parse fail\n");
goto out_runtime_disable;
}
- } else if (MTK_IOMMU_IS_TYPE(data->plat_data, MTK_IOMMU_TYPE_INFRA)) {
+ } else if (MTK_IOMMU_IS_TYPE(data->plat_data, MTK_IOMMU_TYPE_INFRA) &&
+ !MTK_IOMMU_HAS_FLAG(data->plat_data, CFG_IFA_MASTER_IN_ATF)) {
p = data->plat_data->pericfg_comp_str;
data->pericfg = syscon_regmap_lookup_by_compatible(p);
if (IS_ERR(data->pericfg)) {
@@ -1570,6 +1595,67 @@ static const struct mtk_iommu_plat_data mt8186_data_mm = {
.iova_region_larb_msk = mt8186_larb_region_msk,
};
+static const struct mtk_iommu_plat_data mt8188_data_infra = {
+ .m4u_plat = M4U_MT8188,
+ .flags = WR_THROT_EN | DCM_DISABLE | STD_AXI_MODE | PM_CLK_AO |
+ MTK_IOMMU_TYPE_INFRA | IFA_IOMMU_PCIE_SUPPORT |
+ PGTABLE_PA_35_EN | CFG_IFA_MASTER_IN_ATF,
+ .inv_sel_reg = REG_MMU_INV_SEL_GEN2,
+ .banks_num = 1,
+ .banks_enable = {true},
+ .iova_region = single_domain,
+ .iova_region_nr = ARRAY_SIZE(single_domain),
+};
+
+static const u32 mt8188_larb_region_msk[MT8192_MULTI_REGION_NR_MAX][MTK_LARB_NR_MAX] = {
+ [0] = {~0, ~0, ~0, ~0}, /* Region0: all ports for larb0/1/2/3 */
+ [1] = {0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, ~0, ~0, ~0}, /* Region1: larb19(21)/21(22)/23 */
+ [2] = {0, 0, 0, 0, ~0, ~0, ~0, ~0, /* Region2: the other larbs. */
+ ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0,
+ ~0, ~0, ~0, ~0, ~0, 0, 0, 0,
+ 0, ~0},
+ [3] = {0},
+ [4] = {[24] = BIT(0) | BIT(1)}, /* Only larb27(24) port0/1 */
+ [5] = {[24] = BIT(2) | BIT(3)}, /* Only larb27(24) port2/3 */
+};
+
+static const struct mtk_iommu_plat_data mt8188_data_vdo = {
+ .m4u_plat = M4U_MT8188,
+ .flags = HAS_BCLK | HAS_SUB_COMM_3BITS | OUT_ORDER_WR_EN |
+ WR_THROT_EN | IOVA_34_EN | SHARE_PGTABLE |
+ PGTABLE_PA_35_EN | MTK_IOMMU_TYPE_MM,
+ .hw_list = &m4ulist,
+ .inv_sel_reg = REG_MMU_INV_SEL_GEN2,
+ .banks_num = 1,
+ .banks_enable = {true},
+ .iova_region = mt8192_multi_dom,
+ .iova_region_nr = ARRAY_SIZE(mt8192_multi_dom),
+ .iova_region_larb_msk = mt8188_larb_region_msk,
+ .larbid_remap = {{2}, {0}, {21}, {0}, {19}, {9, 10,
+ 11 /* 11a */, 25 /* 11c */},
+ {13, 0, 29 /* 16b */, 30 /* 17b */, 0}, {5}},
+};
+
+static const struct mtk_iommu_plat_data mt8188_data_vpp = {
+ .m4u_plat = M4U_MT8188,
+ .flags = HAS_BCLK | HAS_SUB_COMM_3BITS | OUT_ORDER_WR_EN |
+ WR_THROT_EN | IOVA_34_EN | SHARE_PGTABLE |
+ PGTABLE_PA_35_EN | MTK_IOMMU_TYPE_MM,
+ .hw_list = &m4ulist,
+ .inv_sel_reg = REG_MMU_INV_SEL_GEN2,
+ .banks_num = 1,
+ .banks_enable = {true},
+ .iova_region = mt8192_multi_dom,
+ .iova_region_nr = ARRAY_SIZE(mt8192_multi_dom),
+ .iova_region_larb_msk = mt8188_larb_region_msk,
+ .larbid_remap = {{1}, {3}, {23}, {7}, {MTK_INVALID_LARBID},
+ {12, 15, 24 /* 11b */}, {14, MTK_INVALID_LARBID,
+ 16 /* 16a */, 17 /* 17a */, MTK_INVALID_LARBID,
+ 27, 28 /* ccu0 */, MTK_INVALID_LARBID}, {4, 6}},
+};
+
static const unsigned int mt8192_larb_region_msk[MT8192_MULTI_REGION_NR_MAX][MTK_LARB_NR_MAX] = {
[0] = {~0, ~0}, /* Region0: larb0/1 */
[1] = {0, 0, 0, 0, ~0, ~0, 0, ~0}, /* Region1: larb4/5/7 */
@@ -1678,6 +1764,9 @@ static const struct of_device_id mtk_iommu_of_ids[] = {
{ .compatible = "mediatek,mt8173-m4u", .data = &mt8173_data},
{ .compatible = "mediatek,mt8183-m4u", .data = &mt8183_data},
{ .compatible = "mediatek,mt8186-iommu-mm", .data = &mt8186_data_mm}, /* mm: m4u */
+ { .compatible = "mediatek,mt8188-iommu-infra", .data = &mt8188_data_infra},
+ { .compatible = "mediatek,mt8188-iommu-vdo", .data = &mt8188_data_vdo},
+ { .compatible = "mediatek,mt8188-iommu-vpp", .data = &mt8188_data_vpp},
{ .compatible = "mediatek,mt8192-m4u", .data = &mt8192_data},
{ .compatible = "mediatek,mt8195-iommu-infra", .data = &mt8195_data_infra},
{ .compatible = "mediatek,mt8195-iommu-vdo", .data = &mt8195_data_vdo},
diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c
index 40f57d293a79..157b286e36bf 100644
--- a/drivers/iommu/of_iommu.c
+++ b/drivers/iommu/of_iommu.c
@@ -159,7 +159,7 @@ const struct iommu_ops *of_iommu_configure(struct device *dev,
* If we have reason to believe the IOMMU driver missed the initial
* probe for dev, replay it to get things in order.
*/
- if (!err && dev->bus && !device_iommu_mapped(dev))
+ if (!err && dev->bus)
err = iommu_probe_device(dev);
/* Ignore all other errors apart from EPROBE_DEFER */
diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c
index 4054030c3237..8ff69fbf9f65 100644
--- a/drivers/iommu/rockchip-iommu.c
+++ b/drivers/iommu/rockchip-iommu.c
@@ -98,9 +98,8 @@ struct rk_iommu_ops {
phys_addr_t (*pt_address)(u32 dte);
u32 (*mk_dtentries)(dma_addr_t pt_dma);
u32 (*mk_ptentries)(phys_addr_t page, int prot);
- phys_addr_t (*dte_addr_phys)(u32 addr);
- u32 (*dma_addr_dte)(dma_addr_t dt_dma);
u64 dma_bit_mask;
+ gfp_t gfp_flags;
};
struct rk_iommu {
@@ -278,8 +277,8 @@ static u32 rk_mk_pte(phys_addr_t page, int prot)
/*
* In v2:
* 31:12 - Page address bit 31:0
- * 11:9 - Page address bit 34:32
- * 8:4 - Page address bit 39:35
+ * 11: 8 - Page address bit 35:32
+ * 7: 4 - Page address bit 39:36
* 3 - Security
* 2 - Writable
* 1 - Readable
@@ -506,7 +505,7 @@ static int rk_iommu_force_reset(struct rk_iommu *iommu)
/*
* Check if register DTE_ADDR is working by writing DTE_ADDR_DUMMY
- * and verifying that upper 5 nybbles are read back.
+ * and verifying that upper 5 (v1) or 7 (v2) nybbles are read back.
*/
for (i = 0; i < iommu->num_mmu; i++) {
dte_addr = rk_ops->pt_address(DTE_ADDR_DUMMY);
@@ -531,33 +530,6 @@ static int rk_iommu_force_reset(struct rk_iommu *iommu)
return 0;
}
-static inline phys_addr_t rk_dte_addr_phys(u32 addr)
-{
- return (phys_addr_t)addr;
-}
-
-static inline u32 rk_dma_addr_dte(dma_addr_t dt_dma)
-{
- return dt_dma;
-}
-
-#define DT_HI_MASK GENMASK_ULL(39, 32)
-#define DTE_BASE_HI_MASK GENMASK(11, 4)
-#define DT_SHIFT 28
-
-static inline phys_addr_t rk_dte_addr_phys_v2(u32 addr)
-{
- u64 addr64 = addr;
- return (phys_addr_t)(addr64 & RK_DTE_PT_ADDRESS_MASK) |
- ((addr64 & DTE_BASE_HI_MASK) << DT_SHIFT);
-}
-
-static inline u32 rk_dma_addr_dte_v2(dma_addr_t dt_dma)
-{
- return (dt_dma & RK_DTE_PT_ADDRESS_MASK) |
- ((dt_dma & DT_HI_MASK) >> DT_SHIFT);
-}
-
static void log_iova(struct rk_iommu *iommu, int index, dma_addr_t iova)
{
void __iomem *base = iommu->bases[index];
@@ -577,7 +549,7 @@ static void log_iova(struct rk_iommu *iommu, int index, dma_addr_t iova)
page_offset = rk_iova_page_offset(iova);
mmu_dte_addr = rk_iommu_read(base, RK_MMU_DTE_ADDR);
- mmu_dte_addr_phys = rk_ops->dte_addr_phys(mmu_dte_addr);
+ mmu_dte_addr_phys = rk_ops->pt_address(mmu_dte_addr);
dte_addr_phys = mmu_dte_addr_phys + (4 * dte_index);
dte_addr = phys_to_virt(dte_addr_phys);
@@ -756,7 +728,7 @@ static u32 *rk_dte_get_page_table(struct rk_iommu_domain *rk_domain,
if (rk_dte_is_pt_valid(dte))
goto done;
- page_table = (u32 *)get_zeroed_page(GFP_ATOMIC | GFP_DMA32);
+ page_table = (u32 *)get_zeroed_page(GFP_ATOMIC | rk_ops->gfp_flags);
if (!page_table)
return ERR_PTR(-ENOMEM);
@@ -967,7 +939,7 @@ static int rk_iommu_enable(struct rk_iommu *iommu)
for (i = 0; i < iommu->num_mmu; i++) {
rk_iommu_write(iommu->bases[i], RK_MMU_DTE_ADDR,
- rk_ops->dma_addr_dte(rk_domain->dt_dma));
+ rk_ops->mk_dtentries(rk_domain->dt_dma));
rk_iommu_base_command(iommu->bases[i], RK_MMU_CMD_ZAP_CACHE);
rk_iommu_write(iommu->bases[i], RK_MMU_INT_MASK, RK_MMU_IRQ_MASK);
}
@@ -1105,7 +1077,7 @@ static struct iommu_domain *rk_iommu_domain_alloc(unsigned type)
* Each level1 (dt) and level2 (pt) table has 1024 4-byte entries.
* Allocate one 4 KiB page for each table.
*/
- rk_domain->dt = (u32 *)get_zeroed_page(GFP_KERNEL | GFP_DMA32);
+ rk_domain->dt = (u32 *)get_zeroed_page(GFP_KERNEL | rk_ops->gfp_flags);
if (!rk_domain->dt)
goto err_free_domain;
@@ -1405,18 +1377,16 @@ static struct rk_iommu_ops iommu_data_ops_v1 = {
.pt_address = &rk_dte_pt_address,
.mk_dtentries = &rk_mk_dte,
.mk_ptentries = &rk_mk_pte,
- .dte_addr_phys = &rk_dte_addr_phys,
- .dma_addr_dte = &rk_dma_addr_dte,
.dma_bit_mask = DMA_BIT_MASK(32),
+ .gfp_flags = GFP_DMA32,
};
static struct rk_iommu_ops iommu_data_ops_v2 = {
.pt_address = &rk_dte_pt_address_v2,
.mk_dtentries = &rk_mk_dte_v2,
.mk_ptentries = &rk_mk_pte_v2,
- .dte_addr_phys = &rk_dte_addr_phys_v2,
- .dma_addr_dte = &rk_dma_addr_dte_v2,
.dma_bit_mask = DMA_BIT_MASK(40),
+ .gfp_flags = 0,
};
static const struct of_device_id rk_iommu_dt_ids[] = {
diff --git a/drivers/iommu/sprd-iommu.c b/drivers/iommu/sprd-iommu.c
index 39e34fdeccda..2fa9afebd4f5 100644
--- a/drivers/iommu/sprd-iommu.c
+++ b/drivers/iommu/sprd-iommu.c
@@ -14,6 +14,7 @@
#include <linux/mfd/syscon.h>
#include <linux/module.h>
#include <linux/of_platform.h>
+#include <linux/platform_device.h>
#include <linux/regmap.h>
#include <linux/slab.h>
@@ -148,6 +149,7 @@ static struct iommu_domain *sprd_iommu_domain_alloc(unsigned int domain_type)
dom->domain.geometry.aperture_start = 0;
dom->domain.geometry.aperture_end = SZ_256M - 1;
+ dom->domain.geometry.force_aperture = true;
return &dom->domain;
}
diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c
index 1cbf063ccf14..e445f80d0226 100644
--- a/drivers/iommu/tegra-smmu.c
+++ b/drivers/iommu/tegra-smmu.c
@@ -9,7 +9,7 @@
#include <linux/iommu.h>
#include <linux/kernel.h>
#include <linux/of.h>
-#include <linux/of_device.h>
+#include <linux/of_platform.h>
#include <linux/pci.h>
#include <linux/platform_device.h>
#include <linux/slab.h>
diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c
index 3551ed057774..17dcd826f5c2 100644
--- a/drivers/iommu/virtio-iommu.c
+++ b/drivers/iommu/virtio-iommu.c
@@ -13,7 +13,7 @@
#include <linux/interval_tree.h>
#include <linux/iommu.h>
#include <linux/module.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
#include <linux/pci.h>
#include <linux/virtio.h>
#include <linux/virtio_config.h>