Diffstat (limited to 'drivers/iommu')
42 files changed, 2063 insertions, 946 deletions
diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 0c35018239ce..e2857109e966 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -12,13 +12,14 @@ #include "amd_iommu_types.h" irqreturn_t amd_iommu_int_thread(int irq, void *data); +irqreturn_t amd_iommu_int_thread_evtlog(int irq, void *data); +irqreturn_t amd_iommu_int_thread_pprlog(int irq, void *data); +irqreturn_t amd_iommu_int_thread_galog(int irq, void *data); irqreturn_t amd_iommu_int_handler(int irq, void *data); void amd_iommu_apply_erratum_63(struct amd_iommu *iommu, u16 devid); void amd_iommu_restart_event_logging(struct amd_iommu *iommu); void amd_iommu_restart_ga_log(struct amd_iommu *iommu); -int amd_iommu_init_devices(void); -void amd_iommu_uninit_devices(void); -void amd_iommu_init_notifier(void); +void amd_iommu_restart_ppr_log(struct amd_iommu *iommu); void amd_iommu_set_rlookup_table(struct amd_iommu *iommu, u16 devid); #ifdef CONFIG_AMD_IOMMU_DEBUGFS diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index dc1db6167927..7dc30c2b56b3 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -120,10 +120,13 @@ #define PASID_MASK 0x0000ffff /* MMIO status bits */ -#define MMIO_STATUS_EVT_OVERFLOW_INT_MASK BIT(0) +#define MMIO_STATUS_EVT_OVERFLOW_MASK BIT(0) #define MMIO_STATUS_EVT_INT_MASK BIT(1) #define MMIO_STATUS_COM_WAIT_INT_MASK BIT(2) +#define MMIO_STATUS_EVT_RUN_MASK BIT(3) +#define MMIO_STATUS_PPR_OVERFLOW_MASK BIT(5) #define MMIO_STATUS_PPR_INT_MASK BIT(6) +#define MMIO_STATUS_PPR_RUN_MASK BIT(7) #define MMIO_STATUS_GALOG_RUN_MASK BIT(8) #define MMIO_STATUS_GALOG_OVERFLOW_MASK BIT(9) #define MMIO_STATUS_GALOG_INT_MASK BIT(10) @@ -381,15 +384,15 @@ */ #define DTE_FLAG_V BIT_ULL(0) #define DTE_FLAG_TV BIT_ULL(1) +#define DTE_FLAG_GIOV BIT_ULL(54) +#define DTE_FLAG_GV BIT_ULL(55) +#define DTE_GLX_SHIFT (56) +#define DTE_GLX_MASK (3) #define DTE_FLAG_IR BIT_ULL(61) #define DTE_FLAG_IW BIT_ULL(62) #define DTE_FLAG_IOTLB BIT_ULL(32) -#define DTE_FLAG_GIOV BIT_ULL(54) -#define DTE_FLAG_GV BIT_ULL(55) #define DTE_FLAG_MASK (0x3ffULL << 32) -#define DTE_GLX_SHIFT (56) -#define DTE_GLX_MASK (3) #define DEV_DOMID_MASK 0xffffULL #define DTE_GCR3_VAL_A(x) (((x) >> 12) & 0x00007ULL) @@ -702,12 +705,21 @@ struct amd_iommu { /* event buffer virtual address */ u8 *evt_buf; + /* Name for event log interrupt */ + unsigned char evt_irq_name[16]; + /* Base of the PPR log, if present */ u8 *ppr_log; + /* Name for PPR log interrupt */ + unsigned char ppr_irq_name[16]; + /* Base of the GA log, if present */ u8 *ga_log; + /* Name for GA log interrupt */ + unsigned char ga_irq_name[16]; + /* Tail of the GA log, if present */ u8 *ga_log_tail; diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index ea0f1ab94178..45efb7e5d725 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -483,6 +483,10 @@ static void iommu_disable(struct amd_iommu *iommu) iommu_feature_disable(iommu, CONTROL_GALOG_EN); iommu_feature_disable(iommu, CONTROL_GAINT_EN); + /* Disable IOMMU PPR logging */ + iommu_feature_disable(iommu, CONTROL_PPRLOG_EN); + iommu_feature_disable(iommu, CONTROL_PPRINT_EN); + /* Disable IOMMU hardware itself */ iommu_feature_disable(iommu, CONTROL_IOMMU_EN); @@ -753,37 +757,61 @@ static int __init alloc_command_buffer(struct amd_iommu *iommu) } /* + * Interrupt handler has processed all pending events and adjusted head + * and tail pointer. 
Reset overflow mask and restart logging again. + */ +static void amd_iommu_restart_log(struct amd_iommu *iommu, const char *evt_type, + u8 cntrl_intr, u8 cntrl_log, + u32 status_run_mask, u32 status_overflow_mask) +{ + u32 status; + + status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); + if (status & status_run_mask) + return; + + pr_info_ratelimited("IOMMU %s log restarting\n", evt_type); + + iommu_feature_disable(iommu, cntrl_log); + iommu_feature_disable(iommu, cntrl_intr); + + writel(status_overflow_mask, iommu->mmio_base + MMIO_STATUS_OFFSET); + + iommu_feature_enable(iommu, cntrl_intr); + iommu_feature_enable(iommu, cntrl_log); +} + +/* * This function restarts event logging in case the IOMMU experienced * an event log buffer overflow. */ void amd_iommu_restart_event_logging(struct amd_iommu *iommu) { - iommu_feature_disable(iommu, CONTROL_EVT_LOG_EN); - iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN); + amd_iommu_restart_log(iommu, "Event", CONTROL_EVT_INT_EN, + CONTROL_EVT_LOG_EN, MMIO_STATUS_EVT_RUN_MASK, + MMIO_STATUS_EVT_OVERFLOW_MASK); } /* * This function restarts event logging in case the IOMMU experienced - * an GA log overflow. + * GA log overflow. */ void amd_iommu_restart_ga_log(struct amd_iommu *iommu) { - u32 status; - - status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); - if (status & MMIO_STATUS_GALOG_RUN_MASK) - return; - - pr_info_ratelimited("IOMMU GA Log restarting\n"); - - iommu_feature_disable(iommu, CONTROL_GALOG_EN); - iommu_feature_disable(iommu, CONTROL_GAINT_EN); - - writel(MMIO_STATUS_GALOG_OVERFLOW_MASK, - iommu->mmio_base + MMIO_STATUS_OFFSET); + amd_iommu_restart_log(iommu, "GA", CONTROL_GAINT_EN, + CONTROL_GALOG_EN, MMIO_STATUS_GALOG_RUN_MASK, + MMIO_STATUS_GALOG_OVERFLOW_MASK); +} - iommu_feature_enable(iommu, CONTROL_GAINT_EN); - iommu_feature_enable(iommu, CONTROL_GALOG_EN); +/* + * This function restarts ppr logging in case the IOMMU experienced + * PPR log overflow. + */ +void amd_iommu_restart_ppr_log(struct amd_iommu *iommu) +{ + amd_iommu_restart_log(iommu, "PPR", CONTROL_PPRINT_EN, + CONTROL_PPRLOG_EN, MMIO_STATUS_PPR_RUN_MASK, + MMIO_STATUS_PPR_OVERFLOW_MASK); } /* @@ -906,6 +934,8 @@ static void iommu_enable_ppr_log(struct amd_iommu *iommu) if (iommu->ppr_log == NULL) return; + iommu_feature_enable(iommu, CONTROL_PPR_EN); + entry = iommu_virt_to_phys(iommu->ppr_log) | PPR_LOG_SIZE_512; memcpy_toio(iommu->mmio_base + MMIO_PPR_LOG_OFFSET, @@ -916,7 +946,7 @@ static void iommu_enable_ppr_log(struct amd_iommu *iommu) writel(0x00, iommu->mmio_base + MMIO_PPR_TAIL_OFFSET); iommu_feature_enable(iommu, CONTROL_PPRLOG_EN); - iommu_feature_enable(iommu, CONTROL_PPR_EN); + iommu_feature_enable(iommu, CONTROL_PPRINT_EN); } static void __init free_ppr_log(struct amd_iommu *iommu) @@ -2311,6 +2341,7 @@ static int intcapxt_irqdomain_alloc(struct irq_domain *domain, unsigned int virq struct irq_data *irqd = irq_domain_get_irq_data(domain, i); irqd->chip = &intcapxt_controller; + irqd->hwirq = info->hwirq; irqd->chip_data = info->data; __irq_set_handler(i, handle_edge_irq, 0, "edge"); } @@ -2337,22 +2368,14 @@ static void intcapxt_unmask_irq(struct irq_data *irqd) xt.destid_0_23 = cfg->dest_apicid & GENMASK(23, 0); xt.destid_24_31 = cfg->dest_apicid >> 24; - /** - * Current IOMMU implementation uses the same IRQ for all - * 3 IOMMU interrupts. 
- */ - writeq(xt.capxt, iommu->mmio_base + MMIO_INTCAPXT_EVT_OFFSET); - writeq(xt.capxt, iommu->mmio_base + MMIO_INTCAPXT_PPR_OFFSET); - writeq(xt.capxt, iommu->mmio_base + MMIO_INTCAPXT_GALOG_OFFSET); + writeq(xt.capxt, iommu->mmio_base + irqd->hwirq); } static void intcapxt_mask_irq(struct irq_data *irqd) { struct amd_iommu *iommu = irqd->chip_data; - writeq(0, iommu->mmio_base + MMIO_INTCAPXT_EVT_OFFSET); - writeq(0, iommu->mmio_base + MMIO_INTCAPXT_PPR_OFFSET); - writeq(0, iommu->mmio_base + MMIO_INTCAPXT_GALOG_OFFSET); + writeq(0, iommu->mmio_base + irqd->hwirq); } @@ -2415,7 +2438,8 @@ static struct irq_domain *iommu_get_irqdomain(void) return iommu_irqdomain; } -static int iommu_setup_intcapxt(struct amd_iommu *iommu) +static int __iommu_setup_intcapxt(struct amd_iommu *iommu, const char *devname, + int hwirq, irq_handler_t thread_fn) { struct irq_domain *domain; struct irq_alloc_info info; @@ -2429,6 +2453,7 @@ static int iommu_setup_intcapxt(struct amd_iommu *iommu) init_irq_alloc_info(&info, NULL); info.type = X86_IRQ_ALLOC_TYPE_AMDVI; info.data = iommu; + info.hwirq = hwirq; irq = irq_domain_alloc_irqs(domain, 1, node, &info); if (irq < 0) { @@ -2437,7 +2462,7 @@ static int iommu_setup_intcapxt(struct amd_iommu *iommu) } ret = request_threaded_irq(irq, amd_iommu_int_handler, - amd_iommu_int_thread, 0, "AMD-Vi", iommu); + thread_fn, 0, devname, iommu); if (ret) { irq_domain_free_irqs(irq, 1); irq_domain_remove(domain); @@ -2447,6 +2472,37 @@ static int iommu_setup_intcapxt(struct amd_iommu *iommu) return 0; } +static int iommu_setup_intcapxt(struct amd_iommu *iommu) +{ + int ret; + + snprintf(iommu->evt_irq_name, sizeof(iommu->evt_irq_name), + "AMD-Vi%d-Evt", iommu->index); + ret = __iommu_setup_intcapxt(iommu, iommu->evt_irq_name, + MMIO_INTCAPXT_EVT_OFFSET, + amd_iommu_int_thread_evtlog); + if (ret) + return ret; + + snprintf(iommu->ppr_irq_name, sizeof(iommu->ppr_irq_name), + "AMD-Vi%d-PPR", iommu->index); + ret = __iommu_setup_intcapxt(iommu, iommu->ppr_irq_name, + MMIO_INTCAPXT_PPR_OFFSET, + amd_iommu_int_thread_pprlog); + if (ret) + return ret; + +#ifdef CONFIG_IRQ_REMAP + snprintf(iommu->ga_irq_name, sizeof(iommu->ga_irq_name), + "AMD-Vi%d-GA", iommu->index); + ret = __iommu_setup_intcapxt(iommu, iommu->ga_irq_name, + MMIO_INTCAPXT_GALOG_OFFSET, + amd_iommu_int_thread_galog); +#endif + + return ret; +} + static int iommu_init_irq(struct amd_iommu *iommu) { int ret; @@ -2472,8 +2528,6 @@ enable_faults: iommu_feature_enable(iommu, CONTROL_EVT_INT_EN); - if (iommu->ppr_log != NULL) - iommu_feature_enable(iommu, CONTROL_PPRINT_EN); return 0; } @@ -2889,8 +2943,6 @@ static void enable_iommus_vapic(void) static void enable_iommus(void) { early_enable_iommus(); - enable_iommus_vapic(); - enable_iommus_v2(); } static void disable_iommus(void) @@ -3154,6 +3206,13 @@ static int amd_iommu_enable_interrupts(void) goto out; } + /* + * Interrupt handler is ready to process interrupts. Enable + * PPR and GA log interrupt for all IOMMUs. + */ + enable_iommus_vapic(); + enable_iommus_v2(); + out: return ret; } @@ -3233,8 +3292,6 @@ static int __init state_next(void) register_syscore_ops(&amd_iommu_syscore_ops); ret = amd_iommu_init_pci(); init_state = ret ? 
IOMMU_INIT_ERROR : IOMMU_PCI_INIT; - enable_iommus_vapic(); - enable_iommus_v2(); break; case IOMMU_PCI_INIT: ret = amd_iommu_enable_interrupts(); diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index c3b58a8389b9..95bd7c25ba6f 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -841,50 +841,27 @@ static inline void amd_iommu_set_pci_msi_domain(struct device *dev, struct amd_iommu *iommu) { } #endif /* !CONFIG_IRQ_REMAP */ -#define AMD_IOMMU_INT_MASK \ - (MMIO_STATUS_EVT_OVERFLOW_INT_MASK | \ - MMIO_STATUS_EVT_INT_MASK | \ - MMIO_STATUS_PPR_INT_MASK | \ - MMIO_STATUS_GALOG_OVERFLOW_MASK | \ - MMIO_STATUS_GALOG_INT_MASK) - -irqreturn_t amd_iommu_int_thread(int irq, void *data) +static void amd_iommu_handle_irq(void *data, const char *evt_type, + u32 int_mask, u32 overflow_mask, + void (*int_handler)(struct amd_iommu *), + void (*overflow_handler)(struct amd_iommu *)) { struct amd_iommu *iommu = (struct amd_iommu *) data; u32 status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); + u32 mask = int_mask | overflow_mask; - while (status & AMD_IOMMU_INT_MASK) { + while (status & mask) { /* Enable interrupt sources again */ - writel(AMD_IOMMU_INT_MASK, - iommu->mmio_base + MMIO_STATUS_OFFSET); + writel(mask, iommu->mmio_base + MMIO_STATUS_OFFSET); - if (status & MMIO_STATUS_EVT_INT_MASK) { - pr_devel("Processing IOMMU Event Log\n"); - iommu_poll_events(iommu); + if (int_handler) { + pr_devel("Processing IOMMU (ivhd%d) %s Log\n", + iommu->index, evt_type); + int_handler(iommu); } - if (status & MMIO_STATUS_PPR_INT_MASK) { - pr_devel("Processing IOMMU PPR Log\n"); - iommu_poll_ppr_log(iommu); - } - -#ifdef CONFIG_IRQ_REMAP - if (status & (MMIO_STATUS_GALOG_INT_MASK | - MMIO_STATUS_GALOG_OVERFLOW_MASK)) { - pr_devel("Processing IOMMU GA Log\n"); - iommu_poll_ga_log(iommu); - } - - if (status & MMIO_STATUS_GALOG_OVERFLOW_MASK) { - pr_info_ratelimited("IOMMU GA Log overflow\n"); - amd_iommu_restart_ga_log(iommu); - } -#endif - - if (status & MMIO_STATUS_EVT_OVERFLOW_INT_MASK) { - pr_info_ratelimited("IOMMU event log overflow\n"); - amd_iommu_restart_event_logging(iommu); - } + if ((status & overflow_mask) && overflow_handler) + overflow_handler(iommu); /* * Hardware bug: ERBT1312 @@ -901,6 +878,43 @@ irqreturn_t amd_iommu_int_thread(int irq, void *data) */ status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); } +} + +irqreturn_t amd_iommu_int_thread_evtlog(int irq, void *data) +{ + amd_iommu_handle_irq(data, "Evt", MMIO_STATUS_EVT_INT_MASK, + MMIO_STATUS_EVT_OVERFLOW_MASK, + iommu_poll_events, amd_iommu_restart_event_logging); + + return IRQ_HANDLED; +} + +irqreturn_t amd_iommu_int_thread_pprlog(int irq, void *data) +{ + amd_iommu_handle_irq(data, "PPR", MMIO_STATUS_PPR_INT_MASK, + MMIO_STATUS_PPR_OVERFLOW_MASK, + iommu_poll_ppr_log, amd_iommu_restart_ppr_log); + + return IRQ_HANDLED; +} + +irqreturn_t amd_iommu_int_thread_galog(int irq, void *data) +{ +#ifdef CONFIG_IRQ_REMAP + amd_iommu_handle_irq(data, "GA", MMIO_STATUS_GALOG_INT_MASK, + MMIO_STATUS_GALOG_OVERFLOW_MASK, + iommu_poll_ga_log, amd_iommu_restart_ga_log); +#endif + + return IRQ_HANDLED; +} + +irqreturn_t amd_iommu_int_thread(int irq, void *data) +{ + amd_iommu_int_thread_evtlog(irq, data); + amd_iommu_int_thread_pprlog(irq, data); + amd_iommu_int_thread_galog(irq, data); + return IRQ_HANDLED; } @@ -3681,7 +3695,7 @@ static int amd_ir_set_affinity(struct irq_data *data, * at the new destination. So, time to cleanup the previous * vector allocation. 
*/ - send_cleanup_vector(cfg); + vector_schedule_cleanup(cfg); return IRQ_SET_MASK_OK_DONE; } diff --git a/drivers/iommu/amd/iommu_v2.c b/drivers/iommu/amd/iommu_v2.c index 261352a23271..57c2fb1146e2 100644 --- a/drivers/iommu/amd/iommu_v2.c +++ b/drivers/iommu/amd/iommu_v2.c @@ -262,8 +262,8 @@ static void put_pasid_state(struct pasid_state *pasid_state) static void put_pasid_state_wait(struct pasid_state *pasid_state) { - refcount_dec(&pasid_state->count); - wait_event(pasid_state->wq, !refcount_read(&pasid_state->count)); + if (!refcount_dec_and_test(&pasid_state->count)) + wait_event(pasid_state->wq, !refcount_read(&pasid_state->count)); free_pasid_state(pasid_state); } @@ -327,6 +327,9 @@ static void free_pasid_states(struct device_state *dev_state) put_pasid_state(pasid_state); + /* Clear the pasid state so that the pasid can be re-used */ + clear_pasid_state(dev_state, pasid_state->pasid); + /* * This will call the mn_release function and * unbind the PASID @@ -355,9 +358,9 @@ static struct pasid_state *mn_to_state(struct mmu_notifier *mn) return container_of(mn, struct pasid_state, mn); } -static void mn_invalidate_range(struct mmu_notifier *mn, - struct mm_struct *mm, - unsigned long start, unsigned long end) +static void mn_arch_invalidate_secondary_tlbs(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, unsigned long end) { struct pasid_state *pasid_state; struct device_state *dev_state; @@ -391,8 +394,8 @@ static void mn_release(struct mmu_notifier *mn, struct mm_struct *mm) } static const struct mmu_notifier_ops iommu_mn = { - .release = mn_release, - .invalidate_range = mn_invalidate_range, + .release = mn_release, + .arch_invalidate_secondary_tlbs = mn_arch_invalidate_secondary_tlbs, }; static void set_pri_tag_status(struct pasid_state *pasid_state, diff --git a/drivers/iommu/apple-dart.c b/drivers/iommu/apple-dart.c index 8af64b57f048..2082081402d3 100644 --- a/drivers/iommu/apple-dart.c +++ b/drivers/iommu/apple-dart.c @@ -1276,7 +1276,7 @@ static __maybe_unused int apple_dart_resume(struct device *dev) return 0; } -DEFINE_SIMPLE_DEV_PM_OPS(apple_dart_pm_ops, apple_dart_suspend, apple_dart_resume); +static DEFINE_SIMPLE_DEV_PM_OPS(apple_dart_pm_ops, apple_dart_suspend, apple_dart_resume); static const struct of_device_id apple_dart_of_match[] = { { .compatible = "apple,t8103-dart", .data = &apple_dart_hw_t8103 }, diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c index a5a63b1c947e..4d83edc2be99 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c @@ -80,7 +80,7 @@ arm_smmu_share_asid(struct mm_struct *mm, u16 asid) * be some overlap between use of both ASIDs, until we invalidate the * TLB. 
*/ - arm_smmu_write_ctx_desc(smmu_domain, 0, cd); + arm_smmu_write_ctx_desc(smmu_domain, IOMMU_NO_PASID, cd); /* Invalidate TLB entries previously associated with that context */ arm_smmu_tlb_inv_asid(smmu, asid); @@ -186,9 +186,10 @@ static void arm_smmu_free_shared_cd(struct arm_smmu_ctx_desc *cd) } } -static void arm_smmu_mm_invalidate_range(struct mmu_notifier *mn, - struct mm_struct *mm, - unsigned long start, unsigned long end) +static void arm_smmu_mm_arch_invalidate_secondary_tlbs(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, + unsigned long end) { struct arm_smmu_mmu_notifier *smmu_mn = mn_to_smmu(mn); struct arm_smmu_domain *smmu_domain = smmu_mn->domain; @@ -200,10 +201,20 @@ static void arm_smmu_mm_invalidate_range(struct mmu_notifier *mn, * range. So do a simple translation here by calculating size correctly. */ size = end - start; + if (size == ULONG_MAX) + size = 0; + + if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_BTM)) { + if (!size) + arm_smmu_tlb_inv_asid(smmu_domain->smmu, + smmu_mn->cd->asid); + else + arm_smmu_tlb_inv_range_asid(start, size, + smmu_mn->cd->asid, + PAGE_SIZE, false, + smmu_domain); + } - if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_BTM)) - arm_smmu_tlb_inv_range_asid(start, size, smmu_mn->cd->asid, - PAGE_SIZE, false, smmu_domain); arm_smmu_atc_inv_domain(smmu_domain, mm->pasid, start, size); } @@ -237,9 +248,9 @@ static void arm_smmu_mmu_notifier_free(struct mmu_notifier *mn) } static const struct mmu_notifier_ops arm_smmu_mmu_notifier_ops = { - .invalidate_range = arm_smmu_mm_invalidate_range, - .release = arm_smmu_mm_release, - .free_notifier = arm_smmu_mmu_notifier_free, + .arch_invalidate_secondary_tlbs = arm_smmu_mm_arch_invalidate_secondary_tlbs, + .release = arm_smmu_mm_release, + .free_notifier = arm_smmu_mmu_notifier_free, }; /* Allocate or get existing MMU notifier for this {domain, mm} pair */ diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 9b0dc3505601..e82bf1c449a3 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -1059,7 +1059,7 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid, /* * This function handles the following cases: * - * (1) Install primary CD, for normal DMA traffic (SSID = 0). + * (1) Install primary CD, for normal DMA traffic (SSID = IOMMU_NO_PASID = 0). * (2) Install a secondary CD, for SID+SSID traffic. * (3) Update ASID of a CD. Atomically write the first 64 bits of the * CD, then invalidate the old entry and mappings. @@ -1607,7 +1607,7 @@ static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt) sid = FIELD_GET(PRIQ_0_SID, evt[0]); ssv = FIELD_GET(PRIQ_0_SSID_V, evt[0]); - ssid = ssv ? FIELD_GET(PRIQ_0_SSID, evt[0]) : 0; + ssid = ssv ? 
FIELD_GET(PRIQ_0_SSID, evt[0]) : IOMMU_NO_PASID; last = FIELD_GET(PRIQ_0_PRG_LAST, evt[0]); grpid = FIELD_GET(PRIQ_1_PRG_IDX, evt[1]); @@ -1748,7 +1748,7 @@ arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size, */ *cmd = (struct arm_smmu_cmdq_ent) { .opcode = CMDQ_OP_ATC_INV, - .substream_valid = !!ssid, + .substream_valid = (ssid != IOMMU_NO_PASID), .atc.ssid = ssid, }; @@ -1795,7 +1795,7 @@ static int arm_smmu_atc_inv_master(struct arm_smmu_master *master) struct arm_smmu_cmdq_ent cmd; struct arm_smmu_cmdq_batch cmds; - arm_smmu_atc_inv_to_cmd(0, 0, 0, &cmd); + arm_smmu_atc_inv_to_cmd(IOMMU_NO_PASID, 0, 0, &cmd); cmds.num = 0; for (i = 0; i < master->num_streams; i++) { @@ -1875,7 +1875,7 @@ static void arm_smmu_tlb_inv_context(void *cookie) cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid; arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd); } - arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0); + arm_smmu_atc_inv_domain(smmu_domain, IOMMU_NO_PASID, 0, 0); } static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd, @@ -1968,7 +1968,7 @@ static void arm_smmu_tlb_inv_range_domain(unsigned long iova, size_t size, * Unfortunately, this can't be leaf-only since we may have * zapped an entire table. */ - arm_smmu_atc_inv_domain(smmu_domain, 0, iova, size); + arm_smmu_atc_inv_domain(smmu_domain, IOMMU_NO_PASID, iova, size); } void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid, @@ -2055,24 +2055,6 @@ static struct iommu_domain *arm_smmu_domain_alloc(unsigned type) return &smmu_domain->domain; } -static int arm_smmu_bitmap_alloc(unsigned long *map, int span) -{ - int idx, size = 1 << span; - - do { - idx = find_first_zero_bit(map, size); - if (idx == size) - return -ENOSPC; - } while (test_and_set_bit(idx, map)); - - return idx; -} - -static void arm_smmu_bitmap_free(unsigned long *map, int idx) -{ - clear_bit(idx, map); -} - static void arm_smmu_domain_free(struct iommu_domain *domain) { struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); @@ -2093,7 +2075,7 @@ static void arm_smmu_domain_free(struct iommu_domain *domain) } else { struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg; if (cfg->vmid) - arm_smmu_bitmap_free(smmu->vmid_map, cfg->vmid); + ida_free(&smmu->vmid_map, cfg->vmid); } kfree(smmu_domain); @@ -2142,7 +2124,7 @@ static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain, * the master has been added to the devices list for this domain. * This isn't an issue because the STE hasn't been installed yet. 
*/ - ret = arm_smmu_write_ctx_desc(smmu_domain, 0, &cfg->cd); + ret = arm_smmu_write_ctx_desc(smmu_domain, IOMMU_NO_PASID, &cfg->cd); if (ret) goto out_free_cd_tables; @@ -2167,7 +2149,9 @@ static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain, struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg; typeof(&pgtbl_cfg->arm_lpae_s2_cfg.vtcr) vtcr; - vmid = arm_smmu_bitmap_alloc(smmu->vmid_map, smmu->vmid_bits); + /* Reserve VMID 0 for stage-2 bypass STEs */ + vmid = ida_alloc_range(&smmu->vmid_map, 1, (1 << smmu->vmid_bits) - 1, + GFP_KERNEL); if (vmid < 0) return vmid; @@ -2328,7 +2312,7 @@ static void arm_smmu_enable_ats(struct arm_smmu_master *master) pdev = to_pci_dev(master->dev); atomic_inc(&smmu_domain->nr_ats_masters); - arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0); + arm_smmu_atc_inv_domain(smmu_domain, IOMMU_NO_PASID, 0, 0); if (pci_enable_ats(pdev, stu)) dev_err(master->dev, "Failed to enable ATS (STU %zu)\n", stu); } @@ -3098,8 +3082,8 @@ static int arm_smmu_init_strtab(struct arm_smmu_device *smmu) reg |= STRTAB_BASE_RA; smmu->strtab_cfg.strtab_base = reg; - /* Allocate the first VMID for stage-2 bypass STEs */ - set_bit(0, smmu->vmid_map); + ida_init(&smmu->vmid_map); + return 0; } @@ -3923,6 +3907,7 @@ static void arm_smmu_device_remove(struct platform_device *pdev) iommu_device_sysfs_remove(&smmu->iommu); arm_smmu_device_disable(smmu); iopf_queue_free(smmu->evtq.iopf); + ida_destroy(&smmu->vmid_map); } static void arm_smmu_device_shutdown(struct platform_device *pdev) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index dcab85698a4e..9915850dd4db 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -670,7 +670,7 @@ struct arm_smmu_device { #define ARM_SMMU_MAX_VMIDS (1 << 16) unsigned int vmid_bits; - DECLARE_BITMAP(vmid_map, ARM_SMMU_MAX_VMIDS); + struct ida vmid_map; unsigned int ssid_bits; unsigned int sid_bits; diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c index b5b14108e086..bb89d49adf8d 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c @@ -3,7 +3,7 @@ * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved. 
*/ -#include <linux/of_device.h> +#include <linux/device.h> #include <linux/firmware/qcom/qcom_scm.h> #include <linux/ratelimit.h> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c index c71afda79d64..7f52ac67495f 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c @@ -251,10 +251,12 @@ static const struct of_device_id qcom_smmu_client_of_match[] __maybe_unused = { { .compatible = "qcom,sc7280-mss-pil" }, { .compatible = "qcom,sc8180x-mdss" }, { .compatible = "qcom,sc8280xp-mdss" }, - { .compatible = "qcom,sm8150-mdss" }, - { .compatible = "qcom,sm8250-mdss" }, { .compatible = "qcom,sdm845-mdss" }, { .compatible = "qcom,sdm845-mss-pil" }, + { .compatible = "qcom,sm6350-mdss" }, + { .compatible = "qcom,sm6375-mdss" }, + { .compatible = "qcom,sm8150-mdss" }, + { .compatible = "qcom,sm8250-mdss" }, { } }; @@ -528,6 +530,7 @@ static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = { { .compatible = "qcom,sm6125-smmu-500", .data = &qcom_smmu_500_impl0_data }, { .compatible = "qcom,sm6350-smmu-v2", .data = &qcom_smmu_v2_data }, { .compatible = "qcom,sm6350-smmu-500", .data = &qcom_smmu_500_impl0_data }, + { .compatible = "qcom,sm6375-smmu-v2", .data = &qcom_smmu_v2_data }, { .compatible = "qcom,sm6375-smmu-500", .data = &qcom_smmu_500_impl0_data }, { .compatible = "qcom,sm8150-smmu-500", .data = &qcom_smmu_500_impl0_data }, { .compatible = "qcom,sm8250-smmu-500", .data = &qcom_smmu_500_impl0_data }, diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c index a86acd76c1df..d6d1a2a55cc0 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c @@ -29,7 +29,6 @@ #include <linux/module.h> #include <linux/of.h> #include <linux/of_address.h> -#include <linux/of_device.h> #include <linux/pci.h> #include <linux/platform_device.h> #include <linux/pm_runtime.h> diff --git a/drivers/iommu/arm/arm-smmu/qcom_iommu.c b/drivers/iommu/arm/arm-smmu/qcom_iommu.c index a503ed758ec3..775a3cbaff4e 100644 --- a/drivers/iommu/arm/arm-smmu/qcom_iommu.c +++ b/drivers/iommu/arm/arm-smmu/qcom_iommu.c @@ -22,8 +22,7 @@ #include <linux/init.h> #include <linux/mutex.h> #include <linux/of.h> -#include <linux/of_address.h> -#include <linux/of_device.h> +#include <linux/of_platform.h> #include <linux/platform_device.h> #include <linux/pm.h> #include <linux/pm_runtime.h> @@ -51,14 +50,15 @@ struct qcom_iommu_dev { struct clk_bulk_data clks[CLK_NUM]; void __iomem *local_base; u32 sec_id; - u8 num_ctxs; - struct qcom_iommu_ctx *ctxs[]; /* indexed by asid-1 */ + u8 max_asid; + struct qcom_iommu_ctx *ctxs[]; /* indexed by asid */ }; struct qcom_iommu_ctx { struct device *dev; void __iomem *base; bool secure_init; + bool secured_ctx; u8 asid; /* asid and ctx bank # are 1:1 */ struct iommu_domain *domain; }; @@ -94,7 +94,7 @@ static struct qcom_iommu_ctx * to_ctx(struct qcom_iommu_domain *d, unsigned asid struct qcom_iommu_dev *qcom_iommu = d->iommu; if (!qcom_iommu) return NULL; - return qcom_iommu->ctxs[asid - 1]; + return qcom_iommu->ctxs[asid]; } static inline void @@ -273,6 +273,19 @@ static int qcom_iommu_init_domain(struct iommu_domain *domain, ctx->secure_init = true; } + /* Secured QSMMU-500/QSMMU-v2 contexts cannot be programmed */ + if (ctx->secured_ctx) { + ctx->domain = domain; + continue; + } + + /* Disable context bank before programming */ + iommu_writel(ctx, ARM_SMMU_CB_SCTLR, 0); + + /* Clear context bank fault address 
fault status registers */ + iommu_writel(ctx, ARM_SMMU_CB_FAR, 0); + iommu_writel(ctx, ARM_SMMU_CB_FSR, ARM_SMMU_FSR_FAULT); + /* TTBRs */ iommu_writeq(ctx, ARM_SMMU_CB_TTBR0, pgtbl_cfg.arm_lpae_s1_cfg.ttbr | @@ -527,11 +540,10 @@ static int qcom_iommu_of_xlate(struct device *dev, struct of_phandle_args *args) qcom_iommu = platform_get_drvdata(iommu_pdev); /* make sure the asid specified in dt is valid, so we don't have - * to sanity check this elsewhere, since 'asid - 1' is used to - * index into qcom_iommu->ctxs: + * to sanity check this elsewhere: */ - if (WARN_ON(asid < 1) || - WARN_ON(asid > qcom_iommu->num_ctxs)) { + if (WARN_ON(asid > qcom_iommu->max_asid) || + WARN_ON(qcom_iommu->ctxs[asid] == NULL)) { put_device(&iommu_pdev->dev); return -EINVAL; } @@ -617,7 +629,8 @@ free_mem: static int get_asid(const struct device_node *np) { - u32 reg; + u32 reg, val; + int asid; /* read the "reg" property directly to get the relative address * of the context bank, and calculate the asid from that: @@ -625,7 +638,17 @@ static int get_asid(const struct device_node *np) if (of_property_read_u32_index(np, "reg", 0, ®)) return -ENODEV; - return reg / 0x1000; /* context banks are 0x1000 apart */ + /* + * Context banks are 0x1000 apart but, in some cases, the ASID + * number doesn't match to this logic and needs to be passed + * from the DT configuration explicitly. + */ + if (!of_property_read_u32(np, "qcom,ctx-asid", &val)) + asid = val; + else + asid = reg / 0x1000; + + return asid; } static int qcom_iommu_ctx_probe(struct platform_device *pdev) @@ -633,7 +656,6 @@ static int qcom_iommu_ctx_probe(struct platform_device *pdev) struct qcom_iommu_ctx *ctx; struct device *dev = &pdev->dev; struct qcom_iommu_dev *qcom_iommu = dev_get_drvdata(dev->parent); - struct resource *res; int ret, irq; ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL); @@ -643,19 +665,22 @@ static int qcom_iommu_ctx_probe(struct platform_device *pdev) ctx->dev = dev; platform_set_drvdata(pdev, ctx); - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - ctx->base = devm_ioremap_resource(dev, res); + ctx->base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(ctx->base)) return PTR_ERR(ctx->base); irq = platform_get_irq(pdev, 0); if (irq < 0) - return -ENODEV; + return irq; + + if (of_device_is_compatible(dev->of_node, "qcom,msm-iommu-v2-sec")) + ctx->secured_ctx = true; /* clear IRQs before registering fault handler, just in case the * boot-loader left us a surprise: */ - iommu_writel(ctx, ARM_SMMU_CB_FSR, iommu_readl(ctx, ARM_SMMU_CB_FSR)); + if (!ctx->secured_ctx) + iommu_writel(ctx, ARM_SMMU_CB_FSR, iommu_readl(ctx, ARM_SMMU_CB_FSR)); ret = devm_request_irq(dev, irq, qcom_iommu_fault, @@ -677,7 +702,7 @@ static int qcom_iommu_ctx_probe(struct platform_device *pdev) dev_dbg(dev, "found asid %u\n", ctx->asid); - qcom_iommu->ctxs[ctx->asid - 1] = ctx; + qcom_iommu->ctxs[ctx->asid] = ctx; return 0; } @@ -689,12 +714,14 @@ static void qcom_iommu_ctx_remove(struct platform_device *pdev) platform_set_drvdata(pdev, NULL); - qcom_iommu->ctxs[ctx->asid - 1] = NULL; + qcom_iommu->ctxs[ctx->asid] = NULL; } static const struct of_device_id ctx_of_match[] = { { .compatible = "qcom,msm-iommu-v1-ns" }, { .compatible = "qcom,msm-iommu-v1-sec" }, + { .compatible = "qcom,msm-iommu-v2-ns" }, + { .compatible = "qcom,msm-iommu-v2-sec" }, { /* sentinel */ } }; @@ -712,7 +739,8 @@ static bool qcom_iommu_has_secure_context(struct qcom_iommu_dev *qcom_iommu) struct device_node *child; for_each_child_of_node(qcom_iommu->dev->of_node, child) { - 
if (of_device_is_compatible(child, "qcom,msm-iommu-v1-sec")) { + if (of_device_is_compatible(child, "qcom,msm-iommu-v1-sec") || + of_device_is_compatible(child, "qcom,msm-iommu-v2-sec")) { of_node_put(child); return true; } @@ -736,11 +764,11 @@ static int qcom_iommu_device_probe(struct platform_device *pdev) for_each_child_of_node(dev->of_node, child) max_asid = max(max_asid, get_asid(child)); - qcom_iommu = devm_kzalloc(dev, struct_size(qcom_iommu, ctxs, max_asid), + qcom_iommu = devm_kzalloc(dev, struct_size(qcom_iommu, ctxs, max_asid + 1), GFP_KERNEL); if (!qcom_iommu) return -ENOMEM; - qcom_iommu->num_ctxs = max_asid; + qcom_iommu->max_asid = max_asid; qcom_iommu->dev = dev; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); @@ -856,6 +884,7 @@ static const struct dev_pm_ops qcom_iommu_pm_ops = { static const struct of_device_id qcom_iommu_of_match[] = { { .compatible = "qcom,msm-iommu-v1" }, + { .compatible = "qcom,msm-iommu-v2" }, { /* sentinel */ } }; diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index e57724163835..4b1a88f514c9 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -660,7 +660,7 @@ static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain, { struct iommu_dma_cookie *cookie = domain->iova_cookie; struct iova_domain *iovad = &cookie->iovad; - unsigned long shift, iova_len, iova = 0; + unsigned long shift, iova_len, iova; if (cookie->type == IOMMU_DMA_MSI_COOKIE) { cookie->msi_iova += size; @@ -675,15 +675,29 @@ static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain, if (domain->geometry.force_aperture) dma_limit = min(dma_limit, (u64)domain->geometry.aperture_end); - /* Try to get PCI devices a SAC address */ - if (dma_limit > DMA_BIT_MASK(32) && !iommu_dma_forcedac && dev_is_pci(dev)) + /* + * Try to use all the 32-bit PCI addresses first. The original SAC vs. + * DAC reasoning loses relevance with PCIe, but enough hardware and + * firmware bugs are still lurking out there that it's safest not to + * venture into the 64-bit space until necessary. + * + * If your device goes wrong after seeing the notice then likely either + * its driver is not setting DMA masks accurately, the hardware has + * some inherent bug in handling >32-bit addresses, or not all the + * expected address bits are wired up between the device and the IOMMU. 
+ */ + if (dma_limit > DMA_BIT_MASK(32) && dev->iommu->pci_32bit_workaround) { iova = alloc_iova_fast(iovad, iova_len, DMA_BIT_MASK(32) >> shift, false); + if (iova) + goto done; - if (!iova) - iova = alloc_iova_fast(iovad, iova_len, dma_limit >> shift, - true); + dev->iommu->pci_32bit_workaround = false; + dev_notice(dev, "Using %d-bit DMA addresses\n", bits_per(dma_limit)); + } + iova = alloc_iova_fast(iovad, iova_len, dma_limit >> shift, true); +done: return (dma_addr_t)iova << shift; } diff --git a/drivers/iommu/dma-iommu.h b/drivers/iommu/dma-iommu.h index 942790009292..c829f1f82a99 100644 --- a/drivers/iommu/dma-iommu.h +++ b/drivers/iommu/dma-iommu.h @@ -17,6 +17,10 @@ int iommu_dma_init_fq(struct iommu_domain *domain); void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list); extern bool iommu_dma_forcedac; +static inline void iommu_dma_set_pci_32bit_workaround(struct device *dev) +{ + dev->iommu->pci_32bit_workaround = !iommu_dma_forcedac; +} #else /* CONFIG_IOMMU_DMA */ @@ -38,5 +42,9 @@ static inline void iommu_dma_get_resv_regions(struct device *dev, struct list_he { } +static inline void iommu_dma_set_pci_32bit_workaround(struct device *dev) +{ +} + #endif /* CONFIG_IOMMU_DMA */ #endif /* __DMA_IOMMU_H */ diff --git a/drivers/iommu/hyperv-iommu.c b/drivers/iommu/hyperv-iommu.c index 8302db7f783e..8a5c17b97310 100644 --- a/drivers/iommu/hyperv-iommu.c +++ b/drivers/iommu/hyperv-iommu.c @@ -51,7 +51,7 @@ static int hyperv_ir_set_affinity(struct irq_data *data, if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE) return ret; - send_cleanup_vector(cfg); + vector_schedule_cleanup(cfg); return 0; } @@ -257,7 +257,7 @@ static int hyperv_root_ir_set_affinity(struct irq_data *data, if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE) return ret; - send_cleanup_vector(cfg); + vector_schedule_cleanup(cfg); return 0; } diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 5c8c5cdc36cf..5db283c17e0d 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -22,6 +22,7 @@ #include <linux/spinlock.h> #include <linux/syscore_ops.h> #include <linux/tboot.h> +#include <uapi/linux/iommufd.h> #include "iommu.h" #include "../dma-iommu.h" @@ -113,13 +114,17 @@ static inline unsigned long lvl_to_nr_pages(unsigned int lvl) /* VT-d pages must always be _smaller_ than MM pages. Otherwise things are never going to work. 
*/ -static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn) +static inline unsigned long mm_to_dma_pfn_start(unsigned long mm_pfn) { return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT); } +static inline unsigned long mm_to_dma_pfn_end(unsigned long mm_pfn) +{ + return ((mm_pfn + 1) << (PAGE_SHIFT - VTD_PAGE_SHIFT)) - 1; +} static inline unsigned long page_to_dma_pfn(struct page *pg) { - return mm_to_dma_pfn(page_to_pfn(pg)); + return mm_to_dma_pfn_start(page_to_pfn(pg)); } static inline unsigned long virt_to_dma_pfn(void *p) { @@ -877,7 +882,7 @@ void dmar_fault_dump_ptes(struct intel_iommu *iommu, u16 source_id, } /* For request-without-pasid, get the pasid from context entry */ if (intel_iommu_sm && pasid == IOMMU_PASID_INVALID) - pasid = PASID_RID2PASID; + pasid = IOMMU_NO_PASID; dir_index = pasid >> PASID_PDE_SHIFT; pde = &dir[dir_index]; @@ -1359,6 +1364,7 @@ domain_lookup_dev_info(struct dmar_domain *domain, static void domain_update_iotlb(struct dmar_domain *domain) { + struct dev_pasid_info *dev_pasid; struct device_domain_info *info; bool has_iotlb_device = false; unsigned long flags; @@ -1370,6 +1376,14 @@ static void domain_update_iotlb(struct dmar_domain *domain) break; } } + + list_for_each_entry(dev_pasid, &domain->dev_pasids, link_domain) { + info = dev_iommu_priv_get(dev_pasid->dev); + if (info->ats_enabled) { + has_iotlb_device = true; + break; + } + } domain->has_iotlb_device = has_iotlb_device; spin_unlock_irqrestore(&domain->lock, flags); } @@ -1449,12 +1463,13 @@ static void __iommu_flush_dev_iotlb(struct device_domain_info *info, qdep = info->ats_qdep; qi_flush_dev_iotlb(info->iommu, sid, info->pfsid, qdep, addr, mask); - quirk_extra_dev_tlb_flush(info, addr, mask, PASID_RID2PASID, qdep); + quirk_extra_dev_tlb_flush(info, addr, mask, IOMMU_NO_PASID, qdep); } static void iommu_flush_dev_iotlb(struct dmar_domain *domain, u64 addr, unsigned mask) { + struct dev_pasid_info *dev_pasid; struct device_domain_info *info; unsigned long flags; @@ -1464,6 +1479,36 @@ static void iommu_flush_dev_iotlb(struct dmar_domain *domain, spin_lock_irqsave(&domain->lock, flags); list_for_each_entry(info, &domain->devices, link) __iommu_flush_dev_iotlb(info, addr, mask); + + list_for_each_entry(dev_pasid, &domain->dev_pasids, link_domain) { + info = dev_iommu_priv_get(dev_pasid->dev); + + if (!info->ats_enabled) + continue; + + qi_flush_dev_iotlb_pasid(info->iommu, + PCI_DEVID(info->bus, info->devfn), + info->pfsid, dev_pasid->pasid, + info->ats_qdep, addr, + mask); + } + spin_unlock_irqrestore(&domain->lock, flags); +} + +static void domain_flush_pasid_iotlb(struct intel_iommu *iommu, + struct dmar_domain *domain, u64 addr, + unsigned long npages, bool ih) +{ + u16 did = domain_id_iommu(domain, iommu); + struct dev_pasid_info *dev_pasid; + unsigned long flags; + + spin_lock_irqsave(&domain->lock, flags); + list_for_each_entry(dev_pasid, &domain->dev_pasids, link_domain) + qi_flush_piotlb(iommu, did, dev_pasid->pasid, addr, npages, ih); + + if (!list_empty(&domain->devices)) + qi_flush_piotlb(iommu, did, IOMMU_NO_PASID, addr, npages, ih); spin_unlock_irqrestore(&domain->lock, flags); } @@ -1484,7 +1529,7 @@ static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, ih = 1 << 6; if (domain->use_first_level) { - qi_flush_piotlb(iommu, did, PASID_RID2PASID, addr, pages, ih); + domain_flush_pasid_iotlb(iommu, domain, addr, pages, ih); } else { unsigned long bitmask = aligned_pages - 1; @@ -1554,7 +1599,7 @@ static void intel_flush_iotlb_all(struct iommu_domain *domain) u16 did = 
domain_id_iommu(dmar_domain, iommu); if (dmar_domain->use_first_level) - qi_flush_piotlb(iommu, did, PASID_RID2PASID, 0, -1, 0); + domain_flush_pasid_iotlb(iommu, dmar_domain, 0, -1, 0); else iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH); @@ -1726,6 +1771,7 @@ static struct dmar_domain *alloc_domain(unsigned int type) domain->use_first_level = true; domain->has_iotlb_device = false; INIT_LIST_HEAD(&domain->devices); + INIT_LIST_HEAD(&domain->dev_pasids); spin_lock_init(&domain->lock); xa_init(&domain->iommu_array); @@ -1940,7 +1986,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain, context_pdts(pds); /* Setup the RID_PASID field: */ - context_set_sm_rid2pasid(context, PASID_RID2PASID); + context_set_sm_rid2pasid(context, IOMMU_NO_PASID); /* * Setup the Device-TLB enable bit and Page request @@ -2362,8 +2408,8 @@ static int __init si_domain_init(int hw) for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) { ret = iommu_domain_identity_map(si_domain, - mm_to_dma_pfn(start_pfn), - mm_to_dma_pfn(end_pfn)); + mm_to_dma_pfn_start(start_pfn), + mm_to_dma_pfn_end(end_pfn)); if (ret) return ret; } @@ -2384,8 +2430,8 @@ static int __init si_domain_init(int hw) continue; ret = iommu_domain_identity_map(si_domain, - mm_to_dma_pfn(start >> PAGE_SHIFT), - mm_to_dma_pfn(end >> PAGE_SHIFT)); + mm_to_dma_pfn_start(start >> PAGE_SHIFT), + mm_to_dma_pfn_end(end >> PAGE_SHIFT)); if (ret) return ret; } @@ -2420,13 +2466,13 @@ static int dmar_domain_attach_device(struct dmar_domain *domain, /* Setup the PASID entry for requests without PASID: */ if (hw_pass_through && domain_type_is_si(domain)) ret = intel_pasid_setup_pass_through(iommu, domain, - dev, PASID_RID2PASID); + dev, IOMMU_NO_PASID); else if (domain->use_first_level) ret = domain_setup_first_level(iommu, domain, dev, - PASID_RID2PASID); + IOMMU_NO_PASID); else ret = intel_pasid_setup_second_level(iommu, domain, - dev, PASID_RID2PASID); + dev, IOMMU_NO_PASID); if (ret) { dev_err(dev, "Setup RID2PASID failed\n"); device_block_translation(dev); @@ -2446,30 +2492,6 @@ static int dmar_domain_attach_device(struct dmar_domain *domain, return 0; } -static bool device_has_rmrr(struct device *dev) -{ - struct dmar_rmrr_unit *rmrr; - struct device *tmp; - int i; - - rcu_read_lock(); - for_each_rmrr_units(rmrr) { - /* - * Return TRUE if this RMRR contains the device that - * is passed in. - */ - for_each_active_dev_scope(rmrr->devices, - rmrr->devices_cnt, i, tmp) - if (tmp == dev || - is_downstream_to_pci_bridge(dev, tmp)) { - rcu_read_unlock(); - return true; - } - } - rcu_read_unlock(); - return false; -} - /** * device_rmrr_is_relaxable - Test whether the RMRR of this device * is relaxable (ie. is allowed to be not enforced under some conditions) @@ -2500,34 +2522,6 @@ static bool device_rmrr_is_relaxable(struct device *dev) } /* - * There are a couple cases where we need to restrict the functionality of - * devices associated with RMRRs. The first is when evaluating a device for - * identity mapping because problems exist when devices are moved in and out - * of domains and their respective RMRR information is lost. This means that - * a device with associated RMRRs will never be in a "passthrough" domain. - * The second is use of the device through the IOMMU API. This interface - * expects to have full control of the IOVA space for the device. We cannot - * satisfy both the requirement that RMRR access is maintained and have an - * unencumbered IOVA space. 
We also have no ability to quiesce the device's - * use of the RMRR space or even inform the IOMMU API user of the restriction. - * We therefore prevent devices associated with an RMRR from participating in - * the IOMMU API, which eliminates them from device assignment. - * - * In both cases, devices which have relaxable RMRRs are not concerned by this - * restriction. See device_rmrr_is_relaxable comment. - */ -static bool device_is_rmrr_locked(struct device *dev) -{ - if (!device_has_rmrr(dev)) - return false; - - if (device_rmrr_is_relaxable(dev)) - return false; - - return true; -} - -/* * Return the required default domain type for a specific device. * * @dev: the device in query @@ -3560,8 +3554,8 @@ static int intel_iommu_memory_notifier(struct notifier_block *nb, unsigned long val, void *v) { struct memory_notify *mhp = v; - unsigned long start_vpfn = mm_to_dma_pfn(mhp->start_pfn); - unsigned long last_vpfn = mm_to_dma_pfn(mhp->start_pfn + + unsigned long start_vpfn = mm_to_dma_pfn_start(mhp->start_pfn); + unsigned long last_vpfn = mm_to_dma_pfn_end(mhp->start_pfn + mhp->nr_pages - 1); switch (val) { @@ -3756,7 +3750,6 @@ static int __init probe_acpi_namespace_devices(void) for_each_active_dev_scope(drhd->devices, drhd->devices_cnt, i, dev) { struct acpi_device_physical_node *pn; - struct iommu_group *group; struct acpi_device *adev; if (dev->bus != &acpi_bus_type) @@ -3766,12 +3759,6 @@ static int __init probe_acpi_namespace_devices(void) mutex_lock(&adev->physical_node_lock); list_for_each_entry(pn, &adev->physical_node_list, node) { - group = iommu_group_get(pn->dev); - if (group) { - iommu_group_put(group); - continue; - } - ret = iommu_probe_device(pn->dev); if (ret) break; @@ -3968,7 +3955,7 @@ static void dmar_remove_one_dev_info(struct device *dev) if (!dev_is_real_dma_subdevice(info->dev)) { if (dev_is_pci(info->dev) && sm_supported(iommu)) intel_pasid_tear_down_entry(iommu, info->dev, - PASID_RID2PASID, false); + IOMMU_NO_PASID, false); iommu_disable_pci_caps(info); domain_context_clear(info); @@ -3997,7 +3984,7 @@ static void device_block_translation(struct device *dev) if (!dev_is_real_dma_subdevice(dev)) { if (sm_supported(iommu)) intel_pasid_tear_down_entry(iommu, dev, - PASID_RID2PASID, false); + IOMMU_NO_PASID, false); else domain_context_clear(info); } @@ -4139,12 +4126,6 @@ static int intel_iommu_attach_device(struct iommu_domain *domain, struct device_domain_info *info = dev_iommu_priv_get(dev); int ret; - if (domain->type == IOMMU_DOMAIN_UNMANAGED && - device_is_rmrr_locked(dev)) { - dev_warn(dev, "Device is ineligible for IOMMU domain attach due to platform RMRR requirement. 
Contact your platform vendor.\n"); - return -EPERM; - } - if (info->domain) device_block_translation(dev); @@ -4271,7 +4252,7 @@ static void intel_iommu_tlb_sync(struct iommu_domain *domain, unsigned long i; nrpages = aligned_nrpages(gather->start, size); - start_pfn = mm_to_dma_pfn(iova_pfn); + start_pfn = mm_to_dma_pfn_start(iova_pfn); xa_for_each(&dmar_domain->iommu_array, i, info) iommu_flush_iotlb_psi(info->iommu, dmar_domain, @@ -4331,7 +4312,7 @@ static void domain_set_force_snooping(struct dmar_domain *domain) list_for_each_entry(info, &domain->devices, link) intel_pasid_setup_page_snoop_control(info->iommu, info->dev, - PASID_RID2PASID); + IOMMU_NO_PASID); } static bool intel_iommu_enforce_cache_coherency(struct iommu_domain *domain) @@ -4713,27 +4694,118 @@ static void intel_iommu_iotlb_sync_map(struct iommu_domain *domain, static void intel_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid) { struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL); + struct dev_pasid_info *curr, *dev_pasid = NULL; + struct dmar_domain *dmar_domain; struct iommu_domain *domain; + unsigned long flags; - /* Domain type specific cleanup: */ domain = iommu_get_domain_for_dev_pasid(dev, pasid, 0); - if (domain) { - switch (domain->type) { - case IOMMU_DOMAIN_SVA: - intel_svm_remove_dev_pasid(dev, pasid); - break; - default: - /* should never reach here */ - WARN_ON(1); + if (WARN_ON_ONCE(!domain)) + goto out_tear_down; + + /* + * The SVA implementation needs to handle its own stuffs like the mm + * notification. Before consolidating that code into iommu core, let + * the intel sva code handle it. + */ + if (domain->type == IOMMU_DOMAIN_SVA) { + intel_svm_remove_dev_pasid(dev, pasid); + goto out_tear_down; + } + + dmar_domain = to_dmar_domain(domain); + spin_lock_irqsave(&dmar_domain->lock, flags); + list_for_each_entry(curr, &dmar_domain->dev_pasids, link_domain) { + if (curr->dev == dev && curr->pasid == pasid) { + list_del(&curr->link_domain); + dev_pasid = curr; break; } } + WARN_ON_ONCE(!dev_pasid); + spin_unlock_irqrestore(&dmar_domain->lock, flags); + domain_detach_iommu(dmar_domain, iommu); + kfree(dev_pasid); +out_tear_down: intel_pasid_tear_down_entry(iommu, dev, pasid, false); + intel_drain_pasid_prq(dev, pasid); +} + +static int intel_iommu_set_dev_pasid(struct iommu_domain *domain, + struct device *dev, ioasid_t pasid) +{ + struct device_domain_info *info = dev_iommu_priv_get(dev); + struct dmar_domain *dmar_domain = to_dmar_domain(domain); + struct intel_iommu *iommu = info->iommu; + struct dev_pasid_info *dev_pasid; + unsigned long flags; + int ret; + + if (!pasid_supported(iommu) || dev_is_real_dma_subdevice(dev)) + return -EOPNOTSUPP; + + if (context_copied(iommu, info->bus, info->devfn)) + return -EBUSY; + + ret = prepare_domain_attach_device(domain, dev); + if (ret) + return ret; + + dev_pasid = kzalloc(sizeof(*dev_pasid), GFP_KERNEL); + if (!dev_pasid) + return -ENOMEM; + + ret = domain_attach_iommu(dmar_domain, iommu); + if (ret) + goto out_free; + + if (domain_type_is_si(dmar_domain)) + ret = intel_pasid_setup_pass_through(iommu, dmar_domain, + dev, pasid); + else if (dmar_domain->use_first_level) + ret = domain_setup_first_level(iommu, dmar_domain, + dev, pasid); + else + ret = intel_pasid_setup_second_level(iommu, dmar_domain, + dev, pasid); + if (ret) + goto out_detach_iommu; + + dev_pasid->dev = dev; + dev_pasid->pasid = pasid; + spin_lock_irqsave(&dmar_domain->lock, flags); + list_add(&dev_pasid->link_domain, &dmar_domain->dev_pasids); + 
spin_unlock_irqrestore(&dmar_domain->lock, flags); + + return 0; +out_detach_iommu: + domain_detach_iommu(dmar_domain, iommu); +out_free: + kfree(dev_pasid); + return ret; +} + +static void *intel_iommu_hw_info(struct device *dev, u32 *length, u32 *type) +{ + struct device_domain_info *info = dev_iommu_priv_get(dev); + struct intel_iommu *iommu = info->iommu; + struct iommu_hw_info_vtd *vtd; + + vtd = kzalloc(sizeof(*vtd), GFP_KERNEL); + if (!vtd) + return ERR_PTR(-ENOMEM); + + vtd->cap_reg = iommu->cap; + vtd->ecap_reg = iommu->ecap; + *length = sizeof(*vtd); + *type = IOMMU_HW_INFO_TYPE_INTEL_VTD; + return vtd; } const struct iommu_ops intel_iommu_ops = { .capable = intel_iommu_capable, + .hw_info = intel_iommu_hw_info, .domain_alloc = intel_iommu_domain_alloc, .probe_device = intel_iommu_probe_device, .probe_finalize = intel_iommu_probe_finalize, @@ -4751,6 +4823,7 @@ const struct iommu_ops intel_iommu_ops = { #endif .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = intel_iommu_attach_device, + .set_dev_pasid = intel_iommu_set_dev_pasid, .map_pages = intel_iommu_map_pages, .unmap_pages = intel_iommu_unmap_pages, .iotlb_sync_map = intel_iommu_iotlb_sync_map, @@ -4987,7 +5060,7 @@ void quirk_extra_dev_tlb_flush(struct device_domain_info *info, return; sid = PCI_DEVID(info->bus, info->devfn); - if (pasid == PASID_RID2PASID) { + if (pasid == IOMMU_NO_PASID) { qi_flush_dev_iotlb(info->iommu, sid, info->pfsid, qdep, address, mask); } else { diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index 1c5e1d88862b..c18fb699c87a 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -595,6 +595,7 @@ struct dmar_domain { spinlock_t lock; /* Protect device tracking lists */ struct list_head devices; /* all devices' list */ + struct list_head dev_pasids; /* all attached pasids */ struct dma_pte *pgd; /* virtual address */ int gaw; /* max guest address width */ @@ -717,6 +718,12 @@ struct device_domain_info { struct pasid_table *pasid_table; /* pasid table */ }; +struct dev_pasid_info { + struct list_head link_domain; /* link to domain siblings */ + struct device *dev; + ioasid_t pasid; +}; + static inline void __iommu_flush_cache( struct intel_iommu *iommu, void *addr, int size) { @@ -844,6 +851,7 @@ int intel_svm_page_response(struct device *dev, struct iommu_fault_event *evt, struct iommu_page_response *msg); struct iommu_domain *intel_svm_domain_alloc(void); void intel_svm_remove_dev_pasid(struct device *dev, ioasid_t pasid); +void intel_drain_pasid_prq(struct device *dev, u32 pasid); struct intel_svm_dev { struct list_head list; @@ -862,6 +870,7 @@ struct intel_svm { }; #else static inline void intel_svm_check(struct intel_iommu *iommu) {} +static inline void intel_drain_pasid_prq(struct device *dev, u32 pasid) {} static inline struct iommu_domain *intel_svm_domain_alloc(void) { return NULL; diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c index 08f56326e2f8..29b9e55dcf26 100644 --- a/drivers/iommu/intel/irq_remapping.c +++ b/drivers/iommu/intel/irq_remapping.c @@ -1176,7 +1176,7 @@ intel_ir_set_affinity(struct irq_data *data, const struct cpumask *mask, * at the new destination. So, time to cleanup the previous * vector allocation. 
*/ - send_cleanup_vector(cfg); + vector_schedule_cleanup(cfg); return IRQ_SET_MASK_OK_DONE; } diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c index c5d479770e12..8f92b92f3d2a 100644 --- a/drivers/iommu/intel/pasid.c +++ b/drivers/iommu/intel/pasid.c @@ -129,7 +129,7 @@ int intel_pasid_alloc_table(struct device *dev) info->pasid_table = pasid_table; if (!ecap_coherent(info->iommu->ecap)) - clflush_cache_range(pasid_table->table, size); + clflush_cache_range(pasid_table->table, (1 << order) * PAGE_SIZE); return 0; } @@ -438,7 +438,7 @@ devtlb_invalidation_with_pasid(struct intel_iommu *iommu, * SVA usage, device could do DMA with multiple PASIDs. It is more * efficient to flush devTLB specific to the PASID. */ - if (pasid == PASID_RID2PASID) + if (pasid == IOMMU_NO_PASID) qi_flush_dev_iotlb(iommu, sid, pfsid, qdep, 0, 64 - VTD_PAGE_SHIFT); else qi_flush_dev_iotlb_pasid(iommu, sid, pfsid, pasid, qdep, 0, 64 - VTD_PAGE_SHIFT); diff --git a/drivers/iommu/intel/pasid.h b/drivers/iommu/intel/pasid.h index d6b7d21244b1..4e9e68c3c388 100644 --- a/drivers/iommu/intel/pasid.h +++ b/drivers/iommu/intel/pasid.h @@ -10,8 +10,6 @@ #ifndef __INTEL_PASID_H #define __INTEL_PASID_H -#define PASID_RID2PASID 0x0 -#define PASID_MIN 0x1 #define PASID_MAX 0x100000 #define PASID_PTE_MASK 0x3F #define PASID_PTE_PRESENT 1 diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index e95b339e9cdc..50a481c895b8 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -26,8 +26,6 @@ #include "trace.h" static irqreturn_t prq_event_thread(int irq, void *d); -static void intel_svm_drain_prq(struct device *dev, u32 pasid); -#define to_intel_svm_dev(handle) container_of(handle, struct intel_svm_dev, sva) static DEFINE_XARRAY_ALLOC(pasid_private_array); static int pasid_private_add(ioasid_t pasid, void *priv) @@ -219,9 +217,9 @@ static void intel_flush_svm_range(struct intel_svm *svm, unsigned long address, } /* Pages have been freed at this point */ -static void intel_invalidate_range(struct mmu_notifier *mn, - struct mm_struct *mm, - unsigned long start, unsigned long end) +static void intel_arch_invalidate_secondary_tlbs(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, unsigned long end) { struct intel_svm *svm = container_of(mn, struct intel_svm, notifier); @@ -256,11 +254,9 @@ static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm) static const struct mmu_notifier_ops intel_mmuops = { .release = intel_mm_release, - .invalidate_range = intel_invalidate_range, + .arch_invalidate_secondary_tlbs = intel_arch_invalidate_secondary_tlbs, }; -static DEFINE_MUTEX(pasid_mutex); - static int pasid_to_svm_sdev(struct device *dev, unsigned int pasid, struct intel_svm **rsvm, struct intel_svm_dev **rsdev) @@ -268,10 +264,6 @@ static int pasid_to_svm_sdev(struct device *dev, unsigned int pasid, struct intel_svm_dev *sdev = NULL; struct intel_svm *svm; - /* The caller should hold the pasid_mutex lock */ - if (WARN_ON(!mutex_is_locked(&pasid_mutex))) - return -EINVAL; - if (pasid == IOMMU_PASID_INVALID || pasid >= PASID_MAX) return -EINVAL; @@ -371,37 +363,23 @@ free_svm: return ret; } -/* Caller must hold pasid_mutex */ -static int intel_svm_unbind_mm(struct device *dev, u32 pasid) +void intel_svm_remove_dev_pasid(struct device *dev, u32 pasid) { struct intel_svm_dev *sdev; struct intel_iommu *iommu; struct intel_svm *svm; struct mm_struct *mm; - int ret = -EINVAL; iommu = device_to_iommu(dev, NULL, NULL); if (!iommu) - goto out; + return; - 
ret = pasid_to_svm_sdev(dev, pasid, &svm, &sdev); - if (ret) - goto out; + if (pasid_to_svm_sdev(dev, pasid, &svm, &sdev)) + return; mm = svm->mm; if (sdev) { list_del_rcu(&sdev->list); - /* - * Flush the PASID cache and IOTLB for this device. - * Note that we do depend on the hardware *not* using - * the PASID any more. Just as we depend on other - * devices never using PASIDs that they have no right - * to use. We have a *shared* PASID table, because it's - * large and has to be physically contiguous. So it's - * hard to be as defensive as we might like. - */ - intel_pasid_tear_down_entry(iommu, dev, svm->pasid, false); - intel_svm_drain_prq(dev, svm->pasid); kfree_rcu(sdev, rcu); if (list_empty(&svm->devs)) { @@ -418,8 +396,6 @@ static int intel_svm_unbind_mm(struct device *dev, u32 pasid) kfree(svm); } } -out: - return ret; } /* Page request queue descriptor */ @@ -460,7 +436,7 @@ static bool is_canonical_address(u64 addr) } /** - * intel_svm_drain_prq - Drain page requests and responses for a pasid + * intel_drain_pasid_prq - Drain page requests and responses for a pasid * @dev: target device * @pasid: pasid for draining * @@ -474,7 +450,7 @@ static bool is_canonical_address(u64 addr) * described in VT-d spec CH7.10 to drain all page requests and page * responses pending in the hardware. */ -static void intel_svm_drain_prq(struct device *dev, u32 pasid) +void intel_drain_pasid_prq(struct device *dev, u32 pasid) { struct device_domain_info *info; struct dmar_domain *domain; @@ -520,19 +496,7 @@ prq_retry: goto prq_retry; } - /* - * A work in IO page fault workqueue may try to lock pasid_mutex now. - * Holding pasid_mutex while waiting in iopf_queue_flush_dev() for - * all works in the workqueue to finish may cause deadlock. - * - * It's unnecessary to hold pasid_mutex in iopf_queue_flush_dev(). - * Unlock it to allow the works to be handled while waiting for - * them to finish. - */ - lockdep_assert_held(&pasid_mutex); - mutex_unlock(&pasid_mutex); iopf_queue_flush_dev(dev); - mutex_lock(&pasid_mutex); /* * Perform steps described in VT-d spec CH7.10 to drain page @@ -827,26 +791,14 @@ out: return ret; } -void intel_svm_remove_dev_pasid(struct device *dev, ioasid_t pasid) -{ - mutex_lock(&pasid_mutex); - intel_svm_unbind_mm(dev, pasid); - mutex_unlock(&pasid_mutex); -} - static int intel_svm_set_dev_pasid(struct iommu_domain *domain, struct device *dev, ioasid_t pasid) { struct device_domain_info *info = dev_iommu_priv_get(dev); struct intel_iommu *iommu = info->iommu; struct mm_struct *mm = domain->mm; - int ret; - mutex_lock(&pasid_mutex); - ret = intel_svm_bind_mm(iommu, dev, mm); - mutex_unlock(&pasid_mutex); - - return ret; + return intel_svm_bind_mm(iommu, dev, mm); } static void intel_svm_domain_free(struct iommu_domain *domain) diff --git a/drivers/iommu/iommu-priv.h b/drivers/iommu/iommu-priv.h new file mode 100644 index 000000000000..2024a2313348 --- /dev/null +++ b/drivers/iommu/iommu-priv.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. + */ +#ifndef __LINUX_IOMMU_PRIV_H +#define __LINUX_IOMMU_PRIV_H + +#include <linux/iommu.h> + +static inline const struct iommu_ops *dev_iommu_ops(struct device *dev) +{ + /* + * Assume that valid ops must be installed if iommu_probe_device() + * has succeeded. The device ops are essentially for internal use + * within the IOMMU subsystem itself, so we should be able to trust + * ourselves not to misuse the helper. 
+ */ + return dev->iommu->iommu_dev->ops; +} + +int iommu_group_replace_domain(struct iommu_group *group, + struct iommu_domain *new_domain); + +int iommu_device_register_bus(struct iommu_device *iommu, + const struct iommu_ops *ops, struct bus_type *bus, + struct notifier_block *nb); +void iommu_device_unregister_bus(struct iommu_device *iommu, + struct bus_type *bus, + struct notifier_block *nb); + +#endif /* __LINUX_IOMMU_PRIV_H */ diff --git a/drivers/iommu/iommu-sva.c b/drivers/iommu/iommu-sva.c index 05c0fb2acbc4..b78671a8a914 100644 --- a/drivers/iommu/iommu-sva.c +++ b/drivers/iommu/iommu-sva.c @@ -10,34 +10,30 @@ #include "iommu-sva.h" static DEFINE_MUTEX(iommu_sva_lock); -static DEFINE_IDA(iommu_global_pasid_ida); /* Allocate a PASID for the mm within range (inclusive) */ -static int iommu_sva_alloc_pasid(struct mm_struct *mm, ioasid_t min, ioasid_t max) +static int iommu_sva_alloc_pasid(struct mm_struct *mm, struct device *dev) { + ioasid_t pasid; int ret = 0; - if (min == IOMMU_PASID_INVALID || - max == IOMMU_PASID_INVALID || - min == 0 || max < min) - return -EINVAL; - if (!arch_pgtable_dma_compat(mm)) return -EBUSY; mutex_lock(&iommu_sva_lock); /* Is a PASID already associated with this mm? */ if (mm_valid_pasid(mm)) { - if (mm->pasid < min || mm->pasid > max) + if (mm->pasid >= dev->iommu->max_pasids) ret = -EOVERFLOW; goto out; } - ret = ida_alloc_range(&iommu_global_pasid_ida, min, max, GFP_KERNEL); - if (ret < 0) + pasid = iommu_alloc_global_pasid(dev); + if (pasid == IOMMU_PASID_INVALID) { + ret = -ENOSPC; goto out; - - mm->pasid = ret; + } + mm->pasid = pasid; ret = 0; out: mutex_unlock(&iommu_sva_lock); @@ -64,15 +60,10 @@ struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm { struct iommu_domain *domain; struct iommu_sva *handle; - ioasid_t max_pasids; int ret; - max_pasids = dev->iommu->max_pasids; - if (!max_pasids) - return ERR_PTR(-EOPNOTSUPP); - /* Allocate mm->pasid if necessary. 
*/ - ret = iommu_sva_alloc_pasid(mm, 1, max_pasids - 1); + ret = iommu_sva_alloc_pasid(mm, dev); if (ret) return ERR_PTR(ret); @@ -217,5 +208,5 @@ void mm_pasid_drop(struct mm_struct *mm) if (likely(!mm_valid_pasid(mm))) return; - ida_free(&iommu_global_pasid_ida, mm->pasid); + iommu_free_global_pasid(mm->pasid); } diff --git a/drivers/iommu/iommu-sysfs.c b/drivers/iommu/iommu-sysfs.c index 99869217fbec..cbe378c34ba3 100644 --- a/drivers/iommu/iommu-sysfs.c +++ b/drivers/iommu/iommu-sysfs.c @@ -107,9 +107,6 @@ int iommu_device_link(struct iommu_device *iommu, struct device *link) { int ret; - if (!iommu || IS_ERR(iommu)) - return -ENODEV; - ret = sysfs_add_link_to_group(&iommu->dev->kobj, "devices", &link->kobj, dev_name(link)); if (ret) @@ -122,14 +119,9 @@ int iommu_device_link(struct iommu_device *iommu, struct device *link) return ret; } -EXPORT_SYMBOL_GPL(iommu_device_link); void iommu_device_unlink(struct iommu_device *iommu, struct device *link) { - if (!iommu || IS_ERR(iommu)) - return; - sysfs_remove_link(&link->kobj, "iommu"); sysfs_remove_link_from_group(&iommu->dev->kobj, "devices", dev_name(link)); } -EXPORT_SYMBOL_GPL(iommu_device_unlink); diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index caaf563d38ae..3bfc56df4f78 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -34,11 +34,14 @@ #include <linux/msi.h> #include "dma-iommu.h" +#include "iommu-priv.h" #include "iommu-sva.h" +#include "iommu-priv.h" static struct kset *iommu_group_kset; static DEFINE_IDA(iommu_group_ida); +static DEFINE_IDA(iommu_global_pasid_ida); static unsigned int iommu_def_domain_type __read_mostly; static bool iommu_dma_strict __read_mostly = IS_ENABLED(CONFIG_IOMMU_DEFAULT_DMA_STRICT); @@ -127,9 +130,12 @@ static int iommu_setup_default_domain(struct iommu_group *group, int target_type); static int iommu_create_device_direct_mappings(struct iommu_domain *domain, struct device *dev); -static struct iommu_group *iommu_group_get_for_dev(struct device *dev); static ssize_t iommu_group_store_type(struct iommu_group *group, const char *buf, size_t count); +static struct group_device *iommu_group_alloc_device(struct iommu_group *group, + struct device *dev); +static void __iommu_group_free_device(struct iommu_group *group, + struct group_device *grp_dev); #define IOMMU_GROUP_ATTR(_name, _mode, _show, _store) \ struct iommu_group_attribute iommu_group_attr_##_name = \ @@ -287,6 +293,48 @@ void iommu_device_unregister(struct iommu_device *iommu) } EXPORT_SYMBOL_GPL(iommu_device_unregister); +#if IS_ENABLED(CONFIG_IOMMUFD_TEST) +void iommu_device_unregister_bus(struct iommu_device *iommu, + struct bus_type *bus, + struct notifier_block *nb) +{ + bus_unregister_notifier(bus, nb); + iommu_device_unregister(iommu); +} +EXPORT_SYMBOL_GPL(iommu_device_unregister_bus); + +/* + * Register an iommu driver against a single bus. This is only used by iommufd + * selftest to create a mock iommu driver. The caller must provide + * some memory to hold a notifier_block. 
+ */ +int iommu_device_register_bus(struct iommu_device *iommu, + const struct iommu_ops *ops, struct bus_type *bus, + struct notifier_block *nb) +{ + int err; + + iommu->ops = ops; + nb->notifier_call = iommu_bus_notifier; + err = bus_register_notifier(bus, nb); + if (err) + return err; + + spin_lock(&iommu_device_lock); + list_add_tail(&iommu->list, &iommu_device_list); + spin_unlock(&iommu_device_lock); + + bus->iommu_ops = ops; + err = bus_iommu_probe(bus); + if (err) { + iommu_device_unregister_bus(iommu, bus, nb); + return err; + } + return 0; +} +EXPORT_SYMBOL_GPL(iommu_device_register_bus); +#endif + static struct dev_iommu *dev_iommu_get(struct device *dev) { struct dev_iommu *param = dev->iommu; @@ -333,28 +381,18 @@ static u32 dev_iommu_get_max_pasids(struct device *dev) return min_t(u32, max_pasids, dev->iommu->iommu_dev->max_pasids); } -static int __iommu_probe_device(struct device *dev, struct list_head *group_list) +/* + * Init the dev->iommu and dev->iommu_group in the struct device and get the + * driver probed + */ +static int iommu_init_device(struct device *dev, const struct iommu_ops *ops) { - const struct iommu_ops *ops = dev->bus->iommu_ops; struct iommu_device *iommu_dev; struct iommu_group *group; - static DEFINE_MUTEX(iommu_probe_device_lock); int ret; - if (!ops) - return -ENODEV; - /* - * Serialise to avoid races between IOMMU drivers registering in - * parallel and/or the "replay" calls from ACPI/OF code via client - * driver probe. Once the latter have been cleaned up we should - * probably be able to use device_lock() here to minimise the scope, - * but for now enforcing a simple global ordering is fine. - */ - mutex_lock(&iommu_probe_device_lock); - if (!dev_iommu_get(dev)) { - ret = -ENOMEM; - goto err_unlock; - } + if (!dev_iommu_get(dev)) + return -ENOMEM; if (!try_module_get(ops->owner)) { ret = -EINVAL; @@ -364,124 +402,184 @@ static int __iommu_probe_device(struct device *dev, struct list_head *group_list iommu_dev = ops->probe_device(dev); if (IS_ERR(iommu_dev)) { ret = PTR_ERR(iommu_dev); - goto out_module_put; + goto err_module_put; } - dev->iommu->iommu_dev = iommu_dev; - dev->iommu->max_pasids = dev_iommu_get_max_pasids(dev); - if (ops->is_attach_deferred) - dev->iommu->attach_deferred = ops->is_attach_deferred(dev); + ret = iommu_device_link(iommu_dev, dev); + if (ret) + goto err_release; - group = iommu_group_get_for_dev(dev); + group = ops->device_group(dev); + if (WARN_ON_ONCE(group == NULL)) + group = ERR_PTR(-EINVAL); if (IS_ERR(group)) { ret = PTR_ERR(group); - goto out_release; + goto err_unlink; } + dev->iommu_group = group; - mutex_lock(&group->mutex); - if (group_list && !group->default_domain && list_empty(&group->entry)) - list_add_tail(&group->entry, group_list); - mutex_unlock(&group->mutex); - iommu_group_put(group); - - mutex_unlock(&iommu_probe_device_lock); - iommu_device_link(iommu_dev, dev); - + dev->iommu->iommu_dev = iommu_dev; + dev->iommu->max_pasids = dev_iommu_get_max_pasids(dev); + if (ops->is_attach_deferred) + dev->iommu->attach_deferred = ops->is_attach_deferred(dev); return 0; -out_release: +err_unlink: + iommu_device_unlink(iommu_dev, dev); +err_release: if (ops->release_device) ops->release_device(dev); - -out_module_put: +err_module_put: module_put(ops->owner); - err_free: dev_iommu_free(dev); + return ret; +} -err_unlock: - mutex_unlock(&iommu_probe_device_lock); +static void iommu_deinit_device(struct device *dev) +{ + struct iommu_group *group = dev->iommu_group; + const struct iommu_ops *ops = 
dev_iommu_ops(dev); - return ret; + lockdep_assert_held(&group->mutex); + + iommu_device_unlink(dev->iommu->iommu_dev, dev); + + /* + * release_device() must stop using any attached domain on the device. + * If there are still other devices in the group they are not effected + * by this callback. + * + * The IOMMU driver must set the device to either an identity or + * blocking translation and stop using any domain pointer, as it is + * going to be freed. + */ + if (ops->release_device) + ops->release_device(dev); + + /* + * If this is the last driver to use the group then we must free the + * domains before we do the module_put(). + */ + if (list_empty(&group->devices)) { + if (group->default_domain) { + iommu_domain_free(group->default_domain); + group->default_domain = NULL; + } + if (group->blocking_domain) { + iommu_domain_free(group->blocking_domain); + group->blocking_domain = NULL; + } + group->domain = NULL; + } + + /* Caller must put iommu_group */ + dev->iommu_group = NULL; + module_put(ops->owner); + dev_iommu_free(dev); } -int iommu_probe_device(struct device *dev) +static int __iommu_probe_device(struct device *dev, struct list_head *group_list) { - const struct iommu_ops *ops; + const struct iommu_ops *ops = dev->bus->iommu_ops; struct iommu_group *group; + static DEFINE_MUTEX(iommu_probe_device_lock); + struct group_device *gdev; int ret; - ret = __iommu_probe_device(dev, NULL); - if (ret) - goto err_out; + if (!ops) + return -ENODEV; + /* + * Serialise to avoid races between IOMMU drivers registering in + * parallel and/or the "replay" calls from ACPI/OF code via client + * driver probe. Once the latter have been cleaned up we should + * probably be able to use device_lock() here to minimise the scope, + * but for now enforcing a simple global ordering is fine. + */ + mutex_lock(&iommu_probe_device_lock); - group = iommu_group_get(dev); - if (!group) { - ret = -ENODEV; - goto err_release; + /* Device is probed already if in a group */ + if (dev->iommu_group) { + ret = 0; + goto out_unlock; } + ret = iommu_init_device(dev, ops); + if (ret) + goto out_unlock; + + group = dev->iommu_group; + gdev = iommu_group_alloc_device(group, dev); mutex_lock(&group->mutex); + if (IS_ERR(gdev)) { + ret = PTR_ERR(gdev); + goto err_put_group; + } + /* + * The gdev must be in the list before calling + * iommu_setup_default_domain() + */ + list_add_tail(&gdev->list, &group->devices); + WARN_ON(group->default_domain && !group->domain); if (group->default_domain) iommu_create_device_direct_mappings(group->default_domain, dev); - if (group->domain) { ret = __iommu_device_set_domain(group, dev, group->domain, 0); if (ret) - goto err_unlock; - } else if (!group->default_domain) { + goto err_remove_gdev; + } else if (!group->default_domain && !group_list) { ret = iommu_setup_default_domain(group, 0); if (ret) - goto err_unlock; + goto err_remove_gdev; + } else if (!group->default_domain) { + /* + * With a group_list argument we defer the default_domain setup + * to the caller by providing a de-duplicated list of groups + * that need further setup. 
+ */ + if (list_empty(&group->entry)) + list_add_tail(&group->entry, group_list); } - mutex_unlock(&group->mutex); - iommu_group_put(group); + mutex_unlock(&iommu_probe_device_lock); - ops = dev_iommu_ops(dev); - if (ops->probe_finalize) - ops->probe_finalize(dev); + if (dev_is_pci(dev)) + iommu_dma_set_pci_32bit_workaround(dev); return 0; -err_unlock: +err_remove_gdev: + list_del(&gdev->list); + __iommu_group_free_device(group, gdev); +err_put_group: + iommu_deinit_device(dev); mutex_unlock(&group->mutex); iommu_group_put(group); -err_release: - iommu_release_device(dev); +out_unlock: + mutex_unlock(&iommu_probe_device_lock); -err_out: return ret; - } -/* - * Remove a device from a group's device list and return the group device - * if successful. - */ -static struct group_device * -__iommu_group_remove_device(struct iommu_group *group, struct device *dev) +int iommu_probe_device(struct device *dev) { - struct group_device *device; + const struct iommu_ops *ops; + int ret; - lockdep_assert_held(&group->mutex); - for_each_group_device(group, device) { - if (device->dev == dev) { - list_del(&device->list); - return device; - } - } + ret = __iommu_probe_device(dev, NULL); + if (ret) + return ret; - return NULL; + ops = dev_iommu_ops(dev); + if (ops->probe_finalize) + ops->probe_finalize(dev); + + return 0; } -/* - * Release a device from its group and decrements the iommu group reference - * count. - */ -static void __iommu_group_release_device(struct iommu_group *group, - struct group_device *grp_dev) +static void __iommu_group_free_device(struct iommu_group *group, + struct group_device *grp_dev) { struct device *dev = grp_dev->dev; @@ -490,54 +588,57 @@ static void __iommu_group_release_device(struct iommu_group *group, trace_remove_device_from_group(group->id, dev); + /* + * If the group has become empty then ownership must have been + * released, and the current domain must be set back to NULL or + * the default domain. + */ + if (list_empty(&group->devices)) + WARN_ON(group->owner_cnt || + group->domain != group->default_domain); + kfree(grp_dev->name); kfree(grp_dev); - dev->iommu_group = NULL; - kobject_put(group->devices_kobj); } -static void iommu_release_device(struct device *dev) +/* Remove the iommu_group from the struct device. */ +static void __iommu_group_remove_device(struct device *dev) { struct iommu_group *group = dev->iommu_group; struct group_device *device; - const struct iommu_ops *ops; - - if (!dev->iommu || !group) - return; - - iommu_device_unlink(dev->iommu->iommu_dev, dev); mutex_lock(&group->mutex); - device = __iommu_group_remove_device(group, dev); + for_each_group_device(group, device) { + if (device->dev != dev) + continue; - /* - * If the group has become empty then ownership must have been released, - * and the current domain must be set back to NULL or the default - * domain. - */ - if (list_empty(&group->devices)) - WARN_ON(group->owner_cnt || - group->domain != group->default_domain); + list_del(&device->list); + __iommu_group_free_device(group, device); + if (dev->iommu && dev->iommu->iommu_dev) + iommu_deinit_device(dev); + else + dev->iommu_group = NULL; + break; + } + mutex_unlock(&group->mutex); /* - * release_device() must stop using any attached domain on the device. - * If there are still other devices in the group they are not effected - * by this callback. - * - * The IOMMU driver must set the device to either an identity or - * blocking translation and stop using any domain pointer, as it is - * going to be freed. 
+ * Pairs with the get in iommu_init_device() or + * iommu_group_add_device() */ - ops = dev_iommu_ops(dev); - if (ops->release_device) - ops->release_device(dev); - mutex_unlock(&group->mutex); + iommu_group_put(group); +} - if (device) - __iommu_group_release_device(group, device); +static void iommu_release_device(struct device *dev) +{ + struct iommu_group *group = dev->iommu_group; - module_put(ops->owner); - dev_iommu_free(dev); + if (group) + __iommu_group_remove_device(dev); + + /* Free any fwspec if no iommu_driver was ever attached */ + if (dev->iommu) + dev_iommu_free(dev); } static int __init iommu_set_def_domain_type(char *str) @@ -798,10 +899,9 @@ static void iommu_group_release(struct kobject *kobj) ida_free(&iommu_group_ida, group->id); - if (group->default_domain) - iommu_domain_free(group->default_domain); - if (group->blocking_domain) - iommu_domain_free(group->blocking_domain); + /* Domains are free'd by iommu_deinit_device() */ + WARN_ON(group->default_domain); + WARN_ON(group->blocking_domain); kfree(group->name); kfree(group); @@ -959,14 +1059,12 @@ static int iommu_create_device_direct_mappings(struct iommu_domain *domain, unsigned long pg_size; int ret = 0; - if (!iommu_is_dma_domain(domain)) - return 0; - - BUG_ON(!domain->pgsize_bitmap); - - pg_size = 1UL << __ffs(domain->pgsize_bitmap); + pg_size = domain->pgsize_bitmap ? 1UL << __ffs(domain->pgsize_bitmap) : 0; INIT_LIST_HEAD(&mappings); + if (WARN_ON_ONCE(iommu_is_dma_domain(domain) && !pg_size)) + return -EINVAL; + iommu_get_resv_regions(dev, &mappings); /* We need to consider overlapping regions for different devices */ @@ -974,13 +1072,17 @@ static int iommu_create_device_direct_mappings(struct iommu_domain *domain, dma_addr_t start, end, addr; size_t map_size = 0; - start = ALIGN(entry->start, pg_size); - end = ALIGN(entry->start + entry->length, pg_size); + if (entry->type == IOMMU_RESV_DIRECT) + dev->iommu->require_direct = 1; - if (entry->type != IOMMU_RESV_DIRECT && - entry->type != IOMMU_RESV_DIRECT_RELAXABLE) + if ((entry->type != IOMMU_RESV_DIRECT && + entry->type != IOMMU_RESV_DIRECT_RELAXABLE) || + !iommu_is_dma_domain(domain)) continue; + start = ALIGN(entry->start, pg_size); + end = ALIGN(entry->start + entry->length, pg_size); + for (addr = start; addr <= end; addr += pg_size) { phys_addr_t phys_addr; @@ -1014,22 +1116,16 @@ out: return ret; } -/** - * iommu_group_add_device - add a device to an iommu group - * @group: the group into which to add the device (reference should be held) - * @dev: the device - * - * This function is called by an iommu driver to add a device into a - * group. Adding a device increments the group reference count. 
- */ -int iommu_group_add_device(struct iommu_group *group, struct device *dev) +/* This is undone by __iommu_group_free_device() */ +static struct group_device *iommu_group_alloc_device(struct iommu_group *group, + struct device *dev) { int ret, i = 0; struct group_device *device; device = kzalloc(sizeof(*device), GFP_KERNEL); if (!device) - return -ENOMEM; + return ERR_PTR(-ENOMEM); device->dev = dev; @@ -1060,18 +1156,11 @@ rename: goto err_free_name; } - kobject_get(group->devices_kobj); - - dev->iommu_group = group; - - mutex_lock(&group->mutex); - list_add_tail(&device->list, &group->devices); - mutex_unlock(&group->mutex); trace_add_device_to_group(group->id, dev); dev_info(dev, "Adding to iommu group %d\n", group->id); - return 0; + return device; err_free_name: kfree(device->name); @@ -1080,7 +1169,32 @@ err_remove_link: err_free_device: kfree(device); dev_err(dev, "Failed to add to iommu group %d: %d\n", group->id, ret); - return ret; + return ERR_PTR(ret); +} + +/** + * iommu_group_add_device - add a device to an iommu group + * @group: the group into which to add the device (reference should be held) + * @dev: the device + * + * This function is called by an iommu driver to add a device into a + * group. Adding a device increments the group reference count. + */ +int iommu_group_add_device(struct iommu_group *group, struct device *dev) +{ + struct group_device *gdev; + + gdev = iommu_group_alloc_device(group, dev); + if (IS_ERR(gdev)) + return PTR_ERR(gdev); + + iommu_group_ref_get(group); + dev->iommu_group = group; + + mutex_lock(&group->mutex); + list_add_tail(&gdev->list, &group->devices); + mutex_unlock(&group->mutex); + return 0; } EXPORT_SYMBOL_GPL(iommu_group_add_device); @@ -1094,19 +1208,13 @@ EXPORT_SYMBOL_GPL(iommu_group_add_device); void iommu_group_remove_device(struct device *dev) { struct iommu_group *group = dev->iommu_group; - struct group_device *device; if (!group) return; dev_info(dev, "Removing from iommu group %d\n", group->id); - mutex_lock(&group->mutex); - device = __iommu_group_remove_device(group, dev); - mutex_unlock(&group->mutex); - - if (device) - __iommu_group_release_device(group, device); + __iommu_group_remove_device(dev); } EXPORT_SYMBOL_GPL(iommu_group_remove_device); @@ -1664,45 +1772,6 @@ iommu_group_alloc_default_domain(struct iommu_group *group, int req_type) return dom; } -/** - * iommu_group_get_for_dev - Find or create the IOMMU group for a device - * @dev: target device - * - * This function is intended to be called by IOMMU drivers and extended to - * support common, bus-defined algorithms when determining or creating the - * IOMMU group for a device. On success, the caller will hold a reference - * to the returned IOMMU group, which will already include the provided - * device. The reference should be released with iommu_group_put(). 
- */ -static struct iommu_group *iommu_group_get_for_dev(struct device *dev) -{ - const struct iommu_ops *ops = dev_iommu_ops(dev); - struct iommu_group *group; - int ret; - - group = iommu_group_get(dev); - if (group) - return group; - - group = ops->device_group(dev); - if (WARN_ON_ONCE(group == NULL)) - return ERR_PTR(-EINVAL); - - if (IS_ERR(group)) - return group; - - ret = iommu_group_add_device(group, dev); - if (ret) - goto out_put_group; - - return group; - -out_put_group: - iommu_group_put(group); - - return ERR_PTR(ret); -} - struct iommu_domain *iommu_group_default_domain(struct iommu_group *group) { return group->default_domain; @@ -1711,16 +1780,8 @@ struct iommu_domain *iommu_group_default_domain(struct iommu_group *group) static int probe_iommu_group(struct device *dev, void *data) { struct list_head *group_list = data; - struct iommu_group *group; int ret; - /* Device is probed already if in a group */ - group = iommu_group_get(dev); - if (group) { - iommu_group_put(group); - return 0; - } - ret = __iommu_probe_device(dev, group_list); if (ret == -ENODEV) ret = 0; @@ -1796,11 +1857,6 @@ int bus_iommu_probe(const struct bus_type *bus) LIST_HEAD(group_list); int ret; - /* - * This code-path does not allocate the default domain when - * creating the iommu group, so do it after the groups are - * created. - */ ret = bus_for_each_dev(bus, NULL, &group_list, probe_iommu_group); if (ret) return ret; @@ -1813,6 +1869,11 @@ int bus_iommu_probe(const struct bus_type *bus) /* Remove item from the list */ list_del_init(&group->entry); + /* + * We go to the trouble of deferred default domain creation so + * that the cross-group default domain type and the setup of the + * IOMMU_RESV_DIRECT will work correctly in non-hotpug scenarios. + */ ret = iommu_setup_default_domain(group, 0); if (ret) { mutex_unlock(&group->mutex); @@ -2114,6 +2175,32 @@ int iommu_attach_group(struct iommu_domain *domain, struct iommu_group *group) } EXPORT_SYMBOL_GPL(iommu_attach_group); +/** + * iommu_group_replace_domain - replace the domain that a group is attached to + * @new_domain: new IOMMU domain to replace with + * @group: IOMMU group that will be attached to the new domain + * + * This API allows the group to switch domains without being forced to go to + * the blocking domain in-between. + * + * If the currently attached domain is a core domain (e.g. a default_domain), + * it will act just like the iommu_attach_group(). + */ +int iommu_group_replace_domain(struct iommu_group *group, + struct iommu_domain *new_domain) +{ + int ret; + + if (!new_domain) + return -EINVAL; + + mutex_lock(&group->mutex); + ret = __iommu_group_set_domain(group, new_domain); + mutex_unlock(&group->mutex); + return ret; +} +EXPORT_SYMBOL_NS_GPL(iommu_group_replace_domain, IOMMUFD_INTERNAL); + static int __iommu_device_set_domain(struct iommu_group *group, struct device *dev, struct iommu_domain *new_domain, @@ -2121,6 +2208,21 @@ static int __iommu_device_set_domain(struct iommu_group *group, { int ret; + /* + * If the device requires IOMMU_RESV_DIRECT then we cannot allow + * the blocking domain to be attached as it does not contain the + * required 1:1 mapping. This test effectively excludes the device + * being used with iommu_group_claim_dma_owner() which will block + * vfio and iommufd as well. 
+ */ + if (dev->iommu->require_direct && + (new_domain->type == IOMMU_DOMAIN_BLOCKED || + new_domain == group->blocking_domain)) { + dev_warn(dev, + "Firmware has requested this device have a 1:1 IOMMU mapping, rejecting configuring the device without a 1:1 mapping. Contact your platform vendor.\n"); + return -EINVAL; + } + if (dev->iommu->attach_deferred) { if (new_domain == group->default_domain) return 0; @@ -2642,6 +2744,14 @@ int iommu_set_pgtable_quirks(struct iommu_domain *domain, } EXPORT_SYMBOL_GPL(iommu_set_pgtable_quirks); +/** + * iommu_get_resv_regions - get reserved regions + * @dev: device for which to get reserved regions + * @list: reserved region list for device + * + * This returns a list of reserved IOVA regions specific to this device. + * A domain user should not map IOVA in these ranges. + */ void iommu_get_resv_regions(struct device *dev, struct list_head *list) { const struct iommu_ops *ops = dev_iommu_ops(dev); @@ -2649,9 +2759,10 @@ void iommu_get_resv_regions(struct device *dev, struct list_head *list) if (ops->get_resv_regions) ops->get_resv_regions(dev, list); } +EXPORT_SYMBOL_GPL(iommu_get_resv_regions); /** - * iommu_put_resv_regions - release resered regions + * iommu_put_resv_regions - release reserved regions * @dev: device for which to free reserved regions * @list: reserved region list for device * @@ -3203,7 +3314,7 @@ static void __iommu_release_dma_ownership(struct iommu_group *group) /** * iommu_group_release_dma_owner() - Release DMA ownership of a group - * @dev: The device + * @group: The group * * Release the DMA ownership claimed by iommu_group_claim_dma_owner(). */ @@ -3217,7 +3328,7 @@ EXPORT_SYMBOL_GPL(iommu_group_release_dma_owner); /** * iommu_device_release_dma_owner() - Release DMA ownership of a device - * @group: The device. + * @dev: The device. * * Release the DMA ownership claimed by iommu_device_claim_dma_owner(). */ @@ -3400,3 +3511,30 @@ struct iommu_domain *iommu_sva_domain_alloc(struct device *dev, return domain; } + +ioasid_t iommu_alloc_global_pasid(struct device *dev) +{ + int ret; + + /* max_pasids == 0 means that the device does not support PASID */ + if (!dev->iommu->max_pasids) + return IOMMU_PASID_INVALID; + + /* + * max_pasids is set up by vendor driver based on number of PASID bits + * supported but the IDA allocation is inclusive. + */ + ret = ida_alloc_range(&iommu_global_pasid_ida, IOMMU_FIRST_GLOBAL_PASID, + dev->iommu->max_pasids - 1, GFP_KERNEL); + return ret < 0 ? IOMMU_PASID_INVALID : ret; +} +EXPORT_SYMBOL_GPL(iommu_alloc_global_pasid); + +void iommu_free_global_pasid(ioasid_t pasid) +{ + if (WARN_ON(pasid == IOMMU_PASID_INVALID)) + return; + + ida_free(&iommu_global_pasid_ida, pasid); +} +EXPORT_SYMBOL_GPL(iommu_free_global_pasid); diff --git a/drivers/iommu/iommufd/Kconfig b/drivers/iommu/iommufd/Kconfig index ada693ea51a7..99d4b075df49 100644 --- a/drivers/iommu/iommufd/Kconfig +++ b/drivers/iommu/iommufd/Kconfig @@ -14,8 +14,8 @@ config IOMMUFD if IOMMUFD config IOMMUFD_VFIO_CONTAINER bool "IOMMUFD provides the VFIO container /dev/vfio/vfio" - depends on VFIO && !VFIO_CONTAINER - default VFIO && !VFIO_CONTAINER + depends on VFIO_GROUP && !VFIO_CONTAINER + default VFIO_GROUP && !VFIO_CONTAINER help IOMMUFD will provide /dev/vfio/vfio instead of VFIO. This relies on IOMMUFD providing compatibility emulation to give the same ioctls. 
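The iommu.c hunks above move global PASID allocation into the IOMMU core as iommu_alloc_global_pasid()/iommu_free_global_pasid(), with iommu-sva.c converted to use them. Below is a minimal, illustrative sketch (not part of this diff) of how a caller pairs the two helpers; the example_* function names and the -ENOSPC mapping are assumptions for illustration, mirroring the converted iommu_sva_alloc_pasid() path shown earlier.

#include <linux/errno.h>
#include <linux/iommu.h>

/*
 * Illustrative sketch only, not from the patch series: allocate a PASID
 * from the core's global IDA and release it again when done.
 */
static int example_assign_pasid(struct device *dev, ioasid_t *out_pasid)
{
	ioasid_t pasid;

	/* Returns IOMMU_PASID_INVALID when the device has no PASID support */
	pasid = iommu_alloc_global_pasid(dev);
	if (pasid == IOMMU_PASID_INVALID)
		return -ENOSPC;

	*out_pasid = pasid;
	return 0;
}

static void example_release_pasid(ioasid_t pasid)
{
	iommu_free_global_pasid(pasid);
}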
diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c index ed2937a4e196..ce78c3671539 100644 --- a/drivers/iommu/iommufd/device.c +++ b/drivers/iommu/iommufd/device.c @@ -4,6 +4,8 @@ #include <linux/iommufd.h> #include <linux/slab.h> #include <linux/iommu.h> +#include <uapi/linux/iommufd.h> +#include "../iommu-priv.h" #include "io_pagetable.h" #include "iommufd_private.h" @@ -15,13 +17,127 @@ MODULE_PARM_DESC( "Allow IOMMUFD to bind to devices even if the platform cannot isolate " "the MSI interrupt window. Enabling this is a security weakness."); +static void iommufd_group_release(struct kref *kref) +{ + struct iommufd_group *igroup = + container_of(kref, struct iommufd_group, ref); + + WARN_ON(igroup->hwpt || !list_empty(&igroup->device_list)); + + xa_cmpxchg(&igroup->ictx->groups, iommu_group_id(igroup->group), igroup, + NULL, GFP_KERNEL); + iommu_group_put(igroup->group); + mutex_destroy(&igroup->lock); + kfree(igroup); +} + +static void iommufd_put_group(struct iommufd_group *group) +{ + kref_put(&group->ref, iommufd_group_release); +} + +static bool iommufd_group_try_get(struct iommufd_group *igroup, + struct iommu_group *group) +{ + if (!igroup) + return false; + /* + * group ID's cannot be re-used until the group is put back which does + * not happen if we could get an igroup pointer under the xa_lock. + */ + if (WARN_ON(igroup->group != group)) + return false; + return kref_get_unless_zero(&igroup->ref); +} + +/* + * iommufd needs to store some more data for each iommu_group, we keep a + * parallel xarray indexed by iommu_group id to hold this instead of putting it + * in the core structure. To keep things simple the iommufd_group memory is + * unique within the iommufd_ctx. This makes it easy to check there are no + * memory leaks. + */ +static struct iommufd_group *iommufd_get_group(struct iommufd_ctx *ictx, + struct device *dev) +{ + struct iommufd_group *new_igroup; + struct iommufd_group *cur_igroup; + struct iommufd_group *igroup; + struct iommu_group *group; + unsigned int id; + + group = iommu_group_get(dev); + if (!group) + return ERR_PTR(-ENODEV); + + id = iommu_group_id(group); + + xa_lock(&ictx->groups); + igroup = xa_load(&ictx->groups, id); + if (iommufd_group_try_get(igroup, group)) { + xa_unlock(&ictx->groups); + iommu_group_put(group); + return igroup; + } + xa_unlock(&ictx->groups); + + new_igroup = kzalloc(sizeof(*new_igroup), GFP_KERNEL); + if (!new_igroup) { + iommu_group_put(group); + return ERR_PTR(-ENOMEM); + } + + kref_init(&new_igroup->ref); + mutex_init(&new_igroup->lock); + INIT_LIST_HEAD(&new_igroup->device_list); + new_igroup->sw_msi_start = PHYS_ADDR_MAX; + /* group reference moves into new_igroup */ + new_igroup->group = group; + + /* + * The ictx is not additionally refcounted here becase all objects using + * an igroup must put it before their destroy completes. + */ + new_igroup->ictx = ictx; + + /* + * We dropped the lock so igroup is invalid. NULL is a safe and likely + * value to assume for the xa_cmpxchg algorithm. 
+ */ + cur_igroup = NULL; + xa_lock(&ictx->groups); + while (true) { + igroup = __xa_cmpxchg(&ictx->groups, id, cur_igroup, new_igroup, + GFP_KERNEL); + if (xa_is_err(igroup)) { + xa_unlock(&ictx->groups); + iommufd_put_group(new_igroup); + return ERR_PTR(xa_err(igroup)); + } + + /* new_group was successfully installed */ + if (cur_igroup == igroup) { + xa_unlock(&ictx->groups); + return new_igroup; + } + + /* Check again if the current group is any good */ + if (iommufd_group_try_get(igroup, group)) { + xa_unlock(&ictx->groups); + iommufd_put_group(new_igroup); + return igroup; + } + cur_igroup = igroup; + } +} + void iommufd_device_destroy(struct iommufd_object *obj) { struct iommufd_device *idev = container_of(obj, struct iommufd_device, obj); iommu_device_release_dma_owner(idev->dev); - iommu_group_put(idev->group); + iommufd_put_group(idev->igroup); if (!iommufd_selftest_is_mock_dev(idev->dev)) iommufd_ctx_put(idev->ictx); } @@ -46,7 +162,7 @@ struct iommufd_device *iommufd_device_bind(struct iommufd_ctx *ictx, struct device *dev, u32 *id) { struct iommufd_device *idev; - struct iommu_group *group; + struct iommufd_group *igroup; int rc; /* @@ -56,9 +172,29 @@ struct iommufd_device *iommufd_device_bind(struct iommufd_ctx *ictx, if (!device_iommu_capable(dev, IOMMU_CAP_CACHE_COHERENCY)) return ERR_PTR(-EINVAL); - group = iommu_group_get(dev); - if (!group) - return ERR_PTR(-ENODEV); + igroup = iommufd_get_group(ictx, dev); + if (IS_ERR(igroup)) + return ERR_CAST(igroup); + + /* + * For historical compat with VFIO the insecure interrupt path is + * allowed if the module parameter is set. Secure/Isolated means that a + * MemWr operation from the device (eg a simple DMA) cannot trigger an + * interrupt outside this iommufd context. + */ + if (!iommufd_selftest_is_mock_dev(dev) && + !iommu_group_has_isolated_msi(igroup->group)) { + if (!allow_unsafe_interrupts) { + rc = -EPERM; + goto out_group_put; + } + + dev_warn( + dev, + "MSI interrupts are not secure, they cannot be isolated by the platform. " + "Check that platform features like interrupt remapping are enabled. " + "Use the \"allow_unsafe_interrupts\" module parameter to override\n"); + } rc = iommu_device_claim_dma_owner(dev, ictx); if (rc) @@ -77,8 +213,8 @@ struct iommufd_device *iommufd_device_bind(struct iommufd_ctx *ictx, device_iommu_capable(dev, IOMMU_CAP_ENFORCE_CACHE_COHERENCY); /* The calling driver is a user until iommufd_device_unbind() */ refcount_inc(&idev->obj.users); - /* group refcount moves into iommufd_device */ - idev->group = group; + /* igroup refcount moves into iommufd_device */ + idev->igroup = igroup; /* * If the caller fails after this success it must call @@ -93,12 +229,43 @@ struct iommufd_device *iommufd_device_bind(struct iommufd_ctx *ictx, out_release_owner: iommu_device_release_dma_owner(dev); out_group_put: - iommu_group_put(group); + iommufd_put_group(igroup); return ERR_PTR(rc); } EXPORT_SYMBOL_NS_GPL(iommufd_device_bind, IOMMUFD); /** + * iommufd_ctx_has_group - True if any device within the group is bound + * to the ictx + * @ictx: iommufd file descriptor + * @group: Pointer to a physical iommu_group struct + * + * True if any device within the group has been bound to this ictx, ex. via + * iommufd_device_bind(), therefore implying ictx ownership of the group. 
+ */ +bool iommufd_ctx_has_group(struct iommufd_ctx *ictx, struct iommu_group *group) +{ + struct iommufd_object *obj; + unsigned long index; + + if (!ictx || !group) + return false; + + xa_lock(&ictx->objects); + xa_for_each(&ictx->objects, index, obj) { + if (obj->type == IOMMUFD_OBJ_DEVICE && + container_of(obj, struct iommufd_device, obj) + ->igroup->group == group) { + xa_unlock(&ictx->objects); + return true; + } + } + xa_unlock(&ictx->objects); + return false; +} +EXPORT_SYMBOL_NS_GPL(iommufd_ctx_has_group, IOMMUFD); + +/** * iommufd_device_unbind - Undo iommufd_device_bind() * @idev: Device returned by iommufd_device_bind() * @@ -113,10 +280,22 @@ void iommufd_device_unbind(struct iommufd_device *idev) } EXPORT_SYMBOL_NS_GPL(iommufd_device_unbind, IOMMUFD); -static int iommufd_device_setup_msi(struct iommufd_device *idev, - struct iommufd_hw_pagetable *hwpt, - phys_addr_t sw_msi_start) +struct iommufd_ctx *iommufd_device_to_ictx(struct iommufd_device *idev) +{ + return idev->ictx; +} +EXPORT_SYMBOL_NS_GPL(iommufd_device_to_ictx, IOMMUFD); + +u32 iommufd_device_to_id(struct iommufd_device *idev) { + return idev->obj.id; +} +EXPORT_SYMBOL_NS_GPL(iommufd_device_to_id, IOMMUFD); + +static int iommufd_group_setup_msi(struct iommufd_group *igroup, + struct iommufd_hw_pagetable *hwpt) +{ + phys_addr_t sw_msi_start = igroup->sw_msi_start; int rc; /* @@ -143,128 +322,192 @@ static int iommufd_device_setup_msi(struct iommufd_device *idev, */ hwpt->msi_cookie = true; } - - /* - * For historical compat with VFIO the insecure interrupt path is - * allowed if the module parameter is set. Insecure means that a MemWr - * operation from the device (eg a simple DMA) cannot trigger an - * interrupt outside this iommufd context. - */ - if (!iommufd_selftest_is_mock_dev(idev->dev) && - !iommu_group_has_isolated_msi(idev->group)) { - if (!allow_unsafe_interrupts) - return -EPERM; - - dev_warn( - idev->dev, - "MSI interrupts are not secure, they cannot be isolated by the platform. " - "Check that platform features like interrupt remapping are enabled. " - "Use the \"allow_unsafe_interrupts\" module parameter to override\n"); - } return 0; } -static bool iommufd_hw_pagetable_has_group(struct iommufd_hw_pagetable *hwpt, - struct iommu_group *group) -{ - struct iommufd_device *cur_dev; - - lockdep_assert_held(&hwpt->devices_lock); - - list_for_each_entry(cur_dev, &hwpt->devices, devices_item) - if (cur_dev->group == group) - return true; - return false; -} - int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt, struct iommufd_device *idev) { - phys_addr_t sw_msi_start = PHYS_ADDR_MAX; int rc; - lockdep_assert_held(&hwpt->devices_lock); - - if (WARN_ON(idev->hwpt)) - return -EINVAL; + mutex_lock(&idev->igroup->lock); - /* - * Try to upgrade the domain we have, it is an iommu driver bug to - * report IOMMU_CAP_ENFORCE_CACHE_COHERENCY but fail - * enforce_cache_coherency when there are no devices attached to the - * domain. 
- */ - if (idev->enforce_cache_coherency && !hwpt->enforce_cache_coherency) { - if (hwpt->domain->ops->enforce_cache_coherency) - hwpt->enforce_cache_coherency = - hwpt->domain->ops->enforce_cache_coherency( - hwpt->domain); - if (!hwpt->enforce_cache_coherency) { - WARN_ON(list_empty(&hwpt->devices)); - return -EINVAL; - } + if (idev->igroup->hwpt != NULL && idev->igroup->hwpt != hwpt) { + rc = -EINVAL; + goto err_unlock; } - rc = iopt_table_enforce_group_resv_regions(&hwpt->ioas->iopt, idev->dev, - idev->group, &sw_msi_start); - if (rc) - return rc; + /* Try to upgrade the domain we have */ + if (idev->enforce_cache_coherency) { + rc = iommufd_hw_pagetable_enforce_cc(hwpt); + if (rc) + goto err_unlock; + } - rc = iommufd_device_setup_msi(idev, hwpt, sw_msi_start); + rc = iopt_table_enforce_dev_resv_regions(&hwpt->ioas->iopt, idev->dev, + &idev->igroup->sw_msi_start); if (rc) - goto err_unresv; + goto err_unlock; /* - * FIXME: Hack around missing a device-centric iommu api, only attach to - * the group once for the first device that is in the group. + * Only attach to the group once for the first device that is in the + * group. All the other devices will follow this attachment. The user + * should attach every device individually to the hwpt as the per-device + * reserved regions are only updated during individual device + * attachment. */ - if (!iommufd_hw_pagetable_has_group(hwpt, idev->group)) { - rc = iommu_attach_group(hwpt->domain, idev->group); + if (list_empty(&idev->igroup->device_list)) { + rc = iommufd_group_setup_msi(idev->igroup, hwpt); if (rc) goto err_unresv; + + rc = iommu_attach_group(hwpt->domain, idev->igroup->group); + if (rc) + goto err_unresv; + idev->igroup->hwpt = hwpt; } + refcount_inc(&hwpt->obj.users); + list_add_tail(&idev->group_item, &idev->igroup->device_list); + mutex_unlock(&idev->igroup->lock); return 0; err_unresv: iopt_remove_reserved_iova(&hwpt->ioas->iopt, idev->dev); +err_unlock: + mutex_unlock(&idev->igroup->lock); return rc; } -void iommufd_hw_pagetable_detach(struct iommufd_hw_pagetable *hwpt, - struct iommufd_device *idev) +struct iommufd_hw_pagetable * +iommufd_hw_pagetable_detach(struct iommufd_device *idev) { - if (!iommufd_hw_pagetable_has_group(hwpt, idev->group)) - iommu_detach_group(hwpt->domain, idev->group); + struct iommufd_hw_pagetable *hwpt = idev->igroup->hwpt; + + mutex_lock(&idev->igroup->lock); + list_del(&idev->group_item); + if (list_empty(&idev->igroup->device_list)) { + iommu_detach_group(hwpt->domain, idev->igroup->group); + idev->igroup->hwpt = NULL; + } iopt_remove_reserved_iova(&hwpt->ioas->iopt, idev->dev); + mutex_unlock(&idev->igroup->lock); + + /* Caller must destroy hwpt */ + return hwpt; } -static int iommufd_device_do_attach(struct iommufd_device *idev, - struct iommufd_hw_pagetable *hwpt) +static struct iommufd_hw_pagetable * +iommufd_device_do_attach(struct iommufd_device *idev, + struct iommufd_hw_pagetable *hwpt) { int rc; - mutex_lock(&hwpt->devices_lock); rc = iommufd_hw_pagetable_attach(hwpt, idev); if (rc) - goto out_unlock; + return ERR_PTR(rc); + return NULL; +} - idev->hwpt = hwpt; - refcount_inc(&hwpt->obj.users); - list_add(&idev->devices_item, &hwpt->devices); -out_unlock: - mutex_unlock(&hwpt->devices_lock); - return rc; +static struct iommufd_hw_pagetable * +iommufd_device_do_replace(struct iommufd_device *idev, + struct iommufd_hw_pagetable *hwpt) +{ + struct iommufd_group *igroup = idev->igroup; + struct iommufd_hw_pagetable *old_hwpt; + unsigned int num_devices = 0; + struct iommufd_device *cur; 
+ int rc; + + mutex_lock(&idev->igroup->lock); + + if (igroup->hwpt == NULL) { + rc = -EINVAL; + goto err_unlock; + } + + if (hwpt == igroup->hwpt) { + mutex_unlock(&idev->igroup->lock); + return NULL; + } + + /* Try to upgrade the domain we have */ + list_for_each_entry(cur, &igroup->device_list, group_item) { + num_devices++; + if (cur->enforce_cache_coherency) { + rc = iommufd_hw_pagetable_enforce_cc(hwpt); + if (rc) + goto err_unlock; + } + } + + old_hwpt = igroup->hwpt; + if (hwpt->ioas != old_hwpt->ioas) { + list_for_each_entry(cur, &igroup->device_list, group_item) { + rc = iopt_table_enforce_dev_resv_regions( + &hwpt->ioas->iopt, cur->dev, NULL); + if (rc) + goto err_unresv; + } + } + + rc = iommufd_group_setup_msi(idev->igroup, hwpt); + if (rc) + goto err_unresv; + + rc = iommu_group_replace_domain(igroup->group, hwpt->domain); + if (rc) + goto err_unresv; + + if (hwpt->ioas != old_hwpt->ioas) { + list_for_each_entry(cur, &igroup->device_list, group_item) + iopt_remove_reserved_iova(&old_hwpt->ioas->iopt, + cur->dev); + } + + igroup->hwpt = hwpt; + + /* + * Move the refcounts held by the device_list to the new hwpt. Retain a + * refcount for this thread as the caller will free it. + */ + refcount_add(num_devices, &hwpt->obj.users); + if (num_devices > 1) + WARN_ON(refcount_sub_and_test(num_devices - 1, + &old_hwpt->obj.users)); + mutex_unlock(&idev->igroup->lock); + + /* Caller must destroy old_hwpt */ + return old_hwpt; +err_unresv: + list_for_each_entry(cur, &igroup->device_list, group_item) + iopt_remove_reserved_iova(&hwpt->ioas->iopt, cur->dev); +err_unlock: + mutex_unlock(&idev->igroup->lock); + return ERR_PTR(rc); } +typedef struct iommufd_hw_pagetable *(*attach_fn)( + struct iommufd_device *idev, struct iommufd_hw_pagetable *hwpt); + /* * When automatically managing the domains we search for a compatible domain in * the iopt and if one is found use it, otherwise create a new domain. * Automatic domain selection will never pick a manually created domain. */ -static int iommufd_device_auto_get_domain(struct iommufd_device *idev, - struct iommufd_ioas *ioas) +static struct iommufd_hw_pagetable * +iommufd_device_auto_get_domain(struct iommufd_device *idev, + struct iommufd_ioas *ioas, u32 *pt_id, + attach_fn do_attach) { + /* + * iommufd_hw_pagetable_attach() is called by + * iommufd_hw_pagetable_alloc() in immediate attachment mode, same as + * iommufd_device_do_attach(). So if we are in this mode then we prefer + * to use the immediate_attach path as it supports drivers that can't + * directly allocate a domain. + */ + bool immediate_attach = do_attach == iommufd_device_do_attach; + struct iommufd_hw_pagetable *destroy_hwpt; struct iommufd_hw_pagetable *hwpt; - int rc; /* * There is no differentiation when domains are allocated, so any domain @@ -278,50 +521,58 @@ static int iommufd_device_auto_get_domain(struct iommufd_device *idev, if (!iommufd_lock_obj(&hwpt->obj)) continue; - rc = iommufd_device_do_attach(idev, hwpt); + destroy_hwpt = (*do_attach)(idev, hwpt); + if (IS_ERR(destroy_hwpt)) { + iommufd_put_object(&hwpt->obj); + /* + * -EINVAL means the domain is incompatible with the + * device. Other error codes should propagate to + * userspace as failure. Success means the domain is + * attached. + */ + if (PTR_ERR(destroy_hwpt) == -EINVAL) + continue; + goto out_unlock; + } + *pt_id = hwpt->obj.id; iommufd_put_object(&hwpt->obj); - - /* - * -EINVAL means the domain is incompatible with the device. - * Other error codes should propagate to userspace as failure. 
- * Success means the domain is attached. - */ - if (rc == -EINVAL) - continue; goto out_unlock; } - hwpt = iommufd_hw_pagetable_alloc(idev->ictx, ioas, idev, true); + hwpt = iommufd_hw_pagetable_alloc(idev->ictx, ioas, idev, + immediate_attach); if (IS_ERR(hwpt)) { - rc = PTR_ERR(hwpt); + destroy_hwpt = ERR_CAST(hwpt); goto out_unlock; } + + if (!immediate_attach) { + destroy_hwpt = (*do_attach)(idev, hwpt); + if (IS_ERR(destroy_hwpt)) + goto out_abort; + } else { + destroy_hwpt = NULL; + } + hwpt->auto_domain = true; + *pt_id = hwpt->obj.id; - mutex_unlock(&ioas->mutex); iommufd_object_finalize(idev->ictx, &hwpt->obj); - return 0; + mutex_unlock(&ioas->mutex); + return destroy_hwpt; + +out_abort: + iommufd_object_abort_and_destroy(idev->ictx, &hwpt->obj); out_unlock: mutex_unlock(&ioas->mutex); - return rc; + return destroy_hwpt; } -/** - * iommufd_device_attach - Connect a device from an iommu_domain - * @idev: device to attach - * @pt_id: Input a IOMMUFD_OBJ_IOAS, or IOMMUFD_OBJ_HW_PAGETABLE - * Output the IOMMUFD_OBJ_HW_PAGETABLE ID - * - * This connects the device to an iommu_domain, either automatically or manually - * selected. Once this completes the device could do DMA. - * - * The caller should return the resulting pt_id back to userspace. - * This function is undone by calling iommufd_device_detach(). - */ -int iommufd_device_attach(struct iommufd_device *idev, u32 *pt_id) +static int iommufd_device_change_pt(struct iommufd_device *idev, u32 *pt_id, + attach_fn do_attach) { + struct iommufd_hw_pagetable *destroy_hwpt; struct iommufd_object *pt_obj; - int rc; pt_obj = iommufd_get_object(idev->ictx, *pt_id, IOMMUFD_OBJ_ANY); if (IS_ERR(pt_obj)) @@ -332,8 +583,8 @@ int iommufd_device_attach(struct iommufd_device *idev, u32 *pt_id) struct iommufd_hw_pagetable *hwpt = container_of(pt_obj, struct iommufd_hw_pagetable, obj); - rc = iommufd_device_do_attach(idev, hwpt); - if (rc) + destroy_hwpt = (*do_attach)(idev, hwpt); + if (IS_ERR(destroy_hwpt)) goto out_put_pt_obj; break; } @@ -341,27 +592,80 @@ int iommufd_device_attach(struct iommufd_device *idev, u32 *pt_id) struct iommufd_ioas *ioas = container_of(pt_obj, struct iommufd_ioas, obj); - rc = iommufd_device_auto_get_domain(idev, ioas); - if (rc) + destroy_hwpt = iommufd_device_auto_get_domain(idev, ioas, pt_id, + do_attach); + if (IS_ERR(destroy_hwpt)) goto out_put_pt_obj; break; } default: - rc = -EINVAL; + destroy_hwpt = ERR_PTR(-EINVAL); goto out_put_pt_obj; } + iommufd_put_object(pt_obj); - refcount_inc(&idev->obj.users); - *pt_id = idev->hwpt->obj.id; - rc = 0; + /* This destruction has to be after we unlock everything */ + if (destroy_hwpt) + iommufd_hw_pagetable_put(idev->ictx, destroy_hwpt); + return 0; out_put_pt_obj: iommufd_put_object(pt_obj); - return rc; + return PTR_ERR(destroy_hwpt); +} + +/** + * iommufd_device_attach - Connect a device to an iommu_domain + * @idev: device to attach + * @pt_id: Input a IOMMUFD_OBJ_IOAS, or IOMMUFD_OBJ_HW_PAGETABLE + * Output the IOMMUFD_OBJ_HW_PAGETABLE ID + * + * This connects the device to an iommu_domain, either automatically or manually + * selected. Once this completes the device could do DMA. + * + * The caller should return the resulting pt_id back to userspace. + * This function is undone by calling iommufd_device_detach(). 
+ */ +int iommufd_device_attach(struct iommufd_device *idev, u32 *pt_id) +{ + int rc; + + rc = iommufd_device_change_pt(idev, pt_id, &iommufd_device_do_attach); + if (rc) + return rc; + + /* + * Pairs with iommufd_device_detach() - catches caller bugs attempting + * to destroy a device with an attachment. + */ + refcount_inc(&idev->obj.users); + return 0; } EXPORT_SYMBOL_NS_GPL(iommufd_device_attach, IOMMUFD); /** + * iommufd_device_replace - Change the device's iommu_domain + * @idev: device to change + * @pt_id: Input a IOMMUFD_OBJ_IOAS, or IOMMUFD_OBJ_HW_PAGETABLE + * Output the IOMMUFD_OBJ_HW_PAGETABLE ID + * + * This is the same as:: + * + * iommufd_device_detach(); + * iommufd_device_attach(); + * + * If it fails then no change is made to the attachment. The iommu driver may + * implement this so there is no disruption in translation. This can only be + * called if iommufd_device_attach() has already succeeded. + */ +int iommufd_device_replace(struct iommufd_device *idev, u32 *pt_id) +{ + return iommufd_device_change_pt(idev, pt_id, + &iommufd_device_do_replace); +} +EXPORT_SYMBOL_NS_GPL(iommufd_device_replace, IOMMUFD); + +/** * iommufd_device_detach - Disconnect a device to an iommu_domain * @idev: device to detach * @@ -370,33 +674,87 @@ EXPORT_SYMBOL_NS_GPL(iommufd_device_attach, IOMMUFD); */ void iommufd_device_detach(struct iommufd_device *idev) { - struct iommufd_hw_pagetable *hwpt = idev->hwpt; - - mutex_lock(&hwpt->devices_lock); - list_del(&idev->devices_item); - idev->hwpt = NULL; - iommufd_hw_pagetable_detach(hwpt, idev); - mutex_unlock(&hwpt->devices_lock); - - if (hwpt->auto_domain) - iommufd_object_deref_user(idev->ictx, &hwpt->obj); - else - refcount_dec(&hwpt->obj.users); + struct iommufd_hw_pagetable *hwpt; + hwpt = iommufd_hw_pagetable_detach(idev); + iommufd_hw_pagetable_put(idev->ictx, hwpt); refcount_dec(&idev->obj.users); } EXPORT_SYMBOL_NS_GPL(iommufd_device_detach, IOMMUFD); +/* + * On success, it will refcount_inc() at a valid new_ioas and refcount_dec() at + * a valid cur_ioas (access->ioas). A caller passing in a valid new_ioas should + * call iommufd_put_object() if it does an iommufd_get_object() for a new_ioas. + */ +static int iommufd_access_change_ioas(struct iommufd_access *access, + struct iommufd_ioas *new_ioas) +{ + u32 iopt_access_list_id = access->iopt_access_list_id; + struct iommufd_ioas *cur_ioas = access->ioas; + int rc; + + lockdep_assert_held(&access->ioas_lock); + + /* We are racing with a concurrent detach, bail */ + if (cur_ioas != access->ioas_unpin) + return -EBUSY; + + if (cur_ioas == new_ioas) + return 0; + + /* + * Set ioas to NULL to block any further iommufd_access_pin_pages(). + * iommufd_access_unpin_pages() can continue using access->ioas_unpin. 
+ */ + access->ioas = NULL; + + if (new_ioas) { + rc = iopt_add_access(&new_ioas->iopt, access); + if (rc) { + access->ioas = cur_ioas; + return rc; + } + refcount_inc(&new_ioas->obj.users); + } + + if (cur_ioas) { + if (access->ops->unmap) { + mutex_unlock(&access->ioas_lock); + access->ops->unmap(access->data, 0, ULONG_MAX); + mutex_lock(&access->ioas_lock); + } + iopt_remove_access(&cur_ioas->iopt, access, iopt_access_list_id); + refcount_dec(&cur_ioas->obj.users); + } + + access->ioas = new_ioas; + access->ioas_unpin = new_ioas; + + return 0; +} + +static int iommufd_access_change_ioas_id(struct iommufd_access *access, u32 id) +{ + struct iommufd_ioas *ioas = iommufd_get_ioas(access->ictx, id); + int rc; + + if (IS_ERR(ioas)) + return PTR_ERR(ioas); + rc = iommufd_access_change_ioas(access, ioas); + iommufd_put_object(&ioas->obj); + return rc; +} + void iommufd_access_destroy_object(struct iommufd_object *obj) { struct iommufd_access *access = container_of(obj, struct iommufd_access, obj); - if (access->ioas) { - iopt_remove_access(&access->ioas->iopt, access); - refcount_dec(&access->ioas->obj.users); - access->ioas = NULL; - } + mutex_lock(&access->ioas_lock); + if (access->ioas) + WARN_ON(iommufd_access_change_ioas(access, NULL)); + mutex_unlock(&access->ioas_lock); iommufd_ctx_put(access->ictx); } @@ -441,6 +799,7 @@ iommufd_access_create(struct iommufd_ctx *ictx, iommufd_ctx_get(ictx); iommufd_object_finalize(ictx, &access->obj); *id = access->obj.id; + mutex_init(&access->ioas_lock); return access; } EXPORT_SYMBOL_NS_GPL(iommufd_access_create, IOMMUFD); @@ -457,30 +816,49 @@ void iommufd_access_destroy(struct iommufd_access *access) } EXPORT_SYMBOL_NS_GPL(iommufd_access_destroy, IOMMUFD); +void iommufd_access_detach(struct iommufd_access *access) +{ + mutex_lock(&access->ioas_lock); + if (WARN_ON(!access->ioas)) { + mutex_unlock(&access->ioas_lock); + return; + } + WARN_ON(iommufd_access_change_ioas(access, NULL)); + mutex_unlock(&access->ioas_lock); +} +EXPORT_SYMBOL_NS_GPL(iommufd_access_detach, IOMMUFD); + int iommufd_access_attach(struct iommufd_access *access, u32 ioas_id) { - struct iommufd_ioas *new_ioas; - int rc = 0; + int rc; - if (access->ioas) + mutex_lock(&access->ioas_lock); + if (WARN_ON(access->ioas)) { + mutex_unlock(&access->ioas_lock); return -EINVAL; - - new_ioas = iommufd_get_ioas(access->ictx, ioas_id); - if (IS_ERR(new_ioas)) - return PTR_ERR(new_ioas); - - rc = iopt_add_access(&new_ioas->iopt, access); - if (rc) { - iommufd_put_object(&new_ioas->obj); - return rc; } - iommufd_ref_to_users(&new_ioas->obj); - access->ioas = new_ioas; - return 0; + rc = iommufd_access_change_ioas_id(access, ioas_id); + mutex_unlock(&access->ioas_lock); + return rc; } EXPORT_SYMBOL_NS_GPL(iommufd_access_attach, IOMMUFD); +int iommufd_access_replace(struct iommufd_access *access, u32 ioas_id) +{ + int rc; + + mutex_lock(&access->ioas_lock); + if (!access->ioas) { + mutex_unlock(&access->ioas_lock); + return -ENOENT; + } + rc = iommufd_access_change_ioas_id(access, ioas_id); + mutex_unlock(&access->ioas_lock); + return rc; +} +EXPORT_SYMBOL_NS_GPL(iommufd_access_replace, IOMMUFD); + /** * iommufd_access_notify_unmap - Notify users of an iopt to stop using it * @iopt: iopt to work on @@ -531,8 +909,8 @@ void iommufd_access_notify_unmap(struct io_pagetable *iopt, unsigned long iova, void iommufd_access_unpin_pages(struct iommufd_access *access, unsigned long iova, unsigned long length) { - struct io_pagetable *iopt = &access->ioas->iopt; struct iopt_area_contig_iter iter; + struct 
io_pagetable *iopt; unsigned long last_iova; struct iopt_area *area; @@ -540,6 +918,17 @@ void iommufd_access_unpin_pages(struct iommufd_access *access, WARN_ON(check_add_overflow(iova, length - 1, &last_iova))) return; + mutex_lock(&access->ioas_lock); + /* + * The driver must be doing something wrong if it calls this before an + * iommufd_access_attach() or after an iommufd_access_detach(). + */ + if (WARN_ON(!access->ioas_unpin)) { + mutex_unlock(&access->ioas_lock); + return; + } + iopt = &access->ioas_unpin->iopt; + down_read(&iopt->iova_rwsem); iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova) iopt_area_remove_access( @@ -549,6 +938,7 @@ void iommufd_access_unpin_pages(struct iommufd_access *access, min(last_iova, iopt_area_last_iova(area)))); WARN_ON(!iopt_area_contig_done(&iter)); up_read(&iopt->iova_rwsem); + mutex_unlock(&access->ioas_lock); } EXPORT_SYMBOL_NS_GPL(iommufd_access_unpin_pages, IOMMUFD); @@ -594,8 +984,8 @@ int iommufd_access_pin_pages(struct iommufd_access *access, unsigned long iova, unsigned long length, struct page **out_pages, unsigned int flags) { - struct io_pagetable *iopt = &access->ioas->iopt; struct iopt_area_contig_iter iter; + struct io_pagetable *iopt; unsigned long last_iova; struct iopt_area *area; int rc; @@ -610,6 +1000,13 @@ int iommufd_access_pin_pages(struct iommufd_access *access, unsigned long iova, if (check_add_overflow(iova, length - 1, &last_iova)) return -EOVERFLOW; + mutex_lock(&access->ioas_lock); + if (!access->ioas) { + mutex_unlock(&access->ioas_lock); + return -ENOENT; + } + iopt = &access->ioas->iopt; + down_read(&iopt->iova_rwsem); iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova) { unsigned long last = min(last_iova, iopt_area_last_iova(area)); @@ -640,6 +1037,7 @@ int iommufd_access_pin_pages(struct iommufd_access *access, unsigned long iova, } up_read(&iopt->iova_rwsem); + mutex_unlock(&access->ioas_lock); return 0; err_remove: @@ -654,6 +1052,7 @@ err_remove: iopt_area_last_iova(area)))); } up_read(&iopt->iova_rwsem); + mutex_unlock(&access->ioas_lock); return rc; } EXPORT_SYMBOL_NS_GPL(iommufd_access_pin_pages, IOMMUFD); @@ -673,8 +1072,8 @@ EXPORT_SYMBOL_NS_GPL(iommufd_access_pin_pages, IOMMUFD); int iommufd_access_rw(struct iommufd_access *access, unsigned long iova, void *data, size_t length, unsigned int flags) { - struct io_pagetable *iopt = &access->ioas->iopt; struct iopt_area_contig_iter iter; + struct io_pagetable *iopt; struct iopt_area *area; unsigned long last_iova; int rc; @@ -684,6 +1083,13 @@ int iommufd_access_rw(struct iommufd_access *access, unsigned long iova, if (check_add_overflow(iova, length - 1, &last_iova)) return -EOVERFLOW; + mutex_lock(&access->ioas_lock); + if (!access->ioas) { + mutex_unlock(&access->ioas_lock); + return -ENOENT; + } + iopt = &access->ioas->iopt; + down_read(&iopt->iova_rwsem); iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova) { unsigned long last = min(last_iova, iopt_area_last_iova(area)); @@ -710,6 +1116,79 @@ int iommufd_access_rw(struct iommufd_access *access, unsigned long iova, rc = -ENOENT; err_out: up_read(&iopt->iova_rwsem); + mutex_unlock(&access->ioas_lock); return rc; } EXPORT_SYMBOL_NS_GPL(iommufd_access_rw, IOMMUFD); + +int iommufd_get_hw_info(struct iommufd_ucmd *ucmd) +{ + struct iommu_hw_info *cmd = ucmd->cmd; + void __user *user_ptr = u64_to_user_ptr(cmd->data_uptr); + const struct iommu_ops *ops; + struct iommufd_device *idev; + unsigned int data_len; + unsigned int copy_len; + void *data; + int rc; + + if (cmd->flags || 
cmd->__reserved) + return -EOPNOTSUPP; + + idev = iommufd_get_device(ucmd, cmd->dev_id); + if (IS_ERR(idev)) + return PTR_ERR(idev); + + ops = dev_iommu_ops(idev->dev); + if (ops->hw_info) { + data = ops->hw_info(idev->dev, &data_len, &cmd->out_data_type); + if (IS_ERR(data)) { + rc = PTR_ERR(data); + goto out_put; + } + + /* + * drivers that have a hw_info callback should have a unique + * iommu_hw_info_type. + */ + if (WARN_ON_ONCE(cmd->out_data_type == + IOMMU_HW_INFO_TYPE_NONE)) { + rc = -ENODEV; + goto out_free; + } + } else { + cmd->out_data_type = IOMMU_HW_INFO_TYPE_NONE; + data_len = 0; + data = NULL; + } + + copy_len = min(cmd->data_len, data_len); + if (copy_to_user(user_ptr, data, copy_len)) { + rc = -EFAULT; + goto out_free; + } + + /* + * Zero the trailing bytes if the user buffer is bigger than the + * data size the kernel actually has. + */ + if (copy_len < cmd->data_len) { + if (clear_user(user_ptr + copy_len, cmd->data_len - copy_len)) { + rc = -EFAULT; + goto out_free; + } + } + + /* + * We return the length the kernel supports so userspace may know what + * the kernel capability is. It could be larger than the input buffer. + */ + cmd->data_len = data_len; + + rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); +out_free: + kfree(data); +out_put: + iommufd_put_object(&idev->obj); + return rc; +} diff --git a/drivers/iommu/iommufd/hw_pagetable.c b/drivers/iommu/iommufd/hw_pagetable.c index 6cdb6749d359..cf2c1504e20d 100644 --- a/drivers/iommu/iommufd/hw_pagetable.c +++ b/drivers/iommu/iommufd/hw_pagetable.c @@ -3,6 +3,7 @@ * Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES */ #include <linux/iommu.h> +#include <uapi/linux/iommufd.h> #include "iommufd_private.h" @@ -11,8 +12,6 @@ void iommufd_hw_pagetable_destroy(struct iommufd_object *obj) struct iommufd_hw_pagetable *hwpt = container_of(obj, struct iommufd_hw_pagetable, obj); - WARN_ON(!list_empty(&hwpt->devices)); - if (!list_empty(&hwpt->hwpt_item)) { mutex_lock(&hwpt->ioas->mutex); list_del(&hwpt->hwpt_item); @@ -25,7 +24,35 @@ void iommufd_hw_pagetable_destroy(struct iommufd_object *obj) iommu_domain_free(hwpt->domain); refcount_dec(&hwpt->ioas->obj.users); - mutex_destroy(&hwpt->devices_lock); +} + +void iommufd_hw_pagetable_abort(struct iommufd_object *obj) +{ + struct iommufd_hw_pagetable *hwpt = + container_of(obj, struct iommufd_hw_pagetable, obj); + + /* The ioas->mutex must be held until finalize is called. */ + lockdep_assert_held(&hwpt->ioas->mutex); + + if (!list_empty(&hwpt->hwpt_item)) { + list_del_init(&hwpt->hwpt_item); + iopt_table_remove_domain(&hwpt->ioas->iopt, hwpt->domain); + } + iommufd_hw_pagetable_destroy(obj); +} + +int iommufd_hw_pagetable_enforce_cc(struct iommufd_hw_pagetable *hwpt) +{ + if (hwpt->enforce_cache_coherency) + return 0; + + if (hwpt->domain->ops->enforce_cache_coherency) + hwpt->enforce_cache_coherency = + hwpt->domain->ops->enforce_cache_coherency( + hwpt->domain); + if (!hwpt->enforce_cache_coherency) + return -EINVAL; + return 0; } /** @@ -38,6 +65,10 @@ void iommufd_hw_pagetable_destroy(struct iommufd_object *obj) * Allocate a new iommu_domain and return it as a hw_pagetable. The HWPT * will be linked to the given ioas and upon return the underlying iommu_domain * is fully populated. + * + * The caller must hold the ioas->mutex until after + * iommufd_object_abort_and_destroy() or iommufd_object_finalize() is called on + * the returned hwpt. 
*/ struct iommufd_hw_pagetable * iommufd_hw_pagetable_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas, @@ -52,9 +83,7 @@ iommufd_hw_pagetable_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas, if (IS_ERR(hwpt)) return hwpt; - INIT_LIST_HEAD(&hwpt->devices); INIT_LIST_HEAD(&hwpt->hwpt_item); - mutex_init(&hwpt->devices_lock); /* Pairs with iommufd_hw_pagetable_destroy() */ refcount_inc(&ioas->obj.users); hwpt->ioas = ioas; @@ -65,7 +94,18 @@ iommufd_hw_pagetable_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas, goto out_abort; } - mutex_lock(&hwpt->devices_lock); + /* + * Set the coherency mode before we do iopt_table_add_domain() as some + * iommus have a per-PTE bit that controls it and need to decide before + * doing any maps. It is an iommu driver bug to report + * IOMMU_CAP_ENFORCE_CACHE_COHERENCY but fail enforce_cache_coherency on + * a new domain. + */ + if (idev->enforce_cache_coherency) { + rc = iommufd_hw_pagetable_enforce_cc(hwpt); + if (WARN_ON(rc)) + goto out_abort; + } /* * immediate_attach exists only to accommodate iommu drivers that cannot @@ -76,30 +116,64 @@ iommufd_hw_pagetable_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas, if (immediate_attach) { rc = iommufd_hw_pagetable_attach(hwpt, idev); if (rc) - goto out_unlock; + goto out_abort; } rc = iopt_table_add_domain(&hwpt->ioas->iopt, hwpt->domain); if (rc) goto out_detach; list_add_tail(&hwpt->hwpt_item, &hwpt->ioas->hwpt_list); - - if (immediate_attach) { - /* See iommufd_device_do_attach() */ - refcount_inc(&hwpt->obj.users); - idev->hwpt = hwpt; - list_add(&idev->devices_item, &hwpt->devices); - } - - mutex_unlock(&hwpt->devices_lock); return hwpt; out_detach: if (immediate_attach) - iommufd_hw_pagetable_detach(hwpt, idev); -out_unlock: - mutex_unlock(&hwpt->devices_lock); + iommufd_hw_pagetable_detach(idev); out_abort: iommufd_object_abort_and_destroy(ictx, &hwpt->obj); return ERR_PTR(rc); } + +int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd) +{ + struct iommu_hwpt_alloc *cmd = ucmd->cmd; + struct iommufd_hw_pagetable *hwpt; + struct iommufd_device *idev; + struct iommufd_ioas *ioas; + int rc; + + if (cmd->flags || cmd->__reserved) + return -EOPNOTSUPP; + + idev = iommufd_get_device(ucmd, cmd->dev_id); + if (IS_ERR(idev)) + return PTR_ERR(idev); + + ioas = iommufd_get_ioas(ucmd->ictx, cmd->pt_id); + if (IS_ERR(ioas)) { + rc = PTR_ERR(ioas); + goto out_put_idev; + } + + mutex_lock(&ioas->mutex); + hwpt = iommufd_hw_pagetable_alloc(ucmd->ictx, ioas, idev, false); + if (IS_ERR(hwpt)) { + rc = PTR_ERR(hwpt); + goto out_unlock; + } + + cmd->out_hwpt_id = hwpt->obj.id; + rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); + if (rc) + goto out_hwpt; + iommufd_object_finalize(ucmd->ictx, &hwpt->obj); + goto out_unlock; + +out_hwpt: + iommufd_object_abort_and_destroy(ucmd->ictx, &hwpt->obj); +out_unlock: + mutex_unlock(&ioas->mutex); + iommufd_put_object(&ioas->obj); +out_put_idev: + iommufd_put_object(&idev->obj); + return rc; +} diff --git a/drivers/iommu/iommufd/io_pagetable.c b/drivers/iommu/iommufd/io_pagetable.c index 724c4c574241..3a598182b761 100644 --- a/drivers/iommu/iommufd/io_pagetable.c +++ b/drivers/iommu/iommufd/io_pagetable.c @@ -1158,36 +1158,36 @@ out_unlock: } void iopt_remove_access(struct io_pagetable *iopt, - struct iommufd_access *access) + struct iommufd_access *access, + u32 iopt_access_list_id) { down_write(&iopt->domains_rwsem); down_write(&iopt->iova_rwsem); - WARN_ON(xa_erase(&iopt->access_list, access->iopt_access_list_id) != - access); + 
WARN_ON(xa_erase(&iopt->access_list, iopt_access_list_id) != access); WARN_ON(iopt_calculate_iova_alignment(iopt)); up_write(&iopt->iova_rwsem); up_write(&iopt->domains_rwsem); } -/* Narrow the valid_iova_itree to include reserved ranges from a group. */ -int iopt_table_enforce_group_resv_regions(struct io_pagetable *iopt, - struct device *device, - struct iommu_group *group, - phys_addr_t *sw_msi_start) +/* Narrow the valid_iova_itree to include reserved ranges from a device. */ +int iopt_table_enforce_dev_resv_regions(struct io_pagetable *iopt, + struct device *dev, + phys_addr_t *sw_msi_start) { struct iommu_resv_region *resv; - struct iommu_resv_region *tmp; - LIST_HEAD(group_resv_regions); + LIST_HEAD(resv_regions); unsigned int num_hw_msi = 0; unsigned int num_sw_msi = 0; int rc; + if (iommufd_should_fail()) + return -EINVAL; + down_write(&iopt->iova_rwsem); - rc = iommu_get_group_resv_regions(group, &group_resv_regions); - if (rc) - goto out_unlock; + /* FIXME: drivers allocate memory but there is no failure propagated */ + iommu_get_resv_regions(dev, &resv_regions); - list_for_each_entry(resv, &group_resv_regions, list) { + list_for_each_entry(resv, &resv_regions, list) { if (resv->type == IOMMU_RESV_DIRECT_RELAXABLE) continue; @@ -1199,7 +1199,7 @@ int iopt_table_enforce_group_resv_regions(struct io_pagetable *iopt, } rc = iopt_reserve_iova(iopt, resv->start, - resv->length - 1 + resv->start, device); + resv->length - 1 + resv->start, dev); if (rc) goto out_reserved; } @@ -1214,11 +1214,9 @@ int iopt_table_enforce_group_resv_regions(struct io_pagetable *iopt, goto out_free_resv; out_reserved: - __iopt_remove_reserved_iova(iopt, device); + __iopt_remove_reserved_iova(iopt, dev); out_free_resv: - list_for_each_entry_safe(resv, tmp, &group_resv_regions, list) - kfree(resv); -out_unlock: + iommu_put_resv_regions(dev, &resv_regions); up_write(&iopt->iova_rwsem); return rc; } diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h index f9790983699c..2c58670011fe 100644 --- a/drivers/iommu/iommufd/iommufd_private.h +++ b/drivers/iommu/iommufd/iommufd_private.h @@ -17,6 +17,7 @@ struct iommufd_device; struct iommufd_ctx { struct file *file; struct xarray objects; + struct xarray groups; u8 account_mode; /* Compatibility with VFIO no iommu */ @@ -75,10 +76,9 @@ int iopt_table_add_domain(struct io_pagetable *iopt, struct iommu_domain *domain); void iopt_table_remove_domain(struct io_pagetable *iopt, struct iommu_domain *domain); -int iopt_table_enforce_group_resv_regions(struct io_pagetable *iopt, - struct device *device, - struct iommu_group *group, - phys_addr_t *sw_msi_start); +int iopt_table_enforce_dev_resv_regions(struct io_pagetable *iopt, + struct device *dev, + phys_addr_t *sw_msi_start); int iopt_set_allow_iova(struct io_pagetable *iopt, struct rb_root_cached *allowed_iova); int iopt_reserve_iova(struct io_pagetable *iopt, unsigned long start, @@ -119,6 +119,7 @@ enum iommufd_object_type { #ifdef CONFIG_IOMMUFD_TEST IOMMUFD_OBJ_SELFTEST, #endif + IOMMUFD_OBJ_MAX, }; /* Base struct for all objects with a userspace ID handle. */ @@ -148,29 +149,6 @@ static inline void iommufd_put_object(struct iommufd_object *obj) up_read(&obj->destroy_rwsem); } -/** - * iommufd_ref_to_users() - Switch from destroy_rwsem to users refcount - * protection - * @obj - Object to release - * - * Objects have two refcount protections (destroy_rwsem and the refcount_t - * users). Holding either of these will prevent the object from being destroyed. 
- * - * Depending on the use case, one protection or the other is appropriate. In - * most cases references are being protected by the destroy_rwsem. This allows - * orderly destruction of the object because iommufd_object_destroy_user() will - * wait for it to become unlocked. However, as a rwsem, it cannot be held across - * a system call return. So cases that have longer term needs must switch - * to the weaker users refcount_t. - * - * With users protection iommufd_object_destroy_user() will return false, - * refusing to destroy the object, causing -EBUSY to userspace. - */ -static inline void iommufd_ref_to_users(struct iommufd_object *obj) -{ - up_read(&obj->destroy_rwsem); - /* iommufd_lock_obj() obtains users as well */ -} void iommufd_object_abort(struct iommufd_ctx *ictx, struct iommufd_object *obj); void iommufd_object_abort_and_destroy(struct iommufd_ctx *ictx, struct iommufd_object *obj); @@ -260,18 +238,39 @@ struct iommufd_hw_pagetable { bool msi_cookie : 1; /* Head at iommufd_ioas::hwpt_list */ struct list_head hwpt_item; - struct mutex devices_lock; - struct list_head devices; }; struct iommufd_hw_pagetable * iommufd_hw_pagetable_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas, struct iommufd_device *idev, bool immediate_attach); +int iommufd_hw_pagetable_enforce_cc(struct iommufd_hw_pagetable *hwpt); int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt, struct iommufd_device *idev); -void iommufd_hw_pagetable_detach(struct iommufd_hw_pagetable *hwpt, - struct iommufd_device *idev); +struct iommufd_hw_pagetable * +iommufd_hw_pagetable_detach(struct iommufd_device *idev); void iommufd_hw_pagetable_destroy(struct iommufd_object *obj); +void iommufd_hw_pagetable_abort(struct iommufd_object *obj); +int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd); + +static inline void iommufd_hw_pagetable_put(struct iommufd_ctx *ictx, + struct iommufd_hw_pagetable *hwpt) +{ + lockdep_assert_not_held(&hwpt->ioas->mutex); + if (hwpt->auto_domain) + iommufd_object_deref_user(ictx, &hwpt->obj); + else + refcount_dec(&hwpt->obj.users); +} + +struct iommufd_group { + struct kref ref; + struct mutex lock; + struct iommufd_ctx *ictx; + struct iommu_group *group; + struct iommufd_hw_pagetable *hwpt; + struct list_head device_list; + phys_addr_t sw_msi_start; +}; /* * A iommufd_device object represents the binding relationship between a @@ -281,21 +280,30 @@ void iommufd_hw_pagetable_destroy(struct iommufd_object *obj); struct iommufd_device { struct iommufd_object obj; struct iommufd_ctx *ictx; - struct iommufd_hw_pagetable *hwpt; - /* Head at iommufd_hw_pagetable::devices */ - struct list_head devices_item; + struct iommufd_group *igroup; + struct list_head group_item; /* always the physical device */ struct device *dev; - struct iommu_group *group; bool enforce_cache_coherency; }; +static inline struct iommufd_device * +iommufd_get_device(struct iommufd_ucmd *ucmd, u32 id) +{ + return container_of(iommufd_get_object(ucmd->ictx, id, + IOMMUFD_OBJ_DEVICE), + struct iommufd_device, obj); +} + void iommufd_device_destroy(struct iommufd_object *obj); +int iommufd_get_hw_info(struct iommufd_ucmd *ucmd); struct iommufd_access { struct iommufd_object obj; struct iommufd_ctx *ictx; struct iommufd_ioas *ioas; + struct iommufd_ioas *ioas_unpin; + struct mutex ioas_lock; const struct iommufd_access_ops *ops; void *data; unsigned long iova_alignment; @@ -304,7 +312,8 @@ struct iommufd_access { int iopt_add_access(struct io_pagetable *iopt, struct iommufd_access *access); void 
iopt_remove_access(struct io_pagetable *iopt, - struct iommufd_access *access); + struct iommufd_access *access, + u32 iopt_access_list_id); void iommufd_access_destroy_object(struct iommufd_object *obj); #ifdef CONFIG_IOMMUFD_TEST @@ -314,7 +323,7 @@ extern size_t iommufd_test_memory_limit; void iommufd_test_syz_conv_iova_id(struct iommufd_ucmd *ucmd, unsigned int ioas_id, u64 *iova, u32 *flags); bool iommufd_should_fail(void); -void __init iommufd_test_init(void); +int __init iommufd_test_init(void); void iommufd_test_exit(void); bool iommufd_selftest_is_mock_dev(struct device *dev); #else @@ -327,8 +336,9 @@ static inline bool iommufd_should_fail(void) { return false; } -static inline void __init iommufd_test_init(void) +static inline int __init iommufd_test_init(void) { + return 0; } static inline void iommufd_test_exit(void) { diff --git a/drivers/iommu/iommufd/iommufd_test.h b/drivers/iommu/iommufd/iommufd_test.h index b3d69cca7729..3f3644375bf1 100644 --- a/drivers/iommu/iommufd/iommufd_test.h +++ b/drivers/iommu/iommufd/iommufd_test.h @@ -17,6 +17,8 @@ enum { IOMMU_TEST_OP_ACCESS_PAGES, IOMMU_TEST_OP_ACCESS_RW, IOMMU_TEST_OP_SET_TEMP_MEMORY_LIMIT, + IOMMU_TEST_OP_MOCK_DOMAIN_REPLACE, + IOMMU_TEST_OP_ACCESS_REPLACE_IOAS, }; enum { @@ -51,8 +53,13 @@ struct iommu_test_cmd { struct { __u32 out_stdev_id; __u32 out_hwpt_id; + /* out_idev_id is the standard iommufd_bind object */ + __u32 out_idev_id; } mock_domain; struct { + __u32 pt_id; + } mock_domain_replace; + struct { __aligned_u64 iova; __aligned_u64 length; __aligned_u64 uptr; @@ -85,9 +92,21 @@ struct iommu_test_cmd { struct { __u32 limit; } memory_limit; + struct { + __u32 ioas_id; + } access_replace_ioas; }; __u32 last; }; #define IOMMU_TEST_CMD _IO(IOMMUFD_TYPE, IOMMUFD_CMD_BASE + 32) +/* Mock structs for IOMMU_DEVICE_GET_HW_INFO ioctl */ +#define IOMMU_HW_INFO_TYPE_SELFTEST 0xfeedbeef +#define IOMMU_HW_INFO_SELFTEST_REGVAL 0xdeadbeef + +struct iommu_test_hw_info { + __u32 flags; + __u32 test_reg; +}; + #endif diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c index 4cf5f73f2708..e71523cbd0de 100644 --- a/drivers/iommu/iommufd/main.c +++ b/drivers/iommu/iommufd/main.c @@ -24,6 +24,7 @@ struct iommufd_object_ops { void (*destroy)(struct iommufd_object *obj); + void (*abort)(struct iommufd_object *obj); }; static const struct iommufd_object_ops iommufd_object_ops[]; static struct miscdevice vfio_misc_dev; @@ -32,6 +33,7 @@ struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx, size_t size, enum iommufd_object_type type) { + static struct lock_class_key obj_keys[IOMMUFD_OBJ_MAX]; struct iommufd_object *obj; int rc; @@ -39,7 +41,15 @@ struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx, if (!obj) return ERR_PTR(-ENOMEM); obj->type = type; - init_rwsem(&obj->destroy_rwsem); + /* + * In most cases the destroy_rwsem is obtained with try so it doesn't + * interact with lockdep, however on destroy we have to sleep. This + * means if we have to destroy an object while holding a get on another + * object it triggers lockdep. Using one locking class per object type + * is a simple and reasonable way to avoid this. + */ + __init_rwsem(&obj->destroy_rwsem, "iommufd_object::destroy_rwsem", + &obj_keys[type]); refcount_set(&obj->users, 1); /* @@ -50,7 +60,7 @@ struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx, * before calling iommufd_object_finalize(). 
*/ rc = xa_alloc(&ictx->objects, &obj->id, XA_ZERO_ENTRY, - xa_limit_32b, GFP_KERNEL_ACCOUNT); + xa_limit_31b, GFP_KERNEL_ACCOUNT); if (rc) goto out_free; return obj; @@ -95,7 +105,10 @@ void iommufd_object_abort(struct iommufd_ctx *ictx, struct iommufd_object *obj) void iommufd_object_abort_and_destroy(struct iommufd_ctx *ictx, struct iommufd_object *obj) { - iommufd_object_ops[obj->type].destroy(obj); + if (iommufd_object_ops[obj->type].abort) + iommufd_object_ops[obj->type].abort(obj); + else + iommufd_object_ops[obj->type].destroy(obj); iommufd_object_abort(ictx, obj); } @@ -223,6 +236,7 @@ static int iommufd_fops_open(struct inode *inode, struct file *filp) } xa_init_flags(&ictx->objects, XA_FLAGS_ALLOC1 | XA_FLAGS_ACCOUNT); + xa_init(&ictx->groups); ictx->file = filp; filp->private_data = ictx; return 0; @@ -258,6 +272,7 @@ static int iommufd_fops_release(struct inode *inode, struct file *filp) if (WARN_ON(!destroyed)) break; } + WARN_ON(!xa_empty(&ictx->groups)); kfree(ictx); return 0; } @@ -290,6 +305,8 @@ static int iommufd_option(struct iommufd_ucmd *ucmd) union ucmd_buffer { struct iommu_destroy destroy; + struct iommu_hw_info info; + struct iommu_hwpt_alloc hwpt; struct iommu_ioas_alloc alloc; struct iommu_ioas_allow_iovas allow_iovas; struct iommu_ioas_copy ioas_copy; @@ -321,6 +338,10 @@ struct iommufd_ioctl_op { } static const struct iommufd_ioctl_op iommufd_ioctl_ops[] = { IOCTL_OP(IOMMU_DESTROY, iommufd_destroy, struct iommu_destroy, id), + IOCTL_OP(IOMMU_GET_HW_INFO, iommufd_get_hw_info, struct iommu_hw_info, + __reserved), + IOCTL_OP(IOMMU_HWPT_ALLOC, iommufd_hwpt_alloc, struct iommu_hwpt_alloc, + __reserved), IOCTL_OP(IOMMU_IOAS_ALLOC, iommufd_ioas_alloc_ioctl, struct iommu_ioas_alloc, out_ioas_id), IOCTL_OP(IOMMU_IOAS_ALLOW_IOVAS, iommufd_ioas_allow_iovas, @@ -418,6 +439,30 @@ struct iommufd_ctx *iommufd_ctx_from_file(struct file *file) EXPORT_SYMBOL_NS_GPL(iommufd_ctx_from_file, IOMMUFD); /** + * iommufd_ctx_from_fd - Acquires a reference to the iommufd context + * @fd: File descriptor to obtain the reference from + * + * Returns a pointer to the iommufd_ctx, otherwise ERR_PTR. On success + * the caller is responsible to call iommufd_ctx_put(). 
+ */ +struct iommufd_ctx *iommufd_ctx_from_fd(int fd) +{ + struct file *file; + + file = fget(fd); + if (!file) + return ERR_PTR(-EBADF); + + if (file->f_op != &iommufd_fops) { + fput(file); + return ERR_PTR(-EBADFD); + } + /* fget is the same as iommufd_ctx_get() */ + return file->private_data; +} +EXPORT_SYMBOL_NS_GPL(iommufd_ctx_from_fd, IOMMUFD); + +/** * iommufd_ctx_put - Put back a reference * @ictx: Context to put back */ @@ -439,6 +484,7 @@ static const struct iommufd_object_ops iommufd_object_ops[] = { }, [IOMMUFD_OBJ_HW_PAGETABLE] = { .destroy = iommufd_hw_pagetable_destroy, + .abort = iommufd_hw_pagetable_abort, }, #ifdef CONFIG_IOMMUFD_TEST [IOMMUFD_OBJ_SELFTEST] = { @@ -477,8 +523,14 @@ static int __init iommufd_init(void) if (ret) goto err_misc; } - iommufd_test_init(); + ret = iommufd_test_init(); + if (ret) + goto err_vfio_misc; return 0; + +err_vfio_misc: + if (IS_ENABLED(CONFIG_IOMMUFD_VFIO_CONTAINER)) + misc_deregister(&vfio_misc_dev); err_misc: misc_deregister(&iommu_misc_dev); return ret; @@ -499,5 +551,6 @@ module_exit(iommufd_exit); MODULE_ALIAS_MISCDEV(VFIO_MINOR); MODULE_ALIAS("devname:vfio/vfio"); #endif +MODULE_IMPORT_NS(IOMMUFD_INTERNAL); MODULE_DESCRIPTION("I/O Address Space Management for passthrough devices"); MODULE_LICENSE("GPL"); diff --git a/drivers/iommu/iommufd/selftest.c b/drivers/iommu/iommufd/selftest.c index 74c2076105d4..56506d5753f1 100644 --- a/drivers/iommu/iommufd/selftest.c +++ b/drivers/iommu/iommufd/selftest.c @@ -9,14 +9,17 @@ #include <linux/file.h> #include <linux/anon_inodes.h> #include <linux/fault-inject.h> +#include <linux/platform_device.h> #include <uapi/linux/iommufd.h> +#include "../iommu-priv.h" #include "io_pagetable.h" #include "iommufd_private.h" #include "iommufd_test.h" static DECLARE_FAULT_ATTR(fail_iommufd); static struct dentry *dbgfs_root; +static struct platform_device *selftest_iommu_dev; size_t iommufd_test_memory_limit = 65536; @@ -128,6 +131,21 @@ static struct iommu_domain mock_blocking_domain = { .ops = &mock_blocking_ops, }; +static void *mock_domain_hw_info(struct device *dev, u32 *length, u32 *type) +{ + struct iommu_test_hw_info *info; + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) + return ERR_PTR(-ENOMEM); + + info->test_reg = IOMMU_HW_INFO_SELFTEST_REGVAL; + *length = sizeof(*info); + *type = IOMMU_HW_INFO_TYPE_SELFTEST; + + return info; +} + static struct iommu_domain *mock_domain_alloc(unsigned int iommu_domain_type) { struct mock_iommu_domain *mock; @@ -135,7 +153,7 @@ static struct iommu_domain *mock_domain_alloc(unsigned int iommu_domain_type) if (iommu_domain_type == IOMMU_DOMAIN_BLOCKED) return &mock_blocking_domain; - if (WARN_ON(iommu_domain_type != IOMMU_DOMAIN_UNMANAGED)) + if (iommu_domain_type != IOMMU_DOMAIN_UNMANAGED) return NULL; mock = kzalloc(sizeof(*mock), GFP_KERNEL); @@ -276,12 +294,23 @@ static void mock_domain_set_plaform_dma_ops(struct device *dev) */ } +static struct iommu_device mock_iommu_device = { +}; + +static struct iommu_device *mock_probe_device(struct device *dev) +{ + return &mock_iommu_device; +} + static const struct iommu_ops mock_ops = { .owner = THIS_MODULE, .pgsize_bitmap = MOCK_IO_PAGE_SIZE, + .hw_info = mock_domain_hw_info, .domain_alloc = mock_domain_alloc, .capable = mock_domain_capable, .set_platform_dma_ops = mock_domain_set_plaform_dma_ops, + .device_group = generic_device_group, + .probe_device = mock_probe_device, .default_domain_ops = &(struct iommu_domain_ops){ .free = mock_domain_free, @@ -292,10 +321,6 @@ static const struct iommu_ops 
mock_ops = { }, }; -static struct iommu_device mock_iommu_device = { - .ops = &mock_ops, -}; - static inline struct iommufd_hw_pagetable * get_md_pagetable(struct iommufd_ucmd *ucmd, u32 mockpt_id, struct mock_iommu_domain **mock) @@ -316,22 +341,29 @@ get_md_pagetable(struct iommufd_ucmd *ucmd, u32 mockpt_id, return hwpt; } -static struct bus_type iommufd_mock_bus_type = { - .name = "iommufd_mock", - .iommu_ops = &mock_ops, +struct mock_bus_type { + struct bus_type bus; + struct notifier_block nb; +}; + +static struct mock_bus_type iommufd_mock_bus_type = { + .bus = { + .name = "iommufd_mock", + }, }; +static atomic_t mock_dev_num; + static void mock_dev_release(struct device *dev) { struct mock_dev *mdev = container_of(dev, struct mock_dev, dev); + atomic_dec(&mock_dev_num); kfree(mdev); } static struct mock_dev *mock_dev_create(void) { - struct iommu_group *iommu_group; - struct dev_iommu *dev_iommu; struct mock_dev *mdev; int rc; @@ -341,51 +373,18 @@ static struct mock_dev *mock_dev_create(void) device_initialize(&mdev->dev); mdev->dev.release = mock_dev_release; - mdev->dev.bus = &iommufd_mock_bus_type; - - iommu_group = iommu_group_alloc(); - if (IS_ERR(iommu_group)) { - rc = PTR_ERR(iommu_group); - goto err_put; - } + mdev->dev.bus = &iommufd_mock_bus_type.bus; rc = dev_set_name(&mdev->dev, "iommufd_mock%u", - iommu_group_id(iommu_group)); + atomic_inc_return(&mock_dev_num)); if (rc) - goto err_group; - - /* - * The iommu core has no way to associate a single device with an iommu - * driver (heck currently it can't even support two iommu_drivers - * registering). Hack it together with an open coded dev_iommu_get(). - * Notice that the normal notifier triggered iommu release process also - * does not work here because this bus is not in iommu_buses. 
- */ - mdev->dev.iommu = kzalloc(sizeof(*dev_iommu), GFP_KERNEL); - if (!mdev->dev.iommu) { - rc = -ENOMEM; - goto err_group; - } - mutex_init(&mdev->dev.iommu->lock); - mdev->dev.iommu->iommu_dev = &mock_iommu_device; + goto err_put; rc = device_add(&mdev->dev); if (rc) - goto err_dev_iommu; - - rc = iommu_group_add_device(iommu_group, &mdev->dev); - if (rc) - goto err_del; - iommu_group_put(iommu_group); + goto err_put; return mdev; -err_del: - device_del(&mdev->dev); -err_dev_iommu: - kfree(mdev->dev.iommu); - mdev->dev.iommu = NULL; -err_group: - iommu_group_put(iommu_group); err_put: put_device(&mdev->dev); return ERR_PTR(rc); @@ -393,11 +392,7 @@ err_put: static void mock_dev_destroy(struct mock_dev *mdev) { - iommu_group_remove_device(&mdev->dev); - device_del(&mdev->dev); - kfree(mdev->dev.iommu); - mdev->dev.iommu = NULL; - put_device(&mdev->dev); + device_unregister(&mdev->dev); } bool iommufd_selftest_is_mock_dev(struct device *dev) @@ -443,9 +438,15 @@ static int iommufd_test_mock_domain(struct iommufd_ucmd *ucmd, /* Userspace must destroy the device_id to destroy the object */ cmd->mock_domain.out_hwpt_id = pt_id; cmd->mock_domain.out_stdev_id = sobj->obj.id; + cmd->mock_domain.out_idev_id = idev_id; + rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); + if (rc) + goto out_detach; iommufd_object_finalize(ucmd->ictx, &sobj->obj); - return iommufd_ucmd_respond(ucmd, sizeof(*cmd)); + return 0; +out_detach: + iommufd_device_detach(idev); out_unbind: iommufd_device_unbind(idev); out_mdev: @@ -455,6 +456,42 @@ out_sobj: return rc; } +/* Replace the mock domain with a manually allocated hw_pagetable */ +static int iommufd_test_mock_domain_replace(struct iommufd_ucmd *ucmd, + unsigned int device_id, u32 pt_id, + struct iommu_test_cmd *cmd) +{ + struct iommufd_object *dev_obj; + struct selftest_obj *sobj; + int rc; + + /* + * Prefer to use the OBJ_SELFTEST because the destroy_rwsem will ensure + * it doesn't race with detach, which is not allowed. 
+ */ + dev_obj = + iommufd_get_object(ucmd->ictx, device_id, IOMMUFD_OBJ_SELFTEST); + if (IS_ERR(dev_obj)) + return PTR_ERR(dev_obj); + + sobj = container_of(dev_obj, struct selftest_obj, obj); + if (sobj->type != TYPE_IDEV) { + rc = -EINVAL; + goto out_dev_obj; + } + + rc = iommufd_device_replace(sobj->idev.idev, &pt_id); + if (rc) + goto out_dev_obj; + + cmd->mock_domain_replace.pt_id = pt_id; + rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); + +out_dev_obj: + iommufd_put_object(dev_obj); + return rc; +} + /* Add an additional reserved IOVA to the IOAS */ static int iommufd_test_add_reserved(struct iommufd_ucmd *ucmd, unsigned int mockpt_id, @@ -748,6 +785,22 @@ out_free_staccess: return rc; } +static int iommufd_test_access_replace_ioas(struct iommufd_ucmd *ucmd, + unsigned int access_id, + unsigned int ioas_id) +{ + struct selftest_access *staccess; + int rc; + + staccess = iommufd_access_get(access_id); + if (IS_ERR(staccess)) + return PTR_ERR(staccess); + + rc = iommufd_access_replace(staccess->access, ioas_id); + fput(staccess->file); + return rc; +} + /* Check that the pages in a page array match the pages in the user VA */ static int iommufd_test_check_pages(void __user *uptr, struct page **pages, size_t npages) @@ -948,6 +1001,9 @@ int iommufd_test(struct iommufd_ucmd *ucmd) cmd->add_reserved.length); case IOMMU_TEST_OP_MOCK_DOMAIN: return iommufd_test_mock_domain(ucmd, cmd); + case IOMMU_TEST_OP_MOCK_DOMAIN_REPLACE: + return iommufd_test_mock_domain_replace( + ucmd, cmd->id, cmd->mock_domain_replace.pt_id, cmd); case IOMMU_TEST_OP_MD_CHECK_MAP: return iommufd_test_md_check_pa( ucmd, cmd->id, cmd->check_map.iova, @@ -960,6 +1016,9 @@ int iommufd_test(struct iommufd_ucmd *ucmd) case IOMMU_TEST_OP_CREATE_ACCESS: return iommufd_test_create_access(ucmd, cmd->id, cmd->create_access.flags); + case IOMMU_TEST_OP_ACCESS_REPLACE_IOAS: + return iommufd_test_access_replace_ioas( + ucmd, cmd->id, cmd->access_replace_ioas.ioas_id); case IOMMU_TEST_OP_ACCESS_PAGES: return iommufd_test_access_pages( ucmd, cmd->id, cmd->access_pages.iova, @@ -992,15 +1051,57 @@ bool iommufd_should_fail(void) return should_fail(&fail_iommufd, 1); } -void __init iommufd_test_init(void) +int __init iommufd_test_init(void) { + struct platform_device_info pdevinfo = { + .name = "iommufd_selftest_iommu", + }; + int rc; + dbgfs_root = fault_create_debugfs_attr("fail_iommufd", NULL, &fail_iommufd); - WARN_ON(bus_register(&iommufd_mock_bus_type)); + + selftest_iommu_dev = platform_device_register_full(&pdevinfo); + if (IS_ERR(selftest_iommu_dev)) { + rc = PTR_ERR(selftest_iommu_dev); + goto err_dbgfs; + } + + rc = bus_register(&iommufd_mock_bus_type.bus); + if (rc) + goto err_platform; + + rc = iommu_device_sysfs_add(&mock_iommu_device, + &selftest_iommu_dev->dev, NULL, "%s", + dev_name(&selftest_iommu_dev->dev)); + if (rc) + goto err_bus; + + rc = iommu_device_register_bus(&mock_iommu_device, &mock_ops, + &iommufd_mock_bus_type.bus, + &iommufd_mock_bus_type.nb); + if (rc) + goto err_sysfs; + return 0; + +err_sysfs: + iommu_device_sysfs_remove(&mock_iommu_device); +err_bus: + bus_unregister(&iommufd_mock_bus_type.bus); +err_platform: + platform_device_unregister(selftest_iommu_dev); +err_dbgfs: + debugfs_remove_recursive(dbgfs_root); + return rc; } void iommufd_test_exit(void) { + iommu_device_sysfs_remove(&mock_iommu_device); + iommu_device_unregister_bus(&mock_iommu_device, + &iommufd_mock_bus_type.bus, + &iommufd_mock_bus_type.nb); + bus_unregister(&iommufd_mock_bus_type.bus); + 
platform_device_unregister(selftest_iommu_dev); debugfs_remove_recursive(dbgfs_root); - bus_unregister(&iommufd_mock_bus_type); } diff --git a/drivers/iommu/iommufd/vfio_compat.c b/drivers/iommu/iommufd/vfio_compat.c index fe02517c73cc..6c810bf80f99 100644 --- a/drivers/iommu/iommufd/vfio_compat.c +++ b/drivers/iommu/iommufd/vfio_compat.c @@ -483,6 +483,8 @@ static int iommufd_vfio_iommu_get_info(struct iommufd_ctx *ictx, rc = cap_size; goto out_put; } + cap_size = ALIGN(cap_size, sizeof(u64)); + if (last_cap && info.argsz >= total_cap_size && put_user(total_cap_size, &last_cap->next)) { rc = -EFAULT; diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index 9f64c5c9f5b9..65ff69477c43 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -14,11 +14,12 @@ #include <linux/init.h> #include <linux/interrupt.h> #include <linux/io.h> +#include <linux/iopoll.h> #include <linux/io-pgtable.h> #include <linux/iommu.h> #include <linux/of.h> -#include <linux/of_device.h> #include <linux/of_platform.h> +#include <linux/pci.h> #include <linux/platform_device.h> #include <linux/sizes.h> #include <linux/slab.h> @@ -253,17 +254,13 @@ static void ipmmu_imuctr_write(struct ipmmu_vmsa_device *mmu, /* Wait for any pending TLB invalidations to complete */ static void ipmmu_tlb_sync(struct ipmmu_vmsa_domain *domain) { - unsigned int count = 0; + u32 val; - while (ipmmu_ctx_read_root(domain, IMCTR) & IMCTR_FLUSH) { - cpu_relax(); - if (++count == TLB_LOOP_TIMEOUT) { - dev_err_ratelimited(domain->mmu->dev, + if (read_poll_timeout_atomic(ipmmu_ctx_read_root, val, + !(val & IMCTR_FLUSH), 1, TLB_LOOP_TIMEOUT, + false, domain, IMCTR)) + dev_err_ratelimited(domain->mmu->dev, "TLB sync timed out -- MMU may be deadlocked\n"); - return; - } - udelay(1); - } } static void ipmmu_tlb_invalidate(struct ipmmu_vmsa_domain *domain) @@ -723,6 +720,10 @@ static bool ipmmu_device_is_allowed(struct device *dev) if (soc_device_match(soc_denylist)) return false; + /* Check whether this device is a PCI device */ + if (dev_is_pci(dev)) + return true; + /* Check whether this device can work with the IPMMU */ for (i = 0; i < ARRAY_SIZE(devices_allowlist); i++) { if (!strcmp(dev_name(dev), devices_allowlist[i])) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index e93906d6e112..640275873a27 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -3,6 +3,7 @@ * Copyright (c) 2015-2016 MediaTek Inc. 
* Author: Yong Wu <yong.wu@mediatek.com> */ +#include <linux/arm-smccc.h> #include <linux/bitfield.h> #include <linux/bug.h> #include <linux/clk.h> @@ -27,6 +28,7 @@ #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/soc/mediatek/infracfg.h> +#include <linux/soc/mediatek/mtk_sip_svc.h> #include <asm/barrier.h> #include <soc/mediatek/smi.h> @@ -143,6 +145,7 @@ #define PGTABLE_PA_35_EN BIT(17) #define TF_PORT_TO_ADDR_MT8173 BIT(18) #define INT_ID_PORT_WIDTH_6 BIT(19) +#define CFG_IFA_MASTER_IN_ATF BIT(20) #define MTK_IOMMU_HAS_FLAG_MASK(pdata, _x, mask) \ ((((pdata)->flags) & (mask)) == (_x)) @@ -167,6 +170,7 @@ enum mtk_iommu_plat { M4U_MT8173, M4U_MT8183, M4U_MT8186, + M4U_MT8188, M4U_MT8192, M4U_MT8195, M4U_MT8365, @@ -258,6 +262,8 @@ struct mtk_iommu_data { struct device *smicomm_dev; struct mtk_iommu_bank_data *bank; + struct mtk_iommu_domain *share_dom; /* For 2 HWs share pgtable */ + struct regmap *pericfg; struct mutex mutex; /* Protect m4u_group/m4u_dom above */ @@ -577,41 +583,55 @@ static int mtk_iommu_config(struct mtk_iommu_data *data, struct device *dev, unsigned int larbid, portid; struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); const struct mtk_iommu_iova_region *region; - u32 peri_mmuen, peri_mmuen_msk; + unsigned long portid_msk = 0; + struct arm_smccc_res res; int i, ret = 0; for (i = 0; i < fwspec->num_ids; ++i) { - larbid = MTK_M4U_TO_LARB(fwspec->ids[i]); portid = MTK_M4U_TO_PORT(fwspec->ids[i]); + portid_msk |= BIT(portid); + } - if (MTK_IOMMU_IS_TYPE(data->plat_data, MTK_IOMMU_TYPE_MM)) { - larb_mmu = &data->larb_imu[larbid]; + if (MTK_IOMMU_IS_TYPE(data->plat_data, MTK_IOMMU_TYPE_MM)) { + /* All ports should be in the same larb. just use 0 here */ + larbid = MTK_M4U_TO_LARB(fwspec->ids[0]); + larb_mmu = &data->larb_imu[larbid]; + region = data->plat_data->iova_region + regionid; - region = data->plat_data->iova_region + regionid; + for_each_set_bit(portid, &portid_msk, 32) larb_mmu->bank[portid] = upper_32_bits(region->iova_base); - dev_dbg(dev, "%s iommu for larb(%s) port %d region %d rgn-bank %d.\n", - enable ? "enable" : "disable", dev_name(larb_mmu->dev), - portid, regionid, larb_mmu->bank[portid]); + dev_dbg(dev, "%s iommu for larb(%s) port 0x%lx region %d rgn-bank %d.\n", + enable ? "enable" : "disable", dev_name(larb_mmu->dev), + portid_msk, regionid, upper_32_bits(region->iova_base)); - if (enable) - larb_mmu->mmu |= MTK_SMI_MMU_EN(portid); - else - larb_mmu->mmu &= ~MTK_SMI_MMU_EN(portid); - } else if (MTK_IOMMU_IS_TYPE(data->plat_data, MTK_IOMMU_TYPE_INFRA)) { - peri_mmuen_msk = BIT(portid); + if (enable) + larb_mmu->mmu |= portid_msk; + else + larb_mmu->mmu &= ~portid_msk; + } else if (MTK_IOMMU_IS_TYPE(data->plat_data, MTK_IOMMU_TYPE_INFRA)) { + if (MTK_IOMMU_HAS_FLAG(data->plat_data, CFG_IFA_MASTER_IN_ATF)) { + arm_smccc_smc(MTK_SIP_KERNEL_IOMMU_CONTROL, + IOMMU_ATF_CMD_CONFIG_INFRA_IOMMU, + portid_msk, enable, 0, 0, 0, 0, &res); + ret = res.a0; + } else { /* PCI dev has only one output id, enable the next writing bit for PCIe */ - if (dev_is_pci(dev)) - peri_mmuen_msk |= BIT(portid + 1); + if (dev_is_pci(dev)) { + if (fwspec->num_ids != 1) { + dev_err(dev, "PCI dev can only have one port.\n"); + return -ENODEV; + } + portid_msk |= BIT(portid + 1); + } - peri_mmuen = enable ? peri_mmuen_msk : 0; ret = regmap_update_bits(data->pericfg, PERICFG_IOMMU_1, - peri_mmuen_msk, peri_mmuen); - if (ret) - dev_err(dev, "%s iommu(%s) inframaster 0x%x fail(%d).\n", - enable ? 
"enable" : "disable", - dev_name(data->dev), peri_mmuen_msk, ret); + (u32)portid_msk, enable ? (u32)portid_msk : 0); } + if (ret) + dev_err(dev, "%s iommu(%s) inframaster 0x%lx fail(%d).\n", + enable ? "enable" : "disable", + dev_name(data->dev), portid_msk, ret); } return ret; } @@ -620,15 +640,14 @@ static int mtk_iommu_domain_finalise(struct mtk_iommu_domain *dom, struct mtk_iommu_data *data, unsigned int region_id) { + struct mtk_iommu_domain *share_dom = data->share_dom; const struct mtk_iommu_iova_region *region; - struct mtk_iommu_domain *m4u_dom; - - /* Always use bank0 in sharing pgtable case */ - m4u_dom = data->bank[0].m4u_dom; - if (m4u_dom) { - dom->iop = m4u_dom->iop; - dom->cfg = m4u_dom->cfg; - dom->domain.pgsize_bitmap = m4u_dom->cfg.pgsize_bitmap; + + /* Always use share domain in sharing pgtable case */ + if (MTK_IOMMU_HAS_FLAG(data->plat_data, SHARE_PGTABLE) && share_dom) { + dom->iop = share_dom->iop; + dom->cfg = share_dom->cfg; + dom->domain.pgsize_bitmap = share_dom->cfg.pgsize_bitmap; goto update_iova_region; } @@ -658,6 +677,9 @@ static int mtk_iommu_domain_finalise(struct mtk_iommu_domain *dom, /* Update our support page sizes bitmap */ dom->domain.pgsize_bitmap = dom->cfg.pgsize_bitmap; + if (MTK_IOMMU_HAS_FLAG(data->plat_data, SHARE_PGTABLE)) + data->share_dom = dom; + update_iova_region: /* Update the iova region for this domain */ region = data->plat_data->iova_region + region_id; @@ -708,7 +730,9 @@ static int mtk_iommu_attach_device(struct iommu_domain *domain, /* Data is in the frstdata in sharing pgtable case. */ frstdata = mtk_iommu_get_frst_data(hw_list); + mutex_lock(&frstdata->mutex); ret = mtk_iommu_domain_finalise(dom, frstdata, region_id); + mutex_unlock(&frstdata->mutex); if (ret) { mutex_unlock(&dom->mutex); return ret; @@ -1318,7 +1342,8 @@ static int mtk_iommu_probe(struct platform_device *pdev) dev_err_probe(dev, ret, "mm dts parse fail\n"); goto out_runtime_disable; } - } else if (MTK_IOMMU_IS_TYPE(data->plat_data, MTK_IOMMU_TYPE_INFRA)) { + } else if (MTK_IOMMU_IS_TYPE(data->plat_data, MTK_IOMMU_TYPE_INFRA) && + !MTK_IOMMU_HAS_FLAG(data->plat_data, CFG_IFA_MASTER_IN_ATF)) { p = data->plat_data->pericfg_comp_str; data->pericfg = syscon_regmap_lookup_by_compatible(p); if (IS_ERR(data->pericfg)) { @@ -1570,6 +1595,67 @@ static const struct mtk_iommu_plat_data mt8186_data_mm = { .iova_region_larb_msk = mt8186_larb_region_msk, }; +static const struct mtk_iommu_plat_data mt8188_data_infra = { + .m4u_plat = M4U_MT8188, + .flags = WR_THROT_EN | DCM_DISABLE | STD_AXI_MODE | PM_CLK_AO | + MTK_IOMMU_TYPE_INFRA | IFA_IOMMU_PCIE_SUPPORT | + PGTABLE_PA_35_EN | CFG_IFA_MASTER_IN_ATF, + .inv_sel_reg = REG_MMU_INV_SEL_GEN2, + .banks_num = 1, + .banks_enable = {true}, + .iova_region = single_domain, + .iova_region_nr = ARRAY_SIZE(single_domain), +}; + +static const u32 mt8188_larb_region_msk[MT8192_MULTI_REGION_NR_MAX][MTK_LARB_NR_MAX] = { + [0] = {~0, ~0, ~0, ~0}, /* Region0: all ports for larb0/1/2/3 */ + [1] = {0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, ~0, ~0, ~0}, /* Region1: larb19(21)/21(22)/23 */ + [2] = {0, 0, 0, 0, ~0, ~0, ~0, ~0, /* Region2: the other larbs. 
*/ + ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, + ~0, ~0, ~0, ~0, ~0, 0, 0, 0, + 0, ~0}, + [3] = {0}, + [4] = {[24] = BIT(0) | BIT(1)}, /* Only larb27(24) port0/1 */ + [5] = {[24] = BIT(2) | BIT(3)}, /* Only larb27(24) port2/3 */ +}; + +static const struct mtk_iommu_plat_data mt8188_data_vdo = { + .m4u_plat = M4U_MT8188, + .flags = HAS_BCLK | HAS_SUB_COMM_3BITS | OUT_ORDER_WR_EN | + WR_THROT_EN | IOVA_34_EN | SHARE_PGTABLE | + PGTABLE_PA_35_EN | MTK_IOMMU_TYPE_MM, + .hw_list = &m4ulist, + .inv_sel_reg = REG_MMU_INV_SEL_GEN2, + .banks_num = 1, + .banks_enable = {true}, + .iova_region = mt8192_multi_dom, + .iova_region_nr = ARRAY_SIZE(mt8192_multi_dom), + .iova_region_larb_msk = mt8188_larb_region_msk, + .larbid_remap = {{2}, {0}, {21}, {0}, {19}, {9, 10, + 11 /* 11a */, 25 /* 11c */}, + {13, 0, 29 /* 16b */, 30 /* 17b */, 0}, {5}}, +}; + +static const struct mtk_iommu_plat_data mt8188_data_vpp = { + .m4u_plat = M4U_MT8188, + .flags = HAS_BCLK | HAS_SUB_COMM_3BITS | OUT_ORDER_WR_EN | + WR_THROT_EN | IOVA_34_EN | SHARE_PGTABLE | + PGTABLE_PA_35_EN | MTK_IOMMU_TYPE_MM, + .hw_list = &m4ulist, + .inv_sel_reg = REG_MMU_INV_SEL_GEN2, + .banks_num = 1, + .banks_enable = {true}, + .iova_region = mt8192_multi_dom, + .iova_region_nr = ARRAY_SIZE(mt8192_multi_dom), + .iova_region_larb_msk = mt8188_larb_region_msk, + .larbid_remap = {{1}, {3}, {23}, {7}, {MTK_INVALID_LARBID}, + {12, 15, 24 /* 11b */}, {14, MTK_INVALID_LARBID, + 16 /* 16a */, 17 /* 17a */, MTK_INVALID_LARBID, + 27, 28 /* ccu0 */, MTK_INVALID_LARBID}, {4, 6}}, +}; + static const unsigned int mt8192_larb_region_msk[MT8192_MULTI_REGION_NR_MAX][MTK_LARB_NR_MAX] = { [0] = {~0, ~0}, /* Region0: larb0/1 */ [1] = {0, 0, 0, 0, ~0, ~0, 0, ~0}, /* Region1: larb4/5/7 */ @@ -1678,6 +1764,9 @@ static const struct of_device_id mtk_iommu_of_ids[] = { { .compatible = "mediatek,mt8173-m4u", .data = &mt8173_data}, { .compatible = "mediatek,mt8183-m4u", .data = &mt8183_data}, { .compatible = "mediatek,mt8186-iommu-mm", .data = &mt8186_data_mm}, /* mm: m4u */ + { .compatible = "mediatek,mt8188-iommu-infra", .data = &mt8188_data_infra}, + { .compatible = "mediatek,mt8188-iommu-vdo", .data = &mt8188_data_vdo}, + { .compatible = "mediatek,mt8188-iommu-vpp", .data = &mt8188_data_vpp}, { .compatible = "mediatek,mt8192-m4u", .data = &mt8192_data}, { .compatible = "mediatek,mt8195-iommu-infra", .data = &mt8195_data_infra}, { .compatible = "mediatek,mt8195-iommu-vdo", .data = &mt8195_data_vdo}, diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c index 40f57d293a79..157b286e36bf 100644 --- a/drivers/iommu/of_iommu.c +++ b/drivers/iommu/of_iommu.c @@ -159,7 +159,7 @@ const struct iommu_ops *of_iommu_configure(struct device *dev, * If we have reason to believe the IOMMU driver missed the initial * probe for dev, replay it to get things in order. 
*/ - if (!err && dev->bus && !device_iommu_mapped(dev)) + if (!err && dev->bus) err = iommu_probe_device(dev); /* Ignore all other errors apart from EPROBE_DEFER */ diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c index 4054030c3237..8ff69fbf9f65 100644 --- a/drivers/iommu/rockchip-iommu.c +++ b/drivers/iommu/rockchip-iommu.c @@ -98,9 +98,8 @@ struct rk_iommu_ops { phys_addr_t (*pt_address)(u32 dte); u32 (*mk_dtentries)(dma_addr_t pt_dma); u32 (*mk_ptentries)(phys_addr_t page, int prot); - phys_addr_t (*dte_addr_phys)(u32 addr); - u32 (*dma_addr_dte)(dma_addr_t dt_dma); u64 dma_bit_mask; + gfp_t gfp_flags; }; struct rk_iommu { @@ -278,8 +277,8 @@ static u32 rk_mk_pte(phys_addr_t page, int prot) /* * In v2: * 31:12 - Page address bit 31:0 - * 11:9 - Page address bit 34:32 - * 8:4 - Page address bit 39:35 + * 11: 8 - Page address bit 35:32 + * 7: 4 - Page address bit 39:36 * 3 - Security * 2 - Writable * 1 - Readable @@ -506,7 +505,7 @@ static int rk_iommu_force_reset(struct rk_iommu *iommu) /* * Check if register DTE_ADDR is working by writing DTE_ADDR_DUMMY - * and verifying that upper 5 nybbles are read back. + * and verifying that upper 5 (v1) or 7 (v2) nybbles are read back. */ for (i = 0; i < iommu->num_mmu; i++) { dte_addr = rk_ops->pt_address(DTE_ADDR_DUMMY); @@ -531,33 +530,6 @@ static int rk_iommu_force_reset(struct rk_iommu *iommu) return 0; } -static inline phys_addr_t rk_dte_addr_phys(u32 addr) -{ - return (phys_addr_t)addr; -} - -static inline u32 rk_dma_addr_dte(dma_addr_t dt_dma) -{ - return dt_dma; -} - -#define DT_HI_MASK GENMASK_ULL(39, 32) -#define DTE_BASE_HI_MASK GENMASK(11, 4) -#define DT_SHIFT 28 - -static inline phys_addr_t rk_dte_addr_phys_v2(u32 addr) -{ - u64 addr64 = addr; - return (phys_addr_t)(addr64 & RK_DTE_PT_ADDRESS_MASK) | - ((addr64 & DTE_BASE_HI_MASK) << DT_SHIFT); -} - -static inline u32 rk_dma_addr_dte_v2(dma_addr_t dt_dma) -{ - return (dt_dma & RK_DTE_PT_ADDRESS_MASK) | - ((dt_dma & DT_HI_MASK) >> DT_SHIFT); -} - static void log_iova(struct rk_iommu *iommu, int index, dma_addr_t iova) { void __iomem *base = iommu->bases[index]; @@ -577,7 +549,7 @@ static void log_iova(struct rk_iommu *iommu, int index, dma_addr_t iova) page_offset = rk_iova_page_offset(iova); mmu_dte_addr = rk_iommu_read(base, RK_MMU_DTE_ADDR); - mmu_dte_addr_phys = rk_ops->dte_addr_phys(mmu_dte_addr); + mmu_dte_addr_phys = rk_ops->pt_address(mmu_dte_addr); dte_addr_phys = mmu_dte_addr_phys + (4 * dte_index); dte_addr = phys_to_virt(dte_addr_phys); @@ -756,7 +728,7 @@ static u32 *rk_dte_get_page_table(struct rk_iommu_domain *rk_domain, if (rk_dte_is_pt_valid(dte)) goto done; - page_table = (u32 *)get_zeroed_page(GFP_ATOMIC | GFP_DMA32); + page_table = (u32 *)get_zeroed_page(GFP_ATOMIC | rk_ops->gfp_flags); if (!page_table) return ERR_PTR(-ENOMEM); @@ -967,7 +939,7 @@ static int rk_iommu_enable(struct rk_iommu *iommu) for (i = 0; i < iommu->num_mmu; i++) { rk_iommu_write(iommu->bases[i], RK_MMU_DTE_ADDR, - rk_ops->dma_addr_dte(rk_domain->dt_dma)); + rk_ops->mk_dtentries(rk_domain->dt_dma)); rk_iommu_base_command(iommu->bases[i], RK_MMU_CMD_ZAP_CACHE); rk_iommu_write(iommu->bases[i], RK_MMU_INT_MASK, RK_MMU_IRQ_MASK); } @@ -1105,7 +1077,7 @@ static struct iommu_domain *rk_iommu_domain_alloc(unsigned type) * Each level1 (dt) and level2 (pt) table has 1024 4-byte entries. * Allocate one 4 KiB page for each table. 
*/ - rk_domain->dt = (u32 *)get_zeroed_page(GFP_KERNEL | GFP_DMA32); + rk_domain->dt = (u32 *)get_zeroed_page(GFP_KERNEL | rk_ops->gfp_flags); if (!rk_domain->dt) goto err_free_domain; @@ -1405,18 +1377,16 @@ static struct rk_iommu_ops iommu_data_ops_v1 = { .pt_address = &rk_dte_pt_address, .mk_dtentries = &rk_mk_dte, .mk_ptentries = &rk_mk_pte, - .dte_addr_phys = &rk_dte_addr_phys, - .dma_addr_dte = &rk_dma_addr_dte, .dma_bit_mask = DMA_BIT_MASK(32), + .gfp_flags = GFP_DMA32, }; static struct rk_iommu_ops iommu_data_ops_v2 = { .pt_address = &rk_dte_pt_address_v2, .mk_dtentries = &rk_mk_dte_v2, .mk_ptentries = &rk_mk_pte_v2, - .dte_addr_phys = &rk_dte_addr_phys_v2, - .dma_addr_dte = &rk_dma_addr_dte_v2, .dma_bit_mask = DMA_BIT_MASK(40), + .gfp_flags = 0, }; static const struct of_device_id rk_iommu_dt_ids[] = { diff --git a/drivers/iommu/sprd-iommu.c b/drivers/iommu/sprd-iommu.c index 39e34fdeccda..2fa9afebd4f5 100644 --- a/drivers/iommu/sprd-iommu.c +++ b/drivers/iommu/sprd-iommu.c @@ -14,6 +14,7 @@ #include <linux/mfd/syscon.h> #include <linux/module.h> #include <linux/of_platform.h> +#include <linux/platform_device.h> #include <linux/regmap.h> #include <linux/slab.h> @@ -148,6 +149,7 @@ static struct iommu_domain *sprd_iommu_domain_alloc(unsigned int domain_type) dom->domain.geometry.aperture_start = 0; dom->domain.geometry.aperture_end = SZ_256M - 1; + dom->domain.geometry.force_aperture = true; return &dom->domain; } diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index 1cbf063ccf14..e445f80d0226 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -9,7 +9,7 @@ #include <linux/iommu.h> #include <linux/kernel.h> #include <linux/of.h> -#include <linux/of_device.h> +#include <linux/of_platform.h> #include <linux/pci.h> #include <linux/platform_device.h> #include <linux/slab.h> diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c index 3551ed057774..17dcd826f5c2 100644 --- a/drivers/iommu/virtio-iommu.c +++ b/drivers/iommu/virtio-iommu.c @@ -13,7 +13,7 @@ #include <linux/interval_tree.h> #include <linux/iommu.h> #include <linux/module.h> -#include <linux/of_platform.h> +#include <linux/of.h> #include <linux/pci.h> #include <linux/virtio.h> #include <linux/virtio_config.h> |
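
The iommufd_get_hw_info() handler added above copies min(data_len, kernel length) bytes of driver-specific data to the user buffer, zero-fills any remaining user space, and writes the kernel-supported length back into data_len. A minimal userspace sketch of that contract, assuming <linux/iommufd.h> from a kernel containing this series and a dev_id obtained from an earlier device bind (the value is a placeholder, not something this patch defines):

#include <stdint.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/iommufd.h>

/* Query hardware info for an already-bound device; dev_id is a placeholder. */
static int query_hw_info(int iommufd, __u32 dev_id)
{
	__u64 buf[8] = {};			/* caller-chosen buffer size */
	struct iommu_hw_info cmd = {
		.size = sizeof(cmd),		/* flags/__reserved stay zero */
		.dev_id = dev_id,
		.data_len = sizeof(buf),
		.data_uptr = (__u64)(uintptr_t)buf,
	};

	if (ioctl(iommufd, IOMMU_GET_HW_INFO, &cmd))
		return -1;

	/*
	 * out_data_type says how to parse the buffer; data_len now holds the
	 * length the kernel supports, which may exceed sizeof(buf).  Any tail
	 * of buf beyond the copied bytes was cleared by the kernel.
	 */
	printf("type %u, kernel length %u\n", cmd.out_data_type, cmd.data_len);
	return 0;
}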
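iommufd_hwpt_alloc() above adds IOMMU_HWPT_ALLOC, which takes a bound device (dev_id) and an IOAS (pt_id) and returns a manually allocated hw_pagetable in out_hwpt_id. A hedged userspace sketch, with dev_id again standing in for a previously bound device:

#include <sys/ioctl.h>
#include <linux/iommufd.h>

/* Allocate an IOAS and then a hw_pagetable backed by it. */
static int alloc_hwpt(int iommufd, __u32 dev_id, __u32 *out_hwpt_id)
{
	struct iommu_ioas_alloc ioas = { .size = sizeof(ioas) };
	struct iommu_hwpt_alloc hwpt = { .size = sizeof(hwpt) };

	if (ioctl(iommufd, IOMMU_IOAS_ALLOC, &ioas))
		return -1;

	hwpt.dev_id = dev_id;
	hwpt.pt_id = ioas.out_ioas_id;	/* IOAS that backs the page table */
	/* flags and __reserved must be zero or the kernel returns -EOPNOTSUPP */
	if (ioctl(iommufd, IOMMU_HWPT_ALLOC, &hwpt))
		return -1;

	*out_hwpt_id = hwpt.out_hwpt_id;
	return 0;
}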
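For in-kernel users, the section also adds iommufd_access_replace() beside the existing iommufd_access_attach()/iommufd_access_detach(): replace switches an already-attached access to a new IOAS under ioas_lock (invoking the ops->unmap callback for the old IOAS) and returns -ENOENT if nothing is attached yet. A small, hypothetical driver-side sketch of that contract; my_set_ioas() is illustrative only and the access object is assumed to come from iommufd_access_create():

#include <linux/errno.h>
#include <linux/iommufd.h>

/* Point an emulated device's access at ioas_id, attached or not. */
static int my_set_ioas(struct iommufd_access *access, u32 ioas_id)
{
	int rc;

	/* Replace needs a current attachment; fall back to a fresh attach. */
	rc = iommufd_access_replace(access, ioas_id);
	if (rc == -ENOENT)
		rc = iommufd_access_attach(access, ioas_id);
	return rc;
}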