diff options
Diffstat (limited to 'drivers/iommu/intel')
-rw-r--r-- | drivers/iommu/intel/Kconfig | 12 | ||||
-rw-r--r-- | drivers/iommu/intel/Makefile | 2 | ||||
-rw-r--r-- | drivers/iommu/intel/dmar.c | 26 | ||||
-rw-r--r-- | drivers/iommu/intel/iommu.c | 551 | ||||
-rw-r--r-- | drivers/iommu/intel/iommu.h | 21 | ||||
-rw-r--r-- | drivers/iommu/intel/nested.c | 16 | ||||
-rw-r--r-- | drivers/iommu/intel/pasid.c | 210 | ||||
-rw-r--r-- | drivers/iommu/intel/pasid.h | 3 | ||||
-rw-r--r-- | drivers/iommu/intel/perf.c | 2 | ||||
-rw-r--r-- | drivers/iommu/intel/svm.c | 76 |
10 files changed, 613 insertions, 306 deletions
diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig index 012cd2541a68..6cf9f48e7d8c 100644 --- a/drivers/iommu/intel/Kconfig +++ b/drivers/iommu/intel/Kconfig @@ -51,6 +51,7 @@ config INTEL_IOMMU_SVM depends on X86_64 select MMU_NOTIFIER select IOMMU_SVA + select IOMMU_IOPF help Shared Virtual Memory (SVM) provides a facility for devices to access DMA resources through process address space by @@ -64,17 +65,6 @@ config INTEL_IOMMU_DEFAULT_ON one is found. If this option is not selected, DMAR support can be enabled by passing intel_iommu=on to the kernel. -config INTEL_IOMMU_BROKEN_GFX_WA - bool "Workaround broken graphics drivers (going away soon)" - depends on BROKEN && X86 - help - Current Graphics drivers tend to use physical address - for DMA and avoid using DMA APIs. Setting this config - option permits the IOMMU driver to set a unity map for - all the OS-visible memory. Hence the driver can continue - to use physical addresses for DMA, at least until this - option is removed in the 2.6.32 kernel. - config INTEL_IOMMU_FLOPPY_WA def_bool y depends on X86 diff --git a/drivers/iommu/intel/Makefile b/drivers/iommu/intel/Makefile index 5dabf081a779..5402b699a122 100644 --- a/drivers/iommu/intel/Makefile +++ b/drivers/iommu/intel/Makefile @@ -5,5 +5,7 @@ obj-$(CONFIG_DMAR_TABLE) += trace.o cap_audit.o obj-$(CONFIG_DMAR_PERF) += perf.o obj-$(CONFIG_INTEL_IOMMU_DEBUGFS) += debugfs.o obj-$(CONFIG_INTEL_IOMMU_SVM) += svm.o +ifdef CONFIG_INTEL_IOMMU obj-$(CONFIG_IRQ_REMAP) += irq_remapping.o +endif obj-$(CONFIG_INTEL_IOMMU_PERF_EVENTS) += perfmon.o diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index 23cb80d62a9a..36d7427b1202 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -1095,7 +1095,9 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd) iommu->agaw = agaw; iommu->msagaw = msagaw; iommu->segment = drhd->segment; - + iommu->device_rbtree = RB_ROOT; + spin_lock_init(&iommu->device_rbtree_lock); + mutex_init(&iommu->iopf_lock); iommu->node = NUMA_NO_NODE; ver = readl(iommu->reg + DMAR_VER_REG); @@ -1271,6 +1273,8 @@ static int qi_check_fault(struct intel_iommu *iommu, int index, int wait_index) { u32 fault; int head, tail; + struct device *dev; + u64 iqe_err, ite_sid; struct q_inval *qi = iommu->qi; int shift = qi_shift(iommu); @@ -1315,6 +1319,13 @@ static int qi_check_fault(struct intel_iommu *iommu, int index, int wait_index) tail = readl(iommu->reg + DMAR_IQT_REG); tail = ((tail >> shift) - 1 + QI_LENGTH) % QI_LENGTH; + /* + * SID field is valid only when the ITE field is Set in FSTS_REG + * see Intel VT-d spec r4.1, section 11.4.9.9 + */ + iqe_err = dmar_readq(iommu->reg + DMAR_IQER_REG); + ite_sid = DMAR_IQER_REG_ITESID(iqe_err); + writel(DMA_FSTS_ITE, iommu->reg + DMAR_FSTS_REG); pr_info("Invalidation Time-out Error (ITE) cleared\n"); @@ -1324,6 +1335,19 @@ static int qi_check_fault(struct intel_iommu *iommu, int index, int wait_index) head = (head - 2 + QI_LENGTH) % QI_LENGTH; } while (head != tail); + /* + * If device was released or isn't present, no need to retry + * the ATS invalidate request anymore. + * + * 0 value of ite_sid means old VT-d device, no ite_sid value. + * see Intel VT-d spec r4.1, section 11.4.9.9 + */ + if (ite_sid) { + dev = device_rbtree_find(iommu, ite_sid); + if (!dev || !dev_is_pci(dev) || + !pci_device_is_present(to_pci_dev(dev))) + return -ETIMEDOUT; + } if (qi->desc_status[wait_index] == QI_ABORT) return -EAGAIN; } diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 6fb5f6fceea1..50eb9aed47cc 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -27,7 +27,6 @@ #include "iommu.h" #include "../dma-iommu.h" #include "../irq_remapping.h" -#include "../iommu-sva.h" #include "pasid.h" #include "cap_audit.h" #include "perfmon.h" @@ -97,6 +96,81 @@ static phys_addr_t root_entry_uctp(struct root_entry *re) return re->hi & VTD_PAGE_MASK; } +static int device_rid_cmp_key(const void *key, const struct rb_node *node) +{ + struct device_domain_info *info = + rb_entry(node, struct device_domain_info, node); + const u16 *rid_lhs = key; + + if (*rid_lhs < PCI_DEVID(info->bus, info->devfn)) + return -1; + + if (*rid_lhs > PCI_DEVID(info->bus, info->devfn)) + return 1; + + return 0; +} + +static int device_rid_cmp(struct rb_node *lhs, const struct rb_node *rhs) +{ + struct device_domain_info *info = + rb_entry(lhs, struct device_domain_info, node); + u16 key = PCI_DEVID(info->bus, info->devfn); + + return device_rid_cmp_key(&key, rhs); +} + +/* + * Looks up an IOMMU-probed device using its source ID. + * + * Returns the pointer to the device if there is a match. Otherwise, + * returns NULL. + * + * Note that this helper doesn't guarantee that the device won't be + * released by the iommu subsystem after being returned. The caller + * should use its own synchronization mechanism to avoid the device + * being released during its use if its possibly the case. + */ +struct device *device_rbtree_find(struct intel_iommu *iommu, u16 rid) +{ + struct device_domain_info *info = NULL; + struct rb_node *node; + unsigned long flags; + + spin_lock_irqsave(&iommu->device_rbtree_lock, flags); + node = rb_find(&rid, &iommu->device_rbtree, device_rid_cmp_key); + if (node) + info = rb_entry(node, struct device_domain_info, node); + spin_unlock_irqrestore(&iommu->device_rbtree_lock, flags); + + return info ? info->dev : NULL; +} + +static int device_rbtree_insert(struct intel_iommu *iommu, + struct device_domain_info *info) +{ + struct rb_node *curr; + unsigned long flags; + + spin_lock_irqsave(&iommu->device_rbtree_lock, flags); + curr = rb_find_add(&info->node, &iommu->device_rbtree, device_rid_cmp); + spin_unlock_irqrestore(&iommu->device_rbtree_lock, flags); + if (WARN_ON(curr)) + return -EEXIST; + + return 0; +} + +static void device_rbtree_remove(struct device_domain_info *info) +{ + struct intel_iommu *iommu = info->iommu; + unsigned long flags; + + spin_lock_irqsave(&iommu->device_rbtree_lock, flags); + rb_erase(&info->node, &iommu->device_rbtree); + spin_unlock_irqrestore(&iommu->device_rbtree_lock, flags); +} + /* * This domain is a statically identity mapping domain. * 1. This domain creats a static 1:1 mapping to all usable memory. @@ -396,8 +470,6 @@ static int domain_update_device_node(struct dmar_domain *domain) return nid; } -static void domain_update_iotlb(struct dmar_domain *domain); - /* Return the super pagesize bitmap if supported. */ static unsigned long domain_super_pgsize_bitmap(struct dmar_domain *domain) { @@ -1218,7 +1290,7 @@ domain_lookup_dev_info(struct dmar_domain *domain, return NULL; } -static void domain_update_iotlb(struct dmar_domain *domain) +void domain_update_iotlb(struct dmar_domain *domain) { struct dev_pasid_info *dev_pasid; struct device_domain_info *info; @@ -1368,6 +1440,46 @@ static void domain_flush_pasid_iotlb(struct intel_iommu *iommu, spin_unlock_irqrestore(&domain->lock, flags); } +static void __iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did, + unsigned long pfn, unsigned int pages, + int ih) +{ + unsigned int aligned_pages = __roundup_pow_of_two(pages); + unsigned long bitmask = aligned_pages - 1; + unsigned int mask = ilog2(aligned_pages); + u64 addr = (u64)pfn << VTD_PAGE_SHIFT; + + /* + * PSI masks the low order bits of the base address. If the + * address isn't aligned to the mask, then compute a mask value + * needed to ensure the target range is flushed. + */ + if (unlikely(bitmask & pfn)) { + unsigned long end_pfn = pfn + pages - 1, shared_bits; + + /* + * Since end_pfn <= pfn + bitmask, the only way bits + * higher than bitmask can differ in pfn and end_pfn is + * by carrying. This means after masking out bitmask, + * high bits starting with the first set bit in + * shared_bits are all equal in both pfn and end_pfn. + */ + shared_bits = ~(pfn ^ end_pfn) & ~bitmask; + mask = shared_bits ? __ffs(shared_bits) : BITS_PER_LONG; + } + + /* + * Fallback to domain selective flush if no PSI support or + * the size is too big. + */ + if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap)) + iommu->flush.flush_iotlb(iommu, did, 0, 0, + DMA_TLB_DSI_FLUSH); + else + iommu->flush.flush_iotlb(iommu, did, addr | ih, mask, + DMA_TLB_PSI_FLUSH); +} + static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, struct dmar_domain *domain, unsigned long pfn, unsigned int pages, @@ -1384,42 +1496,10 @@ static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, if (ih) ih = 1 << 6; - if (domain->use_first_level) { + if (domain->use_first_level) domain_flush_pasid_iotlb(iommu, domain, addr, pages, ih); - } else { - unsigned long bitmask = aligned_pages - 1; - - /* - * PSI masks the low order bits of the base address. If the - * address isn't aligned to the mask, then compute a mask value - * needed to ensure the target range is flushed. - */ - if (unlikely(bitmask & pfn)) { - unsigned long end_pfn = pfn + pages - 1, shared_bits; - - /* - * Since end_pfn <= pfn + bitmask, the only way bits - * higher than bitmask can differ in pfn and end_pfn is - * by carrying. This means after masking out bitmask, - * high bits starting with the first set bit in - * shared_bits are all equal in both pfn and end_pfn. - */ - shared_bits = ~(pfn ^ end_pfn) & ~bitmask; - mask = shared_bits ? __ffs(shared_bits) : BITS_PER_LONG; - } - - /* - * Fallback to domain selective flush if no PSI support or - * the size is too big. - */ - if (!cap_pgsel_inv(iommu->cap) || - mask > cap_max_amask_val(iommu->cap)) - iommu->flush.flush_iotlb(iommu, did, 0, 0, - DMA_TLB_DSI_FLUSH); - else - iommu->flush.flush_iotlb(iommu, did, addr | ih, mask, - DMA_TLB_PSI_FLUSH); - } + else + __iommu_flush_iotlb_psi(iommu, did, pfn, pages, ih); /* * In caching mode, changes of pages from non-present to present require @@ -1443,6 +1523,46 @@ static void __mapping_notify_one(struct intel_iommu *iommu, struct dmar_domain * iommu_flush_write_buffer(iommu); } +/* + * Flush the relevant caches in nested translation if the domain + * also serves as a parent + */ +static void parent_domain_flush(struct dmar_domain *domain, + unsigned long pfn, + unsigned long pages, int ih) +{ + struct dmar_domain *s1_domain; + + spin_lock(&domain->s1_lock); + list_for_each_entry(s1_domain, &domain->s1_domains, s2_link) { + struct device_domain_info *device_info; + struct iommu_domain_info *info; + unsigned long flags; + unsigned long i; + + xa_for_each(&s1_domain->iommu_array, i, info) + __iommu_flush_iotlb_psi(info->iommu, info->did, + pfn, pages, ih); + + if (!s1_domain->has_iotlb_device) + continue; + + spin_lock_irqsave(&s1_domain->lock, flags); + list_for_each_entry(device_info, &s1_domain->devices, link) + /* + * Address translation cache in device side caches the + * result of nested translation. There is no easy way + * to identify the exact set of nested translations + * affected by a change in S2. So just flush the entire + * device cache. + */ + __iommu_flush_dev_iotlb(device_info, 0, + MAX_AGAW_PFN_WIDTH); + spin_unlock_irqrestore(&s1_domain->lock, flags); + } + spin_unlock(&domain->s1_lock); +} + static void intel_flush_iotlb_all(struct iommu_domain *domain) { struct dmar_domain *dmar_domain = to_dmar_domain(domain); @@ -1462,6 +1582,9 @@ static void intel_flush_iotlb_all(struct iommu_domain *domain) if (!cap_caching_mode(iommu->cap)) iommu_flush_dev_iotlb(dmar_domain, 0, MAX_AGAW_PFN_WIDTH); } + + if (dmar_domain->nested_parent) + parent_domain_flush(dmar_domain, 0, -1, 0); } static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu) @@ -1727,34 +1850,17 @@ static void domain_exit(struct dmar_domain *domain) kfree(domain); } -/* - * Get the PASID directory size for scalable mode context entry. - * Value of X in the PDTS field of a scalable mode context entry - * indicates PASID directory with 2^(X + 7) entries. - */ -static unsigned long context_get_sm_pds(struct pasid_table *table) -{ - unsigned long pds, max_pde; - - max_pde = table->max_pasid >> PASID_PDE_SHIFT; - pds = find_first_bit(&max_pde, MAX_NR_PASID_BITS); - if (pds < 7) - return 0; - - return pds - 7; -} - static int domain_context_mapping_one(struct dmar_domain *domain, struct intel_iommu *iommu, - struct pasid_table *table, u8 bus, u8 devfn) { struct device_domain_info *info = domain_lookup_dev_info(domain, iommu, bus, devfn); u16 did = domain_id_iommu(domain, iommu); int translation = CONTEXT_TT_MULTI_LEVEL; + struct dma_pte *pgd = domain->pgd; struct context_entry *context; - int ret; + int agaw, ret; if (hw_pass_through && domain_type_is_si(domain)) translation = CONTEXT_TT_PASS_THROUGH; @@ -1797,65 +1903,37 @@ static int domain_context_mapping_one(struct dmar_domain *domain, } context_clear_entry(context); + context_set_domain_id(context, did); - if (sm_supported(iommu)) { - unsigned long pds; - - /* Setup the PASID DIR pointer: */ - pds = context_get_sm_pds(table); - context->lo = (u64)virt_to_phys(table->table) | - context_pdts(pds); - - /* Setup the RID_PASID field: */ - context_set_sm_rid2pasid(context, IOMMU_NO_PASID); - + if (translation != CONTEXT_TT_PASS_THROUGH) { /* - * Setup the Device-TLB enable bit and Page request - * Enable bit: + * Skip top levels of page tables for iommu which has + * less agaw than default. Unnecessary for PT mode. */ - if (info && info->ats_supported) - context_set_sm_dte(context); - if (info && info->pri_supported) - context_set_sm_pre(context); - if (info && info->pasid_supported) - context_set_pasid(context); - } else { - struct dma_pte *pgd = domain->pgd; - int agaw; - - context_set_domain_id(context, did); - - if (translation != CONTEXT_TT_PASS_THROUGH) { - /* - * Skip top levels of page tables for iommu which has - * less agaw than default. Unnecessary for PT mode. - */ - for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) { - ret = -ENOMEM; - pgd = phys_to_virt(dma_pte_addr(pgd)); - if (!dma_pte_present(pgd)) - goto out_unlock; - } - - if (info && info->ats_supported) - translation = CONTEXT_TT_DEV_IOTLB; - else - translation = CONTEXT_TT_MULTI_LEVEL; - - context_set_address_root(context, virt_to_phys(pgd)); - context_set_address_width(context, agaw); - } else { - /* - * In pass through mode, AW must be programmed to - * indicate the largest AGAW value supported by - * hardware. And ASR is ignored by hardware. - */ - context_set_address_width(context, iommu->msagaw); + for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) { + ret = -ENOMEM; + pgd = phys_to_virt(dma_pte_addr(pgd)); + if (!dma_pte_present(pgd)) + goto out_unlock; } - context_set_translation_type(context, translation); + if (info && info->ats_supported) + translation = CONTEXT_TT_DEV_IOTLB; + else + translation = CONTEXT_TT_MULTI_LEVEL; + + context_set_address_root(context, virt_to_phys(pgd)); + context_set_address_width(context, agaw); + } else { + /* + * In pass through mode, AW must be programmed to + * indicate the largest AGAW value supported by + * hardware. And ASR is ignored by hardware. + */ + context_set_address_width(context, iommu->msagaw); } + context_set_translation_type(context, translation); context_set_fault_enable(context); context_set_present(context); if (!ecap_coherent(iommu->ecap)) @@ -1885,43 +1963,29 @@ out_unlock: return ret; } -struct domain_context_mapping_data { - struct dmar_domain *domain; - struct intel_iommu *iommu; - struct pasid_table *table; -}; - static int domain_context_mapping_cb(struct pci_dev *pdev, u16 alias, void *opaque) { - struct domain_context_mapping_data *data = opaque; + struct device_domain_info *info = dev_iommu_priv_get(&pdev->dev); + struct intel_iommu *iommu = info->iommu; + struct dmar_domain *domain = opaque; - return domain_context_mapping_one(data->domain, data->iommu, - data->table, PCI_BUS_NUM(alias), - alias & 0xff); + return domain_context_mapping_one(domain, iommu, + PCI_BUS_NUM(alias), alias & 0xff); } static int domain_context_mapping(struct dmar_domain *domain, struct device *dev) { struct device_domain_info *info = dev_iommu_priv_get(dev); - struct domain_context_mapping_data data; struct intel_iommu *iommu = info->iommu; u8 bus = info->bus, devfn = info->devfn; - struct pasid_table *table; - - table = intel_pasid_get_table(dev); if (!dev_is_pci(dev)) - return domain_context_mapping_one(domain, iommu, table, - bus, devfn); - - data.domain = domain; - data.iommu = iommu; - data.table = table; + return domain_context_mapping_one(domain, iommu, bus, devfn); return pci_for_each_dma_alias(to_pci_dev(dev), - &domain_context_mapping_cb, &data); + domain_context_mapping_cb, domain); } /* Returns a number of VTD pages, but aligned to MM page size */ @@ -1985,6 +2049,9 @@ static void switch_to_super_page(struct dmar_domain *domain, iommu_flush_iotlb_psi(info->iommu, domain, start_pfn, lvl_pages, 0, 0); + if (domain->nested_parent) + parent_domain_flush(domain, start_pfn, + lvl_pages, 0); } pte++; @@ -2108,9 +2175,6 @@ static void domain_context_clear_one(struct device_domain_info *info, u8 bus, u8 struct context_entry *context; u16 did_old; - if (!iommu) - return; - spin_lock(&iommu->lock); context = iommu_context_addr(iommu, bus, devfn, 0); if (!context) { @@ -2118,14 +2182,7 @@ static void domain_context_clear_one(struct device_domain_info *info, u8 bus, u8 return; } - if (sm_supported(iommu)) { - if (hw_pass_through && domain_type_is_si(info->domain)) - did_old = FLPT_DEFAULT_DID; - else - did_old = domain_id_iommu(info->domain, iommu); - } else { - did_old = context_domain_id(context); - } + did_old = context_domain_id(context); context_clear_entry(context); __iommu_flush_cache(iommu, context, sizeof(*context)); @@ -2136,9 +2193,6 @@ static void domain_context_clear_one(struct device_domain_info *info, u8 bus, u8 DMA_CCMD_MASK_NOBIT, DMA_CCMD_DEVICE_INVL); - if (sm_supported(iommu)) - qi_flush_pasid_cache(iommu, did_old, QI_PC_ALL_PASIDS, 0); - iommu->flush.flush_iotlb(iommu, did_old, 0, @@ -2278,28 +2332,19 @@ static int dmar_domain_attach_device(struct dmar_domain *domain, list_add(&info->link, &domain->devices); spin_unlock_irqrestore(&domain->lock, flags); - /* PASID table is mandatory for a PCI device in scalable mode. */ - if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) { - /* Setup the PASID entry for requests without PASID: */ - if (hw_pass_through && domain_type_is_si(domain)) - ret = intel_pasid_setup_pass_through(iommu, - dev, IOMMU_NO_PASID); - else if (domain->use_first_level) - ret = domain_setup_first_level(iommu, domain, dev, - IOMMU_NO_PASID); - else - ret = intel_pasid_setup_second_level(iommu, domain, - dev, IOMMU_NO_PASID); - if (ret) { - dev_err(dev, "Setup RID2PASID failed\n"); - device_block_translation(dev); - return ret; - } - } + if (dev_is_real_dma_subdevice(dev)) + return 0; + + if (!sm_supported(iommu)) + ret = domain_context_mapping(domain, dev); + else if (hw_pass_through && domain_type_is_si(domain)) + ret = intel_pasid_setup_pass_through(iommu, dev, IOMMU_NO_PASID); + else if (domain->use_first_level) + ret = domain_setup_first_level(iommu, domain, dev, IOMMU_NO_PASID); + else + ret = intel_pasid_setup_second_level(iommu, domain, dev, IOMMU_NO_PASID); - ret = domain_context_mapping(domain, dev); if (ret) { - dev_err(dev, "Domain context map failed\n"); device_block_translation(dev); return ret; } @@ -2660,10 +2705,6 @@ static int __init init_dmars(void) iommu_set_root_entry(iommu); } -#ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA - dmar_map_gfx = 0; -#endif - if (!dmar_map_gfx) iommu_identity_mapping |= IDENTMAP_GFX; @@ -3747,30 +3788,6 @@ static void domain_context_clear(struct device_domain_info *info) &domain_context_clear_one_cb, info); } -static void dmar_remove_one_dev_info(struct device *dev) -{ - struct device_domain_info *info = dev_iommu_priv_get(dev); - struct dmar_domain *domain = info->domain; - struct intel_iommu *iommu = info->iommu; - unsigned long flags; - - if (!dev_is_real_dma_subdevice(info->dev)) { - if (dev_is_pci(info->dev) && sm_supported(iommu)) - intel_pasid_tear_down_entry(iommu, info->dev, - IOMMU_NO_PASID, false); - - iommu_disable_pci_caps(info); - domain_context_clear(info); - } - - spin_lock_irqsave(&domain->lock, flags); - list_del(&info->link); - spin_unlock_irqrestore(&domain->lock, flags); - - domain_detach_iommu(domain, iommu); - info->domain = NULL; -} - /* * Clear the page table pointer in context or pasid table entries so that * all DMA requests without PASID from the device are blocked. If the page @@ -3883,6 +3900,7 @@ intel_iommu_domain_alloc_user(struct device *dev, u32 flags, bool dirty_tracking = flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING; bool nested_parent = flags & IOMMU_HWPT_ALLOC_NEST_PARENT; struct intel_iommu *iommu = info->iommu; + struct dmar_domain *dmar_domain; struct iommu_domain *domain; /* Must be NESTING domain */ @@ -3908,11 +3926,16 @@ intel_iommu_domain_alloc_user(struct device *dev, u32 flags, if (!domain) return ERR_PTR(-ENOMEM); - if (nested_parent) - to_dmar_domain(domain)->nested_parent = true; + dmar_domain = to_dmar_domain(domain); + + if (nested_parent) { + dmar_domain->nested_parent = true; + INIT_LIST_HEAD(&dmar_domain->s1_domains); + spin_lock_init(&dmar_domain->s1_lock); + } if (dirty_tracking) { - if (to_dmar_domain(domain)->use_first_level) { + if (dmar_domain->use_first_level) { iommu_domain_free(domain); return ERR_PTR(-EOPNOTSUPP); } @@ -3924,8 +3947,12 @@ intel_iommu_domain_alloc_user(struct device *dev, u32 flags, static void intel_iommu_domain_free(struct iommu_domain *domain) { + struct dmar_domain *dmar_domain = to_dmar_domain(domain); + + WARN_ON(dmar_domain->nested_parent && + !list_empty(&dmar_domain->s1_domains)); if (domain != &si_domain->domain) - domain_exit(to_dmar_domain(domain)); + domain_exit(dmar_domain); } int prepare_domain_attach_device(struct iommu_domain *domain, @@ -3965,6 +3992,10 @@ int prepare_domain_attach_device(struct iommu_domain *domain, dmar_domain->agaw--; } + if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev) && + context_copied(iommu, info->bus, info->devfn)) + return intel_pasid_setup_sm_context(dev); + return 0; } @@ -4107,6 +4138,9 @@ static void intel_iommu_tlb_sync(struct iommu_domain *domain, start_pfn, nrpages, list_empty(&gather->freelist), 0); + if (dmar_domain->nested_parent) + parent_domain_flush(dmar_domain, start_pfn, nrpages, + list_empty(&gather->freelist)); put_pages_list(&gather->freelist); } @@ -4265,26 +4299,50 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev) } dev_iommu_priv_set(dev, info); + ret = device_rbtree_insert(iommu, info); + if (ret) + goto free; if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) { ret = intel_pasid_alloc_table(dev); if (ret) { dev_err(dev, "PASID table allocation failed\n"); - kfree(info); - return ERR_PTR(ret); + goto clear_rbtree; + } + + if (!context_copied(iommu, info->bus, info->devfn)) { + ret = intel_pasid_setup_sm_context(dev); + if (ret) + goto free_table; } } intel_iommu_debugfs_create_dev(info); return &iommu->iommu; +free_table: + intel_pasid_free_table(dev); +clear_rbtree: + device_rbtree_remove(info); +free: + kfree(info); + + return ERR_PTR(ret); } static void intel_iommu_release_device(struct device *dev) { struct device_domain_info *info = dev_iommu_priv_get(dev); + struct intel_iommu *iommu = info->iommu; + + mutex_lock(&iommu->iopf_lock); + device_rbtree_remove(info); + mutex_unlock(&iommu->iopf_lock); + + if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev) && + !context_copied(iommu, info->bus, info->devfn)) + intel_pasid_teardown_sm_context(dev); - dmar_remove_one_dev_info(dev); intel_pasid_free_table(dev); intel_iommu_debugfs_remove_dev(info); kfree(info); @@ -4427,23 +4485,15 @@ static int intel_iommu_enable_iopf(struct device *dev) if (ret) return ret; - ret = iommu_register_device_fault_handler(dev, iommu_queue_iopf, dev); - if (ret) - goto iopf_remove_device; - ret = pci_enable_pri(pdev, PRQ_DEPTH); - if (ret) - goto iopf_unregister_handler; + if (ret) { + iopf_queue_remove_device(iommu->iopf_queue, dev); + return ret; + } + info->pri_enabled = 1; return 0; - -iopf_unregister_handler: - iommu_unregister_device_fault_handler(dev); -iopf_remove_device: - iopf_queue_remove_device(iommu->iopf_queue, dev); - - return ret; } static int intel_iommu_disable_iopf(struct device *dev) @@ -4464,14 +4514,7 @@ static int intel_iommu_disable_iopf(struct device *dev) */ pci_disable_pri(to_pci_dev(dev)); info->pri_enabled = 0; - - /* - * With PRI disabled and outstanding PRQs drained, unregistering - * fault handler and removing device from iopf queue should never - * fail. - */ - WARN_ON(iommu_unregister_device_fault_handler(dev)); - WARN_ON(iopf_queue_remove_device(iommu->iopf_queue, dev)); + iopf_queue_remove_device(iommu->iopf_queue, dev); return 0; } @@ -4664,21 +4707,70 @@ static void *intel_iommu_hw_info(struct device *dev, u32 *length, u32 *type) return vtd; } +/* + * Set dirty tracking for the device list of a domain. The caller must + * hold the domain->lock when calling it. + */ +static int device_set_dirty_tracking(struct list_head *devices, bool enable) +{ + struct device_domain_info *info; + int ret = 0; + + list_for_each_entry(info, devices, link) { + ret = intel_pasid_setup_dirty_tracking(info->iommu, info->dev, + IOMMU_NO_PASID, enable); + if (ret) + break; + } + + return ret; +} + +static int parent_domain_set_dirty_tracking(struct dmar_domain *domain, + bool enable) +{ + struct dmar_domain *s1_domain; + unsigned long flags; + int ret; + + spin_lock(&domain->s1_lock); + list_for_each_entry(s1_domain, &domain->s1_domains, s2_link) { + spin_lock_irqsave(&s1_domain->lock, flags); + ret = device_set_dirty_tracking(&s1_domain->devices, enable); + spin_unlock_irqrestore(&s1_domain->lock, flags); + if (ret) + goto err_unwind; + } + spin_unlock(&domain->s1_lock); + return 0; + +err_unwind: + list_for_each_entry(s1_domain, &domain->s1_domains, s2_link) { + spin_lock_irqsave(&s1_domain->lock, flags); + device_set_dirty_tracking(&s1_domain->devices, + domain->dirty_tracking); + spin_unlock_irqrestore(&s1_domain->lock, flags); + } + spin_unlock(&domain->s1_lock); + return ret; +} + static int intel_iommu_set_dirty_tracking(struct iommu_domain *domain, bool enable) { struct dmar_domain *dmar_domain = to_dmar_domain(domain); - struct device_domain_info *info; int ret; spin_lock(&dmar_domain->lock); if (dmar_domain->dirty_tracking == enable) goto out_unlock; - list_for_each_entry(info, &dmar_domain->devices, link) { - ret = intel_pasid_setup_dirty_tracking(info->iommu, - info->domain, info->dev, - IOMMU_NO_PASID, enable); + ret = device_set_dirty_tracking(&dmar_domain->devices, enable); + if (ret) + goto err_unwind; + + if (dmar_domain->nested_parent) { + ret = parent_domain_set_dirty_tracking(dmar_domain, enable); if (ret) goto err_unwind; } @@ -4690,10 +4782,8 @@ out_unlock: return 0; err_unwind: - list_for_each_entry(info, &dmar_domain->devices, link) - intel_pasid_setup_dirty_tracking(info->iommu, dmar_domain, - info->dev, IOMMU_NO_PASID, - dmar_domain->dirty_tracking); + device_set_dirty_tracking(&dmar_domain->devices, + dmar_domain->dirty_tracking); spin_unlock(&dmar_domain->lock); return ret; } @@ -4743,6 +4833,7 @@ static const struct iommu_dirty_ops intel_dirty_ops = { const struct iommu_ops intel_iommu_ops = { .blocked_domain = &blocking_domain, + .release_domain = &blocking_domain, .capable = intel_iommu_capable, .hw_info = intel_iommu_hw_info, .domain_alloc = intel_iommu_domain_alloc, diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index d02f916d8e59..404d2476a877 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -627,6 +627,10 @@ struct dmar_domain { int agaw; /* maximum mapped address */ u64 max_addr; + /* Protect the s1_domains list */ + spinlock_t s1_lock; + /* Track s1_domains nested on this domain */ + struct list_head s1_domains; }; /* Nested user domain */ @@ -637,6 +641,8 @@ struct dmar_domain { unsigned long s1_pgtbl; /* page table attributes */ struct iommu_hwpt_vtd_s1 s1_cfg; + /* link to parent domain siblings */ + struct list_head s2_link; }; }; @@ -713,9 +719,16 @@ struct intel_iommu { #endif struct iopf_queue *iopf_queue; unsigned char iopfq_name[16]; + /* Synchronization between fault report and iommu device release. */ + struct mutex iopf_lock; struct q_inval *qi; /* Queued invalidation info */ u32 iommu_state[MAX_SR_DMAR_REGS]; /* Store iommu states between suspend and resume.*/ + /* rb tree for all probed devices */ + struct rb_root device_rbtree; + /* protect the device_rbtree */ + spinlock_t device_rbtree_lock; + #ifdef CONFIG_IRQ_REMAP struct ir_table *ir_table; /* Interrupt remapping info */ struct irq_domain *ir_domain; @@ -749,6 +762,8 @@ struct device_domain_info { struct intel_iommu *iommu; /* IOMMU used by this device */ struct dmar_domain *domain; /* pointer to domain */ struct pasid_table *pasid_table; /* pasid table */ + /* device tracking node(lookup by PCI RID) */ + struct rb_node node; #ifdef CONFIG_INTEL_IOMMU_DEBUGFS struct dentry *debugfs_dentry; /* pointer to device directory dentry */ #endif @@ -1060,6 +1075,7 @@ int qi_submit_sync(struct intel_iommu *iommu, struct qi_desc *desc, */ #define QI_OPT_WAIT_DRAIN BIT(0) +void domain_update_iotlb(struct dmar_domain *domain); int domain_attach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu); void domain_detach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu); void device_block_translation(struct device *dev); @@ -1074,13 +1090,14 @@ void free_pgtable_page(void *vaddr); void iommu_flush_write_buffer(struct intel_iommu *iommu); struct iommu_domain *intel_nested_domain_alloc(struct iommu_domain *parent, const struct iommu_user_data *user_data); +struct device *device_rbtree_find(struct intel_iommu *iommu, u16 rid); #ifdef CONFIG_INTEL_IOMMU_SVM void intel_svm_check(struct intel_iommu *iommu); int intel_svm_enable_prq(struct intel_iommu *iommu); int intel_svm_finish_prq(struct intel_iommu *iommu); -int intel_svm_page_response(struct device *dev, struct iommu_fault_event *evt, - struct iommu_page_response *msg); +void intel_svm_page_response(struct device *dev, struct iopf_fault *evt, + struct iommu_page_response *msg); struct iommu_domain *intel_svm_domain_alloc(void); void intel_svm_remove_dev_pasid(struct device *dev, ioasid_t pasid); void intel_drain_pasid_prq(struct device *dev, u32 pasid); diff --git a/drivers/iommu/intel/nested.c b/drivers/iommu/intel/nested.c index f26c7f1c46cc..a7d68f3d518a 100644 --- a/drivers/iommu/intel/nested.c +++ b/drivers/iommu/intel/nested.c @@ -65,12 +65,20 @@ static int intel_nested_attach_dev(struct iommu_domain *domain, list_add(&info->link, &dmar_domain->devices); spin_unlock_irqrestore(&dmar_domain->lock, flags); + domain_update_iotlb(dmar_domain); + return 0; } static void intel_nested_domain_free(struct iommu_domain *domain) { - kfree(to_dmar_domain(domain)); + struct dmar_domain *dmar_domain = to_dmar_domain(domain); + struct dmar_domain *s2_domain = dmar_domain->s2_domain; + + spin_lock(&s2_domain->s1_lock); + list_del(&dmar_domain->s2_link); + spin_unlock(&s2_domain->s1_lock); + kfree(dmar_domain); } static void nested_flush_dev_iotlb(struct dmar_domain *domain, u64 addr, @@ -95,7 +103,7 @@ static void nested_flush_dev_iotlb(struct dmar_domain *domain, u64 addr, } static void intel_nested_flush_cache(struct dmar_domain *domain, u64 addr, - unsigned long npages, bool ih) + u64 npages, bool ih) { struct iommu_domain_info *info; unsigned int mask; @@ -201,5 +209,9 @@ struct iommu_domain *intel_nested_domain_alloc(struct iommu_domain *parent, spin_lock_init(&domain->lock); xa_init(&domain->iommu_array); + spin_lock(&s2_domain->s1_lock); + list_add(&domain->s2_link, &s2_domain->s1_domains); + spin_unlock(&s2_domain->s1_lock); + return &domain->domain; } diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c index 3239cefa4c33..11f0b856d74c 100644 --- a/drivers/iommu/intel/pasid.c +++ b/drivers/iommu/intel/pasid.c @@ -214,6 +214,9 @@ devtlb_invalidation_with_pasid(struct intel_iommu *iommu, if (!info || !info->ats_enabled) return; + if (pci_dev_is_disconnected(to_pci_dev(dev))) + return; + sid = info->bus << 8 | info->devfn; qdep = info->ats_qdep; pfsid = info->pfsid; @@ -428,7 +431,6 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu, * Set up dirty tracking on a second only or nested translation type. */ int intel_pasid_setup_dirty_tracking(struct intel_iommu *iommu, - struct dmar_domain *domain, struct device *dev, u32 pasid, bool enabled) { @@ -445,7 +447,7 @@ int intel_pasid_setup_dirty_tracking(struct intel_iommu *iommu, return -ENODEV; } - did = domain_id_iommu(domain, iommu); + did = pasid_get_domain_id(pte); pgtt = pasid_pte_get_pgtt(pte); if (pgtt != PASID_ENTRY_PGTT_SL_ONLY && pgtt != PASID_ENTRY_PGTT_NESTED) { @@ -658,6 +660,8 @@ int intel_pasid_setup_nested(struct intel_iommu *iommu, struct device *dev, pasid_set_domain_id(pte, did); pasid_set_address_width(pte, s2_domain->agaw); pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap)); + if (s2_domain->dirty_tracking) + pasid_set_ssade(pte); pasid_set_translation_type(pte, PASID_ENTRY_PGTT_NESTED); pasid_set_present(pte); spin_unlock(&iommu->lock); @@ -666,3 +670,205 @@ int intel_pasid_setup_nested(struct intel_iommu *iommu, struct device *dev, return 0; } + +/* + * Interfaces to setup or teardown a pasid table to the scalable-mode + * context table entry: + */ + +static void device_pasid_table_teardown(struct device *dev, u8 bus, u8 devfn) +{ + struct device_domain_info *info = dev_iommu_priv_get(dev); + struct intel_iommu *iommu = info->iommu; + struct context_entry *context; + + spin_lock(&iommu->lock); + context = iommu_context_addr(iommu, bus, devfn, false); + if (!context) { + spin_unlock(&iommu->lock); + return; + } + + context_clear_entry(context); + __iommu_flush_cache(iommu, context, sizeof(*context)); + spin_unlock(&iommu->lock); + + /* + * Cache invalidation for changes to a scalable-mode context table + * entry. + * + * Section 6.5.3.3 of the VT-d spec: + * - Device-selective context-cache invalidation; + * - Domain-selective PASID-cache invalidation to affected domains + * (can be skipped if all PASID entries were not-present); + * - Domain-selective IOTLB invalidation to affected domains; + * - Global Device-TLB invalidation to affected functions. + * + * The iommu has been parked in the blocking state. All domains have + * been detached from the device or PASID. The PASID and IOTLB caches + * have been invalidated during the domain detach path. + */ + iommu->flush.flush_context(iommu, 0, PCI_DEVID(bus, devfn), + DMA_CCMD_MASK_NOBIT, DMA_CCMD_DEVICE_INVL); + devtlb_invalidation_with_pasid(iommu, dev, IOMMU_NO_PASID); +} + +static int pci_pasid_table_teardown(struct pci_dev *pdev, u16 alias, void *data) +{ + struct device *dev = data; + + if (dev == &pdev->dev) + device_pasid_table_teardown(dev, PCI_BUS_NUM(alias), alias & 0xff); + + return 0; +} + +void intel_pasid_teardown_sm_context(struct device *dev) +{ + struct device_domain_info *info = dev_iommu_priv_get(dev); + + if (!dev_is_pci(dev)) { + device_pasid_table_teardown(dev, info->bus, info->devfn); + return; + } + + pci_for_each_dma_alias(to_pci_dev(dev), pci_pasid_table_teardown, dev); +} + +/* + * Get the PASID directory size for scalable mode context entry. + * Value of X in the PDTS field of a scalable mode context entry + * indicates PASID directory with 2^(X + 7) entries. + */ +static unsigned long context_get_sm_pds(struct pasid_table *table) +{ + unsigned long pds, max_pde; + + max_pde = table->max_pasid >> PASID_PDE_SHIFT; + pds = find_first_bit(&max_pde, MAX_NR_PASID_BITS); + if (pds < 7) + return 0; + + return pds - 7; +} + +static int context_entry_set_pasid_table(struct context_entry *context, + struct device *dev) +{ + struct device_domain_info *info = dev_iommu_priv_get(dev); + struct pasid_table *table = info->pasid_table; + struct intel_iommu *iommu = info->iommu; + unsigned long pds; + + context_clear_entry(context); + + pds = context_get_sm_pds(table); + context->lo = (u64)virt_to_phys(table->table) | context_pdts(pds); + context_set_sm_rid2pasid(context, IOMMU_NO_PASID); + + if (info->ats_supported) + context_set_sm_dte(context); + if (info->pri_supported) + context_set_sm_pre(context); + if (info->pasid_supported) + context_set_pasid(context); + + context_set_fault_enable(context); + context_set_present(context); + __iommu_flush_cache(iommu, context, sizeof(*context)); + + return 0; +} + +static int device_pasid_table_setup(struct device *dev, u8 bus, u8 devfn) +{ + struct device_domain_info *info = dev_iommu_priv_get(dev); + struct intel_iommu *iommu = info->iommu; + struct context_entry *context; + + spin_lock(&iommu->lock); + context = iommu_context_addr(iommu, bus, devfn, true); + if (!context) { + spin_unlock(&iommu->lock); + return -ENOMEM; + } + + if (context_present(context) && !context_copied(iommu, bus, devfn)) { + spin_unlock(&iommu->lock); + return 0; + } + + if (context_copied(iommu, bus, devfn)) { + context_clear_entry(context); + __iommu_flush_cache(iommu, context, sizeof(*context)); + + /* + * For kdump cases, old valid entries may be cached due to + * the in-flight DMA and copied pgtable, but there is no + * unmapping behaviour for them, thus we need explicit cache + * flushes for all affected domain IDs and PASIDs used in + * the copied PASID table. Given that we have no idea about + * which domain IDs and PASIDs were used in the copied tables, + * upgrade them to global PASID and IOTLB cache invalidation. + */ + iommu->flush.flush_context(iommu, 0, + PCI_DEVID(bus, devfn), + DMA_CCMD_MASK_NOBIT, + DMA_CCMD_DEVICE_INVL); + qi_flush_pasid_cache(iommu, 0, QI_PC_GLOBAL, 0); + iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH); + devtlb_invalidation_with_pasid(iommu, dev, IOMMU_NO_PASID); + + /* + * At this point, the device is supposed to finish reset at + * its driver probe stage, so no in-flight DMA will exist, + * and we don't need to worry anymore hereafter. + */ + clear_context_copied(iommu, bus, devfn); + } + + context_entry_set_pasid_table(context, dev); + spin_unlock(&iommu->lock); + + /* + * It's a non-present to present mapping. If hardware doesn't cache + * non-present entry we don't need to flush the caches. If it does + * cache non-present entries, then it does so in the special + * domain #0, which we have to flush: + */ + if (cap_caching_mode(iommu->cap)) { + iommu->flush.flush_context(iommu, 0, + PCI_DEVID(bus, devfn), + DMA_CCMD_MASK_NOBIT, + DMA_CCMD_DEVICE_INVL); + iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH); + } + + return 0; +} + +static int pci_pasid_table_setup(struct pci_dev *pdev, u16 alias, void *data) +{ + struct device *dev = data; + + if (dev != &pdev->dev) + return 0; + + return device_pasid_table_setup(dev, PCI_BUS_NUM(alias), alias & 0xff); +} + +/* + * Set the device's PASID table to its context table entry. + * + * The PASID table is set to the context entries of both device itself + * and its alias requester ID for DMA. + */ +int intel_pasid_setup_sm_context(struct device *dev) +{ + struct device_domain_info *info = dev_iommu_priv_get(dev); + + if (!dev_is_pci(dev)) + return device_pasid_table_setup(dev, info->bus, info->devfn); + + return pci_for_each_dma_alias(to_pci_dev(dev), pci_pasid_table_setup, dev); +} diff --git a/drivers/iommu/intel/pasid.h b/drivers/iommu/intel/pasid.h index 8d40d4c66e31..da9978fef7ac 100644 --- a/drivers/iommu/intel/pasid.h +++ b/drivers/iommu/intel/pasid.h @@ -307,7 +307,6 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu, struct dmar_domain *domain, struct device *dev, u32 pasid); int intel_pasid_setup_dirty_tracking(struct intel_iommu *iommu, - struct dmar_domain *domain, struct device *dev, u32 pasid, bool enabled); int intel_pasid_setup_pass_through(struct intel_iommu *iommu, @@ -319,4 +318,6 @@ void intel_pasid_tear_down_entry(struct intel_iommu *iommu, bool fault_ignore); void intel_pasid_setup_page_snoop_control(struct intel_iommu *iommu, struct device *dev, u32 pasid); +int intel_pasid_setup_sm_context(struct device *dev); +void intel_pasid_teardown_sm_context(struct device *dev); #endif /* __INTEL_PASID_H */ diff --git a/drivers/iommu/intel/perf.c b/drivers/iommu/intel/perf.c index 94ee70ac38e3..adc4de6bbd88 100644 --- a/drivers/iommu/intel/perf.c +++ b/drivers/iommu/intel/perf.c @@ -33,7 +33,7 @@ int dmar_latency_enable(struct intel_iommu *iommu, enum latency_type type) spin_lock_irqsave(&latency_lock, flags); if (!iommu->perf_statistic) { - iommu->perf_statistic = kzalloc(sizeof(*lstat) * DMAR_LATENCY_NUM, + iommu->perf_statistic = kcalloc(DMAR_LATENCY_NUM, sizeof(*lstat), GFP_ATOMIC); if (!iommu->perf_statistic) { ret = -ENOMEM; diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 40edd282903f..c1bed89b1026 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -22,7 +22,6 @@ #include "iommu.h" #include "pasid.h" #include "perf.h" -#include "../iommu-sva.h" #include "trace.h" static irqreturn_t prq_event_thread(int irq, void *d); @@ -315,10 +314,11 @@ out: return 0; } -static int intel_svm_bind_mm(struct intel_iommu *iommu, struct device *dev, - struct iommu_domain *domain, ioasid_t pasid) +static int intel_svm_set_dev_pasid(struct iommu_domain *domain, + struct device *dev, ioasid_t pasid) { struct device_domain_info *info = dev_iommu_priv_get(dev); + struct intel_iommu *iommu = info->iommu; struct mm_struct *mm = domain->mm; struct intel_svm_dev *sdev; struct intel_svm *svm; @@ -360,7 +360,6 @@ static int intel_svm_bind_mm(struct intel_iommu *iommu, struct device *dev, sdev->iommu = iommu; sdev->did = FLPT_DEFAULT_DID; sdev->sid = PCI_DEVID(info->bus, info->devfn); - init_rcu_head(&sdev->rcu); if (info->ats_enabled) { sdev->qdep = info->ats_qdep; if (sdev->qdep >= QI_DEV_EIOTLB_MAX_INVS) @@ -408,13 +407,6 @@ void intel_svm_remove_dev_pasid(struct device *dev, u32 pasid) if (svm->notifier.ops) mmu_notifier_unregister(&svm->notifier, mm); pasid_private_remove(svm->pasid); - /* - * We mandate that no page faults may be outstanding - * for the PASID when intel_svm_unbind_mm() is called. - * If that is not obeyed, subtle errors will happen. - * Let's make them less subtle... - */ - memset(svm, 0x6b, sizeof(*svm)); kfree(svm); } } @@ -562,16 +554,12 @@ static int prq_to_iommu_prot(struct page_req_dsc *req) return prot; } -static int intel_svm_prq_report(struct intel_iommu *iommu, struct device *dev, - struct page_req_dsc *desc) +static void intel_svm_prq_report(struct intel_iommu *iommu, struct device *dev, + struct page_req_dsc *desc) { - struct iommu_fault_event event; - - if (!dev || !dev_is_pci(dev)) - return -ENODEV; + struct iopf_fault event = { }; /* Fill in event data for device specific processing */ - memset(&event, 0, sizeof(struct iommu_fault_event)); event.fault.type = IOMMU_FAULT_PAGE_REQ; event.fault.prm.addr = (u64)desc->addr << VTD_PAGE_SHIFT; event.fault.prm.pasid = desc->pasid; @@ -603,7 +591,7 @@ static int intel_svm_prq_report(struct intel_iommu *iommu, struct device *dev, event.fault.prm.private_data[0] = ktime_to_ns(ktime_get()); } - return iommu_report_device_fault(dev, &event); + iommu_report_device_fault(dev, &event); } static void handle_bad_prq_event(struct intel_iommu *iommu, @@ -650,7 +638,7 @@ static irqreturn_t prq_event_thread(int irq, void *d) struct intel_iommu *iommu = d; struct page_req_dsc *req; int head, tail, handled; - struct pci_dev *pdev; + struct device *dev; u64 address; /* @@ -696,23 +684,22 @@ bad_req: if (unlikely(req->lpig && !req->rd_req && !req->wr_req)) goto prq_advance; - pdev = pci_get_domain_bus_and_slot(iommu->segment, - PCI_BUS_NUM(req->rid), - req->rid & 0xff); /* * If prq is to be handled outside iommu driver via receiver of * the fault notifiers, we skip the page response here. */ - if (!pdev) + mutex_lock(&iommu->iopf_lock); + dev = device_rbtree_find(iommu, req->rid); + if (!dev) { + mutex_unlock(&iommu->iopf_lock); goto bad_req; + } - if (intel_svm_prq_report(iommu, &pdev->dev, req)) - handle_bad_prq_event(iommu, req, QI_RESP_INVALID); - else - trace_prq_report(iommu, &pdev->dev, req->qw_0, req->qw_1, - req->priv_data[0], req->priv_data[1], - iommu->prq_seq_number++); - pci_dev_put(pdev); + intel_svm_prq_report(iommu, dev, req); + trace_prq_report(iommu, dev, req->qw_0, req->qw_1, + req->priv_data[0], req->priv_data[1], + iommu->prq_seq_number++); + mutex_unlock(&iommu->iopf_lock); prq_advance: head = (head + sizeof(*req)) & PRQ_RING_MASK; } @@ -742,9 +729,8 @@ prq_advance: return IRQ_RETVAL(handled); } -int intel_svm_page_response(struct device *dev, - struct iommu_fault_event *evt, - struct iommu_page_response *msg) +void intel_svm_page_response(struct device *dev, struct iopf_fault *evt, + struct iommu_page_response *msg) { struct device_domain_info *info = dev_iommu_priv_get(dev); struct intel_iommu *iommu = info->iommu; @@ -753,7 +739,6 @@ int intel_svm_page_response(struct device *dev, bool private_present; bool pasid_present; bool last_page; - int ret = 0; u16 sid; prm = &evt->fault.prm; @@ -762,16 +747,6 @@ int intel_svm_page_response(struct device *dev, private_present = prm->flags & IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA; last_page = prm->flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE; - if (!pasid_present) { - ret = -EINVAL; - goto out; - } - - if (prm->pasid == 0 || prm->pasid >= PASID_MAX) { - ret = -EINVAL; - goto out; - } - /* * Per VT-d spec. v3.0 ch7.7, system software must respond * with page group response if private data is present (PDP) @@ -800,17 +775,6 @@ int intel_svm_page_response(struct device *dev, qi_submit_sync(iommu, &desc, 1, 0); } -out: - return ret; -} - -static int intel_svm_set_dev_pasid(struct iommu_domain *domain, - struct device *dev, ioasid_t pasid) -{ - struct device_domain_info *info = dev_iommu_priv_get(dev); - struct intel_iommu *iommu = info->iommu; - - return intel_svm_bind_mm(iommu, dev, domain, pasid); } static void intel_svm_domain_free(struct iommu_domain *domain) |