diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2023-04-30 23:00:38 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2023-04-30 23:00:38 +0300 |
commit | 58390c8ce1bddb6c623f62e7ed36383e7fa5c02f (patch) | |
tree | a2abd81e21456b1e2bcbc3bac4dd94650b7a18ec /drivers/iommu/intel | |
parent | 7acc1372113083fa281ba426021801e2402caca1 (diff) | |
parent | e51b4198396cd715b140c0e8b259680429ff0cfb (diff) | |
download | linux-58390c8ce1bddb6c623f62e7ed36383e7fa5c02f.tar.xz |
Merge tag 'iommu-updates-v6.4' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu
Pull iommu updates from Joerg Roedel:
- Convert to platform remove callback returning void
- Extend changing default domain to normal group
- Intel VT-d updates:
- Remove VT-d virtual command interface and IOASID
- Allow the VT-d driver to support non-PRI IOPF
- Remove PASID supervisor request support
- Various small and misc cleanups
- ARM SMMU updates:
- Device-tree binding updates:
* Allow Qualcomm GPU SMMUs to accept relevant clock properties
* Document Qualcomm 8550 SoC as implementing an MMU-500
* Favour new "qcom,smmu-500" binding for Adreno SMMUs
- Fix S2CR quirk detection on non-architectural Qualcomm SMMU
implementations
- Acknowledge SMMUv3 PRI queue overflow when consuming events
- Document (in a comment) why ATS is disabled for bypass streams
- AMD IOMMU updates:
- 5-level page-table support
- NUMA awareness for memory allocations
- Unisoc driver: Support for reattaching an existing domain
- Rockchip driver: Add missing set_platform_dma_ops callback
- Mediatek driver: Adjust the dma-ranges
- Various other small fixes and cleanups
* tag 'iommu-updates-v6.4' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu: (82 commits)
iommu: Remove iommu_group_get_by_id()
iommu: Make iommu_release_device() static
iommu/vt-d: Remove BUG_ON in dmar_insert_dev_scope()
iommu/vt-d: Remove a useless BUG_ON(dev->is_virtfn)
iommu/vt-d: Remove BUG_ON in map/unmap()
iommu/vt-d: Remove BUG_ON when domain->pgd is NULL
iommu/vt-d: Remove BUG_ON in handling iotlb cache invalidation
iommu/vt-d: Remove BUG_ON on checking valid pfn range
iommu/vt-d: Make size of operands same in bitwise operations
iommu/vt-d: Remove PASID supervisor request support
iommu/vt-d: Use non-privileged mode for all PASIDs
iommu/vt-d: Remove extern from function prototypes
iommu/vt-d: Do not use GFP_ATOMIC when not needed
iommu/vt-d: Remove unnecessary checks in iopf disabling path
iommu/vt-d: Move PRI handling to IOPF feature path
iommu/vt-d: Move pfsid and ats_qdep calculation to device probe path
iommu/vt-d: Move iopf code from SVA to IOPF enabling path
iommu/vt-d: Allow SVA with device-specific IOPF
dmaengine: idxd: Add enable/disable device IOPF feature
arm64: dts: mt8186: Add dma-ranges for the parent "soc" node
...
Diffstat (limited to 'drivers/iommu/intel')
-rw-r--r-- | drivers/iommu/intel/Kconfig | 1 | ||||
-rw-r--r-- | drivers/iommu/intel/cap_audit.c | 2 | ||||
-rw-r--r-- | drivers/iommu/intel/dmar.c | 13 | ||||
-rw-r--r-- | drivers/iommu/intel/iommu.c | 277 | ||||
-rw-r--r-- | drivers/iommu/intel/iommu.h | 35 | ||||
-rw-r--r-- | drivers/iommu/intel/irq_remapping.c | 2 | ||||
-rw-r--r-- | drivers/iommu/intel/pasid.c | 43 | ||||
-rw-r--r-- | drivers/iommu/intel/pasid.h | 7 | ||||
-rw-r--r-- | drivers/iommu/intel/svm.c | 3 |
9 files changed, 141 insertions, 242 deletions
diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig index 12e1e90fdae1..2e56bd79f589 100644 --- a/drivers/iommu/intel/Kconfig +++ b/drivers/iommu/intel/Kconfig @@ -18,7 +18,6 @@ config INTEL_IOMMU select NEED_DMA_MAP_STATE select DMAR_TABLE select SWIOTLB - select IOASID select PCI_ATS select PCI_PRI select PCI_PASID diff --git a/drivers/iommu/intel/cap_audit.c b/drivers/iommu/intel/cap_audit.c index 806986696841..9862dc20b35e 100644 --- a/drivers/iommu/intel/cap_audit.c +++ b/drivers/iommu/intel/cap_audit.c @@ -54,7 +54,6 @@ static inline void check_dmar_capabilities(struct intel_iommu *a, CHECK_FEATURE_MISMATCH(a, b, ecap, slts, ECAP_SLTS_MASK); CHECK_FEATURE_MISMATCH(a, b, ecap, nwfs, ECAP_NWFS_MASK); CHECK_FEATURE_MISMATCH(a, b, ecap, slads, ECAP_SLADS_MASK); - CHECK_FEATURE_MISMATCH(a, b, ecap, vcs, ECAP_VCS_MASK); CHECK_FEATURE_MISMATCH(a, b, ecap, smts, ECAP_SMTS_MASK); CHECK_FEATURE_MISMATCH(a, b, ecap, pds, ECAP_PDS_MASK); CHECK_FEATURE_MISMATCH(a, b, ecap, dit, ECAP_DIT_MASK); @@ -101,7 +100,6 @@ static int cap_audit_hotplug(struct intel_iommu *iommu, enum cap_audit_type type CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, slts, ECAP_SLTS_MASK); CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, nwfs, ECAP_NWFS_MASK); CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, slads, ECAP_SLADS_MASK); - CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, vcs, ECAP_VCS_MASK); CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, smts, ECAP_SMTS_MASK); CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, pds, ECAP_PDS_MASK); CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, dit, ECAP_DIT_MASK); diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index 23828d189c2a..a3414afe11b0 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -127,8 +127,6 @@ dmar_alloc_pci_notify_info(struct pci_dev *dev, unsigned long event) struct pci_dev *tmp; struct dmar_pci_notify_info *info; - BUG_ON(dev->is_virtfn); - /* * Ignore devices that have a domain number higher than what can * be looked up in DMAR, e.g. VMD subdevices with domain 0x10000 @@ -264,7 +262,8 @@ int dmar_insert_dev_scope(struct dmar_pci_notify_info *info, get_device(dev)); return 1; } - BUG_ON(i >= devices_cnt); + if (WARN_ON(i >= devices_cnt)) + return -EINVAL; } return 0; @@ -993,8 +992,6 @@ static int map_iommu(struct intel_iommu *iommu, struct dmar_drhd_unit *drhd) warn_invalid_dmar(phys_addr, " returns all ones"); goto unmap; } - if (ecap_vcs(iommu->ecap)) - iommu->vccap = dmar_readq(iommu->reg + DMAR_VCCAP_REG); /* the registers might be more than one page */ map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap), @@ -1690,7 +1687,7 @@ static void __dmar_enable_qi(struct intel_iommu *iommu) * is present. */ if (ecap_smts(iommu->ecap)) - val |= (1 << 11) | 1; + val |= BIT_ULL(11) | BIT_ULL(0); raw_spin_lock_irqsave(&iommu->register_lock, flags); @@ -1961,7 +1958,7 @@ static int dmar_fault_do_one(struct intel_iommu *iommu, int type, return 0; } - if (pasid == INVALID_IOASID) + if (pasid == IOMMU_PASID_INVALID) pr_err("[%s NO_PASID] Request device [%02x:%02x.%d] fault addr 0x%llx [fault reason 0x%02x] %s\n", type ? "DMA Read" : "DMA Write", source_id >> 8, PCI_SLOT(source_id & 0xFF), @@ -2042,7 +2039,7 @@ irqreturn_t dmar_fault(int irq, void *dev_id) if (!ratelimited) /* Using pasid -1 if pasid is not present */ dmar_fault_do_one(iommu, type, fault_reason, - pasid_present ? pasid : INVALID_IOASID, + pasid_present ? pasid : IOMMU_PASID_INVALID, source_id, guest_addr); fault_index++; diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 7c2f4bd33582..b871a6afd803 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -876,7 +876,7 @@ void dmar_fault_dump_ptes(struct intel_iommu *iommu, u16 source_id, return; } /* For request-without-pasid, get the pasid from context entry */ - if (intel_iommu_sm && pasid == INVALID_IOASID) + if (intel_iommu_sm && pasid == IOMMU_PASID_INVALID) pasid = PASID_RID2PASID; dir_index = pasid >> PASID_PDE_SHIFT; @@ -915,8 +915,6 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, int level = agaw_to_level(domain->agaw); int offset; - BUG_ON(!domain->pgd); - if (!domain_pfn_supported(domain, pfn)) /* Address beyond IOMMU's addressing capabilities. */ return NULL; @@ -1005,9 +1003,9 @@ static void dma_pte_clear_range(struct dmar_domain *domain, unsigned int large_page; struct dma_pte *first_pte, *pte; - BUG_ON(!domain_pfn_supported(domain, start_pfn)); - BUG_ON(!domain_pfn_supported(domain, last_pfn)); - BUG_ON(start_pfn > last_pfn); + if (WARN_ON(!domain_pfn_supported(domain, last_pfn)) || + WARN_ON(start_pfn > last_pfn)) + return; /* we don't need lock here; nobody else touches the iova range */ do { @@ -1166,9 +1164,9 @@ next: static void domain_unmap(struct dmar_domain *domain, unsigned long start_pfn, unsigned long last_pfn, struct list_head *freelist) { - BUG_ON(!domain_pfn_supported(domain, start_pfn)); - BUG_ON(!domain_pfn_supported(domain, last_pfn)); - BUG_ON(start_pfn > last_pfn); + if (WARN_ON(!domain_pfn_supported(domain, last_pfn)) || + WARN_ON(start_pfn > last_pfn)) + return; /* we don't need lock here; nobody else touches the iova range */ dma_pte_clear_level(domain, agaw_to_level(domain->agaw), @@ -1272,7 +1270,9 @@ static void __iommu_flush_context(struct intel_iommu *iommu, | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask); break; default: - BUG(); + pr_warn("%s: Unexpected context-cache invalidation type 0x%llx\n", + iommu->name, type); + return; } val |= DMA_CCMD_ICC; @@ -1308,7 +1308,9 @@ static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did, val_iva = size_order | addr; break; default: - BUG(); + pr_warn("%s: Unexpected iotlb invalidation type 0x%llx\n", + iommu->name, type); + return; } /* Note: set drain read/write */ #if 0 @@ -1406,20 +1408,6 @@ static void iommu_enable_pci_caps(struct device_domain_info *info) return; pdev = to_pci_dev(info->dev); - /* For IOMMU that supports device IOTLB throttling (DIT), we assign - * PFSID to the invalidation desc of a VF such that IOMMU HW can gauge - * queue depth at PF level. If DIT is not set, PFSID will be treated as - * reserved, which should be set to 0. - */ - if (!ecap_dit(info->iommu->ecap)) - info->pfsid = 0; - else { - struct pci_dev *pf_pdev; - - /* pdev will be returned if device is not a vf */ - pf_pdev = pci_physfn(pdev); - info->pfsid = pci_dev_id(pf_pdev); - } /* The PCIe spec, in its wisdom, declares that the behaviour of the device if you enable PASID support after ATS support is @@ -1429,16 +1417,10 @@ static void iommu_enable_pci_caps(struct device_domain_info *info) if (info->pasid_supported && !pci_enable_pasid(pdev, info->pasid_supported & ~1)) info->pasid_enabled = 1; - if (info->pri_supported && - (info->pasid_enabled ? pci_prg_resp_pasid_required(pdev) : 1) && - !pci_reset_pri(pdev) && !pci_enable_pri(pdev, PRQ_DEPTH)) - info->pri_enabled = 1; - if (info->ats_supported && pci_ats_page_aligned(pdev) && !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) { info->ats_enabled = 1; domain_update_iotlb(info->domain); - info->ats_qdep = pci_ats_queue_depth(pdev); } } @@ -1457,11 +1439,6 @@ static void iommu_disable_pci_caps(struct device_domain_info *info) domain_update_iotlb(info->domain); } - if (info->pri_enabled) { - pci_disable_pri(pdev); - info->pri_enabled = 0; - } - if (info->pasid_enabled) { pci_disable_pasid(pdev); info->pasid_enabled = 0; @@ -1508,7 +1485,8 @@ static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT; u16 did = domain_id_iommu(domain, iommu); - BUG_ON(pages == 0); + if (WARN_ON(!pages)) + return; if (ih) ih = 1 << 6; @@ -1722,9 +1700,6 @@ static void free_dmar_iommu(struct intel_iommu *iommu) if (ecap_prs(iommu->ecap)) intel_svm_finish_prq(iommu); } - if (vccap_pasid(iommu->vccap)) - ioasid_unregister_allocator(&iommu->pasid_allocator); - #endif } @@ -1895,7 +1870,7 @@ context_set_sm_rid2pasid(struct context_entry *context, unsigned long pasid) */ static inline void context_set_sm_dte(struct context_entry *context) { - context->lo |= (1 << 2); + context->lo |= BIT_ULL(2); } /* @@ -1904,7 +1879,7 @@ static inline void context_set_sm_dte(struct context_entry *context) */ static inline void context_set_sm_pre(struct context_entry *context) { - context->lo |= (1 << 4); + context->lo |= BIT_ULL(4); } /* Convert value to context PASID directory size field coding. */ @@ -1930,8 +1905,6 @@ static int domain_context_mapping_one(struct dmar_domain *domain, pr_debug("Set context mapping for %02x:%02x.%d\n", bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); - BUG_ON(!domain->pgd); - spin_lock(&iommu->lock); ret = -ENOMEM; context = iommu_context_addr(iommu, bus, devfn, 1); @@ -2183,7 +2156,8 @@ __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, phys_addr_t pteval; u64 attr; - BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1)); + if (unlikely(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1))) + return -EINVAL; if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0) return -EINVAL; @@ -2341,8 +2315,6 @@ static int domain_setup_first_level(struct intel_iommu *iommu, if (level != 4 && level != 5) return -EINVAL; - if (pasid != PASID_RID2PASID) - flags |= PASID_FLAG_SUPERVISOR_MODE; if (level == 5) flags |= PASID_FLAG_FL5LP; @@ -2797,85 +2769,6 @@ out_unmap: return ret; } -#ifdef CONFIG_INTEL_IOMMU_SVM -static ioasid_t intel_vcmd_ioasid_alloc(ioasid_t min, ioasid_t max, void *data) -{ - struct intel_iommu *iommu = data; - ioasid_t ioasid; - - if (!iommu) - return INVALID_IOASID; - /* - * VT-d virtual command interface always uses the full 20 bit - * PASID range. Host can partition guest PASID range based on - * policies but it is out of guest's control. - */ - if (min < PASID_MIN || max > intel_pasid_max_id) - return INVALID_IOASID; - - if (vcmd_alloc_pasid(iommu, &ioasid)) - return INVALID_IOASID; - - return ioasid; -} - -static void intel_vcmd_ioasid_free(ioasid_t ioasid, void *data) -{ - struct intel_iommu *iommu = data; - - if (!iommu) - return; - /* - * Sanity check the ioasid owner is done at upper layer, e.g. VFIO - * We can only free the PASID when all the devices are unbound. - */ - if (ioasid_find(NULL, ioasid, NULL)) { - pr_alert("Cannot free active IOASID %d\n", ioasid); - return; - } - vcmd_free_pasid(iommu, ioasid); -} - -static void register_pasid_allocator(struct intel_iommu *iommu) -{ - /* - * If we are running in the host, no need for custom allocator - * in that PASIDs are allocated from the host system-wide. - */ - if (!cap_caching_mode(iommu->cap)) - return; - - if (!sm_supported(iommu)) { - pr_warn("VT-d Scalable Mode not enabled, no PASID allocation\n"); - return; - } - - /* - * Register a custom PASID allocator if we are running in a guest, - * guest PASID must be obtained via virtual command interface. - * There can be multiple vIOMMUs in each guest but only one allocator - * is active. All vIOMMU allocators will eventually be calling the same - * host allocator. - */ - if (!vccap_pasid(iommu->vccap)) - return; - - pr_info("Register custom PASID allocator\n"); - iommu->pasid_allocator.alloc = intel_vcmd_ioasid_alloc; - iommu->pasid_allocator.free = intel_vcmd_ioasid_free; - iommu->pasid_allocator.pdata = (void *)iommu; - if (ioasid_register_allocator(&iommu->pasid_allocator)) { - pr_warn("Custom PASID allocator failed, scalable mode disabled\n"); - /* - * Disable scalable mode on this IOMMU if there - * is no custom allocator. Mixing SM capable vIOMMU - * and non-SM vIOMMU are not supported. - */ - intel_iommu_sm = 0; - } -} -#endif - static int __init init_dmars(void) { struct dmar_drhd_unit *drhd; @@ -2964,9 +2857,6 @@ static int __init init_dmars(void) */ for_each_active_iommu(iommu, drhd) { iommu_flush_write_buffer(iommu); -#ifdef CONFIG_INTEL_IOMMU_SVM - register_pasid_allocator(iommu); -#endif iommu_set_root_entry(iommu); } @@ -3760,8 +3650,8 @@ static ssize_t version_show(struct device *dev, { struct intel_iommu *iommu = dev_to_intel_iommu(dev); u32 ver = readl(iommu->reg + DMAR_VER_REG); - return sprintf(buf, "%d:%d\n", - DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver)); + return sysfs_emit(buf, "%d:%d\n", + DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver)); } static DEVICE_ATTR_RO(version); @@ -3769,7 +3659,7 @@ static ssize_t address_show(struct device *dev, struct device_attribute *attr, char *buf) { struct intel_iommu *iommu = dev_to_intel_iommu(dev); - return sprintf(buf, "%llx\n", iommu->reg_phys); + return sysfs_emit(buf, "%llx\n", iommu->reg_phys); } static DEVICE_ATTR_RO(address); @@ -3777,7 +3667,7 @@ static ssize_t cap_show(struct device *dev, struct device_attribute *attr, char *buf) { struct intel_iommu *iommu = dev_to_intel_iommu(dev); - return sprintf(buf, "%llx\n", iommu->cap); + return sysfs_emit(buf, "%llx\n", iommu->cap); } static DEVICE_ATTR_RO(cap); @@ -3785,7 +3675,7 @@ static ssize_t ecap_show(struct device *dev, struct device_attribute *attr, char *buf) { struct intel_iommu *iommu = dev_to_intel_iommu(dev); - return sprintf(buf, "%llx\n", iommu->ecap); + return sysfs_emit(buf, "%llx\n", iommu->ecap); } static DEVICE_ATTR_RO(ecap); @@ -3793,7 +3683,7 @@ static ssize_t domains_supported_show(struct device *dev, struct device_attribute *attr, char *buf) { struct intel_iommu *iommu = dev_to_intel_iommu(dev); - return sprintf(buf, "%ld\n", cap_ndoms(iommu->cap)); + return sysfs_emit(buf, "%ld\n", cap_ndoms(iommu->cap)); } static DEVICE_ATTR_RO(domains_supported); @@ -3801,8 +3691,9 @@ static ssize_t domains_used_show(struct device *dev, struct device_attribute *attr, char *buf) { struct intel_iommu *iommu = dev_to_intel_iommu(dev); - return sprintf(buf, "%d\n", bitmap_weight(iommu->domain_ids, - cap_ndoms(iommu->cap))); + return sysfs_emit(buf, "%d\n", + bitmap_weight(iommu->domain_ids, + cap_ndoms(iommu->cap))); } static DEVICE_ATTR_RO(domains_used); @@ -4340,8 +4231,9 @@ static size_t intel_iommu_unmap(struct iommu_domain *domain, /* Cope with horrid API which requires us to unmap more than the size argument if it happens to be a large-page mapping. */ - BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level, - GFP_ATOMIC)); + if (unlikely(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, + &level, GFP_ATOMIC))) + return 0; if (size < VTD_PAGE_SIZE << level_to_offset_bits(level)) size = VTD_PAGE_SIZE << level_to_offset_bits(level); @@ -4521,6 +4413,17 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev) dmar_ats_supported(pdev, iommu)) { info->ats_supported = 1; info->dtlb_extra_inval = dev_needs_extra_dtlb_flush(pdev); + + /* + * For IOMMU that supports device IOTLB throttling + * (DIT), we assign PFSID to the invalidation desc + * of a VF such that IOMMU HW can gauge queue depth + * at PF level. If DIT is not set, PFSID will be + * treated as reserved, which should be set to 0. + */ + if (ecap_dit(iommu->ecap)) + info->pfsid = pci_dev_id(pci_physfn(pdev)); + info->ats_qdep = pci_ats_queue_depth(pdev); } if (sm_supported(iommu)) { if (pasid_supported(iommu)) { @@ -4638,7 +4541,6 @@ static int intel_iommu_enable_sva(struct device *dev) { struct device_domain_info *info = dev_iommu_priv_get(dev); struct intel_iommu *iommu; - int ret; if (!info || dmar_disabled) return -EINVAL; @@ -4650,45 +4552,102 @@ static int intel_iommu_enable_sva(struct device *dev) if (!(iommu->flags & VTD_FLAG_SVM_CAPABLE)) return -ENODEV; - if (!info->pasid_enabled || !info->pri_enabled || !info->ats_enabled) + if (!info->pasid_enabled || !info->ats_enabled) return -EINVAL; - ret = iopf_queue_add_device(iommu->iopf_queue, dev); - if (ret) - return ret; + /* + * Devices having device-specific I/O fault handling should not + * support PCI/PRI. The IOMMU side has no means to check the + * capability of device-specific IOPF. Therefore, IOMMU can only + * default that if the device driver enables SVA on a non-PRI + * device, it will handle IOPF in its own way. + */ + if (!info->pri_supported) + return 0; - ret = iommu_register_device_fault_handler(dev, iommu_queue_iopf, dev); - if (ret) - iopf_queue_remove_device(iommu->iopf_queue, dev); + /* Devices supporting PRI should have it enabled. */ + if (!info->pri_enabled) + return -EINVAL; - return ret; + return 0; } -static int intel_iommu_disable_sva(struct device *dev) +static int intel_iommu_enable_iopf(struct device *dev) { + struct pci_dev *pdev = dev_is_pci(dev) ? to_pci_dev(dev) : NULL; struct device_domain_info *info = dev_iommu_priv_get(dev); - struct intel_iommu *iommu = info->iommu; + struct intel_iommu *iommu; int ret; - ret = iommu_unregister_device_fault_handler(dev); + if (!pdev || !info || !info->ats_enabled || !info->pri_supported) + return -ENODEV; + + if (info->pri_enabled) + return -EBUSY; + + iommu = info->iommu; + if (!iommu) + return -EINVAL; + + /* PASID is required in PRG Response Message. */ + if (info->pasid_enabled && !pci_prg_resp_pasid_required(pdev)) + return -EINVAL; + + ret = pci_reset_pri(pdev); + if (ret) + return ret; + + ret = iopf_queue_add_device(iommu->iopf_queue, dev); if (ret) return ret; - ret = iopf_queue_remove_device(iommu->iopf_queue, dev); + ret = iommu_register_device_fault_handler(dev, iommu_queue_iopf, dev); + if (ret) + goto iopf_remove_device; + + ret = pci_enable_pri(pdev, PRQ_DEPTH); if (ret) - iommu_register_device_fault_handler(dev, iommu_queue_iopf, dev); + goto iopf_unregister_handler; + info->pri_enabled = 1; + + return 0; + +iopf_unregister_handler: + iommu_unregister_device_fault_handler(dev); +iopf_remove_device: + iopf_queue_remove_device(iommu->iopf_queue, dev); return ret; } -static int intel_iommu_enable_iopf(struct device *dev) +static int intel_iommu_disable_iopf(struct device *dev) { struct device_domain_info *info = dev_iommu_priv_get(dev); + struct intel_iommu *iommu = info->iommu; - if (info && info->pri_supported) - return 0; + if (!info->pri_enabled) + return -EINVAL; + + /* + * PCIe spec states that by clearing PRI enable bit, the Page + * Request Interface will not issue new page requests, but has + * outstanding page requests that have been transmitted or are + * queued for transmission. This is supposed to be called after + * the device driver has stopped DMA, all PASIDs have been + * unbound and the outstanding PRQs have been drained. + */ + pci_disable_pri(to_pci_dev(dev)); + info->pri_enabled = 0; - return -ENODEV; + /* + * With PRI disabled and outstanding PRQs drained, unregistering + * fault handler and removing device from iopf queue should never + * fail. + */ + WARN_ON(iommu_unregister_device_fault_handler(dev)); + WARN_ON(iopf_queue_remove_device(iommu->iopf_queue, dev)); + + return 0; } static int @@ -4711,10 +4670,10 @@ intel_iommu_dev_disable_feat(struct device *dev, enum iommu_dev_features feat) { switch (feat) { case IOMMU_DEV_FEAT_IOPF: - return 0; + return intel_iommu_disable_iopf(dev); case IOMMU_DEV_FEAT_SVA: - return intel_iommu_disable_sva(dev); + return 0; default: return -ENODEV; diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index 694ab9b7d3e9..1c5e1d88862b 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -19,7 +19,6 @@ #include <linux/iommu.h> #include <linux/io-64-nonatomic-lo-hi.h> #include <linux/dmar.h> -#include <linux/ioasid.h> #include <linux/bitfield.h> #include <linux/xarray.h> #include <linux/perf_event.h> @@ -198,7 +197,6 @@ #define ecap_flts(e) (((e) >> 47) & 0x1) #define ecap_slts(e) (((e) >> 46) & 0x1) #define ecap_slads(e) (((e) >> 45) & 0x1) -#define ecap_vcs(e) (((e) >> 44) & 0x1) #define ecap_smts(e) (((e) >> 43) & 0x1) #define ecap_dit(e) (((e) >> 41) & 0x1) #define ecap_pds(e) (((e) >> 42) & 0x1) @@ -678,7 +676,6 @@ struct intel_iommu { unsigned char prq_name[16]; /* Name for PRQ interrupt */ unsigned long prq_seq_number; struct completion prq_complete; - struct ioasid_allocator_ops pasid_allocator; /* Custom allocator for PASIDs */ #endif struct iopf_queue *iopf_queue; unsigned char iopfq_name[16]; @@ -798,18 +795,18 @@ static inline bool context_present(struct context_entry *context) return (context->lo & 1); } -extern struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev *dev); +struct dmar_drhd_unit *dmar_find_matched_drhd_unit(struct pci_dev *dev); -extern int dmar_enable_qi(struct intel_iommu *iommu); -extern void dmar_disable_qi(struct intel_iommu *iommu); -extern int dmar_reenable_qi(struct intel_iommu *iommu); -extern void qi_global_iec(struct intel_iommu *iommu); +int dmar_enable_qi(struct intel_iommu *iommu); +void dmar_disable_qi(struct intel_iommu *iommu); +int dmar_reenable_qi(struct intel_iommu *iommu); +void qi_global_iec(struct intel_iommu *iommu); -extern void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, - u8 fm, u64 type); -extern void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, - unsigned int size_order, u64 type); -extern void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid, +void qi_flush_context(struct intel_iommu *iommu, u16 did, + u16 sid, u8 fm, u64 type); +void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, + unsigned int size_order, u64 type); +void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid, u16 qdep, u64 addr, unsigned mask); void qi_flush_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid, u64 addr, @@ -832,7 +829,7 @@ int qi_submit_sync(struct intel_iommu *iommu, struct qi_desc *desc, */ #define QI_OPT_WAIT_DRAIN BIT(0) -extern int dmar_ir_support(void); +int dmar_ir_support(void); void *alloc_pgtable_page(int node, gfp_t gfp); void free_pgtable_page(void *vaddr); @@ -840,9 +837,9 @@ void iommu_flush_write_buffer(struct intel_iommu *iommu); struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn); #ifdef CONFIG_INTEL_IOMMU_SVM -extern void intel_svm_check(struct intel_iommu *iommu); -extern int intel_svm_enable_prq(struct intel_iommu *iommu); -extern int intel_svm_finish_prq(struct intel_iommu *iommu); +void intel_svm_check(struct intel_iommu *iommu); +int intel_svm_enable_prq(struct intel_iommu *iommu); +int intel_svm_finish_prq(struct intel_iommu *iommu); int intel_svm_page_response(struct device *dev, struct iommu_fault_event *evt, struct iommu_page_response *msg); struct iommu_domain *intel_svm_domain_alloc(void); @@ -889,8 +886,8 @@ extern const struct iommu_ops intel_iommu_ops; #ifdef CONFIG_INTEL_IOMMU extern int intel_iommu_sm; -extern int iommu_calculate_agaw(struct intel_iommu *iommu); -extern int iommu_calculate_max_sagaw(struct intel_iommu *iommu); +int iommu_calculate_agaw(struct intel_iommu *iommu); +int iommu_calculate_max_sagaw(struct intel_iommu *iommu); int ecmd_submit_sync(struct intel_iommu *iommu, u8 ecmd, u64 oa, u64 ob); static inline bool ecmd_has_pmu_essential(struct intel_iommu *iommu) diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c index df9e261af0b5..a1b987335b31 100644 --- a/drivers/iommu/intel/irq_remapping.c +++ b/drivers/iommu/intel/irq_remapping.c @@ -548,7 +548,7 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu) goto out_free_table; } - bitmap = bitmap_zalloc(INTR_REMAP_TABLE_ENTRIES, GFP_ATOMIC); + bitmap = bitmap_zalloc(INTR_REMAP_TABLE_ENTRIES, GFP_KERNEL); if (bitmap == NULL) { pr_err("IR%d: failed to allocate bitmap\n", iommu->seq_id); goto out_free_pages; diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c index 633e0a4a01e7..c5d479770e12 100644 --- a/drivers/iommu/intel/pasid.c +++ b/drivers/iommu/intel/pasid.c @@ -336,15 +336,6 @@ static inline void pasid_set_fault_enable(struct pasid_entry *pe) } /* - * Setup the SRE(Supervisor Request Enable) field (Bit 128) of a - * scalable mode PASID entry. - */ -static inline void pasid_set_sre(struct pasid_entry *pe) -{ - pasid_set_bits(&pe->val[2], 1 << 0, 1); -} - -/* * Setup the WPE(Write Protect Enable) field (Bit 132) of a * scalable mode PASID entry. */ @@ -521,23 +512,6 @@ int intel_pasid_setup_first_level(struct intel_iommu *iommu, return -EINVAL; } - if (flags & PASID_FLAG_SUPERVISOR_MODE) { -#ifdef CONFIG_X86 - unsigned long cr0 = read_cr0(); - - /* CR0.WP is normally set but just to be sure */ - if (unlikely(!(cr0 & X86_CR0_WP))) { - pr_err("No CPU write protect!\n"); - return -EINVAL; - } -#endif - if (!ecap_srs(iommu->ecap)) { - pr_err("No supervisor request support on %s\n", - iommu->name); - return -EINVAL; - } - } - if ((flags & PASID_FLAG_FL5LP) && !cap_fl5lp_support(iommu->cap)) { pr_err("No 5-level paging support for first-level on %s\n", iommu->name); @@ -560,10 +534,6 @@ int intel_pasid_setup_first_level(struct intel_iommu *iommu, /* Setup the first level page table pointer: */ pasid_set_flptr(pte, (u64)__pa(pgd)); - if (flags & PASID_FLAG_SUPERVISOR_MODE) { - pasid_set_sre(pte); - pasid_set_wpe(pte); - } if (flags & PASID_FLAG_FL5LP) pasid_set_flpm(pte, 1); @@ -658,12 +628,6 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu, pasid_set_fault_enable(pte); pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap)); - /* - * Since it is a second level only translation setup, we should - * set SRE bit as well (addresses are expected to be GPAs). - */ - if (pasid != PASID_RID2PASID && ecap_srs(iommu->ecap)) - pasid_set_sre(pte); pasid_set_present(pte); spin_unlock(&iommu->lock); @@ -700,13 +664,6 @@ int intel_pasid_setup_pass_through(struct intel_iommu *iommu, pasid_set_translation_type(pte, PASID_ENTRY_PGTT_PT); pasid_set_fault_enable(pte); pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap)); - - /* - * We should set SRE bit as well since the addresses are expected - * to be GPAs. - */ - if (ecap_srs(iommu->ecap)) - pasid_set_sre(pte); pasid_set_present(pte); spin_unlock(&iommu->lock); diff --git a/drivers/iommu/intel/pasid.h b/drivers/iommu/intel/pasid.h index 20c54e50f533..d6b7d21244b1 100644 --- a/drivers/iommu/intel/pasid.h +++ b/drivers/iommu/intel/pasid.h @@ -41,13 +41,6 @@ #define FLPT_DEFAULT_DID 1 #define NUM_RESERVED_DID 2 -/* - * The SUPERVISOR_MODE flag indicates a first level translation which - * can be used for access to kernel addresses. It is valid only for - * access to the kernel's static 1:1 mapping of physical memory — not - * to vmalloc or even module mappings. - */ -#define PASID_FLAG_SUPERVISOR_MODE BIT(0) #define PASID_FLAG_NESTED BIT(1) #define PASID_FLAG_PAGE_SNOOP BIT(2) diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 7367f56c3bad..e95b339e9cdc 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -16,7 +16,6 @@ #include <linux/interrupt.h> #include <linux/mm_types.h> #include <linux/xarray.h> -#include <linux/ioasid.h> #include <asm/page.h> #include <asm/fpu/api.h> @@ -273,7 +272,7 @@ static int pasid_to_svm_sdev(struct device *dev, unsigned int pasid, if (WARN_ON(!mutex_is_locked(&pasid_mutex))) return -EINVAL; - if (pasid == INVALID_IOASID || pasid >= PASID_MAX) + if (pasid == IOMMU_PASID_INVALID || pasid >= PASID_MAX) return -EINVAL; svm = pasid_private_find(pasid); |