summary refs log tree commit diff
path: root/drivers/iommu/intel/iommu.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/iommu/intel/iommu.c')
-rw-r--r-- drivers/iommu/intel/iommu.c 154
1 files changed, 125 insertions, 29 deletions
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index a8b36c3fddf1..bef8e8f7ca25 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -27,7 +27,7 @@
#include "iommu.h"
#include "../dma-iommu.h"
#include "../irq_remapping.h"
-#include "../iommu-sva-lib.h"
+#include "../iommu-sva.h"
#include "pasid.h"
#include "cap_audit.h"
@@ -959,11 +959,9 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
- if (domain_use_first_level(domain)) {
- pteval |= DMA_FL_PTE_XD | DMA_FL_PTE_US;
- if (iommu_is_dma_domain(&domain->domain))
- pteval |= DMA_FL_PTE_ACCESS;
- }
+ if (domain_use_first_level(domain))
+ pteval |= DMA_FL_PTE_XD | DMA_FL_PTE_US | DMA_FL_PTE_ACCESS;
+
if (cmpxchg64(&pte->val, 0ULL, pteval))
/* Someone else set it while we were thinking; use theirs. */
free_pgtable_page(tmp_page);
@@ -1398,6 +1396,24 @@ static void domain_update_iotlb(struct dmar_domain *domain)
spin_unlock_irqrestore(&domain->lock, flags);
}
+/*
+ * The extra devTLB flush quirk impacts those QAT devices with PCI device
+ * IDs ranging from 0x4940 to 0x4943. It is exempted from risky_device()
+ * check because it applies only to the built-in QAT devices and it doesn't
+ * grant additional privileges.
+ */
+#define BUGGY_QAT_DEVID_MASK 0x4940
+static bool dev_needs_extra_dtlb_flush(struct pci_dev *pdev)
+{
+ if (pdev->vendor != PCI_VENDOR_ID_INTEL)
+ return false;
+
+ if ((pdev->device & 0xfffc) != BUGGY_QAT_DEVID_MASK)
+ return false;
+
+ return true;
+}
+
static void iommu_enable_pci_caps(struct device_domain_info *info)
{
struct pci_dev *pdev;
@@ -1480,6 +1496,7 @@ static void __iommu_flush_dev_iotlb(struct device_domain_info *info,
qdep = info->ats_qdep;
qi_flush_dev_iotlb(info->iommu, sid, info->pfsid,
qdep, addr, mask);
+ quirk_extra_dev_tlb_flush(info, addr, mask, PASID_RID2PASID, qdep);
}
static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
@@ -2410,6 +2427,7 @@ static int __init si_domain_init(int hw)
if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
domain_exit(si_domain);
+ si_domain = NULL;
return -EFAULT;
}
@@ -3052,6 +3070,10 @@ free_iommu:
disable_dmar_iommu(iommu);
free_dmar_iommu(iommu);
}
+ if (si_domain) {
+ domain_exit(si_domain);
+ si_domain = NULL;
+ }
return ret;
}
@@ -3851,8 +3873,10 @@ static inline bool has_external_pci(void)
struct pci_dev *pdev = NULL;
for_each_pci_dev(pdev)
- if (pdev->external_facing)
+ if (pdev->external_facing) {
+ pci_dev_put(pdev);
return true;
+ }
return false;
}
@@ -4164,6 +4188,8 @@ static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
return domain;
case IOMMU_DOMAIN_IDENTITY:
return &si_domain->domain;
+ case IOMMU_DOMAIN_SVA:
+ return intel_svm_domain_alloc();
default:
return NULL;
}
@@ -4189,19 +4215,15 @@ static int prepare_domain_attach_device(struct iommu_domain *domain,
return -ENODEV;
if (dmar_domain->force_snooping && !ecap_sc_support(iommu->ecap))
- return -EOPNOTSUPP;
+ return -EINVAL;
/* check if this iommu agaw is sufficient for max mapped address */
addr_width = agaw_to_width(iommu->agaw);
if (addr_width > cap_mgaw(iommu->cap))
addr_width = cap_mgaw(iommu->cap);
- if (dmar_domain->max_addr > (1LL << addr_width)) {
- dev_err(dev, "%s: iommu width (%d) is not "
- "sufficient for the mapped address (%llx)\n",
- __func__, addr_width, dmar_domain->max_addr);
- return -EFAULT;
- }
+ if (dmar_domain->max_addr > (1LL << addr_width))
+ return -EINVAL;
dmar_domain->gaw = addr_width;
/*
@@ -4447,14 +4469,20 @@ static bool intel_iommu_enforce_cache_coherency(struct iommu_domain *domain)
static bool intel_iommu_capable(struct device *dev, enum iommu_cap cap)
{
- if (cap == IOMMU_CAP_CACHE_COHERENCY)
+ struct device_domain_info *info = dev_iommu_priv_get(dev);
+
+ switch (cap) {
+ case IOMMU_CAP_CACHE_COHERENCY:
return true;
- if (cap == IOMMU_CAP_INTR_REMAP)
+ case IOMMU_CAP_INTR_REMAP:
return irq_remapping_enabled == 1;
- if (cap == IOMMU_CAP_PRE_BOOT_PROTECTION)
+ case IOMMU_CAP_PRE_BOOT_PROTECTION:
return dmar_platform_optin();
-
- return false;
+ case IOMMU_CAP_ENFORCE_CACHE_COHERENCY:
+ return ecap_sc_support(info->iommu->ecap);
+ default:
+ return false;
+ }
}
static struct iommu_device *intel_iommu_probe_device(struct device *dev)
@@ -4487,9 +4515,10 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev)
if (dev_is_pci(dev)) {
if (ecap_dev_iotlb_support(iommu->ecap) &&
pci_ats_supported(pdev) &&
- dmar_ats_supported(pdev, iommu))
+ dmar_ats_supported(pdev, iommu)) {
info->ats_supported = 1;
-
+ info->dtlb_extra_inval = dev_needs_extra_dtlb_flush(pdev);
+ }
if (sm_supported(iommu)) {
if (pasid_supported(iommu)) {
int features = pci_pasid_features(pdev);
@@ -4534,7 +4563,7 @@ static void intel_iommu_get_resv_regions(struct device *device,
struct device *i_dev;
int i;
- down_read(&dmar_global_lock);
+ rcu_read_lock();
for_each_rmrr_units(rmrr) {
for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
i, i_dev) {
@@ -4552,14 +4581,15 @@ static void intel_iommu_get_resv_regions(struct device *device,
IOMMU_RESV_DIRECT_RELAXABLE : IOMMU_RESV_DIRECT;
resv = iommu_alloc_resv_region(rmrr->base_address,
- length, prot, type);
+ length, prot, type,
+ GFP_ATOMIC);
if (!resv)
break;
list_add_tail(&resv->list, head);
}
}
- up_read(&dmar_global_lock);
+ rcu_read_unlock();
#ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
if (dev_is_pci(device)) {
@@ -4567,7 +4597,8 @@ static void intel_iommu_get_resv_regions(struct device *device,
if ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA) {
reg = iommu_alloc_resv_region(0, 1UL << 24, prot,
- IOMMU_RESV_DIRECT_RELAXABLE);
+ IOMMU_RESV_DIRECT_RELAXABLE,
+ GFP_KERNEL);
if (reg)
list_add_tail(&reg->list, head);
}
@@ -4576,7 +4607,7 @@ static void intel_iommu_get_resv_regions(struct device *device,
reg = iommu_alloc_resv_region(IOAPIC_RANGE_START,
IOAPIC_RANGE_END - IOAPIC_RANGE_START + 1,
- 0, IOMMU_RESV_MSI);
+ 0, IOMMU_RESV_MSI, GFP_KERNEL);
if (!reg)
return;
list_add_tail(&reg->list, head);
@@ -4705,6 +4736,28 @@ static void intel_iommu_iotlb_sync_map(struct iommu_domain *domain,
__mapping_notify_one(info->iommu, dmar_domain, pfn, pages);
}
+static void intel_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid)
+{
+ struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL);
+ struct iommu_domain *domain;
+
+ /* Domain type specific cleanup: */
+ domain = iommu_get_domain_for_dev_pasid(dev, pasid, 0);
+ if (domain) {
+ switch (domain->type) {
+ case IOMMU_DOMAIN_SVA:
+ intel_svm_remove_dev_pasid(dev, pasid);
+ break;
+ default:
+ /* should never reach here */
+ WARN_ON(1);
+ break;
+ }
+ }
+
+ intel_pasid_tear_down_entry(iommu, dev, pasid, false);
+}
+
const struct iommu_ops intel_iommu_ops = {
.capable = intel_iommu_capable,
.domain_alloc = intel_iommu_domain_alloc,
@@ -4717,11 +4770,9 @@ const struct iommu_ops intel_iommu_ops = {
.dev_disable_feat = intel_iommu_dev_disable_feat,
.is_attach_deferred = intel_iommu_is_attach_deferred,
.def_domain_type = device_def_domain_type,
+ .remove_dev_pasid = intel_iommu_remove_dev_pasid,
.pgsize_bitmap = SZ_4K,
#ifdef CONFIG_INTEL_IOMMU_SVM
- .sva_bind = intel_svm_bind,
- .sva_unbind = intel_svm_unbind,
- .sva_get_pasid = intel_svm_get_pasid,
.page_response = intel_svm_page_response,
#endif
.default_domain_ops = &(const struct iommu_domain_ops) {
@@ -4926,3 +4977,48 @@ static void __init check_tylersburg_isoch(void)
pr_warn("Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
vtisochctrl);
}
+
+/*
+ * Here we deal with a device TLB defect where a device may inadvertently issue an
+ * ATS invalidation completion before posted writes initiated with a translated
+ * address that utilized translations matching the invalidation address range,
+ * violating the invalidation completion ordering.
+ * Therefore, any use case that cannot guarantee DMA is stopped before unmap is
+ * vulnerable to this defect. In other words, any dTLB invalidation not initiated
+ * under the control of the trusted/privileged host device driver must use this
+ * quirk.
+ * Device TLBs are invalidated under the following six conditions:
+ * 1. Device driver unmaps an IOVA through the DMA API
+ * 2. Device driver unbinds a PASID from a process, sva_unbind_device()
+ * 3. PASID is torn down, after PASID cache is flushed. e.g. process
+ * exit_mmap() due to crash
+ * 4. Under SVA usage, called by mmu_notifier.invalidate_range() where
+ * VM has to free pages that were unmapped
+ * 5. Userspace driver unmaps a DMA buffer
+ * 6. Cache invalidation in vSVA usage (upcoming)
+ *
+ * For #1 and #2, device drivers are responsible for stopping DMA traffic
+ * before unmap/unbind. For #3, the iommu driver gets an mmu_notifier callback to
+ * invalidate the TLB the same way as a normal user unmap, which uses this quirk.
+ * The dTLB invalidation after the PASID cache flush does not need this quirk.
+ *
+ * As a reminder, #6 will *NEED* this quirk as we enable nested translation.
+ */
+void quirk_extra_dev_tlb_flush(struct device_domain_info *info,
+ unsigned long address, unsigned long mask,
+ u32 pasid, u16 qdep)
+{
+ u16 sid;
+
+ if (likely(!info->dtlb_extra_inval))
+ return;
+
+ sid = PCI_DEVID(info->bus, info->devfn);
+ if (pasid == PASID_RID2PASID) {
+ qi_flush_dev_iotlb(info->iommu, sid, info->pfsid,
+ qdep, address, mask);
+ } else {
+ qi_flush_dev_iotlb_pasid(info->iommu, sid, info->pfsid,
+ pasid, qdep, address, mask);
+ }
+}