summaryrefslogtreecommitdiff
path: root/drivers/iommu/intel-iommu.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/iommu/intel-iommu.c')
-rw-r--r--drivers/iommu/intel-iommu.c253
1 files changed, 153 insertions, 100 deletions
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index afb6c872f203..d611a3e9c81a 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -31,7 +31,6 @@
#include <linux/pci.h>
#include <linux/dmar.h>
#include <linux/dma-mapping.h>
-#include <linux/dma-direct.h>
#include <linux/mempool.h>
#include <linux/memory.h>
#include <linux/cpu.h>
@@ -53,6 +52,7 @@
#include <asm/iommu.h>
#include "irq_remapping.h"
+#include "intel-pasid.h"
#define ROOT_SIZE VTD_PAGE_SIZE
#define CONTEXT_SIZE VTD_PAGE_SIZE
@@ -380,60 +380,6 @@ static int hw_pass_through = 1;
for (idx = 0; idx < g_num_of_iommus; idx++) \
if (domain->iommu_refcnt[idx])
-struct dmar_domain {
- int nid; /* node id */
-
- unsigned iommu_refcnt[DMAR_UNITS_SUPPORTED];
- /* Refcount of devices per iommu */
-
-
- u16 iommu_did[DMAR_UNITS_SUPPORTED];
- /* Domain ids per IOMMU. Use u16 since
- * domain ids are 16 bit wide according
- * to VT-d spec, section 9.3 */
-
- bool has_iotlb_device;
- struct list_head devices; /* all devices' list */
- struct iova_domain iovad; /* iova's that belong to this domain */
-
- struct dma_pte *pgd; /* virtual address */
- int gaw; /* max guest address width */
-
- /* adjusted guest address width, 0 is level 2 30-bit */
- int agaw;
-
- int flags; /* flags to find out type of domain */
-
- int iommu_coherency;/* indicate coherency of iommu access */
- int iommu_snooping; /* indicate snooping control feature*/
- int iommu_count; /* reference count of iommu */
- int iommu_superpage;/* Level of superpages supported:
- 0 == 4KiB (no superpages), 1 == 2MiB,
- 2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
- u64 max_addr; /* maximum mapped address */
-
- struct iommu_domain domain; /* generic domain data structure for
- iommu core */
-};
-
-/* PCI domain-device relationship */
-struct device_domain_info {
- struct list_head link; /* link to domain siblings */
- struct list_head global; /* link to global list */
- u8 bus; /* PCI bus number */
- u8 devfn; /* PCI devfn number */
- u8 pasid_supported:3;
- u8 pasid_enabled:1;
- u8 pri_supported:1;
- u8 pri_enabled:1;
- u8 ats_supported:1;
- u8 ats_enabled:1;
- u8 ats_qdep;
- struct device *dev; /* it's NULL for PCIe-to-PCI bridge */
- struct intel_iommu *iommu; /* IOMMU used by this device */
- struct dmar_domain *domain; /* pointer to domain */
-};
-
struct dmar_rmrr_unit {
struct list_head list; /* list of rmrr units */
struct acpi_dmar_header *hdr; /* ACPI header */
@@ -485,14 +431,37 @@ static int dmar_forcedac;
static int intel_iommu_strict;
static int intel_iommu_superpage = 1;
static int intel_iommu_ecs = 1;
+static int intel_iommu_pasid28;
static int iommu_identity_mapping;
#define IDENTMAP_ALL 1
#define IDENTMAP_GFX 2
#define IDENTMAP_AZALIA 4
-#define ecs_enabled(iommu) (intel_iommu_ecs && ecap_ecs(iommu->ecap))
-#define pasid_enabled(iommu) (ecs_enabled(iommu) && ecap_pasid(iommu->ecap))
+/* Broadwell and Skylake have broken ECS support — normal so-called "second
+ * level" translation of DMA requests-without-PASID doesn't actually happen
+ * unless you also set the NESTE bit in an extended context-entry. Which of
+ * course means that SVM doesn't work because it's trying to do nested
+ * translation of the physical addresses it finds in the process page tables,
+ * through the IOVA->phys mapping found in the "second level" page tables.
+ *
+ * The VT-d specification was retroactively changed to change the definition
+ * of the capability bits and pretend that Broadwell/Skylake never happened...
+ * but unfortunately the wrong bit was changed. It's ECS which is broken, but
+ * for some reason it was the PASID capability bit which was redefined (from
+ * bit 28 on BDW/SKL to bit 40 in future).
+ *
+ * So our test for ECS needs to eschew those implementations which set the old
+ * PASID capabiity bit 28, since those are the ones on which ECS is broken.
+ * Unless we are working around the 'pasid28' limitations, that is, by putting
+ * the device into passthrough mode for normal DMA and thus masking the bug.
+ */
+#define ecs_enabled(iommu) (intel_iommu_ecs && ecap_ecs(iommu->ecap) && \
+ (intel_iommu_pasid28 || !ecap_broken_pasid(iommu->ecap)))
+/* PASID support is thus enabled if ECS is enabled and *either* of the old
+ * or new capability bits are set. */
+#define pasid_enabled(iommu) (ecs_enabled(iommu) && \
+ (ecap_pasid(iommu->ecap) || ecap_broken_pasid(iommu->ecap)))
int intel_iommu_gfx_mapped;
EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
@@ -501,6 +470,27 @@ EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
static DEFINE_SPINLOCK(device_domain_lock);
static LIST_HEAD(device_domain_list);
+/*
+ * Iterate over elements in device_domain_list and call the specified
+ * callback @fn against each element. This helper should only be used
+ * in the context where the device_domain_lock has already been holden.
+ */
+int for_each_device_domain(int (*fn)(struct device_domain_info *info,
+ void *data), void *data)
+{
+ int ret = 0;
+ struct device_domain_info *info;
+
+ assert_spin_locked(&device_domain_lock);
+ list_for_each_entry(info, &device_domain_list, global) {
+ ret = fn(info, data);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
const struct iommu_ops intel_iommu_ops;
static bool translation_pre_enabled(struct intel_iommu *iommu)
@@ -555,6 +545,11 @@ static int __init intel_iommu_setup(char *str)
printk(KERN_INFO
"Intel-IOMMU: disable extended context table support\n");
intel_iommu_ecs = 0;
+ } else if (!strncmp(str, "pasid28", 7)) {
+ printk(KERN_INFO
+ "Intel-IOMMU: enable pre-production PASID support\n");
+ intel_iommu_pasid28 = 1;
+ iommu_identity_mapping |= IDENTMAP_GFX;
} else if (!strncmp(str, "tboot_noforce", 13)) {
printk(KERN_INFO
"Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n");
@@ -602,7 +597,7 @@ static void set_iommu_domain(struct intel_iommu *iommu, u16 did,
domains[did & 0xff] = domain;
}
-static inline void *alloc_pgtable_page(int node)
+void *alloc_pgtable_page(int node)
{
struct page *page;
void *vaddr = NULL;
@@ -613,7 +608,7 @@ static inline void *alloc_pgtable_page(int node)
return vaddr;
}
-static inline void free_pgtable_page(void *vaddr)
+void free_pgtable_page(void *vaddr)
{
free_page((unsigned long)vaddr);
}
@@ -696,7 +691,7 @@ int iommu_calculate_agaw(struct intel_iommu *iommu)
}
/* This functionin only returns single iommu in a domain */
-static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
+struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
{
int iommu_id;
@@ -1474,6 +1469,20 @@ static void iommu_enable_dev_iotlb(struct device_domain_info *info)
return;
pdev = to_pci_dev(info->dev);
+ /* For IOMMU that supports device IOTLB throttling (DIT), we assign
+ * PFSID to the invalidation desc of a VF such that IOMMU HW can gauge
+ * queue depth at PF level. If DIT is not set, PFSID will be treated as
+ * reserved, which should be set to 0.
+ */
+ if (!ecap_dit(info->iommu->ecap))
+ info->pfsid = 0;
+ else {
+ struct pci_dev *pf_pdev;
+
+ /* pdev will be returned if device is not a vf */
+ pf_pdev = pci_physfn(pdev);
+ info->pfsid = PCI_DEVID(pf_pdev->bus->number, pf_pdev->devfn);
+ }
#ifdef CONFIG_INTEL_IOMMU_SVM
/* The PCIe spec, in its wisdom, declares that the behaviour of
@@ -1539,7 +1548,8 @@ static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
sid = info->bus << 8 | info->devfn;
qdep = info->ats_qdep;
- qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
+ qi_flush_dev_iotlb(info->iommu, sid, info->pfsid,
+ qdep, addr, mask);
}
spin_unlock_irqrestore(&device_domain_lock, flags);
}
@@ -1773,7 +1783,7 @@ static void free_dmar_iommu(struct intel_iommu *iommu)
if (pasid_enabled(iommu)) {
if (ecap_prs(iommu->ecap))
intel_svm_finish_prq(iommu);
- intel_svm_free_pasid_tables(iommu);
+ intel_svm_exit(iommu);
}
#endif
}
@@ -2468,6 +2478,7 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
info->dev = dev;
info->domain = domain;
info->iommu = iommu;
+ info->pasid_table = NULL;
if (dev && dev_is_pci(dev)) {
struct pci_dev *pdev = to_pci_dev(info->dev);
@@ -2525,6 +2536,15 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
list_add(&info->global, &device_domain_list);
if (dev)
dev->archdata.iommu = info;
+
+ if (dev && dev_is_pci(dev) && info->pasid_supported) {
+ ret = intel_pasid_alloc_table(dev);
+ if (ret) {
+ __dmar_remove_one_dev_info(info);
+ spin_unlock_irqrestore(&device_domain_lock, flags);
+ return NULL;
+ }
+ }
spin_unlock_irqrestore(&device_domain_lock, flags);
if (dev && domain_context_mapping(domain, dev)) {
@@ -3277,6 +3297,18 @@ static int __init init_dmars(void)
}
for_each_active_iommu(iommu, drhd) {
+ /*
+ * Find the max pasid size of all IOMMU's in the system.
+ * We need to ensure the system pasid table is no bigger
+ * than the smallest supported.
+ */
+ if (pasid_enabled(iommu)) {
+ u32 temp = 2 << ecap_pss(iommu->ecap);
+
+ intel_pasid_max_id = min_t(u32, temp,
+ intel_pasid_max_id);
+ }
+
g_iommus[iommu->seq_id] = iommu;
intel_iommu_init_qi(iommu);
@@ -3332,7 +3364,7 @@ static int __init init_dmars(void)
hw_pass_through = 0;
#ifdef CONFIG_INTEL_IOMMU_SVM
if (pasid_enabled(iommu))
- intel_svm_alloc_pasid_tables(iommu);
+ intel_svm_init(iommu);
#endif
}
@@ -3499,7 +3531,7 @@ static unsigned long intel_alloc_iova(struct device *dev,
return iova_pfn;
}
-static struct dmar_domain *get_valid_domain_for_dev(struct device *dev)
+struct dmar_domain *get_valid_domain_for_dev(struct device *dev)
{
struct dmar_domain *domain, *tmp;
struct dmar_rmrr_unit *rmrr;
@@ -3713,30 +3745,61 @@ static void *intel_alloc_coherent(struct device *dev, size_t size,
dma_addr_t *dma_handle, gfp_t flags,
unsigned long attrs)
{
- void *vaddr;
+ struct page *page = NULL;
+ int order;
- vaddr = dma_direct_alloc(dev, size, dma_handle, flags, attrs);
- if (iommu_no_mapping(dev) || !vaddr)
- return vaddr;
+ size = PAGE_ALIGN(size);
+ order = get_order(size);
- *dma_handle = __intel_map_single(dev, virt_to_phys(vaddr),
- PAGE_ALIGN(size), DMA_BIDIRECTIONAL,
- dev->coherent_dma_mask);
- if (!*dma_handle)
- goto out_free_pages;
- return vaddr;
+ if (!iommu_no_mapping(dev))
+ flags &= ~(GFP_DMA | GFP_DMA32);
+ else if (dev->coherent_dma_mask < dma_get_required_mask(dev)) {
+ if (dev->coherent_dma_mask < DMA_BIT_MASK(32))
+ flags |= GFP_DMA;
+ else
+ flags |= GFP_DMA32;
+ }
+
+ if (gfpflags_allow_blocking(flags)) {
+ unsigned int count = size >> PAGE_SHIFT;
+
+ page = dma_alloc_from_contiguous(dev, count, order, flags);
+ if (page && iommu_no_mapping(dev) &&
+ page_to_phys(page) + size > dev->coherent_dma_mask) {
+ dma_release_from_contiguous(dev, page, count);
+ page = NULL;
+ }
+ }
+
+ if (!page)
+ page = alloc_pages(flags, order);
+ if (!page)
+ return NULL;
+ memset(page_address(page), 0, size);
+
+ *dma_handle = __intel_map_single(dev, page_to_phys(page), size,
+ DMA_BIDIRECTIONAL,
+ dev->coherent_dma_mask);
+ if (*dma_handle)
+ return page_address(page);
+ if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
+ __free_pages(page, order);
-out_free_pages:
- dma_direct_free(dev, size, vaddr, *dma_handle, attrs);
return NULL;
}
static void intel_free_coherent(struct device *dev, size_t size, void *vaddr,
dma_addr_t dma_handle, unsigned long attrs)
{
- if (!iommu_no_mapping(dev))
- intel_unmap(dev, dma_handle, PAGE_ALIGN(size));
- dma_direct_free(dev, size, vaddr, dma_handle, attrs);
+ int order;
+ struct page *page = virt_to_page(vaddr);
+
+ size = PAGE_ALIGN(size);
+ order = get_order(size);
+
+ intel_unmap(dev, dma_handle, size);
+ if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
+ __free_pages(page, order);
}
static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist,
@@ -4295,7 +4358,7 @@ static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
#ifdef CONFIG_INTEL_IOMMU_SVM
if (pasid_enabled(iommu))
- intel_svm_alloc_pasid_tables(iommu);
+ intel_svm_init(iommu);
#endif
if (dmaru->ignored) {
@@ -4847,6 +4910,7 @@ static void __dmar_remove_one_dev_info(struct device_domain_info *info)
if (info->dev) {
iommu_disable_dev_iotlb(info);
domain_context_clear(iommu, info->dev);
+ intel_pasid_free_table(info->dev);
}
unlink_domain_info(info);
@@ -5172,22 +5236,16 @@ static void intel_iommu_put_resv_regions(struct device *dev,
#ifdef CONFIG_INTEL_IOMMU_SVM
#define MAX_NR_PASID_BITS (20)
-static inline unsigned long intel_iommu_get_pts(struct intel_iommu *iommu)
+static inline unsigned long intel_iommu_get_pts(struct device *dev)
{
- /*
- * Convert ecap_pss to extend context entry pts encoding, also
- * respect the soft pasid_max value set by the iommu.
- * - number of PASID bits = ecap_pss + 1
- * - number of PASID table entries = 2^(pts + 5)
- * Therefore, pts = ecap_pss - 4
- * e.g. KBL ecap_pss = 0x13, PASID has 20 bits, pts = 15
- */
- if (ecap_pss(iommu->ecap) < 5)
+ int pts, max_pasid;
+
+ max_pasid = intel_pasid_get_dev_max_id(dev);
+ pts = find_first_bit((unsigned long *)&max_pasid, MAX_NR_PASID_BITS);
+ if (pts < 5)
return 0;
- /* pasid_max is encoded as actual number of entries not the bits */
- return find_first_bit((unsigned long *)&iommu->pasid_max,
- MAX_NR_PASID_BITS) - 5;
+ return pts - 5;
}
int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sdev)
@@ -5223,8 +5281,8 @@ int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sd
if (!(ctx_lo & CONTEXT_PASIDE)) {
if (iommu->pasid_state_table)
context[1].hi = (u64)virt_to_phys(iommu->pasid_state_table);
- context[1].lo = (u64)virt_to_phys(iommu->pasid_table) |
- intel_iommu_get_pts(iommu);
+ context[1].lo = (u64)virt_to_phys(info->pasid_table->table) |
+ intel_iommu_get_pts(sdev->dev);
wmb();
/* CONTEXT_TT_MULTI_LEVEL and CONTEXT_TT_DEV_IOTLB are both
@@ -5291,11 +5349,6 @@ struct intel_iommu *intel_svm_device_to_iommu(struct device *dev)
return NULL;
}
- if (!iommu->pasid_table) {
- dev_err(dev, "PASID not enabled on IOMMU; cannot enable SVM\n");
- return NULL;
- }
-
return iommu;
}
#endif /* CONFIG_INTEL_IOMMU_SVM */