Diffstat (limited to 'drivers/iommu/intel/iommu.c')
-rw-r--r--	drivers/iommu/intel/iommu.c	195
1 file changed, 120 insertions(+), 75 deletions(-)
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index e9864e52b0e9..8651f6d4dfa0 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -23,7 +23,7 @@
 #include <linux/spinlock.h>
 #include <linux/pci.h>
 #include <linux/dmar.h>
-#include <linux/dma-mapping.h>
+#include <linux/dma-map-ops.h>
 #include <linux/mempool.h>
 #include <linux/memory.h>
 #include <linux/cpu.h>
@@ -37,7 +37,7 @@
 #include <linux/dmi.h>
 #include <linux/pci-ats.h>
 #include <linux/memblock.h>
-#include <linux/dma-contiguous.h>
+#include <linux/dma-map-ops.h>
 #include <linux/dma-direct.h>
 #include <linux/crash_dump.h>
 #include <linux/numa.h>
@@ -123,29 +123,29 @@ static inline unsigned int level_to_offset_bits(int level)
 	return (level - 1) * LEVEL_STRIDE;
 }
 
-static inline int pfn_level_offset(unsigned long pfn, int level)
+static inline int pfn_level_offset(u64 pfn, int level)
 {
 	return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
 }
 
-static inline unsigned long level_mask(int level)
+static inline u64 level_mask(int level)
 {
-	return -1UL << level_to_offset_bits(level);
+	return -1ULL << level_to_offset_bits(level);
 }
 
-static inline unsigned long level_size(int level)
+static inline u64 level_size(int level)
 {
-	return 1UL << level_to_offset_bits(level);
+	return 1ULL << level_to_offset_bits(level);
 }
 
-static inline unsigned long align_to_level(unsigned long pfn, int level)
+static inline u64 align_to_level(u64 pfn, int level)
 {
 	return (pfn + level_size(level) - 1) & level_mask(level);
 }
 
 static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
 {
-	return 1 << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
+	return 1UL << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
 }
 
 /* VT-d pages must always be _smaller_ than MM pages. Otherwise things
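The u64 conversion above hardens the page-table index math for 32-bit builds, where unsigned long is 32 bits wide and -1UL << level_to_offset_bits(level) would truncate masks once IOVA page-frame numbers exceed 32 bits. Below is a minimal userspace sketch of these helpers, assuming the 9-bit per-level stride iommu.c uses (512 entries per table); the pfn value is invented for illustration.

/*
 * Standalone sketch of the VT-d page-table index math after the u64
 * conversion. LEVEL_STRIDE mirrors the 9-bit stride in iommu.c; the
 * pfn is a made-up value above the 32-bit range.
 */
#include <stdint.h>
#include <stdio.h>

#define LEVEL_STRIDE	9
#define LEVEL_MASK	((1ULL << LEVEL_STRIDE) - 1)

static unsigned int level_to_offset_bits(int level)
{
	return (level - 1) * LEVEL_STRIDE;
}

static int pfn_level_offset(uint64_t pfn, int level)
{
	return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
}

static uint64_t level_mask(int level)
{
	/* -1ULL keeps the high bits set even when pfn exceeds 32 bits */
	return -1ULL << level_to_offset_bits(level);
}

int main(void)
{
	uint64_t pfn = 0x123456789ULL;	/* pfn that would not fit in a 32-bit long */
	int level;

	for (level = 1; level <= 4; level++)
		printf("level %d: index %3d, level base 0x%llx\n",
		       level, pfn_level_offset(pfn, level),
		       (unsigned long long)(pfn & level_mask(level)));
	return 0;
}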
@@ -364,7 +364,6 @@ static int iommu_skip_te_disable;
 int intel_iommu_gfx_mapped;
 EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
 
-#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
 #define DEFER_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-2))
 struct device_domain_info *get_domain_info(struct device *dev)
 {
@@ -374,8 +373,7 @@ struct device_domain_info *get_domain_info(struct device *dev)
 		return NULL;
 
 	info = dev_iommu_priv_get(dev);
-	if (unlikely(info == DUMMY_DEVICE_DOMAIN_INFO ||
-		     info == DEFER_DEVICE_DOMAIN_INFO))
+	if (unlikely(info == DEFER_DEVICE_DOMAIN_INFO))
 		return NULL;
 
 	return info;
@@ -700,12 +698,47 @@ static int domain_update_iommu_superpage(struct dmar_domain *domain,
 	return fls(mask);
 }
 
+static int domain_update_device_node(struct dmar_domain *domain)
+{
+	struct device_domain_info *info;
+	int nid = NUMA_NO_NODE;
+
+	assert_spin_locked(&device_domain_lock);
+
+	if (list_empty(&domain->devices))
+		return NUMA_NO_NODE;
+
+	list_for_each_entry(info, &domain->devices, link) {
+		if (!info->dev)
+			continue;
+
+		/*
+		 * There could be multiple device NUMA nodes, as devices
+		 * within the same domain may sit behind different IOMMUs.
+		 * There is no perfect answer in that case, so take the
+		 * first valid node we find (first come, first served).
+		 */
+		nid = dev_to_node(info->dev);
+		if (nid != NUMA_NO_NODE)
+			break;
+	}
+
+	return nid;
+}
+
 /* Some capabilities may be different across iommus */
 static void domain_update_iommu_cap(struct dmar_domain *domain)
 {
 	domain_update_iommu_coherency(domain);
 	domain->iommu_snooping = domain_update_iommu_snooping(NULL);
 	domain->iommu_superpage = domain_update_iommu_superpage(domain, NULL);
+
+	/*
+	 * If RHSA is missing, default to the NUMA node of an attached
+	 * device as a fallback.
+	 */
+	if (domain->nid == NUMA_NO_NODE)
+		domain->nid = domain_update_device_node(domain);
 }
 
 struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
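domain_update_device_node() deliberately does not try to pick a "best" node when a domain spans devices behind different IOMMUs; it takes the first valid one. Below is a standalone sketch of that first-come, first-served selection, with the device_domain_info list and dev_to_node() replaced by stand-ins.

/*
 * Userspace sketch of the node selection in domain_update_device_node().
 * The array and node values are stand-ins for the real device list.
 */
#include <stdio.h>

#define NUMA_NO_NODE	(-1)

struct fake_dev {
	int node;	/* what dev_to_node() would report */
};

static int pick_domain_node(const struct fake_dev *devs, int count)
{
	int i;

	for (i = 0; i < count; i++) {
		/* first device with a known node wins */
		if (devs[i].node != NUMA_NO_NODE)
			return devs[i].node;
	}
	return NUMA_NO_NODE;
}

int main(void)
{
	struct fake_dev devs[] = { { NUMA_NO_NODE }, { 1 }, { 0 } };

	/* prints 1: the first valid node, not necessarily the closest one */
	printf("domain node: %d\n", pick_domain_node(devs, 3));
	return 0;
}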
@@ -742,11 +775,6 @@ struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
 	return &context[devfn];
 }
 
-static int iommu_dummy(struct device *dev)
-{
-	return dev_iommu_priv_get(dev) == DUMMY_DEVICE_DOMAIN_INFO;
-}
-
 static bool attach_deferred(struct device *dev)
 {
 	return dev_iommu_priv_get(dev) == DEFER_DEVICE_DOMAIN_INFO;
@@ -779,6 +807,53 @@ is_downstream_to_pci_bridge(struct device *dev, struct device *bridge)
 	return false;
 }
 
+static bool quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
+{
+	struct dmar_drhd_unit *drhd;
+	u32 vtbar;
+	int rc;
+
+	/* We know that this device on this chipset has its own IOMMU.
+	 * If we find it under a different IOMMU, then the BIOS is lying
+	 * to us. Hope that the IOMMU for this device is actually
+	 * disabled, and it needs no translation...
+	 */
+	rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
+	if (rc) {
+		/* "can't" happen */
+		dev_info(&pdev->dev, "failed to run vt-d quirk\n");
+		return false;
+	}
+	vtbar &= 0xffff0000;
+
+	/* we know that this iommu should be at offset 0xa000 from vtbar */
+	drhd = dmar_find_matched_drhd_unit(pdev);
+	if (!drhd || drhd->reg_base_addr - vtbar != 0xa000) {
+		pr_warn_once(FW_BUG "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n");
+		add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
+		return true;
+	}
+
+	return false;
+}
+
+static bool iommu_is_dummy(struct intel_iommu *iommu, struct device *dev)
+{
+	if (!iommu || iommu->drhd->ignored)
+		return true;
+
+	if (dev_is_pci(dev)) {
+		struct pci_dev *pdev = to_pci_dev(dev);
+
+		if (pdev->vendor == PCI_VENDOR_ID_INTEL &&
+		    pdev->device == PCI_DEVICE_ID_INTEL_IOAT_SNB &&
+		    quirk_ioat_snb_local_iommu(pdev))
+			return true;
+	}
+
+	return false;
+}
+
 struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
 {
 	struct dmar_drhd_unit *drhd = NULL;
@@ -788,7 +863,7 @@ struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
 	u16 segment = 0;
 	int i;
 
-	if (!dev || iommu_dummy(dev))
+	if (!dev)
 		return NULL;
 
 	if (dev_is_pci(dev)) {
@@ -805,7 +880,7 @@ struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
 		dev = &ACPI_COMPANION(dev)->dev;
 
 	rcu_read_lock();
-	for_each_active_iommu(iommu, drhd) {
+	for_each_iommu(iommu, drhd) {
 		if (pdev && segment != drhd->segment)
 			continue;
 
@@ -841,6 +916,9 @@ struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
 	}
 	iommu = NULL;
  out:
+	if (iommu_is_dummy(iommu, dev))
+		iommu = NULL;
+
 	rcu_read_unlock();
 
 	return iommu;
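With DUMMY_DEVICE_DOMAIN_INFO gone, device_to_iommu() now decides at lookup time whether an IOMMU is a dummy: either its DRHD is ignored, or it serves the Sandy Bridge IOAT device whose DRHD base does not sit exactly 0xa000 above the chipset's VTBAR register. Below is a userspace sketch of that address check; the register values are invented for illustration.

/*
 * Sketch of the SNB IOAT quirk check that device_to_iommu() performs
 * at lookup time instead of via a PCI fixup. Values are made up.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool bios_vtd_unit_is_wrong(uint32_t vtbar_reg, uint64_t drhd_base)
{
	uint64_t vtbar = vtbar_reg & 0xffff0000;	/* same mask as the quirk */

	/* the IOAT unit's IOMMU is expected at vtbar + 0xa000 */
	return drhd_base - vtbar != 0xa000;
}

int main(void)
{
	/* firmware consistent: DRHD at vtbar + 0xa000, trust the mapping */
	printf("%d\n", bios_vtd_unit_is_wrong(0xfed90000, 0xfed9a000)); /* 0 */
	/* mismatch: treat the IOMMU as a dummy and skip translation */
	printf("%d\n", bios_vtd_unit_is_wrong(0xfed90000, 0xfed80000)); /* 1 */
	return 0;
}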
@@ -2447,7 +2525,7 @@ struct dmar_domain *find_domain(struct device *dev)
 {
 	struct device_domain_info *info;
 
-	if (unlikely(attach_deferred(dev) || iommu_dummy(dev)))
+	if (unlikely(attach_deferred(dev)))
 		return NULL;
 
 	/* No lock here, assumes no domain exit in normal case */
@@ -2484,7 +2562,7 @@ dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
 static int domain_setup_first_level(struct intel_iommu *iommu,
 				    struct dmar_domain *domain,
 				    struct device *dev,
-				    int pasid)
+				    u32 pasid)
 {
 	int flags = PASID_FLAG_SUPERVISOR_MODE;
 	struct dma_pte *pgd = domain->pgd;
@@ -2621,7 +2699,7 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
 		}
 
 		/* Setup the PASID entry for requests without PASID: */
-		spin_lock(&iommu->lock);
+		spin_lock_irqsave(&iommu->lock, flags);
 		if (hw_pass_through && domain_type_is_si(domain))
 			ret = intel_pasid_setup_pass_through(iommu, domain,
 					dev, PASID_RID2PASID);
@@ -2631,7 +2709,7 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
 		else
 			ret = intel_pasid_setup_second_level(iommu, domain,
 					dev, PASID_RID2PASID);
-		spin_unlock(&iommu->lock);
+		spin_unlock_irqrestore(&iommu->lock, flags);
 		if (ret) {
 			dev_err(dev, "Setup RID2PASID failed\n");
 			dmar_remove_one_dev_info(dev);
@@ -3669,6 +3747,8 @@ static const struct dma_map_ops intel_dma_ops = {
 	.dma_supported = dma_direct_supported,
 	.mmap = dma_common_mmap,
 	.get_sgtable = dma_common_get_sgtable,
+	.alloc_pages = dma_common_alloc_pages,
+	.free_pages = dma_common_free_pages,
 	.get_required_mask = intel_get_required_mask,
 };
 
@@ -3736,7 +3816,7 @@ bounce_map_single(struct device *dev, phys_addr_t paddr, size_t size,
 	 */
 	if (!IS_ALIGNED(paddr | size, VTD_PAGE_SIZE)) {
 		tlb_addr = swiotlb_tbl_map_single(dev,
-				__phys_to_dma(dev, io_tlb_start),
+				phys_to_dma_unencrypted(dev, io_tlb_start),
 				paddr, size, aligned_size, dir, attrs);
 		if (tlb_addr == DMA_MAPPING_ERROR) {
 			goto swiotlb_error;
@@ -3922,6 +4002,8 @@ static const struct dma_map_ops bounce_dma_ops = {
 	.sync_sg_for_device = bounce_sync_sg_for_device,
 	.map_resource = bounce_map_resource,
 	.unmap_resource = bounce_unmap_resource,
+	.alloc_pages = dma_common_alloc_pages,
+	.free_pages = dma_common_free_pages,
 	.dma_supported = dma_direct_supported,
 };
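The new .alloc_pages/.free_pages entries wire both ops tables to the common implementations, so callers of the dma_alloc_pages() interface keep working for devices routed through these ops. Below is a sketch of such a caller, assuming the 5.10-era signatures as I understand them (dma_alloc_pages() returning struct page *); the surrounding driver context is hypothetical.

/*
 * Sketch of a driver using dma_alloc_pages(), which these ops tables
 * now back via dma_common_alloc_pages()/dma_common_free_pages().
 * Illustrative only; not code from this driver.
 */
#include <linux/dma-mapping.h>

static struct page *buf_page;
static dma_addr_t buf_dma;

static int example_alloc(struct device *dev)
{
	/* routed to the ops table's .alloc_pages hook */
	buf_page = dma_alloc_pages(dev, PAGE_SIZE, &buf_dma,
				   DMA_BIDIRECTIONAL, GFP_KERNEL);
	return buf_page ? 0 : -ENOMEM;
}

static void example_free(struct device *dev)
{
	dma_free_pages(dev, PAGE_SIZE, buf_page, buf_dma, DMA_BIDIRECTIONAL);
}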
@@ -3989,35 +4071,6 @@ static void __init iommu_exit_mempool(void)
 	iova_cache_put();
 }
 
-static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
-{
-	struct dmar_drhd_unit *drhd;
-	u32 vtbar;
-	int rc;
-
-	/* We know that this device on this chipset has its own IOMMU.
-	 * If we find it under a different IOMMU, then the BIOS is lying
-	 * to us. Hope that the IOMMU for this device is actually
-	 * disabled, and it needs no translation...
-	 */
-	rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
-	if (rc) {
-		/* "can't" happen */
-		dev_info(&pdev->dev, "failed to run vt-d quirk\n");
-		return;
-	}
-	vtbar &= 0xffff0000;
-
-	/* we know that the this iommu should be at offset 0xa000 from vtbar */
-	drhd = dmar_find_matched_drhd_unit(pdev);
-	if (!drhd || drhd->reg_base_addr - vtbar != 0xa000) {
-		pr_warn_once(FW_BUG "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n");
-		add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
-		dev_iommu_priv_set(&pdev->dev, DUMMY_DEVICE_DOMAIN_INFO);
-	}
-}
-DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
-
 static void __init init_no_remapping_devices(void)
 {
 	struct dmar_drhd_unit *drhd;
@@ -4049,12 +4102,8 @@ static void __init init_no_remapping_devices(void)
 
 		/* This IOMMU has *only* gfx devices. Either bypass it or
 		   set the gfx_mapped flag, as appropriate */
 		drhd->gfx_dedicated = 1;
-		if (!dmar_map_gfx) {
+		if (!dmar_map_gfx)
 			drhd->ignored = 1;
-			for_each_active_dev_scope(drhd->devices,
-						  drhd->devices_cnt, i, dev)
-				dev_iommu_priv_set(dev, DUMMY_DEVICE_DOMAIN_INFO);
-		}
 	}
 }
 
@@ -5070,7 +5119,6 @@ static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
 
 	switch (type) {
 	case IOMMU_DOMAIN_DMA:
-	/* fallthrough */
 	case IOMMU_DOMAIN_UNMANAGED:
 		dmar_domain = alloc_domain(0);
 		if (!dmar_domain) {
@@ -5086,8 +5134,6 @@ static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
 		if (type == IOMMU_DOMAIN_DMA)
 			intel_init_iova_domain(dmar_domain);
 
-		domain_update_iommu_cap(dmar_domain);
-
 		domain = &dmar_domain->domain;
 		domain->geometry.aperture_start = 0;
 		domain->geometry.aperture_end =
@@ -5164,7 +5210,7 @@ static int aux_domain_add_dev(struct dmar_domain *domain,
 		return -ENODEV;
 
 	if (domain->default_pasid <= 0) {
-		int pasid;
+		u32 pasid;
 
 		/* No private data needed for the default pasid */
 		pasid = ioasid_alloc(NULL, PASID_MIN,
@@ -5399,8 +5445,7 @@ intel_iommu_sva_invalidate(struct iommu_domain *domain, struct device *dev,
 	int ret = 0;
 	u64 size = 0;
 
-	if (!inv_info || !dmar_domain ||
-	    inv_info->version != IOMMU_CACHE_INVALIDATE_INFO_VERSION_1)
+	if (!inv_info || !dmar_domain)
 		return -EINVAL;
 
 	if (!dev || !dev_is_pci(dev))
@@ -5425,8 +5470,8 @@ intel_iommu_sva_invalidate(struct iommu_domain *domain, struct device *dev,
 
 	/* Size is only valid in address selective invalidation */
 	if (inv_info->granularity == IOMMU_INV_GRANU_ADDR)
-		size = to_vtd_size(inv_info->addr_info.granule_size,
-				   inv_info->addr_info.nb_granules);
+		size = to_vtd_size(inv_info->granu.addr_info.granule_size,
+				   inv_info->granu.addr_info.nb_granules);
 
 	for_each_set_bit(cache_type,
 			 (unsigned long *)&inv_info->cache,
@@ -5447,20 +5492,20 @@ intel_iommu_sva_invalidate(struct iommu_domain *domain, struct device *dev,
 		 * granularity.
 		 */
 		if (inv_info->granularity == IOMMU_INV_GRANU_PASID &&
-		    (inv_info->pasid_info.flags & IOMMU_INV_PASID_FLAGS_PASID))
-			pasid = inv_info->pasid_info.pasid;
+		    (inv_info->granu.pasid_info.flags & IOMMU_INV_PASID_FLAGS_PASID))
+			pasid = inv_info->granu.pasid_info.pasid;
 		else if (inv_info->granularity == IOMMU_INV_GRANU_ADDR &&
-			 (inv_info->addr_info.flags & IOMMU_INV_ADDR_FLAGS_PASID))
-			pasid = inv_info->addr_info.pasid;
+			 (inv_info->granu.addr_info.flags & IOMMU_INV_ADDR_FLAGS_PASID))
+			pasid = inv_info->granu.addr_info.pasid;
 
 		switch (BIT(cache_type)) {
 		case IOMMU_CACHE_INV_TYPE_IOTLB:
 			/* HW will ignore LSB bits based on address mask */
 			if (inv_info->granularity == IOMMU_INV_GRANU_ADDR &&
 			    size &&
-			    (inv_info->addr_info.addr & ((BIT(VTD_PAGE_SHIFT + size)) - 1))) {
+			    (inv_info->granu.addr_info.addr & ((BIT(VTD_PAGE_SHIFT + size)) - 1))) {
 				pr_err_ratelimited("User address not aligned, 0x%llx, size order %llu\n",
-						   inv_info->addr_info.addr, size);
+						   inv_info->granu.addr_info.addr, size);
 			}
 
 			/*
@@ -5468,9 +5513,9 @@ intel_iommu_sva_invalidate(struct iommu_domain *domain, struct device *dev,
 			 * We use npages = -1 to indicate that.
 			 */
 			qi_flush_piotlb(iommu, did, pasid,
-					mm_to_dma_pfn(inv_info->addr_info.addr),
+					mm_to_dma_pfn(inv_info->granu.addr_info.addr),
 					(granu == QI_GRAN_NONG_PASID) ? -1 : 1 << size,
-					inv_info->addr_info.flags & IOMMU_INV_ADDR_FLAGS_LEAF);
+					inv_info->granu.addr_info.flags & IOMMU_INV_ADDR_FLAGS_LEAF);
 
 			if (!info->ats_enabled)
 				break;
@@ -5493,7 +5538,7 @@ intel_iommu_sva_invalidate(struct iommu_domain *domain, struct device *dev,
 				size = 64 - VTD_PAGE_SHIFT;
 				addr = 0;
 			} else if (inv_info->granularity == IOMMU_INV_GRANU_ADDR) {
-				addr = inv_info->addr_info.addr;
+				addr = inv_info->granu.addr_info.addr;
 			}
 
 			if (info->ats_enabled)
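The repetitive addr_info/pasid_info renames above track a uapi change that turned the anonymous union inside struct iommu_cache_invalidate_info into a named union, granu; the version check dropped from this function apparently moved into the common uapi code that copies the structure in from userspace. Below is a reduced model of the layout change, with the structs trimmed to the fields this diff touches; the real definitions live in include/uapi/linux/iommu.h.

/*
 * Reduced model of the uapi change behind the ->granu. renames: the
 * union gained a name, so every access spells it out. Fields trimmed.
 */
#include <stdint.h>
#include <stdio.h>

struct cache_inv_info {
	uint8_t granularity;
	union {
		struct { uint64_t pasid; uint64_t flags; } pasid_info;
		struct { uint64_t addr; uint64_t granule_size;
			 uint64_t nb_granules; uint64_t flags; } addr_info;
	} granu;	/* previously anonymous: inv.addr_info.addr */
};

int main(void)
{
	struct cache_inv_info inv = { 0 };

	inv.granu.addr_info.addr = 0x1000;	/* was: inv.addr_info.addr */
	printf("addr: 0x%llx\n",
	       (unsigned long long)inv.granu.addr_info.addr);
	return 0;
}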