diff options
Diffstat (limited to 'drivers/iommu')
27 files changed, 2668 insertions, 1083 deletions
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index 58b4a4dbfc78..aca76383f201 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -303,6 +303,15 @@ config ROCKCHIP_IOMMU Say Y here if you are using a Rockchip SoC that includes an IOMMU device. +config SUN50I_IOMMU + bool "Allwinner H6 IOMMU Support" + depends on ARCH_SUNXI || COMPILE_TEST + select ARM_DMA_USE_IOMMU + select IOMMU_API + select IOMMU_DMA + help + Support for the IOMMU introduced in the Allwinner H6 SoCs. + config TEGRA_IOMMU_GART bool "Tegra GART IOMMU Support" depends on ARCH_TEGRA_2x_SOC @@ -362,7 +371,7 @@ config IPMMU_VMSA config SPAPR_TCE_IOMMU bool "sPAPR TCE IOMMU Support" - depends on PPC_POWERNV || PPC_PSERIES || (PPC && COMPILE_TEST) + depends on PPC_POWERNV || PPC_PSERIES select IOMMU_API help Enables bits of IOMMU API required by VFIO. The iommu_ops @@ -457,7 +466,7 @@ config S390_AP_IOMMU config MTK_IOMMU bool "MTK IOMMU Support" - depends on ARM || ARM64 || COMPILE_TEST + depends on HAS_DMA depends on ARCH_MEDIATEK || COMPILE_TEST select ARM_DMA_USE_IOMMU select IOMMU_API diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile index 9f33fdb3bb05..57cf4ba5e27c 100644 --- a/drivers/iommu/Makefile +++ b/drivers/iommu/Makefile @@ -29,6 +29,7 @@ obj-$(CONFIG_MTK_IOMMU_V1) += mtk_iommu_v1.o obj-$(CONFIG_OMAP_IOMMU) += omap-iommu.o obj-$(CONFIG_OMAP_IOMMU_DEBUG) += omap-iommu-debug.o obj-$(CONFIG_ROCKCHIP_IOMMU) += rockchip-iommu.o +obj-$(CONFIG_SUN50I_IOMMU) += sun50i-iommu.o obj-$(CONFIG_TEGRA_IOMMU_GART) += tegra-gart.o obj-$(CONFIG_TEGRA_IOMMU_SMMU) += tegra-smmu.o obj-$(CONFIG_EXYNOS_IOMMU) += exynos-iommu.o diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index c30367413683..311ef7105c6d 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -22,7 +22,6 @@ #include <linux/dma-direct.h> #include <linux/dma-iommu.h> #include <linux/iommu-helper.h> -#include <linux/iommu.h> #include <linux/delay.h> #include <linux/amd-iommu.h> #include <linux/notifier.h> @@ -43,8 +42,7 @@ #include <asm/gart.h> #include <asm/dma.h> -#include "amd_iommu_proto.h" -#include "amd_iommu_types.h" +#include "amd_iommu.h" #include "irq_remapping.h" #define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28)) @@ -71,6 +69,8 @@ */ #define AMD_IOMMU_PGSIZES ((~0xFFFUL) & ~(2ULL << 38)) +#define DEFAULT_PGTABLE_LEVEL PAGE_MODE_3_LEVEL + static DEFINE_SPINLOCK(pd_bitmap_lock); /* List of all available dev_data structures */ @@ -99,8 +99,9 @@ struct iommu_cmd { struct kmem_cache *amd_iommu_irq_cache; static void update_domain(struct protection_domain *domain); -static int protection_domain_init(struct protection_domain *domain); static void detach_device(struct device *dev); +static void update_and_flush_device_table(struct protection_domain *domain, + struct domain_pgtable *pgtable); /**************************************************************************** * @@ -125,7 +126,8 @@ static inline int get_acpihid_device_id(struct device *dev, return -ENODEV; list_for_each_entry(p, &acpihid_map, list) { - if (acpi_dev_hid_uid_match(adev, p->hid, p->uid)) { + if (acpi_dev_hid_uid_match(adev, p->hid, + p->uid[0] ? p->uid : NULL)) { if (entry) *entry = p; return p->devid; @@ -151,6 +153,26 @@ static struct protection_domain *to_pdomain(struct iommu_domain *dom) return container_of(dom, struct protection_domain, domain); } +static void amd_iommu_domain_get_pgtable(struct protection_domain *domain, + struct domain_pgtable *pgtable) +{ + u64 pt_root = atomic64_read(&domain->pt_root); + + pgtable->root = (u64 *)(pt_root & PAGE_MASK); + pgtable->mode = pt_root & 7; /* lowest 3 bits encode pgtable mode */ +} + +static u64 amd_iommu_domain_encode_pgtable(u64 *root, int mode) +{ + u64 pt_root; + + /* lowest 3 bits encode pgtable mode */ + pt_root = mode & 7; + pt_root |= (u64)root; + + return pt_root; +} + static struct iommu_dev_data *alloc_dev_data(u16 devid) { struct iommu_dev_data *dev_data; @@ -257,12 +279,6 @@ static struct iommu_dev_data *find_dev_data(u16 devid) return dev_data; } -struct iommu_dev_data *get_dev_data(struct device *dev) -{ - return dev->archdata.iommu; -} -EXPORT_SYMBOL(get_dev_data); - /* * Find or create an IOMMU group for a acpihid device. */ @@ -291,16 +307,15 @@ static struct iommu_group *acpihid_device_group(struct device *dev) static bool pci_iommuv2_capable(struct pci_dev *pdev) { static const int caps[] = { - PCI_EXT_CAP_ID_ATS, PCI_EXT_CAP_ID_PRI, PCI_EXT_CAP_ID_PASID, }; int i, pos; - if (pci_ats_disabled()) + if (!pci_ats_supported(pdev)) return false; - for (i = 0; i < 3; ++i) { + for (i = 0; i < 2; ++i) { pos = pci_find_ext_capability(pdev, caps[i]); if (pos == 0) return false; @@ -313,7 +328,7 @@ static bool pdev_pri_erratum(struct pci_dev *pdev, u32 erratum) { struct iommu_dev_data *dev_data; - dev_data = get_dev_data(&pdev->dev); + dev_data = dev_iommu_priv_get(&pdev->dev); return dev_data->errata & (1 << erratum) ? true : false; } @@ -348,7 +363,7 @@ static int iommu_init_device(struct device *dev) struct iommu_dev_data *dev_data; int devid; - if (dev->archdata.iommu) + if (dev_iommu_priv_get(dev)) return 0; devid = get_device_id(dev); @@ -375,7 +390,7 @@ static int iommu_init_device(struct device *dev) dev_data->iommu_v2 = iommu->is_iommu_v2; } - dev->archdata.iommu = dev_data; + dev_iommu_priv_set(dev, dev_data); return 0; } @@ -397,22 +412,16 @@ static void iommu_ignore_device(struct device *dev) static void amd_iommu_uninit_device(struct device *dev) { struct iommu_dev_data *dev_data; - struct amd_iommu *iommu; - int devid; - - devid = get_device_id(dev); - if (devid < 0) - return; - - iommu = amd_iommu_rlookup_table[devid]; - dev_data = search_dev_data(devid); + dev_data = dev_iommu_priv_get(dev); if (!dev_data) return; if (dev_data->domain) detach_device(dev); + dev_iommu_priv_set(dev, NULL); + /* * We keep dev_data around for unplugged devices and reuse it when the * device is re-plugged - not doing so would introduce a ton of races. @@ -475,7 +484,7 @@ static void amd_iommu_report_page_fault(u16 devid, u16 domain_id, pdev = pci_get_domain_bus_and_slot(0, PCI_BUS_NUM(devid), devid & 0xff); if (pdev) - dev_data = get_dev_data(&pdev->dev); + dev_data = dev_iommu_priv_get(&pdev->dev); if (dev_data && __ratelimit(&dev_data->rs)) { pci_err(pdev, "Event logged [IO_PAGE_FAULT domain=0x%04x address=0x%llx flags=0x%04x]\n", @@ -1372,15 +1381,19 @@ static struct page *free_sub_pt(unsigned long root, int mode, return freelist; } -static void free_pagetable(struct protection_domain *domain) +static void free_pagetable(struct domain_pgtable *pgtable) { - unsigned long root = (unsigned long)domain->pt_root; struct page *freelist = NULL; + unsigned long root; - BUG_ON(domain->mode < PAGE_MODE_NONE || - domain->mode > PAGE_MODE_6_LEVEL); + if (pgtable->mode == PAGE_MODE_NONE) + return; + + BUG_ON(pgtable->mode < PAGE_MODE_NONE || + pgtable->mode > PAGE_MODE_6_LEVEL); - freelist = free_sub_pt(root, domain->mode, freelist); + root = (unsigned long)pgtable->root; + freelist = free_sub_pt(root, pgtable->mode, freelist); free_page_list(freelist); } @@ -1394,24 +1407,39 @@ static bool increase_address_space(struct protection_domain *domain, unsigned long address, gfp_t gfp) { + struct domain_pgtable pgtable; unsigned long flags; - bool ret = false; - u64 *pte; + bool ret = true; + u64 *pte, root; spin_lock_irqsave(&domain->lock, flags); - if (address <= PM_LEVEL_SIZE(domain->mode) || - WARN_ON_ONCE(domain->mode == PAGE_MODE_6_LEVEL)) + amd_iommu_domain_get_pgtable(domain, &pgtable); + + if (address <= PM_LEVEL_SIZE(pgtable.mode)) + goto out; + + ret = false; + if (WARN_ON_ONCE(pgtable.mode == PAGE_MODE_6_LEVEL)) goto out; pte = (void *)get_zeroed_page(gfp); if (!pte) goto out; - *pte = PM_LEVEL_PDE(domain->mode, - iommu_virt_to_phys(domain->pt_root)); - domain->pt_root = pte; - domain->mode += 1; + *pte = PM_LEVEL_PDE(pgtable.mode, iommu_virt_to_phys(pgtable.root)); + + pgtable.root = pte; + pgtable.mode += 1; + update_and_flush_device_table(domain, &pgtable); + domain_flush_complete(domain); + + /* + * Device Table needs to be updated and flushed before the new root can + * be published. + */ + root = amd_iommu_domain_encode_pgtable(pte, pgtable.mode); + atomic64_set(&domain->pt_root, root); ret = true; @@ -1428,16 +1456,29 @@ static u64 *alloc_pte(struct protection_domain *domain, gfp_t gfp, bool *updated) { + struct domain_pgtable pgtable; int level, end_lvl; u64 *pte, *page; BUG_ON(!is_power_of_2(page_size)); - while (address > PM_LEVEL_SIZE(domain->mode)) - *updated = increase_address_space(domain, address, gfp) || *updated; + amd_iommu_domain_get_pgtable(domain, &pgtable); - level = domain->mode - 1; - pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)]; + while (address > PM_LEVEL_SIZE(pgtable.mode)) { + /* + * Return an error if there is no memory to update the + * page-table. + */ + if (!increase_address_space(domain, address, gfp)) + return NULL; + + /* Read new values to check if update was successful */ + amd_iommu_domain_get_pgtable(domain, &pgtable); + } + + + level = pgtable.mode - 1; + pte = &pgtable.root[PM_LEVEL_INDEX(level, address)]; address = PAGE_SIZE_ALIGN(address, page_size); end_lvl = PAGE_SIZE_LEVEL(page_size); @@ -1513,16 +1554,19 @@ static u64 *fetch_pte(struct protection_domain *domain, unsigned long address, unsigned long *page_size) { + struct domain_pgtable pgtable; int level; u64 *pte; *page_size = 0; - if (address > PM_LEVEL_SIZE(domain->mode)) + amd_iommu_domain_get_pgtable(domain, &pgtable); + + if (address > PM_LEVEL_SIZE(pgtable.mode)) return NULL; - level = domain->mode - 1; - pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)]; + level = pgtable.mode - 1; + pte = &pgtable.root[PM_LEVEL_INDEX(level, address)]; *page_size = PTE_LEVEL_PAGE_SIZE(level); while (level > 0) { @@ -1637,7 +1681,13 @@ out: unsigned long flags; spin_lock_irqsave(&dom->lock, flags); - update_domain(dom); + /* + * Flush domain TLB(s) and wait for completion. Any Device-Table + * Updates and flushing already happened in + * increase_address_space(). + */ + domain_flush_tlb_pde(dom); + domain_flush_complete(dom); spin_unlock_irqrestore(&dom->lock, flags); } @@ -1756,78 +1806,18 @@ static void free_gcr3_table(struct protection_domain *domain) free_page((unsigned long)domain->gcr3_tbl); } -/* - * Free a domain, only used if something went wrong in the - * allocation path and we need to free an already allocated page table - */ -static void dma_ops_domain_free(struct protection_domain *domain) -{ - if (!domain) - return; - - iommu_put_dma_cookie(&domain->domain); - - free_pagetable(domain); - - if (domain->id) - domain_id_free(domain->id); - - kfree(domain); -} - -/* - * Allocates a new protection domain usable for the dma_ops functions. - * It also initializes the page table and the address allocator data - * structures required for the dma_ops interface - */ -static struct protection_domain *dma_ops_domain_alloc(void) -{ - struct protection_domain *domain; - - domain = kzalloc(sizeof(struct protection_domain), GFP_KERNEL); - if (!domain) - return NULL; - - if (protection_domain_init(domain)) - goto free_domain; - - domain->mode = PAGE_MODE_3_LEVEL; - domain->pt_root = (void *)get_zeroed_page(GFP_KERNEL); - domain->flags = PD_DMA_OPS_MASK; - if (!domain->pt_root) - goto free_domain; - - if (iommu_get_dma_cookie(&domain->domain) == -ENOMEM) - goto free_domain; - - return domain; - -free_domain: - dma_ops_domain_free(domain); - - return NULL; -} - -/* - * little helper function to check whether a given protection domain is a - * dma_ops domain - */ -static bool dma_ops_domain(struct protection_domain *domain) -{ - return domain->flags & PD_DMA_OPS_MASK; -} - static void set_dte_entry(u16 devid, struct protection_domain *domain, + struct domain_pgtable *pgtable, bool ats, bool ppr) { u64 pte_root = 0; u64 flags = 0; u32 old_domid; - if (domain->mode != PAGE_MODE_NONE) - pte_root = iommu_virt_to_phys(domain->pt_root); + if (pgtable->mode != PAGE_MODE_NONE) + pte_root = iommu_virt_to_phys(pgtable->root); - pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK) + pte_root |= (pgtable->mode & DEV_ENTRY_MODE_MASK) << DEV_ENTRY_MODE_SHIFT; pte_root |= DTE_FLAG_IR | DTE_FLAG_IW | DTE_FLAG_V | DTE_FLAG_TV; @@ -1900,6 +1890,7 @@ static void clear_dte_entry(u16 devid) static void do_attach(struct iommu_dev_data *dev_data, struct protection_domain *domain) { + struct domain_pgtable pgtable; struct amd_iommu *iommu; bool ats; @@ -1915,7 +1906,9 @@ static void do_attach(struct iommu_dev_data *dev_data, domain->dev_cnt += 1; /* Update device table */ - set_dte_entry(dev_data->devid, domain, ats, dev_data->iommu_v2); + amd_iommu_domain_get_pgtable(domain, &pgtable); + set_dte_entry(dev_data->devid, domain, &pgtable, + ats, dev_data->iommu_v2); clone_aliases(dev_data->pdev); device_flush_dte(dev_data); @@ -2031,7 +2024,7 @@ static int attach_device(struct device *dev, spin_lock_irqsave(&domain->lock, flags); - dev_data = get_dev_data(dev); + dev_data = dev_iommu_priv_get(dev); spin_lock(&dev_data->lock); @@ -2095,7 +2088,7 @@ static void detach_device(struct device *dev) struct iommu_dev_data *dev_data; unsigned long flags; - dev_data = get_dev_data(dev); + dev_data = dev_iommu_priv_get(dev); domain = dev_data->domain; spin_lock_irqsave(&domain->lock, flags); @@ -2144,7 +2137,7 @@ static struct iommu_device *amd_iommu_probe_device(struct device *dev) iommu = amd_iommu_rlookup_table[devid]; - if (get_dev_data(dev)) + if (dev_iommu_priv_get(dev)) return &iommu->iommu; ret = iommu_init_device(dev); @@ -2174,16 +2167,12 @@ static void amd_iommu_probe_finalize(struct device *dev) static void amd_iommu_release_device(struct device *dev) { + int devid = get_device_id(dev); struct amd_iommu *iommu; - int devid; if (!check_device(dev)) return; - devid = get_device_id(dev); - if (devid < 0) - return; - iommu = amd_iommu_rlookup_table[devid]; amd_iommu_uninit_device(dev); @@ -2224,23 +2213,36 @@ static int amd_iommu_domain_get_attr(struct iommu_domain *domain, * *****************************************************************************/ -static void update_device_table(struct protection_domain *domain) +static void update_device_table(struct protection_domain *domain, + struct domain_pgtable *pgtable) { struct iommu_dev_data *dev_data; list_for_each_entry(dev_data, &domain->dev_list, list) { - set_dte_entry(dev_data->devid, domain, dev_data->ats.enabled, - dev_data->iommu_v2); + set_dte_entry(dev_data->devid, domain, pgtable, + dev_data->ats.enabled, dev_data->iommu_v2); clone_aliases(dev_data->pdev); } } +static void update_and_flush_device_table(struct protection_domain *domain, + struct domain_pgtable *pgtable) +{ + update_device_table(domain, pgtable); + domain_flush_devices(domain); +} + static void update_domain(struct protection_domain *domain) { - update_device_table(domain); + struct domain_pgtable pgtable; - domain_flush_devices(domain); + /* Update device table */ + amd_iommu_domain_get_pgtable(domain, &pgtable); + update_and_flush_device_table(domain, &pgtable); + + /* Flush domain TLB(s) and wait for completion */ domain_flush_tlb_pde(domain); + domain_flush_complete(domain); } int __init amd_iommu_init_api(void) @@ -2308,27 +2310,46 @@ static void cleanup_domain(struct protection_domain *domain) static void protection_domain_free(struct protection_domain *domain) { + struct domain_pgtable pgtable; + if (!domain) return; if (domain->id) domain_id_free(domain->id); + amd_iommu_domain_get_pgtable(domain, &pgtable); + atomic64_set(&domain->pt_root, 0); + free_pagetable(&pgtable); + kfree(domain); } -static int protection_domain_init(struct protection_domain *domain) +static int protection_domain_init(struct protection_domain *domain, int mode) { + u64 *pt_root = NULL, root; + + BUG_ON(mode < PAGE_MODE_NONE || mode > PAGE_MODE_6_LEVEL); + spin_lock_init(&domain->lock); domain->id = domain_id_alloc(); if (!domain->id) return -ENOMEM; INIT_LIST_HEAD(&domain->dev_list); + if (mode != PAGE_MODE_NONE) { + pt_root = (void *)get_zeroed_page(GFP_KERNEL); + if (!pt_root) + return -ENOMEM; + } + + root = amd_iommu_domain_encode_pgtable(pt_root, mode); + atomic64_set(&domain->pt_root, root); + return 0; } -static struct protection_domain *protection_domain_alloc(void) +static struct protection_domain *protection_domain_alloc(int mode) { struct protection_domain *domain; @@ -2336,7 +2357,7 @@ static struct protection_domain *protection_domain_alloc(void) if (!domain) return NULL; - if (protection_domain_init(domain)) + if (protection_domain_init(domain, mode)) goto out_err; return domain; @@ -2349,45 +2370,30 @@ out_err: static struct iommu_domain *amd_iommu_domain_alloc(unsigned type) { - struct protection_domain *pdomain; + struct protection_domain *domain; + int mode = DEFAULT_PGTABLE_LEVEL; - switch (type) { - case IOMMU_DOMAIN_UNMANAGED: - pdomain = protection_domain_alloc(); - if (!pdomain) - return NULL; + if (type == IOMMU_DOMAIN_IDENTITY) + mode = PAGE_MODE_NONE; - pdomain->mode = PAGE_MODE_3_LEVEL; - pdomain->pt_root = (void *)get_zeroed_page(GFP_KERNEL); - if (!pdomain->pt_root) { - protection_domain_free(pdomain); - return NULL; - } + domain = protection_domain_alloc(mode); + if (!domain) + return NULL; - pdomain->domain.geometry.aperture_start = 0; - pdomain->domain.geometry.aperture_end = ~0ULL; - pdomain->domain.geometry.force_aperture = true; + domain->domain.geometry.aperture_start = 0; + domain->domain.geometry.aperture_end = ~0ULL; + domain->domain.geometry.force_aperture = true; - break; - case IOMMU_DOMAIN_DMA: - pdomain = dma_ops_domain_alloc(); - if (!pdomain) { - pr_err("Failed to allocate\n"); - return NULL; - } - break; - case IOMMU_DOMAIN_IDENTITY: - pdomain = protection_domain_alloc(); - if (!pdomain) - return NULL; + if (type == IOMMU_DOMAIN_DMA && + iommu_get_dma_cookie(&domain->domain) == -ENOMEM) + goto free_domain; - pdomain->mode = PAGE_MODE_NONE; - break; - default: - return NULL; - } + return &domain->domain; - return &pdomain->domain; +free_domain: + protection_domain_free(domain); + + return NULL; } static void amd_iommu_domain_free(struct iommu_domain *dom) @@ -2404,27 +2410,19 @@ static void amd_iommu_domain_free(struct iommu_domain *dom) if (!dom) return; - switch (dom->type) { - case IOMMU_DOMAIN_DMA: - /* Now release the domain */ - dma_ops_domain_free(domain); - break; - default: - if (domain->mode != PAGE_MODE_NONE) - free_pagetable(domain); + if (dom->type == IOMMU_DOMAIN_DMA) + iommu_put_dma_cookie(&domain->domain); - if (domain->flags & PD_IOMMUV2_MASK) - free_gcr3_table(domain); + if (domain->flags & PD_IOMMUV2_MASK) + free_gcr3_table(domain); - protection_domain_free(domain); - break; - } + protection_domain_free(domain); } static void amd_iommu_detach_device(struct iommu_domain *dom, struct device *dev) { - struct iommu_dev_data *dev_data = dev->archdata.iommu; + struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev); struct amd_iommu *iommu; int devid; @@ -2462,7 +2460,7 @@ static int amd_iommu_attach_device(struct iommu_domain *dom, if (!check_device(dev)) return -EINVAL; - dev_data = dev->archdata.iommu; + dev_data = dev_iommu_priv_get(dev); dev_data->defer_attach = false; iommu = amd_iommu_rlookup_table[dev_data->devid]; @@ -2493,10 +2491,12 @@ static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova, gfp_t gfp) { struct protection_domain *domain = to_pdomain(dom); + struct domain_pgtable pgtable; int prot = 0; int ret; - if (domain->mode == PAGE_MODE_NONE) + amd_iommu_domain_get_pgtable(domain, &pgtable); + if (pgtable.mode == PAGE_MODE_NONE) return -EINVAL; if (iommu_prot & IOMMU_READ) @@ -2516,8 +2516,10 @@ static size_t amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova, struct iommu_iotlb_gather *gather) { struct protection_domain *domain = to_pdomain(dom); + struct domain_pgtable pgtable; - if (domain->mode == PAGE_MODE_NONE) + amd_iommu_domain_get_pgtable(domain, &pgtable); + if (pgtable.mode == PAGE_MODE_NONE) return 0; return iommu_unmap_page(domain, iova, page_size); @@ -2528,9 +2530,11 @@ static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom, { struct protection_domain *domain = to_pdomain(dom); unsigned long offset_mask, pte_pgsize; + struct domain_pgtable pgtable; u64 *pte, __pte; - if (domain->mode == PAGE_MODE_NONE) + amd_iommu_domain_get_pgtable(domain, &pgtable); + if (pgtable.mode == PAGE_MODE_NONE) return iova; pte = fetch_pte(domain, iova, &pte_pgsize); @@ -2612,12 +2616,14 @@ static void amd_iommu_get_resv_regions(struct device *dev, list_add_tail(®ion->list, head); } -static bool amd_iommu_is_attach_deferred(struct iommu_domain *domain, - struct device *dev) +bool amd_iommu_is_attach_deferred(struct iommu_domain *domain, + struct device *dev) { - struct iommu_dev_data *dev_data = dev->archdata.iommu; + struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev); + return dev_data->defer_attach; } +EXPORT_SYMBOL_GPL(amd_iommu_is_attach_deferred); static void amd_iommu_flush_iotlb_all(struct iommu_domain *domain) { @@ -2640,7 +2646,7 @@ static int amd_iommu_def_domain_type(struct device *dev) { struct iommu_dev_data *dev_data; - dev_data = get_dev_data(dev); + dev_data = dev_iommu_priv_get(dev); if (!dev_data) return 0; @@ -2699,18 +2705,22 @@ EXPORT_SYMBOL(amd_iommu_unregister_ppr_notifier); void amd_iommu_domain_direct_map(struct iommu_domain *dom) { struct protection_domain *domain = to_pdomain(dom); + struct domain_pgtable pgtable; unsigned long flags; spin_lock_irqsave(&domain->lock, flags); + /* First save pgtable configuration*/ + amd_iommu_domain_get_pgtable(domain, &pgtable); + /* Update data structure */ - domain->mode = PAGE_MODE_NONE; + atomic64_set(&domain->pt_root, 0); /* Make changes visible to IOMMUs */ update_domain(domain); /* Page-table is not visible to IOMMU anymore, so free it */ - free_pagetable(domain); + free_pagetable(&pgtable); spin_unlock_irqrestore(&domain->lock, flags); } @@ -2899,9 +2909,11 @@ static u64 *__get_gcr3_pte(u64 *root, int level, int pasid, bool alloc) static int __set_gcr3(struct protection_domain *domain, int pasid, unsigned long cr3) { + struct domain_pgtable pgtable; u64 *pte; - if (domain->mode != PAGE_MODE_NONE) + amd_iommu_domain_get_pgtable(domain, &pgtable); + if (pgtable.mode != PAGE_MODE_NONE) return -EINVAL; pte = __get_gcr3_pte(domain->gcr3_tbl, domain->glx, pasid, true); @@ -2915,9 +2927,11 @@ static int __set_gcr3(struct protection_domain *domain, int pasid, static int __clear_gcr3(struct protection_domain *domain, int pasid) { + struct domain_pgtable pgtable; u64 *pte; - if (domain->mode != PAGE_MODE_NONE) + amd_iommu_domain_get_pgtable(domain, &pgtable); + if (pgtable.mode != PAGE_MODE_NONE) return -EINVAL; pte = __get_gcr3_pte(domain->gcr3_tbl, domain->glx, pasid, false); @@ -2965,7 +2979,7 @@ int amd_iommu_complete_ppr(struct pci_dev *pdev, int pasid, struct amd_iommu *iommu; struct iommu_cmd cmd; - dev_data = get_dev_data(&pdev->dev); + dev_data = dev_iommu_priv_get(&pdev->dev); iommu = amd_iommu_rlookup_table[dev_data->devid]; build_complete_ppr(&cmd, dev_data->devid, pasid, status, @@ -2978,23 +2992,27 @@ EXPORT_SYMBOL(amd_iommu_complete_ppr); struct iommu_domain *amd_iommu_get_v2_domain(struct pci_dev *pdev) { struct protection_domain *pdomain; - struct iommu_domain *io_domain; + struct iommu_dev_data *dev_data; struct device *dev = &pdev->dev; + struct iommu_domain *io_domain; if (!check_device(dev)) return NULL; - pdomain = get_dev_data(dev)->domain; - if (pdomain == NULL && get_dev_data(dev)->defer_attach) { - get_dev_data(dev)->defer_attach = false; - io_domain = iommu_get_domain_for_dev(dev); + dev_data = dev_iommu_priv_get(&pdev->dev); + pdomain = dev_data->domain; + io_domain = iommu_get_domain_for_dev(dev); + + if (pdomain == NULL && dev_data->defer_attach) { + dev_data->defer_attach = false; pdomain = to_pdomain(io_domain); attach_device(dev, pdomain); } + if (pdomain == NULL) return NULL; - if (!dma_ops_domain(pdomain)) + if (io_domain->type != IOMMU_DOMAIN_DMA) return NULL; /* Only return IOMMUv2 domains */ @@ -3012,7 +3030,7 @@ void amd_iommu_enable_device_erratum(struct pci_dev *pdev, u32 erratum) if (!amd_iommu_v2_supported()) return; - dev_data = get_dev_data(&pdev->dev); + dev_data = dev_iommu_priv_get(&pdev->dev); dev_data->errata |= (1 << erratum); } EXPORT_SYMBOL(amd_iommu_enable_device_erratum); @@ -3031,11 +3049,8 @@ int amd_iommu_device_info(struct pci_dev *pdev, memset(info, 0, sizeof(*info)); - if (!pci_ats_disabled()) { - pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS); - if (pos) - info->flags |= AMD_IOMMU_DEVICE_FLAG_ATS_SUP; - } + if (pci_ats_supported(pdev)) + info->flags |= AMD_IOMMU_DEVICE_FLAG_ATS_SUP; pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI); if (pos) diff --git a/drivers/iommu/amd_iommu.h b/drivers/iommu/amd_iommu.h index 12d540d9b59b..f892992c8744 100644 --- a/drivers/iommu/amd_iommu.h +++ b/drivers/iommu/amd_iommu.h @@ -1,9 +1,103 @@ /* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2009-2010 Advanced Micro Devices, Inc. + * Author: Joerg Roedel <jroedel@suse.de> + */ #ifndef AMD_IOMMU_H #define AMD_IOMMU_H -int __init add_special_device(u8 type, u8 id, u16 *devid, bool cmd_line); +#include <linux/iommu.h> + +#include "amd_iommu_types.h" + +extern int amd_iommu_get_num_iommus(void); +extern int amd_iommu_init_dma_ops(void); +extern int amd_iommu_init_passthrough(void); +extern irqreturn_t amd_iommu_int_thread(int irq, void *data); +extern irqreturn_t amd_iommu_int_handler(int irq, void *data); +extern void amd_iommu_apply_erratum_63(u16 devid); +extern void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu); +extern int amd_iommu_init_devices(void); +extern void amd_iommu_uninit_devices(void); +extern void amd_iommu_init_notifier(void); +extern int amd_iommu_init_api(void); + +#ifdef CONFIG_AMD_IOMMU_DEBUGFS +void amd_iommu_debugfs_setup(struct amd_iommu *iommu); +#else +static inline void amd_iommu_debugfs_setup(struct amd_iommu *iommu) {} +#endif + +/* Needed for interrupt remapping */ +extern int amd_iommu_prepare(void); +extern int amd_iommu_enable(void); +extern void amd_iommu_disable(void); +extern int amd_iommu_reenable(int); +extern int amd_iommu_enable_faulting(void); +extern int amd_iommu_guest_ir; + +/* IOMMUv2 specific functions */ +struct iommu_domain; + +extern bool amd_iommu_v2_supported(void); +extern int amd_iommu_register_ppr_notifier(struct notifier_block *nb); +extern int amd_iommu_unregister_ppr_notifier(struct notifier_block *nb); +extern void amd_iommu_domain_direct_map(struct iommu_domain *dom); +extern int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids); +extern int amd_iommu_flush_page(struct iommu_domain *dom, int pasid, + u64 address); +extern int amd_iommu_flush_tlb(struct iommu_domain *dom, int pasid); +extern int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, int pasid, + unsigned long cr3); +extern int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, int pasid); +extern struct iommu_domain *amd_iommu_get_v2_domain(struct pci_dev *pdev); + +#ifdef CONFIG_IRQ_REMAP +extern int amd_iommu_create_irq_domain(struct amd_iommu *iommu); +#else +static inline int amd_iommu_create_irq_domain(struct amd_iommu *iommu) +{ + return 0; +} +#endif + +#define PPR_SUCCESS 0x0 +#define PPR_INVALID 0x1 +#define PPR_FAILURE 0xf + +extern int amd_iommu_complete_ppr(struct pci_dev *pdev, int pasid, + int status, int tag); + +static inline bool is_rd890_iommu(struct pci_dev *pdev) +{ + return (pdev->vendor == PCI_VENDOR_ID_ATI) && + (pdev->device == PCI_DEVICE_ID_RD890_IOMMU); +} + +static inline bool iommu_feature(struct amd_iommu *iommu, u64 f) +{ + if (!(iommu->cap & (1 << IOMMU_CAP_EFR))) + return false; + + return !!(iommu->features & f); +} + +static inline u64 iommu_virt_to_phys(void *vaddr) +{ + return (u64)__sme_set(virt_to_phys(vaddr)); +} + +static inline void *iommu_phys_to_virt(unsigned long paddr) +{ + return phys_to_virt(__sme_clr(paddr)); +} + +extern bool translation_pre_enabled(struct amd_iommu *iommu); +extern bool amd_iommu_is_attach_deferred(struct iommu_domain *domain, + struct device *dev); +extern int __init add_special_device(u8 type, u8 id, u16 *devid, + bool cmd_line); #ifdef CONFIG_DMI void amd_iommu_apply_ivrs_quirks(void); diff --git a/drivers/iommu/amd_iommu_debugfs.c b/drivers/iommu/amd_iommu_debugfs.c index c6a5c737ef09..545372fcc72f 100644 --- a/drivers/iommu/amd_iommu_debugfs.c +++ b/drivers/iommu/amd_iommu_debugfs.c @@ -8,10 +8,9 @@ */ #include <linux/debugfs.h> -#include <linux/iommu.h> #include <linux/pci.h> -#include "amd_iommu_proto.h" -#include "amd_iommu_types.h" + +#include "amd_iommu.h" static struct dentry *amd_iommu_debugfs; static DEFINE_MUTEX(amd_iommu_debugfs_lock); diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index 6be3853a5d97..3faff7f80fd2 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -18,7 +18,6 @@ #include <linux/msi.h> #include <linux/amd-iommu.h> #include <linux/export.h> -#include <linux/iommu.h> #include <linux/kmemleak.h> #include <linux/mem_encrypt.h> #include <asm/pci-direct.h> @@ -32,9 +31,8 @@ #include <asm/irq_remapping.h> #include <linux/crash_dump.h> + #include "amd_iommu.h" -#include "amd_iommu_proto.h" -#include "amd_iommu_types.h" #include "irq_remapping.h" /* @@ -1329,8 +1327,8 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, } case IVHD_DEV_ACPI_HID: { u16 devid; - u8 hid[ACPIHID_HID_LEN] = {0}; - u8 uid[ACPIHID_UID_LEN] = {0}; + u8 hid[ACPIHID_HID_LEN]; + u8 uid[ACPIHID_UID_LEN]; int ret; if (h->type != 0x40) { @@ -1347,6 +1345,7 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, break; } + uid[0] = '\0'; switch (e->uidf) { case UID_NOT_PRESENT: @@ -1361,8 +1360,8 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, break; case UID_IS_CHARACTER: - memcpy(uid, (u8 *)(&e->uid), ACPIHID_UID_LEN - 1); - uid[ACPIHID_UID_LEN - 1] = '\0'; + memcpy(uid, &e->uid, e->uidl); + uid[e->uidl] = '\0'; break; default: @@ -2936,7 +2935,7 @@ static int __init parse_amd_iommu_intr(char *str) { for (; *str; ++str) { if (strncmp(str, "legacy", 6) == 0) { - amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY; + amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA; break; } if (strncmp(str, "vapic", 5) == 0) { diff --git a/drivers/iommu/amd_iommu_proto.h b/drivers/iommu/amd_iommu_proto.h deleted file mode 100644 index 92c2ba6468a0..000000000000 --- a/drivers/iommu/amd_iommu_proto.h +++ /dev/null @@ -1,96 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2009-2010 Advanced Micro Devices, Inc. - * Author: Joerg Roedel <jroedel@suse.de> - */ - -#ifndef _ASM_X86_AMD_IOMMU_PROTO_H -#define _ASM_X86_AMD_IOMMU_PROTO_H - -#include "amd_iommu_types.h" - -extern int amd_iommu_get_num_iommus(void); -extern int amd_iommu_init_dma_ops(void); -extern int amd_iommu_init_passthrough(void); -extern irqreturn_t amd_iommu_int_thread(int irq, void *data); -extern irqreturn_t amd_iommu_int_handler(int irq, void *data); -extern void amd_iommu_apply_erratum_63(u16 devid); -extern void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu); -extern int amd_iommu_init_devices(void); -extern void amd_iommu_uninit_devices(void); -extern void amd_iommu_init_notifier(void); -extern int amd_iommu_init_api(void); - -#ifdef CONFIG_AMD_IOMMU_DEBUGFS -void amd_iommu_debugfs_setup(struct amd_iommu *iommu); -#else -static inline void amd_iommu_debugfs_setup(struct amd_iommu *iommu) {} -#endif - -/* Needed for interrupt remapping */ -extern int amd_iommu_prepare(void); -extern int amd_iommu_enable(void); -extern void amd_iommu_disable(void); -extern int amd_iommu_reenable(int); -extern int amd_iommu_enable_faulting(void); -extern int amd_iommu_guest_ir; - -/* IOMMUv2 specific functions */ -struct iommu_domain; - -extern bool amd_iommu_v2_supported(void); -extern int amd_iommu_register_ppr_notifier(struct notifier_block *nb); -extern int amd_iommu_unregister_ppr_notifier(struct notifier_block *nb); -extern void amd_iommu_domain_direct_map(struct iommu_domain *dom); -extern int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids); -extern int amd_iommu_flush_page(struct iommu_domain *dom, int pasid, - u64 address); -extern int amd_iommu_flush_tlb(struct iommu_domain *dom, int pasid); -extern int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, int pasid, - unsigned long cr3); -extern int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, int pasid); -extern struct iommu_domain *amd_iommu_get_v2_domain(struct pci_dev *pdev); - -#ifdef CONFIG_IRQ_REMAP -extern int amd_iommu_create_irq_domain(struct amd_iommu *iommu); -#else -static inline int amd_iommu_create_irq_domain(struct amd_iommu *iommu) -{ - return 0; -} -#endif - -#define PPR_SUCCESS 0x0 -#define PPR_INVALID 0x1 -#define PPR_FAILURE 0xf - -extern int amd_iommu_complete_ppr(struct pci_dev *pdev, int pasid, - int status, int tag); - -static inline bool is_rd890_iommu(struct pci_dev *pdev) -{ - return (pdev->vendor == PCI_VENDOR_ID_ATI) && - (pdev->device == PCI_DEVICE_ID_RD890_IOMMU); -} - -static inline bool iommu_feature(struct amd_iommu *iommu, u64 f) -{ - if (!(iommu->cap & (1 << IOMMU_CAP_EFR))) - return false; - - return !!(iommu->features & f); -} - -static inline u64 iommu_virt_to_phys(void *vaddr) -{ - return (u64)__sme_set(virt_to_phys(vaddr)); -} - -static inline void *iommu_phys_to_virt(unsigned long paddr) -{ - return phys_to_virt(__sme_clr(paddr)); -} - -extern bool translation_pre_enabled(struct amd_iommu *iommu); -extern struct iommu_dev_data *get_dev_data(struct device *dev); -#endif /* _ASM_X86_AMD_IOMMU_PROTO_H */ diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h index d0d7b6a0c3d8..30a5d412255a 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h @@ -395,10 +395,10 @@ #define PD_IOMMUV2_MASK (1UL << 3) /* domain has gcr3 table */ extern bool amd_iommu_dump; -#define DUMP_printk(format, arg...) \ - do { \ - if (amd_iommu_dump) \ - printk(KERN_INFO "AMD-Vi: " format, ## arg); \ +#define DUMP_printk(format, arg...) \ + do { \ + if (amd_iommu_dump) \ + pr_info("AMD-Vi: " format, ## arg); \ } while(0); /* global flag if IOMMUs cache non-present entries */ @@ -468,8 +468,7 @@ struct protection_domain { iommu core code */ spinlock_t lock; /* mostly used to lock the page table*/ u16 id; /* the domain id written to the device table */ - int mode; /* paging mode (0-6 levels) */ - u64 *pt_root; /* page table root pointer */ + atomic64_t pt_root; /* pgtable root and pgtable mode */ int glx; /* Number of levels for GCR3 table */ u64 *gcr3_tbl; /* Guest CR3 table */ unsigned long flags; /* flags to find out type of domain */ @@ -477,6 +476,12 @@ struct protection_domain { unsigned dev_iommu[MAX_IOMMUS]; /* per-IOMMU reference count */ }; +/* For decocded pt_root */ +struct domain_pgtable { + int mode; + u64 *root; +}; + /* * Structure where we save information about one hardware AMD IOMMU in the * system. diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c index d6d85debd01b..c8a7b6b39222 100644 --- a/drivers/iommu/amd_iommu_v2.c +++ b/drivers/iommu/amd_iommu_v2.c @@ -13,13 +13,11 @@ #include <linux/module.h> #include <linux/sched.h> #include <linux/sched/mm.h> -#include <linux/iommu.h> #include <linux/wait.h> #include <linux/pci.h> #include <linux/gfp.h> -#include "amd_iommu_types.h" -#include "amd_iommu_proto.h" +#include "amd_iommu.h" MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Joerg Roedel <jroedel@suse.de>"); @@ -517,13 +515,12 @@ static int ppr_notifier(struct notifier_block *nb, unsigned long e, void *data) struct amd_iommu_fault *iommu_fault; struct pasid_state *pasid_state; struct device_state *dev_state; + struct pci_dev *pdev = NULL; unsigned long flags; struct fault *fault; bool finish; u16 tag, devid; int ret; - struct iommu_dev_data *dev_data; - struct pci_dev *pdev = NULL; iommu_fault = data; tag = iommu_fault->tag & 0x1ff; @@ -534,12 +531,11 @@ static int ppr_notifier(struct notifier_block *nb, unsigned long e, void *data) devid & 0xff); if (!pdev) return -ENODEV; - dev_data = get_dev_data(&pdev->dev); - /* In kdump kernel pci dev is not initialized yet -> send INVALID */ ret = NOTIFY_DONE; - if (translation_pre_enabled(amd_iommu_rlookup_table[devid]) - && dev_data->defer_attach) { + + /* In kdump kernel pci dev is not initialized yet -> send INVALID */ + if (amd_iommu_is_attach_deferred(NULL, &pdev->dev)) { amd_iommu_complete_ppr(pdev, iommu_fault->pasid, PPR_INVALID, tag); goto out; diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 8a908c50c306..f578677a5c41 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -2663,26 +2663,20 @@ static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master) } } -#ifdef CONFIG_PCI_ATS static bool arm_smmu_ats_supported(struct arm_smmu_master *master) { - struct pci_dev *pdev; + struct device *dev = master->dev; struct arm_smmu_device *smmu = master->smmu; - struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev); + struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); - if (!(smmu->features & ARM_SMMU_FEAT_ATS) || !dev_is_pci(master->dev) || - !(fwspec->flags & IOMMU_FWSPEC_PCI_RC_ATS) || pci_ats_disabled()) + if (!(smmu->features & ARM_SMMU_FEAT_ATS)) return false; - pdev = to_pci_dev(master->dev); - return !pdev->untrusted && pdev->ats_cap; -} -#else -static bool arm_smmu_ats_supported(struct arm_smmu_master *master) -{ - return false; + if (!(fwspec->flags & IOMMU_FWSPEC_PCI_RC_ATS)) + return false; + + return dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev)); } -#endif static void arm_smmu_enable_ats(struct arm_smmu_master *master) { diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index f77dae7ba7d4..60a2970c37ff 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -963,6 +963,7 @@ static int map_iommu(struct intel_iommu *iommu, u64 phys_addr) warn_invalid_dmar(phys_addr, " returns all ones"); goto unmap; } + iommu->vccap = dmar_readq(iommu->reg + DMAR_VCCAP_REG); /* the registers might be more than one page */ map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap), @@ -1156,12 +1157,11 @@ static inline void reclaim_free_desc(struct q_inval *qi) } } -static int qi_check_fault(struct intel_iommu *iommu, int index) +static int qi_check_fault(struct intel_iommu *iommu, int index, int wait_index) { u32 fault; int head, tail; struct q_inval *qi = iommu->qi; - int wait_index = (index + 1) % QI_LENGTH; int shift = qi_shift(iommu); if (qi->desc_status[wait_index] == QI_ABORT) @@ -1224,17 +1224,21 @@ static int qi_check_fault(struct intel_iommu *iommu, int index) } /* - * Submit the queued invalidation descriptor to the remapping - * hardware unit and wait for its completion. + * Function to submit invalidation descriptors of all types to the queued + * invalidation interface(QI). Multiple descriptors can be submitted at a + * time, a wait descriptor will be appended to each submission to ensure + * hardware has completed the invalidation before return. Wait descriptors + * can be part of the submission but it will not be polled for completion. */ -int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu) +int qi_submit_sync(struct intel_iommu *iommu, struct qi_desc *desc, + unsigned int count, unsigned long options) { - int rc; struct q_inval *qi = iommu->qi; - int offset, shift, length; struct qi_desc wait_desc; int wait_index, index; unsigned long flags; + int offset, shift; + int rc, i; if (!qi) return 0; @@ -1243,32 +1247,41 @@ restart: rc = 0; raw_spin_lock_irqsave(&qi->q_lock, flags); - while (qi->free_cnt < 3) { + /* + * Check if we have enough empty slots in the queue to submit, + * the calculation is based on: + * # of desc + 1 wait desc + 1 space between head and tail + */ + while (qi->free_cnt < count + 2) { raw_spin_unlock_irqrestore(&qi->q_lock, flags); cpu_relax(); raw_spin_lock_irqsave(&qi->q_lock, flags); } index = qi->free_head; - wait_index = (index + 1) % QI_LENGTH; + wait_index = (index + count) % QI_LENGTH; shift = qi_shift(iommu); - length = 1 << shift; - qi->desc_status[index] = qi->desc_status[wait_index] = QI_IN_USE; + for (i = 0; i < count; i++) { + offset = ((index + i) % QI_LENGTH) << shift; + memcpy(qi->desc + offset, &desc[i], 1 << shift); + qi->desc_status[(index + i) % QI_LENGTH] = QI_IN_USE; + } + qi->desc_status[wait_index] = QI_IN_USE; - offset = index << shift; - memcpy(qi->desc + offset, desc, length); wait_desc.qw0 = QI_IWD_STATUS_DATA(QI_DONE) | QI_IWD_STATUS_WRITE | QI_IWD_TYPE; + if (options & QI_OPT_WAIT_DRAIN) + wait_desc.qw0 |= QI_IWD_PRQ_DRAIN; wait_desc.qw1 = virt_to_phys(&qi->desc_status[wait_index]); wait_desc.qw2 = 0; wait_desc.qw3 = 0; offset = wait_index << shift; - memcpy(qi->desc + offset, &wait_desc, length); + memcpy(qi->desc + offset, &wait_desc, 1 << shift); - qi->free_head = (qi->free_head + 2) % QI_LENGTH; - qi->free_cnt -= 2; + qi->free_head = (qi->free_head + count + 1) % QI_LENGTH; + qi->free_cnt -= count + 1; /* * update the HW tail register indicating the presence of @@ -1284,7 +1297,7 @@ restart: * a deadlock where the interrupt context can wait indefinitely * for free slots in the queue. */ - rc = qi_check_fault(iommu, index); + rc = qi_check_fault(iommu, index, wait_index); if (rc) break; @@ -1293,7 +1306,8 @@ restart: raw_spin_lock(&qi->q_lock); } - qi->desc_status[index] = QI_DONE; + for (i = 0; i < count; i++) + qi->desc_status[(index + i) % QI_LENGTH] = QI_DONE; reclaim_free_desc(qi); raw_spin_unlock_irqrestore(&qi->q_lock, flags); @@ -1317,7 +1331,7 @@ void qi_global_iec(struct intel_iommu *iommu) desc.qw3 = 0; /* should never fail */ - qi_submit_sync(&desc, iommu); + qi_submit_sync(iommu, &desc, 1, 0); } void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm, @@ -1331,7 +1345,7 @@ void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm, desc.qw2 = 0; desc.qw3 = 0; - qi_submit_sync(&desc, iommu); + qi_submit_sync(iommu, &desc, 1, 0); } void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, @@ -1355,7 +1369,7 @@ void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, desc.qw2 = 0; desc.qw3 = 0; - qi_submit_sync(&desc, iommu); + qi_submit_sync(iommu, &desc, 1, 0); } void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid, @@ -1377,7 +1391,7 @@ void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid, desc.qw2 = 0; desc.qw3 = 0; - qi_submit_sync(&desc, iommu); + qi_submit_sync(iommu, &desc, 1, 0); } /* PASID-based IOTLB invalidation */ @@ -1418,7 +1432,46 @@ void qi_flush_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid, u64 addr, QI_EIOTLB_AM(mask); } - qi_submit_sync(&desc, iommu); + qi_submit_sync(iommu, &desc, 1, 0); +} + +/* PASID-based device IOTLB Invalidate */ +void qi_flush_dev_iotlb_pasid(struct intel_iommu *iommu, u16 sid, u16 pfsid, + u32 pasid, u16 qdep, u64 addr, + unsigned int size_order, u64 granu) +{ + unsigned long mask = 1UL << (VTD_PAGE_SHIFT + size_order - 1); + struct qi_desc desc = {.qw1 = 0, .qw2 = 0, .qw3 = 0}; + + desc.qw0 = QI_DEV_EIOTLB_PASID(pasid) | QI_DEV_EIOTLB_SID(sid) | + QI_DEV_EIOTLB_QDEP(qdep) | QI_DEIOTLB_TYPE | + QI_DEV_IOTLB_PFSID(pfsid); + desc.qw1 = QI_DEV_EIOTLB_GLOB(granu); + + /* + * If S bit is 0, we only flush a single page. If S bit is set, + * The least significant zero bit indicates the invalidation address + * range. VT-d spec 6.5.2.6. + * e.g. address bit 12[0] indicates 8KB, 13[0] indicates 16KB. + * size order = 0 is PAGE_SIZE 4KB + * Max Invs Pending (MIP) is set to 0 for now until we have DIT in + * ECAP. + */ + desc.qw1 |= addr & ~mask; + if (size_order) + desc.qw1 |= QI_DEV_EIOTLB_SIZE; + + qi_submit_sync(iommu, &desc, 1, 0); +} + +void qi_flush_pasid_cache(struct intel_iommu *iommu, u16 did, + u64 granu, int pasid) +{ + struct qi_desc desc = {.qw1 = 0, .qw2 = 0, .qw3 = 0}; + + desc.qw0 = QI_PC_PASID(pasid) | QI_PC_DID(did) | + QI_PC_GRAN(granu) | QI_PC_TYPE; + qi_submit_sync(iommu, &desc, 1, 0); } /* diff --git a/drivers/iommu/hyperv-iommu.c b/drivers/iommu/hyperv-iommu.c index a386b83e0e34..3c0c67a99c7b 100644 --- a/drivers/iommu/hyperv-iommu.c +++ b/drivers/iommu/hyperv-iommu.c @@ -131,7 +131,7 @@ static int hyperv_irq_remapping_activate(struct irq_domain *domain, return 0; } -static struct irq_domain_ops hyperv_ir_domain_ops = { +static const struct irq_domain_ops hyperv_ir_domain_ops = { .alloc = hyperv_irq_remapping_alloc, .free = hyperv_irq_remapping_free, .activate = hyperv_irq_remapping_activate, diff --git a/drivers/iommu/intel-iommu-debugfs.c b/drivers/iommu/intel-iommu-debugfs.c index 3eb1fe240fb0..cf1ebb98e418 100644 --- a/drivers/iommu/intel-iommu-debugfs.c +++ b/drivers/iommu/intel-iommu-debugfs.c @@ -372,6 +372,66 @@ static int domain_translation_struct_show(struct seq_file *m, void *unused) } DEFINE_SHOW_ATTRIBUTE(domain_translation_struct); +static void invalidation_queue_entry_show(struct seq_file *m, + struct intel_iommu *iommu) +{ + int index, shift = qi_shift(iommu); + struct qi_desc *desc; + int offset; + + if (ecap_smts(iommu->ecap)) + seq_puts(m, "Index\t\tqw0\t\t\tqw1\t\t\tqw2\t\t\tqw3\t\t\tstatus\n"); + else + seq_puts(m, "Index\t\tqw0\t\t\tqw1\t\t\tstatus\n"); + + for (index = 0; index < QI_LENGTH; index++) { + offset = index << shift; + desc = iommu->qi->desc + offset; + if (ecap_smts(iommu->ecap)) + seq_printf(m, "%5d\t%016llx\t%016llx\t%016llx\t%016llx\t%016x\n", + index, desc->qw0, desc->qw1, + desc->qw2, desc->qw3, + iommu->qi->desc_status[index]); + else + seq_printf(m, "%5d\t%016llx\t%016llx\t%016x\n", + index, desc->qw0, desc->qw1, + iommu->qi->desc_status[index]); + } +} + +static int invalidation_queue_show(struct seq_file *m, void *unused) +{ + struct dmar_drhd_unit *drhd; + struct intel_iommu *iommu; + unsigned long flags; + struct q_inval *qi; + int shift; + + rcu_read_lock(); + for_each_active_iommu(iommu, drhd) { + qi = iommu->qi; + shift = qi_shift(iommu); + + if (!qi || !ecap_qis(iommu->ecap)) + continue; + + seq_printf(m, "Invalidation queue on IOMMU: %s\n", iommu->name); + + raw_spin_lock_irqsave(&qi->q_lock, flags); + seq_printf(m, " Base: 0x%llx\tHead: %lld\tTail: %lld\n", + (u64)virt_to_phys(qi->desc), + dmar_readq(iommu->reg + DMAR_IQH_REG) >> shift, + dmar_readq(iommu->reg + DMAR_IQT_REG) >> shift); + invalidation_queue_entry_show(m, iommu); + raw_spin_unlock_irqrestore(&qi->q_lock, flags); + seq_putc(m, '\n'); + } + rcu_read_unlock(); + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(invalidation_queue); + #ifdef CONFIG_IRQ_REMAP static void ir_tbl_remap_entry_show(struct seq_file *m, struct intel_iommu *iommu) @@ -490,6 +550,8 @@ void __init intel_iommu_debugfs_init(void) debugfs_create_file("domain_translation_struct", 0444, intel_iommu_debug, NULL, &domain_translation_struct_fops); + debugfs_create_file("invalidation_queue", 0444, intel_iommu_debug, + NULL, &invalidation_queue_fops); #ifdef CONFIG_IRQ_REMAP debugfs_create_file("ir_translation_struct", 0444, intel_iommu_debug, NULL, &ir_translation_struct_fops); diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index b906727f5b85..648a785e078a 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -296,31 +296,6 @@ static inline void context_clear_entry(struct context_entry *context) static struct dmar_domain *si_domain; static int hw_pass_through = 1; -/* si_domain contains mulitple devices */ -#define DOMAIN_FLAG_STATIC_IDENTITY BIT(0) - -/* - * This is a DMA domain allocated through the iommu domain allocation - * interface. But one or more devices belonging to this domain have - * been chosen to use a private domain. We should avoid to use the - * map/unmap/iova_to_phys APIs on it. - */ -#define DOMAIN_FLAG_LOSE_CHILDREN BIT(1) - -/* - * When VT-d works in the scalable mode, it allows DMA translation to - * happen through either first level or second level page table. This - * bit marks that the DMA translation for the domain goes through the - * first level page table, otherwise, it goes through the second level. - */ -#define DOMAIN_FLAG_USE_FIRST_LEVEL BIT(2) - -/* - * Domain represents a virtual machine which demands iommu nested - * translation mode support. - */ -#define DOMAIN_FLAG_NESTING_MODE BIT(3) - #define for_each_domain_iommu(idx, domain) \ for (idx = 0; idx < g_num_of_iommus; idx++) \ if (domain->iommu_refcnt[idx]) @@ -355,11 +330,6 @@ static void domain_exit(struct dmar_domain *domain); static void domain_remove_dev_info(struct dmar_domain *domain); static void dmar_remove_one_dev_info(struct device *dev); static void __dmar_remove_one_dev_info(struct device_domain_info *info); -static void domain_context_clear(struct intel_iommu *iommu, - struct device *dev); -static int domain_detach_iommu(struct dmar_domain *domain, - struct intel_iommu *iommu); -static bool device_is_rmrr_locked(struct device *dev); static int intel_iommu_attach_device(struct iommu_domain *domain, struct device *dev); static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain, @@ -371,11 +341,11 @@ int dmar_disabled = 0; int dmar_disabled = 1; #endif /* CONFIG_INTEL_IOMMU_DEFAULT_ON */ -#ifdef INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON +#ifdef CONFIG_INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON int intel_iommu_sm = 1; #else int intel_iommu_sm; -#endif /* INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON */ +#endif /* CONFIG_INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON */ int intel_iommu_enabled = 0; EXPORT_SYMBOL_GPL(intel_iommu_enabled); @@ -395,6 +365,21 @@ EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped); #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1)) #define DEFER_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-2)) +struct device_domain_info *get_domain_info(struct device *dev) +{ + struct device_domain_info *info; + + if (!dev) + return NULL; + + info = dev->archdata.iommu; + if (unlikely(info == DUMMY_DEVICE_DOMAIN_INFO || + info == DEFER_DEVICE_DOMAIN_INFO)) + return NULL; + + return info; +} + DEFINE_SPINLOCK(device_domain_lock); static LIST_HEAD(device_domain_list); @@ -446,12 +431,6 @@ static void init_translation_status(struct intel_iommu *iommu) iommu->flags |= VTD_FLAG_TRANS_PRE_ENABLED; } -/* Convert generic 'struct iommu_domain to private struct dmar_domain */ -static struct dmar_domain *to_dmar_domain(struct iommu_domain *dom) -{ - return container_of(dom, struct dmar_domain, domain); -} - static int __init intel_iommu_setup(char *str) { if (!str) @@ -480,8 +459,7 @@ static int __init intel_iommu_setup(char *str) pr_info("Intel-IOMMU: scalable mode supported\n"); intel_iommu_sm = 1; } else if (!strncmp(str, "tboot_noforce", 13)) { - printk(KERN_INFO - "Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n"); + pr_info("Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n"); intel_iommu_tboot_noforce = 1; } else if (!strncmp(str, "nobounce", 8)) { pr_info("Intel-IOMMU: No bounce buffer. This could expose security risks of DMA attacks\n"); @@ -1454,8 +1432,7 @@ static void iommu_enable_dev_iotlb(struct device_domain_info *info) !pci_reset_pri(pdev) && !pci_enable_pri(pdev, 32)) info->pri_enabled = 1; #endif - if (!pdev->untrusted && info->ats_supported && - pci_ats_page_aligned(pdev) && + if (info->ats_supported && pci_ats_page_aligned(pdev) && !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) { info->ats_enabled = 1; domain_update_iotlb(info->domain); @@ -1763,6 +1740,9 @@ static void free_dmar_iommu(struct intel_iommu *iommu) if (ecap_prs(iommu->ecap)) intel_svm_finish_prq(iommu); } + if (ecap_vcs(iommu->ecap) && vccap_pasid(iommu->vccap)) + ioasid_unregister_allocator(&iommu->pasid_allocator); + #endif } @@ -1911,11 +1891,6 @@ static int dmar_init_reserved_ranges(void) return 0; } -static void domain_reserve_special_ranges(struct dmar_domain *domain) -{ - copy_reserved_iova(&reserved_iova_list, &domain->iovad); -} - static inline int guestwidth_to_adjustwidth(int gaw) { int agaw; @@ -1930,65 +1905,6 @@ static inline int guestwidth_to_adjustwidth(int gaw) return agaw; } -static int domain_init(struct dmar_domain *domain, struct intel_iommu *iommu, - int guest_width) -{ - int adjust_width, agaw; - unsigned long sagaw; - int ret; - - init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN); - - if (!intel_iommu_strict) { - ret = init_iova_flush_queue(&domain->iovad, - iommu_flush_iova, iova_entry_free); - if (ret) - pr_info("iova flush queue initialization failed\n"); - } - - domain_reserve_special_ranges(domain); - - /* calculate AGAW */ - if (guest_width > cap_mgaw(iommu->cap)) - guest_width = cap_mgaw(iommu->cap); - domain->gaw = guest_width; - adjust_width = guestwidth_to_adjustwidth(guest_width); - agaw = width_to_agaw(adjust_width); - sagaw = cap_sagaw(iommu->cap); - if (!test_bit(agaw, &sagaw)) { - /* hardware doesn't support it, choose a bigger one */ - pr_debug("Hardware doesn't support agaw %d\n", agaw); - agaw = find_next_bit(&sagaw, 5, agaw); - if (agaw >= 5) - return -ENODEV; - } - domain->agaw = agaw; - - if (ecap_coherent(iommu->ecap)) - domain->iommu_coherency = 1; - else - domain->iommu_coherency = 0; - - if (ecap_sc_support(iommu->ecap)) - domain->iommu_snooping = 1; - else - domain->iommu_snooping = 0; - - if (intel_iommu_superpage) - domain->iommu_superpage = fls(cap_super_page_val(iommu->cap)); - else - domain->iommu_superpage = 0; - - domain->nid = iommu->node; - - /* always allocate the top pgd */ - domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid); - if (!domain->pgd) - return -ENOMEM; - __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE); - return 0; -} - static void domain_exit(struct dmar_domain *domain) { @@ -1996,7 +1912,8 @@ static void domain_exit(struct dmar_domain *domain) domain_remove_dev_info(domain); /* destroy iovas */ - put_iova_domain(&domain->iovad); + if (domain->domain.type == IOMMU_DOMAIN_DMA) + put_iova_domain(&domain->iovad); if (domain->pgd) { struct page *freelist; @@ -2518,11 +2435,8 @@ struct dmar_domain *find_domain(struct device *dev) if (unlikely(attach_deferred(dev) || iommu_dummy(dev))) return NULL; - if (dev_is_pci(dev)) - dev = &pci_real_dma_dev(to_pci_dev(dev))->dev; - /* No lock here, assumes no domain exit in normal case */ - info = dev->archdata.iommu; + info = get_domain_info(dev); if (likely(info)) return info->domain; @@ -2545,7 +2459,7 @@ dmar_search_domain_by_dev_info(int segment, int bus, int devfn) struct device_domain_info *info; list_for_each_entry(info, &device_domain_list, global) - if (info->iommu->segment == segment && info->bus == bus && + if (info->segment == segment && info->bus == bus && info->devfn == devfn) return info; @@ -2582,6 +2496,12 @@ static int domain_setup_first_level(struct intel_iommu *iommu, flags); } +static bool dev_is_real_dma_subdevice(struct device *dev) +{ + return dev && dev_is_pci(dev) && + pci_real_dma_dev(to_pci_dev(dev)) != to_pci_dev(dev); +} + static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu, int bus, int devfn, struct device *dev, @@ -2596,8 +2516,18 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu, if (!info) return NULL; - info->bus = bus; - info->devfn = devfn; + if (!dev_is_real_dma_subdevice(dev)) { + info->bus = bus; + info->devfn = devfn; + info->segment = iommu->segment; + } else { + struct pci_dev *pdev = to_pci_dev(dev); + + info->bus = pdev->bus->number; + info->devfn = pdev->devfn; + info->segment = pci_domain_nr(pdev->bus); + } + info->ats_supported = info->pasid_supported = info->pri_supported = 0; info->ats_enabled = info->pasid_enabled = info->pri_enabled = 0; info->ats_qdep = 0; @@ -2611,10 +2541,8 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu, if (dev && dev_is_pci(dev)) { struct pci_dev *pdev = to_pci_dev(info->dev); - if (!pdev->untrusted && - !pci_ats_disabled() && - ecap_dev_iotlb_support(iommu->ecap) && - pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS) && + if (ecap_dev_iotlb_support(iommu->ecap) && + pci_ats_supported(pdev) && dmar_find_matched_atsr_unit(pdev)) info->ats_supported = 1; @@ -2637,7 +2565,8 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu, if (!found) { struct device_domain_info *info2; - info2 = dmar_search_domain_by_dev_info(iommu->segment, bus, devfn); + info2 = dmar_search_domain_by_dev_info(info->segment, info->bus, + info->devfn); if (info2) { found = info2->domain; info2->dev = dev; @@ -2704,108 +2633,10 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu, return domain; } -static int get_last_alias(struct pci_dev *pdev, u16 alias, void *opaque) -{ - *(u16 *)opaque = alias; - return 0; -} - -static struct dmar_domain *find_or_alloc_domain(struct device *dev, int gaw) -{ - struct device_domain_info *info; - struct dmar_domain *domain = NULL; - struct intel_iommu *iommu; - u16 dma_alias; - unsigned long flags; - u8 bus, devfn; - - iommu = device_to_iommu(dev, &bus, &devfn); - if (!iommu) - return NULL; - - if (dev_is_pci(dev)) { - struct pci_dev *pdev = to_pci_dev(dev); - - pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias); - - spin_lock_irqsave(&device_domain_lock, flags); - info = dmar_search_domain_by_dev_info(pci_domain_nr(pdev->bus), - PCI_BUS_NUM(dma_alias), - dma_alias & 0xff); - if (info) { - iommu = info->iommu; - domain = info->domain; - } - spin_unlock_irqrestore(&device_domain_lock, flags); - - /* DMA alias already has a domain, use it */ - if (info) - goto out; - } - - /* Allocate and initialize new domain for the device */ - domain = alloc_domain(0); - if (!domain) - return NULL; - if (domain_init(domain, iommu, gaw)) { - domain_exit(domain); - return NULL; - } - -out: - return domain; -} - -static struct dmar_domain *set_domain_for_dev(struct device *dev, - struct dmar_domain *domain) -{ - struct intel_iommu *iommu; - struct dmar_domain *tmp; - u16 req_id, dma_alias; - u8 bus, devfn; - - iommu = device_to_iommu(dev, &bus, &devfn); - if (!iommu) - return NULL; - - req_id = ((u16)bus << 8) | devfn; - - if (dev_is_pci(dev)) { - struct pci_dev *pdev = to_pci_dev(dev); - - pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias); - - /* register PCI DMA alias device */ - if (req_id != dma_alias) { - tmp = dmar_insert_one_dev_info(iommu, PCI_BUS_NUM(dma_alias), - dma_alias & 0xff, NULL, domain); - - if (!tmp || tmp != domain) - return tmp; - } - } - - tmp = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain); - if (!tmp || tmp != domain) - return tmp; - - return domain; -} - static int iommu_domain_identity_map(struct dmar_domain *domain, - unsigned long long start, - unsigned long long end) + unsigned long first_vpfn, + unsigned long last_vpfn) { - unsigned long first_vpfn = start >> VTD_PAGE_SHIFT; - unsigned long last_vpfn = end >> VTD_PAGE_SHIFT; - - if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn), - dma_to_mm_pfn(last_vpfn))) { - pr_err("Reserving iova failed\n"); - return -ENOMEM; - } - - pr_debug("Mapping reserved region %llx-%llx\n", start, end); /* * RMRR range might have overlap with physical memory range, * clear it first @@ -2817,45 +2648,6 @@ static int iommu_domain_identity_map(struct dmar_domain *domain, DMA_PTE_READ|DMA_PTE_WRITE); } -static int domain_prepare_identity_map(struct device *dev, - struct dmar_domain *domain, - unsigned long long start, - unsigned long long end) -{ - /* For _hardware_ passthrough, don't bother. But for software - passthrough, we do it anyway -- it may indicate a memory - range which is reserved in E820, so which didn't get set - up to start with in si_domain */ - if (domain == si_domain && hw_pass_through) { - dev_warn(dev, "Ignoring identity map for HW passthrough [0x%Lx - 0x%Lx]\n", - start, end); - return 0; - } - - dev_info(dev, "Setting identity map [0x%Lx - 0x%Lx]\n", start, end); - - if (end < start) { - WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n" - "BIOS vendor: %s; Ver: %s; Product Version: %s\n", - dmi_get_system_info(DMI_BIOS_VENDOR), - dmi_get_system_info(DMI_BIOS_VERSION), - dmi_get_system_info(DMI_PRODUCT_VERSION)); - return -EIO; - } - - if (end >> agaw_to_width(domain->agaw)) { - WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n" - "BIOS vendor: %s; Ver: %s; Product Version: %s\n", - agaw_to_width(domain->agaw), - dmi_get_system_info(DMI_BIOS_VENDOR), - dmi_get_system_info(DMI_BIOS_VERSION), - dmi_get_system_info(DMI_PRODUCT_VERSION)); - return -EIO; - } - - return iommu_domain_identity_map(domain, start, end); -} - static int md_domain_init(struct dmar_domain *domain, int guest_width); static int __init si_domain_init(int hw) @@ -2882,7 +2674,8 @@ static int __init si_domain_init(int hw) for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) { ret = iommu_domain_identity_map(si_domain, - PFN_PHYS(start_pfn), PFN_PHYS(end_pfn)); + mm_to_dma_pfn(start_pfn), + mm_to_dma_pfn(end_pfn)); if (ret) return ret; } @@ -2911,17 +2704,6 @@ static int __init si_domain_init(int hw) return 0; } -static int identity_mapping(struct device *dev) -{ - struct device_domain_info *info; - - info = dev->archdata.iommu; - if (info) - return (info->domain == si_domain); - - return 0; -} - static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev) { struct dmar_domain *ndomain; @@ -3048,31 +2830,6 @@ static int device_def_domain_type(struct device *dev) if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev)) return IOMMU_DOMAIN_IDENTITY; - - /* - * We want to start off with all devices in the 1:1 domain, and - * take them out later if we find they can't access all of memory. - * - * However, we can't do this for PCI devices behind bridges, - * because all PCI devices behind the same bridge will end up - * with the same source-id on their transactions. - * - * Practically speaking, we can't change things around for these - * devices at run-time, because we can't be sure there'll be no - * DMA transactions in flight for any of their siblings. - * - * So PCI devices (unless they're on the root bus) as well as - * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of - * the 1:1 domain, just in _case_ one of their siblings turns out - * not to be able to map all of memory. - */ - if (!pci_is_pcie(pdev)) { - if (!pci_is_root_bus(pdev->bus)) - return IOMMU_DOMAIN_DMA; - if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI) - return IOMMU_DOMAIN_DMA; - } else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE) - return IOMMU_DOMAIN_DMA; } return 0; @@ -3297,6 +3054,85 @@ out_unmap: return ret; } +#ifdef CONFIG_INTEL_IOMMU_SVM +static ioasid_t intel_vcmd_ioasid_alloc(ioasid_t min, ioasid_t max, void *data) +{ + struct intel_iommu *iommu = data; + ioasid_t ioasid; + + if (!iommu) + return INVALID_IOASID; + /* + * VT-d virtual command interface always uses the full 20 bit + * PASID range. Host can partition guest PASID range based on + * policies but it is out of guest's control. + */ + if (min < PASID_MIN || max > intel_pasid_max_id) + return INVALID_IOASID; + + if (vcmd_alloc_pasid(iommu, &ioasid)) + return INVALID_IOASID; + + return ioasid; +} + +static void intel_vcmd_ioasid_free(ioasid_t ioasid, void *data) +{ + struct intel_iommu *iommu = data; + + if (!iommu) + return; + /* + * Sanity check the ioasid owner is done at upper layer, e.g. VFIO + * We can only free the PASID when all the devices are unbound. + */ + if (ioasid_find(NULL, ioasid, NULL)) { + pr_alert("Cannot free active IOASID %d\n", ioasid); + return; + } + vcmd_free_pasid(iommu, ioasid); +} + +static void register_pasid_allocator(struct intel_iommu *iommu) +{ + /* + * If we are running in the host, no need for custom allocator + * in that PASIDs are allocated from the host system-wide. + */ + if (!cap_caching_mode(iommu->cap)) + return; + + if (!sm_supported(iommu)) { + pr_warn("VT-d Scalable Mode not enabled, no PASID allocation\n"); + return; + } + + /* + * Register a custom PASID allocator if we are running in a guest, + * guest PASID must be obtained via virtual command interface. + * There can be multiple vIOMMUs in each guest but only one allocator + * is active. All vIOMMU allocators will eventually be calling the same + * host allocator. + */ + if (!ecap_vcs(iommu->ecap) || !vccap_pasid(iommu->vccap)) + return; + + pr_info("Register custom PASID allocator\n"); + iommu->pasid_allocator.alloc = intel_vcmd_ioasid_alloc; + iommu->pasid_allocator.free = intel_vcmd_ioasid_free; + iommu->pasid_allocator.pdata = (void *)iommu; + if (ioasid_register_allocator(&iommu->pasid_allocator)) { + pr_warn("Custom PASID allocator failed, scalable mode disabled\n"); + /* + * Disable scalable mode on this IOMMU if there + * is no custom allocator. Mixing SM capable vIOMMU + * and non-SM vIOMMU are not supported. + */ + intel_iommu_sm = 0; + } +} +#endif + static int __init init_dmars(void) { struct dmar_drhd_unit *drhd; @@ -3414,6 +3250,9 @@ static int __init init_dmars(void) */ for_each_active_iommu(iommu, drhd) { iommu_flush_write_buffer(iommu); +#ifdef CONFIG_INTEL_IOMMU_SVM + register_pasid_allocator(iommu); +#endif iommu_set_root_entry(iommu); iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL); iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH); @@ -3531,100 +3370,6 @@ static unsigned long intel_alloc_iova(struct device *dev, return iova_pfn; } -static struct dmar_domain *get_private_domain_for_dev(struct device *dev) -{ - struct dmar_domain *domain, *tmp; - struct dmar_rmrr_unit *rmrr; - struct device *i_dev; - int i, ret; - - /* Device shouldn't be attached by any domains. */ - domain = find_domain(dev); - if (domain) - return NULL; - - domain = find_or_alloc_domain(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH); - if (!domain) - goto out; - - /* We have a new domain - setup possible RMRRs for the device */ - rcu_read_lock(); - for_each_rmrr_units(rmrr) { - for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt, - i, i_dev) { - if (i_dev != dev) - continue; - - ret = domain_prepare_identity_map(dev, domain, - rmrr->base_address, - rmrr->end_address); - if (ret) - dev_err(dev, "Mapping reserved region failed\n"); - } - } - rcu_read_unlock(); - - tmp = set_domain_for_dev(dev, domain); - if (!tmp || domain != tmp) { - domain_exit(domain); - domain = tmp; - } - -out: - if (!domain) - dev_err(dev, "Allocating domain failed\n"); - else - domain->domain.type = IOMMU_DOMAIN_DMA; - - return domain; -} - -/* Check if the dev needs to go through non-identity map and unmap process.*/ -static bool iommu_need_mapping(struct device *dev) -{ - int ret; - - if (iommu_dummy(dev)) - return false; - - if (unlikely(attach_deferred(dev))) - do_deferred_attach(dev); - - ret = identity_mapping(dev); - if (ret) { - u64 dma_mask = *dev->dma_mask; - - if (dev->coherent_dma_mask && dev->coherent_dma_mask < dma_mask) - dma_mask = dev->coherent_dma_mask; - - if (dma_mask >= dma_direct_get_required_mask(dev)) - return false; - - /* - * 32 bit DMA is removed from si_domain and fall back to - * non-identity mapping. - */ - dmar_remove_one_dev_info(dev); - ret = iommu_request_dma_domain_for_dev(dev); - if (ret) { - struct iommu_domain *domain; - struct dmar_domain *dmar_domain; - - domain = iommu_get_domain_for_dev(dev); - if (domain) { - dmar_domain = to_dmar_domain(domain); - dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN; - } - dmar_remove_one_dev_info(dev); - get_private_domain_for_dev(dev); - } - - dev_info(dev, "32bit DMA uses non-identity mapping\n"); - } - - return true; -} - static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr, size_t size, int dir, u64 dma_mask) { @@ -3638,6 +3383,9 @@ static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr, BUG_ON(dir == DMA_NONE); + if (unlikely(attach_deferred(dev))) + do_deferred_attach(dev); + domain = find_domain(dev); if (!domain) return DMA_MAPPING_ERROR; @@ -3689,20 +3437,15 @@ static dma_addr_t intel_map_page(struct device *dev, struct page *page, enum dma_data_direction dir, unsigned long attrs) { - if (iommu_need_mapping(dev)) - return __intel_map_single(dev, page_to_phys(page) + offset, - size, dir, *dev->dma_mask); - return dma_direct_map_page(dev, page, offset, size, dir, attrs); + return __intel_map_single(dev, page_to_phys(page) + offset, + size, dir, *dev->dma_mask); } static dma_addr_t intel_map_resource(struct device *dev, phys_addr_t phys_addr, size_t size, enum dma_data_direction dir, unsigned long attrs) { - if (iommu_need_mapping(dev)) - return __intel_map_single(dev, phys_addr, size, dir, - *dev->dma_mask); - return dma_direct_map_resource(dev, phys_addr, size, dir, attrs); + return __intel_map_single(dev, phys_addr, size, dir, *dev->dma_mask); } static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size) @@ -3753,17 +3496,13 @@ static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr, size_t size, enum dma_data_direction dir, unsigned long attrs) { - if (iommu_need_mapping(dev)) - intel_unmap(dev, dev_addr, size); - else - dma_direct_unmap_page(dev, dev_addr, size, dir, attrs); + intel_unmap(dev, dev_addr, size); } static void intel_unmap_resource(struct device *dev, dma_addr_t dev_addr, size_t size, enum dma_data_direction dir, unsigned long attrs) { - if (iommu_need_mapping(dev)) - intel_unmap(dev, dev_addr, size); + intel_unmap(dev, dev_addr, size); } static void *intel_alloc_coherent(struct device *dev, size_t size, @@ -3773,8 +3512,8 @@ static void *intel_alloc_coherent(struct device *dev, size_t size, struct page *page = NULL; int order; - if (!iommu_need_mapping(dev)) - return dma_direct_alloc(dev, size, dma_handle, flags, attrs); + if (unlikely(attach_deferred(dev))) + do_deferred_attach(dev); size = PAGE_ALIGN(size); order = get_order(size); @@ -3809,9 +3548,6 @@ static void intel_free_coherent(struct device *dev, size_t size, void *vaddr, int order; struct page *page = virt_to_page(vaddr); - if (!iommu_need_mapping(dev)) - return dma_direct_free(dev, size, vaddr, dma_handle, attrs); - size = PAGE_ALIGN(size); order = get_order(size); @@ -3829,9 +3565,6 @@ static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist, struct scatterlist *sg; int i; - if (!iommu_need_mapping(dev)) - return dma_direct_unmap_sg(dev, sglist, nelems, dir, attrs); - for_each_sg(sglist, sg, nelems, i) { nrpages += aligned_nrpages(sg_dma_address(sg), sg_dma_len(sg)); } @@ -3855,8 +3588,9 @@ static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nele struct intel_iommu *iommu; BUG_ON(dir == DMA_NONE); - if (!iommu_need_mapping(dev)) - return dma_direct_map_sg(dev, sglist, nelems, dir, attrs); + + if (unlikely(attach_deferred(dev))) + do_deferred_attach(dev); domain = find_domain(dev); if (!domain) @@ -3903,8 +3637,6 @@ static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nele static u64 intel_get_required_mask(struct device *dev) { - if (!iommu_need_mapping(dev)) - return dma_direct_get_required_mask(dev); return DMA_BIT_MASK(32); } @@ -4813,58 +4545,37 @@ static int intel_iommu_memory_notifier(struct notifier_block *nb, unsigned long val, void *v) { struct memory_notify *mhp = v; - unsigned long long start, end; - unsigned long start_vpfn, last_vpfn; + unsigned long start_vpfn = mm_to_dma_pfn(mhp->start_pfn); + unsigned long last_vpfn = mm_to_dma_pfn(mhp->start_pfn + + mhp->nr_pages - 1); switch (val) { case MEM_GOING_ONLINE: - start = mhp->start_pfn << PAGE_SHIFT; - end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1; - if (iommu_domain_identity_map(si_domain, start, end)) { - pr_warn("Failed to build identity map for [%llx-%llx]\n", - start, end); + if (iommu_domain_identity_map(si_domain, + start_vpfn, last_vpfn)) { + pr_warn("Failed to build identity map for [%lx-%lx]\n", + start_vpfn, last_vpfn); return NOTIFY_BAD; } break; case MEM_OFFLINE: case MEM_CANCEL_ONLINE: - start_vpfn = mm_to_dma_pfn(mhp->start_pfn); - last_vpfn = mm_to_dma_pfn(mhp->start_pfn + mhp->nr_pages - 1); - while (start_vpfn <= last_vpfn) { - struct iova *iova; + { struct dmar_drhd_unit *drhd; struct intel_iommu *iommu; struct page *freelist; - iova = find_iova(&si_domain->iovad, start_vpfn); - if (iova == NULL) { - pr_debug("Failed get IOVA for PFN %lx\n", - start_vpfn); - break; - } - - iova = split_and_remove_iova(&si_domain->iovad, iova, - start_vpfn, last_vpfn); - if (iova == NULL) { - pr_warn("Failed to split IOVA PFN [%lx-%lx]\n", - start_vpfn, last_vpfn); - return NOTIFY_BAD; - } - - freelist = domain_unmap(si_domain, iova->pfn_lo, - iova->pfn_hi); + freelist = domain_unmap(si_domain, + start_vpfn, last_vpfn); rcu_read_lock(); for_each_active_iommu(iommu, drhd) iommu_flush_iotlb_psi(iommu, si_domain, - iova->pfn_lo, iova_size(iova), + start_vpfn, mhp->nr_pages, !freelist, 0); rcu_read_unlock(); dma_free_pagelist(freelist); - - start_vpfn = iova->pfn_hi + 1; - free_iova_mem(iova); } break; } @@ -4892,8 +4603,9 @@ static void free_all_cpu_cached_iovas(unsigned int cpu) for (did = 0; did < cap_ndoms(iommu->cap); did++) { domain = get_iommu_domain(iommu, (u16)did); - if (!domain) + if (!domain || domain->domain.type != IOMMU_DOMAIN_DMA) continue; + free_cpu_cached_iovas(cpu, &domain->iovad); } } @@ -5186,18 +4898,6 @@ int __init intel_iommu_init(void) } up_write(&dmar_global_lock); -#if defined(CONFIG_X86) && defined(CONFIG_SWIOTLB) - /* - * If the system has no untrusted device or the user has decided - * to disable the bounce page mechanisms, we don't need swiotlb. - * Mark this and the pre-allocated bounce pages will be released - * later. - */ - if (!has_untrusted_dev() || intel_no_bounce) - swiotlb = 0; -#endif - dma_ops = &intel_dma_ops; - init_iommu_pm_ops(); down_read(&dmar_global_lock); @@ -5283,10 +4983,11 @@ static void __dmar_remove_one_dev_info(struct device_domain_info *info) if (info->dev) { if (dev_is_pci(info->dev) && sm_supported(iommu)) intel_pasid_tear_down_entry(iommu, info->dev, - PASID_RID2PASID); + PASID_RID2PASID, false); iommu_disable_dev_iotlb(info); - domain_context_clear(iommu, info->dev); + if (!dev_is_real_dma_subdevice(info->dev)) + domain_context_clear(iommu, info->dev); intel_pasid_free_table(info->dev); } @@ -5296,12 +4997,6 @@ static void __dmar_remove_one_dev_info(struct device_domain_info *info) domain_detach_iommu(domain, iommu); spin_unlock_irqrestore(&iommu->lock, flags); - /* free the private domain */ - if (domain->flags & DOMAIN_FLAG_LOSE_CHILDREN && - !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) && - list_empty(&domain->devices)) - domain_exit(info->domain); - free_devinfo_mem(info); } @@ -5311,9 +5006,8 @@ static void dmar_remove_one_dev_info(struct device *dev) unsigned long flags; spin_lock_irqsave(&device_domain_lock, flags); - info = dev->archdata.iommu; - if (info && info != DEFER_DEVICE_DOMAIN_INFO - && info != DUMMY_DEVICE_DOMAIN_INFO) + info = get_domain_info(dev); + if (info) __dmar_remove_one_dev_info(info); spin_unlock_irqrestore(&device_domain_lock, flags); } @@ -5322,9 +5016,6 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width) { int adjust_width; - init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN); - domain_reserve_special_ranges(domain); - /* calculate AGAW */ domain->gaw = guest_width; adjust_width = guestwidth_to_adjustwidth(guest_width); @@ -5343,11 +5034,21 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width) return 0; } +static void intel_init_iova_domain(struct dmar_domain *dmar_domain) +{ + init_iova_domain(&dmar_domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN); + copy_reserved_iova(&reserved_iova_list, &dmar_domain->iovad); + + if (!intel_iommu_strict && + init_iova_flush_queue(&dmar_domain->iovad, + iommu_flush_iova, iova_entry_free)) + pr_info("iova flush queue initialization failed\n"); +} + static struct iommu_domain *intel_iommu_domain_alloc(unsigned type) { struct dmar_domain *dmar_domain; struct iommu_domain *domain; - int ret; switch (type) { case IOMMU_DOMAIN_DMA: @@ -5364,13 +5065,8 @@ static struct iommu_domain *intel_iommu_domain_alloc(unsigned type) return NULL; } - if (!intel_iommu_strict && type == IOMMU_DOMAIN_DMA) { - ret = init_iova_flush_queue(&dmar_domain->iovad, - iommu_flush_iova, - iova_entry_free); - if (ret) - pr_info("iova flush queue initialization failed\n"); - } + if (type == IOMMU_DOMAIN_DMA) + intel_init_iova_domain(dmar_domain); domain_update_iommu_cap(dmar_domain); @@ -5403,7 +5099,7 @@ static void intel_iommu_domain_free(struct iommu_domain *domain) static inline bool is_aux_domain(struct device *dev, struct iommu_domain *domain) { - struct device_domain_info *info = dev->archdata.iommu; + struct device_domain_info *info = get_domain_info(dev); return info && info->auxd_enabled && domain->type == IOMMU_DOMAIN_UNMANAGED; @@ -5412,7 +5108,7 @@ is_aux_domain(struct device *dev, struct iommu_domain *domain) static void auxiliary_link_device(struct dmar_domain *domain, struct device *dev) { - struct device_domain_info *info = dev->archdata.iommu; + struct device_domain_info *info = get_domain_info(dev); assert_spin_locked(&device_domain_lock); if (WARN_ON(!info)) @@ -5425,7 +5121,7 @@ static void auxiliary_link_device(struct dmar_domain *domain, static void auxiliary_unlink_device(struct dmar_domain *domain, struct device *dev) { - struct device_domain_info *info = dev->archdata.iommu; + struct device_domain_info *info = get_domain_info(dev); assert_spin_locked(&device_domain_lock); if (WARN_ON(!info)) @@ -5513,13 +5209,13 @@ static void aux_domain_remove_dev(struct dmar_domain *domain, return; spin_lock_irqsave(&device_domain_lock, flags); - info = dev->archdata.iommu; + info = get_domain_info(dev); iommu = info->iommu; auxiliary_unlink_device(domain, dev); spin_lock(&iommu->lock); - intel_pasid_tear_down_entry(iommu, dev, domain->default_pasid); + intel_pasid_tear_down_entry(iommu, dev, domain->default_pasid, false); domain_detach_iommu(domain, iommu); spin_unlock(&iommu->lock); @@ -5626,6 +5322,176 @@ static void intel_iommu_aux_detach_device(struct iommu_domain *domain, aux_domain_remove_dev(to_dmar_domain(domain), dev); } +/* + * 2D array for converting and sanitizing IOMMU generic TLB granularity to + * VT-d granularity. Invalidation is typically included in the unmap operation + * as a result of DMA or VFIO unmap. However, for assigned devices guest + * owns the first level page tables. Invalidations of translation caches in the + * guest are trapped and passed down to the host. + * + * vIOMMU in the guest will only expose first level page tables, therefore + * we do not support IOTLB granularity for request without PASID (second level). + * + * For example, to find the VT-d granularity encoding for IOTLB + * type and page selective granularity within PASID: + * X: indexed by iommu cache type + * Y: indexed by enum iommu_inv_granularity + * [IOMMU_CACHE_INV_TYPE_IOTLB][IOMMU_INV_GRANU_ADDR] + */ + +static const int +inv_type_granu_table[IOMMU_CACHE_INV_TYPE_NR][IOMMU_INV_GRANU_NR] = { + /* + * PASID based IOTLB invalidation: PASID selective (per PASID), + * page selective (address granularity) + */ + {-EINVAL, QI_GRAN_NONG_PASID, QI_GRAN_PSI_PASID}, + /* PASID based dev TLBs */ + {-EINVAL, -EINVAL, QI_DEV_IOTLB_GRAN_PASID_SEL}, + /* PASID cache */ + {-EINVAL, -EINVAL, -EINVAL} +}; + +static inline int to_vtd_granularity(int type, int granu) +{ + return inv_type_granu_table[type][granu]; +} + +static inline u64 to_vtd_size(u64 granu_size, u64 nr_granules) +{ + u64 nr_pages = (granu_size * nr_granules) >> VTD_PAGE_SHIFT; + + /* VT-d size is encoded as 2^size of 4K pages, 0 for 4k, 9 for 2MB, etc. + * IOMMU cache invalidate API passes granu_size in bytes, and number of + * granu size in contiguous memory. + */ + return order_base_2(nr_pages); +} + +#ifdef CONFIG_INTEL_IOMMU_SVM +static int +intel_iommu_sva_invalidate(struct iommu_domain *domain, struct device *dev, + struct iommu_cache_invalidate_info *inv_info) +{ + struct dmar_domain *dmar_domain = to_dmar_domain(domain); + struct device_domain_info *info; + struct intel_iommu *iommu; + unsigned long flags; + int cache_type; + u8 bus, devfn; + u16 did, sid; + int ret = 0; + u64 size = 0; + + if (!inv_info || !dmar_domain || + inv_info->version != IOMMU_CACHE_INVALIDATE_INFO_VERSION_1) + return -EINVAL; + + if (!dev || !dev_is_pci(dev)) + return -ENODEV; + + iommu = device_to_iommu(dev, &bus, &devfn); + if (!iommu) + return -ENODEV; + + if (!(dmar_domain->flags & DOMAIN_FLAG_NESTING_MODE)) + return -EINVAL; + + spin_lock_irqsave(&device_domain_lock, flags); + spin_lock(&iommu->lock); + info = get_domain_info(dev); + if (!info) { + ret = -EINVAL; + goto out_unlock; + } + did = dmar_domain->iommu_did[iommu->seq_id]; + sid = PCI_DEVID(bus, devfn); + + /* Size is only valid in address selective invalidation */ + if (inv_info->granularity != IOMMU_INV_GRANU_PASID) + size = to_vtd_size(inv_info->addr_info.granule_size, + inv_info->addr_info.nb_granules); + + for_each_set_bit(cache_type, + (unsigned long *)&inv_info->cache, + IOMMU_CACHE_INV_TYPE_NR) { + int granu = 0; + u64 pasid = 0; + + granu = to_vtd_granularity(cache_type, inv_info->granularity); + if (granu == -EINVAL) { + pr_err_ratelimited("Invalid cache type and granu combination %d/%d\n", + cache_type, inv_info->granularity); + break; + } + + /* + * PASID is stored in different locations based on the + * granularity. + */ + if (inv_info->granularity == IOMMU_INV_GRANU_PASID && + (inv_info->pasid_info.flags & IOMMU_INV_PASID_FLAGS_PASID)) + pasid = inv_info->pasid_info.pasid; + else if (inv_info->granularity == IOMMU_INV_GRANU_ADDR && + (inv_info->addr_info.flags & IOMMU_INV_ADDR_FLAGS_PASID)) + pasid = inv_info->addr_info.pasid; + + switch (BIT(cache_type)) { + case IOMMU_CACHE_INV_TYPE_IOTLB: + if (inv_info->granularity == IOMMU_INV_GRANU_ADDR && + size && + (inv_info->addr_info.addr & ((BIT(VTD_PAGE_SHIFT + size)) - 1))) { + pr_err_ratelimited("Address out of range, 0x%llx, size order %llu\n", + inv_info->addr_info.addr, size); + ret = -ERANGE; + goto out_unlock; + } + + /* + * If granu is PASID-selective, address is ignored. + * We use npages = -1 to indicate that. + */ + qi_flush_piotlb(iommu, did, pasid, + mm_to_dma_pfn(inv_info->addr_info.addr), + (granu == QI_GRAN_NONG_PASID) ? -1 : 1 << size, + inv_info->addr_info.flags & IOMMU_INV_ADDR_FLAGS_LEAF); + + /* + * Always flush device IOTLB if ATS is enabled. vIOMMU + * in the guest may assume IOTLB flush is inclusive, + * which is more efficient. + */ + if (info->ats_enabled) + qi_flush_dev_iotlb_pasid(iommu, sid, + info->pfsid, pasid, + info->ats_qdep, + inv_info->addr_info.addr, + size, granu); + break; + case IOMMU_CACHE_INV_TYPE_DEV_IOTLB: + if (info->ats_enabled) + qi_flush_dev_iotlb_pasid(iommu, sid, + info->pfsid, pasid, + info->ats_qdep, + inv_info->addr_info.addr, + size, granu); + else + pr_warn_ratelimited("Passdown device IOTLB flush w/o ATS!\n"); + break; + default: + dev_err_ratelimited(dev, "Unsupported IOMMU invalidation type %d\n", + cache_type); + ret = -EINVAL; + } + } +out_unlock: + spin_unlock(&iommu->lock); + spin_unlock_irqrestore(&device_domain_lock, flags); + + return ret; +} +#endif + static int intel_iommu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t hpa, size_t size, int iommu_prot, gfp_t gfp) @@ -5793,11 +5659,6 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev) if (translation_pre_enabled(iommu)) dev->archdata.iommu = DEFER_DEVICE_DOMAIN_INFO; - if (device_needs_bounce(dev)) { - dev_info(dev, "Use Intel IOMMU bounce page dma_ops\n"); - set_dma_ops(dev, &bounce_dma_ops); - } - return &iommu->iommu; } @@ -5812,7 +5673,19 @@ static void intel_iommu_release_device(struct device *dev) dmar_remove_one_dev_info(dev); + set_dma_ops(dev, NULL); +} + +static void intel_iommu_probe_finalize(struct device *dev) +{ + struct iommu_domain *domain; + + domain = iommu_get_domain_for_dev(dev); if (device_needs_bounce(dev)) + set_dma_ops(dev, &bounce_dma_ops); + else if (domain && domain->type == IOMMU_DOMAIN_DMA) + set_dma_ops(dev, &intel_dma_ops); + else set_dma_ops(dev, NULL); } @@ -5890,7 +5763,7 @@ int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev) spin_lock(&iommu->lock); ret = -EINVAL; - info = dev->archdata.iommu; + info = get_domain_info(dev); if (!info || !info->pasid_supported) goto out; @@ -5986,7 +5859,7 @@ static int intel_iommu_enable_auxd(struct device *dev) return -ENODEV; spin_lock_irqsave(&device_domain_lock, flags); - info = dev->archdata.iommu; + info = get_domain_info(dev); info->auxd_enabled = 1; spin_unlock_irqrestore(&device_domain_lock, flags); @@ -5999,7 +5872,7 @@ static int intel_iommu_disable_auxd(struct device *dev) unsigned long flags; spin_lock_irqsave(&device_domain_lock, flags); - info = dev->archdata.iommu; + info = get_domain_info(dev); if (!WARN_ON(!info)) info->auxd_enabled = 0; spin_unlock_irqrestore(&device_domain_lock, flags); @@ -6052,6 +5925,14 @@ intel_iommu_dev_has_feat(struct device *dev, enum iommu_dev_features feat) return !!siov_find_pci_dvsec(to_pci_dev(dev)); } + if (feat == IOMMU_DEV_FEAT_SVA) { + struct device_domain_info *info = get_domain_info(dev); + + return info && (info->iommu->flags & VTD_FLAG_SVM_CAPABLE) && + info->pasid_supported && info->pri_supported && + info->ats_supported; + } + return false; } @@ -6061,6 +5942,16 @@ intel_iommu_dev_enable_feat(struct device *dev, enum iommu_dev_features feat) if (feat == IOMMU_DEV_FEAT_AUX) return intel_iommu_enable_auxd(dev); + if (feat == IOMMU_DEV_FEAT_SVA) { + struct device_domain_info *info = get_domain_info(dev); + + if (!info) + return -EINVAL; + + if (info->iommu->flags & VTD_FLAG_SVM_CAPABLE) + return 0; + } + return -ENODEV; } @@ -6076,7 +5967,7 @@ intel_iommu_dev_disable_feat(struct device *dev, enum iommu_dev_features feat) static bool intel_iommu_dev_feat_enabled(struct device *dev, enum iommu_dev_features feat) { - struct device_domain_info *info = dev->archdata.iommu; + struct device_domain_info *info = get_domain_info(dev); if (feat == IOMMU_DEV_FEAT_AUX) return scalable_mode_support() && info && info->auxd_enabled; @@ -6144,6 +6035,7 @@ const struct iommu_ops intel_iommu_ops = { .unmap = intel_iommu_unmap, .iova_to_phys = intel_iommu_iova_to_phys, .probe_device = intel_iommu_probe_device, + .probe_finalize = intel_iommu_probe_finalize, .release_device = intel_iommu_release_device, .get_resv_regions = intel_iommu_get_resv_regions, .put_resv_regions = generic_iommu_put_resv_regions, @@ -6156,6 +6048,14 @@ const struct iommu_ops intel_iommu_ops = { .is_attach_deferred = intel_iommu_is_attach_deferred, .def_domain_type = device_def_domain_type, .pgsize_bitmap = INTEL_IOMMU_PGSIZES, +#ifdef CONFIG_INTEL_IOMMU_SVM + .cache_invalidate = intel_iommu_sva_invalidate, + .sva_bind_gpasid = intel_svm_bind_gpasid, + .sva_unbind_gpasid = intel_svm_unbind_gpasid, + .sva_bind = intel_svm_bind, + .sva_unbind = intel_svm_unbind, + .sva_get_pasid = intel_svm_get_pasid, +#endif }; static void quirk_iommu_igfx(struct pci_dev *dev) diff --git a/drivers/iommu/intel-pasid.c b/drivers/iommu/intel-pasid.c index 22b30f10b396..c81f0f17c6ba 100644 --- a/drivers/iommu/intel-pasid.c +++ b/drivers/iommu/intel-pasid.c @@ -27,6 +27,63 @@ static DEFINE_SPINLOCK(pasid_lock); u32 intel_pasid_max_id = PASID_MAX; +int vcmd_alloc_pasid(struct intel_iommu *iommu, unsigned int *pasid) +{ + unsigned long flags; + u8 status_code; + int ret = 0; + u64 res; + + raw_spin_lock_irqsave(&iommu->register_lock, flags); + dmar_writeq(iommu->reg + DMAR_VCMD_REG, VCMD_CMD_ALLOC); + IOMMU_WAIT_OP(iommu, DMAR_VCRSP_REG, dmar_readq, + !(res & VCMD_VRSP_IP), res); + raw_spin_unlock_irqrestore(&iommu->register_lock, flags); + + status_code = VCMD_VRSP_SC(res); + switch (status_code) { + case VCMD_VRSP_SC_SUCCESS: + *pasid = VCMD_VRSP_RESULT_PASID(res); + break; + case VCMD_VRSP_SC_NO_PASID_AVAIL: + pr_info("IOMMU: %s: No PASID available\n", iommu->name); + ret = -ENOSPC; + break; + default: + ret = -ENODEV; + pr_warn("IOMMU: %s: Unexpected error code %d\n", + iommu->name, status_code); + } + + return ret; +} + +void vcmd_free_pasid(struct intel_iommu *iommu, unsigned int pasid) +{ + unsigned long flags; + u8 status_code; + u64 res; + + raw_spin_lock_irqsave(&iommu->register_lock, flags); + dmar_writeq(iommu->reg + DMAR_VCMD_REG, + VCMD_CMD_OPERAND(pasid) | VCMD_CMD_FREE); + IOMMU_WAIT_OP(iommu, DMAR_VCRSP_REG, dmar_readq, + !(res & VCMD_VRSP_IP), res); + raw_spin_unlock_irqrestore(&iommu->register_lock, flags); + + status_code = VCMD_VRSP_SC(res); + switch (status_code) { + case VCMD_VRSP_SC_SUCCESS: + break; + case VCMD_VRSP_SC_INVALID_PASID: + pr_info("IOMMU: %s: Invalid PASID\n", iommu->name); + break; + default: + pr_warn("IOMMU: %s: Unexpected error code %d\n", + iommu->name, status_code); + } +} + /* * Per device pasid table management: */ @@ -94,7 +151,7 @@ int intel_pasid_alloc_table(struct device *dev) int size; might_sleep(); - info = dev->archdata.iommu; + info = get_domain_info(dev); if (WARN_ON(!info || !dev_is_pci(dev) || info->pasid_table)) return -EINVAL; @@ -141,7 +198,7 @@ void intel_pasid_free_table(struct device *dev) struct pasid_entry *table; int i, max_pde; - info = dev->archdata.iommu; + info = get_domain_info(dev); if (!info || !dev_is_pci(dev) || !info->pasid_table) return; @@ -167,7 +224,7 @@ struct pasid_table *intel_pasid_get_table(struct device *dev) { struct device_domain_info *info; - info = dev->archdata.iommu; + info = get_domain_info(dev); if (!info) return NULL; @@ -178,7 +235,7 @@ int intel_pasid_get_dev_max_id(struct device *dev) { struct device_domain_info *info; - info = dev->archdata.iommu; + info = get_domain_info(dev); if (!info || !info->pasid_table) return 0; @@ -199,7 +256,7 @@ struct pasid_entry *intel_pasid_get_entry(struct device *dev, int pasid) return NULL; dir = pasid_table->table; - info = dev->archdata.iommu; + info = get_domain_info(dev); dir_index = pasid >> PASID_PDE_SHIFT; index = pasid & PASID_PTE_MASK; @@ -235,7 +292,20 @@ static inline void pasid_clear_entry(struct pasid_entry *pe) WRITE_ONCE(pe->val[7], 0); } -static void intel_pasid_clear_entry(struct device *dev, int pasid) +static inline void pasid_clear_entry_with_fpd(struct pasid_entry *pe) +{ + WRITE_ONCE(pe->val[0], PASID_PTE_FPD); + WRITE_ONCE(pe->val[1], 0); + WRITE_ONCE(pe->val[2], 0); + WRITE_ONCE(pe->val[3], 0); + WRITE_ONCE(pe->val[4], 0); + WRITE_ONCE(pe->val[5], 0); + WRITE_ONCE(pe->val[6], 0); + WRITE_ONCE(pe->val[7], 0); +} + +static void +intel_pasid_clear_entry(struct device *dev, int pasid, bool fault_ignore) { struct pasid_entry *pe; @@ -243,7 +313,10 @@ static void intel_pasid_clear_entry(struct device *dev, int pasid) if (WARN_ON(!pe)) return; - pasid_clear_entry(pe); + if (fault_ignore && pasid_pte_is_present(pe)) + pasid_clear_entry_with_fpd(pe); + else + pasid_clear_entry(pe); } static inline void pasid_set_bits(u64 *ptr, u64 mask, u64 bits) @@ -359,18 +432,29 @@ pasid_set_flpm(struct pasid_entry *pe, u64 value) pasid_set_bits(&pe->val[2], GENMASK_ULL(3, 2), value << 2); } +/* + * Setup the Extended Access Flag Enable (EAFE) field (Bit 135) + * of a scalable mode PASID entry. + */ +static inline void +pasid_set_eafe(struct pasid_entry *pe) +{ + pasid_set_bits(&pe->val[2], 1 << 7, 1 << 7); +} + static void pasid_cache_invalidation_with_pasid(struct intel_iommu *iommu, u16 did, int pasid) { struct qi_desc desc; - desc.qw0 = QI_PC_DID(did) | QI_PC_PASID_SEL | QI_PC_PASID(pasid); + desc.qw0 = QI_PC_DID(did) | QI_PC_GRAN(QI_PC_PASID_SEL) | + QI_PC_PASID(pasid) | QI_PC_TYPE; desc.qw1 = 0; desc.qw2 = 0; desc.qw3 = 0; - qi_submit_sync(&desc, iommu); + qi_submit_sync(iommu, &desc, 1, 0); } static void @@ -384,7 +468,7 @@ iotlb_invalidation_with_pasid(struct intel_iommu *iommu, u16 did, u32 pasid) desc.qw2 = 0; desc.qw3 = 0; - qi_submit_sync(&desc, iommu); + qi_submit_sync(iommu, &desc, 1, 0); } static void @@ -394,7 +478,7 @@ devtlb_invalidation_with_pasid(struct intel_iommu *iommu, struct device_domain_info *info; u16 sid, qdep, pfsid; - info = dev->archdata.iommu; + info = get_domain_info(dev); if (!info || !info->ats_enabled) return; @@ -405,8 +489,8 @@ devtlb_invalidation_with_pasid(struct intel_iommu *iommu, qi_flush_dev_iotlb(iommu, sid, pfsid, qdep, 0, 64 - VTD_PAGE_SHIFT); } -void intel_pasid_tear_down_entry(struct intel_iommu *iommu, - struct device *dev, int pasid) +void intel_pasid_tear_down_entry(struct intel_iommu *iommu, struct device *dev, + int pasid, bool fault_ignore) { struct pasid_entry *pte; u16 did; @@ -416,7 +500,7 @@ void intel_pasid_tear_down_entry(struct intel_iommu *iommu, return; did = pasid_get_domain_id(pte); - intel_pasid_clear_entry(dev, pasid); + intel_pasid_clear_entry(dev, pasid, fault_ignore); if (!ecap_coherent(iommu->ecap)) clflush_cache_range(pte, sizeof(*pte)); @@ -492,7 +576,7 @@ int intel_pasid_setup_first_level(struct intel_iommu *iommu, pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap)); /* Setup Present and PASID Granular Transfer Type: */ - pasid_set_translation_type(pte, 1); + pasid_set_translation_type(pte, PASID_ENTRY_PGTT_FL_ONLY); pasid_set_present(pte); pasid_flush_caches(iommu, pte, pasid, did); @@ -500,6 +584,25 @@ int intel_pasid_setup_first_level(struct intel_iommu *iommu, } /* + * Skip top levels of page tables for iommu which has less agaw + * than default. Unnecessary for PT mode. + */ +static inline int iommu_skip_agaw(struct dmar_domain *domain, + struct intel_iommu *iommu, + struct dma_pte **pgd) +{ + int agaw; + + for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) { + *pgd = phys_to_virt(dma_pte_addr(*pgd)); + if (!dma_pte_present(*pgd)) + return -EINVAL; + } + + return agaw; +} + +/* * Set up the scalable mode pasid entry for second only translation type. */ int intel_pasid_setup_second_level(struct intel_iommu *iommu, @@ -522,17 +625,11 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu, return -EINVAL; } - /* - * Skip top levels of page tables for iommu which has less agaw - * than default. Unnecessary for PT mode. - */ pgd = domain->pgd; - for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) { - pgd = phys_to_virt(dma_pte_addr(pgd)); - if (!dma_pte_present(pgd)) { - dev_err(dev, "Invalid domain page table\n"); - return -EINVAL; - } + agaw = iommu_skip_agaw(domain, iommu, &pgd); + if (agaw < 0) { + dev_err(dev, "Invalid domain page table\n"); + return -EINVAL; } pgd_val = virt_to_phys(pgd); @@ -548,7 +645,7 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu, pasid_set_domain_id(pte, did); pasid_set_slptr(pte, pgd_val); pasid_set_address_width(pte, agaw); - pasid_set_translation_type(pte, 2); + pasid_set_translation_type(pte, PASID_ENTRY_PGTT_SL_ONLY); pasid_set_fault_enable(pte); pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap)); @@ -582,7 +679,7 @@ int intel_pasid_setup_pass_through(struct intel_iommu *iommu, pasid_clear_entry(pte); pasid_set_domain_id(pte, did); pasid_set_address_width(pte, iommu->agaw); - pasid_set_translation_type(pte, 4); + pasid_set_translation_type(pte, PASID_ENTRY_PGTT_PT); pasid_set_fault_enable(pte); pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap)); @@ -596,3 +693,161 @@ int intel_pasid_setup_pass_through(struct intel_iommu *iommu, return 0; } + +static int +intel_pasid_setup_bind_data(struct intel_iommu *iommu, struct pasid_entry *pte, + struct iommu_gpasid_bind_data_vtd *pasid_data) +{ + /* + * Not all guest PASID table entry fields are passed down during bind, + * here we only set up the ones that are dependent on guest settings. + * Execution related bits such as NXE, SMEP are not supported. + * Other fields, such as snoop related, are set based on host needs + * regardless of guest settings. + */ + if (pasid_data->flags & IOMMU_SVA_VTD_GPASID_SRE) { + if (!ecap_srs(iommu->ecap)) { + pr_err_ratelimited("No supervisor request support on %s\n", + iommu->name); + return -EINVAL; + } + pasid_set_sre(pte); + } + + if (pasid_data->flags & IOMMU_SVA_VTD_GPASID_EAFE) { + if (!ecap_eafs(iommu->ecap)) { + pr_err_ratelimited("No extended access flag support on %s\n", + iommu->name); + return -EINVAL; + } + pasid_set_eafe(pte); + } + + /* + * Memory type is only applicable to devices inside processor coherent + * domain. Will add MTS support once coherent devices are available. + */ + if (pasid_data->flags & IOMMU_SVA_VTD_GPASID_MTS_MASK) { + pr_warn_ratelimited("No memory type support %s\n", + iommu->name); + return -EINVAL; + } + + return 0; +} + +/** + * intel_pasid_setup_nested() - Set up PASID entry for nested translation. + * This could be used for guest shared virtual address. In this case, the + * first level page tables are used for GVA-GPA translation in the guest, + * second level page tables are used for GPA-HPA translation. + * + * @iommu: IOMMU which the device belong to + * @dev: Device to be set up for translation + * @gpgd: FLPTPTR: First Level Page translation pointer in GPA + * @pasid: PASID to be programmed in the device PASID table + * @pasid_data: Additional PASID info from the guest bind request + * @domain: Domain info for setting up second level page tables + * @addr_width: Address width of the first level (guest) + */ +int intel_pasid_setup_nested(struct intel_iommu *iommu, struct device *dev, + pgd_t *gpgd, int pasid, + struct iommu_gpasid_bind_data_vtd *pasid_data, + struct dmar_domain *domain, int addr_width) +{ + struct pasid_entry *pte; + struct dma_pte *pgd; + int ret = 0; + u64 pgd_val; + int agaw; + u16 did; + + if (!ecap_nest(iommu->ecap)) { + pr_err_ratelimited("IOMMU: %s: No nested translation support\n", + iommu->name); + return -EINVAL; + } + + if (!(domain->flags & DOMAIN_FLAG_NESTING_MODE)) { + pr_err_ratelimited("Domain is not in nesting mode, %x\n", + domain->flags); + return -EINVAL; + } + + pte = intel_pasid_get_entry(dev, pasid); + if (WARN_ON(!pte)) + return -EINVAL; + + /* + * Caller must ensure PASID entry is not in use, i.e. not bind the + * same PASID to the same device twice. + */ + if (pasid_pte_is_present(pte)) + return -EBUSY; + + pasid_clear_entry(pte); + + /* Sanity checking performed by caller to make sure address + * width matching in two dimensions: + * 1. CPU vs. IOMMU + * 2. Guest vs. Host. + */ + switch (addr_width) { +#ifdef CONFIG_X86 + case ADDR_WIDTH_5LEVEL: + if (!cpu_feature_enabled(X86_FEATURE_LA57) || + !cap_5lp_support(iommu->cap)) { + dev_err_ratelimited(dev, + "5-level paging not supported\n"); + return -EINVAL; + } + + pasid_set_flpm(pte, 1); + break; +#endif + case ADDR_WIDTH_4LEVEL: + pasid_set_flpm(pte, 0); + break; + default: + dev_err_ratelimited(dev, "Invalid guest address width %d\n", + addr_width); + return -EINVAL; + } + + /* First level PGD is in GPA, must be supported by the second level */ + if ((uintptr_t)gpgd > domain->max_addr) { + dev_err_ratelimited(dev, + "Guest PGD %lx not supported, max %llx\n", + (uintptr_t)gpgd, domain->max_addr); + return -EINVAL; + } + pasid_set_flptr(pte, (uintptr_t)gpgd); + + ret = intel_pasid_setup_bind_data(iommu, pte, pasid_data); + if (ret) + return ret; + + /* Setup the second level based on the given domain */ + pgd = domain->pgd; + + agaw = iommu_skip_agaw(domain, iommu, &pgd); + if (agaw < 0) { + dev_err_ratelimited(dev, "Invalid domain page table\n"); + return -EINVAL; + } + pgd_val = virt_to_phys(pgd); + pasid_set_slptr(pte, pgd_val); + pasid_set_fault_enable(pte); + + did = domain->iommu_did[iommu->seq_id]; + pasid_set_domain_id(pte, did); + + pasid_set_address_width(pte, agaw); + pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap)); + + pasid_set_translation_type(pte, PASID_ENTRY_PGTT_NESTED); + pasid_set_present(pte); + pasid_flush_caches(iommu, pte, pasid, did); + + return ret; +} diff --git a/drivers/iommu/intel-pasid.h b/drivers/iommu/intel-pasid.h index 92de6df24ccb..c5318d40e0fa 100644 --- a/drivers/iommu/intel-pasid.h +++ b/drivers/iommu/intel-pasid.h @@ -15,6 +15,7 @@ #define PASID_MAX 0x100000 #define PASID_PTE_MASK 0x3F #define PASID_PTE_PRESENT 1 +#define PASID_PTE_FPD 2 #define PDE_PFN_MASK PAGE_MASK #define PASID_PDE_SHIFT 6 #define MAX_NR_PASID_BITS 20 @@ -23,6 +24,16 @@ #define is_pasid_enabled(entry) (((entry)->lo >> 3) & 0x1) #define get_pasid_dir_size(entry) (1 << ((((entry)->lo >> 9) & 0x7) + 7)) +/* Virtual command interface for enlightened pasid management. */ +#define VCMD_CMD_ALLOC 0x1 +#define VCMD_CMD_FREE 0x2 +#define VCMD_VRSP_IP 0x1 +#define VCMD_VRSP_SC(e) (((e) >> 1) & 0x3) +#define VCMD_VRSP_SC_SUCCESS 0 +#define VCMD_VRSP_SC_NO_PASID_AVAIL 1 +#define VCMD_VRSP_SC_INVALID_PASID 1 +#define VCMD_VRSP_RESULT_PASID(e) (((e) >> 8) & 0xfffff) +#define VCMD_CMD_OPERAND(e) ((e) << 8) /* * Domain ID reserved for pasid entries programmed for first-level * only and pass-through transfer modes. @@ -36,6 +47,7 @@ * to vmalloc or even module mappings. */ #define PASID_FLAG_SUPERVISOR_MODE BIT(0) +#define PASID_FLAG_NESTED BIT(1) /* * The PASID_FLAG_FL5LP flag Indicates using 5-level paging for first- @@ -51,6 +63,11 @@ struct pasid_entry { u64 val[8]; }; +#define PASID_ENTRY_PGTT_FL_ONLY (1) +#define PASID_ENTRY_PGTT_SL_ONLY (2) +#define PASID_ENTRY_PGTT_NESTED (3) +#define PASID_ENTRY_PGTT_PT (4) + /* The representative of a PASID table */ struct pasid_table { void *table; /* pasid table pointer */ @@ -99,7 +116,13 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu, int intel_pasid_setup_pass_through(struct intel_iommu *iommu, struct dmar_domain *domain, struct device *dev, int pasid); +int intel_pasid_setup_nested(struct intel_iommu *iommu, + struct device *dev, pgd_t *pgd, int pasid, + struct iommu_gpasid_bind_data_vtd *pasid_data, + struct dmar_domain *domain, int addr_width); void intel_pasid_tear_down_entry(struct intel_iommu *iommu, - struct device *dev, int pasid); - + struct device *dev, int pasid, + bool fault_ignore); +int vcmd_alloc_pasid(struct intel_iommu *iommu, unsigned int *pasid); +void vcmd_free_pasid(struct intel_iommu *iommu, unsigned int pasid); #endif /* __INTEL_PASID_H */ diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c index 2998418f0a38..a035ef911fba 100644 --- a/drivers/iommu/intel-svm.c +++ b/drivers/iommu/intel-svm.c @@ -23,6 +23,7 @@ #include "intel-pasid.h" static irqreturn_t prq_event_thread(int irq, void *d); +static void intel_svm_drain_prq(struct device *dev, int pasid); #define PRQ_ORDER 0 @@ -66,6 +67,8 @@ int intel_svm_enable_prq(struct intel_iommu *iommu) dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL); dmar_writeq(iommu->reg + DMAR_PQA_REG, virt_to_phys(iommu->prq) | PRQ_ORDER); + init_completion(&iommu->prq_complete); + return 0; } @@ -138,7 +141,7 @@ static void intel_flush_svm_range_dev (struct intel_svm *svm, struct intel_svm_d } desc.qw2 = 0; desc.qw3 = 0; - qi_submit_sync(&desc, svm->iommu); + qi_submit_sync(svm->iommu, &desc, 1, 0); if (sdev->dev_iotlb) { desc.qw0 = QI_DEV_EIOTLB_PASID(svm->pasid) | @@ -162,7 +165,7 @@ static void intel_flush_svm_range_dev (struct intel_svm *svm, struct intel_svm_d } desc.qw2 = 0; desc.qw3 = 0; - qi_submit_sync(&desc, svm->iommu); + qi_submit_sync(svm->iommu, &desc, 1, 0); } } @@ -206,10 +209,9 @@ static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm) * *has* to handle gracefully without affecting other processes. */ rcu_read_lock(); - list_for_each_entry_rcu(sdev, &svm->devs, list) { - intel_pasid_tear_down_entry(svm->iommu, sdev->dev, svm->pasid); - intel_flush_svm_range_dev(svm, sdev, 0, -1, 0); - } + list_for_each_entry_rcu(sdev, &svm->devs, list) + intel_pasid_tear_down_entry(svm->iommu, sdev->dev, + svm->pasid, true); rcu_read_unlock(); } @@ -226,13 +228,212 @@ static LIST_HEAD(global_svm_list); list_for_each_entry((sdev), &(svm)->devs, list) \ if ((d) != (sdev)->dev) {} else -int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ops *ops) +int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev, + struct iommu_gpasid_bind_data *data) +{ + struct intel_iommu *iommu = intel_svm_device_to_iommu(dev); + struct dmar_domain *dmar_domain; + struct intel_svm_dev *sdev; + struct intel_svm *svm; + int ret = 0; + + if (WARN_ON(!iommu) || !data) + return -EINVAL; + + if (data->version != IOMMU_GPASID_BIND_VERSION_1 || + data->format != IOMMU_PASID_FORMAT_INTEL_VTD) + return -EINVAL; + + if (!dev_is_pci(dev)) + return -ENOTSUPP; + + /* VT-d supports devices with full 20 bit PASIDs only */ + if (pci_max_pasids(to_pci_dev(dev)) != PASID_MAX) + return -EINVAL; + + /* + * We only check host PASID range, we have no knowledge to check + * guest PASID range. + */ + if (data->hpasid <= 0 || data->hpasid >= PASID_MAX) + return -EINVAL; + + dmar_domain = to_dmar_domain(domain); + + mutex_lock(&pasid_mutex); + svm = ioasid_find(NULL, data->hpasid, NULL); + if (IS_ERR(svm)) { + ret = PTR_ERR(svm); + goto out; + } + + if (svm) { + /* + * If we found svm for the PASID, there must be at + * least one device bond, otherwise svm should be freed. + */ + if (WARN_ON(list_empty(&svm->devs))) { + ret = -EINVAL; + goto out; + } + + for_each_svm_dev(sdev, svm, dev) { + /* + * For devices with aux domains, we should allow + * multiple bind calls with the same PASID and pdev. + */ + if (iommu_dev_feature_enabled(dev, + IOMMU_DEV_FEAT_AUX)) { + sdev->users++; + } else { + dev_warn_ratelimited(dev, + "Already bound with PASID %u\n", + svm->pasid); + ret = -EBUSY; + } + goto out; + } + } else { + /* We come here when PASID has never been bond to a device. */ + svm = kzalloc(sizeof(*svm), GFP_KERNEL); + if (!svm) { + ret = -ENOMEM; + goto out; + } + /* REVISIT: upper layer/VFIO can track host process that bind + * the PASID. ioasid_set = mm might be sufficient for vfio to + * check pasid VMM ownership. We can drop the following line + * once VFIO and IOASID set check is in place. + */ + svm->mm = get_task_mm(current); + svm->pasid = data->hpasid; + if (data->flags & IOMMU_SVA_GPASID_VAL) { + svm->gpasid = data->gpasid; + svm->flags |= SVM_FLAG_GUEST_PASID; + } + ioasid_set_data(data->hpasid, svm); + INIT_LIST_HEAD_RCU(&svm->devs); + mmput(svm->mm); + } + sdev = kzalloc(sizeof(*sdev), GFP_KERNEL); + if (!sdev) { + ret = -ENOMEM; + goto out; + } + sdev->dev = dev; + + /* Only count users if device has aux domains */ + if (iommu_dev_feature_enabled(dev, IOMMU_DEV_FEAT_AUX)) + sdev->users = 1; + + /* Set up device context entry for PASID if not enabled already */ + ret = intel_iommu_enable_pasid(iommu, sdev->dev); + if (ret) { + dev_err_ratelimited(dev, "Failed to enable PASID capability\n"); + kfree(sdev); + goto out; + } + + /* + * PASID table is per device for better security. Therefore, for + * each bind of a new device even with an existing PASID, we need to + * call the nested mode setup function here. + */ + spin_lock(&iommu->lock); + ret = intel_pasid_setup_nested(iommu, dev, + (pgd_t *)(uintptr_t)data->gpgd, + data->hpasid, &data->vtd, dmar_domain, + data->addr_width); + spin_unlock(&iommu->lock); + if (ret) { + dev_err_ratelimited(dev, "Failed to set up PASID %llu in nested mode, Err %d\n", + data->hpasid, ret); + /* + * PASID entry should be in cleared state if nested mode + * set up failed. So we only need to clear IOASID tracking + * data such that free call will succeed. + */ + kfree(sdev); + goto out; + } + + svm->flags |= SVM_FLAG_GUEST_MODE; + + init_rcu_head(&sdev->rcu); + list_add_rcu(&sdev->list, &svm->devs); + out: + if (!IS_ERR_OR_NULL(svm) && list_empty(&svm->devs)) { + ioasid_set_data(data->hpasid, NULL); + kfree(svm); + } + + mutex_unlock(&pasid_mutex); + return ret; +} + +int intel_svm_unbind_gpasid(struct device *dev, int pasid) +{ + struct intel_iommu *iommu = intel_svm_device_to_iommu(dev); + struct intel_svm_dev *sdev; + struct intel_svm *svm; + int ret = -EINVAL; + + if (WARN_ON(!iommu)) + return -EINVAL; + + mutex_lock(&pasid_mutex); + svm = ioasid_find(NULL, pasid, NULL); + if (!svm) { + ret = -EINVAL; + goto out; + } + + if (IS_ERR(svm)) { + ret = PTR_ERR(svm); + goto out; + } + + for_each_svm_dev(sdev, svm, dev) { + ret = 0; + if (iommu_dev_feature_enabled(dev, IOMMU_DEV_FEAT_AUX)) + sdev->users--; + if (!sdev->users) { + list_del_rcu(&sdev->list); + intel_pasid_tear_down_entry(iommu, dev, + svm->pasid, false); + intel_svm_drain_prq(dev, svm->pasid); + kfree_rcu(sdev, rcu); + + if (list_empty(&svm->devs)) { + /* + * We do not free the IOASID here in that + * IOMMU driver did not allocate it. + * Unlike native SVM, IOASID for guest use was + * allocated prior to the bind call. + * In any case, if the free call comes before + * the unbind, IOMMU driver will get notified + * and perform cleanup. + */ + ioasid_set_data(pasid, NULL); + kfree(svm); + } + } + break; + } +out: + mutex_unlock(&pasid_mutex); + return ret; +} + +/* Caller must hold pasid_mutex, mm reference */ +static int +intel_svm_bind_mm(struct device *dev, int flags, struct svm_dev_ops *ops, + struct mm_struct *mm, struct intel_svm_dev **sd) { struct intel_iommu *iommu = intel_svm_device_to_iommu(dev); struct device_domain_info *info; struct intel_svm_dev *sdev; struct intel_svm *svm = NULL; - struct mm_struct *mm = NULL; int pasid_max; int ret; @@ -249,16 +450,15 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ } else pasid_max = 1 << 20; + /* Bind supervisor PASID shuld have mm = NULL */ if (flags & SVM_FLAG_SUPERVISOR_MODE) { - if (!ecap_srs(iommu->ecap)) + if (!ecap_srs(iommu->ecap) || mm) { + pr_err("Supervisor PASID with user provided mm.\n"); return -EINVAL; - } else if (pasid) { - mm = get_task_mm(current); - BUG_ON(!mm); + } } - mutex_lock(&pasid_mutex); - if (pasid && !(flags & SVM_FLAG_PRIVATE_PASID)) { + if (!(flags & SVM_FLAG_PRIVATE_PASID)) { struct intel_svm *t; list_for_each_entry(t, &global_svm_list, list) { @@ -296,19 +496,12 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ sdev->dev = dev; ret = intel_iommu_enable_pasid(iommu, dev); - if (ret || !pasid) { - /* If they don't actually want to assign a PASID, this is - * just an enabling check/preparation. */ - kfree(sdev); - goto out; - } - - info = dev->archdata.iommu; - if (!info || !info->pasid_supported) { + if (ret) { kfree(sdev); goto out; } + info = get_domain_info(dev); sdev->did = FLPT_DEFAULT_DID; sdev->sid = PCI_DEVID(info->bus, info->devfn); if (info->ats_enabled) { @@ -397,26 +590,24 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ } } list_add_rcu(&sdev->list, &svm->devs); - - success: - *pasid = svm->pasid; +success: + sdev->pasid = svm->pasid; + sdev->sva.dev = dev; + if (sd) + *sd = sdev; ret = 0; out: - mutex_unlock(&pasid_mutex); - if (mm) - mmput(mm); return ret; } -EXPORT_SYMBOL_GPL(intel_svm_bind_mm); -int intel_svm_unbind_mm(struct device *dev, int pasid) +/* Caller must hold pasid_mutex */ +static int intel_svm_unbind_mm(struct device *dev, int pasid) { struct intel_svm_dev *sdev; struct intel_iommu *iommu; struct intel_svm *svm; int ret = -EINVAL; - mutex_lock(&pasid_mutex); iommu = intel_svm_device_to_iommu(dev); if (!iommu) goto out; @@ -442,8 +633,9 @@ int intel_svm_unbind_mm(struct device *dev, int pasid) * to use. We have a *shared* PASID table, because it's * large and has to be physically contiguous. So it's * hard to be as defensive as we might like. */ - intel_pasid_tear_down_entry(iommu, dev, svm->pasid); - intel_flush_svm_range_dev(svm, sdev, 0, -1, 0); + intel_pasid_tear_down_entry(iommu, dev, + svm->pasid, false); + intel_svm_drain_prq(dev, svm->pasid); kfree_rcu(sdev, rcu); if (list_empty(&svm->devs)) { @@ -462,45 +654,9 @@ int intel_svm_unbind_mm(struct device *dev, int pasid) break; } out: - mutex_unlock(&pasid_mutex); return ret; } -EXPORT_SYMBOL_GPL(intel_svm_unbind_mm); - -int intel_svm_is_pasid_valid(struct device *dev, int pasid) -{ - struct intel_iommu *iommu; - struct intel_svm *svm; - int ret = -EINVAL; - - mutex_lock(&pasid_mutex); - iommu = intel_svm_device_to_iommu(dev); - if (!iommu) - goto out; - - svm = ioasid_find(NULL, pasid, NULL); - if (!svm) - goto out; - - if (IS_ERR(svm)) { - ret = PTR_ERR(svm); - goto out; - } - /* init_mm is used in this case */ - if (!svm->mm) - ret = 1; - else if (atomic_read(&svm->mm->mm_users) > 0) - ret = 1; - else - ret = 0; - - out: - mutex_unlock(&pasid_mutex); - - return ret; -} -EXPORT_SYMBOL_GPL(intel_svm_is_pasid_valid); /* Page request queue descriptor */ struct page_req_dsc { @@ -557,6 +713,93 @@ static bool is_canonical_address(u64 addr) return (((saddr << shift) >> shift) == saddr); } +/** + * intel_svm_drain_prq - Drain page requests and responses for a pasid + * @dev: target device + * @pasid: pasid for draining + * + * Drain all pending page requests and responses related to @pasid in both + * software and hardware. This is supposed to be called after the device + * driver has stopped DMA, the pasid entry has been cleared, and both IOTLB + * and DevTLB have been invalidated. + * + * It waits until all pending page requests for @pasid in the page fault + * queue are completed by the prq handling thread. Then follow the steps + * described in VT-d spec CH7.10 to drain all page requests and page + * responses pending in the hardware. + */ +static void intel_svm_drain_prq(struct device *dev, int pasid) +{ + struct device_domain_info *info; + struct dmar_domain *domain; + struct intel_iommu *iommu; + struct qi_desc desc[3]; + struct pci_dev *pdev; + int head, tail; + u16 sid, did; + int qdep; + + info = get_domain_info(dev); + if (WARN_ON(!info || !dev_is_pci(dev))) + return; + + if (!info->pri_enabled) + return; + + iommu = info->iommu; + domain = info->domain; + pdev = to_pci_dev(dev); + sid = PCI_DEVID(info->bus, info->devfn); + did = domain->iommu_did[iommu->seq_id]; + qdep = pci_ats_queue_depth(pdev); + + /* + * Check and wait until all pending page requests in the queue are + * handled by the prq handling thread. + */ +prq_retry: + reinit_completion(&iommu->prq_complete); + tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK; + head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK; + while (head != tail) { + struct page_req_dsc *req; + + req = &iommu->prq[head / sizeof(*req)]; + if (!req->pasid_present || req->pasid != pasid) { + head = (head + sizeof(*req)) & PRQ_RING_MASK; + continue; + } + + wait_for_completion(&iommu->prq_complete); + goto prq_retry; + } + + /* + * Perform steps described in VT-d spec CH7.10 to drain page + * requests and responses in hardware. + */ + memset(desc, 0, sizeof(desc)); + desc[0].qw0 = QI_IWD_STATUS_DATA(QI_DONE) | + QI_IWD_FENCE | + QI_IWD_TYPE; + desc[1].qw0 = QI_EIOTLB_PASID(pasid) | + QI_EIOTLB_DID(did) | + QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) | + QI_EIOTLB_TYPE; + desc[2].qw0 = QI_DEV_EIOTLB_PASID(pasid) | + QI_DEV_EIOTLB_SID(sid) | + QI_DEV_EIOTLB_QDEP(qdep) | + QI_DEIOTLB_TYPE | + QI_DEV_IOTLB_PFSID(info->pfsid); +qi_retry: + reinit_completion(&iommu->prq_complete); + qi_submit_sync(iommu, desc, 3, QI_OPT_WAIT_DRAIN); + if (readl(iommu->reg + DMAR_PRS_REG) & DMA_PRS_PRO) { + wait_for_completion(&iommu->prq_complete); + goto qi_retry; + } +} + static irqreturn_t prq_event_thread(int irq, void *d) { struct intel_iommu *iommu = d; @@ -685,12 +928,75 @@ static irqreturn_t prq_event_thread(int irq, void *d) sizeof(req->priv_data)); resp.qw2 = 0; resp.qw3 = 0; - qi_submit_sync(&resp, iommu); + qi_submit_sync(iommu, &resp, 1, 0); } head = (head + sizeof(*req)) & PRQ_RING_MASK; } dmar_writeq(iommu->reg + DMAR_PQH_REG, tail); + /* + * Clear the page request overflow bit and wake up all threads that + * are waiting for the completion of this handling. + */ + if (readl(iommu->reg + DMAR_PRS_REG) & DMA_PRS_PRO) + writel(DMA_PRS_PRO, iommu->reg + DMAR_PRS_REG); + + if (!completion_done(&iommu->prq_complete)) + complete(&iommu->prq_complete); + return IRQ_RETVAL(handled); } + +#define to_intel_svm_dev(handle) container_of(handle, struct intel_svm_dev, sva) +struct iommu_sva * +intel_svm_bind(struct device *dev, struct mm_struct *mm, void *drvdata) +{ + struct iommu_sva *sva = ERR_PTR(-EINVAL); + struct intel_svm_dev *sdev = NULL; + int flags = 0; + int ret; + + /* + * TODO: Consolidate with generic iommu-sva bind after it is merged. + * It will require shared SVM data structures, i.e. combine io_mm + * and intel_svm etc. + */ + if (drvdata) + flags = *(int *)drvdata; + mutex_lock(&pasid_mutex); + ret = intel_svm_bind_mm(dev, flags, NULL, mm, &sdev); + if (ret) + sva = ERR_PTR(ret); + else if (sdev) + sva = &sdev->sva; + else + WARN(!sdev, "SVM bind succeeded with no sdev!\n"); + + mutex_unlock(&pasid_mutex); + + return sva; +} + +void intel_svm_unbind(struct iommu_sva *sva) +{ + struct intel_svm_dev *sdev; + + mutex_lock(&pasid_mutex); + sdev = to_intel_svm_dev(sva); + intel_svm_unbind_mm(sdev->dev, sdev->pasid); + mutex_unlock(&pasid_mutex); +} + +int intel_svm_get_pasid(struct iommu_sva *sva) +{ + struct intel_svm_dev *sdev; + int pasid; + + mutex_lock(&pasid_mutex); + sdev = to_intel_svm_dev(sva); + pasid = sdev->pasid; + mutex_unlock(&pasid_mutex); + + return pasid; +} diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c index 81e43c1df7ec..a042f123b091 100644 --- a/drivers/iommu/intel_irq_remapping.c +++ b/drivers/iommu/intel_irq_remapping.c @@ -151,7 +151,7 @@ static int qi_flush_iec(struct intel_iommu *iommu, int index, int mask) desc.qw2 = 0; desc.qw3 = 0; - return qi_submit_sync(&desc, iommu); + return qi_submit_sync(iommu, &desc, 1, 0); } static int modify_irte(struct irq_2_iommu *irq_iommu, diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index a9e5618cde80..b5ea203f6c68 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -80,7 +80,8 @@ static bool iommu_cmd_line_dma_api(void) return !!(iommu_cmd_line & IOMMU_CMD_LINE_DMA_API); } -static int iommu_alloc_default_domain(struct device *dev); +static int iommu_alloc_default_domain(struct iommu_group *group, + struct device *dev); static struct iommu_domain *__iommu_domain_alloc(struct bus_type *bus, unsigned type); static int __iommu_attach_device(struct iommu_domain *domain, @@ -184,6 +185,7 @@ static struct dev_iommu *dev_iommu_get(struct device *dev) static void dev_iommu_free(struct device *dev) { + iommu_fwspec_free(dev); kfree(dev->iommu); dev->iommu = NULL; } @@ -195,6 +197,9 @@ static int __iommu_probe_device(struct device *dev, struct list_head *group_list struct iommu_group *group; int ret; + if (!ops) + return -ENODEV; + if (!dev_iommu_get(dev)) return -ENOMEM; @@ -247,17 +252,17 @@ int iommu_probe_device(struct device *dev) if (ret) goto err_out; + group = iommu_group_get(dev); + if (!group) + goto err_release; + /* * Try to allocate a default domain - needs support from the * IOMMU driver. There are still some drivers which don't * support default domains, so the return value is not yet * checked. */ - iommu_alloc_default_domain(dev); - - group = iommu_group_get(dev); - if (!group) - goto err_release; + iommu_alloc_default_domain(group, dev); if (group->default_domain) ret = __iommu_attach_device(group->default_domain, dev); @@ -582,7 +587,7 @@ struct iommu_group *iommu_group_alloc(void) NULL, "%d", group->id); if (ret) { ida_simple_remove(&iommu_group_ida, group->id); - kfree(group); + kobject_put(&group->kobj); return ERR_PTR(ret); } @@ -765,6 +770,15 @@ out: return ret; } +static bool iommu_is_attach_deferred(struct iommu_domain *domain, + struct device *dev) +{ + if (domain->ops->is_attach_deferred) + return domain->ops->is_attach_deferred(domain, dev); + + return false; +} + /** * iommu_group_add_device - add a device to an iommu group * @group: the group into which to add the device (reference should be held) @@ -817,7 +831,7 @@ rename: mutex_lock(&group->mutex); list_add_tail(&device->list, &group->devices); - if (group->domain) + if (group->domain && !iommu_is_attach_deferred(group->domain, dev)) ret = __iommu_attach_device(group->domain, dev); mutex_unlock(&group->mutex); if (ret) @@ -1474,15 +1488,11 @@ static int iommu_group_alloc_default_domain(struct bus_type *bus, return 0; } -static int iommu_alloc_default_domain(struct device *dev) +static int iommu_alloc_default_domain(struct iommu_group *group, + struct device *dev) { - struct iommu_group *group; unsigned int type; - group = iommu_group_get(dev); - if (!group) - return -ENODEV; - if (group->default_domain) return 0; @@ -1670,17 +1680,8 @@ static void probe_alloc_default_domain(struct bus_type *bus, static int iommu_group_do_dma_attach(struct device *dev, void *data) { struct iommu_domain *domain = data; - const struct iommu_ops *ops; - int ret; - - ret = __iommu_attach_device(domain, dev); - - ops = domain->ops; - if (ret == 0 && ops->probe_finalize) - ops->probe_finalize(dev); - - return ret; + return __iommu_attach_device(domain, dev); } static int __iommu_group_dma_attach(struct iommu_group *group) @@ -1689,6 +1690,22 @@ static int __iommu_group_dma_attach(struct iommu_group *group) iommu_group_do_dma_attach); } +static int iommu_group_do_probe_finalize(struct device *dev, void *data) +{ + struct iommu_domain *domain = data; + + if (domain->ops->probe_finalize) + domain->ops->probe_finalize(dev); + + return 0; +} + +static void __iommu_group_dma_finalize(struct iommu_group *group) +{ + __iommu_group_for_each_dev(group, group->default_domain, + iommu_group_do_probe_finalize); +} + static int iommu_do_create_direct_mappings(struct device *dev, void *data) { struct iommu_group *group = data; @@ -1741,6 +1758,8 @@ int bus_iommu_probe(struct bus_type *bus) if (ret) break; + + __iommu_group_dma_finalize(group); } return ret; @@ -1889,9 +1908,6 @@ static int __iommu_attach_device(struct iommu_domain *domain, struct device *dev) { int ret; - if ((domain->ops->is_attach_deferred != NULL) && - domain->ops->is_attach_deferred(domain, dev)) - return 0; if (unlikely(domain->ops->attach_dev == NULL)) return -ENODEV; @@ -1963,8 +1979,7 @@ EXPORT_SYMBOL_GPL(iommu_sva_unbind_gpasid); static void __iommu_detach_device(struct iommu_domain *domain, struct device *dev) { - if ((domain->ops->is_attach_deferred != NULL) && - domain->ops->is_attach_deferred(domain, dev)) + if (iommu_is_attach_deferred(domain, dev)) return; if (unlikely(domain->ops->detach_dev == NULL)) @@ -2532,71 +2547,6 @@ struct iommu_resv_region *iommu_alloc_resv_region(phys_addr_t start, } EXPORT_SYMBOL_GPL(iommu_alloc_resv_region); -static int -request_default_domain_for_dev(struct device *dev, unsigned long type) -{ - struct iommu_domain *domain; - struct iommu_group *group; - int ret; - - /* Device must already be in a group before calling this function */ - group = iommu_group_get(dev); - if (!group) - return -EINVAL; - - mutex_lock(&group->mutex); - - ret = 0; - if (group->default_domain && group->default_domain->type == type) - goto out; - - /* Don't change mappings of existing devices */ - ret = -EBUSY; - if (iommu_group_device_count(group) != 1) - goto out; - - ret = -ENOMEM; - domain = __iommu_domain_alloc(dev->bus, type); - if (!domain) - goto out; - - /* Attach the device to the domain */ - ret = __iommu_attach_group(domain, group); - if (ret) { - iommu_domain_free(domain); - goto out; - } - - /* Make the domain the default for this group */ - if (group->default_domain) - iommu_domain_free(group->default_domain); - group->default_domain = domain; - - iommu_create_device_direct_mappings(group, dev); - - dev_info(dev, "Using iommu %s mapping\n", - type == IOMMU_DOMAIN_DMA ? "dma" : "direct"); - - ret = 0; -out: - mutex_unlock(&group->mutex); - iommu_group_put(group); - - return ret; -} - -/* Request that a device is direct mapped by the IOMMU */ -int iommu_request_dm_for_dev(struct device *dev) -{ - return request_default_domain_for_dev(dev, IOMMU_DOMAIN_IDENTITY); -} - -/* Request that a device can't be direct mapped by the IOMMU */ -int iommu_request_dma_domain_for_dev(struct device *dev) -{ - return request_default_domain_for_dev(dev, IOMMU_DOMAIN_DMA); -} - void iommu_set_default_passthrough(bool cmd_line) { if (cmd_line) @@ -2874,17 +2824,6 @@ void iommu_sva_unbind_device(struct iommu_sva *handle) } EXPORT_SYMBOL_GPL(iommu_sva_unbind_device); -int iommu_sva_set_ops(struct iommu_sva *handle, - const struct iommu_sva_ops *sva_ops) -{ - if (handle->ops && handle->ops != sva_ops) - return -EEXIST; - - handle->ops = sva_ops; - return 0; -} -EXPORT_SYMBOL_GPL(iommu_sva_set_ops); - int iommu_sva_get_pasid(struct iommu_sva *handle) { const struct iommu_ops *ops = handle->dev->bus->iommu_ops; diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index 0e6a9536eca6..49fc01f2a28d 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -253,7 +253,7 @@ int iova_cache_get(void) SLAB_HWCACHE_ALIGN, NULL); if (!iova_cache) { mutex_unlock(&iova_cache_mutex); - printk(KERN_ERR "Couldn't create iova cache\n"); + pr_err("Couldn't create iova cache\n"); return -ENOMEM; } } @@ -718,8 +718,8 @@ copy_reserved_iova(struct iova_domain *from, struct iova_domain *to) new_iova = reserve_iova(to, iova->pfn_lo, iova->pfn_hi); if (!new_iova) - printk(KERN_ERR "Reserve iova range %lx@%lx failed\n", - iova->pfn_lo, iova->pfn_lo); + pr_err("Reserve iova range %lx@%lx failed\n", + iova->pfn_lo, iova->pfn_lo); } spin_unlock_irqrestore(&from->iova_rbtree_lock, flags); } diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index fb7e702dee23..4c2972f3153b 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -903,11 +903,8 @@ static const struct iommu_ops ipmmu_ops = { .probe_device = ipmmu_probe_device, .release_device = ipmmu_release_device, .probe_finalize = ipmmu_probe_finalize, -#if defined(CONFIG_ARM) && !defined(CONFIG_IOMMU_DMA) - .device_group = generic_device_group, -#else - .device_group = ipmmu_find_group, -#endif + .device_group = IS_ENABLED(CONFIG_ARM) && !IS_ENABLED(CONFIG_IOMMU_DMA) + ? generic_device_group : ipmmu_find_group, .pgsize_bitmap = SZ_1G | SZ_2M | SZ_4K, .of_xlate = ipmmu_of_xlate, }; diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c index 10cd4db0710a..3d8a63555c25 100644 --- a/drivers/iommu/msm_iommu.c +++ b/drivers/iommu/msm_iommu.c @@ -34,7 +34,7 @@ __asm__ __volatile__ ( \ /* bitmap of the page sizes currently supported */ #define MSM_IOMMU_PGSIZES (SZ_4K | SZ_64K | SZ_1M | SZ_16M) -DEFINE_SPINLOCK(msm_iommu_lock); +static DEFINE_SPINLOCK(msm_iommu_lock); static LIST_HEAD(qcom_iommu_devices); static struct iommu_ops msm_iommu_ops; diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c index 7bdd74c7cb9f..c9d79cff4d17 100644 --- a/drivers/iommu/mtk_iommu_v1.c +++ b/drivers/iommu/mtk_iommu_v1.c @@ -265,10 +265,13 @@ static int mtk_iommu_attach_device(struct iommu_domain *domain, { struct mtk_iommu_data *data = dev_iommu_priv_get(dev); struct mtk_iommu_domain *dom = to_mtk_domain(domain); + struct dma_iommu_mapping *mtk_mapping; int ret; - if (!data) - return -ENODEV; + /* Only allow the domain created internally. */ + mtk_mapping = data->dev->archdata.iommu; + if (mtk_mapping->domain != domain) + return 0; if (!data->m4u_dom) { data->m4u_dom = dom; @@ -288,9 +291,6 @@ static void mtk_iommu_detach_device(struct iommu_domain *domain, { struct mtk_iommu_data *data = dev_iommu_priv_get(dev); - if (!data) - return; - mtk_iommu_config(data, dev, false); } @@ -416,6 +416,11 @@ static int mtk_iommu_create_mapping(struct device *dev, return 0; } +static int mtk_iommu_def_domain_type(struct device *dev) +{ + return IOMMU_DOMAIN_UNMANAGED; +} + static struct iommu_device *mtk_iommu_probe_device(struct device *dev) { struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); @@ -464,12 +469,10 @@ static void mtk_iommu_probe_finalize(struct device *dev) static void mtk_iommu_release_device(struct device *dev) { struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); - struct mtk_iommu_data *data; if (!fwspec || fwspec->ops != &mtk_iommu_ops) return; - data = dev_iommu_priv_get(dev); iommu_fwspec_free(dev); } @@ -525,6 +528,7 @@ static const struct iommu_ops mtk_iommu_ops = { .probe_device = mtk_iommu_probe_device, .probe_finalize = mtk_iommu_probe_finalize, .release_device = mtk_iommu_release_device, + .def_domain_type = mtk_iommu_def_domain_type, .device_group = generic_device_group, .pgsize_bitmap = ~0UL << MT2701_IOMMU_PAGE_SHIFT, }; diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c index 6699fe6d9e06..c8282cc212cb 100644 --- a/drivers/iommu/omap-iommu.c +++ b/drivers/iommu/omap-iommu.c @@ -1236,6 +1236,7 @@ static int omap_iommu_probe(struct platform_device *pdev) goto out_group; iommu_device_set_ops(&obj->iommu, &omap_iommu_ops); + iommu_device_set_fwnode(&obj->iommu, &of->fwnode); err = iommu_device_register(&obj->iommu); if (err) @@ -1726,6 +1727,9 @@ static struct iommu_group *omap_iommu_device_group(struct device *dev) struct omap_iommu_arch_data *arch_data = dev->archdata.iommu; struct iommu_group *group = ERR_PTR(-EINVAL); + if (!arch_data) + return ERR_PTR(-ENODEV); + if (arch_data->iommu_dev) group = iommu_group_ref_get(arch_data->iommu_dev->group); diff --git a/drivers/iommu/qcom_iommu.c b/drivers/iommu/qcom_iommu.c index 054e476ebd49..c3e1fbd1988c 100644 --- a/drivers/iommu/qcom_iommu.c +++ b/drivers/iommu/qcom_iommu.c @@ -814,8 +814,11 @@ static int qcom_iommu_device_probe(struct platform_device *pdev) qcom_iommu->dev = dev; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (res) + if (res) { qcom_iommu->local_base = devm_ioremap_resource(dev, res); + if (IS_ERR(qcom_iommu->local_base)) + return PTR_ERR(qcom_iommu->local_base); + } qcom_iommu->iface_clk = devm_clk_get(dev, "iface"); if (IS_ERR(qcom_iommu->iface_clk)) { diff --git a/drivers/iommu/sun50i-iommu.c b/drivers/iommu/sun50i-iommu.c new file mode 100644 index 000000000000..fce605e96aa2 --- /dev/null +++ b/drivers/iommu/sun50i-iommu.c @@ -0,0 +1,1023 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +// Copyright (C) 2016-2018, Allwinner Technology CO., LTD. +// Copyright (C) 2019-2020, Cerno + +#include <linux/bitfield.h> +#include <linux/bug.h> +#include <linux/clk.h> +#include <linux/device.h> +#include <linux/dma-direction.h> +#include <linux/dma-iommu.h> +#include <linux/dma-mapping.h> +#include <linux/err.h> +#include <linux/errno.h> +#include <linux/interrupt.h> +#include <linux/iommu.h> +#include <linux/iopoll.h> +#include <linux/ioport.h> +#include <linux/log2.h> +#include <linux/module.h> +#include <linux/of_platform.h> +#include <linux/platform_device.h> +#include <linux/pm.h> +#include <linux/pm_runtime.h> +#include <linux/reset.h> +#include <linux/sizes.h> +#include <linux/slab.h> +#include <linux/spinlock.h> +#include <linux/types.h> + +#define IOMMU_RESET_REG 0x010 +#define IOMMU_ENABLE_REG 0x020 +#define IOMMU_ENABLE_ENABLE BIT(0) + +#define IOMMU_BYPASS_REG 0x030 +#define IOMMU_AUTO_GATING_REG 0x040 +#define IOMMU_AUTO_GATING_ENABLE BIT(0) + +#define IOMMU_WBUF_CTRL_REG 0x044 +#define IOMMU_OOO_CTRL_REG 0x048 +#define IOMMU_4KB_BDY_PRT_CTRL_REG 0x04c +#define IOMMU_TTB_REG 0x050 +#define IOMMU_TLB_ENABLE_REG 0x060 +#define IOMMU_TLB_PREFETCH_REG 0x070 +#define IOMMU_TLB_PREFETCH_MASTER_ENABLE(m) BIT(m) + +#define IOMMU_TLB_FLUSH_REG 0x080 +#define IOMMU_TLB_FLUSH_PTW_CACHE BIT(17) +#define IOMMU_TLB_FLUSH_MACRO_TLB BIT(16) +#define IOMMU_TLB_FLUSH_MICRO_TLB(i) (BIT(i) & GENMASK(5, 0)) + +#define IOMMU_TLB_IVLD_ADDR_REG 0x090 +#define IOMMU_TLB_IVLD_ADDR_MASK_REG 0x094 +#define IOMMU_TLB_IVLD_ENABLE_REG 0x098 +#define IOMMU_TLB_IVLD_ENABLE_ENABLE BIT(0) + +#define IOMMU_PC_IVLD_ADDR_REG 0x0a0 +#define IOMMU_PC_IVLD_ENABLE_REG 0x0a8 +#define IOMMU_PC_IVLD_ENABLE_ENABLE BIT(0) + +#define IOMMU_DM_AUT_CTRL_REG(d) (0x0b0 + ((d) / 2) * 4) +#define IOMMU_DM_AUT_CTRL_RD_UNAVAIL(d, m) (1 << (((d & 1) * 16) + ((m) * 2))) +#define IOMMU_DM_AUT_CTRL_WR_UNAVAIL(d, m) (1 << (((d & 1) * 16) + ((m) * 2) + 1)) + +#define IOMMU_DM_AUT_OVWT_REG 0x0d0 +#define IOMMU_INT_ENABLE_REG 0x100 +#define IOMMU_INT_CLR_REG 0x104 +#define IOMMU_INT_STA_REG 0x108 +#define IOMMU_INT_ERR_ADDR_REG(i) (0x110 + (i) * 4) +#define IOMMU_INT_ERR_ADDR_L1_REG 0x130 +#define IOMMU_INT_ERR_ADDR_L2_REG 0x134 +#define IOMMU_INT_ERR_DATA_REG(i) (0x150 + (i) * 4) +#define IOMMU_L1PG_INT_REG 0x0180 +#define IOMMU_L2PG_INT_REG 0x0184 + +#define IOMMU_INT_INVALID_L2PG BIT(17) +#define IOMMU_INT_INVALID_L1PG BIT(16) +#define IOMMU_INT_MASTER_PERMISSION(m) BIT(m) +#define IOMMU_INT_MASTER_MASK (IOMMU_INT_MASTER_PERMISSION(0) | \ + IOMMU_INT_MASTER_PERMISSION(1) | \ + IOMMU_INT_MASTER_PERMISSION(2) | \ + IOMMU_INT_MASTER_PERMISSION(3) | \ + IOMMU_INT_MASTER_PERMISSION(4) | \ + IOMMU_INT_MASTER_PERMISSION(5)) +#define IOMMU_INT_MASK (IOMMU_INT_INVALID_L1PG | \ + IOMMU_INT_INVALID_L2PG | \ + IOMMU_INT_MASTER_MASK) + +#define PT_ENTRY_SIZE sizeof(u32) + +#define NUM_DT_ENTRIES 4096 +#define DT_SIZE (NUM_DT_ENTRIES * PT_ENTRY_SIZE) + +#define NUM_PT_ENTRIES 256 +#define PT_SIZE (NUM_PT_ENTRIES * PT_ENTRY_SIZE) + +struct sun50i_iommu { + struct iommu_device iommu; + + /* Lock to modify the IOMMU registers */ + spinlock_t iommu_lock; + + struct device *dev; + void __iomem *base; + struct reset_control *reset; + struct clk *clk; + + struct iommu_domain *domain; + struct iommu_group *group; + struct kmem_cache *pt_pool; +}; + +struct sun50i_iommu_domain { + struct iommu_domain domain; + + /* Number of devices attached to the domain */ + refcount_t refcnt; + + /* L1 Page Table */ + u32 *dt; + dma_addr_t dt_dma; + + struct sun50i_iommu *iommu; +}; + +static struct sun50i_iommu_domain *to_sun50i_domain(struct iommu_domain *domain) +{ + return container_of(domain, struct sun50i_iommu_domain, domain); +} + +static struct sun50i_iommu *sun50i_iommu_from_dev(struct device *dev) +{ + return dev_iommu_priv_get(dev); +} + +static u32 iommu_read(struct sun50i_iommu *iommu, u32 offset) +{ + return readl(iommu->base + offset); +} + +static void iommu_write(struct sun50i_iommu *iommu, u32 offset, u32 value) +{ + writel(value, iommu->base + offset); +} + +/* + * The Allwinner H6 IOMMU uses a 2-level page table. + * + * The first level is the usual Directory Table (DT), that consists of + * 4096 4-bytes Directory Table Entries (DTE), each pointing to a Page + * Table (PT). + * + * Each PT consits of 256 4-bytes Page Table Entries (PTE), each + * pointing to a 4kB page of physical memory. + * + * The IOMMU supports a single DT, pointed by the IOMMU_TTB_REG + * register that contains its physical address. + */ + +#define SUN50I_IOVA_DTE_MASK GENMASK(31, 20) +#define SUN50I_IOVA_PTE_MASK GENMASK(19, 12) +#define SUN50I_IOVA_PAGE_MASK GENMASK(11, 0) + +static u32 sun50i_iova_get_dte_index(dma_addr_t iova) +{ + return FIELD_GET(SUN50I_IOVA_DTE_MASK, iova); +} + +static u32 sun50i_iova_get_pte_index(dma_addr_t iova) +{ + return FIELD_GET(SUN50I_IOVA_PTE_MASK, iova); +} + +static u32 sun50i_iova_get_page_offset(dma_addr_t iova) +{ + return FIELD_GET(SUN50I_IOVA_PAGE_MASK, iova); +} + +/* + * Each Directory Table Entry has a Page Table address and a valid + * bit: + + * +---------------------+-----------+-+ + * | PT address | Reserved |V| + * +---------------------+-----------+-+ + * 31:10 - Page Table address + * 9:2 - Reserved + * 1:0 - 1 if the entry is valid + */ + +#define SUN50I_DTE_PT_ADDRESS_MASK GENMASK(31, 10) +#define SUN50I_DTE_PT_ATTRS GENMASK(1, 0) +#define SUN50I_DTE_PT_VALID 1 + +static phys_addr_t sun50i_dte_get_pt_address(u32 dte) +{ + return (phys_addr_t)dte & SUN50I_DTE_PT_ADDRESS_MASK; +} + +static bool sun50i_dte_is_pt_valid(u32 dte) +{ + return (dte & SUN50I_DTE_PT_ATTRS) == SUN50I_DTE_PT_VALID; +} + +static u32 sun50i_mk_dte(dma_addr_t pt_dma) +{ + return (pt_dma & SUN50I_DTE_PT_ADDRESS_MASK) | SUN50I_DTE_PT_VALID; +} + +/* + * Each PTE has a Page address, an authority index and a valid bit: + * + * +----------------+-----+-----+-----+---+-----+ + * | Page address | Rsv | ACI | Rsv | V | Rsv | + * +----------------+-----+-----+-----+---+-----+ + * 31:12 - Page address + * 11:8 - Reserved + * 7:4 - Authority Control Index + * 3:2 - Reserved + * 1 - 1 if the entry is valid + * 0 - Reserved + * + * The way permissions work is that the IOMMU has 16 "domains" that + * can be configured to give each masters either read or write + * permissions through the IOMMU_DM_AUT_CTRL_REG registers. The domain + * 0 seems like the default domain, and its permissions in the + * IOMMU_DM_AUT_CTRL_REG are only read-only, so it's not really + * useful to enforce any particular permission. + * + * Each page entry will then have a reference to the domain they are + * affected to, so that we can actually enforce them on a per-page + * basis. + * + * In order to make it work with the IOMMU framework, we will be using + * 4 different domains, starting at 1: RD_WR, RD, WR and NONE + * depending on the permission we want to enforce. Each domain will + * have each master setup in the same way, since the IOMMU framework + * doesn't seem to restrict page access on a per-device basis. And + * then we will use the relevant domain index when generating the page + * table entry depending on the permissions we want to be enforced. + */ + +enum sun50i_iommu_aci { + SUN50I_IOMMU_ACI_DO_NOT_USE = 0, + SUN50I_IOMMU_ACI_NONE, + SUN50I_IOMMU_ACI_RD, + SUN50I_IOMMU_ACI_WR, + SUN50I_IOMMU_ACI_RD_WR, +}; + +#define SUN50I_PTE_PAGE_ADDRESS_MASK GENMASK(31, 12) +#define SUN50I_PTE_ACI_MASK GENMASK(7, 4) +#define SUN50I_PTE_PAGE_VALID BIT(1) + +static phys_addr_t sun50i_pte_get_page_address(u32 pte) +{ + return (phys_addr_t)pte & SUN50I_PTE_PAGE_ADDRESS_MASK; +} + +static enum sun50i_iommu_aci sun50i_get_pte_aci(u32 pte) +{ + return FIELD_GET(SUN50I_PTE_ACI_MASK, pte); +} + +static bool sun50i_pte_is_page_valid(u32 pte) +{ + return pte & SUN50I_PTE_PAGE_VALID; +} + +static u32 sun50i_mk_pte(phys_addr_t page, int prot) +{ + enum sun50i_iommu_aci aci; + u32 flags = 0; + + if (prot & (IOMMU_READ | IOMMU_WRITE)) + aci = SUN50I_IOMMU_ACI_RD_WR; + else if (prot & IOMMU_READ) + aci = SUN50I_IOMMU_ACI_RD; + else if (prot & IOMMU_WRITE) + aci = SUN50I_IOMMU_ACI_WR; + else + aci = SUN50I_IOMMU_ACI_NONE; + + flags |= FIELD_PREP(SUN50I_PTE_ACI_MASK, aci); + page &= SUN50I_PTE_PAGE_ADDRESS_MASK; + return page | flags | SUN50I_PTE_PAGE_VALID; +} + +static void sun50i_table_flush(struct sun50i_iommu_domain *sun50i_domain, + void *vaddr, unsigned int count) +{ + struct sun50i_iommu *iommu = sun50i_domain->iommu; + dma_addr_t dma = virt_to_phys(vaddr); + size_t size = count * PT_ENTRY_SIZE; + + dma_sync_single_for_device(iommu->dev, dma, size, DMA_TO_DEVICE); +} + +static int sun50i_iommu_flush_all_tlb(struct sun50i_iommu *iommu) +{ + u32 reg; + int ret; + + assert_spin_locked(&iommu->iommu_lock); + + iommu_write(iommu, + IOMMU_TLB_FLUSH_REG, + IOMMU_TLB_FLUSH_PTW_CACHE | + IOMMU_TLB_FLUSH_MACRO_TLB | + IOMMU_TLB_FLUSH_MICRO_TLB(5) | + IOMMU_TLB_FLUSH_MICRO_TLB(4) | + IOMMU_TLB_FLUSH_MICRO_TLB(3) | + IOMMU_TLB_FLUSH_MICRO_TLB(2) | + IOMMU_TLB_FLUSH_MICRO_TLB(1) | + IOMMU_TLB_FLUSH_MICRO_TLB(0)); + + ret = readl_poll_timeout(iommu->base + IOMMU_TLB_FLUSH_REG, + reg, !reg, + 1, 2000); + if (ret) + dev_warn(iommu->dev, "TLB Flush timed out!\n"); + + return ret; +} + +static void sun50i_iommu_flush_iotlb_all(struct iommu_domain *domain) +{ + struct sun50i_iommu_domain *sun50i_domain = to_sun50i_domain(domain); + struct sun50i_iommu *iommu = sun50i_domain->iommu; + unsigned long flags; + + /* + * At boot, we'll have a first call into .flush_iotlb_all right after + * .probe_device, and since we link our (single) domain to our iommu in + * the .attach_device callback, we don't have that pointer set. + * + * It shouldn't really be any trouble to ignore it though since we flush + * all caches as part of the device powerup. + */ + if (!iommu) + return; + + spin_lock_irqsave(&iommu->iommu_lock, flags); + sun50i_iommu_flush_all_tlb(iommu); + spin_unlock_irqrestore(&iommu->iommu_lock, flags); +} + +static void sun50i_iommu_iotlb_sync(struct iommu_domain *domain, + struct iommu_iotlb_gather *gather) +{ + sun50i_iommu_flush_iotlb_all(domain); +} + +static int sun50i_iommu_enable(struct sun50i_iommu *iommu) +{ + struct sun50i_iommu_domain *sun50i_domain; + unsigned long flags; + int ret; + + if (!iommu->domain) + return 0; + + sun50i_domain = to_sun50i_domain(iommu->domain); + + ret = reset_control_deassert(iommu->reset); + if (ret) + return ret; + + ret = clk_prepare_enable(iommu->clk); + if (ret) + goto err_reset_assert; + + spin_lock_irqsave(&iommu->iommu_lock, flags); + + iommu_write(iommu, IOMMU_TTB_REG, sun50i_domain->dt_dma); + iommu_write(iommu, IOMMU_TLB_PREFETCH_REG, + IOMMU_TLB_PREFETCH_MASTER_ENABLE(0) | + IOMMU_TLB_PREFETCH_MASTER_ENABLE(1) | + IOMMU_TLB_PREFETCH_MASTER_ENABLE(2) | + IOMMU_TLB_PREFETCH_MASTER_ENABLE(3) | + IOMMU_TLB_PREFETCH_MASTER_ENABLE(4) | + IOMMU_TLB_PREFETCH_MASTER_ENABLE(5)); + iommu_write(iommu, IOMMU_INT_ENABLE_REG, IOMMU_INT_MASK); + iommu_write(iommu, IOMMU_DM_AUT_CTRL_REG(SUN50I_IOMMU_ACI_NONE), + IOMMU_DM_AUT_CTRL_RD_UNAVAIL(SUN50I_IOMMU_ACI_NONE, 0) | + IOMMU_DM_AUT_CTRL_WR_UNAVAIL(SUN50I_IOMMU_ACI_NONE, 0) | + IOMMU_DM_AUT_CTRL_RD_UNAVAIL(SUN50I_IOMMU_ACI_NONE, 1) | + IOMMU_DM_AUT_CTRL_WR_UNAVAIL(SUN50I_IOMMU_ACI_NONE, 1) | + IOMMU_DM_AUT_CTRL_RD_UNAVAIL(SUN50I_IOMMU_ACI_NONE, 2) | + IOMMU_DM_AUT_CTRL_WR_UNAVAIL(SUN50I_IOMMU_ACI_NONE, 2) | + IOMMU_DM_AUT_CTRL_RD_UNAVAIL(SUN50I_IOMMU_ACI_NONE, 3) | + IOMMU_DM_AUT_CTRL_WR_UNAVAIL(SUN50I_IOMMU_ACI_NONE, 3) | + IOMMU_DM_AUT_CTRL_RD_UNAVAIL(SUN50I_IOMMU_ACI_NONE, 4) | + IOMMU_DM_AUT_CTRL_WR_UNAVAIL(SUN50I_IOMMU_ACI_NONE, 4) | + IOMMU_DM_AUT_CTRL_RD_UNAVAIL(SUN50I_IOMMU_ACI_NONE, 5) | + IOMMU_DM_AUT_CTRL_WR_UNAVAIL(SUN50I_IOMMU_ACI_NONE, 5)); + + iommu_write(iommu, IOMMU_DM_AUT_CTRL_REG(SUN50I_IOMMU_ACI_RD), + IOMMU_DM_AUT_CTRL_WR_UNAVAIL(SUN50I_IOMMU_ACI_RD, 0) | + IOMMU_DM_AUT_CTRL_WR_UNAVAIL(SUN50I_IOMMU_ACI_RD, 1) | + IOMMU_DM_AUT_CTRL_WR_UNAVAIL(SUN50I_IOMMU_ACI_RD, 2) | + IOMMU_DM_AUT_CTRL_WR_UNAVAIL(SUN50I_IOMMU_ACI_RD, 3) | + IOMMU_DM_AUT_CTRL_WR_UNAVAIL(SUN50I_IOMMU_ACI_RD, 4) | + IOMMU_DM_AUT_CTRL_WR_UNAVAIL(SUN50I_IOMMU_ACI_RD, 5)); + + iommu_write(iommu, IOMMU_DM_AUT_CTRL_REG(SUN50I_IOMMU_ACI_WR), + IOMMU_DM_AUT_CTRL_RD_UNAVAIL(SUN50I_IOMMU_ACI_WR, 0) | + IOMMU_DM_AUT_CTRL_RD_UNAVAIL(SUN50I_IOMMU_ACI_WR, 1) | + IOMMU_DM_AUT_CTRL_RD_UNAVAIL(SUN50I_IOMMU_ACI_WR, 2) | + IOMMU_DM_AUT_CTRL_RD_UNAVAIL(SUN50I_IOMMU_ACI_WR, 3) | + IOMMU_DM_AUT_CTRL_RD_UNAVAIL(SUN50I_IOMMU_ACI_WR, 4) | + IOMMU_DM_AUT_CTRL_RD_UNAVAIL(SUN50I_IOMMU_ACI_WR, 5)); + + ret = sun50i_iommu_flush_all_tlb(iommu); + if (ret) { + spin_unlock_irqrestore(&iommu->iommu_lock, flags); + goto err_clk_disable; + } + + iommu_write(iommu, IOMMU_AUTO_GATING_REG, IOMMU_AUTO_GATING_ENABLE); + iommu_write(iommu, IOMMU_ENABLE_REG, IOMMU_ENABLE_ENABLE); + + spin_unlock_irqrestore(&iommu->iommu_lock, flags); + + return 0; + +err_clk_disable: + clk_disable_unprepare(iommu->clk); + +err_reset_assert: + reset_control_assert(iommu->reset); + + return ret; +} + +static void sun50i_iommu_disable(struct sun50i_iommu *iommu) +{ + unsigned long flags; + + spin_lock_irqsave(&iommu->iommu_lock, flags); + + iommu_write(iommu, IOMMU_ENABLE_REG, 0); + iommu_write(iommu, IOMMU_TTB_REG, 0); + + spin_unlock_irqrestore(&iommu->iommu_lock, flags); + + clk_disable_unprepare(iommu->clk); + reset_control_assert(iommu->reset); +} + +static void *sun50i_iommu_alloc_page_table(struct sun50i_iommu *iommu, + gfp_t gfp) +{ + dma_addr_t pt_dma; + u32 *page_table; + + page_table = kmem_cache_zalloc(iommu->pt_pool, gfp); + if (!page_table) + return ERR_PTR(-ENOMEM); + + pt_dma = dma_map_single(iommu->dev, page_table, PT_SIZE, DMA_TO_DEVICE); + if (dma_mapping_error(iommu->dev, pt_dma)) { + dev_err(iommu->dev, "Couldn't map L2 Page Table\n"); + kmem_cache_free(iommu->pt_pool, page_table); + return ERR_PTR(-ENOMEM); + } + + /* We rely on the physical address and DMA address being the same */ + WARN_ON(pt_dma != virt_to_phys(page_table)); + + return page_table; +} + +static void sun50i_iommu_free_page_table(struct sun50i_iommu *iommu, + u32 *page_table) +{ + phys_addr_t pt_phys = virt_to_phys(page_table); + + dma_unmap_single(iommu->dev, pt_phys, PT_SIZE, DMA_TO_DEVICE); + kmem_cache_free(iommu->pt_pool, page_table); +} + +static u32 *sun50i_dte_get_page_table(struct sun50i_iommu_domain *sun50i_domain, + dma_addr_t iova, gfp_t gfp) +{ + struct sun50i_iommu *iommu = sun50i_domain->iommu; + u32 *page_table; + u32 *dte_addr; + u32 old_dte; + u32 dte; + + dte_addr = &sun50i_domain->dt[sun50i_iova_get_dte_index(iova)]; + dte = *dte_addr; + if (sun50i_dte_is_pt_valid(dte)) { + phys_addr_t pt_phys = sun50i_dte_get_pt_address(dte); + return (u32 *)phys_to_virt(pt_phys); + } + + page_table = sun50i_iommu_alloc_page_table(iommu, gfp); + if (IS_ERR(page_table)) + return page_table; + + dte = sun50i_mk_dte(virt_to_phys(page_table)); + old_dte = cmpxchg(dte_addr, 0, dte); + if (old_dte) { + phys_addr_t installed_pt_phys = + sun50i_dte_get_pt_address(old_dte); + u32 *installed_pt = phys_to_virt(installed_pt_phys); + u32 *drop_pt = page_table; + + page_table = installed_pt; + dte = old_dte; + sun50i_iommu_free_page_table(iommu, drop_pt); + } + + sun50i_table_flush(sun50i_domain, page_table, PT_SIZE); + sun50i_table_flush(sun50i_domain, dte_addr, 1); + + return page_table; +} + +static int sun50i_iommu_map(struct iommu_domain *domain, unsigned long iova, + phys_addr_t paddr, size_t size, int prot, gfp_t gfp) +{ + struct sun50i_iommu_domain *sun50i_domain = to_sun50i_domain(domain); + struct sun50i_iommu *iommu = sun50i_domain->iommu; + u32 pte_index; + u32 *page_table, *pte_addr; + int ret = 0; + + page_table = sun50i_dte_get_page_table(sun50i_domain, iova, gfp); + if (IS_ERR(page_table)) { + ret = PTR_ERR(page_table); + goto out; + } + + pte_index = sun50i_iova_get_pte_index(iova); + pte_addr = &page_table[pte_index]; + if (unlikely(sun50i_pte_is_page_valid(*pte_addr))) { + phys_addr_t page_phys = sun50i_pte_get_page_address(*pte_addr); + dev_err(iommu->dev, + "iova %pad already mapped to %pa cannot remap to %pa prot: %#x\n", + &iova, &page_phys, &paddr, prot); + ret = -EBUSY; + goto out; + } + + *pte_addr = sun50i_mk_pte(paddr, prot); + sun50i_table_flush(sun50i_domain, pte_addr, 1); + +out: + return ret; +} + +static size_t sun50i_iommu_unmap(struct iommu_domain *domain, unsigned long iova, + size_t size, struct iommu_iotlb_gather *gather) +{ + struct sun50i_iommu_domain *sun50i_domain = to_sun50i_domain(domain); + phys_addr_t pt_phys; + dma_addr_t pte_dma; + u32 *pte_addr; + u32 dte; + + dte = sun50i_domain->dt[sun50i_iova_get_dte_index(iova)]; + if (!sun50i_dte_is_pt_valid(dte)) + return 0; + + pt_phys = sun50i_dte_get_pt_address(dte); + pte_addr = (u32 *)phys_to_virt(pt_phys) + sun50i_iova_get_pte_index(iova); + pte_dma = pt_phys + sun50i_iova_get_pte_index(iova) * PT_ENTRY_SIZE; + + if (!sun50i_pte_is_page_valid(*pte_addr)) + return 0; + + memset(pte_addr, 0, sizeof(*pte_addr)); + sun50i_table_flush(sun50i_domain, pte_addr, 1); + + return SZ_4K; +} + +static phys_addr_t sun50i_iommu_iova_to_phys(struct iommu_domain *domain, + dma_addr_t iova) +{ + struct sun50i_iommu_domain *sun50i_domain = to_sun50i_domain(domain); + phys_addr_t pt_phys; + u32 *page_table; + u32 dte, pte; + + dte = sun50i_domain->dt[sun50i_iova_get_dte_index(iova)]; + if (!sun50i_dte_is_pt_valid(dte)) + return 0; + + pt_phys = sun50i_dte_get_pt_address(dte); + page_table = (u32 *)phys_to_virt(pt_phys); + pte = page_table[sun50i_iova_get_pte_index(iova)]; + if (!sun50i_pte_is_page_valid(pte)) + return 0; + + return sun50i_pte_get_page_address(pte) + + sun50i_iova_get_page_offset(iova); +} + +static struct iommu_domain *sun50i_iommu_domain_alloc(unsigned type) +{ + struct sun50i_iommu_domain *sun50i_domain; + + if (type != IOMMU_DOMAIN_DMA && + type != IOMMU_DOMAIN_IDENTITY && + type != IOMMU_DOMAIN_UNMANAGED) + return NULL; + + sun50i_domain = kzalloc(sizeof(*sun50i_domain), GFP_KERNEL); + if (!sun50i_domain) + return NULL; + + if (type == IOMMU_DOMAIN_DMA && + iommu_get_dma_cookie(&sun50i_domain->domain)) + goto err_free_domain; + + sun50i_domain->dt = (u32 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, + get_order(DT_SIZE)); + if (!sun50i_domain->dt) + goto err_put_cookie; + + refcount_set(&sun50i_domain->refcnt, 1); + + sun50i_domain->domain.geometry.aperture_start = 0; + sun50i_domain->domain.geometry.aperture_end = DMA_BIT_MASK(32); + sun50i_domain->domain.geometry.force_aperture = true; + + return &sun50i_domain->domain; + +err_put_cookie: + if (type == IOMMU_DOMAIN_DMA) + iommu_put_dma_cookie(&sun50i_domain->domain); + +err_free_domain: + kfree(sun50i_domain); + + return NULL; +} + +static void sun50i_iommu_domain_free(struct iommu_domain *domain) +{ + struct sun50i_iommu_domain *sun50i_domain = to_sun50i_domain(domain); + + free_pages((unsigned long)sun50i_domain->dt, get_order(DT_SIZE)); + sun50i_domain->dt = NULL; + + iommu_put_dma_cookie(domain); + + kfree(sun50i_domain); +} + +static int sun50i_iommu_attach_domain(struct sun50i_iommu *iommu, + struct sun50i_iommu_domain *sun50i_domain) +{ + iommu->domain = &sun50i_domain->domain; + sun50i_domain->iommu = iommu; + + sun50i_domain->dt_dma = dma_map_single(iommu->dev, sun50i_domain->dt, + DT_SIZE, DMA_TO_DEVICE); + if (dma_mapping_error(iommu->dev, sun50i_domain->dt_dma)) { + dev_err(iommu->dev, "Couldn't map L1 Page Table\n"); + return -ENOMEM; + } + + return sun50i_iommu_enable(iommu); +} + +static void sun50i_iommu_detach_domain(struct sun50i_iommu *iommu, + struct sun50i_iommu_domain *sun50i_domain) +{ + unsigned int i; + + for (i = 0; i < NUM_DT_ENTRIES; i++) { + phys_addr_t pt_phys; + u32 *page_table; + u32 *dte_addr; + u32 dte; + + dte_addr = &sun50i_domain->dt[i]; + dte = *dte_addr; + if (!sun50i_dte_is_pt_valid(dte)) + continue; + + memset(dte_addr, 0, sizeof(*dte_addr)); + sun50i_table_flush(sun50i_domain, dte_addr, 1); + + pt_phys = sun50i_dte_get_pt_address(dte); + page_table = phys_to_virt(pt_phys); + sun50i_iommu_free_page_table(iommu, page_table); + } + + + sun50i_iommu_disable(iommu); + + dma_unmap_single(iommu->dev, virt_to_phys(sun50i_domain->dt), + DT_SIZE, DMA_TO_DEVICE); + + iommu->domain = NULL; +} + +static void sun50i_iommu_detach_device(struct iommu_domain *domain, + struct device *dev) +{ + struct sun50i_iommu_domain *sun50i_domain = to_sun50i_domain(domain); + struct sun50i_iommu *iommu = dev_iommu_priv_get(dev); + + dev_dbg(dev, "Detaching from IOMMU domain\n"); + + if (iommu->domain != domain) + return; + + if (refcount_dec_and_test(&sun50i_domain->refcnt)) + sun50i_iommu_detach_domain(iommu, sun50i_domain); +} + +static int sun50i_iommu_attach_device(struct iommu_domain *domain, + struct device *dev) +{ + struct sun50i_iommu_domain *sun50i_domain = to_sun50i_domain(domain); + struct sun50i_iommu *iommu; + + iommu = sun50i_iommu_from_dev(dev); + if (!iommu) + return -ENODEV; + + dev_dbg(dev, "Attaching to IOMMU domain\n"); + + refcount_inc(&sun50i_domain->refcnt); + + if (iommu->domain == domain) + return 0; + + if (iommu->domain) + sun50i_iommu_detach_device(iommu->domain, dev); + + sun50i_iommu_attach_domain(iommu, sun50i_domain); + + return 0; +} + +static struct iommu_device *sun50i_iommu_probe_device(struct device *dev) +{ + struct sun50i_iommu *iommu; + + iommu = sun50i_iommu_from_dev(dev); + if (!iommu) + return ERR_PTR(-ENODEV); + + return &iommu->iommu; +} + +static void sun50i_iommu_release_device(struct device *dev) {} + +static struct iommu_group *sun50i_iommu_device_group(struct device *dev) +{ + struct sun50i_iommu *iommu = sun50i_iommu_from_dev(dev); + + return iommu_group_ref_get(iommu->group); +} + +static int sun50i_iommu_of_xlate(struct device *dev, + struct of_phandle_args *args) +{ + struct platform_device *iommu_pdev = of_find_device_by_node(args->np); + unsigned id = args->args[0]; + + dev_iommu_priv_set(dev, platform_get_drvdata(iommu_pdev)); + + return iommu_fwspec_add_ids(dev, &id, 1); +} + +static const struct iommu_ops sun50i_iommu_ops = { + .pgsize_bitmap = SZ_4K, + .attach_dev = sun50i_iommu_attach_device, + .detach_dev = sun50i_iommu_detach_device, + .device_group = sun50i_iommu_device_group, + .domain_alloc = sun50i_iommu_domain_alloc, + .domain_free = sun50i_iommu_domain_free, + .flush_iotlb_all = sun50i_iommu_flush_iotlb_all, + .iotlb_sync = sun50i_iommu_iotlb_sync, + .iova_to_phys = sun50i_iommu_iova_to_phys, + .map = sun50i_iommu_map, + .of_xlate = sun50i_iommu_of_xlate, + .probe_device = sun50i_iommu_probe_device, + .release_device = sun50i_iommu_release_device, + .unmap = sun50i_iommu_unmap, +}; + +static void sun50i_iommu_report_fault(struct sun50i_iommu *iommu, + unsigned master, phys_addr_t iova, + unsigned prot) +{ + dev_err(iommu->dev, "Page fault for %pad (master %d, dir %s)\n", + &iova, master, (prot == IOMMU_FAULT_WRITE) ? "wr" : "rd"); + + if (iommu->domain) + report_iommu_fault(iommu->domain, iommu->dev, iova, prot); + else + dev_err(iommu->dev, "Page fault while iommu not attached to any domain?\n"); +} + +static phys_addr_t sun50i_iommu_handle_pt_irq(struct sun50i_iommu *iommu, + unsigned addr_reg, + unsigned blame_reg) +{ + phys_addr_t iova; + unsigned master; + u32 blame; + + assert_spin_locked(&iommu->iommu_lock); + + iova = iommu_read(iommu, addr_reg); + blame = iommu_read(iommu, blame_reg); + master = ilog2(blame & IOMMU_INT_MASTER_MASK); + + /* + * If the address is not in the page table, we can't get what + * operation triggered the fault. Assume it's a read + * operation. + */ + sun50i_iommu_report_fault(iommu, master, iova, IOMMU_FAULT_READ); + + return iova; +} + +static phys_addr_t sun50i_iommu_handle_perm_irq(struct sun50i_iommu *iommu) +{ + enum sun50i_iommu_aci aci; + phys_addr_t iova; + unsigned master; + unsigned dir; + u32 blame; + + assert_spin_locked(&iommu->iommu_lock); + + blame = iommu_read(iommu, IOMMU_INT_STA_REG); + master = ilog2(blame & IOMMU_INT_MASTER_MASK); + iova = iommu_read(iommu, IOMMU_INT_ERR_ADDR_REG(master)); + aci = sun50i_get_pte_aci(iommu_read(iommu, + IOMMU_INT_ERR_DATA_REG(master))); + + switch (aci) { + /* + * If we are in the read-only domain, then it means we + * tried to write. + */ + case SUN50I_IOMMU_ACI_RD: + dir = IOMMU_FAULT_WRITE; + break; + + /* + * If we are in the write-only domain, then it means + * we tried to read. + */ + case SUN50I_IOMMU_ACI_WR: + + /* + * If we are in the domain without any permission, we + * can't really tell. Let's default to a read + * operation. + */ + case SUN50I_IOMMU_ACI_NONE: + + /* WTF? */ + case SUN50I_IOMMU_ACI_RD_WR: + default: + dir = IOMMU_FAULT_READ; + break; + } + + /* + * If the address is not in the page table, we can't get what + * operation triggered the fault. Assume it's a read + * operation. + */ + sun50i_iommu_report_fault(iommu, master, iova, dir); + + return iova; +} + +static irqreturn_t sun50i_iommu_irq(int irq, void *dev_id) +{ + struct sun50i_iommu *iommu = dev_id; + phys_addr_t iova; + u32 status; + + spin_lock(&iommu->iommu_lock); + + status = iommu_read(iommu, IOMMU_INT_STA_REG); + if (!(status & IOMMU_INT_MASK)) { + spin_unlock(&iommu->iommu_lock); + return IRQ_NONE; + } + + if (status & IOMMU_INT_INVALID_L2PG) + iova = sun50i_iommu_handle_pt_irq(iommu, + IOMMU_INT_ERR_ADDR_L2_REG, + IOMMU_L2PG_INT_REG); + else if (status & IOMMU_INT_INVALID_L1PG) + iova = sun50i_iommu_handle_pt_irq(iommu, + IOMMU_INT_ERR_ADDR_L1_REG, + IOMMU_L1PG_INT_REG); + else + iova = sun50i_iommu_handle_perm_irq(iommu); + + iommu_write(iommu, IOMMU_INT_CLR_REG, status); + + iommu_write(iommu, IOMMU_RESET_REG, ~status); + iommu_write(iommu, IOMMU_RESET_REG, status); + + spin_unlock(&iommu->iommu_lock); + + return IRQ_HANDLED; +} + +static int sun50i_iommu_probe(struct platform_device *pdev) +{ + struct sun50i_iommu *iommu; + int ret, irq; + + iommu = devm_kzalloc(&pdev->dev, sizeof(*iommu), GFP_KERNEL); + if (!iommu) + return -ENOMEM; + spin_lock_init(&iommu->iommu_lock); + platform_set_drvdata(pdev, iommu); + iommu->dev = &pdev->dev; + + iommu->pt_pool = kmem_cache_create(dev_name(&pdev->dev), + PT_SIZE, PT_SIZE, + SLAB_HWCACHE_ALIGN, + NULL); + if (!iommu->pt_pool) + return -ENOMEM; + + iommu->group = iommu_group_alloc(); + if (IS_ERR(iommu->group)) { + ret = PTR_ERR(iommu->group); + goto err_free_cache; + } + + iommu->base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(iommu->base)) { + ret = PTR_ERR(iommu->base); + goto err_free_group; + } + + irq = platform_get_irq(pdev, 0); + if (irq < 0) { + ret = irq; + goto err_free_group; + } + + iommu->clk = devm_clk_get(&pdev->dev, NULL); + if (IS_ERR(iommu->clk)) { + dev_err(&pdev->dev, "Couldn't get our clock.\n"); + ret = PTR_ERR(iommu->clk); + goto err_free_group; + } + + iommu->reset = devm_reset_control_get(&pdev->dev, NULL); + if (IS_ERR(iommu->reset)) { + dev_err(&pdev->dev, "Couldn't get our reset line.\n"); + ret = PTR_ERR(iommu->reset); + goto err_free_group; + } + + ret = iommu_device_sysfs_add(&iommu->iommu, &pdev->dev, + NULL, dev_name(&pdev->dev)); + if (ret) + goto err_free_group; + + iommu_device_set_ops(&iommu->iommu, &sun50i_iommu_ops); + iommu_device_set_fwnode(&iommu->iommu, &pdev->dev.of_node->fwnode); + + ret = iommu_device_register(&iommu->iommu); + if (ret) + goto err_remove_sysfs; + + ret = devm_request_irq(&pdev->dev, irq, sun50i_iommu_irq, 0, + dev_name(&pdev->dev), iommu); + if (ret < 0) + goto err_unregister; + + bus_set_iommu(&platform_bus_type, &sun50i_iommu_ops); + + return 0; + +err_unregister: + iommu_device_unregister(&iommu->iommu); + +err_remove_sysfs: + iommu_device_sysfs_remove(&iommu->iommu); + +err_free_group: + iommu_group_put(iommu->group); + +err_free_cache: + kmem_cache_destroy(iommu->pt_pool); + + return ret; +} + +static const struct of_device_id sun50i_iommu_dt[] = { + { .compatible = "allwinner,sun50i-h6-iommu", }, + { /* sentinel */ }, +}; +MODULE_DEVICE_TABLE(of, sun50i_iommu_dt); + +static struct platform_driver sun50i_iommu_driver = { + .driver = { + .name = "sun50i-iommu", + .of_match_table = sun50i_iommu_dt, + .suppress_bind_attrs = true, + } +}; +builtin_platform_driver_probe(sun50i_iommu_driver, sun50i_iommu_probe); + +MODULE_DESCRIPTION("Allwinner H6 IOMMU driver"); +MODULE_AUTHOR("Maxime Ripard <maxime@cerno.tech>"); +MODULE_AUTHOR("zhuxianbin <zhuxianbin@allwinnertech.com>"); +MODULE_LICENSE("Dual BSD/GPL"); diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c index bda300c2a438..f6f07489a9aa 100644 --- a/drivers/iommu/virtio-iommu.c +++ b/drivers/iommu/virtio-iommu.c @@ -453,7 +453,7 @@ static int viommu_add_resv_mem(struct viommu_endpoint *vdev, if (!region) return -ENOMEM; - list_add(&vdev->resv_regions, ®ion->list); + list_add(®ion->list, &vdev->resv_regions); return 0; } |