diff options
Diffstat (limited to 'drivers/iommu')
36 files changed, 3382 insertions, 1414 deletions
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index 6ee3a25ae731..49bd2ab8c507 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -23,7 +23,7 @@ config IOMMU_IO_PGTABLE config IOMMU_IO_PGTABLE_LPAE bool "ARMv7/v8 Long Descriptor Format" select IOMMU_IO_PGTABLE - depends on HAS_DMA && (ARM || ARM64 || COMPILE_TEST) + depends on HAS_DMA && (ARM || ARM64 || (COMPILE_TEST && !GENERIC_ATOMIC64)) help Enable support for the ARM long descriptor pagetable format. This allocator supports 4K/2M/1G, 16K/32M and 64K/512M page @@ -76,6 +76,8 @@ config IOMMU_DMA config FSL_PAMU bool "Freescale IOMMU support" + depends on PCI + depends on PHYS_64BIT depends on PPC_E500MC || (COMPILE_TEST && PPC) select IOMMU_API select GENERIC_ALLOCATOR @@ -219,7 +221,7 @@ config OMAP_IOMMU_DEBUG config ROCKCHIP_IOMMU bool "Rockchip IOMMU Support" - depends on ARM + depends on ARM || ARM64 depends on ARCH_ROCKCHIP || COMPILE_TEST select IOMMU_API select ARM_DMA_USE_IOMMU @@ -253,6 +255,7 @@ config TEGRA_IOMMU_SMMU config EXYNOS_IOMMU bool "Exynos IOMMU Support" depends on ARCH_EXYNOS && MMU + depends on !CPU_BIG_ENDIAN # revisit driver if we can enable big-endian ptes select IOMMU_API select ARM_DMA_USE_IOMMU help @@ -274,7 +277,7 @@ config EXYNOS_IOMMU_DEBUG config IPMMU_VMSA bool "Renesas VMSA-compatible IPMMU" - depends on ARM_LPAE + depends on ARM || IOMMU_DMA depends on ARCH_RENESAS || COMPILE_TEST select IOMMU_API select IOMMU_IO_PGTABLE_LPAE @@ -367,4 +370,14 @@ config MTK_IOMMU_V1 if unsure, say N here. +config QCOM_IOMMU + # Note: iommu drivers cannot (yet?) be built as modules + bool "Qualcomm IOMMU Support" + depends on ARCH_QCOM || COMPILE_TEST + select IOMMU_API + select IOMMU_IO_PGTABLE_LPAE + select ARM_DMA_USE_IOMMU + help + Support for IOMMU on certain Qualcomm SoCs. + endif # IOMMU_SUPPORT diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile index 195f7b997d8e..b910aea813a1 100644 --- a/drivers/iommu/Makefile +++ b/drivers/iommu/Makefile @@ -27,3 +27,4 @@ obj-$(CONFIG_TEGRA_IOMMU_SMMU) += tegra-smmu.o obj-$(CONFIG_EXYNOS_IOMMU) += exynos-iommu.o obj-$(CONFIG_FSL_PAMU) += fsl_pamu.o fsl_pamu_domain.o obj-$(CONFIG_S390_IOMMU) += s390-iommu.o +obj-$(CONFIG_QCOM_IOMMU) += qcom_iommu.o diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 63cacf5d6cf2..538c16f669f9 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -54,6 +54,8 @@ #include "amd_iommu_types.h" #include "irq_remapping.h" +#define AMD_IOMMU_MAPPING_ERROR 0 + #define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28)) #define LOOP_TIMEOUT 100000 @@ -89,25 +91,6 @@ LIST_HEAD(ioapic_map); LIST_HEAD(hpet_map); LIST_HEAD(acpihid_map); -#define FLUSH_QUEUE_SIZE 256 - -struct flush_queue_entry { - unsigned long iova_pfn; - unsigned long pages; - struct dma_ops_domain *dma_dom; -}; - -struct flush_queue { - spinlock_t lock; - unsigned next; - struct flush_queue_entry *entries; -}; - -static DEFINE_PER_CPU(struct flush_queue, flush_queue); - -static atomic_t queue_timer_on; -static struct timer_list queue_timer; - /* * Domain for untranslated devices - only allocated * if iommu=pt passed on kernel cmd line. @@ -120,27 +103,6 @@ int amd_iommu_max_glx_val = -1; static const struct dma_map_ops amd_iommu_dma_ops; /* - * This struct contains device specific data for the IOMMU - */ -struct iommu_dev_data { - struct list_head list; /* For domain->dev_list */ - struct list_head dev_data_list; /* For global dev_data_list */ - struct protection_domain *domain; /* Domain the device is bound to */ - u16 devid; /* PCI Device ID */ - u16 alias; /* Alias Device ID */ - bool iommu_v2; /* Device can make use of IOMMUv2 */ - bool passthrough; /* Device is identity mapped */ - struct { - bool enabled; - int qdep; - } ats; /* ATS state */ - bool pri_tlp; /* PASID TLB required for - PPR completions */ - u32 errata; /* Bitmap for errata to apply */ - bool use_vapic; /* Enable device to use vapic mode */ -}; - -/* * general struct to manage commands send to an IOMMU */ struct iommu_cmd { @@ -152,6 +114,7 @@ struct kmem_cache *amd_iommu_irq_cache; static void update_domain(struct protection_domain *domain); static int protection_domain_init(struct protection_domain *domain); static void detach_device(struct device *dev); +static void iova_domain_flush_tlb(struct iova_domain *iovad); /* * Data container for a dma_ops specific protection domain @@ -253,6 +216,8 @@ static struct iommu_dev_data *alloc_dev_data(u16 devid) list_add_tail(&dev_data->dev_data_list, &dev_data_list); spin_unlock_irqrestore(&dev_data_list_lock, flags); + ratelimit_default_init(&dev_data->rs); + return dev_data; } @@ -340,19 +305,25 @@ static u16 get_alias(struct device *dev) static struct iommu_dev_data *find_dev_data(u16 devid) { struct iommu_dev_data *dev_data; + struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; dev_data = search_dev_data(devid); - if (dev_data == NULL) + if (dev_data == NULL) { dev_data = alloc_dev_data(devid); + if (translation_pre_enabled(iommu)) + dev_data->defer_attach = true; + } + return dev_data; } -static struct iommu_dev_data *get_dev_data(struct device *dev) +struct iommu_dev_data *get_dev_data(struct device *dev) { return dev->archdata.iommu; } +EXPORT_SYMBOL(get_dev_data); /* * Find or create an IOMMU group for a acpihid device. @@ -551,6 +522,29 @@ static void dump_command(unsigned long phys_addr) pr_err("AMD-Vi: CMD[%d]: %08x\n", i, cmd->data[i]); } +static void amd_iommu_report_page_fault(u16 devid, u16 domain_id, + u64 address, int flags) +{ + struct iommu_dev_data *dev_data = NULL; + struct pci_dev *pdev; + + pdev = pci_get_bus_and_slot(PCI_BUS_NUM(devid), devid & 0xff); + if (pdev) + dev_data = get_dev_data(&pdev->dev); + + if (dev_data && __ratelimit(&dev_data->rs)) { + dev_err(&pdev->dev, "AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x%04x address=0x%016llx flags=0x%04x]\n", + domain_id, address, flags); + } else if (printk_ratelimit()) { + pr_err("AMD-Vi: Event logged [IO_PAGE_FAULT device=%02x:%02x.%x domain=0x%04x address=0x%016llx flags=0x%04x]\n", + PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), + domain_id, address, flags); + } + + if (pdev) + pci_dev_put(pdev); +} + static void iommu_print_event(struct amd_iommu *iommu, void *__evt) { int type, devid, domid, flags; @@ -575,7 +569,12 @@ retry: goto retry; } - printk(KERN_ERR "AMD-Vi: Event logged ["); + if (type == EVENT_TYPE_IO_FAULT) { + amd_iommu_report_page_fault(devid, domid, address, flags); + return; + } else { + printk(KERN_ERR "AMD-Vi: Event logged ["); + } switch (type) { case EVENT_TYPE_ILL_DEV: @@ -585,12 +584,6 @@ retry: address, flags); dump_dte_entry(devid); break; - case EVENT_TYPE_IO_FAULT: - printk("IO_PAGE_FAULT device=%02x:%02x.%x " - "domain=0x%04x address=0x%016llx flags=0x%04x]\n", - PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), - domid, address, flags); - break; case EVENT_TYPE_DEV_TAB_ERR: printk("DEV_TAB_HARDWARE_ERROR device=%02x:%02x.%x " "address=0x%016llx flags=0x%04x]\n", @@ -848,19 +841,20 @@ static int wait_on_sem(volatile u64 *sem) } static void copy_cmd_to_buffer(struct amd_iommu *iommu, - struct iommu_cmd *cmd, - u32 tail) + struct iommu_cmd *cmd) { u8 *target; - target = iommu->cmd_buf + tail; - tail = (tail + sizeof(*cmd)) % CMD_BUFFER_SIZE; + target = iommu->cmd_buf + iommu->cmd_buf_tail; + + iommu->cmd_buf_tail += sizeof(*cmd); + iommu->cmd_buf_tail %= CMD_BUFFER_SIZE; /* Copy command to buffer */ memcpy(target, cmd, sizeof(*cmd)); /* Tell the IOMMU about it */ - writel(tail, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); + writel(iommu->cmd_buf_tail, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); } static void build_completion_wait(struct iommu_cmd *cmd, u64 address) @@ -1018,33 +1012,34 @@ static int __iommu_queue_command_sync(struct amd_iommu *iommu, struct iommu_cmd *cmd, bool sync) { - u32 left, tail, head, next_tail; + unsigned int count = 0; + u32 left, next_tail; + next_tail = (iommu->cmd_buf_tail + sizeof(*cmd)) % CMD_BUFFER_SIZE; again: - - head = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET); - tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); - next_tail = (tail + sizeof(*cmd)) % CMD_BUFFER_SIZE; - left = (head - next_tail) % CMD_BUFFER_SIZE; + left = (iommu->cmd_buf_head - next_tail) % CMD_BUFFER_SIZE; if (left <= 0x20) { - struct iommu_cmd sync_cmd; - int ret; - - iommu->cmd_sem = 0; + /* Skip udelay() the first time around */ + if (count++) { + if (count == LOOP_TIMEOUT) { + pr_err("AMD-Vi: Command buffer timeout\n"); + return -EIO; + } - build_completion_wait(&sync_cmd, (u64)&iommu->cmd_sem); - copy_cmd_to_buffer(iommu, &sync_cmd, tail); + udelay(1); + } - if ((ret = wait_on_sem(&iommu->cmd_sem)) != 0) - return ret; + /* Update head and recheck remaining space */ + iommu->cmd_buf_head = readl(iommu->mmio_base + + MMIO_CMD_HEAD_OFFSET); goto again; } - copy_cmd_to_buffer(iommu, cmd, tail); + copy_cmd_to_buffer(iommu, cmd); - /* We need to sync now to make sure all commands are processed */ + /* Do we need to make sure all commands are processed? */ iommu->need_sync = sync; return 0; @@ -1110,7 +1105,7 @@ static int iommu_flush_dte(struct amd_iommu *iommu, u16 devid) return iommu_queue_command(iommu, &cmd); } -static void iommu_flush_dte_all(struct amd_iommu *iommu) +static void amd_iommu_flush_dte_all(struct amd_iommu *iommu) { u32 devid; @@ -1124,7 +1119,7 @@ static void iommu_flush_dte_all(struct amd_iommu *iommu) * This function uses heavy locking and may disable irqs for some time. But * this is no issue because it is only called during resume. */ -static void iommu_flush_tlb_all(struct amd_iommu *iommu) +static void amd_iommu_flush_tlb_all(struct amd_iommu *iommu) { u32 dom_id; @@ -1138,7 +1133,7 @@ static void iommu_flush_tlb_all(struct amd_iommu *iommu) iommu_completion_wait(iommu); } -static void iommu_flush_all(struct amd_iommu *iommu) +static void amd_iommu_flush_all(struct amd_iommu *iommu) { struct iommu_cmd cmd; @@ -1157,7 +1152,7 @@ static void iommu_flush_irt(struct amd_iommu *iommu, u16 devid) iommu_queue_command(iommu, &cmd); } -static void iommu_flush_irt_all(struct amd_iommu *iommu) +static void amd_iommu_flush_irt_all(struct amd_iommu *iommu) { u32 devid; @@ -1170,11 +1165,11 @@ static void iommu_flush_irt_all(struct amd_iommu *iommu) void iommu_flush_all_caches(struct amd_iommu *iommu) { if (iommu_feature(iommu, FEATURE_IA)) { - iommu_flush_all(iommu); + amd_iommu_flush_all(iommu); } else { - iommu_flush_dte_all(iommu); - iommu_flush_irt_all(iommu); - iommu_flush_tlb_all(iommu); + amd_iommu_flush_dte_all(iommu); + amd_iommu_flush_irt_all(iommu); + amd_iommu_flush_tlb_all(iommu); } } @@ -1482,9 +1477,9 @@ static int iommu_map_page(struct protection_domain *dom, if (count > 1) { __pte = PAGE_SIZE_PTE(phys_addr, page_size); - __pte |= PM_LEVEL_ENC(7) | IOMMU_PTE_P | IOMMU_PTE_FC; + __pte |= PM_LEVEL_ENC(7) | IOMMU_PTE_PR | IOMMU_PTE_FC; } else - __pte = phys_addr | IOMMU_PTE_P | IOMMU_PTE_FC; + __pte = phys_addr | IOMMU_PTE_PR | IOMMU_PTE_FC; if (prot & IOMMU_PROT_IR) __pte |= IOMMU_PTE_IR; @@ -1733,6 +1728,21 @@ static void free_gcr3_table(struct protection_domain *domain) free_page((unsigned long)domain->gcr3_tbl); } +static void dma_ops_domain_flush_tlb(struct dma_ops_domain *dom) +{ + domain_flush_tlb(&dom->domain); + domain_flush_complete(&dom->domain); +} + +static void iova_domain_flush_tlb(struct iova_domain *iovad) +{ + struct dma_ops_domain *dom; + + dom = container_of(iovad, struct dma_ops_domain, iovad); + + dma_ops_domain_flush_tlb(dom); +} + /* * Free a domain, only used if something went wrong in the * allocation path and we need to free an already allocated page table @@ -1779,6 +1789,9 @@ static struct dma_ops_domain *dma_ops_domain_alloc(void) init_iova_domain(&dma_dom->iovad, PAGE_SIZE, IOVA_START_PFN, DMA_32BIT_PFN); + if (init_iova_flush_queue(&dma_dom->iovad, iova_domain_flush_tlb, NULL)) + goto free_dma_dom; + /* Initialize reserved ranges */ copy_reserved_iova(&reserved_iova_ranges, &dma_dom->iovad); @@ -1811,7 +1824,7 @@ static void set_dte_entry(u16 devid, struct protection_domain *domain, bool ats) pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK) << DEV_ENTRY_MODE_SHIFT; - pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV; + pte_root |= DTE_FLAG_IR | DTE_FLAG_IW | DTE_FLAG_V | DTE_FLAG_TV; flags = amd_iommu_dev_table[devid].data[1]; @@ -1844,7 +1857,7 @@ static void set_dte_entry(u16 devid, struct protection_domain *domain, bool ats) flags |= tmp; } - flags &= ~(0xffffUL); + flags &= ~DEV_DOMID_MASK; flags |= domain->id; amd_iommu_dev_table[devid].data[1] = flags; @@ -1854,7 +1867,7 @@ static void set_dte_entry(u16 devid, struct protection_domain *domain, bool ats) static void clear_dte_entry(u16 devid) { /* remove entry from the device table seen by the hardware */ - amd_iommu_dev_table[devid].data[0] = IOMMU_PTE_P | IOMMU_PTE_TV; + amd_iommu_dev_table[devid].data[0] = DTE_FLAG_V | DTE_FLAG_TV; amd_iommu_dev_table[devid].data[1] &= DTE_FLAG_MASK; amd_iommu_apply_erratum_63(devid); @@ -2225,92 +2238,6 @@ static struct iommu_group *amd_iommu_device_group(struct device *dev) * *****************************************************************************/ -static void __queue_flush(struct flush_queue *queue) -{ - struct protection_domain *domain; - unsigned long flags; - int idx; - - /* First flush TLB of all known domains */ - spin_lock_irqsave(&amd_iommu_pd_lock, flags); - list_for_each_entry(domain, &amd_iommu_pd_list, list) - domain_flush_tlb(domain); - spin_unlock_irqrestore(&amd_iommu_pd_lock, flags); - - /* Wait until flushes have completed */ - domain_flush_complete(NULL); - - for (idx = 0; idx < queue->next; ++idx) { - struct flush_queue_entry *entry; - - entry = queue->entries + idx; - - free_iova_fast(&entry->dma_dom->iovad, - entry->iova_pfn, - entry->pages); - - /* Not really necessary, just to make sure we catch any bugs */ - entry->dma_dom = NULL; - } - - queue->next = 0; -} - -static void queue_flush_all(void) -{ - int cpu; - - for_each_possible_cpu(cpu) { - struct flush_queue *queue; - unsigned long flags; - - queue = per_cpu_ptr(&flush_queue, cpu); - spin_lock_irqsave(&queue->lock, flags); - if (queue->next > 0) - __queue_flush(queue); - spin_unlock_irqrestore(&queue->lock, flags); - } -} - -static void queue_flush_timeout(unsigned long unsused) -{ - atomic_set(&queue_timer_on, 0); - queue_flush_all(); -} - -static void queue_add(struct dma_ops_domain *dma_dom, - unsigned long address, unsigned long pages) -{ - struct flush_queue_entry *entry; - struct flush_queue *queue; - unsigned long flags; - int idx; - - pages = __roundup_pow_of_two(pages); - address >>= PAGE_SHIFT; - - queue = get_cpu_ptr(&flush_queue); - spin_lock_irqsave(&queue->lock, flags); - - if (queue->next == FLUSH_QUEUE_SIZE) - __queue_flush(queue); - - idx = queue->next++; - entry = queue->entries + idx; - - entry->iova_pfn = address; - entry->pages = pages; - entry->dma_dom = dma_dom; - - spin_unlock_irqrestore(&queue->lock, flags); - - if (atomic_cmpxchg(&queue_timer_on, 0, 1) == 0) - mod_timer(&queue_timer, jiffies + msecs_to_jiffies(10)); - - put_cpu_ptr(&flush_queue); -} - - /* * In the dma_ops path we only have the struct device. This function * finds the corresponding IOMMU, the protection domain and the @@ -2321,11 +2248,21 @@ static void queue_add(struct dma_ops_domain *dma_dom, static struct protection_domain *get_domain(struct device *dev) { struct protection_domain *domain; + struct iommu_domain *io_domain; if (!check_device(dev)) return ERR_PTR(-EINVAL); domain = get_dev_data(dev)->domain; + if (domain == NULL && get_dev_data(dev)->defer_attach) { + get_dev_data(dev)->defer_attach = false; + io_domain = iommu_get_domain_for_dev(dev); + domain = to_pdomain(io_domain); + attach_device(dev, domain); + } + if (domain == NULL) + return ERR_PTR(-EBUSY); + if (!dma_ops_domain(domain)) return ERR_PTR(-EBUSY); @@ -2371,6 +2308,7 @@ static int dir2prot(enum dma_data_direction direction) else return 0; } + /* * This function contains common code for mapping of a physically * contiguous memory region into DMA address space. It is used by all @@ -2394,7 +2332,7 @@ static dma_addr_t __map_single(struct device *dev, paddr &= PAGE_MASK; address = dma_ops_alloc_iova(dev, dma_dom, pages, dma_mask); - if (address == DMA_ERROR_CODE) + if (address == AMD_IOMMU_MAPPING_ERROR) goto out; prot = dir2prot(direction); @@ -2431,7 +2369,7 @@ out_unmap: dma_ops_free_iova(dma_dom, address, pages); - return DMA_ERROR_CODE; + return AMD_IOMMU_MAPPING_ERROR; } /* @@ -2462,7 +2400,8 @@ static void __unmap_single(struct dma_ops_domain *dma_dom, domain_flush_tlb(&dma_dom->domain); domain_flush_complete(&dma_dom->domain); } else { - queue_add(dma_dom, dma_addr, pages); + pages = __roundup_pow_of_two(pages); + queue_iova(&dma_dom->iovad, dma_addr >> PAGE_SHIFT, pages, 0); } } @@ -2483,7 +2422,7 @@ static dma_addr_t map_page(struct device *dev, struct page *page, if (PTR_ERR(domain) == -EINVAL) return (dma_addr_t)paddr; else if (IS_ERR(domain)) - return DMA_ERROR_CODE; + return AMD_IOMMU_MAPPING_ERROR; dma_mask = *dev->dma_mask; dma_dom = to_dma_ops_domain(domain); @@ -2560,7 +2499,7 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, npages = sg_num_pages(dev, sglist, nelems); address = dma_ops_alloc_iova(dev, dma_dom, npages, dma_mask); - if (address == DMA_ERROR_CODE) + if (address == AMD_IOMMU_MAPPING_ERROR) goto out_err; prot = dir2prot(direction); @@ -2683,7 +2622,7 @@ static void *alloc_coherent(struct device *dev, size_t size, *dma_addr = __map_single(dev, dma_dom, page_to_phys(page), size, DMA_BIDIRECTIONAL, dma_mask); - if (*dma_addr == DMA_ERROR_CODE) + if (*dma_addr == AMD_IOMMU_MAPPING_ERROR) goto out_free; return page_address(page); @@ -2729,9 +2668,16 @@ free_mem: */ static int amd_iommu_dma_supported(struct device *dev, u64 mask) { + if (!x86_dma_supported(dev, mask)) + return 0; return check_device(dev); } +static int amd_iommu_mapping_error(struct device *dev, dma_addr_t dma_addr) +{ + return dma_addr == AMD_IOMMU_MAPPING_ERROR; +} + static const struct dma_map_ops amd_iommu_dma_ops = { .alloc = alloc_coherent, .free = free_coherent, @@ -2740,6 +2686,7 @@ static const struct dma_map_ops amd_iommu_dma_ops = { .map_sg = map_sg, .unmap_sg = unmap_sg, .dma_supported = amd_iommu_dma_supported, + .mapping_error = amd_iommu_mapping_error, }; static int init_reserved_iova_ranges(void) @@ -2797,7 +2744,7 @@ static int init_reserved_iova_ranges(void) int __init amd_iommu_init_api(void) { - int ret, cpu, err = 0; + int ret, err = 0; ret = iova_cache_get(); if (ret) @@ -2807,18 +2754,6 @@ int __init amd_iommu_init_api(void) if (ret) return ret; - for_each_possible_cpu(cpu) { - struct flush_queue *queue = per_cpu_ptr(&flush_queue, cpu); - - queue->entries = kzalloc(FLUSH_QUEUE_SIZE * - sizeof(*queue->entries), - GFP_KERNEL); - if (!queue->entries) - goto out_put_iova; - - spin_lock_init(&queue->lock); - } - err = bus_set_iommu(&pci_bus_type, &amd_iommu_ops); if (err) return err; @@ -2830,23 +2765,12 @@ int __init amd_iommu_init_api(void) err = bus_set_iommu(&platform_bus_type, &amd_iommu_ops); if (err) return err; - return 0; - -out_put_iova: - for_each_possible_cpu(cpu) { - struct flush_queue *queue = per_cpu_ptr(&flush_queue, cpu); - kfree(queue->entries); - } - - return -ENOMEM; + return 0; } int __init amd_iommu_init_dma_ops(void) { - setup_timer(&queue_timer, queue_flush_timeout, 0); - atomic_set(&queue_timer_on, 0); - swiotlb = iommu_pass_through ? 1 : 0; iommu_detected = 1; @@ -3002,12 +2926,6 @@ static void amd_iommu_domain_free(struct iommu_domain *dom) switch (dom->type) { case IOMMU_DOMAIN_DMA: - /* - * First make sure the domain is no longer referenced from the - * flush queue - */ - queue_flush_all(); - /* Now release the domain */ dma_dom = to_dma_ops_domain(domain); dma_ops_domain_free(dma_dom); @@ -3237,6 +3155,13 @@ static void amd_iommu_apply_resv_region(struct device *dev, WARN_ON_ONCE(reserve_iova(&dma_dom->iovad, start, end) == NULL); } +static bool amd_iommu_is_attach_deferred(struct iommu_domain *domain, + struct device *dev) +{ + struct iommu_dev_data *dev_data = dev->archdata.iommu; + return dev_data->defer_attach; +} + const struct iommu_ops amd_iommu_ops = { .capable = amd_iommu_capable, .domain_alloc = amd_iommu_domain_alloc, @@ -3253,6 +3178,7 @@ const struct iommu_ops amd_iommu_ops = { .get_resv_regions = amd_iommu_get_resv_regions, .put_resv_regions = amd_iommu_put_resv_regions, .apply_resv_region = amd_iommu_apply_resv_region, + .is_attach_deferred = amd_iommu_is_attach_deferred, .pgsize_bitmap = AMD_IOMMU_PGSIZES, }; @@ -3641,11 +3567,6 @@ EXPORT_SYMBOL(amd_iommu_device_info); static struct irq_chip amd_ir_chip; -#define DTE_IRQ_PHYS_ADDR_MASK (((1ULL << 45)-1) << 6) -#define DTE_IRQ_REMAP_INTCTL (2ULL << 60) -#define DTE_IRQ_TABLE_LEN (8ULL << 1) -#define DTE_IRQ_REMAP_ENABLE 1ULL - static void set_dte_irq_entry(u16 devid, struct irq_remap_table *table) { u64 dte; @@ -3879,11 +3800,9 @@ static void irte_ga_prepare(void *entry, u8 vector, u32 dest_apicid, int devid) { struct irte_ga *irte = (struct irte_ga *) entry; - struct iommu_dev_data *dev_data = search_dev_data(devid); irte->lo.val = 0; irte->hi.val = 0; - irte->lo.fields_remap.guest_mode = dev_data ? dev_data->use_vapic : 0; irte->lo.fields_remap.int_type = delivery_mode; irte->lo.fields_remap.dm = dest_mode; irte->hi.fields.vector = vector; @@ -3939,10 +3858,10 @@ static void irte_ga_set_affinity(void *entry, u16 devid, u16 index, struct irte_ga *irte = (struct irte_ga *) entry; struct iommu_dev_data *dev_data = search_dev_data(devid); - if (!dev_data || !dev_data->use_vapic) { + if (!dev_data || !dev_data->use_vapic || + !irte->lo.fields_remap.guest_mode) { irte->hi.fields.vector = vector; irte->lo.fields_remap.destination = dest_apicid; - irte->lo.fields_remap.guest_mode = 0; modify_irte_ga(devid, index, irte, NULL); } } @@ -4273,7 +4192,7 @@ static void irq_remapping_deactivate(struct irq_domain *domain, irte_info->index); } -static struct irq_domain_ops amd_ir_domain_ops = { +static const struct irq_domain_ops amd_ir_domain_ops = { .alloc = irq_remapping_alloc, .free = irq_remapping_free, .activate = irq_remapping_activate, @@ -4318,6 +4237,7 @@ static int amd_ir_set_vcpu_affinity(struct irq_data *data, void *vcpu_info) /* Setting */ irte->hi.fields.ga_root_ptr = (pi_data->base >> 12); irte->hi.fields.vector = vcpu_pi_info->vector; + irte->lo.fields_vapic.ga_log_intr = 1; irte->lo.fields_vapic.guest_mode = 1; irte->lo.fields_vapic.ga_tag = pi_data->ga_tag; @@ -4386,21 +4306,29 @@ static void ir_compose_msi_msg(struct irq_data *irq_data, struct msi_msg *msg) } static struct irq_chip amd_ir_chip = { - .irq_ack = ir_ack_apic_edge, - .irq_set_affinity = amd_ir_set_affinity, - .irq_set_vcpu_affinity = amd_ir_set_vcpu_affinity, - .irq_compose_msi_msg = ir_compose_msi_msg, + .name = "AMD-IR", + .irq_ack = ir_ack_apic_edge, + .irq_set_affinity = amd_ir_set_affinity, + .irq_set_vcpu_affinity = amd_ir_set_vcpu_affinity, + .irq_compose_msi_msg = ir_compose_msi_msg, }; int amd_iommu_create_irq_domain(struct amd_iommu *iommu) { - iommu->ir_domain = irq_domain_add_tree(NULL, &amd_ir_domain_ops, iommu); + struct fwnode_handle *fn; + + fn = irq_domain_alloc_named_id_fwnode("AMD-IR", iommu->index); + if (!fn) + return -ENOMEM; + iommu->ir_domain = irq_domain_create_tree(fn, &amd_ir_domain_ops, iommu); + irq_domain_free_fwnode(fn); if (!iommu->ir_domain) return -ENOMEM; iommu->ir_domain->parent = arch_get_ir_parent_domain(); - iommu->msi_domain = arch_create_msi_irq_domain(iommu->ir_domain); - + iommu->msi_domain = arch_create_remap_msi_irq_domain(iommu->ir_domain, + "AMD-IR-MSI", + iommu->index); return 0; } diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index 5a11328f4d98..ff8887ac5555 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -37,6 +37,7 @@ #include <asm/io_apic.h> #include <asm/irq_remapping.h> +#include <linux/crash_dump.h> #include "amd_iommu_proto.h" #include "amd_iommu_types.h" #include "irq_remapping.h" @@ -195,6 +196,11 @@ spinlock_t amd_iommu_pd_lock; * page table root pointer. */ struct dev_table_entry *amd_iommu_dev_table; +/* + * Pointer to a device table which the content of old device table + * will be copied to. It's only be used in kdump kernel. + */ +static struct dev_table_entry *old_dev_tbl_cpy; /* * The alias table is a driver specific data structure which contains the @@ -208,6 +214,7 @@ u16 *amd_iommu_alias_table; * for a specific device. It is also indexed by the PCI device id. */ struct amd_iommu **amd_iommu_rlookup_table; +EXPORT_SYMBOL(amd_iommu_rlookup_table); /* * This table is used to find the irq remapping table for a given device id @@ -236,6 +243,7 @@ enum iommu_init_state { IOMMU_INITIALIZED, IOMMU_NOT_FOUND, IOMMU_INIT_ERROR, + IOMMU_CMDLINE_DISABLED, }; /* Early ioapic and hpet maps from kernel command line */ @@ -256,6 +264,28 @@ static int amd_iommu_enable_interrupts(void); static int __init iommu_go_to_state(enum iommu_init_state state); static void init_device_table_dma(void); +static bool amd_iommu_pre_enabled = true; + +bool translation_pre_enabled(struct amd_iommu *iommu) +{ + return (iommu->flags & AMD_IOMMU_FLAG_TRANS_PRE_ENABLED); +} +EXPORT_SYMBOL(translation_pre_enabled); + +static void clear_translation_pre_enabled(struct amd_iommu *iommu) +{ + iommu->flags &= ~AMD_IOMMU_FLAG_TRANS_PRE_ENABLED; +} + +static void init_translation_status(struct amd_iommu *iommu) +{ + u32 ctrl; + + ctrl = readl(iommu->mmio_base + MMIO_CONTROL_OFFSET); + if (ctrl & (1<<CONTROL_IOMMU_EN)) + iommu->flags |= AMD_IOMMU_FLAG_TRANS_PRE_ENABLED; +} + static inline void update_last_devid(u16 devid) { if (devid > amd_iommu_last_bdf) @@ -588,6 +618,8 @@ void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu) writel(0x00, iommu->mmio_base + MMIO_CMD_HEAD_OFFSET); writel(0x00, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); + iommu->cmd_buf_head = 0; + iommu->cmd_buf_tail = 0; iommu_feature_enable(iommu, CONTROL_CMDBUF_EN); } @@ -611,6 +643,14 @@ static void iommu_enable_command_buffer(struct amd_iommu *iommu) amd_iommu_reset_cmd_buffer(iommu); } +/* + * This function disables the command buffer + */ +static void iommu_disable_command_buffer(struct amd_iommu *iommu) +{ + iommu_feature_disable(iommu, CONTROL_CMDBUF_EN); +} + static void __init free_command_buffer(struct amd_iommu *iommu) { free_pages((unsigned long)iommu->cmd_buf, get_order(CMD_BUFFER_SIZE)); @@ -643,6 +683,14 @@ static void iommu_enable_event_buffer(struct amd_iommu *iommu) iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN); } +/* + * This function disables the event log buffer + */ +static void iommu_disable_event_buffer(struct amd_iommu *iommu) +{ + iommu_feature_disable(iommu, CONTROL_EVT_LOG_EN); +} + static void __init free_event_buffer(struct amd_iommu *iommu) { free_pages((unsigned long)iommu->evt_buf, get_order(EVT_BUFFER_SIZE)); @@ -804,6 +852,96 @@ static int get_dev_entry_bit(u16 devid, u8 bit) } +static bool copy_device_table(void) +{ + u64 int_ctl, int_tab_len, entry = 0, last_entry = 0; + struct dev_table_entry *old_devtb = NULL; + u32 lo, hi, devid, old_devtb_size; + phys_addr_t old_devtb_phys; + struct amd_iommu *iommu; + u16 dom_id, dte_v, irq_v; + gfp_t gfp_flag; + u64 tmp; + + if (!amd_iommu_pre_enabled) + return false; + + pr_warn("Translation is already enabled - trying to copy translation structures\n"); + for_each_iommu(iommu) { + /* All IOMMUs should use the same device table with the same size */ + lo = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET); + hi = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET + 4); + entry = (((u64) hi) << 32) + lo; + if (last_entry && last_entry != entry) { + pr_err("IOMMU:%d should use the same dev table as others!/n", + iommu->index); + return false; + } + last_entry = entry; + + old_devtb_size = ((entry & ~PAGE_MASK) + 1) << 12; + if (old_devtb_size != dev_table_size) { + pr_err("The device table size of IOMMU:%d is not expected!/n", + iommu->index); + return false; + } + } + + old_devtb_phys = entry & PAGE_MASK; + if (old_devtb_phys >= 0x100000000ULL) { + pr_err("The address of old device table is above 4G, not trustworthy!/n"); + return false; + } + old_devtb = memremap(old_devtb_phys, dev_table_size, MEMREMAP_WB); + if (!old_devtb) + return false; + + gfp_flag = GFP_KERNEL | __GFP_ZERO | GFP_DMA32; + old_dev_tbl_cpy = (void *)__get_free_pages(gfp_flag, + get_order(dev_table_size)); + if (old_dev_tbl_cpy == NULL) { + pr_err("Failed to allocate memory for copying old device table!/n"); + return false; + } + + for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) { + old_dev_tbl_cpy[devid] = old_devtb[devid]; + dom_id = old_devtb[devid].data[1] & DEV_DOMID_MASK; + dte_v = old_devtb[devid].data[0] & DTE_FLAG_V; + + if (dte_v && dom_id) { + old_dev_tbl_cpy[devid].data[0] = old_devtb[devid].data[0]; + old_dev_tbl_cpy[devid].data[1] = old_devtb[devid].data[1]; + __set_bit(dom_id, amd_iommu_pd_alloc_bitmap); + /* If gcr3 table existed, mask it out */ + if (old_devtb[devid].data[0] & DTE_FLAG_GV) { + tmp = DTE_GCR3_VAL_B(~0ULL) << DTE_GCR3_SHIFT_B; + tmp |= DTE_GCR3_VAL_C(~0ULL) << DTE_GCR3_SHIFT_C; + old_dev_tbl_cpy[devid].data[1] &= ~tmp; + tmp = DTE_GCR3_VAL_A(~0ULL) << DTE_GCR3_SHIFT_A; + tmp |= DTE_FLAG_GV; + old_dev_tbl_cpy[devid].data[0] &= ~tmp; + } + } + + irq_v = old_devtb[devid].data[2] & DTE_IRQ_REMAP_ENABLE; + int_ctl = old_devtb[devid].data[2] & DTE_IRQ_REMAP_INTCTL_MASK; + int_tab_len = old_devtb[devid].data[2] & DTE_IRQ_TABLE_LEN_MASK; + if (irq_v && (int_ctl || int_tab_len)) { + if ((int_ctl != DTE_IRQ_REMAP_INTCTL) || + (int_tab_len != DTE_IRQ_TABLE_LEN)) { + pr_err("Wrong old irq remapping flag: %#x\n", devid); + return false; + } + + old_dev_tbl_cpy[devid].data[2] = old_devtb[devid].data[2]; + } + } + memunmap(old_devtb); + + return true; +} + void amd_iommu_apply_erratum_63(u16 devid) { int sysmgt; @@ -1395,6 +1533,16 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) iommu->int_enabled = false; + init_translation_status(iommu); + if (translation_pre_enabled(iommu) && !is_kdump_kernel()) { + iommu_disable(iommu); + clear_translation_pre_enabled(iommu); + pr_warn("Translation was enabled for IOMMU:%d but we are not in kdump mode\n", + iommu->index); + } + if (amd_iommu_pre_enabled) + amd_iommu_pre_enabled = translation_pre_enabled(iommu); + ret = init_iommu_from_acpi(iommu, h); if (ret) return ret; @@ -1888,8 +2036,7 @@ static int __init init_memory_definitions(struct acpi_table_header *table) } /* - * Init the device table to not allow DMA access for devices and - * suppress all page faults + * Init the device table to not allow DMA access for devices */ static void init_device_table_dma(void) { @@ -2010,24 +2157,62 @@ static void iommu_enable_ga(struct amd_iommu *iommu) #endif } +static void early_enable_iommu(struct amd_iommu *iommu) +{ + iommu_disable(iommu); + iommu_init_flags(iommu); + iommu_set_device_table(iommu); + iommu_enable_command_buffer(iommu); + iommu_enable_event_buffer(iommu); + iommu_set_exclusion_range(iommu); + iommu_enable_ga(iommu); + iommu_enable(iommu); + iommu_flush_all_caches(iommu); +} + /* * This function finally enables all IOMMUs found in the system after - * they have been initialized + * they have been initialized. + * + * Or if in kdump kernel and IOMMUs are all pre-enabled, try to copy + * the old content of device table entries. Not this case or copy failed, + * just continue as normal kernel does. */ static void early_enable_iommus(void) { struct amd_iommu *iommu; - for_each_iommu(iommu) { - iommu_disable(iommu); - iommu_init_flags(iommu); - iommu_set_device_table(iommu); - iommu_enable_command_buffer(iommu); - iommu_enable_event_buffer(iommu); - iommu_set_exclusion_range(iommu); - iommu_enable_ga(iommu); - iommu_enable(iommu); - iommu_flush_all_caches(iommu); + + if (!copy_device_table()) { + /* + * If come here because of failure in copying device table from old + * kernel with all IOMMUs enabled, print error message and try to + * free allocated old_dev_tbl_cpy. + */ + if (amd_iommu_pre_enabled) + pr_err("Failed to copy DEV table from previous kernel.\n"); + if (old_dev_tbl_cpy != NULL) + free_pages((unsigned long)old_dev_tbl_cpy, + get_order(dev_table_size)); + + for_each_iommu(iommu) { + clear_translation_pre_enabled(iommu); + early_enable_iommu(iommu); + } + } else { + pr_info("Copied DEV table from previous kernel.\n"); + free_pages((unsigned long)amd_iommu_dev_table, + get_order(dev_table_size)); + amd_iommu_dev_table = old_dev_tbl_cpy; + for_each_iommu(iommu) { + iommu_disable_command_buffer(iommu); + iommu_disable_event_buffer(iommu); + iommu_enable_command_buffer(iommu); + iommu_enable_event_buffer(iommu); + iommu_enable_ga(iommu); + iommu_set_device_table(iommu); + iommu_flush_all_caches(iommu); + } } #ifdef CONFIG_IRQ_REMAP @@ -2097,23 +2282,27 @@ static struct syscore_ops amd_iommu_syscore_ops = { .resume = amd_iommu_resume, }; -static void __init free_on_init_error(void) +static void __init free_iommu_resources(void) { kmemleak_free(irq_lookup_table); free_pages((unsigned long)irq_lookup_table, get_order(rlookup_table_size)); + irq_lookup_table = NULL; kmem_cache_destroy(amd_iommu_irq_cache); amd_iommu_irq_cache = NULL; free_pages((unsigned long)amd_iommu_rlookup_table, get_order(rlookup_table_size)); + amd_iommu_rlookup_table = NULL; free_pages((unsigned long)amd_iommu_alias_table, get_order(alias_table_size)); + amd_iommu_alias_table = NULL; free_pages((unsigned long)amd_iommu_dev_table, get_order(dev_table_size)); + amd_iommu_dev_table = NULL; free_iommu_all(); @@ -2183,6 +2372,7 @@ static void __init free_dma_resources(void) { free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, get_order(MAX_DOMAIN_ID/8)); + amd_iommu_pd_alloc_bitmap = NULL; free_unity_maps(); } @@ -2258,7 +2448,8 @@ static int __init early_amd_iommu_init(void) /* Device table - directly used by all IOMMUs */ ret = -ENOMEM; - amd_iommu_dev_table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, + amd_iommu_dev_table = (void *)__get_free_pages( + GFP_KERNEL | __GFP_ZERO | GFP_DMA32, get_order(dev_table_size)); if (amd_iommu_dev_table == NULL) goto out; @@ -2307,6 +2498,10 @@ static int __init early_amd_iommu_init(void) if (ret) goto out; + /* Disable any previously enabled IOMMUs */ + if (!is_kdump_kernel() || amd_iommu_disabled) + disable_iommus(); + if (amd_iommu_irq_remap) amd_iommu_irq_remap = check_ioapic_information(); @@ -2410,14 +2605,21 @@ static int __init state_next(void) case IOMMU_IVRS_DETECTED: ret = early_amd_iommu_init(); init_state = ret ? IOMMU_INIT_ERROR : IOMMU_ACPI_FINISHED; + if (init_state == IOMMU_ACPI_FINISHED && amd_iommu_disabled) { + pr_info("AMD-Vi: AMD IOMMU disabled on kernel command-line\n"); + free_dma_resources(); + free_iommu_resources(); + init_state = IOMMU_CMDLINE_DISABLED; + ret = -EINVAL; + } break; case IOMMU_ACPI_FINISHED: early_enable_iommus(); - register_syscore_ops(&amd_iommu_syscore_ops); x86_platform.iommu_shutdown = disable_iommus; init_state = IOMMU_ENABLED; break; case IOMMU_ENABLED: + register_syscore_ops(&amd_iommu_syscore_ops); ret = amd_iommu_init_pci(); init_state = ret ? IOMMU_INIT_ERROR : IOMMU_PCI_INIT; enable_iommus_v2(); @@ -2438,6 +2640,7 @@ static int __init state_next(void) break; case IOMMU_NOT_FOUND: case IOMMU_INIT_ERROR: + case IOMMU_CMDLINE_DISABLED: /* Error states => do nothing */ ret = -EINVAL; break; @@ -2451,13 +2654,14 @@ static int __init state_next(void) static int __init iommu_go_to_state(enum iommu_init_state state) { - int ret = 0; + int ret = -EINVAL; while (init_state != state) { - ret = state_next(); - if (init_state == IOMMU_NOT_FOUND || - init_state == IOMMU_INIT_ERROR) + if (init_state == IOMMU_NOT_FOUND || + init_state == IOMMU_INIT_ERROR || + init_state == IOMMU_CMDLINE_DISABLED) break; + ret = state_next(); } return ret; @@ -2522,7 +2726,7 @@ static int __init amd_iommu_init(void) free_dma_resources(); if (!irq_remapping_enabled) { disable_iommus(); - free_on_init_error(); + free_iommu_resources(); } else { struct amd_iommu *iommu; @@ -2549,9 +2753,6 @@ int __init amd_iommu_detect(void) if (no_iommu || (iommu_detected && !gart_iommu_aperture)) return -ENODEV; - if (amd_iommu_disabled) - return -ENODEV; - ret = iommu_go_to_state(IOMMU_IVRS_DETECTED); if (ret) return ret; diff --git a/drivers/iommu/amd_iommu_proto.h b/drivers/iommu/amd_iommu_proto.h index 466260f8a1df..90e62e9b01c5 100644 --- a/drivers/iommu/amd_iommu_proto.h +++ b/drivers/iommu/amd_iommu_proto.h @@ -87,4 +87,6 @@ static inline bool iommu_feature(struct amd_iommu *iommu, u64 f) return !!(iommu->features & f); } +extern bool translation_pre_enabled(struct amd_iommu *iommu); +extern struct iommu_dev_data *get_dev_data(struct device *dev); #endif /* _ASM_X86_AMD_IOMMU_PROTO_H */ diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h index 4de8f4160bb8..5f775fef341c 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h @@ -250,6 +250,14 @@ #define GA_GUEST_NR 0x1 +/* Bit value definition for dte irq remapping fields*/ +#define DTE_IRQ_PHYS_ADDR_MASK (((1ULL << 45)-1) << 6) +#define DTE_IRQ_REMAP_INTCTL_MASK (0x3ULL << 60) +#define DTE_IRQ_TABLE_LEN_MASK (0xfULL << 1) +#define DTE_IRQ_REMAP_INTCTL (2ULL << 60) +#define DTE_IRQ_TABLE_LEN (8ULL << 1) +#define DTE_IRQ_REMAP_ENABLE 1ULL + #define PAGE_MODE_NONE 0x00 #define PAGE_MODE_1_LEVEL 0x01 #define PAGE_MODE_2_LEVEL 0x02 @@ -265,7 +273,7 @@ #define PM_LEVEL_INDEX(x, a) (((a) >> PM_LEVEL_SHIFT((x))) & 0x1ffULL) #define PM_LEVEL_ENC(x) (((x) << 9) & 0xe00ULL) #define PM_LEVEL_PDE(x, a) ((a) | PM_LEVEL_ENC((x)) | \ - IOMMU_PTE_P | IOMMU_PTE_IR | IOMMU_PTE_IW) + IOMMU_PTE_PR | IOMMU_PTE_IR | IOMMU_PTE_IW) #define PM_PTE_LEVEL(pte) (((pte) >> 9) & 0x7ULL) #define PM_MAP_4k 0 @@ -314,18 +322,29 @@ #define PTE_LEVEL_PAGE_SIZE(level) \ (1ULL << (12 + (9 * (level)))) -#define IOMMU_PTE_P (1ULL << 0) -#define IOMMU_PTE_TV (1ULL << 1) +/* + * Bit value definition for I/O PTE fields + */ +#define IOMMU_PTE_PR (1ULL << 0) #define IOMMU_PTE_U (1ULL << 59) #define IOMMU_PTE_FC (1ULL << 60) #define IOMMU_PTE_IR (1ULL << 61) #define IOMMU_PTE_IW (1ULL << 62) +/* + * Bit value definition for DTE fields + */ +#define DTE_FLAG_V (1ULL << 0) +#define DTE_FLAG_TV (1ULL << 1) +#define DTE_FLAG_IR (1ULL << 61) +#define DTE_FLAG_IW (1ULL << 62) + #define DTE_FLAG_IOTLB (1ULL << 32) #define DTE_FLAG_GV (1ULL << 55) #define DTE_FLAG_MASK (0x3ffULL << 32) #define DTE_GLX_SHIFT (56) #define DTE_GLX_MASK (3) +#define DEV_DOMID_MASK 0xffffULL #define DTE_GCR3_VAL_A(x) (((x) >> 12) & 0x00007ULL) #define DTE_GCR3_VAL_B(x) (((x) >> 15) & 0x0ffffULL) @@ -342,7 +361,7 @@ #define GCR3_VALID 0x01ULL #define IOMMU_PAGE_MASK (((1ULL << 52) - 1) & ~0xfffULL) -#define IOMMU_PTE_PRESENT(pte) ((pte) & IOMMU_PTE_P) +#define IOMMU_PTE_PRESENT(pte) ((pte) & IOMMU_PTE_PR) #define IOMMU_PTE_PAGE(pte) (phys_to_virt((pte) & IOMMU_PAGE_MASK)) #define IOMMU_PTE_MODE(pte) (((pte) >> 9) & 0x07) @@ -434,6 +453,8 @@ struct iommu_domain; struct irq_domain; struct amd_irte_ops; +#define AMD_IOMMU_FLAG_TRANS_PRE_ENABLED (1 << 0) + /* * This structure contains generic data for IOMMU protection domains * independent of their use. @@ -516,6 +537,8 @@ struct amd_iommu { /* command buffer virtual address */ u8 *cmd_buf; + u32 cmd_buf_head; + u32 cmd_buf_tail; /* event buffer virtual address */ u8 *evt_buf; @@ -566,12 +589,15 @@ struct amd_iommu { struct amd_irte_ops *irte_ops; #endif + u32 flags; volatile u64 __aligned(8) cmd_sem; }; static inline struct amd_iommu *dev_to_amd_iommu(struct device *dev) { - return container_of(dev, struct amd_iommu, iommu.dev); + struct iommu_device *iommu = dev_to_iommu_device(dev); + + return container_of(iommu, struct amd_iommu, iommu); } #define ACPIHID_UID_LEN 256 @@ -594,6 +620,30 @@ struct devid_map { bool cmd_line; }; +/* + * This struct contains device specific data for the IOMMU + */ +struct iommu_dev_data { + struct list_head list; /* For domain->dev_list */ + struct list_head dev_data_list; /* For global dev_data_list */ + struct protection_domain *domain; /* Domain the device is bound to */ + u16 devid; /* PCI Device ID */ + u16 alias; /* Alias Device ID */ + bool iommu_v2; /* Device can make use of IOMMUv2 */ + bool passthrough; /* Device is identity mapped */ + struct { + bool enabled; + int qdep; + } ats; /* ATS state */ + bool pri_tlp; /* PASID TLB required for + PPR completions */ + u32 errata; /* Bitmap for errata to apply */ + bool use_vapic; /* Enable device to use vapic mode */ + bool defer_attach; + + struct ratelimit_state rs; /* Ratelimit IOPF messages */ +}; + /* Map HPET and IOAPIC ids to the devid used by the IOMMU */ extern struct list_head ioapic_map; extern struct list_head hpet_map; diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c index 6629c472eafd..e705fac89cb4 100644 --- a/drivers/iommu/amd_iommu_v2.c +++ b/drivers/iommu/amd_iommu_v2.c @@ -562,14 +562,30 @@ static int ppr_notifier(struct notifier_block *nb, unsigned long e, void *data) unsigned long flags; struct fault *fault; bool finish; - u16 tag; + u16 tag, devid; int ret; + struct iommu_dev_data *dev_data; + struct pci_dev *pdev = NULL; iommu_fault = data; tag = iommu_fault->tag & 0x1ff; finish = (iommu_fault->tag >> 9) & 1; + devid = iommu_fault->device_id; + pdev = pci_get_bus_and_slot(PCI_BUS_NUM(devid), devid & 0xff); + if (!pdev) + return -ENODEV; + dev_data = get_dev_data(&pdev->dev); + + /* In kdump kernel pci dev is not initialized yet -> send INVALID */ ret = NOTIFY_DONE; + if (translation_pre_enabled(amd_iommu_rlookup_table[devid]) + && dev_data->defer_attach) { + amd_iommu_complete_ppr(pdev, iommu_fault->pasid, + PPR_INVALID, tag); + goto out; + } + dev_state = get_device_state(iommu_fault->device_id); if (dev_state == NULL) goto out; diff --git a/drivers/iommu/arm-smmu-regs.h b/drivers/iommu/arm-smmu-regs.h new file mode 100644 index 000000000000..a1226e4ab5f8 --- /dev/null +++ b/drivers/iommu/arm-smmu-regs.h @@ -0,0 +1,220 @@ +/* + * IOMMU API for ARM architected SMMU implementations. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) 2013 ARM Limited + * + * Author: Will Deacon <will.deacon@arm.com> + */ + +#ifndef _ARM_SMMU_REGS_H +#define _ARM_SMMU_REGS_H + +/* Configuration registers */ +#define ARM_SMMU_GR0_sCR0 0x0 +#define sCR0_CLIENTPD (1 << 0) +#define sCR0_GFRE (1 << 1) +#define sCR0_GFIE (1 << 2) +#define sCR0_EXIDENABLE (1 << 3) +#define sCR0_GCFGFRE (1 << 4) +#define sCR0_GCFGFIE (1 << 5) +#define sCR0_USFCFG (1 << 10) +#define sCR0_VMIDPNE (1 << 11) +#define sCR0_PTM (1 << 12) +#define sCR0_FB (1 << 13) +#define sCR0_VMID16EN (1 << 31) +#define sCR0_BSU_SHIFT 14 +#define sCR0_BSU_MASK 0x3 + +/* Auxiliary Configuration register */ +#define ARM_SMMU_GR0_sACR 0x10 + +/* Identification registers */ +#define ARM_SMMU_GR0_ID0 0x20 +#define ARM_SMMU_GR0_ID1 0x24 +#define ARM_SMMU_GR0_ID2 0x28 +#define ARM_SMMU_GR0_ID3 0x2c +#define ARM_SMMU_GR0_ID4 0x30 +#define ARM_SMMU_GR0_ID5 0x34 +#define ARM_SMMU_GR0_ID6 0x38 +#define ARM_SMMU_GR0_ID7 0x3c +#define ARM_SMMU_GR0_sGFSR 0x48 +#define ARM_SMMU_GR0_sGFSYNR0 0x50 +#define ARM_SMMU_GR0_sGFSYNR1 0x54 +#define ARM_SMMU_GR0_sGFSYNR2 0x58 + +#define ID0_S1TS (1 << 30) +#define ID0_S2TS (1 << 29) +#define ID0_NTS (1 << 28) +#define ID0_SMS (1 << 27) +#define ID0_ATOSNS (1 << 26) +#define ID0_PTFS_NO_AARCH32 (1 << 25) +#define ID0_PTFS_NO_AARCH32S (1 << 24) +#define ID0_CTTW (1 << 14) +#define ID0_NUMIRPT_SHIFT 16 +#define ID0_NUMIRPT_MASK 0xff +#define ID0_NUMSIDB_SHIFT 9 +#define ID0_NUMSIDB_MASK 0xf +#define ID0_EXIDS (1 << 8) +#define ID0_NUMSMRG_SHIFT 0 +#define ID0_NUMSMRG_MASK 0xff + +#define ID1_PAGESIZE (1 << 31) +#define ID1_NUMPAGENDXB_SHIFT 28 +#define ID1_NUMPAGENDXB_MASK 7 +#define ID1_NUMS2CB_SHIFT 16 +#define ID1_NUMS2CB_MASK 0xff +#define ID1_NUMCB_SHIFT 0 +#define ID1_NUMCB_MASK 0xff + +#define ID2_OAS_SHIFT 4 +#define ID2_OAS_MASK 0xf +#define ID2_IAS_SHIFT 0 +#define ID2_IAS_MASK 0xf +#define ID2_UBS_SHIFT 8 +#define ID2_UBS_MASK 0xf +#define ID2_PTFS_4K (1 << 12) +#define ID2_PTFS_16K (1 << 13) +#define ID2_PTFS_64K (1 << 14) +#define ID2_VMID16 (1 << 15) + +#define ID7_MAJOR_SHIFT 4 +#define ID7_MAJOR_MASK 0xf + +/* Global TLB invalidation */ +#define ARM_SMMU_GR0_TLBIVMID 0x64 +#define ARM_SMMU_GR0_TLBIALLNSNH 0x68 +#define ARM_SMMU_GR0_TLBIALLH 0x6c +#define ARM_SMMU_GR0_sTLBGSYNC 0x70 +#define ARM_SMMU_GR0_sTLBGSTATUS 0x74 +#define sTLBGSTATUS_GSACTIVE (1 << 0) + +/* Stream mapping registers */ +#define ARM_SMMU_GR0_SMR(n) (0x800 + ((n) << 2)) +#define SMR_VALID (1 << 31) +#define SMR_MASK_SHIFT 16 +#define SMR_ID_SHIFT 0 + +#define ARM_SMMU_GR0_S2CR(n) (0xc00 + ((n) << 2)) +#define S2CR_CBNDX_SHIFT 0 +#define S2CR_CBNDX_MASK 0xff +#define S2CR_EXIDVALID (1 << 10) +#define S2CR_TYPE_SHIFT 16 +#define S2CR_TYPE_MASK 0x3 +enum arm_smmu_s2cr_type { + S2CR_TYPE_TRANS, + S2CR_TYPE_BYPASS, + S2CR_TYPE_FAULT, +}; + +#define S2CR_PRIVCFG_SHIFT 24 +#define S2CR_PRIVCFG_MASK 0x3 +enum arm_smmu_s2cr_privcfg { + S2CR_PRIVCFG_DEFAULT, + S2CR_PRIVCFG_DIPAN, + S2CR_PRIVCFG_UNPRIV, + S2CR_PRIVCFG_PRIV, +}; + +/* Context bank attribute registers */ +#define ARM_SMMU_GR1_CBAR(n) (0x0 + ((n) << 2)) +#define CBAR_VMID_SHIFT 0 +#define CBAR_VMID_MASK 0xff +#define CBAR_S1_BPSHCFG_SHIFT 8 +#define CBAR_S1_BPSHCFG_MASK 3 +#define CBAR_S1_BPSHCFG_NSH 3 +#define CBAR_S1_MEMATTR_SHIFT 12 +#define CBAR_S1_MEMATTR_MASK 0xf +#define CBAR_S1_MEMATTR_WB 0xf +#define CBAR_TYPE_SHIFT 16 +#define CBAR_TYPE_MASK 0x3 +#define CBAR_TYPE_S2_TRANS (0 << CBAR_TYPE_SHIFT) +#define CBAR_TYPE_S1_TRANS_S2_BYPASS (1 << CBAR_TYPE_SHIFT) +#define CBAR_TYPE_S1_TRANS_S2_FAULT (2 << CBAR_TYPE_SHIFT) +#define CBAR_TYPE_S1_TRANS_S2_TRANS (3 << CBAR_TYPE_SHIFT) +#define CBAR_IRPTNDX_SHIFT 24 +#define CBAR_IRPTNDX_MASK 0xff + +#define ARM_SMMU_GR1_CBA2R(n) (0x800 + ((n) << 2)) +#define CBA2R_RW64_32BIT (0 << 0) +#define CBA2R_RW64_64BIT (1 << 0) +#define CBA2R_VMID_SHIFT 16 +#define CBA2R_VMID_MASK 0xffff + +#define ARM_SMMU_CB_SCTLR 0x0 +#define ARM_SMMU_CB_ACTLR 0x4 +#define ARM_SMMU_CB_RESUME 0x8 +#define ARM_SMMU_CB_TTBCR2 0x10 +#define ARM_SMMU_CB_TTBR0 0x20 +#define ARM_SMMU_CB_TTBR1 0x28 +#define ARM_SMMU_CB_TTBCR 0x30 +#define ARM_SMMU_CB_CONTEXTIDR 0x34 +#define ARM_SMMU_CB_S1_MAIR0 0x38 +#define ARM_SMMU_CB_S1_MAIR1 0x3c +#define ARM_SMMU_CB_PAR 0x50 +#define ARM_SMMU_CB_FSR 0x58 +#define ARM_SMMU_CB_FAR 0x60 +#define ARM_SMMU_CB_FSYNR0 0x68 +#define ARM_SMMU_CB_S1_TLBIVA 0x600 +#define ARM_SMMU_CB_S1_TLBIASID 0x610 +#define ARM_SMMU_CB_S1_TLBIVAL 0x620 +#define ARM_SMMU_CB_S2_TLBIIPAS2 0x630 +#define ARM_SMMU_CB_S2_TLBIIPAS2L 0x638 +#define ARM_SMMU_CB_TLBSYNC 0x7f0 +#define ARM_SMMU_CB_TLBSTATUS 0x7f4 +#define ARM_SMMU_CB_ATS1PR 0x800 +#define ARM_SMMU_CB_ATSR 0x8f0 + +#define SCTLR_S1_ASIDPNE (1 << 12) +#define SCTLR_CFCFG (1 << 7) +#define SCTLR_CFIE (1 << 6) +#define SCTLR_CFRE (1 << 5) +#define SCTLR_E (1 << 4) +#define SCTLR_AFE (1 << 2) +#define SCTLR_TRE (1 << 1) +#define SCTLR_M (1 << 0) + +#define CB_PAR_F (1 << 0) + +#define ATSR_ACTIVE (1 << 0) + +#define RESUME_RETRY (0 << 0) +#define RESUME_TERMINATE (1 << 0) + +#define TTBCR2_SEP_SHIFT 15 +#define TTBCR2_SEP_UPSTREAM (0x7 << TTBCR2_SEP_SHIFT) +#define TTBCR2_AS (1 << 4) + +#define TTBRn_ASID_SHIFT 48 + +#define FSR_MULTI (1 << 31) +#define FSR_SS (1 << 30) +#define FSR_UUT (1 << 8) +#define FSR_ASF (1 << 7) +#define FSR_TLBLKF (1 << 6) +#define FSR_TLBMCF (1 << 5) +#define FSR_EF (1 << 4) +#define FSR_PF (1 << 3) +#define FSR_AFF (1 << 2) +#define FSR_TF (1 << 1) + +#define FSR_IGN (FSR_AFF | FSR_ASF | \ + FSR_TLBMCF | FSR_TLBLKF) +#define FSR_FAULT (FSR_MULTI | FSR_SS | FSR_UUT | \ + FSR_EF | FSR_PF | FSR_TF | FSR_IGN) + +#define FSYNR0_WNR (1 << 4) + +#endif /* _ARM_SMMU_REGS_H */ diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 380969aa60d5..e67ba6c40faf 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -408,10 +408,20 @@ /* High-level queue structures */ #define ARM_SMMU_POLL_TIMEOUT_US 100 +#define ARM_SMMU_CMDQ_DRAIN_TIMEOUT_US 1000000 /* 1s! */ #define MSI_IOVA_BASE 0x8000000 #define MSI_IOVA_LENGTH 0x100000 +/* Until ACPICA headers cover IORT rev. C */ +#ifndef ACPI_IORT_SMMU_HISILICON_HI161X +#define ACPI_IORT_SMMU_HISILICON_HI161X 0x1 +#endif + +#ifndef ACPI_IORT_SMMU_V3_CAVIUM_CN99XX +#define ACPI_IORT_SMMU_V3_CAVIUM_CN99XX 0x2 +#endif + static bool disable_bypass; module_param_named(disable_bypass, disable_bypass, bool, S_IRUGO); MODULE_PARM_DESC(disable_bypass, @@ -597,6 +607,7 @@ struct arm_smmu_device { u32 features; #define ARM_SMMU_OPT_SKIP_PREFETCH (1 << 0) +#define ARM_SMMU_OPT_PAGE0_REGS_ONLY (1 << 1) u32 options; struct arm_smmu_cmdq cmdq; @@ -604,6 +615,7 @@ struct arm_smmu_device { struct arm_smmu_priq priq; int gerr_irq; + int combined_irq; unsigned long ias; /* IPA */ unsigned long oas; /* PA */ @@ -645,7 +657,6 @@ struct arm_smmu_domain { struct mutex init_mutex; /* Protects smmu pointer */ struct io_pgtable_ops *pgtbl_ops; - spinlock_t pgtbl_lock; enum arm_smmu_domain_stage stage; union { @@ -663,9 +674,20 @@ struct arm_smmu_option_prop { static struct arm_smmu_option_prop arm_smmu_options[] = { { ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" }, + { ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"}, { 0, NULL}, }; +static inline void __iomem *arm_smmu_page1_fixup(unsigned long offset, + struct arm_smmu_device *smmu) +{ + if ((offset > SZ_64K) && + (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY)) + offset -= SZ_64K; + + return smmu->base + offset; +} + static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom) { return container_of(dom, struct arm_smmu_domain, domain); @@ -737,7 +759,13 @@ static void queue_inc_prod(struct arm_smmu_queue *q) */ static int queue_poll_cons(struct arm_smmu_queue *q, bool drain, bool wfe) { - ktime_t timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US); + ktime_t timeout; + unsigned int delay = 1; + + /* Wait longer if it's queue drain */ + timeout = ktime_add_us(ktime_get(), drain ? + ARM_SMMU_CMDQ_DRAIN_TIMEOUT_US : + ARM_SMMU_POLL_TIMEOUT_US); while (queue_sync_cons(q), (drain ? !queue_empty(q) : queue_full(q))) { if (ktime_compare(ktime_get(), timeout) > 0) @@ -747,7 +775,8 @@ static int queue_poll_cons(struct arm_smmu_queue *q, bool drain, bool wfe) wfe(); } else { cpu_relax(); - udelay(1); + udelay(delay); + delay *= 2; } } @@ -1302,6 +1331,24 @@ static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev) return IRQ_HANDLED; } +static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev) +{ + struct arm_smmu_device *smmu = dev; + + arm_smmu_evtq_thread(irq, dev); + if (smmu->features & ARM_SMMU_FEAT_PRI) + arm_smmu_priq_thread(irq, dev); + + return IRQ_HANDLED; +} + +static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev) +{ + arm_smmu_gerror_handler(irq, dev); + arm_smmu_cmdq_sync_handler(irq, dev); + return IRQ_WAKE_THREAD; +} + /* IO_PGTABLE API */ static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu) { @@ -1406,7 +1453,6 @@ static struct iommu_domain *arm_smmu_domain_alloc(unsigned type) } mutex_init(&smmu_domain->init_mutex); - spin_lock_init(&smmu_domain->pgtbl_lock); return &smmu_domain->domain; } @@ -1555,6 +1601,9 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain) .iommu_dev = smmu->dev, }; + if (smmu->features & ARM_SMMU_FEAT_COHERENCY) + pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_NO_DMA; + pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain); if (!pgtbl_ops) return -ENOMEM; @@ -1675,44 +1724,29 @@ out_unlock: static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t size, int prot) { - int ret; - unsigned long flags; - struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); - struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops; + struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops; if (!ops) return -ENODEV; - spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags); - ret = ops->map(ops, iova, paddr, size, prot); - spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags); - return ret; + return ops->map(ops, iova, paddr, size, prot); } static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size) { - size_t ret; - unsigned long flags; - struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); - struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops; + struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops; if (!ops) return 0; - spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags); - ret = ops->unmap(ops, iova, size); - spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags); - return ret; + return ops->unmap(ops, iova, size); } static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova) { - phys_addr_t ret; - unsigned long flags; - struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); - struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops; + struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops; if (domain->type == IOMMU_DOMAIN_IDENTITY) return iova; @@ -1720,11 +1754,7 @@ arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova) if (!ops) return 0; - spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags); - ret = ops->iova_to_phys(ops, iova); - spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags); - - return ret; + return ops->iova_to_phys(ops, iova); } static struct platform_driver arm_smmu_driver; @@ -1961,8 +1991,8 @@ static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu, return -ENOMEM; } - q->prod_reg = smmu->base + prod_off; - q->cons_reg = smmu->base + cons_off; + q->prod_reg = arm_smmu_page1_fixup(prod_off, smmu); + q->cons_reg = arm_smmu_page1_fixup(cons_off, smmu); q->ent_dwords = dwords; q->q_base = Q_BASE_RWA; @@ -2218,18 +2248,9 @@ static void arm_smmu_setup_msis(struct arm_smmu_device *smmu) devm_add_action(dev, arm_smmu_free_msis, dev); } -static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu) +static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu) { - int ret, irq; - u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN; - - /* Disable IRQs first */ - ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL, - ARM_SMMU_IRQ_CTRLACK); - if (ret) { - dev_err(smmu->dev, "failed to disable irqs\n"); - return ret; - } + int irq, ret; arm_smmu_setup_msis(smmu); @@ -2272,10 +2293,41 @@ static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu) if (ret < 0) dev_warn(smmu->dev, "failed to enable priq irq\n"); - else - irqen_flags |= IRQ_CTRL_PRIQ_IRQEN; } } +} + +static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu) +{ + int ret, irq; + u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN; + + /* Disable IRQs first */ + ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL, + ARM_SMMU_IRQ_CTRLACK); + if (ret) { + dev_err(smmu->dev, "failed to disable irqs\n"); + return ret; + } + + irq = smmu->combined_irq; + if (irq) { + /* + * Cavium ThunderX2 implementation doesn't not support unique + * irq lines. Use single irq line for all the SMMUv3 interrupts. + */ + ret = devm_request_threaded_irq(smmu->dev, irq, + arm_smmu_combined_irq_handler, + arm_smmu_combined_irq_thread, + IRQF_ONESHOT, + "arm-smmu-v3-combined-irq", smmu); + if (ret < 0) + dev_warn(smmu->dev, "failed to enable combined irq\n"); + } else + arm_smmu_setup_unique_irqs(smmu); + + if (smmu->features & ARM_SMMU_FEAT_PRI) + irqen_flags |= IRQ_CTRL_PRIQ_IRQEN; /* Enable interrupt generation on the SMMU */ ret = arm_smmu_write_reg_sync(smmu, irqen_flags, @@ -2363,8 +2415,10 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass) /* Event queue */ writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE); - writel_relaxed(smmu->evtq.q.prod, smmu->base + ARM_SMMU_EVTQ_PROD); - writel_relaxed(smmu->evtq.q.cons, smmu->base + ARM_SMMU_EVTQ_CONS); + writel_relaxed(smmu->evtq.q.prod, + arm_smmu_page1_fixup(ARM_SMMU_EVTQ_PROD, smmu)); + writel_relaxed(smmu->evtq.q.cons, + arm_smmu_page1_fixup(ARM_SMMU_EVTQ_CONS, smmu)); enables |= CR0_EVTQEN; ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0, @@ -2379,9 +2433,9 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass) writeq_relaxed(smmu->priq.q.q_base, smmu->base + ARM_SMMU_PRIQ_BASE); writel_relaxed(smmu->priq.q.prod, - smmu->base + ARM_SMMU_PRIQ_PROD); + arm_smmu_page1_fixup(ARM_SMMU_PRIQ_PROD, smmu)); writel_relaxed(smmu->priq.q.cons, - smmu->base + ARM_SMMU_PRIQ_CONS); + arm_smmu_page1_fixup(ARM_SMMU_PRIQ_CONS, smmu)); enables |= CR0_PRIQEN; ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0, @@ -2605,6 +2659,20 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu) } #ifdef CONFIG_ACPI +static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu) +{ + switch (model) { + case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX: + smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY; + break; + case ACPI_IORT_SMMU_HISILICON_HI161X: + smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH; + break; + } + + dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options); +} + static int arm_smmu_device_acpi_probe(struct platform_device *pdev, struct arm_smmu_device *smmu) { @@ -2617,6 +2685,8 @@ static int arm_smmu_device_acpi_probe(struct platform_device *pdev, /* Retrieve SMMUv3 specific data */ iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data; + acpi_smmu_get_options(iort_smmu->model, smmu); + if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE) smmu->features |= ARM_SMMU_FEAT_COHERENCY; @@ -2652,6 +2722,14 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev, return ret; } +static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu) +{ + if (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY) + return SZ_64K; + else + return SZ_128K; +} + static int arm_smmu_device_probe(struct platform_device *pdev) { int irq, ret; @@ -2668,9 +2746,20 @@ static int arm_smmu_device_probe(struct platform_device *pdev) } smmu->dev = dev; + if (dev->of_node) { + ret = arm_smmu_device_dt_probe(pdev, smmu); + } else { + ret = arm_smmu_device_acpi_probe(pdev, smmu); + if (ret == -ENODEV) + return ret; + } + + /* Set bypass mode according to firmware probing result */ + bypass = !!ret; + /* Base address */ res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (resource_size(res) + 1 < SZ_128K) { + if (resource_size(res) + 1 < arm_smmu_resource_size(smmu)) { dev_err(dev, "MMIO region too small (%pr)\n", res); return -EINVAL; } @@ -2681,33 +2770,27 @@ static int arm_smmu_device_probe(struct platform_device *pdev) return PTR_ERR(smmu->base); /* Interrupt lines */ - irq = platform_get_irq_byname(pdev, "eventq"); - if (irq > 0) - smmu->evtq.q.irq = irq; - irq = platform_get_irq_byname(pdev, "priq"); + irq = platform_get_irq_byname(pdev, "combined"); if (irq > 0) - smmu->priq.q.irq = irq; + smmu->combined_irq = irq; + else { + irq = platform_get_irq_byname(pdev, "eventq"); + if (irq > 0) + smmu->evtq.q.irq = irq; - irq = platform_get_irq_byname(pdev, "cmdq-sync"); - if (irq > 0) - smmu->cmdq.q.irq = irq; + irq = platform_get_irq_byname(pdev, "priq"); + if (irq > 0) + smmu->priq.q.irq = irq; - irq = platform_get_irq_byname(pdev, "gerror"); - if (irq > 0) - smmu->gerr_irq = irq; + irq = platform_get_irq_byname(pdev, "cmdq-sync"); + if (irq > 0) + smmu->cmdq.q.irq = irq; - if (dev->of_node) { - ret = arm_smmu_device_dt_probe(pdev, smmu); - } else { - ret = arm_smmu_device_acpi_probe(pdev, smmu); - if (ret == -ENODEV) - return ret; + irq = platform_get_irq_byname(pdev, "gerror"); + if (irq > 0) + smmu->gerr_irq = irq; } - - /* Set bypass mode according to firmware probing result */ - bypass = !!ret; - /* Probe the h/w */ ret = arm_smmu_device_hw_probe(smmu); if (ret) @@ -2736,6 +2819,10 @@ static int arm_smmu_device_probe(struct platform_device *pdev) iommu_device_set_fwnode(&smmu->iommu, dev->fwnode); ret = iommu_device_register(&smmu->iommu); + if (ret) { + dev_err(dev, "Failed to register iommu\n"); + return ret; + } #ifdef CONFIG_PCI if (pci_bus_type.iommu_ops != &arm_smmu_ops) { @@ -2765,10 +2852,16 @@ static int arm_smmu_device_remove(struct platform_device *pdev) struct arm_smmu_device *smmu = platform_get_drvdata(pdev); arm_smmu_device_disable(smmu); + return 0; } -static struct of_device_id arm_smmu_of_match[] = { +static void arm_smmu_device_shutdown(struct platform_device *pdev) +{ + arm_smmu_device_remove(pdev); +} + +static const struct of_device_id arm_smmu_of_match[] = { { .compatible = "arm,smmu-v3", }, { }, }; @@ -2781,6 +2874,7 @@ static struct platform_driver arm_smmu_driver = { }, .probe = arm_smmu_device_probe, .remove = arm_smmu_device_remove, + .shutdown = arm_smmu_device_shutdown, }; module_platform_driver(arm_smmu_driver); diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 7ec30b08b3bd..3bdb799d3b4b 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -54,6 +54,15 @@ #include <linux/amba/bus.h> #include "io-pgtable.h" +#include "arm-smmu-regs.h" + +#define ARM_MMU500_ACTLR_CPRE (1 << 1) + +#define ARM_MMU500_ACR_CACHE_LOCK (1 << 26) +#define ARM_MMU500_ACR_SMTNMB_TLBEN (1 << 8) + +#define TLB_LOOP_TIMEOUT 1000000 /* 1s! */ +#define TLB_SPIN_COUNT 10 /* Maximum number of context banks per SMMU */ #define ARM_SMMU_MAX_CBS 128 @@ -83,211 +92,9 @@ #define smmu_write_atomic_lq writel_relaxed #endif -/* Configuration registers */ -#define ARM_SMMU_GR0_sCR0 0x0 -#define sCR0_CLIENTPD (1 << 0) -#define sCR0_GFRE (1 << 1) -#define sCR0_GFIE (1 << 2) -#define sCR0_EXIDENABLE (1 << 3) -#define sCR0_GCFGFRE (1 << 4) -#define sCR0_GCFGFIE (1 << 5) -#define sCR0_USFCFG (1 << 10) -#define sCR0_VMIDPNE (1 << 11) -#define sCR0_PTM (1 << 12) -#define sCR0_FB (1 << 13) -#define sCR0_VMID16EN (1 << 31) -#define sCR0_BSU_SHIFT 14 -#define sCR0_BSU_MASK 0x3 - -/* Auxiliary Configuration register */ -#define ARM_SMMU_GR0_sACR 0x10 - -/* Identification registers */ -#define ARM_SMMU_GR0_ID0 0x20 -#define ARM_SMMU_GR0_ID1 0x24 -#define ARM_SMMU_GR0_ID2 0x28 -#define ARM_SMMU_GR0_ID3 0x2c -#define ARM_SMMU_GR0_ID4 0x30 -#define ARM_SMMU_GR0_ID5 0x34 -#define ARM_SMMU_GR0_ID6 0x38 -#define ARM_SMMU_GR0_ID7 0x3c -#define ARM_SMMU_GR0_sGFSR 0x48 -#define ARM_SMMU_GR0_sGFSYNR0 0x50 -#define ARM_SMMU_GR0_sGFSYNR1 0x54 -#define ARM_SMMU_GR0_sGFSYNR2 0x58 - -#define ID0_S1TS (1 << 30) -#define ID0_S2TS (1 << 29) -#define ID0_NTS (1 << 28) -#define ID0_SMS (1 << 27) -#define ID0_ATOSNS (1 << 26) -#define ID0_PTFS_NO_AARCH32 (1 << 25) -#define ID0_PTFS_NO_AARCH32S (1 << 24) -#define ID0_CTTW (1 << 14) -#define ID0_NUMIRPT_SHIFT 16 -#define ID0_NUMIRPT_MASK 0xff -#define ID0_NUMSIDB_SHIFT 9 -#define ID0_NUMSIDB_MASK 0xf -#define ID0_EXIDS (1 << 8) -#define ID0_NUMSMRG_SHIFT 0 -#define ID0_NUMSMRG_MASK 0xff - -#define ID1_PAGESIZE (1 << 31) -#define ID1_NUMPAGENDXB_SHIFT 28 -#define ID1_NUMPAGENDXB_MASK 7 -#define ID1_NUMS2CB_SHIFT 16 -#define ID1_NUMS2CB_MASK 0xff -#define ID1_NUMCB_SHIFT 0 -#define ID1_NUMCB_MASK 0xff - -#define ID2_OAS_SHIFT 4 -#define ID2_OAS_MASK 0xf -#define ID2_IAS_SHIFT 0 -#define ID2_IAS_MASK 0xf -#define ID2_UBS_SHIFT 8 -#define ID2_UBS_MASK 0xf -#define ID2_PTFS_4K (1 << 12) -#define ID2_PTFS_16K (1 << 13) -#define ID2_PTFS_64K (1 << 14) -#define ID2_VMID16 (1 << 15) - -#define ID7_MAJOR_SHIFT 4 -#define ID7_MAJOR_MASK 0xf - -/* Global TLB invalidation */ -#define ARM_SMMU_GR0_TLBIVMID 0x64 -#define ARM_SMMU_GR0_TLBIALLNSNH 0x68 -#define ARM_SMMU_GR0_TLBIALLH 0x6c -#define ARM_SMMU_GR0_sTLBGSYNC 0x70 -#define ARM_SMMU_GR0_sTLBGSTATUS 0x74 -#define sTLBGSTATUS_GSACTIVE (1 << 0) -#define TLB_LOOP_TIMEOUT 1000000 /* 1s! */ -#define TLB_SPIN_COUNT 10 - -/* Stream mapping registers */ -#define ARM_SMMU_GR0_SMR(n) (0x800 + ((n) << 2)) -#define SMR_VALID (1 << 31) -#define SMR_MASK_SHIFT 16 -#define SMR_ID_SHIFT 0 - -#define ARM_SMMU_GR0_S2CR(n) (0xc00 + ((n) << 2)) -#define S2CR_CBNDX_SHIFT 0 -#define S2CR_CBNDX_MASK 0xff -#define S2CR_EXIDVALID (1 << 10) -#define S2CR_TYPE_SHIFT 16 -#define S2CR_TYPE_MASK 0x3 -enum arm_smmu_s2cr_type { - S2CR_TYPE_TRANS, - S2CR_TYPE_BYPASS, - S2CR_TYPE_FAULT, -}; - -#define S2CR_PRIVCFG_SHIFT 24 -#define S2CR_PRIVCFG_MASK 0x3 -enum arm_smmu_s2cr_privcfg { - S2CR_PRIVCFG_DEFAULT, - S2CR_PRIVCFG_DIPAN, - S2CR_PRIVCFG_UNPRIV, - S2CR_PRIVCFG_PRIV, -}; - -/* Context bank attribute registers */ -#define ARM_SMMU_GR1_CBAR(n) (0x0 + ((n) << 2)) -#define CBAR_VMID_SHIFT 0 -#define CBAR_VMID_MASK 0xff -#define CBAR_S1_BPSHCFG_SHIFT 8 -#define CBAR_S1_BPSHCFG_MASK 3 -#define CBAR_S1_BPSHCFG_NSH 3 -#define CBAR_S1_MEMATTR_SHIFT 12 -#define CBAR_S1_MEMATTR_MASK 0xf -#define CBAR_S1_MEMATTR_WB 0xf -#define CBAR_TYPE_SHIFT 16 -#define CBAR_TYPE_MASK 0x3 -#define CBAR_TYPE_S2_TRANS (0 << CBAR_TYPE_SHIFT) -#define CBAR_TYPE_S1_TRANS_S2_BYPASS (1 << CBAR_TYPE_SHIFT) -#define CBAR_TYPE_S1_TRANS_S2_FAULT (2 << CBAR_TYPE_SHIFT) -#define CBAR_TYPE_S1_TRANS_S2_TRANS (3 << CBAR_TYPE_SHIFT) -#define CBAR_IRPTNDX_SHIFT 24 -#define CBAR_IRPTNDX_MASK 0xff - -#define ARM_SMMU_GR1_CBA2R(n) (0x800 + ((n) << 2)) -#define CBA2R_RW64_32BIT (0 << 0) -#define CBA2R_RW64_64BIT (1 << 0) -#define CBA2R_VMID_SHIFT 16 -#define CBA2R_VMID_MASK 0xffff - /* Translation context bank */ #define ARM_SMMU_CB(smmu, n) ((smmu)->cb_base + ((n) << (smmu)->pgshift)) -#define ARM_SMMU_CB_SCTLR 0x0 -#define ARM_SMMU_CB_ACTLR 0x4 -#define ARM_SMMU_CB_RESUME 0x8 -#define ARM_SMMU_CB_TTBCR2 0x10 -#define ARM_SMMU_CB_TTBR0 0x20 -#define ARM_SMMU_CB_TTBR1 0x28 -#define ARM_SMMU_CB_TTBCR 0x30 -#define ARM_SMMU_CB_CONTEXTIDR 0x34 -#define ARM_SMMU_CB_S1_MAIR0 0x38 -#define ARM_SMMU_CB_S1_MAIR1 0x3c -#define ARM_SMMU_CB_PAR 0x50 -#define ARM_SMMU_CB_FSR 0x58 -#define ARM_SMMU_CB_FAR 0x60 -#define ARM_SMMU_CB_FSYNR0 0x68 -#define ARM_SMMU_CB_S1_TLBIVA 0x600 -#define ARM_SMMU_CB_S1_TLBIASID 0x610 -#define ARM_SMMU_CB_S1_TLBIVAL 0x620 -#define ARM_SMMU_CB_S2_TLBIIPAS2 0x630 -#define ARM_SMMU_CB_S2_TLBIIPAS2L 0x638 -#define ARM_SMMU_CB_TLBSYNC 0x7f0 -#define ARM_SMMU_CB_TLBSTATUS 0x7f4 -#define ARM_SMMU_CB_ATS1PR 0x800 -#define ARM_SMMU_CB_ATSR 0x8f0 - -#define SCTLR_S1_ASIDPNE (1 << 12) -#define SCTLR_CFCFG (1 << 7) -#define SCTLR_CFIE (1 << 6) -#define SCTLR_CFRE (1 << 5) -#define SCTLR_E (1 << 4) -#define SCTLR_AFE (1 << 2) -#define SCTLR_TRE (1 << 1) -#define SCTLR_M (1 << 0) - -#define ARM_MMU500_ACTLR_CPRE (1 << 1) - -#define ARM_MMU500_ACR_CACHE_LOCK (1 << 26) -#define ARM_MMU500_ACR_SMTNMB_TLBEN (1 << 8) - -#define CB_PAR_F (1 << 0) - -#define ATSR_ACTIVE (1 << 0) - -#define RESUME_RETRY (0 << 0) -#define RESUME_TERMINATE (1 << 0) - -#define TTBCR2_SEP_SHIFT 15 -#define TTBCR2_SEP_UPSTREAM (0x7 << TTBCR2_SEP_SHIFT) -#define TTBCR2_AS (1 << 4) - -#define TTBRn_ASID_SHIFT 48 - -#define FSR_MULTI (1 << 31) -#define FSR_SS (1 << 30) -#define FSR_UUT (1 << 8) -#define FSR_ASF (1 << 7) -#define FSR_TLBLKF (1 << 6) -#define FSR_TLBMCF (1 << 5) -#define FSR_EF (1 << 4) -#define FSR_PF (1 << 3) -#define FSR_AFF (1 << 2) -#define FSR_TF (1 << 1) - -#define FSR_IGN (FSR_AFF | FSR_ASF | \ - FSR_TLBMCF | FSR_TLBLKF) -#define FSR_FAULT (FSR_MULTI | FSR_SS | FSR_UUT | \ - FSR_EF | FSR_PF | FSR_TF | FSR_IGN) - -#define FSYNR0_WNR (1 << 4) - #define MSI_IOVA_BASE 0x8000000 #define MSI_IOVA_LENGTH 0x100000 @@ -312,6 +119,14 @@ enum arm_smmu_implementation { CAVIUM_SMMUV2, }; +/* Until ACPICA headers cover IORT rev. C */ +#ifndef ACPI_IORT_SMMU_CORELINK_MMU401 +#define ACPI_IORT_SMMU_CORELINK_MMU401 0x4 +#endif +#ifndef ACPI_IORT_SMMU_CAVIUM_THUNDERX +#define ACPI_IORT_SMMU_CAVIUM_THUNDERX 0x5 +#endif + struct arm_smmu_s2cr { struct iommu_group *group; int count; @@ -330,6 +145,13 @@ struct arm_smmu_smr { bool valid; }; +struct arm_smmu_cb { + u64 ttbr[2]; + u32 tcr[2]; + u32 mair[2]; + struct arm_smmu_cfg *cfg; +}; + struct arm_smmu_master_cfg { struct arm_smmu_device *smmu; s16 smendx[]; @@ -372,6 +194,7 @@ struct arm_smmu_device { u32 num_context_banks; u32 num_s2_context_banks; DECLARE_BITMAP(context_map, ARM_SMMU_MAX_CBS); + struct arm_smmu_cb *cbs; atomic_t irptndx; u32 num_mapping_groups; @@ -392,6 +215,8 @@ struct arm_smmu_device { u32 cavium_id_base; /* Specific to Cavium */ + spinlock_t global_sync_lock; + /* IOMMU core code handle */ struct iommu_device iommu; }; @@ -425,10 +250,10 @@ enum arm_smmu_domain_stage { struct arm_smmu_domain { struct arm_smmu_device *smmu; struct io_pgtable_ops *pgtbl_ops; - spinlock_t pgtbl_lock; struct arm_smmu_cfg cfg; enum arm_smmu_domain_stage stage; struct mutex init_mutex; /* Protects smmu pointer */ + spinlock_t cb_lock; /* Serialises ATS1* ops and TLB syncs */ struct iommu_domain domain; }; @@ -594,9 +419,12 @@ static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu, static void arm_smmu_tlb_sync_global(struct arm_smmu_device *smmu) { void __iomem *base = ARM_SMMU_GR0(smmu); + unsigned long flags; + spin_lock_irqsave(&smmu->global_sync_lock, flags); __arm_smmu_tlb_sync(smmu, base + ARM_SMMU_GR0_sTLBGSYNC, base + ARM_SMMU_GR0_sTLBGSTATUS); + spin_unlock_irqrestore(&smmu->global_sync_lock, flags); } static void arm_smmu_tlb_sync_context(void *cookie) @@ -604,9 +432,12 @@ static void arm_smmu_tlb_sync_context(void *cookie) struct arm_smmu_domain *smmu_domain = cookie; struct arm_smmu_device *smmu = smmu_domain->smmu; void __iomem *base = ARM_SMMU_CB(smmu, smmu_domain->cfg.cbndx); + unsigned long flags; + spin_lock_irqsave(&smmu_domain->cb_lock, flags); __arm_smmu_tlb_sync(smmu, base + ARM_SMMU_CB_TLBSYNC, base + ARM_SMMU_CB_TLBSTATUS); + spin_unlock_irqrestore(&smmu_domain->cb_lock, flags); } static void arm_smmu_tlb_sync_vmid(void *cookie) @@ -760,17 +591,74 @@ static irqreturn_t arm_smmu_global_fault(int irq, void *dev) static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain, struct io_pgtable_cfg *pgtbl_cfg) { - u32 reg, reg2; - u64 reg64; - bool stage1; struct arm_smmu_cfg *cfg = &smmu_domain->cfg; - struct arm_smmu_device *smmu = smmu_domain->smmu; + struct arm_smmu_cb *cb = &smmu_domain->smmu->cbs[cfg->cbndx]; + bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS; + + cb->cfg = cfg; + + /* TTBCR */ + if (stage1) { + if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) { + cb->tcr[0] = pgtbl_cfg->arm_v7s_cfg.tcr; + } else { + cb->tcr[0] = pgtbl_cfg->arm_lpae_s1_cfg.tcr; + cb->tcr[1] = pgtbl_cfg->arm_lpae_s1_cfg.tcr >> 32; + cb->tcr[1] |= TTBCR2_SEP_UPSTREAM; + if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) + cb->tcr[1] |= TTBCR2_AS; + } + } else { + cb->tcr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vtcr; + } + + /* TTBRs */ + if (stage1) { + if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) { + cb->ttbr[0] = pgtbl_cfg->arm_v7s_cfg.ttbr[0]; + cb->ttbr[1] = pgtbl_cfg->arm_v7s_cfg.ttbr[1]; + } else { + cb->ttbr[0] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0]; + cb->ttbr[0] |= (u64)cfg->asid << TTBRn_ASID_SHIFT; + cb->ttbr[1] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1]; + cb->ttbr[1] |= (u64)cfg->asid << TTBRn_ASID_SHIFT; + } + } else { + cb->ttbr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vttbr; + } + + /* MAIRs (stage-1 only) */ + if (stage1) { + if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) { + cb->mair[0] = pgtbl_cfg->arm_v7s_cfg.prrr; + cb->mair[1] = pgtbl_cfg->arm_v7s_cfg.nmrr; + } else { + cb->mair[0] = pgtbl_cfg->arm_lpae_s1_cfg.mair[0]; + cb->mair[1] = pgtbl_cfg->arm_lpae_s1_cfg.mair[1]; + } + } +} + +static void arm_smmu_write_context_bank(struct arm_smmu_device *smmu, int idx) +{ + u32 reg; + bool stage1; + struct arm_smmu_cb *cb = &smmu->cbs[idx]; + struct arm_smmu_cfg *cfg = cb->cfg; void __iomem *cb_base, *gr1_base; + cb_base = ARM_SMMU_CB(smmu, idx); + + /* Unassigned context banks only need disabling */ + if (!cfg) { + writel_relaxed(0, cb_base + ARM_SMMU_CB_SCTLR); + return; + } + gr1_base = ARM_SMMU_GR1(smmu); stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS; - cb_base = ARM_SMMU_CB(smmu, cfg->cbndx); + /* CBA2R */ if (smmu->version > ARM_SMMU_V1) { if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) reg = CBA2R_RW64_64BIT; @@ -780,7 +668,7 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain, if (smmu->features & ARM_SMMU_FEAT_VMID16) reg |= cfg->vmid << CBA2R_VMID_SHIFT; - writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBA2R(cfg->cbndx)); + writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBA2R(idx)); } /* CBAR */ @@ -799,72 +687,41 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain, /* 8-bit VMIDs live in CBAR */ reg |= cfg->vmid << CBAR_VMID_SHIFT; } - writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBAR(cfg->cbndx)); + writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBAR(idx)); /* * TTBCR * We must write this before the TTBRs, since it determines the * access behaviour of some fields (in particular, ASID[15:8]). */ - if (stage1) { - if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) { - reg = pgtbl_cfg->arm_v7s_cfg.tcr; - reg2 = 0; - } else { - reg = pgtbl_cfg->arm_lpae_s1_cfg.tcr; - reg2 = pgtbl_cfg->arm_lpae_s1_cfg.tcr >> 32; - reg2 |= TTBCR2_SEP_UPSTREAM; - if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) - reg2 |= TTBCR2_AS; - } - if (smmu->version > ARM_SMMU_V1) - writel_relaxed(reg2, cb_base + ARM_SMMU_CB_TTBCR2); - } else { - reg = pgtbl_cfg->arm_lpae_s2_cfg.vtcr; - } - writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBCR); + if (stage1 && smmu->version > ARM_SMMU_V1) + writel_relaxed(cb->tcr[1], cb_base + ARM_SMMU_CB_TTBCR2); + writel_relaxed(cb->tcr[0], cb_base + ARM_SMMU_CB_TTBCR); /* TTBRs */ - if (stage1) { - if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) { - reg = pgtbl_cfg->arm_v7s_cfg.ttbr[0]; - writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0); - reg = pgtbl_cfg->arm_v7s_cfg.ttbr[1]; - writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR1); - writel_relaxed(cfg->asid, cb_base + ARM_SMMU_CB_CONTEXTIDR); - } else { - reg64 = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0]; - reg64 |= (u64)cfg->asid << TTBRn_ASID_SHIFT; - writeq_relaxed(reg64, cb_base + ARM_SMMU_CB_TTBR0); - reg64 = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1]; - reg64 |= (u64)cfg->asid << TTBRn_ASID_SHIFT; - writeq_relaxed(reg64, cb_base + ARM_SMMU_CB_TTBR1); - } + if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) { + writel_relaxed(cfg->asid, cb_base + ARM_SMMU_CB_CONTEXTIDR); + writel_relaxed(cb->ttbr[0], cb_base + ARM_SMMU_CB_TTBR0); + writel_relaxed(cb->ttbr[1], cb_base + ARM_SMMU_CB_TTBR1); } else { - reg64 = pgtbl_cfg->arm_lpae_s2_cfg.vttbr; - writeq_relaxed(reg64, cb_base + ARM_SMMU_CB_TTBR0); + writeq_relaxed(cb->ttbr[0], cb_base + ARM_SMMU_CB_TTBR0); + if (stage1) + writeq_relaxed(cb->ttbr[1], cb_base + ARM_SMMU_CB_TTBR1); } /* MAIRs (stage-1 only) */ if (stage1) { - if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) { - reg = pgtbl_cfg->arm_v7s_cfg.prrr; - reg2 = pgtbl_cfg->arm_v7s_cfg.nmrr; - } else { - reg = pgtbl_cfg->arm_lpae_s1_cfg.mair[0]; - reg2 = pgtbl_cfg->arm_lpae_s1_cfg.mair[1]; - } - writel_relaxed(reg, cb_base + ARM_SMMU_CB_S1_MAIR0); - writel_relaxed(reg2, cb_base + ARM_SMMU_CB_S1_MAIR1); + writel_relaxed(cb->mair[0], cb_base + ARM_SMMU_CB_S1_MAIR0); + writel_relaxed(cb->mair[1], cb_base + ARM_SMMU_CB_S1_MAIR1); } /* SCTLR */ reg = SCTLR_CFIE | SCTLR_CFRE | SCTLR_AFE | SCTLR_TRE | SCTLR_M; if (stage1) reg |= SCTLR_S1_ASIDPNE; -#ifdef __BIG_ENDIAN - reg |= SCTLR_E; -#endif + if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN)) + reg |= SCTLR_E; + writel_relaxed(reg, cb_base + ARM_SMMU_CB_SCTLR); } @@ -1010,6 +867,9 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, .iommu_dev = smmu->dev, }; + if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK) + pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_NO_DMA; + smmu_domain->smmu = smmu; pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain); if (!pgtbl_ops) { @@ -1024,6 +884,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, /* Initialise the context bank with our page table cfg */ arm_smmu_init_context_bank(smmu_domain, &pgtbl_cfg); + arm_smmu_write_context_bank(smmu, cfg->cbndx); /* * Request context fault interrupt. Do this last to avoid the @@ -1056,7 +917,6 @@ static void arm_smmu_destroy_domain_context(struct iommu_domain *domain) struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); struct arm_smmu_device *smmu = smmu_domain->smmu; struct arm_smmu_cfg *cfg = &smmu_domain->cfg; - void __iomem *cb_base; int irq; if (!smmu || domain->type == IOMMU_DOMAIN_IDENTITY) @@ -1066,8 +926,8 @@ static void arm_smmu_destroy_domain_context(struct iommu_domain *domain) * Disable the context bank and free the page tables before freeing * it. */ - cb_base = ARM_SMMU_CB(smmu, cfg->cbndx); - writel_relaxed(0, cb_base + ARM_SMMU_CB_SCTLR); + smmu->cbs[cfg->cbndx].cfg = NULL; + arm_smmu_write_context_bank(smmu, cfg->cbndx); if (cfg->irptndx != INVALID_IRPTNDX) { irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx]; @@ -1102,7 +962,7 @@ static struct iommu_domain *arm_smmu_domain_alloc(unsigned type) } mutex_init(&smmu_domain->init_mutex); - spin_lock_init(&smmu_domain->pgtbl_lock); + spin_lock_init(&smmu_domain->cb_lock); return &smmu_domain->domain; } @@ -1380,35 +1240,23 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t size, int prot) { - int ret; - unsigned long flags; - struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); - struct io_pgtable_ops *ops= smmu_domain->pgtbl_ops; + struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops; if (!ops) return -ENODEV; - spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags); - ret = ops->map(ops, iova, paddr, size, prot); - spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags); - return ret; + return ops->map(ops, iova, paddr, size, prot); } static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size) { - size_t ret; - unsigned long flags; - struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); - struct io_pgtable_ops *ops= smmu_domain->pgtbl_ops; + struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops; if (!ops) return 0; - spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags); - ret = ops->unmap(ops, iova, size); - spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags); - return ret; + return ops->unmap(ops, iova, size); } static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain, @@ -1422,10 +1270,11 @@ static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain, void __iomem *cb_base; u32 tmp; u64 phys; - unsigned long va; + unsigned long va, flags; cb_base = ARM_SMMU_CB(smmu, cfg->cbndx); + spin_lock_irqsave(&smmu_domain->cb_lock, flags); /* ATS1 registers can only be written atomically */ va = iova & ~0xfffUL; if (smmu->version == ARM_SMMU_V2) @@ -1435,6 +1284,7 @@ static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain, if (readl_poll_timeout_atomic(cb_base + ARM_SMMU_CB_ATSR, tmp, !(tmp & ATSR_ACTIVE), 5, 50)) { + spin_unlock_irqrestore(&smmu_domain->cb_lock, flags); dev_err(dev, "iova to phys timed out on %pad. Falling back to software table walk.\n", &iova); @@ -1442,6 +1292,7 @@ static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain, } phys = readq_relaxed(cb_base + ARM_SMMU_CB_PAR); + spin_unlock_irqrestore(&smmu_domain->cb_lock, flags); if (phys & CB_PAR_F) { dev_err(dev, "translation fault!\n"); dev_err(dev, "PAR = 0x%llx\n", phys); @@ -1454,10 +1305,8 @@ static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain, static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova) { - phys_addr_t ret; - unsigned long flags; struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); - struct io_pgtable_ops *ops= smmu_domain->pgtbl_ops; + struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops; if (domain->type == IOMMU_DOMAIN_IDENTITY) return iova; @@ -1465,17 +1314,11 @@ static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain, if (!ops) return 0; - spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags); if (smmu_domain->smmu->features & ARM_SMMU_FEAT_TRANS_OPS && - smmu_domain->stage == ARM_SMMU_DOMAIN_S1) { - ret = arm_smmu_iova_to_phys_hard(domain, iova); - } else { - ret = ops->iova_to_phys(ops, iova); - } + smmu_domain->stage == ARM_SMMU_DOMAIN_S1) + return arm_smmu_iova_to_phys_hard(domain, iova); - spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags); - - return ret; + return ops->iova_to_phys(ops, iova); } static bool arm_smmu_capable(enum iommu_cap cap) @@ -1517,6 +1360,12 @@ static int arm_smmu_add_device(struct device *dev) if (using_legacy_binding) { ret = arm_smmu_register_legacy_master(dev, &smmu); + + /* + * If dev->iommu_fwspec is initally NULL, arm_smmu_register_legacy_master() + * will allocate/initialise a new one. Thus we need to update fwspec for + * later use. + */ fwspec = dev->iommu_fwspec; if (ret) goto out_free; @@ -1556,15 +1405,15 @@ static int arm_smmu_add_device(struct device *dev) ret = arm_smmu_master_alloc_smes(dev); if (ret) - goto out_free; + goto out_cfg_free; iommu_device_link(&smmu->iommu, dev); return 0; +out_cfg_free: + kfree(cfg); out_free: - if (fwspec) - kfree(fwspec->iommu_priv); iommu_fwspec_free(dev); return ret; } @@ -1728,7 +1577,6 @@ static struct iommu_ops arm_smmu_ops = { static void arm_smmu_device_reset(struct arm_smmu_device *smmu) { void __iomem *gr0_base = ARM_SMMU_GR0(smmu); - void __iomem *cb_base; int i; u32 reg, major; @@ -1764,8 +1612,9 @@ static void arm_smmu_device_reset(struct arm_smmu_device *smmu) /* Make sure all context banks are disabled and clear CB_FSR */ for (i = 0; i < smmu->num_context_banks; ++i) { - cb_base = ARM_SMMU_CB(smmu, i); - writel_relaxed(0, cb_base + ARM_SMMU_CB_SCTLR); + void __iomem *cb_base = ARM_SMMU_CB(smmu, i); + + arm_smmu_write_context_bank(smmu, i); writel_relaxed(FSR_FAULT, cb_base + ARM_SMMU_CB_FSR); /* * Disable MMU-500's not-particularly-beneficial next-page @@ -1931,6 +1780,7 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu) smmu->num_mapping_groups = size; mutex_init(&smmu->stream_map_mutex); + spin_lock_init(&smmu->global_sync_lock); if (smmu->version < ARM_SMMU_V2 || !(id & ID0_PTFS_NO_AARCH32)) { smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_L; @@ -1970,6 +1820,10 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu) smmu->cavium_id_base -= smmu->num_context_banks; dev_notice(smmu->dev, "\tenabling workaround for Cavium erratum 27704\n"); } + smmu->cbs = devm_kcalloc(smmu->dev, smmu->num_context_banks, + sizeof(*smmu->cbs), GFP_KERNEL); + if (!smmu->cbs) + return -ENOMEM; /* ID2 */ id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID2); @@ -2073,6 +1927,10 @@ static int acpi_smmu_get_data(u32 model, struct arm_smmu_device *smmu) smmu->version = ARM_SMMU_V1; smmu->model = GENERIC_SMMU; break; + case ACPI_IORT_SMMU_CORELINK_MMU401: + smmu->version = ARM_SMMU_V1_64K; + smmu->model = GENERIC_SMMU; + break; case ACPI_IORT_SMMU_V2: smmu->version = ARM_SMMU_V2; smmu->model = GENERIC_SMMU; @@ -2081,6 +1939,10 @@ static int acpi_smmu_get_data(u32 model, struct arm_smmu_device *smmu) smmu->version = ARM_SMMU_V2; smmu->model = ARM_MMU500; break; + case ACPI_IORT_SMMU_CAVIUM_THUNDERX: + smmu->version = ARM_SMMU_V2; + smmu->model = CAVIUM_SMMUV2; + break; default: ret = -ENODEV; } @@ -2319,13 +2181,30 @@ static int arm_smmu_device_remove(struct platform_device *pdev) return 0; } +static void arm_smmu_device_shutdown(struct platform_device *pdev) +{ + arm_smmu_device_remove(pdev); +} + +static int __maybe_unused arm_smmu_pm_resume(struct device *dev) +{ + struct arm_smmu_device *smmu = dev_get_drvdata(dev); + + arm_smmu_device_reset(smmu); + return 0; +} + +static SIMPLE_DEV_PM_OPS(arm_smmu_pm_ops, NULL, arm_smmu_pm_resume); + static struct platform_driver arm_smmu_driver = { .driver = { .name = "arm-smmu", .of_match_table = of_match_ptr(arm_smmu_of_match), + .pm = &arm_smmu_pm_ops, }, .probe = arm_smmu_device_probe, .remove = arm_smmu_device_remove, + .shutdown = arm_smmu_device_shutdown, }; module_platform_driver(arm_smmu_driver); diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 8348f366ddd1..9d1cebe7f6cb 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -31,6 +31,8 @@ #include <linux/scatterlist.h> #include <linux/vmalloc.h> +#define IOMMU_MAPPING_ERROR 0 + struct iommu_dma_msi_page { struct list_head list; dma_addr_t iova; @@ -314,7 +316,7 @@ int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, * If we have devices with different DMA masks, move the free * area cache limit down for the benefit of the smaller one. */ - iovad->dma_32bit_pfn = min(end_pfn, iovad->dma_32bit_pfn); + iovad->dma_32bit_pfn = min(end_pfn + 1, iovad->dma_32bit_pfn); return 0; } @@ -396,13 +398,13 @@ static void iommu_dma_free_iova(struct iommu_dma_cookie *cookie, dma_addr_t iova, size_t size) { struct iova_domain *iovad = &cookie->iovad; - unsigned long shift = iova_shift(iovad); /* The MSI case is only ever cleaning up its most recent allocation */ if (cookie->type == IOMMU_DMA_MSI_COOKIE) cookie->msi_iova -= size; else - free_iova_fast(iovad, iova >> shift, size >> shift); + free_iova_fast(iovad, iova_pfn(iovad, iova), + size >> iova_shift(iovad)); } static void __iommu_dma_unmap(struct iommu_domain *domain, dma_addr_t dma_addr, @@ -500,7 +502,7 @@ void iommu_dma_free(struct device *dev, struct page **pages, size_t size, { __iommu_dma_unmap(iommu_get_domain_for_dev(dev), *handle, size); __iommu_dma_free_pages(pages, PAGE_ALIGN(size) >> PAGE_SHIFT); - *handle = DMA_ERROR_CODE; + *handle = IOMMU_MAPPING_ERROR; } /** @@ -533,7 +535,7 @@ struct page **iommu_dma_alloc(struct device *dev, size_t size, gfp_t gfp, dma_addr_t iova; unsigned int count, min_size, alloc_sizes = domain->pgsize_bitmap; - *handle = DMA_ERROR_CODE; + *handle = IOMMU_MAPPING_ERROR; min_size = alloc_sizes & -alloc_sizes; if (min_size < PAGE_SIZE) { @@ -617,18 +619,21 @@ static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys, { struct iommu_domain *domain = iommu_get_domain_for_dev(dev); struct iommu_dma_cookie *cookie = domain->iova_cookie; - struct iova_domain *iovad = &cookie->iovad; - size_t iova_off = iova_offset(iovad, phys); + size_t iova_off = 0; dma_addr_t iova; - size = iova_align(iovad, size + iova_off); + if (cookie->type == IOMMU_DMA_IOVA_COOKIE) { + iova_off = iova_offset(&cookie->iovad, phys); + size = iova_align(&cookie->iovad, size + iova_off); + } + iova = iommu_dma_alloc_iova(domain, size, dma_get_mask(dev), dev); if (!iova) - return DMA_ERROR_CODE; + return IOMMU_MAPPING_ERROR; if (iommu_map(domain, iova, phys - iova_off, size, prot)) { iommu_dma_free_iova(cookie, iova, size); - return DMA_ERROR_CODE; + return IOMMU_MAPPING_ERROR; } return iova + iova_off; } @@ -668,7 +673,7 @@ static int __finalise_sg(struct device *dev, struct scatterlist *sg, int nents, s->offset += s_iova_off; s->length = s_length; - sg_dma_address(s) = DMA_ERROR_CODE; + sg_dma_address(s) = IOMMU_MAPPING_ERROR; sg_dma_len(s) = 0; /* @@ -711,11 +716,11 @@ static void __invalidate_sg(struct scatterlist *sg, int nents) int i; for_each_sg(sg, s, nents, i) { - if (sg_dma_address(s) != DMA_ERROR_CODE) + if (sg_dma_address(s) != IOMMU_MAPPING_ERROR) s->offset += sg_dma_address(s); if (sg_dma_len(s)) s->length = sg_dma_len(s); - sg_dma_address(s) = DMA_ERROR_CODE; + sg_dma_address(s) = IOMMU_MAPPING_ERROR; sg_dma_len(s) = 0; } } @@ -833,7 +838,7 @@ void iommu_dma_unmap_resource(struct device *dev, dma_addr_t handle, int iommu_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { - return dma_addr == DMA_ERROR_CODE; + return dma_addr == IOMMU_MAPPING_ERROR; } static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev, diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index cbf7763d8091..ca5ebaeafd6a 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -1343,7 +1343,7 @@ void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 qdep, if (mask) { BUG_ON(addr & ((1 << (VTD_PAGE_SHIFT + mask)) - 1)); - addr |= (1 << (VTD_PAGE_SHIFT + mask - 1)) - 1; + addr |= (1ULL << (VTD_PAGE_SHIFT + mask - 1)) - 1; desc.high = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE; } else desc.high = QI_DEV_IOTLB_ADDR(addr); @@ -1808,10 +1808,9 @@ IOMMU_INIT_POST(detect_intel_iommu); * for Directed-IO Architecture Specifiction, Rev 2.2, Section 8.8 * "Remapping Hardware Unit Hot Plug". */ -static u8 dmar_hp_uuid[] = { - /* 0000 */ 0xA6, 0xA3, 0xC1, 0xD8, 0x9B, 0xBE, 0x9B, 0x4C, - /* 0008 */ 0x91, 0xBF, 0xC3, 0xCB, 0x81, 0xFC, 0x5D, 0xAF -}; +static guid_t dmar_hp_guid = + GUID_INIT(0xD8C1A3A6, 0xBE9B, 0x4C9B, + 0x91, 0xBF, 0xC3, 0xCB, 0x81, 0xFC, 0x5D, 0xAF); /* * Currently there's only one revision and BIOS will not check the revision id, @@ -1824,7 +1823,7 @@ static u8 dmar_hp_uuid[] = { static inline bool dmar_detect_dsm(acpi_handle handle, int func) { - return acpi_check_dsm(handle, dmar_hp_uuid, DMAR_DSM_REV_ID, 1 << func); + return acpi_check_dsm(handle, &dmar_hp_guid, DMAR_DSM_REV_ID, 1 << func); } static int dmar_walk_dsm_resource(acpi_handle handle, int func, @@ -1843,7 +1842,7 @@ static int dmar_walk_dsm_resource(acpi_handle handle, int func, if (!dmar_detect_dsm(handle, func)) return 0; - obj = acpi_evaluate_dsm_typed(handle, dmar_hp_uuid, DMAR_DSM_REV_ID, + obj = acpi_evaluate_dsm_typed(handle, &dmar_hp_guid, DMAR_DSM_REV_ID, func, NULL, ACPI_TYPE_BUFFER); if (!obj) return -ENODEV; diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index 2395478dde75..f596fcc32898 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -54,10 +54,6 @@ typedef u32 sysmmu_pte_t; #define lv2ent_small(pent) ((*(pent) & 2) == 2) #define lv2ent_large(pent) ((*(pent) & 3) == 1) -#ifdef CONFIG_BIG_ENDIAN -#warning "revisit driver if we can enable big-endian ptes" -#endif - /* * v1.x - v3.x SYSMMU supports 32bit physical and 32bit virtual address spaces * v5.0 introduced support for 36bit physical address space by shifting @@ -569,7 +565,7 @@ static void sysmmu_tlb_invalidate_entry(struct sysmmu_drvdata *data, spin_unlock_irqrestore(&data->lock, flags); } -static struct iommu_ops exynos_iommu_ops; +static const struct iommu_ops exynos_iommu_ops; static int __init exynos_sysmmu_probe(struct platform_device *pdev) { @@ -659,6 +655,13 @@ static int __init exynos_sysmmu_probe(struct platform_device *pdev) } } + /* + * use the first registered sysmmu device for performing + * dma mapping operations on iommu page tables (cpu cache flush) + */ + if (!dma_dev) + dma_dev = &pdev->dev; + pm_runtime_enable(dev); return 0; @@ -1323,7 +1326,7 @@ static int exynos_iommu_of_xlate(struct device *dev, return 0; } -static struct iommu_ops exynos_iommu_ops = { +static const struct iommu_ops exynos_iommu_ops = { .domain_alloc = exynos_iommu_domain_alloc, .domain_free = exynos_iommu_domain_free, .attach_dev = exynos_iommu_attach_device, @@ -1339,8 +1342,6 @@ static struct iommu_ops exynos_iommu_ops = { .of_xlate = exynos_iommu_of_xlate, }; -static bool init_done; - static int __init exynos_iommu_init(void) { int ret; @@ -1373,8 +1374,6 @@ static int __init exynos_iommu_init(void) goto err_set_iommu; } - init_done = true; - return 0; err_set_iommu: kmem_cache_free(lv2table_kmem_cache, zero_lv2_table); @@ -1384,27 +1383,6 @@ err_reg_driver: kmem_cache_destroy(lv2table_kmem_cache); return ret; } +core_initcall(exynos_iommu_init); -static int __init exynos_iommu_of_setup(struct device_node *np) -{ - struct platform_device *pdev; - - if (!init_done) - exynos_iommu_init(); - - pdev = of_platform_device_create(np, NULL, platform_bus_type.dev_root); - if (!pdev) - return -ENODEV; - - /* - * use the first registered sysmmu device for performing - * dma mapping operations on iommu page tables (cpu cache flush) - */ - if (!dma_dev) - dma_dev = &pdev->dev; - - return 0; -} - -IOMMU_OF_DECLARE(exynos_iommu_of, "samsung,exynos-sysmmu", - exynos_iommu_of_setup); +IOMMU_OF_DECLARE(exynos_iommu_of, "samsung,exynos-sysmmu", NULL); diff --git a/drivers/iommu/fsl_pamu.c b/drivers/iommu/fsl_pamu.c index a34355fca37a..8540625796a1 100644 --- a/drivers/iommu/fsl_pamu.c +++ b/drivers/iommu/fsl_pamu.c @@ -42,6 +42,8 @@ struct pamu_isr_data { static struct paace *ppaact; static struct paace *spaact; +static bool probed; /* Has PAMU been probed? */ + /* * Table for matching compatible strings, for device tree * guts node, for QorIQ SOCs. @@ -530,8 +532,8 @@ u32 get_stash_id(u32 stash_dest_hint, u32 vcpu) if (node) { prop = of_get_property(node, "cache-stash-id", NULL); if (!prop) { - pr_debug("missing cache-stash-id at %s\n", - node->full_name); + pr_debug("missing cache-stash-id at %pOF\n", + node); of_node_put(node); return ~(u32)0; } @@ -557,8 +559,8 @@ found_cpu_node: if (stash_dest_hint == cache_level) { prop = of_get_property(node, "cache-stash-id", NULL); if (!prop) { - pr_debug("missing cache-stash-id at %s\n", - node->full_name); + pr_debug("missing cache-stash-id at %pOF\n", + node); of_node_put(node); return ~(u32)0; } @@ -568,8 +570,7 @@ found_cpu_node: prop = of_get_property(node, "next-level-cache", NULL); if (!prop) { - pr_debug("can't find next-level-cache at %s\n", - node->full_name); + pr_debug("can't find next-level-cache at %pOF\n", node); of_node_put(node); return ~(u32)0; /* can't traverse any further */ } @@ -1033,6 +1034,9 @@ static int fsl_pamu_probe(struct platform_device *pdev) * NOTE : All PAMUs share the same LIODN tables. */ + if (WARN_ON(probed)) + return -EBUSY; + pamu_regs = of_iomap(dev->of_node, 0); if (!pamu_regs) { dev_err(dev, "ioremap of PAMU node failed\n"); @@ -1063,8 +1067,7 @@ static int fsl_pamu_probe(struct platform_device *pdev) guts_node = of_find_matching_node(NULL, guts_device_ids); if (!guts_node) { - dev_err(dev, "could not find GUTS node %s\n", - dev->of_node->full_name); + dev_err(dev, "could not find GUTS node %pOF\n", dev->of_node); ret = -ENODEV; goto error; } @@ -1172,6 +1175,8 @@ static int fsl_pamu_probe(struct platform_device *pdev) setup_liodns(); + probed = true; + return 0; error_genpool: @@ -1246,8 +1251,7 @@ static __init int fsl_pamu_init(void) pdev = platform_device_alloc("fsl-of-pamu", 0); if (!pdev) { - pr_err("could not allocate device %s\n", - np->full_name); + pr_err("could not allocate device %pOF\n", np); ret = -ENOMEM; goto error_device_alloc; } @@ -1259,8 +1263,7 @@ static __init int fsl_pamu_init(void) ret = platform_device_add(pdev); if (ret) { - pr_err("could not add device %s (err=%i)\n", - np->full_name, ret); + pr_err("could not add device %pOF (err=%i)\n", np, ret); goto error_device_add; } diff --git a/drivers/iommu/fsl_pamu_domain.c b/drivers/iommu/fsl_pamu_domain.c index da0e1e30ef37..f089136e9c3f 100644 --- a/drivers/iommu/fsl_pamu_domain.c +++ b/drivers/iommu/fsl_pamu_domain.c @@ -33,6 +33,8 @@ static struct kmem_cache *fsl_pamu_domain_cache; static struct kmem_cache *iommu_devinfo_cache; static DEFINE_SPINLOCK(device_domain_lock); +struct iommu_device pamu_iommu; /* IOMMU core code handle */ + static struct fsl_dma_domain *to_fsl_dma_domain(struct iommu_domain *dom) { return container_of(dom, struct fsl_dma_domain, iommu_domain); @@ -619,8 +621,8 @@ static int handle_attach_device(struct fsl_dma_domain *dma_domain, for (i = 0; i < num; i++) { /* Ensure that LIODN value is valid */ if (liodn[i] >= PAACE_NUMBER_ENTRIES) { - pr_debug("Invalid liodn %d, attach device failed for %s\n", - liodn[i], dev->of_node->full_name); + pr_debug("Invalid liodn %d, attach device failed for %pOF\n", + liodn[i], dev->of_node); ret = -EINVAL; break; } @@ -684,8 +686,7 @@ static int fsl_pamu_attach_device(struct iommu_domain *domain, liodn_cnt = len / sizeof(u32); ret = handle_attach_device(dma_domain, dev, liodn, liodn_cnt); } else { - pr_debug("missing fsl,liodn property at %s\n", - dev->of_node->full_name); + pr_debug("missing fsl,liodn property at %pOF\n", dev->of_node); ret = -EINVAL; } @@ -720,8 +721,7 @@ static void fsl_pamu_detach_device(struct iommu_domain *domain, if (prop) detach_device(dev, dma_domain); else - pr_debug("missing fsl,liodn property at %s\n", - dev->of_node->full_name); + pr_debug("missing fsl,liodn property at %pOF\n", dev->of_node); } static int configure_domain_geometry(struct iommu_domain *domain, void *data) @@ -983,11 +983,14 @@ static int fsl_pamu_add_device(struct device *dev) iommu_group_put(group); + iommu_device_link(&pamu_iommu, dev); + return 0; } static void fsl_pamu_remove_device(struct device *dev) { + iommu_device_unlink(&pamu_iommu, dev); iommu_group_remove_device(dev); } @@ -1073,6 +1076,19 @@ int __init pamu_domain_init(void) if (ret) return ret; + ret = iommu_device_sysfs_add(&pamu_iommu, NULL, NULL, "iommu0"); + if (ret) + return ret; + + iommu_device_set_ops(&pamu_iommu, &fsl_pamu_ops); + + ret = iommu_device_register(&pamu_iommu); + if (ret) { + iommu_device_sysfs_remove(&pamu_iommu); + pr_err("Can't register iommu device\n"); + return ret; + } + bus_set_iommu(&platform_bus_type, &fsl_pamu_ops); bus_set_iommu(&pci_bus_type, &fsl_pamu_ops); diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 5b112b6a2c9c..2be8e23448ee 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -458,31 +458,6 @@ static LIST_HEAD(dmar_rmrr_units); #define for_each_rmrr_units(rmrr) \ list_for_each_entry(rmrr, &dmar_rmrr_units, list) -static void flush_unmaps_timeout(unsigned long data); - -struct deferred_flush_entry { - unsigned long iova_pfn; - unsigned long nrpages; - struct dmar_domain *domain; - struct page *freelist; -}; - -#define HIGH_WATER_MARK 250 -struct deferred_flush_table { - int next; - struct deferred_flush_entry entries[HIGH_WATER_MARK]; -}; - -struct deferred_flush_data { - spinlock_t lock; - int timer_on; - struct timer_list timer; - long size; - struct deferred_flush_table *tables; -}; - -static DEFINE_PER_CPU(struct deferred_flush_data, deferred_flush); - /* bitmap for indexing intel_iommus */ static int g_num_of_iommus; @@ -1305,6 +1280,13 @@ static void dma_free_pagelist(struct page *freelist) } } +static void iova_entry_free(unsigned long data) +{ + struct page *freelist = (struct page *)data; + + dma_free_pagelist(freelist); +} + /* iommu handling */ static int iommu_alloc_root_entry(struct intel_iommu *iommu) { @@ -1618,6 +1600,25 @@ static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, addr, mask); } +static void iommu_flush_iova(struct iova_domain *iovad) +{ + struct dmar_domain *domain; + int idx; + + domain = container_of(iovad, struct dmar_domain, iovad); + + for_each_domain_iommu(idx, domain) { + struct intel_iommu *iommu = g_iommus[idx]; + u16 did = domain->iommu_did[iommu->seq_id]; + + iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH); + + if (!cap_caching_mode(iommu->cap)) + iommu_flush_dev_iotlb(get_iommu_domain(iommu, did), + 0, MAX_AGAW_PFN_WIDTH); + } +} + static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu) { u32 pmen; @@ -1928,9 +1929,16 @@ static int domain_init(struct dmar_domain *domain, struct intel_iommu *iommu, { int adjust_width, agaw; unsigned long sagaw; + int err; init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN, DMA_32BIT_PFN); + + err = init_iova_flush_queue(&domain->iovad, + iommu_flush_iova, iova_entry_free); + if (err) + return err; + domain_reserve_special_ranges(domain); /* calculate AGAW */ @@ -1982,14 +1990,6 @@ static void domain_exit(struct dmar_domain *domain) if (!domain) return; - /* Flush any lazy unmaps that may reference this domain */ - if (!intel_iommu_strict) { - int cpu; - - for_each_possible_cpu(cpu) - flush_unmaps_timeout(cpu); - } - /* Remove associated devices and clear attached or cached domains */ rcu_read_lock(); domain_remove_dev_info(domain); @@ -2051,11 +2051,14 @@ static int domain_context_mapping_one(struct dmar_domain *domain, if (context_copied(context)) { u16 did_old = context_domain_id(context); - if (did_old >= 0 && did_old < cap_ndoms(iommu->cap)) + if (did_old >= 0 && did_old < cap_ndoms(iommu->cap)) { iommu->flush.flush_context(iommu, did_old, (((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT, DMA_CCMD_DEVICE_INVL); + iommu->flush.flush_iotlb(iommu, did_old, 0, 0, + DMA_TLB_DSI_FLUSH); + } } pgd = domain->pgd; @@ -3222,7 +3225,7 @@ static int __init init_dmars(void) bool copied_tables = false; struct device *dev; struct intel_iommu *iommu; - int i, ret, cpu; + int i, ret; /* * for each drhd @@ -3255,22 +3258,6 @@ static int __init init_dmars(void) goto error; } - for_each_possible_cpu(cpu) { - struct deferred_flush_data *dfd = per_cpu_ptr(&deferred_flush, - cpu); - - dfd->tables = kzalloc(g_num_of_iommus * - sizeof(struct deferred_flush_table), - GFP_KERNEL); - if (!dfd->tables) { - ret = -ENOMEM; - goto free_g_iommus; - } - - spin_lock_init(&dfd->lock); - setup_timer(&dfd->timer, flush_unmaps_timeout, cpu); - } - for_each_active_iommu(iommu, drhd) { g_iommus[iommu->seq_id] = iommu; @@ -3453,10 +3440,9 @@ free_iommu: disable_dmar_iommu(iommu); free_dmar_iommu(iommu); } -free_g_iommus: - for_each_possible_cpu(cpu) - kfree(per_cpu_ptr(&deferred_flush, cpu)->tables); + kfree(g_iommus); + error: return ret; } @@ -3661,110 +3647,6 @@ static dma_addr_t intel_map_page(struct device *dev, struct page *page, dir, *dev->dma_mask); } -static void flush_unmaps(struct deferred_flush_data *flush_data) -{ - int i, j; - - flush_data->timer_on = 0; - - /* just flush them all */ - for (i = 0; i < g_num_of_iommus; i++) { - struct intel_iommu *iommu = g_iommus[i]; - struct deferred_flush_table *flush_table = - &flush_data->tables[i]; - if (!iommu) - continue; - - if (!flush_table->next) - continue; - - /* In caching mode, global flushes turn emulation expensive */ - if (!cap_caching_mode(iommu->cap)) - iommu->flush.flush_iotlb(iommu, 0, 0, 0, - DMA_TLB_GLOBAL_FLUSH); - for (j = 0; j < flush_table->next; j++) { - unsigned long mask; - struct deferred_flush_entry *entry = - &flush_table->entries[j]; - unsigned long iova_pfn = entry->iova_pfn; - unsigned long nrpages = entry->nrpages; - struct dmar_domain *domain = entry->domain; - struct page *freelist = entry->freelist; - - /* On real hardware multiple invalidations are expensive */ - if (cap_caching_mode(iommu->cap)) - iommu_flush_iotlb_psi(iommu, domain, - mm_to_dma_pfn(iova_pfn), - nrpages, !freelist, 0); - else { - mask = ilog2(nrpages); - iommu_flush_dev_iotlb(domain, - (uint64_t)iova_pfn << PAGE_SHIFT, mask); - } - free_iova_fast(&domain->iovad, iova_pfn, nrpages); - if (freelist) - dma_free_pagelist(freelist); - } - flush_table->next = 0; - } - - flush_data->size = 0; -} - -static void flush_unmaps_timeout(unsigned long cpuid) -{ - struct deferred_flush_data *flush_data = per_cpu_ptr(&deferred_flush, cpuid); - unsigned long flags; - - spin_lock_irqsave(&flush_data->lock, flags); - flush_unmaps(flush_data); - spin_unlock_irqrestore(&flush_data->lock, flags); -} - -static void add_unmap(struct dmar_domain *dom, unsigned long iova_pfn, - unsigned long nrpages, struct page *freelist) -{ - unsigned long flags; - int entry_id, iommu_id; - struct intel_iommu *iommu; - struct deferred_flush_entry *entry; - struct deferred_flush_data *flush_data; - - flush_data = raw_cpu_ptr(&deferred_flush); - - /* Flush all CPUs' entries to avoid deferring too much. If - * this becomes a bottleneck, can just flush us, and rely on - * flush timer for the rest. - */ - if (flush_data->size == HIGH_WATER_MARK) { - int cpu; - - for_each_online_cpu(cpu) - flush_unmaps_timeout(cpu); - } - - spin_lock_irqsave(&flush_data->lock, flags); - - iommu = domain_get_iommu(dom); - iommu_id = iommu->seq_id; - - entry_id = flush_data->tables[iommu_id].next; - ++(flush_data->tables[iommu_id].next); - - entry = &flush_data->tables[iommu_id].entries[entry_id]; - entry->domain = dom; - entry->iova_pfn = iova_pfn; - entry->nrpages = nrpages; - entry->freelist = freelist; - - if (!flush_data->timer_on) { - mod_timer(&flush_data->timer, jiffies + msecs_to_jiffies(10)); - flush_data->timer_on = 1; - } - flush_data->size++; - spin_unlock_irqrestore(&flush_data->lock, flags); -} - static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size) { struct dmar_domain *domain; @@ -3800,7 +3682,8 @@ static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size) free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(nrpages)); dma_free_pagelist(freelist); } else { - add_unmap(domain, iova_pfn, nrpages, freelist); + queue_iova(&domain->iovad, iova_pfn, nrpages, + (unsigned long)freelist); /* * queue up the release of the unmap to save the 1/6th of the * cpu used up by the iotlb flush operation... @@ -3982,6 +3865,9 @@ const struct dma_map_ops intel_dma_ops = { .map_page = intel_map_page, .unmap_page = intel_unmap_page, .mapping_error = intel_mapping_error, +#ifdef CONFIG_X86 + .dma_supported = x86_dma_supported, +#endif }; static inline int iommu_domain_cache_init(void) @@ -4316,7 +4202,7 @@ int dmar_parse_one_atsr(struct acpi_dmar_header *hdr, void *arg) struct acpi_dmar_atsr *atsr; struct dmar_atsr_unit *atsru; - if (system_state != SYSTEM_BOOTING && !intel_iommu_enabled) + if (system_state >= SYSTEM_RUNNING && !intel_iommu_enabled) return 0; atsr = container_of(hdr, struct acpi_dmar_atsr, header); @@ -4566,7 +4452,7 @@ int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info) struct acpi_dmar_atsr *atsr; struct acpi_dmar_reserved_memory *rmrr; - if (!intel_iommu_enabled && system_state != SYSTEM_BOOTING) + if (!intel_iommu_enabled && system_state >= SYSTEM_RUNNING) return 0; list_for_each_entry(rmrru, &dmar_rmrr_units, list) { @@ -4735,7 +4621,6 @@ static void free_all_cpu_cached_iovas(unsigned int cpu) static int intel_iommu_cpu_dead(unsigned int cpu) { free_all_cpu_cached_iovas(cpu); - flush_unmaps_timeout(cpu); return 0; } @@ -4750,7 +4635,9 @@ static void intel_disable_iommus(void) static inline struct intel_iommu *dev_to_intel_iommu(struct device *dev) { - return container_of(dev, struct intel_iommu, iommu.dev); + struct iommu_device *iommu_dev = dev_to_iommu_device(dev); + + return container_of(iommu_dev, struct intel_iommu, iommu); } static ssize_t intel_iommu_show_version(struct device *dev, diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c index f7ef4a5d4785..a5b89f6bcdbf 100644 --- a/drivers/iommu/intel_irq_remapping.c +++ b/drivers/iommu/intel_irq_remapping.c @@ -500,8 +500,9 @@ static void iommu_enable_irq_remapping(struct intel_iommu *iommu) static int intel_setup_irq_remapping(struct intel_iommu *iommu) { struct ir_table *ir_table; - struct page *pages; + struct fwnode_handle *fn; unsigned long *bitmap; + struct page *pages; if (iommu->ir_table) return 0; @@ -525,15 +526,24 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu) goto out_free_pages; } - iommu->ir_domain = irq_domain_add_hierarchy(arch_get_ir_parent_domain(), - 0, INTR_REMAP_TABLE_ENTRIES, - NULL, &intel_ir_domain_ops, - iommu); + fn = irq_domain_alloc_named_id_fwnode("INTEL-IR", iommu->seq_id); + if (!fn) + goto out_free_bitmap; + + iommu->ir_domain = + irq_domain_create_hierarchy(arch_get_ir_parent_domain(), + 0, INTR_REMAP_TABLE_ENTRIES, + fn, &intel_ir_domain_ops, + iommu); + irq_domain_free_fwnode(fn); if (!iommu->ir_domain) { pr_err("IR%d: failed to allocate irqdomain\n", iommu->seq_id); goto out_free_bitmap; } - iommu->ir_msi_domain = arch_create_msi_irq_domain(iommu->ir_domain); + iommu->ir_msi_domain = + arch_create_remap_msi_irq_domain(iommu->ir_domain, + "INTEL-IR-MSI", + iommu->seq_id); ir_table->base = page_address(pages); ir_table->bitmap = bitmap; @@ -1205,10 +1215,11 @@ static int intel_ir_set_vcpu_affinity(struct irq_data *data, void *info) } static struct irq_chip intel_ir_chip = { - .irq_ack = ir_ack_apic_edge, - .irq_set_affinity = intel_ir_set_affinity, - .irq_compose_msi_msg = intel_ir_compose_msi_msg, - .irq_set_vcpu_affinity = intel_ir_set_vcpu_affinity, + .name = "INTEL-IR", + .irq_ack = ir_ack_apic_edge, + .irq_set_affinity = intel_ir_set_affinity, + .irq_compose_msi_msg = intel_ir_compose_msi_msg, + .irq_set_vcpu_affinity = intel_ir_set_vcpu_affinity, }; static void intel_irq_remapping_prepare_irte(struct intel_ir_data *data, diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c index 8d6ca28c3e1f..d665d0dc16e8 100644 --- a/drivers/iommu/io-pgtable-arm-v7s.c +++ b/drivers/iommu/io-pgtable-arm-v7s.c @@ -32,6 +32,7 @@ #define pr_fmt(fmt) "arm-v7s io-pgtable: " fmt +#include <linux/atomic.h> #include <linux/dma-mapping.h> #include <linux/gfp.h> #include <linux/iommu.h> @@ -39,6 +40,7 @@ #include <linux/kmemleak.h> #include <linux/sizes.h> #include <linux/slab.h> +#include <linux/spinlock.h> #include <linux/types.h> #include <asm/barrier.h> @@ -92,7 +94,8 @@ #define ARM_V7S_PTE_TYPE_CONT_PAGE 0x1 #define ARM_V7S_PTE_IS_VALID(pte) (((pte) & 0x3) != 0) -#define ARM_V7S_PTE_IS_TABLE(pte, lvl) (lvl == 1 && ((pte) & ARM_V7S_PTE_TYPE_TABLE)) +#define ARM_V7S_PTE_IS_TABLE(pte, lvl) \ + ((lvl) == 1 && (((pte) & 0x3) == ARM_V7S_PTE_TYPE_TABLE)) /* Page table bits */ #define ARM_V7S_ATTR_XN(lvl) BIT(4 * (2 - (lvl))) @@ -167,6 +170,7 @@ struct arm_v7s_io_pgtable { arm_v7s_iopte *pgd; struct kmem_cache *l2_tables; + spinlock_t split_lock; }; static dma_addr_t __arm_v7s_dma_addr(void *pages) @@ -186,7 +190,8 @@ static arm_v7s_iopte *iopte_deref(arm_v7s_iopte pte, int lvl) static void *__arm_v7s_alloc_table(int lvl, gfp_t gfp, struct arm_v7s_io_pgtable *data) { - struct device *dev = data->iop.cfg.iommu_dev; + struct io_pgtable_cfg *cfg = &data->iop.cfg; + struct device *dev = cfg->iommu_dev; dma_addr_t dma; size_t size = ARM_V7S_TABLE_SIZE(lvl); void *table = NULL; @@ -195,7 +200,7 @@ static void *__arm_v7s_alloc_table(int lvl, gfp_t gfp, table = (void *)__get_dma_pages(__GFP_ZERO, get_order(size)); else if (lvl == 2) table = kmem_cache_zalloc(data->l2_tables, gfp | GFP_DMA); - if (table && !selftest_running) { + if (table && !(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA)) { dma = dma_map_single(dev, table, size, DMA_TO_DEVICE); if (dma_mapping_error(dev, dma)) goto out_free; @@ -224,10 +229,11 @@ out_free: static void __arm_v7s_free_table(void *table, int lvl, struct arm_v7s_io_pgtable *data) { - struct device *dev = data->iop.cfg.iommu_dev; + struct io_pgtable_cfg *cfg = &data->iop.cfg; + struct device *dev = cfg->iommu_dev; size_t size = ARM_V7S_TABLE_SIZE(lvl); - if (!selftest_running) + if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA)) dma_unmap_single(dev, __arm_v7s_dma_addr(table), size, DMA_TO_DEVICE); if (lvl == 1) @@ -239,7 +245,7 @@ static void __arm_v7s_free_table(void *table, int lvl, static void __arm_v7s_pte_sync(arm_v7s_iopte *ptep, int num_entries, struct io_pgtable_cfg *cfg) { - if (selftest_running) + if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA)) return; dma_sync_single_for_device(cfg->iommu_dev, __arm_v7s_dma_addr(ptep), @@ -280,6 +286,13 @@ static arm_v7s_iopte arm_v7s_prot_to_pte(int prot, int lvl, else if (prot & IOMMU_CACHE) pte |= ARM_V7S_ATTR_B | ARM_V7S_ATTR_C; + pte |= ARM_V7S_PTE_TYPE_PAGE; + if (lvl == 1 && (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS)) + pte |= ARM_V7S_ATTR_NS_SECTION; + + if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_4GB) + pte |= ARM_V7S_ATTR_MTK_4GB; + return pte; } @@ -352,7 +365,7 @@ static int arm_v7s_init_pte(struct arm_v7s_io_pgtable *data, int lvl, int num_entries, arm_v7s_iopte *ptep) { struct io_pgtable_cfg *cfg = &data->iop.cfg; - arm_v7s_iopte pte = arm_v7s_prot_to_pte(prot, lvl, cfg); + arm_v7s_iopte pte; int i; for (i = 0; i < num_entries; i++) @@ -374,13 +387,7 @@ static int arm_v7s_init_pte(struct arm_v7s_io_pgtable *data, return -EEXIST; } - pte |= ARM_V7S_PTE_TYPE_PAGE; - if (lvl == 1 && (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS)) - pte |= ARM_V7S_ATTR_NS_SECTION; - - if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_4GB) - pte |= ARM_V7S_ATTR_MTK_4GB; - + pte = arm_v7s_prot_to_pte(prot, lvl, cfg); if (num_entries > 1) pte = arm_v7s_pte_to_cont(pte, lvl); @@ -390,6 +397,30 @@ static int arm_v7s_init_pte(struct arm_v7s_io_pgtable *data, return 0; } +static arm_v7s_iopte arm_v7s_install_table(arm_v7s_iopte *table, + arm_v7s_iopte *ptep, + arm_v7s_iopte curr, + struct io_pgtable_cfg *cfg) +{ + arm_v7s_iopte old, new; + + new = virt_to_phys(table) | ARM_V7S_PTE_TYPE_TABLE; + if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS) + new |= ARM_V7S_ATTR_NS_TABLE; + + /* + * Ensure the table itself is visible before its PTE can be. + * Whilst we could get away with cmpxchg64_release below, this + * doesn't have any ordering semantics when !CONFIG_SMP. + */ + dma_wmb(); + + old = cmpxchg_relaxed(ptep, curr, new); + __arm_v7s_pte_sync(ptep, 1, cfg); + + return old; +} + static int __arm_v7s_map(struct arm_v7s_io_pgtable *data, unsigned long iova, phys_addr_t paddr, size_t size, int prot, int lvl, arm_v7s_iopte *ptep) @@ -411,20 +442,23 @@ static int __arm_v7s_map(struct arm_v7s_io_pgtable *data, unsigned long iova, return -EINVAL; /* Grab a pointer to the next level */ - pte = *ptep; + pte = READ_ONCE(*ptep); if (!pte) { cptep = __arm_v7s_alloc_table(lvl + 1, GFP_ATOMIC, data); if (!cptep) return -ENOMEM; - pte = virt_to_phys(cptep) | ARM_V7S_PTE_TYPE_TABLE; - if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS) - pte |= ARM_V7S_ATTR_NS_TABLE; + pte = arm_v7s_install_table(cptep, ptep, 0, cfg); + if (pte) + __arm_v7s_free_table(cptep, lvl + 1, data); + } else { + /* We've no easy way of knowing if it's synced yet, so... */ + __arm_v7s_pte_sync(ptep, 1, cfg); + } - __arm_v7s_set_pte(ptep, pte, 1, cfg); - } else if (ARM_V7S_PTE_IS_TABLE(pte, lvl)) { + if (ARM_V7S_PTE_IS_TABLE(pte, lvl)) { cptep = iopte_deref(pte, lvl); - } else { + } else if (pte) { /* We require an unmap first */ WARN_ON(!selftest_running); return -EEXIST; @@ -445,6 +479,9 @@ static int arm_v7s_map(struct io_pgtable_ops *ops, unsigned long iova, if (!(prot & (IOMMU_READ | IOMMU_WRITE))) return 0; + if (WARN_ON(upper_32_bits(iova) || upper_32_bits(paddr))) + return -ERANGE; + ret = __arm_v7s_map(data, iova, paddr, size, prot, 1, data->pgd); /* * Synchronise all PTE updates for the new mapping before there's @@ -477,66 +514,73 @@ static void arm_v7s_free_pgtable(struct io_pgtable *iop) kfree(data); } -static void arm_v7s_split_cont(struct arm_v7s_io_pgtable *data, - unsigned long iova, int idx, int lvl, - arm_v7s_iopte *ptep) +static arm_v7s_iopte arm_v7s_split_cont(struct arm_v7s_io_pgtable *data, + unsigned long iova, int idx, int lvl, + arm_v7s_iopte *ptep) { struct io_pgtable *iop = &data->iop; arm_v7s_iopte pte; size_t size = ARM_V7S_BLOCK_SIZE(lvl); int i; + /* Check that we didn't lose a race to get the lock */ + pte = *ptep; + if (!arm_v7s_pte_is_cont(pte, lvl)) + return pte; + ptep -= idx & (ARM_V7S_CONT_PAGES - 1); - pte = arm_v7s_cont_to_pte(*ptep, lvl); - for (i = 0; i < ARM_V7S_CONT_PAGES; i++) { - ptep[i] = pte; - pte += size; - } + pte = arm_v7s_cont_to_pte(pte, lvl); + for (i = 0; i < ARM_V7S_CONT_PAGES; i++) + ptep[i] = pte + i * size; __arm_v7s_pte_sync(ptep, ARM_V7S_CONT_PAGES, &iop->cfg); size *= ARM_V7S_CONT_PAGES; io_pgtable_tlb_add_flush(iop, iova, size, size, true); io_pgtable_tlb_sync(iop); + return pte; } static int arm_v7s_split_blk_unmap(struct arm_v7s_io_pgtable *data, unsigned long iova, size_t size, - arm_v7s_iopte *ptep) + arm_v7s_iopte blk_pte, arm_v7s_iopte *ptep) { - unsigned long blk_start, blk_end, blk_size; - phys_addr_t blk_paddr; - arm_v7s_iopte table = 0; - int prot = arm_v7s_pte_to_prot(*ptep, 1); + struct io_pgtable_cfg *cfg = &data->iop.cfg; + arm_v7s_iopte pte, *tablep; + int i, unmap_idx, num_entries, num_ptes; - blk_size = ARM_V7S_BLOCK_SIZE(1); - blk_start = iova & ARM_V7S_LVL_MASK(1); - blk_end = blk_start + ARM_V7S_BLOCK_SIZE(1); - blk_paddr = *ptep & ARM_V7S_LVL_MASK(1); + tablep = __arm_v7s_alloc_table(2, GFP_ATOMIC, data); + if (!tablep) + return 0; /* Bytes unmapped */ - for (; blk_start < blk_end; blk_start += size, blk_paddr += size) { - arm_v7s_iopte *tablep; + num_ptes = ARM_V7S_PTES_PER_LVL(2); + num_entries = size >> ARM_V7S_LVL_SHIFT(2); + unmap_idx = ARM_V7S_LVL_IDX(iova, 2); + pte = arm_v7s_prot_to_pte(arm_v7s_pte_to_prot(blk_pte, 1), 2, cfg); + if (num_entries > 1) + pte = arm_v7s_pte_to_cont(pte, 2); + + for (i = 0; i < num_ptes; i += num_entries, pte += size) { /* Unmap! */ - if (blk_start == iova) + if (i == unmap_idx) continue; - /* __arm_v7s_map expects a pointer to the start of the table */ - tablep = &table - ARM_V7S_LVL_IDX(blk_start, 1); - if (__arm_v7s_map(data, blk_start, blk_paddr, size, prot, 1, - tablep) < 0) { - if (table) { - /* Free the table we allocated */ - tablep = iopte_deref(table, 1); - __arm_v7s_free_table(tablep, 2, data); - } - return 0; /* Bytes unmapped */ - } + __arm_v7s_set_pte(&tablep[i], pte, num_entries, cfg); + } + + pte = arm_v7s_install_table(tablep, ptep, blk_pte, cfg); + if (pte != blk_pte) { + __arm_v7s_free_table(tablep, 2, data); + + if (!ARM_V7S_PTE_IS_TABLE(pte, 1)) + return 0; + + tablep = iopte_deref(pte, 1); + return __arm_v7s_unmap(data, iova, size, 2, tablep); } - __arm_v7s_set_pte(ptep, table, 1, &data->iop.cfg); - iova &= ~(blk_size - 1); - io_pgtable_tlb_add_flush(&data->iop, iova, blk_size, blk_size, true); + io_pgtable_tlb_add_flush(&data->iop, iova, size, size, true); return size; } @@ -555,17 +599,28 @@ static int __arm_v7s_unmap(struct arm_v7s_io_pgtable *data, idx = ARM_V7S_LVL_IDX(iova, lvl); ptep += idx; do { - if (WARN_ON(!ARM_V7S_PTE_IS_VALID(ptep[i]))) + pte[i] = READ_ONCE(ptep[i]); + if (WARN_ON(!ARM_V7S_PTE_IS_VALID(pte[i]))) return 0; - pte[i] = ptep[i]; } while (++i < num_entries); /* * If we've hit a contiguous 'large page' entry at this level, it * needs splitting first, unless we're unmapping the whole lot. + * + * For splitting, we can't rewrite 16 PTEs atomically, and since we + * can't necessarily assume TEX remap we don't have a software bit to + * mark live entries being split. In practice (i.e. DMA API code), we + * will never be splitting large pages anyway, so just wrap this edge + * case in a lock for the sake of correctness and be done with it. */ - if (num_entries <= 1 && arm_v7s_pte_is_cont(pte[0], lvl)) - arm_v7s_split_cont(data, iova, idx, lvl, ptep); + if (num_entries <= 1 && arm_v7s_pte_is_cont(pte[0], lvl)) { + unsigned long flags; + + spin_lock_irqsave(&data->split_lock, flags); + pte[0] = arm_v7s_split_cont(data, iova, idx, lvl, ptep); + spin_unlock_irqrestore(&data->split_lock, flags); + } /* If the size matches this level, we're in the right place */ if (num_entries) { @@ -593,7 +648,7 @@ static int __arm_v7s_unmap(struct arm_v7s_io_pgtable *data, * Insert a table at the next level to map the old region, * minus the part we want to unmap */ - return arm_v7s_split_blk_unmap(data, iova, size, ptep); + return arm_v7s_split_blk_unmap(data, iova, size, pte[0], ptep); } /* Keep on walkin' */ @@ -607,6 +662,9 @@ static int arm_v7s_unmap(struct io_pgtable_ops *ops, unsigned long iova, struct arm_v7s_io_pgtable *data = io_pgtable_ops_to_data(ops); size_t unmapped; + if (WARN_ON(upper_32_bits(iova))) + return 0; + unmapped = __arm_v7s_unmap(data, iova, size, 1, data->pgd); if (unmapped) io_pgtable_tlb_sync(&data->iop); @@ -623,7 +681,8 @@ static phys_addr_t arm_v7s_iova_to_phys(struct io_pgtable_ops *ops, u32 mask; do { - pte = ptep[ARM_V7S_LVL_IDX(iova, ++lvl)]; + ptep += ARM_V7S_LVL_IDX(iova, ++lvl); + pte = READ_ONCE(*ptep); ptep = iopte_deref(pte, lvl); } while (ARM_V7S_PTE_IS_TABLE(pte, lvl)); @@ -651,7 +710,8 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg, if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS | IO_PGTABLE_QUIRK_NO_PERMS | IO_PGTABLE_QUIRK_TLBI_ON_MAP | - IO_PGTABLE_QUIRK_ARM_MTK_4GB)) + IO_PGTABLE_QUIRK_ARM_MTK_4GB | + IO_PGTABLE_QUIRK_NO_DMA)) return NULL; /* If ARM_MTK_4GB is enabled, the NO_PERMS is also expected. */ @@ -663,6 +723,7 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg, if (!data) return NULL; + spin_lock_init(&data->split_lock); data->l2_tables = kmem_cache_create("io-pgtable_armv7s_l2", ARM_V7S_TABLE_SIZE(2), ARM_V7S_TABLE_SIZE(2), @@ -749,7 +810,7 @@ static void dummy_tlb_sync(void *cookie) WARN_ON(cookie != cfg_cookie); } -static struct iommu_gather_ops dummy_tlb_ops = { +static const struct iommu_gather_ops dummy_tlb_ops = { .tlb_flush_all = dummy_tlb_flush_all, .tlb_add_flush = dummy_tlb_add_flush, .tlb_sync = dummy_tlb_sync, @@ -768,7 +829,7 @@ static int __init arm_v7s_do_selftests(void) .tlb = &dummy_tlb_ops, .oas = 32, .ias = 32, - .quirks = IO_PGTABLE_QUIRK_ARM_NS, + .quirks = IO_PGTABLE_QUIRK_ARM_NS | IO_PGTABLE_QUIRK_NO_DMA, .pgsize_bitmap = SZ_4K | SZ_64K | SZ_1M | SZ_16M, }; unsigned int iova, size, iova_start; diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index 6e5df5e0a3bd..e8018a308868 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -20,6 +20,7 @@ #define pr_fmt(fmt) "arm-lpae io-pgtable: " fmt +#include <linux/atomic.h> #include <linux/iommu.h> #include <linux/kernel.h> #include <linux/sizes.h> @@ -99,6 +100,8 @@ #define ARM_LPAE_PTE_ATTR_HI_MASK (((arm_lpae_iopte)6) << 52) #define ARM_LPAE_PTE_ATTR_MASK (ARM_LPAE_PTE_ATTR_LO_MASK | \ ARM_LPAE_PTE_ATTR_HI_MASK) +/* Software bit for solving coherency races */ +#define ARM_LPAE_PTE_SW_SYNC (((arm_lpae_iopte)1) << 55) /* Stage-1 PTE */ #define ARM_LPAE_PTE_AP_UNPRIV (((arm_lpae_iopte)1) << 6) @@ -217,7 +220,7 @@ static void *__arm_lpae_alloc_pages(size_t size, gfp_t gfp, if (!pages) return NULL; - if (!selftest_running) { + if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA)) { dma = dma_map_single(dev, pages, size, DMA_TO_DEVICE); if (dma_mapping_error(dev, dma)) goto out_free; @@ -243,40 +246,64 @@ out_free: static void __arm_lpae_free_pages(void *pages, size_t size, struct io_pgtable_cfg *cfg) { - if (!selftest_running) + if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA)) dma_unmap_single(cfg->iommu_dev, __arm_lpae_dma_addr(pages), size, DMA_TO_DEVICE); free_pages_exact(pages, size); } +static void __arm_lpae_sync_pte(arm_lpae_iopte *ptep, + struct io_pgtable_cfg *cfg) +{ + dma_sync_single_for_device(cfg->iommu_dev, __arm_lpae_dma_addr(ptep), + sizeof(*ptep), DMA_TO_DEVICE); +} + static void __arm_lpae_set_pte(arm_lpae_iopte *ptep, arm_lpae_iopte pte, struct io_pgtable_cfg *cfg) { *ptep = pte; - if (!selftest_running) - dma_sync_single_for_device(cfg->iommu_dev, - __arm_lpae_dma_addr(ptep), - sizeof(pte), DMA_TO_DEVICE); + if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA)) + __arm_lpae_sync_pte(ptep, cfg); } static int __arm_lpae_unmap(struct arm_lpae_io_pgtable *data, unsigned long iova, size_t size, int lvl, arm_lpae_iopte *ptep); +static void __arm_lpae_init_pte(struct arm_lpae_io_pgtable *data, + phys_addr_t paddr, arm_lpae_iopte prot, + int lvl, arm_lpae_iopte *ptep) +{ + arm_lpae_iopte pte = prot; + + if (data->iop.cfg.quirks & IO_PGTABLE_QUIRK_ARM_NS) + pte |= ARM_LPAE_PTE_NS; + + if (lvl == ARM_LPAE_MAX_LEVELS - 1) + pte |= ARM_LPAE_PTE_TYPE_PAGE; + else + pte |= ARM_LPAE_PTE_TYPE_BLOCK; + + pte |= ARM_LPAE_PTE_AF | ARM_LPAE_PTE_SH_IS; + pte |= pfn_to_iopte(paddr >> data->pg_shift, data); + + __arm_lpae_set_pte(ptep, pte, &data->iop.cfg); +} + static int arm_lpae_init_pte(struct arm_lpae_io_pgtable *data, unsigned long iova, phys_addr_t paddr, arm_lpae_iopte prot, int lvl, arm_lpae_iopte *ptep) { - arm_lpae_iopte pte = prot; - struct io_pgtable_cfg *cfg = &data->iop.cfg; + arm_lpae_iopte pte = *ptep; - if (iopte_leaf(*ptep, lvl)) { + if (iopte_leaf(pte, lvl)) { /* We require an unmap first */ WARN_ON(!selftest_running); return -EEXIST; - } else if (iopte_type(*ptep, lvl) == ARM_LPAE_PTE_TYPE_TABLE) { + } else if (iopte_type(pte, lvl) == ARM_LPAE_PTE_TYPE_TABLE) { /* * We need to unmap and free the old table before * overwriting it with a block entry. @@ -289,19 +316,40 @@ static int arm_lpae_init_pte(struct arm_lpae_io_pgtable *data, return -EINVAL; } + __arm_lpae_init_pte(data, paddr, prot, lvl, ptep); + return 0; +} + +static arm_lpae_iopte arm_lpae_install_table(arm_lpae_iopte *table, + arm_lpae_iopte *ptep, + arm_lpae_iopte curr, + struct io_pgtable_cfg *cfg) +{ + arm_lpae_iopte old, new; + + new = __pa(table) | ARM_LPAE_PTE_TYPE_TABLE; if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS) - pte |= ARM_LPAE_PTE_NS; + new |= ARM_LPAE_PTE_NSTABLE; - if (lvl == ARM_LPAE_MAX_LEVELS - 1) - pte |= ARM_LPAE_PTE_TYPE_PAGE; - else - pte |= ARM_LPAE_PTE_TYPE_BLOCK; + /* + * Ensure the table itself is visible before its PTE can be. + * Whilst we could get away with cmpxchg64_release below, this + * doesn't have any ordering semantics when !CONFIG_SMP. + */ + dma_wmb(); - pte |= ARM_LPAE_PTE_AF | ARM_LPAE_PTE_SH_IS; - pte |= pfn_to_iopte(paddr >> data->pg_shift, data); + old = cmpxchg64_relaxed(ptep, curr, new); - __arm_lpae_set_pte(ptep, pte, cfg); - return 0; + if ((cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA) || + (old & ARM_LPAE_PTE_SW_SYNC)) + return old; + + /* Even if it's not ours, there's no point waiting; just kick it */ + __arm_lpae_sync_pte(ptep, cfg); + if (old == curr) + WRITE_ONCE(*ptep, new | ARM_LPAE_PTE_SW_SYNC); + + return old; } static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova, @@ -310,6 +358,7 @@ static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova, { arm_lpae_iopte *cptep, pte; size_t block_size = ARM_LPAE_BLOCK_SIZE(lvl, data); + size_t tblsz = ARM_LPAE_GRANULE(data); struct io_pgtable_cfg *cfg = &data->iop.cfg; /* Find our entry at the current level */ @@ -324,20 +373,23 @@ static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova, return -EINVAL; /* Grab a pointer to the next level */ - pte = *ptep; + pte = READ_ONCE(*ptep); if (!pte) { - cptep = __arm_lpae_alloc_pages(ARM_LPAE_GRANULE(data), - GFP_ATOMIC, cfg); + cptep = __arm_lpae_alloc_pages(tblsz, GFP_ATOMIC, cfg); if (!cptep) return -ENOMEM; - pte = __pa(cptep) | ARM_LPAE_PTE_TYPE_TABLE; - if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS) - pte |= ARM_LPAE_PTE_NSTABLE; - __arm_lpae_set_pte(ptep, pte, cfg); - } else if (!iopte_leaf(pte, lvl)) { + pte = arm_lpae_install_table(cptep, ptep, 0, cfg); + if (pte) + __arm_lpae_free_pages(cptep, tblsz, cfg); + } else if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA) && + !(pte & ARM_LPAE_PTE_SW_SYNC)) { + __arm_lpae_sync_pte(ptep, cfg); + } + + if (pte && !iopte_leaf(pte, lvl)) { cptep = iopte_deref(pte, data); - } else { + } else if (pte) { /* We require an unmap first */ WARN_ON(!selftest_running); return -EEXIST; @@ -400,6 +452,10 @@ static int arm_lpae_map(struct io_pgtable_ops *ops, unsigned long iova, if (!(iommu_prot & (IOMMU_READ | IOMMU_WRITE))) return 0; + if (WARN_ON(iova >= (1ULL << data->iop.cfg.ias) || + paddr >= (1ULL << data->iop.cfg.oas))) + return -ERANGE; + prot = arm_lpae_prot_to_pte(data, iommu_prot); ret = __arm_lpae_map(data, iova, paddr, size, prot, lvl, ptep); /* @@ -452,40 +508,55 @@ static void arm_lpae_free_pgtable(struct io_pgtable *iop) static int arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data, unsigned long iova, size_t size, - arm_lpae_iopte prot, int lvl, - arm_lpae_iopte *ptep, size_t blk_size) + arm_lpae_iopte blk_pte, int lvl, + arm_lpae_iopte *ptep) { - unsigned long blk_start, blk_end; + struct io_pgtable_cfg *cfg = &data->iop.cfg; + arm_lpae_iopte pte, *tablep; phys_addr_t blk_paddr; - arm_lpae_iopte table = 0; + size_t tablesz = ARM_LPAE_GRANULE(data); + size_t split_sz = ARM_LPAE_BLOCK_SIZE(lvl, data); + int i, unmap_idx = -1; - blk_start = iova & ~(blk_size - 1); - blk_end = blk_start + blk_size; - blk_paddr = iopte_to_pfn(*ptep, data) << data->pg_shift; + if (WARN_ON(lvl == ARM_LPAE_MAX_LEVELS)) + return 0; + + tablep = __arm_lpae_alloc_pages(tablesz, GFP_ATOMIC, cfg); + if (!tablep) + return 0; /* Bytes unmapped */ + + if (size == split_sz) + unmap_idx = ARM_LPAE_LVL_IDX(iova, lvl, data); - for (; blk_start < blk_end; blk_start += size, blk_paddr += size) { - arm_lpae_iopte *tablep; + blk_paddr = iopte_to_pfn(blk_pte, data) << data->pg_shift; + pte = iopte_prot(blk_pte); + for (i = 0; i < tablesz / sizeof(pte); i++, blk_paddr += split_sz) { /* Unmap! */ - if (blk_start == iova) + if (i == unmap_idx) continue; - /* __arm_lpae_map expects a pointer to the start of the table */ - tablep = &table - ARM_LPAE_LVL_IDX(blk_start, lvl, data); - if (__arm_lpae_map(data, blk_start, blk_paddr, size, prot, lvl, - tablep) < 0) { - if (table) { - /* Free the table we allocated */ - tablep = iopte_deref(table, data); - __arm_lpae_free_pgtable(data, lvl + 1, tablep); - } - return 0; /* Bytes unmapped */ - } + __arm_lpae_init_pte(data, blk_paddr, pte, lvl, &tablep[i]); + } + + pte = arm_lpae_install_table(tablep, ptep, blk_pte, cfg); + if (pte != blk_pte) { + __arm_lpae_free_pages(tablep, tablesz, cfg); + /* + * We may race against someone unmapping another part of this + * block, but anything else is invalid. We can't misinterpret + * a page entry here since we're never at the last level. + */ + if (iopte_type(pte, lvl - 1) != ARM_LPAE_PTE_TYPE_TABLE) + return 0; + + tablep = iopte_deref(pte, data); } - __arm_lpae_set_pte(ptep, table, &data->iop.cfg); - iova &= ~(blk_size - 1); - io_pgtable_tlb_add_flush(&data->iop, iova, blk_size, blk_size, true); + if (unmap_idx < 0) + return __arm_lpae_unmap(data, iova, size, lvl, tablep); + + io_pgtable_tlb_add_flush(&data->iop, iova, size, size, true); return size; } @@ -495,19 +566,18 @@ static int __arm_lpae_unmap(struct arm_lpae_io_pgtable *data, { arm_lpae_iopte pte; struct io_pgtable *iop = &data->iop; - size_t blk_size = ARM_LPAE_BLOCK_SIZE(lvl, data); /* Something went horribly wrong and we ran out of page table */ if (WARN_ON(lvl == ARM_LPAE_MAX_LEVELS)) return 0; ptep += ARM_LPAE_LVL_IDX(iova, lvl, data); - pte = *ptep; + pte = READ_ONCE(*ptep); if (WARN_ON(!pte)) return 0; /* If the size matches this level, we're in the right place */ - if (size == blk_size) { + if (size == ARM_LPAE_BLOCK_SIZE(lvl, data)) { __arm_lpae_set_pte(ptep, 0, &iop->cfg); if (!iopte_leaf(pte, lvl)) { @@ -527,9 +597,8 @@ static int __arm_lpae_unmap(struct arm_lpae_io_pgtable *data, * Insert a table at the next level to map the old region, * minus the part we want to unmap */ - return arm_lpae_split_blk_unmap(data, iova, size, - iopte_prot(pte), lvl, ptep, - blk_size); + return arm_lpae_split_blk_unmap(data, iova, size, pte, + lvl + 1, ptep); } /* Keep on walkin' */ @@ -545,6 +614,9 @@ static int arm_lpae_unmap(struct io_pgtable_ops *ops, unsigned long iova, arm_lpae_iopte *ptep = data->pgd; int lvl = ARM_LPAE_START_LVL(data); + if (WARN_ON(iova >= (1ULL << data->iop.cfg.ias))) + return 0; + unmapped = __arm_lpae_unmap(data, iova, size, lvl, ptep); if (unmapped) io_pgtable_tlb_sync(&data->iop); @@ -565,7 +637,8 @@ static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops, return 0; /* Grab the IOPTE we're interested in */ - pte = *(ptep + ARM_LPAE_LVL_IDX(iova, lvl, data)); + ptep += ARM_LPAE_LVL_IDX(iova, lvl, data); + pte = READ_ONCE(*ptep); /* Valid entry? */ if (!pte) @@ -673,7 +746,7 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie) u64 reg; struct arm_lpae_io_pgtable *data; - if (cfg->quirks & ~IO_PGTABLE_QUIRK_ARM_NS) + if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS | IO_PGTABLE_QUIRK_NO_DMA)) return NULL; data = arm_lpae_alloc_pgtable(cfg); @@ -762,7 +835,7 @@ arm_64_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie) struct arm_lpae_io_pgtable *data; /* The NS quirk doesn't apply at stage 2 */ - if (cfg->quirks) + if (cfg->quirks & ~IO_PGTABLE_QUIRK_NO_DMA) return NULL; data = arm_lpae_alloc_pgtable(cfg); @@ -1066,6 +1139,7 @@ static int __init arm_lpae_do_selftests(void) struct io_pgtable_cfg cfg = { .tlb = &dummy_tlb_ops, .oas = 48, + .quirks = IO_PGTABLE_QUIRK_NO_DMA, }; for (i = 0; i < ARRAY_SIZE(pgsize); ++i) { diff --git a/drivers/iommu/io-pgtable.h b/drivers/iommu/io-pgtable.h index 969d82cc92ca..a3e667077b14 100644 --- a/drivers/iommu/io-pgtable.h +++ b/drivers/iommu/io-pgtable.h @@ -65,11 +65,17 @@ struct io_pgtable_cfg { * PTEs, for Mediatek IOMMUs which treat it as a 33rd address bit * when the SoC is in "4GB mode" and they can only access the high * remap of DRAM (0x1_00000000 to 0x1_ffffffff). + * + * IO_PGTABLE_QUIRK_NO_DMA: Guarantees that the tables will only ever + * be accessed by a fully cache-coherent IOMMU or CPU (e.g. for a + * software-emulated IOMMU), such that pagetable updates need not + * be treated as explicit DMA data. */ #define IO_PGTABLE_QUIRK_ARM_NS BIT(0) #define IO_PGTABLE_QUIRK_NO_PERMS BIT(1) #define IO_PGTABLE_QUIRK_TLBI_ON_MAP BIT(2) #define IO_PGTABLE_QUIRK_ARM_MTK_4GB BIT(3) + #define IO_PGTABLE_QUIRK_NO_DMA BIT(4) unsigned long quirks; unsigned long pgsize_bitmap; unsigned int ias; @@ -152,14 +158,12 @@ void free_io_pgtable_ops(struct io_pgtable_ops *ops); * @fmt: The page table format. * @cookie: An opaque token provided by the IOMMU driver and passed back to * any callback routines. - * @tlb_sync_pending: Private flag for optimising out redundant syncs. * @cfg: A copy of the page table configuration. * @ops: The page table operations in use for this set of page tables. */ struct io_pgtable { enum io_pgtable_fmt fmt; void *cookie; - bool tlb_sync_pending; struct io_pgtable_cfg cfg; struct io_pgtable_ops ops; }; @@ -169,22 +173,17 @@ struct io_pgtable { static inline void io_pgtable_tlb_flush_all(struct io_pgtable *iop) { iop->cfg.tlb->tlb_flush_all(iop->cookie); - iop->tlb_sync_pending = true; } static inline void io_pgtable_tlb_add_flush(struct io_pgtable *iop, unsigned long iova, size_t size, size_t granule, bool leaf) { iop->cfg.tlb->tlb_add_flush(iova, size, granule, leaf, iop->cookie); - iop->tlb_sync_pending = true; } static inline void io_pgtable_tlb_sync(struct io_pgtable *iop) { - if (iop->tlb_sync_pending) { - iop->cfg.tlb->tlb_sync(iop->cookie); - iop->tlb_sync_pending = false; - } + iop->cfg.tlb->tlb_sync(iop->cookie); } /** diff --git a/drivers/iommu/iommu-sysfs.c b/drivers/iommu/iommu-sysfs.c index c58351ed61c1..36d1a7ce7fc4 100644 --- a/drivers/iommu/iommu-sysfs.c +++ b/drivers/iommu/iommu-sysfs.c @@ -62,32 +62,40 @@ int iommu_device_sysfs_add(struct iommu_device *iommu, va_list vargs; int ret; - device_initialize(&iommu->dev); + iommu->dev = kzalloc(sizeof(*iommu->dev), GFP_KERNEL); + if (!iommu->dev) + return -ENOMEM; - iommu->dev.class = &iommu_class; - iommu->dev.parent = parent; - iommu->dev.groups = groups; + device_initialize(iommu->dev); + + iommu->dev->class = &iommu_class; + iommu->dev->parent = parent; + iommu->dev->groups = groups; va_start(vargs, fmt); - ret = kobject_set_name_vargs(&iommu->dev.kobj, fmt, vargs); + ret = kobject_set_name_vargs(&iommu->dev->kobj, fmt, vargs); va_end(vargs); if (ret) goto error; - ret = device_add(&iommu->dev); + ret = device_add(iommu->dev); if (ret) goto error; + dev_set_drvdata(iommu->dev, iommu); + return 0; error: - put_device(&iommu->dev); + put_device(iommu->dev); return ret; } void iommu_device_sysfs_remove(struct iommu_device *iommu) { - device_unregister(&iommu->dev); + dev_set_drvdata(iommu->dev, NULL); + device_unregister(iommu->dev); + iommu->dev = NULL; } /* * IOMMU drivers can indicate a device is managed by a given IOMMU using @@ -102,14 +110,14 @@ int iommu_device_link(struct iommu_device *iommu, struct device *link) if (!iommu || IS_ERR(iommu)) return -ENODEV; - ret = sysfs_add_link_to_group(&iommu->dev.kobj, "devices", + ret = sysfs_add_link_to_group(&iommu->dev->kobj, "devices", &link->kobj, dev_name(link)); if (ret) return ret; - ret = sysfs_create_link_nowarn(&link->kobj, &iommu->dev.kobj, "iommu"); + ret = sysfs_create_link_nowarn(&link->kobj, &iommu->dev->kobj, "iommu"); if (ret) - sysfs_remove_link_from_group(&iommu->dev.kobj, "devices", + sysfs_remove_link_from_group(&iommu->dev->kobj, "devices", dev_name(link)); return ret; @@ -121,5 +129,5 @@ void iommu_device_unlink(struct iommu_device *iommu, struct device *link) return; sysfs_remove_link(&link->kobj, "iommu"); - sysfs_remove_link_from_group(&iommu->dev.kobj, "devices", dev_name(link)); + sysfs_remove_link_from_group(&iommu->dev->kobj, "devices", dev_name(link)); } diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index cf7ca7e70777..3de5c0bcb5cc 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -527,6 +527,8 @@ static int iommu_group_create_direct_mappings(struct iommu_group *group, } + iommu_flush_tlb_all(domain); + out: iommu_put_resv_regions(dev, &mappings); @@ -915,13 +917,7 @@ static int get_pci_alias_or_group(struct pci_dev *pdev, u16 alias, void *opaque) */ struct iommu_group *generic_device_group(struct device *dev) { - struct iommu_group *group; - - group = iommu_group_alloc(); - if (IS_ERR(group)) - return NULL; - - return group; + return iommu_group_alloc(); } /* @@ -988,11 +984,7 @@ struct iommu_group *pci_device_group(struct device *dev) return group; /* No shared group found, allocate new */ - group = iommu_group_alloc(); - if (IS_ERR(group)) - return NULL; - - return group; + return iommu_group_alloc(); } /** @@ -1015,10 +1007,12 @@ struct iommu_group *iommu_group_get_for_dev(struct device *dev) if (group) return group; - group = ERR_PTR(-EINVAL); + if (!ops) + return ERR_PTR(-EINVAL); - if (ops && ops->device_group) - group = ops->device_group(dev); + group = ops->device_group(dev); + if (WARN_ON_ONCE(group == NULL)) + return ERR_PTR(-EINVAL); if (IS_ERR(group)) return group; @@ -1290,6 +1284,10 @@ static int __iommu_attach_device(struct iommu_domain *domain, struct device *dev) { int ret; + if ((domain->ops->is_attach_deferred != NULL) && + domain->ops->is_attach_deferred(domain, dev)) + return 0; + if (unlikely(domain->ops->attach_dev == NULL)) return -ENODEV; @@ -1305,12 +1303,8 @@ int iommu_attach_device(struct iommu_domain *domain, struct device *dev) int ret; group = iommu_group_get(dev); - /* FIXME: Remove this when groups a mandatory for iommu drivers */ - if (group == NULL) - return __iommu_attach_device(domain, dev); - /* - * We have a group - lock it to make sure the device-count doesn't + * Lock the group to make sure the device-count doesn't * change while we are attaching */ mutex_lock(&group->mutex); @@ -1331,6 +1325,10 @@ EXPORT_SYMBOL_GPL(iommu_attach_device); static void __iommu_detach_device(struct iommu_domain *domain, struct device *dev) { + if ((domain->ops->is_attach_deferred != NULL) && + domain->ops->is_attach_deferred(domain, dev)) + return; + if (unlikely(domain->ops->detach_dev == NULL)) return; @@ -1343,9 +1341,6 @@ void iommu_detach_device(struct iommu_domain *domain, struct device *dev) struct iommu_group *group; group = iommu_group_get(dev); - /* FIXME: Remove this when groups a mandatory for iommu drivers */ - if (group == NULL) - return __iommu_detach_device(domain, dev); mutex_lock(&group->mutex); if (iommu_group_device_count(group) != 1) { @@ -1367,8 +1362,7 @@ struct iommu_domain *iommu_get_domain_for_dev(struct device *dev) struct iommu_group *group; group = iommu_group_get(dev); - /* FIXME: Remove this when groups a mandatory for iommu drivers */ - if (group == NULL) + if (!group) return NULL; domain = group->domain; @@ -1563,13 +1557,16 @@ int iommu_map(struct iommu_domain *domain, unsigned long iova, } EXPORT_SYMBOL_GPL(iommu_map); -size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size) +static size_t __iommu_unmap(struct iommu_domain *domain, + unsigned long iova, size_t size, + bool sync) { + const struct iommu_ops *ops = domain->ops; size_t unmapped_page, unmapped = 0; - unsigned int min_pagesz; unsigned long orig_iova = iova; + unsigned int min_pagesz; - if (unlikely(domain->ops->unmap == NULL || + if (unlikely(ops->unmap == NULL || domain->pgsize_bitmap == 0UL)) return -ENODEV; @@ -1599,10 +1596,13 @@ size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size) while (unmapped < size) { size_t pgsize = iommu_pgsize(domain, iova, size - unmapped); - unmapped_page = domain->ops->unmap(domain, iova, pgsize); + unmapped_page = ops->unmap(domain, iova, pgsize); if (!unmapped_page) break; + if (sync && ops->iotlb_range_add) + ops->iotlb_range_add(domain, iova, pgsize); + pr_debug("unmapped: iova 0x%lx size 0x%zx\n", iova, unmapped_page); @@ -1610,11 +1610,27 @@ size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size) unmapped += unmapped_page; } + if (sync && ops->iotlb_sync) + ops->iotlb_sync(domain); + trace_unmap(orig_iova, size, unmapped); return unmapped; } + +size_t iommu_unmap(struct iommu_domain *domain, + unsigned long iova, size_t size) +{ + return __iommu_unmap(domain, iova, size, true); +} EXPORT_SYMBOL_GPL(iommu_unmap); +size_t iommu_unmap_fast(struct iommu_domain *domain, + unsigned long iova, size_t size) +{ + return __iommu_unmap(domain, iova, size, false); +} +EXPORT_SYMBOL_GPL(iommu_unmap_fast); + size_t default_iommu_map_sg(struct iommu_domain *domain, unsigned long iova, struct scatterlist *sg, unsigned int nents, int prot) { diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index 5c88ba70e4e0..33edfa794ae9 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -22,6 +22,7 @@ #include <linux/slab.h> #include <linux/smp.h> #include <linux/bitops.h> +#include <linux/cpu.h> static bool iova_rcache_insert(struct iova_domain *iovad, unsigned long pfn, @@ -31,6 +32,8 @@ static unsigned long iova_rcache_get(struct iova_domain *iovad, unsigned long limit_pfn); static void init_iova_rcaches(struct iova_domain *iovad); static void free_iova_rcaches(struct iova_domain *iovad); +static void fq_destroy_all_entries(struct iova_domain *iovad); +static void fq_flush_timeout(unsigned long data); void init_iova_domain(struct iova_domain *iovad, unsigned long granule, @@ -48,11 +51,62 @@ init_iova_domain(struct iova_domain *iovad, unsigned long granule, iovad->cached32_node = NULL; iovad->granule = granule; iovad->start_pfn = start_pfn; - iovad->dma_32bit_pfn = pfn_32bit; + iovad->dma_32bit_pfn = pfn_32bit + 1; + iovad->flush_cb = NULL; + iovad->fq = NULL; init_iova_rcaches(iovad); } EXPORT_SYMBOL_GPL(init_iova_domain); +static void free_iova_flush_queue(struct iova_domain *iovad) +{ + if (!iovad->fq) + return; + + if (timer_pending(&iovad->fq_timer)) + del_timer(&iovad->fq_timer); + + fq_destroy_all_entries(iovad); + + free_percpu(iovad->fq); + + iovad->fq = NULL; + iovad->flush_cb = NULL; + iovad->entry_dtor = NULL; +} + +int init_iova_flush_queue(struct iova_domain *iovad, + iova_flush_cb flush_cb, iova_entry_dtor entry_dtor) +{ + int cpu; + + atomic64_set(&iovad->fq_flush_start_cnt, 0); + atomic64_set(&iovad->fq_flush_finish_cnt, 0); + + iovad->fq = alloc_percpu(struct iova_fq); + if (!iovad->fq) + return -ENOMEM; + + iovad->flush_cb = flush_cb; + iovad->entry_dtor = entry_dtor; + + for_each_possible_cpu(cpu) { + struct iova_fq *fq; + + fq = per_cpu_ptr(iovad->fq, cpu); + fq->head = 0; + fq->tail = 0; + + spin_lock_init(&fq->lock); + } + + setup_timer(&iovad->fq_timer, fq_flush_timeout, (unsigned long)iovad); + atomic_set(&iovad->fq_timer_on, 0); + + return 0; +} +EXPORT_SYMBOL_GPL(init_iova_flush_queue); + static struct rb_node * __get_cached_rbnode(struct iova_domain *iovad, unsigned long *limit_pfn) { @@ -63,7 +117,7 @@ __get_cached_rbnode(struct iova_domain *iovad, unsigned long *limit_pfn) struct rb_node *prev_node = rb_prev(iovad->cached32_node); struct iova *curr_iova = rb_entry(iovad->cached32_node, struct iova, node); - *limit_pfn = curr_iova->pfn_lo - 1; + *limit_pfn = curr_iova->pfn_lo; return prev_node; } } @@ -135,7 +189,7 @@ iova_insert_rbtree(struct rb_root *root, struct iova *iova, static unsigned int iova_get_pad_size(unsigned int size, unsigned int limit_pfn) { - return (limit_pfn + 1 - size) & (__roundup_pow_of_two(size) - 1); + return (limit_pfn - size) & (__roundup_pow_of_two(size) - 1); } static int __alloc_and_insert_iova_range(struct iova_domain *iovad, @@ -155,18 +209,15 @@ static int __alloc_and_insert_iova_range(struct iova_domain *iovad, while (curr) { struct iova *curr_iova = rb_entry(curr, struct iova, node); - if (limit_pfn < curr_iova->pfn_lo) + if (limit_pfn <= curr_iova->pfn_lo) { goto move_left; - else if (limit_pfn < curr_iova->pfn_hi) - goto adjust_limit_pfn; - else { + } else if (limit_pfn > curr_iova->pfn_hi) { if (size_aligned) pad_size = iova_get_pad_size(size, limit_pfn); - if ((curr_iova->pfn_hi + size + pad_size) <= limit_pfn) + if ((curr_iova->pfn_hi + size + pad_size) < limit_pfn) break; /* found a free slot */ } -adjust_limit_pfn: - limit_pfn = curr_iova->pfn_lo ? (curr_iova->pfn_lo - 1) : 0; + limit_pfn = curr_iova->pfn_lo; move_left: prev = curr; curr = rb_prev(curr); @@ -182,7 +233,7 @@ move_left: } /* pfn_lo will point to size aligned address if size_aligned is set */ - new->pfn_lo = limit_pfn - (size + pad_size) + 1; + new->pfn_lo = limit_pfn - (size + pad_size); new->pfn_hi = new->pfn_lo + size - 1; /* If we have 'prev', it's a valid place to start the insertion. */ @@ -269,7 +320,7 @@ alloc_iova(struct iova_domain *iovad, unsigned long size, if (!new_iova) return NULL; - ret = __alloc_and_insert_iova_range(iovad, size, limit_pfn, + ret = __alloc_and_insert_iova_range(iovad, size, limit_pfn + 1, new_iova, size_aligned); if (ret) { @@ -398,10 +449,8 @@ retry: /* Try replenishing IOVAs by flushing rcache. */ flushed_rcache = true; - preempt_disable(); for_each_online_cpu(cpu) free_cpu_cached_iovas(cpu, iovad); - preempt_enable(); goto retry; } @@ -427,6 +476,135 @@ free_iova_fast(struct iova_domain *iovad, unsigned long pfn, unsigned long size) } EXPORT_SYMBOL_GPL(free_iova_fast); +#define fq_ring_for_each(i, fq) \ + for ((i) = (fq)->head; (i) != (fq)->tail; (i) = ((i) + 1) % IOVA_FQ_SIZE) + +static inline bool fq_full(struct iova_fq *fq) +{ + assert_spin_locked(&fq->lock); + return (((fq->tail + 1) % IOVA_FQ_SIZE) == fq->head); +} + +static inline unsigned fq_ring_add(struct iova_fq *fq) +{ + unsigned idx = fq->tail; + + assert_spin_locked(&fq->lock); + + fq->tail = (idx + 1) % IOVA_FQ_SIZE; + + return idx; +} + +static void fq_ring_free(struct iova_domain *iovad, struct iova_fq *fq) +{ + u64 counter = atomic64_read(&iovad->fq_flush_finish_cnt); + unsigned idx; + + assert_spin_locked(&fq->lock); + + fq_ring_for_each(idx, fq) { + + if (fq->entries[idx].counter >= counter) + break; + + if (iovad->entry_dtor) + iovad->entry_dtor(fq->entries[idx].data); + + free_iova_fast(iovad, + fq->entries[idx].iova_pfn, + fq->entries[idx].pages); + + fq->head = (fq->head + 1) % IOVA_FQ_SIZE; + } +} + +static void iova_domain_flush(struct iova_domain *iovad) +{ + atomic64_inc(&iovad->fq_flush_start_cnt); + iovad->flush_cb(iovad); + atomic64_inc(&iovad->fq_flush_finish_cnt); +} + +static void fq_destroy_all_entries(struct iova_domain *iovad) +{ + int cpu; + + /* + * This code runs when the iova_domain is being detroyed, so don't + * bother to free iovas, just call the entry_dtor on all remaining + * entries. + */ + if (!iovad->entry_dtor) + return; + + for_each_possible_cpu(cpu) { + struct iova_fq *fq = per_cpu_ptr(iovad->fq, cpu); + int idx; + + fq_ring_for_each(idx, fq) + iovad->entry_dtor(fq->entries[idx].data); + } +} + +static void fq_flush_timeout(unsigned long data) +{ + struct iova_domain *iovad = (struct iova_domain *)data; + int cpu; + + atomic_set(&iovad->fq_timer_on, 0); + iova_domain_flush(iovad); + + for_each_possible_cpu(cpu) { + unsigned long flags; + struct iova_fq *fq; + + fq = per_cpu_ptr(iovad->fq, cpu); + spin_lock_irqsave(&fq->lock, flags); + fq_ring_free(iovad, fq); + spin_unlock_irqrestore(&fq->lock, flags); + } +} + +void queue_iova(struct iova_domain *iovad, + unsigned long pfn, unsigned long pages, + unsigned long data) +{ + struct iova_fq *fq = get_cpu_ptr(iovad->fq); + unsigned long flags; + unsigned idx; + + spin_lock_irqsave(&fq->lock, flags); + + /* + * First remove all entries from the flush queue that have already been + * flushed out on another CPU. This makes the fq_full() check below less + * likely to be true. + */ + fq_ring_free(iovad, fq); + + if (fq_full(fq)) { + iova_domain_flush(iovad); + fq_ring_free(iovad, fq); + } + + idx = fq_ring_add(fq); + + fq->entries[idx].iova_pfn = pfn; + fq->entries[idx].pages = pages; + fq->entries[idx].data = data; + fq->entries[idx].counter = atomic64_read(&iovad->fq_flush_start_cnt); + + spin_unlock_irqrestore(&fq->lock, flags); + + if (atomic_cmpxchg(&iovad->fq_timer_on, 0, 1) == 0) + mod_timer(&iovad->fq_timer, + jiffies + msecs_to_jiffies(IOVA_FQ_TIMEOUT)); + + put_cpu_ptr(iovad->fq); +} +EXPORT_SYMBOL_GPL(queue_iova); + /** * put_iova_domain - destroys the iova doamin * @iovad: - iova domain in question. @@ -437,6 +615,7 @@ void put_iova_domain(struct iova_domain *iovad) struct rb_node *node; unsigned long flags; + free_iova_flush_queue(iovad); free_iova_rcaches(iovad); spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); node = rb_first(&iovad->rbroot); @@ -729,7 +908,7 @@ static bool __iova_rcache_insert(struct iova_domain *iovad, bool can_insert = false; unsigned long flags; - cpu_rcache = get_cpu_ptr(rcache->cpu_rcaches); + cpu_rcache = raw_cpu_ptr(rcache->cpu_rcaches); spin_lock_irqsave(&cpu_rcache->lock, flags); if (!iova_magazine_full(cpu_rcache->loaded)) { @@ -759,7 +938,6 @@ static bool __iova_rcache_insert(struct iova_domain *iovad, iova_magazine_push(cpu_rcache->loaded, iova_pfn); spin_unlock_irqrestore(&cpu_rcache->lock, flags); - put_cpu_ptr(rcache->cpu_rcaches); if (mag_to_free) { iova_magazine_free_pfns(mag_to_free, iovad); @@ -793,7 +971,7 @@ static unsigned long __iova_rcache_get(struct iova_rcache *rcache, bool has_pfn = false; unsigned long flags; - cpu_rcache = get_cpu_ptr(rcache->cpu_rcaches); + cpu_rcache = raw_cpu_ptr(rcache->cpu_rcaches); spin_lock_irqsave(&cpu_rcache->lock, flags); if (!iova_magazine_empty(cpu_rcache->loaded)) { @@ -815,7 +993,6 @@ static unsigned long __iova_rcache_get(struct iova_rcache *rcache, iova_pfn = iova_magazine_pop(cpu_rcache->loaded, limit_pfn); spin_unlock_irqrestore(&cpu_rcache->lock, flags); - put_cpu_ptr(rcache->cpu_rcaches); return iova_pfn; } diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index b7e14ee863f9..195d6e93ac71 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -8,7 +8,9 @@ * the Free Software Foundation; version 2 of the License. */ +#include <linux/bitmap.h> #include <linux/delay.h> +#include <linux/dma-iommu.h> #include <linux/dma-mapping.h> #include <linux/err.h> #include <linux/export.h> @@ -17,21 +19,29 @@ #include <linux/iommu.h> #include <linux/module.h> #include <linux/of.h> +#include <linux/of_platform.h> #include <linux/platform_device.h> #include <linux/sizes.h> #include <linux/slab.h> +#if defined(CONFIG_ARM) && !defined(CONFIG_IOMMU_DMA) #include <asm/dma-iommu.h> #include <asm/pgalloc.h> +#endif #include "io-pgtable.h" +#define IPMMU_CTX_MAX 1 + struct ipmmu_vmsa_device { struct device *dev; void __iomem *base; - struct list_head list; + struct iommu_device iommu; unsigned int num_utlbs; + spinlock_t lock; /* Protects ctx and domains[] */ + DECLARE_BITMAP(ctx, IPMMU_CTX_MAX); + struct ipmmu_vmsa_domain *domains[IPMMU_CTX_MAX]; struct dma_iommu_mapping *mapping; }; @@ -47,20 +57,22 @@ struct ipmmu_vmsa_domain { spinlock_t lock; /* Protects mappings */ }; -struct ipmmu_vmsa_archdata { +struct ipmmu_vmsa_iommu_priv { struct ipmmu_vmsa_device *mmu; - unsigned int *utlbs; - unsigned int num_utlbs; + struct device *dev; + struct list_head list; }; -static DEFINE_SPINLOCK(ipmmu_devices_lock); -static LIST_HEAD(ipmmu_devices); - static struct ipmmu_vmsa_domain *to_vmsa_domain(struct iommu_domain *dom) { return container_of(dom, struct ipmmu_vmsa_domain, io_domain); } +static struct ipmmu_vmsa_iommu_priv *to_priv(struct device *dev) +{ + return dev->iommu_fwspec ? dev->iommu_fwspec->iommu_priv : NULL; +} + #define TLB_LOOP_TIMEOUT 100 /* 100us */ /* ----------------------------------------------------------------------------- @@ -283,7 +295,7 @@ static void ipmmu_tlb_add_flush(unsigned long iova, size_t size, /* The hardware doesn't support selective TLB flush. */ } -static struct iommu_gather_ops ipmmu_gather_ops = { +static const struct iommu_gather_ops ipmmu_gather_ops = { .tlb_flush_all = ipmmu_tlb_flush_all, .tlb_add_flush = ipmmu_tlb_add_flush, .tlb_sync = ipmmu_tlb_flush_all, @@ -293,9 +305,42 @@ static struct iommu_gather_ops ipmmu_gather_ops = { * Domain/Context Management */ +static int ipmmu_domain_allocate_context(struct ipmmu_vmsa_device *mmu, + struct ipmmu_vmsa_domain *domain) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(&mmu->lock, flags); + + ret = find_first_zero_bit(mmu->ctx, IPMMU_CTX_MAX); + if (ret != IPMMU_CTX_MAX) { + mmu->domains[ret] = domain; + set_bit(ret, mmu->ctx); + } + + spin_unlock_irqrestore(&mmu->lock, flags); + + return ret; +} + +static void ipmmu_domain_free_context(struct ipmmu_vmsa_device *mmu, + unsigned int context_id) +{ + unsigned long flags; + + spin_lock_irqsave(&mmu->lock, flags); + + clear_bit(context_id, mmu->ctx); + mmu->domains[context_id] = NULL; + + spin_unlock_irqrestore(&mmu->lock, flags); +} + static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain) { u64 ttbr; + int ret; /* * Allocate the page table operations. @@ -309,7 +354,7 @@ static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain) * non-secure mode. */ domain->cfg.quirks = IO_PGTABLE_QUIRK_ARM_NS; - domain->cfg.pgsize_bitmap = SZ_1G | SZ_2M | SZ_4K, + domain->cfg.pgsize_bitmap = SZ_1G | SZ_2M | SZ_4K; domain->cfg.ias = 32; domain->cfg.oas = 40; domain->cfg.tlb = &ipmmu_gather_ops; @@ -321,16 +366,21 @@ static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain) */ domain->cfg.iommu_dev = domain->mmu->dev; + /* + * Find an unused context. + */ + ret = ipmmu_domain_allocate_context(domain->mmu, domain); + if (ret == IPMMU_CTX_MAX) + return -EBUSY; + + domain->context_id = ret; + domain->iop = alloc_io_pgtable_ops(ARM_32_LPAE_S1, &domain->cfg, domain); - if (!domain->iop) + if (!domain->iop) { + ipmmu_domain_free_context(domain->mmu, domain->context_id); return -EINVAL; - - /* - * TODO: When adding support for multiple contexts, find an unused - * context. - */ - domain->context_id = 0; + } /* TTBR0 */ ttbr = domain->cfg.arm_lpae_s1_cfg.ttbr[0]; @@ -382,6 +432,7 @@ static void ipmmu_domain_destroy_context(struct ipmmu_vmsa_domain *domain) */ ipmmu_ctx_write(domain, IMCTR, IMCTR_FLUSH); ipmmu_tlb_sync(domain); + ipmmu_domain_free_context(domain->mmu, domain->context_id); } /* ----------------------------------------------------------------------------- @@ -439,29 +490,35 @@ static irqreturn_t ipmmu_domain_irq(struct ipmmu_vmsa_domain *domain) static irqreturn_t ipmmu_irq(int irq, void *dev) { struct ipmmu_vmsa_device *mmu = dev; - struct iommu_domain *io_domain; - struct ipmmu_vmsa_domain *domain; + irqreturn_t status = IRQ_NONE; + unsigned int i; + unsigned long flags; - if (!mmu->mapping) - return IRQ_NONE; + spin_lock_irqsave(&mmu->lock, flags); - io_domain = mmu->mapping->domain; - domain = to_vmsa_domain(io_domain); + /* + * Check interrupts for all active contexts. + */ + for (i = 0; i < IPMMU_CTX_MAX; i++) { + if (!mmu->domains[i]) + continue; + if (ipmmu_domain_irq(mmu->domains[i]) == IRQ_HANDLED) + status = IRQ_HANDLED; + } - return ipmmu_domain_irq(domain); + spin_unlock_irqrestore(&mmu->lock, flags); + + return status; } /* ----------------------------------------------------------------------------- * IOMMU Operations */ -static struct iommu_domain *ipmmu_domain_alloc(unsigned type) +static struct iommu_domain *__ipmmu_domain_alloc(unsigned type) { struct ipmmu_vmsa_domain *domain; - if (type != IOMMU_DOMAIN_UNMANAGED) - return NULL; - domain = kzalloc(sizeof(*domain), GFP_KERNEL); if (!domain) return NULL; @@ -487,14 +544,15 @@ static void ipmmu_domain_free(struct iommu_domain *io_domain) static int ipmmu_attach_device(struct iommu_domain *io_domain, struct device *dev) { - struct ipmmu_vmsa_archdata *archdata = dev->archdata.iommu; - struct ipmmu_vmsa_device *mmu = archdata->mmu; + struct ipmmu_vmsa_iommu_priv *priv = to_priv(dev); + struct iommu_fwspec *fwspec = dev->iommu_fwspec; + struct ipmmu_vmsa_device *mmu = priv->mmu; struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain); unsigned long flags; unsigned int i; int ret = 0; - if (!mmu) { + if (!priv || !priv->mmu) { dev_err(dev, "Cannot attach to IPMMU\n"); return -ENXIO; } @@ -513,15 +571,16 @@ static int ipmmu_attach_device(struct iommu_domain *io_domain, dev_err(dev, "Can't attach IPMMU %s to domain on IPMMU %s\n", dev_name(mmu->dev), dev_name(domain->mmu->dev)); ret = -EINVAL; - } + } else + dev_info(dev, "Reusing IPMMU context %u\n", domain->context_id); spin_unlock_irqrestore(&domain->lock, flags); if (ret < 0) return ret; - for (i = 0; i < archdata->num_utlbs; ++i) - ipmmu_utlb_enable(domain, archdata->utlbs[i]); + for (i = 0; i < fwspec->num_ids; ++i) + ipmmu_utlb_enable(domain, fwspec->ids[i]); return 0; } @@ -529,12 +588,12 @@ static int ipmmu_attach_device(struct iommu_domain *io_domain, static void ipmmu_detach_device(struct iommu_domain *io_domain, struct device *dev) { - struct ipmmu_vmsa_archdata *archdata = dev->archdata.iommu; + struct iommu_fwspec *fwspec = dev->iommu_fwspec; struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain); unsigned int i; - for (i = 0; i < archdata->num_utlbs; ++i) - ipmmu_utlb_disable(domain, archdata->utlbs[i]); + for (i = 0; i < fwspec->num_ids; ++i) + ipmmu_utlb_disable(domain, fwspec->ids[i]); /* * TODO: Optimize by disabling the context when no device is attached. @@ -570,82 +629,59 @@ static phys_addr_t ipmmu_iova_to_phys(struct iommu_domain *io_domain, return domain->iop->iova_to_phys(domain->iop, iova); } -static int ipmmu_find_utlbs(struct ipmmu_vmsa_device *mmu, struct device *dev, - unsigned int *utlbs, unsigned int num_utlbs) +static int ipmmu_init_platform_device(struct device *dev, + struct of_phandle_args *args) { - unsigned int i; - - for (i = 0; i < num_utlbs; ++i) { - struct of_phandle_args args; - int ret; - - ret = of_parse_phandle_with_args(dev->of_node, "iommus", - "#iommu-cells", i, &args); - if (ret < 0) - return ret; + struct platform_device *ipmmu_pdev; + struct ipmmu_vmsa_iommu_priv *priv; - of_node_put(args.np); - - if (args.np != mmu->dev->of_node || args.args_count != 1) - return -EINVAL; + ipmmu_pdev = of_find_device_by_node(args->np); + if (!ipmmu_pdev) + return -ENODEV; - utlbs[i] = args.args[0]; - } + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + priv->mmu = platform_get_drvdata(ipmmu_pdev); + priv->dev = dev; + dev->iommu_fwspec->iommu_priv = priv; return 0; } -static int ipmmu_add_device(struct device *dev) +static int ipmmu_of_xlate(struct device *dev, + struct of_phandle_args *spec) { - struct ipmmu_vmsa_archdata *archdata; - struct ipmmu_vmsa_device *mmu; - struct iommu_group *group = NULL; - unsigned int *utlbs; - unsigned int i; - int num_utlbs; - int ret = -ENODEV; + iommu_fwspec_add_ids(dev, spec->args, 1); - if (dev->archdata.iommu) { - dev_warn(dev, "IOMMU driver already assigned to device %s\n", - dev_name(dev)); - return -EINVAL; - } - - /* Find the master corresponding to the device. */ - - num_utlbs = of_count_phandle_with_args(dev->of_node, "iommus", - "#iommu-cells"); - if (num_utlbs < 0) - return -ENODEV; + /* Initialize once - xlate() will call multiple times */ + if (to_priv(dev)) + return 0; - utlbs = kcalloc(num_utlbs, sizeof(*utlbs), GFP_KERNEL); - if (!utlbs) - return -ENOMEM; + return ipmmu_init_platform_device(dev, spec); +} - spin_lock(&ipmmu_devices_lock); +#if defined(CONFIG_ARM) && !defined(CONFIG_IOMMU_DMA) - list_for_each_entry(mmu, &ipmmu_devices, list) { - ret = ipmmu_find_utlbs(mmu, dev, utlbs, num_utlbs); - if (!ret) { - /* - * TODO Take a reference to the MMU to protect - * against device removal. - */ - break; - } - } +static struct iommu_domain *ipmmu_domain_alloc(unsigned type) +{ + if (type != IOMMU_DOMAIN_UNMANAGED) + return NULL; - spin_unlock(&ipmmu_devices_lock); + return __ipmmu_domain_alloc(type); +} - if (ret < 0) - goto error; +static int ipmmu_add_device(struct device *dev) +{ + struct ipmmu_vmsa_device *mmu = NULL; + struct iommu_group *group; + int ret; - for (i = 0; i < num_utlbs; ++i) { - if (utlbs[i] >= mmu->num_utlbs) { - ret = -EINVAL; - goto error; - } - } + /* + * Only let through devices that have been verified in xlate() + */ + if (!to_priv(dev)) + return -ENODEV; /* Create a device group and add the device to it. */ group = iommu_group_alloc(); @@ -664,17 +700,6 @@ static int ipmmu_add_device(struct device *dev) goto error; } - archdata = kzalloc(sizeof(*archdata), GFP_KERNEL); - if (!archdata) { - ret = -ENOMEM; - goto error; - } - - archdata->mmu = mmu; - archdata->utlbs = utlbs; - archdata->num_utlbs = num_utlbs; - dev->archdata.iommu = archdata; - /* * Create the ARM mapping, used by the ARM DMA mapping core to allocate * VAs. This will allocate a corresponding IOMMU domain. @@ -684,6 +709,7 @@ static int ipmmu_add_device(struct device *dev) * - Make the mapping size configurable ? We currently use a 2GB mapping * at a 1GB offset to ensure that NULL VAs will fault. */ + mmu = to_priv(dev)->mmu; if (!mmu->mapping) { struct dma_iommu_mapping *mapping; @@ -708,12 +734,8 @@ static int ipmmu_add_device(struct device *dev) return 0; error: - arm_iommu_release_mapping(mmu->mapping); - - kfree(dev->archdata.iommu); - kfree(utlbs); - - dev->archdata.iommu = NULL; + if (mmu) + arm_iommu_release_mapping(mmu->mapping); if (!IS_ERR_OR_NULL(group)) iommu_group_remove_device(dev); @@ -723,15 +745,8 @@ error: static void ipmmu_remove_device(struct device *dev) { - struct ipmmu_vmsa_archdata *archdata = dev->archdata.iommu; - arm_iommu_detach_device(dev); iommu_group_remove_device(dev); - - kfree(archdata->utlbs); - kfree(archdata); - - dev->archdata.iommu = NULL; } static const struct iommu_ops ipmmu_ops = { @@ -746,8 +761,132 @@ static const struct iommu_ops ipmmu_ops = { .add_device = ipmmu_add_device, .remove_device = ipmmu_remove_device, .pgsize_bitmap = SZ_1G | SZ_2M | SZ_4K, + .of_xlate = ipmmu_of_xlate, }; +#endif /* !CONFIG_ARM && CONFIG_IOMMU_DMA */ + +#ifdef CONFIG_IOMMU_DMA + +static DEFINE_SPINLOCK(ipmmu_slave_devices_lock); +static LIST_HEAD(ipmmu_slave_devices); + +static struct iommu_domain *ipmmu_domain_alloc_dma(unsigned type) +{ + struct iommu_domain *io_domain = NULL; + + switch (type) { + case IOMMU_DOMAIN_UNMANAGED: + io_domain = __ipmmu_domain_alloc(type); + break; + + case IOMMU_DOMAIN_DMA: + io_domain = __ipmmu_domain_alloc(type); + if (io_domain) + iommu_get_dma_cookie(io_domain); + break; + } + + return io_domain; +} + +static void ipmmu_domain_free_dma(struct iommu_domain *io_domain) +{ + switch (io_domain->type) { + case IOMMU_DOMAIN_DMA: + iommu_put_dma_cookie(io_domain); + /* fall-through */ + default: + ipmmu_domain_free(io_domain); + break; + } +} + +static int ipmmu_add_device_dma(struct device *dev) +{ + struct iommu_group *group; + + /* + * Only let through devices that have been verified in xlate() + */ + if (!to_priv(dev)) + return -ENODEV; + + group = iommu_group_get_for_dev(dev); + if (IS_ERR(group)) + return PTR_ERR(group); + + spin_lock(&ipmmu_slave_devices_lock); + list_add(&to_priv(dev)->list, &ipmmu_slave_devices); + spin_unlock(&ipmmu_slave_devices_lock); + return 0; +} + +static void ipmmu_remove_device_dma(struct device *dev) +{ + struct ipmmu_vmsa_iommu_priv *priv = to_priv(dev); + + spin_lock(&ipmmu_slave_devices_lock); + list_del(&priv->list); + spin_unlock(&ipmmu_slave_devices_lock); + + iommu_group_remove_device(dev); +} + +static struct device *ipmmu_find_sibling_device(struct device *dev) +{ + struct ipmmu_vmsa_iommu_priv *priv = to_priv(dev); + struct ipmmu_vmsa_iommu_priv *sibling_priv = NULL; + bool found = false; + + spin_lock(&ipmmu_slave_devices_lock); + + list_for_each_entry(sibling_priv, &ipmmu_slave_devices, list) { + if (priv == sibling_priv) + continue; + if (sibling_priv->mmu == priv->mmu) { + found = true; + break; + } + } + + spin_unlock(&ipmmu_slave_devices_lock); + + return found ? sibling_priv->dev : NULL; +} + +static struct iommu_group *ipmmu_find_group_dma(struct device *dev) +{ + struct iommu_group *group; + struct device *sibling; + + sibling = ipmmu_find_sibling_device(dev); + if (sibling) + group = iommu_group_get(sibling); + if (!sibling || IS_ERR(group)) + group = generic_device_group(dev); + + return group; +} + +static const struct iommu_ops ipmmu_ops = { + .domain_alloc = ipmmu_domain_alloc_dma, + .domain_free = ipmmu_domain_free_dma, + .attach_dev = ipmmu_attach_device, + .detach_dev = ipmmu_detach_device, + .map = ipmmu_map, + .unmap = ipmmu_unmap, + .map_sg = default_iommu_map_sg, + .iova_to_phys = ipmmu_iova_to_phys, + .add_device = ipmmu_add_device_dma, + .remove_device = ipmmu_remove_device_dma, + .device_group = ipmmu_find_group_dma, + .pgsize_bitmap = SZ_1G | SZ_2M | SZ_4K, + .of_xlate = ipmmu_of_xlate, +}; + +#endif /* CONFIG_IOMMU_DMA */ + /* ----------------------------------------------------------------------------- * Probe/remove and init */ @@ -768,11 +907,6 @@ static int ipmmu_probe(struct platform_device *pdev) int irq; int ret; - if (!IS_ENABLED(CONFIG_OF) && !pdev->dev.platform_data) { - dev_err(&pdev->dev, "missing platform data\n"); - return -EINVAL; - } - mmu = devm_kzalloc(&pdev->dev, sizeof(*mmu), GFP_KERNEL); if (!mmu) { dev_err(&pdev->dev, "cannot allocate device data\n"); @@ -781,6 +915,8 @@ static int ipmmu_probe(struct platform_device *pdev) mmu->dev = &pdev->dev; mmu->num_utlbs = 32; + spin_lock_init(&mmu->lock); + bitmap_zero(mmu->ctx, IPMMU_CTX_MAX); /* Map I/O memory and request IRQ. */ res = platform_get_resource(pdev, IORESOURCE_MEM, 0); @@ -817,16 +953,24 @@ static int ipmmu_probe(struct platform_device *pdev) ipmmu_device_reset(mmu); + ret = iommu_device_sysfs_add(&mmu->iommu, &pdev->dev, NULL, + dev_name(&pdev->dev)); + if (ret) + return ret; + + iommu_device_set_ops(&mmu->iommu, &ipmmu_ops); + iommu_device_set_fwnode(&mmu->iommu, &pdev->dev.of_node->fwnode); + + ret = iommu_device_register(&mmu->iommu); + if (ret) + return ret; + /* * We can't create the ARM mapping here as it requires the bus to have * an IOMMU, which only happens when bus_set_iommu() is called in * ipmmu_init() after the probe function returns. */ - spin_lock(&ipmmu_devices_lock); - list_add(&mmu->list, &ipmmu_devices); - spin_unlock(&ipmmu_devices_lock); - platform_set_drvdata(pdev, mmu); return 0; @@ -836,11 +980,12 @@ static int ipmmu_remove(struct platform_device *pdev) { struct ipmmu_vmsa_device *mmu = platform_get_drvdata(pdev); - spin_lock(&ipmmu_devices_lock); - list_del(&mmu->list); - spin_unlock(&ipmmu_devices_lock); + iommu_device_sysfs_remove(&mmu->iommu); + iommu_device_unregister(&mmu->iommu); +#if defined(CONFIG_ARM) && !defined(CONFIG_IOMMU_DMA) arm_iommu_release_mapping(mmu->mapping); +#endif ipmmu_device_reset(mmu); diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c index d0448353d501..04f4d51ffacb 100644 --- a/drivers/iommu/msm_iommu.c +++ b/drivers/iommu/msm_iommu.c @@ -393,6 +393,7 @@ static struct msm_iommu_dev *find_iommu_for_dev(struct device *dev) static int msm_iommu_add_device(struct device *dev) { struct msm_iommu_dev *iommu; + struct iommu_group *group; unsigned long flags; int ret = 0; @@ -406,7 +407,16 @@ static int msm_iommu_add_device(struct device *dev) spin_unlock_irqrestore(&msm_iommu_lock, flags); - return ret; + if (ret) + return ret; + + group = iommu_group_get_for_dev(dev); + if (IS_ERR(group)) + return PTR_ERR(group); + + iommu_group_put(group); + + return 0; } static void msm_iommu_remove_device(struct device *dev) @@ -421,6 +431,8 @@ static void msm_iommu_remove_device(struct device *dev) iommu_device_unlink(&iommu->iommu, dev); spin_unlock_irqrestore(&msm_iommu_lock, flags); + + iommu_group_remove_device(dev); } static int msm_iommu_attach_dev(struct iommu_domain *domain, struct device *dev) @@ -700,6 +712,7 @@ static struct iommu_ops msm_iommu_ops = { .iova_to_phys = msm_iommu_iova_to_phys, .add_device = msm_iommu_add_device, .remove_device = msm_iommu_remove_device, + .device_group = generic_device_group, .pgsize_bitmap = MSM_IOMMU_PGSIZES, .of_xlate = qcom_iommu_of_xlate, }; diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 5d14cd15198d..bd515be5b380 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -31,7 +31,6 @@ #include <linux/slab.h> #include <linux/spinlock.h> #include <asm/barrier.h> -#include <dt-bindings/memory/mt8173-larb-port.h> #include <soc/mediatek/smi.h> #include "mtk_iommu.h" @@ -54,10 +53,16 @@ #define REG_MMU_CTRL_REG 0x110 #define F_MMU_PREFETCH_RT_REPLACE_MOD BIT(4) -#define F_MMU_TF_PROTECT_SEL(prot) (((prot) & 0x3) << 5) +#define F_MMU_TF_PROTECT_SEL_SHIFT(data) \ + ((data)->m4u_plat == M4U_MT2712 ? 4 : 5) +/* It's named by F_MMU_TF_PROT_SEL in mt2712. */ +#define F_MMU_TF_PROTECT_SEL(prot, data) \ + (((prot) & 0x3) << F_MMU_TF_PROTECT_SEL_SHIFT(data)) #define REG_MMU_IVRP_PADDR 0x114 #define F_MMU_IVRP_PA_SET(pa, ext) (((pa) >> 1) | ((!!(ext)) << 31)) +#define REG_MMU_VLD_PA_RNG 0x118 +#define F_MMU_VLD_PA_RNG(EA, SA) (((EA) << 8) | (SA)) #define REG_MMU_INT_CONTROL0 0x120 #define F_L2_MULIT_HIT_EN BIT(0) @@ -82,7 +87,6 @@ #define REG_MMU_FAULT_ST1 0x134 #define REG_MMU_FAULT_VA 0x13c -#define F_MMU_FAULT_VA_MSK 0xfffff000 #define F_MMU_FAULT_VA_WRITE_BIT BIT(1) #define F_MMU_FAULT_VA_LAYER_BIT BIT(0) @@ -93,6 +97,13 @@ #define MTK_PROTECT_PA_ALIGN 128 +/* + * Get the local arbiter ID and the portid within the larb arbiter + * from mtk_m4u_id which is defined by MTK_M4U_ID. + */ +#define MTK_M4U_TO_LARB(id) (((id) >> 5) & 0xf) +#define MTK_M4U_TO_PORT(id) ((id) & 0x1f) + struct mtk_iommu_domain { spinlock_t pgtlock; /* lock for page table */ @@ -104,6 +115,27 @@ struct mtk_iommu_domain { static struct iommu_ops mtk_iommu_ops; +static LIST_HEAD(m4ulist); /* List all the M4U HWs */ + +#define for_each_m4u(data) list_for_each_entry(data, &m4ulist, list) + +/* + * There may be 1 or 2 M4U HWs, But we always expect they are in the same domain + * for the performance. + * + * Here always return the mtk_iommu_data of the first probed M4U where the + * iommu domain information is recorded. + */ +static struct mtk_iommu_data *mtk_iommu_get_m4u_data(void) +{ + struct mtk_iommu_data *data; + + for_each_m4u(data) + return data; + + return NULL; +} + static struct mtk_iommu_domain *to_mtk_domain(struct iommu_domain *dom) { return container_of(dom, struct mtk_iommu_domain, domain); @@ -113,9 +145,12 @@ static void mtk_iommu_tlb_flush_all(void *cookie) { struct mtk_iommu_data *data = cookie; - writel_relaxed(F_INVLD_EN1 | F_INVLD_EN0, data->base + REG_MMU_INV_SEL); - writel_relaxed(F_ALL_INVLD, data->base + REG_MMU_INVALIDATE); - wmb(); /* Make sure the tlb flush all done */ + for_each_m4u(data) { + writel_relaxed(F_INVLD_EN1 | F_INVLD_EN0, + data->base + REG_MMU_INV_SEL); + writel_relaxed(F_ALL_INVLD, data->base + REG_MMU_INVALIDATE); + wmb(); /* Make sure the tlb flush all done */ + } } static void mtk_iommu_tlb_add_flush_nosync(unsigned long iova, size_t size, @@ -124,11 +159,17 @@ static void mtk_iommu_tlb_add_flush_nosync(unsigned long iova, size_t size, { struct mtk_iommu_data *data = cookie; - writel_relaxed(F_INVLD_EN1 | F_INVLD_EN0, data->base + REG_MMU_INV_SEL); + for_each_m4u(data) { + writel_relaxed(F_INVLD_EN1 | F_INVLD_EN0, + data->base + REG_MMU_INV_SEL); - writel_relaxed(iova, data->base + REG_MMU_INVLD_START_A); - writel_relaxed(iova + size - 1, data->base + REG_MMU_INVLD_END_A); - writel_relaxed(F_MMU_INV_RANGE, data->base + REG_MMU_INVALIDATE); + writel_relaxed(iova, data->base + REG_MMU_INVLD_START_A); + writel_relaxed(iova + size - 1, + data->base + REG_MMU_INVLD_END_A); + writel_relaxed(F_MMU_INV_RANGE, + data->base + REG_MMU_INVALIDATE); + data->tlb_flush_active = true; + } } static void mtk_iommu_tlb_sync(void *cookie) @@ -137,15 +178,22 @@ static void mtk_iommu_tlb_sync(void *cookie) int ret; u32 tmp; - ret = readl_poll_timeout_atomic(data->base + REG_MMU_CPE_DONE, tmp, - tmp != 0, 10, 100000); - if (ret) { - dev_warn(data->dev, - "Partial TLB flush timed out, falling back to full flush\n"); - mtk_iommu_tlb_flush_all(cookie); + for_each_m4u(data) { + /* Avoid timing out if there's nothing to wait for */ + if (!data->tlb_flush_active) + return; + + ret = readl_poll_timeout_atomic(data->base + REG_MMU_CPE_DONE, + tmp, tmp != 0, 10, 100000); + if (ret) { + dev_warn(data->dev, + "Partial TLB flush timed out, falling back to full flush\n"); + mtk_iommu_tlb_flush_all(cookie); + } + /* Clear the CPE status */ + writel_relaxed(0, data->base + REG_MMU_CPE_DONE); + data->tlb_flush_active = false; } - /* Clear the CPE status */ - writel_relaxed(0, data->base + REG_MMU_CPE_DONE); } static const struct iommu_gather_ops mtk_iommu_gather_ops = { @@ -167,7 +215,6 @@ static irqreturn_t mtk_iommu_isr(int irq, void *dev_id) fault_iova = readl_relaxed(data->base + REG_MMU_FAULT_VA); layer = fault_iova & F_MMU_FAULT_VA_LAYER_BIT; write = fault_iova & F_MMU_FAULT_VA_WRITE_BIT; - fault_iova &= F_MMU_FAULT_VA_MSK; fault_pa = readl_relaxed(data->base + REG_MMU_INVLD_PA); regval = readl_relaxed(data->base + REG_MMU_INT_ID); fault_larb = F_MMU0_INT_ID_LARB_ID(regval); @@ -215,9 +262,9 @@ static void mtk_iommu_config(struct mtk_iommu_data *data, } } -static int mtk_iommu_domain_finalise(struct mtk_iommu_data *data) +static int mtk_iommu_domain_finalise(struct mtk_iommu_domain *dom) { - struct mtk_iommu_domain *dom = data->m4u_dom; + struct mtk_iommu_data *data = mtk_iommu_get_m4u_data(); spin_lock_init(&dom->pgtlock); @@ -243,9 +290,6 @@ static int mtk_iommu_domain_finalise(struct mtk_iommu_data *data) /* Update our support page sizes bitmap */ dom->domain.pgsize_bitmap = dom->cfg.pgsize_bitmap; - - writel(data->m4u_dom->cfg.arm_v7s_cfg.ttbr[0], - data->base + REG_MMU_PT_BASE_ADDR); return 0; } @@ -260,20 +304,30 @@ static struct iommu_domain *mtk_iommu_domain_alloc(unsigned type) if (!dom) return NULL; - if (iommu_get_dma_cookie(&dom->domain)) { - kfree(dom); - return NULL; - } + if (iommu_get_dma_cookie(&dom->domain)) + goto free_dom; + + if (mtk_iommu_domain_finalise(dom)) + goto put_dma_cookie; dom->domain.geometry.aperture_start = 0; dom->domain.geometry.aperture_end = DMA_BIT_MASK(32); dom->domain.geometry.force_aperture = true; return &dom->domain; + +put_dma_cookie: + iommu_put_dma_cookie(&dom->domain); +free_dom: + kfree(dom); + return NULL; } static void mtk_iommu_domain_free(struct iommu_domain *domain) { + struct mtk_iommu_domain *dom = to_mtk_domain(domain); + + free_io_pgtable_ops(dom->iop); iommu_put_dma_cookie(domain); kfree(to_mtk_domain(domain)); } @@ -283,22 +337,15 @@ static int mtk_iommu_attach_device(struct iommu_domain *domain, { struct mtk_iommu_domain *dom = to_mtk_domain(domain); struct mtk_iommu_data *data = dev->iommu_fwspec->iommu_priv; - int ret; if (!data) return -ENODEV; + /* Update the pgtable base address register of the M4U HW */ if (!data->m4u_dom) { data->m4u_dom = dom; - ret = mtk_iommu_domain_finalise(data); - if (ret) { - data->m4u_dom = NULL; - return ret; - } - } else if (data->m4u_dom != dom) { - /* All the client devices should be in the same m4u domain */ - dev_err(dev, "try to attach into the error iommu domain\n"); - return -EPERM; + writel(dom->cfg.arm_v7s_cfg.ttbr[0], + data->base + REG_MMU_PT_BASE_ADDR); } mtk_iommu_config(data, dev, true); @@ -348,6 +395,7 @@ static phys_addr_t mtk_iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova) { struct mtk_iommu_domain *dom = to_mtk_domain(domain); + struct mtk_iommu_data *data = mtk_iommu_get_m4u_data(); unsigned long flags; phys_addr_t pa; @@ -355,6 +403,9 @@ static phys_addr_t mtk_iommu_iova_to_phys(struct iommu_domain *domain, pa = dom->iop->iova_to_phys(dom->iop, iova); spin_unlock_irqrestore(&dom->pgtlock, flags); + if (data->enable_4GB) + pa |= BIT_ULL(32); + return pa; } @@ -393,7 +444,7 @@ static void mtk_iommu_remove_device(struct device *dev) static struct iommu_group *mtk_iommu_device_group(struct device *dev) { - struct mtk_iommu_data *data = dev->iommu_fwspec->iommu_priv; + struct mtk_iommu_data *data = mtk_iommu_get_m4u_data(); if (!data) return ERR_PTR(-ENODEV); @@ -458,8 +509,9 @@ static int mtk_iommu_hw_init(const struct mtk_iommu_data *data) return ret; } - regval = F_MMU_PREFETCH_RT_REPLACE_MOD | - F_MMU_TF_PROTECT_SEL(2); + regval = F_MMU_TF_PROTECT_SEL(2, data); + if (data->m4u_plat == M4U_MT8173) + regval |= F_MMU_PREFETCH_RT_REPLACE_MOD; writel_relaxed(regval, data->base + REG_MMU_CTRL_REG); regval = F_L2_MULIT_HIT_EN | @@ -481,9 +533,19 @@ static int mtk_iommu_hw_init(const struct mtk_iommu_data *data) writel_relaxed(F_MMU_IVRP_PA_SET(data->protect_base, data->enable_4GB), data->base + REG_MMU_IVRP_PADDR); - + if (data->enable_4GB && data->m4u_plat != M4U_MT8173) { + /* + * If 4GB mode is enabled, the validate PA range is from + * 0x1_0000_0000 to 0x1_ffff_ffff. here record bit[32:30]. + */ + regval = F_MMU_VLD_PA_RNG(7, 4); + writel_relaxed(regval, data->base + REG_MMU_VLD_PA_RNG); + } writel_relaxed(0, data->base + REG_MMU_DCM_DIS); - writel_relaxed(0, data->base + REG_MMU_STANDARD_AXI_MODE); + + /* It's MISC control register whose default value is ok except mt8173.*/ + if (data->m4u_plat == M4U_MT8173) + writel_relaxed(0, data->base + REG_MMU_STANDARD_AXI_MODE); if (devm_request_irq(data->dev, data->irq, mtk_iommu_isr, 0, dev_name(data->dev), (void *)data)) { @@ -515,6 +577,7 @@ static int mtk_iommu_probe(struct platform_device *pdev) if (!data) return -ENOMEM; data->dev = dev; + data->m4u_plat = (enum mtk_iommu_plat)of_device_get_match_data(dev); /* Protect memory. HW will access here while translation fault.*/ protect = devm_kzalloc(dev, MTK_PROTECT_PA_ALIGN * 2, GFP_KERNEL); @@ -523,7 +586,7 @@ static int mtk_iommu_probe(struct platform_device *pdev) data->protect_base = ALIGN(virt_to_phys(protect), MTK_PROTECT_PA_ALIGN); /* Whether the current dram is over 4GB */ - data->enable_4GB = !!(max_pfn > (0xffffffffUL >> PAGE_SHIFT)); + data->enable_4GB = !!(max_pfn > (BIT_ULL(32) >> PAGE_SHIFT)); res = platform_get_resource(pdev, IORESOURCE_MEM, 0); data->base = devm_ioremap_resource(dev, res); @@ -548,6 +611,7 @@ static int mtk_iommu_probe(struct platform_device *pdev) for (i = 0; i < larb_nr; i++) { struct device_node *larbnode; struct platform_device *plarbdev; + u32 id; larbnode = of_parse_phandle(dev->of_node, "mediatek,larbs", i); if (!larbnode) @@ -556,17 +620,14 @@ static int mtk_iommu_probe(struct platform_device *pdev) if (!of_device_is_available(larbnode)) continue; + ret = of_property_read_u32(larbnode, "mediatek,larb-id", &id); + if (ret)/* The id is consecutive if there is no this property */ + id = i; + plarbdev = of_find_device_by_node(larbnode); - if (!plarbdev) { - plarbdev = of_platform_device_create( - larbnode, NULL, - platform_bus_type.dev_root); - if (!plarbdev) { - of_node_put(larbnode); - return -EPROBE_DEFER; - } - } - data->smi_imu.larb_imu[i].dev = &plarbdev->dev; + if (!plarbdev) + return -EPROBE_DEFER; + data->smi_imu.larb_imu[id].dev = &plarbdev->dev; component_match_add_release(dev, &match, release_of, compare_of, larbnode); @@ -590,6 +651,8 @@ static int mtk_iommu_probe(struct platform_device *pdev) if (ret) return ret; + list_add_tail(&data->list, &m4ulist); + if (!iommu_present(&platform_bus_type)) bus_set_iommu(&platform_bus_type, &mtk_iommu_ops); @@ -606,7 +669,6 @@ static int mtk_iommu_remove(struct platform_device *pdev) if (iommu_present(&platform_bus_type)) bus_set_iommu(&platform_bus_type, NULL); - free_io_pgtable_ops(data->m4u_dom->iop); clk_disable_unprepare(data->bclk); devm_free_irq(&pdev->dev, data->irq, data); component_master_del(&pdev->dev, &mtk_iommu_com_ops); @@ -625,6 +687,7 @@ static int __maybe_unused mtk_iommu_suspend(struct device *dev) reg->ctrl_reg = readl_relaxed(base + REG_MMU_CTRL_REG); reg->int_control0 = readl_relaxed(base + REG_MMU_INT_CONTROL0); reg->int_main_control = readl_relaxed(base + REG_MMU_INT_MAIN_CONTROL); + clk_disable_unprepare(data->bclk); return 0; } @@ -633,9 +696,13 @@ static int __maybe_unused mtk_iommu_resume(struct device *dev) struct mtk_iommu_data *data = dev_get_drvdata(dev); struct mtk_iommu_suspend_reg *reg = &data->reg; void __iomem *base = data->base; + int ret; - writel_relaxed(data->m4u_dom->cfg.arm_v7s_cfg.ttbr[0], - base + REG_MMU_PT_BASE_ADDR); + ret = clk_prepare_enable(data->bclk); + if (ret) { + dev_err(data->dev, "Failed to enable clk(%d) in resume\n", ret); + return ret; + } writel_relaxed(reg->standard_axi_mode, base + REG_MMU_STANDARD_AXI_MODE); writel_relaxed(reg->dcm_dis, base + REG_MMU_DCM_DIS); @@ -644,15 +711,19 @@ static int __maybe_unused mtk_iommu_resume(struct device *dev) writel_relaxed(reg->int_main_control, base + REG_MMU_INT_MAIN_CONTROL); writel_relaxed(F_MMU_IVRP_PA_SET(data->protect_base, data->enable_4GB), base + REG_MMU_IVRP_PADDR); + if (data->m4u_dom) + writel(data->m4u_dom->cfg.arm_v7s_cfg.ttbr[0], + base + REG_MMU_PT_BASE_ADDR); return 0; } -const struct dev_pm_ops mtk_iommu_pm_ops = { - SET_SYSTEM_SLEEP_PM_OPS(mtk_iommu_suspend, mtk_iommu_resume) +static const struct dev_pm_ops mtk_iommu_pm_ops = { + SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(mtk_iommu_suspend, mtk_iommu_resume) }; static const struct of_device_id mtk_iommu_of_ids[] = { - { .compatible = "mediatek,mt8173-m4u", }, + { .compatible = "mediatek,mt2712-m4u", .data = (void *)M4U_MT2712}, + { .compatible = "mediatek,mt8173-m4u", .data = (void *)M4U_MT8173}, {} }; @@ -661,27 +732,20 @@ static struct platform_driver mtk_iommu_driver = { .remove = mtk_iommu_remove, .driver = { .name = "mtk-iommu", - .of_match_table = mtk_iommu_of_ids, + .of_match_table = of_match_ptr(mtk_iommu_of_ids), .pm = &mtk_iommu_pm_ops, } }; -static int mtk_iommu_init_fn(struct device_node *np) +static int __init mtk_iommu_init(void) { int ret; - struct platform_device *pdev; - - pdev = of_platform_device_create(np, NULL, platform_bus_type.dev_root); - if (!pdev) - return -ENOMEM; ret = platform_driver_register(&mtk_iommu_driver); - if (ret) { - pr_err("%s: Failed to register driver\n", __func__); - return ret; - } + if (ret != 0) + pr_err("Failed to register MTK IOMMU driver\n"); - return 0; + return ret; } -IOMMU_OF_DECLARE(mtkm4u, "mediatek,mt8173-m4u", mtk_iommu_init_fn); +subsys_initcall(mtk_iommu_init) diff --git a/drivers/iommu/mtk_iommu.h b/drivers/iommu/mtk_iommu.h index 2a28eadeea0e..b4451a1c7c2f 100644 --- a/drivers/iommu/mtk_iommu.h +++ b/drivers/iommu/mtk_iommu.h @@ -34,6 +34,12 @@ struct mtk_iommu_suspend_reg { u32 int_main_control; }; +enum mtk_iommu_plat { + M4U_MT2701, + M4U_MT2712, + M4U_MT8173, +}; + struct mtk_iommu_domain; struct mtk_iommu_data { @@ -47,8 +53,12 @@ struct mtk_iommu_data { struct iommu_group *m4u_group; struct mtk_smi_iommu smi_imu; /* SMI larb iommu info */ bool enable_4GB; + bool tlb_flush_active; struct iommu_device iommu; + enum mtk_iommu_plat m4u_plat; + + struct list_head list; }; static inline int compare_of(struct device *dev, void *data) diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c index a27ef570c328..bc1efbfb9ddf 100644 --- a/drivers/iommu/mtk_iommu_v1.c +++ b/drivers/iommu/mtk_iommu_v1.c @@ -18,6 +18,7 @@ #include <linux/clk.h> #include <linux/component.h> #include <linux/device.h> +#include <linux/dma-mapping.h> #include <linux/dma-iommu.h> #include <linux/err.h> #include <linux/interrupt.h> diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c index 9f44ee8ea1bc..e60e3dba85a0 100644 --- a/drivers/iommu/of_iommu.c +++ b/drivers/iommu/of_iommu.c @@ -25,6 +25,8 @@ #include <linux/of_pci.h> #include <linux/slab.h> +#define NO_IOMMU 1 + static const struct of_device_id __iommu_of_table_sentinel __used __section(__iommu_of_table_end); @@ -103,14 +105,14 @@ static bool of_iommu_driver_present(struct device_node *np) * it never will be. We don't want to defer indefinitely, nor attempt * to dereference __iommu_of_table after it's been freed. */ - if (system_state > SYSTEM_BOOTING) + if (system_state >= SYSTEM_RUNNING) return false; return of_match_node(&__iommu_of_table, np); } -static const struct iommu_ops -*of_iommu_xlate(struct device *dev, struct of_phandle_args *iommu_spec) +static int of_iommu_xlate(struct device *dev, + struct of_phandle_args *iommu_spec) { const struct iommu_ops *ops; struct fwnode_handle *fwnode = &iommu_spec->np->fwnode; @@ -118,96 +120,55 @@ static const struct iommu_ops ops = iommu_ops_from_fwnode(fwnode); if ((ops && !ops->of_xlate) || + !of_device_is_available(iommu_spec->np) || (!ops && !of_iommu_driver_present(iommu_spec->np))) - return NULL; + return NO_IOMMU; err = iommu_fwspec_init(dev, &iommu_spec->np->fwnode, ops); if (err) - return ERR_PTR(err); + return err; /* * The otherwise-empty fwspec handily serves to indicate the specific * IOMMU device we're waiting for, which will be useful if we ever get * a proper probe-ordering dependency mechanism in future. */ if (!ops) - return ERR_PTR(-EPROBE_DEFER); - - err = ops->of_xlate(dev, iommu_spec); - if (err) - return ERR_PTR(err); + return -EPROBE_DEFER; - return ops; + return ops->of_xlate(dev, iommu_spec); } -static int __get_pci_rid(struct pci_dev *pdev, u16 alias, void *data) -{ - struct of_phandle_args *iommu_spec = data; - - iommu_spec->args[0] = alias; - return iommu_spec->np == pdev->bus->dev.of_node; -} +struct of_pci_iommu_alias_info { + struct device *dev; + struct device_node *np; +}; -static const struct iommu_ops -*of_pci_iommu_init(struct pci_dev *pdev, struct device_node *bridge_np) +static int of_pci_iommu_init(struct pci_dev *pdev, u16 alias, void *data) { - const struct iommu_ops *ops; - struct of_phandle_args iommu_spec; + struct of_pci_iommu_alias_info *info = data; + struct of_phandle_args iommu_spec = { .args_count = 1 }; int err; - /* - * Start by tracing the RID alias down the PCI topology as - * far as the host bridge whose OF node we have... - * (we're not even attempting to handle multi-alias devices yet) - */ - iommu_spec.args_count = 1; - iommu_spec.np = bridge_np; - pci_for_each_dma_alias(pdev, __get_pci_rid, &iommu_spec); - /* - * ...then find out what that becomes once it escapes the PCI - * bus into the system beyond, and which IOMMU it ends up at. - */ - iommu_spec.np = NULL; - err = of_pci_map_rid(bridge_np, iommu_spec.args[0], "iommu-map", + err = of_pci_map_rid(info->np, alias, "iommu-map", "iommu-map-mask", &iommu_spec.np, iommu_spec.args); if (err) - return err == -ENODEV ? NULL : ERR_PTR(err); - - ops = of_iommu_xlate(&pdev->dev, &iommu_spec); + return err == -ENODEV ? NO_IOMMU : err; + err = of_iommu_xlate(info->dev, &iommu_spec); of_node_put(iommu_spec.np); - return ops; -} - -static const struct iommu_ops -*of_platform_iommu_init(struct device *dev, struct device_node *np) -{ - struct of_phandle_args iommu_spec; - const struct iommu_ops *ops = NULL; - int idx = 0; - - /* - * We don't currently walk up the tree looking for a parent IOMMU. - * See the `Notes:' section of - * Documentation/devicetree/bindings/iommu/iommu.txt - */ - while (!of_parse_phandle_with_args(np, "iommus", "#iommu-cells", - idx, &iommu_spec)) { - ops = of_iommu_xlate(dev, &iommu_spec); - of_node_put(iommu_spec.np); - idx++; - if (IS_ERR_OR_NULL(ops)) - break; - } + if (err) + return err; - return ops; + return info->np == pdev->bus->dev.of_node; } const struct iommu_ops *of_iommu_configure(struct device *dev, struct device_node *master_np) { - const struct iommu_ops *ops; + const struct iommu_ops *ops = NULL; struct iommu_fwspec *fwspec = dev->iommu_fwspec; + int err = NO_IOMMU; if (!master_np) return NULL; @@ -220,20 +181,55 @@ const struct iommu_ops *of_iommu_configure(struct device *dev, iommu_fwspec_free(dev); } - if (dev_is_pci(dev)) - ops = of_pci_iommu_init(to_pci_dev(dev), master_np); - else - ops = of_platform_iommu_init(dev, master_np); + /* + * We don't currently walk up the tree looking for a parent IOMMU. + * See the `Notes:' section of + * Documentation/devicetree/bindings/iommu/iommu.txt + */ + if (dev_is_pci(dev)) { + struct of_pci_iommu_alias_info info = { + .dev = dev, + .np = master_np, + }; + + err = pci_for_each_dma_alias(to_pci_dev(dev), + of_pci_iommu_init, &info); + } else { + struct of_phandle_args iommu_spec; + int idx = 0; + + while (!of_parse_phandle_with_args(master_np, "iommus", + "#iommu-cells", + idx, &iommu_spec)) { + err = of_iommu_xlate(dev, &iommu_spec); + of_node_put(iommu_spec.np); + idx++; + if (err) + break; + } + } + + /* + * Two success conditions can be represented by non-negative err here: + * >0 : there is no IOMMU, or one was unavailable for non-fatal reasons + * 0 : we found an IOMMU, and dev->fwspec is initialised appropriately + * <0 : any actual error + */ + if (!err) + ops = dev->iommu_fwspec->ops; /* * If we have reason to believe the IOMMU driver missed the initial * add_device callback for dev, replay it to get things in order. */ - if (!IS_ERR_OR_NULL(ops) && ops->add_device && - dev->bus && !dev->iommu_group) { - int err = ops->add_device(dev); - - if (err) - ops = ERR_PTR(err); + if (ops && ops->add_device && dev->bus && !dev->iommu_group) + err = ops->add_device(dev); + + /* Ignore all other errors apart from EPROBE_DEFER */ + if (err == -EPROBE_DEFER) { + ops = ERR_PTR(err); + } else if (err < 0) { + dev_dbg(dev, "Adding to IOMMU failed: %d\n", err); + ops = NULL; } return ops; @@ -248,8 +244,7 @@ static int __init of_iommu_init(void) const of_iommu_init_fn init_fn = match->data; if (init_fn && init_fn(np)) - pr_err("Failed to initialise IOMMU %s\n", - of_node_full_name(np)); + pr_err("Failed to initialise IOMMU %pOF\n", np); } return 0; diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c index 95dfca36ccb9..bd67e1b2c64e 100644 --- a/drivers/iommu/omap-iommu.c +++ b/drivers/iommu/omap-iommu.c @@ -11,6 +11,7 @@ * published by the Free Software Foundation. */ +#include <linux/dma-mapping.h> #include <linux/err.h> #include <linux/slab.h> #include <linux/interrupt.h> @@ -29,8 +30,6 @@ #include <linux/regmap.h> #include <linux/mfd/syscon.h> -#include <asm/cacheflush.h> - #include <linux/platform_data/iommu-omap.h> #include "omap-iopgtable.h" @@ -454,36 +453,35 @@ static void flush_iotlb_all(struct omap_iommu *obj) /* * H/W pagetable operations */ -static void flush_iopgd_range(u32 *first, u32 *last) +static void flush_iopte_range(struct device *dev, dma_addr_t dma, + unsigned long offset, int num_entries) { - /* FIXME: L2 cache should be taken care of if it exists */ - do { - asm("mcr p15, 0, %0, c7, c10, 1 @ flush_pgd" - : : "r" (first)); - first += L1_CACHE_BYTES / sizeof(*first); - } while (first <= last); -} + size_t size = num_entries * sizeof(u32); -static void flush_iopte_range(u32 *first, u32 *last) -{ - /* FIXME: L2 cache should be taken care of if it exists */ - do { - asm("mcr p15, 0, %0, c7, c10, 1 @ flush_pte" - : : "r" (first)); - first += L1_CACHE_BYTES / sizeof(*first); - } while (first <= last); + dma_sync_single_range_for_device(dev, dma, offset, size, DMA_TO_DEVICE); } -static void iopte_free(u32 *iopte) +static void iopte_free(struct omap_iommu *obj, u32 *iopte, bool dma_valid) { + dma_addr_t pt_dma; + /* Note: freed iopte's must be clean ready for re-use */ - if (iopte) + if (iopte) { + if (dma_valid) { + pt_dma = virt_to_phys(iopte); + dma_unmap_single(obj->dev, pt_dma, IOPTE_TABLE_SIZE, + DMA_TO_DEVICE); + } + kmem_cache_free(iopte_cachep, iopte); + } } -static u32 *iopte_alloc(struct omap_iommu *obj, u32 *iopgd, u32 da) +static u32 *iopte_alloc(struct omap_iommu *obj, u32 *iopgd, + dma_addr_t *pt_dma, u32 da) { u32 *iopte; + unsigned long offset = iopgd_index(da) * sizeof(da); /* a table has already existed */ if (*iopgd) @@ -500,18 +498,38 @@ static u32 *iopte_alloc(struct omap_iommu *obj, u32 *iopgd, u32 da) if (!iopte) return ERR_PTR(-ENOMEM); + *pt_dma = dma_map_single(obj->dev, iopte, IOPTE_TABLE_SIZE, + DMA_TO_DEVICE); + if (dma_mapping_error(obj->dev, *pt_dma)) { + dev_err(obj->dev, "DMA map error for L2 table\n"); + iopte_free(obj, iopte, false); + return ERR_PTR(-ENOMEM); + } + + /* + * we rely on dma address and the physical address to be + * the same for mapping the L2 table + */ + if (WARN_ON(*pt_dma != virt_to_phys(iopte))) { + dev_err(obj->dev, "DMA translation error for L2 table\n"); + dma_unmap_single(obj->dev, *pt_dma, IOPTE_TABLE_SIZE, + DMA_TO_DEVICE); + iopte_free(obj, iopte, false); + return ERR_PTR(-ENOMEM); + } + *iopgd = virt_to_phys(iopte) | IOPGD_TABLE; - flush_iopgd_range(iopgd, iopgd); + flush_iopte_range(obj->dev, obj->pd_dma, offset, 1); dev_vdbg(obj->dev, "%s: a new pte:%p\n", __func__, iopte); } else { /* We raced, free the reduniovant table */ - iopte_free(iopte); + iopte_free(obj, iopte, false); } pte_ready: iopte = iopte_offset(iopgd, da); - + *pt_dma = virt_to_phys(iopte); dev_vdbg(obj->dev, "%s: da:%08x pgd:%p *pgd:%08x pte:%p *pte:%08x\n", __func__, da, iopgd, *iopgd, iopte, *iopte); @@ -522,6 +540,7 @@ pte_ready: static int iopgd_alloc_section(struct omap_iommu *obj, u32 da, u32 pa, u32 prot) { u32 *iopgd = iopgd_offset(obj, da); + unsigned long offset = iopgd_index(da) * sizeof(da); if ((da | pa) & ~IOSECTION_MASK) { dev_err(obj->dev, "%s: %08x:%08x should aligned on %08lx\n", @@ -530,13 +549,14 @@ static int iopgd_alloc_section(struct omap_iommu *obj, u32 da, u32 pa, u32 prot) } *iopgd = (pa & IOSECTION_MASK) | prot | IOPGD_SECTION; - flush_iopgd_range(iopgd, iopgd); + flush_iopte_range(obj->dev, obj->pd_dma, offset, 1); return 0; } static int iopgd_alloc_super(struct omap_iommu *obj, u32 da, u32 pa, u32 prot) { u32 *iopgd = iopgd_offset(obj, da); + unsigned long offset = iopgd_index(da) * sizeof(da); int i; if ((da | pa) & ~IOSUPER_MASK) { @@ -547,20 +567,22 @@ static int iopgd_alloc_super(struct omap_iommu *obj, u32 da, u32 pa, u32 prot) for (i = 0; i < 16; i++) *(iopgd + i) = (pa & IOSUPER_MASK) | prot | IOPGD_SUPER; - flush_iopgd_range(iopgd, iopgd + 15); + flush_iopte_range(obj->dev, obj->pd_dma, offset, 16); return 0; } static int iopte_alloc_page(struct omap_iommu *obj, u32 da, u32 pa, u32 prot) { u32 *iopgd = iopgd_offset(obj, da); - u32 *iopte = iopte_alloc(obj, iopgd, da); + dma_addr_t pt_dma; + u32 *iopte = iopte_alloc(obj, iopgd, &pt_dma, da); + unsigned long offset = iopte_index(da) * sizeof(da); if (IS_ERR(iopte)) return PTR_ERR(iopte); *iopte = (pa & IOPAGE_MASK) | prot | IOPTE_SMALL; - flush_iopte_range(iopte, iopte); + flush_iopte_range(obj->dev, pt_dma, offset, 1); dev_vdbg(obj->dev, "%s: da:%08x pa:%08x pte:%p *pte:%08x\n", __func__, da, pa, iopte, *iopte); @@ -571,7 +593,9 @@ static int iopte_alloc_page(struct omap_iommu *obj, u32 da, u32 pa, u32 prot) static int iopte_alloc_large(struct omap_iommu *obj, u32 da, u32 pa, u32 prot) { u32 *iopgd = iopgd_offset(obj, da); - u32 *iopte = iopte_alloc(obj, iopgd, da); + dma_addr_t pt_dma; + u32 *iopte = iopte_alloc(obj, iopgd, &pt_dma, da); + unsigned long offset = iopte_index(da) * sizeof(da); int i; if ((da | pa) & ~IOLARGE_MASK) { @@ -585,7 +609,7 @@ static int iopte_alloc_large(struct omap_iommu *obj, u32 da, u32 pa, u32 prot) for (i = 0; i < 16; i++) *(iopte + i) = (pa & IOLARGE_MASK) | prot | IOPTE_LARGE; - flush_iopte_range(iopte, iopte + 15); + flush_iopte_range(obj->dev, pt_dma, offset, 16); return 0; } @@ -674,6 +698,9 @@ static size_t iopgtable_clear_entry_core(struct omap_iommu *obj, u32 da) size_t bytes; u32 *iopgd = iopgd_offset(obj, da); int nent = 1; + dma_addr_t pt_dma; + unsigned long pd_offset = iopgd_index(da) * sizeof(da); + unsigned long pt_offset = iopte_index(da) * sizeof(da); if (!*iopgd) return 0; @@ -690,7 +717,8 @@ static size_t iopgtable_clear_entry_core(struct omap_iommu *obj, u32 da) } bytes *= nent; memset(iopte, 0, nent * sizeof(*iopte)); - flush_iopte_range(iopte, iopte + (nent - 1) * sizeof(*iopte)); + pt_dma = virt_to_phys(iopte); + flush_iopte_range(obj->dev, pt_dma, pt_offset, nent); /* * do table walk to check if this table is necessary or not @@ -700,7 +728,7 @@ static size_t iopgtable_clear_entry_core(struct omap_iommu *obj, u32 da) if (iopte[i]) goto out; - iopte_free(iopte); + iopte_free(obj, iopte, true); nent = 1; /* for the next L1 entry */ } else { bytes = IOPGD_SIZE; @@ -712,7 +740,7 @@ static size_t iopgtable_clear_entry_core(struct omap_iommu *obj, u32 da) bytes *= nent; } memset(iopgd, 0, nent * sizeof(*iopgd)); - flush_iopgd_range(iopgd, iopgd + (nent - 1) * sizeof(*iopgd)); + flush_iopte_range(obj->dev, obj->pd_dma, pd_offset, nent); out: return bytes; } @@ -738,6 +766,7 @@ static size_t iopgtable_clear_entry(struct omap_iommu *obj, u32 da) static void iopgtable_clear_entry_all(struct omap_iommu *obj) { + unsigned long offset; int i; spin_lock(&obj->page_table_lock); @@ -748,15 +777,16 @@ static void iopgtable_clear_entry_all(struct omap_iommu *obj) da = i << IOPGD_SHIFT; iopgd = iopgd_offset(obj, da); + offset = iopgd_index(da) * sizeof(da); if (!*iopgd) continue; if (iopgd_is_table(*iopgd)) - iopte_free(iopte_offset(iopgd, 0)); + iopte_free(obj, iopte_offset(iopgd, 0), true); *iopgd = 0; - flush_iopgd_range(iopgd, iopgd); + flush_iopte_range(obj->dev, obj->pd_dma, offset, 1); } flush_iotlb_all(obj); @@ -786,7 +816,7 @@ static irqreturn_t iommu_fault_handler(int irq, void *data) if (!report_iommu_fault(domain, obj->dev, da, 0)) return IRQ_HANDLED; - iommu_disable(obj); + iommu_write_reg(obj, 0, MMU_IRQENABLE); iopgd = iopgd_offset(obj, da); @@ -815,10 +845,18 @@ static int omap_iommu_attach(struct omap_iommu *obj, u32 *iopgd) spin_lock(&obj->iommu_lock); + obj->pd_dma = dma_map_single(obj->dev, iopgd, IOPGD_TABLE_SIZE, + DMA_TO_DEVICE); + if (dma_mapping_error(obj->dev, obj->pd_dma)) { + dev_err(obj->dev, "DMA map error for L1 table\n"); + err = -ENOMEM; + goto out_err; + } + obj->iopgd = iopgd; err = iommu_enable(obj); if (err) - goto err_enable; + goto out_err; flush_iotlb_all(obj); spin_unlock(&obj->iommu_lock); @@ -827,7 +865,7 @@ static int omap_iommu_attach(struct omap_iommu *obj, u32 *iopgd) return 0; -err_enable: +out_err: spin_unlock(&obj->iommu_lock); return err; @@ -844,7 +882,10 @@ static void omap_iommu_detach(struct omap_iommu *obj) spin_lock(&obj->iommu_lock); + dma_unmap_single(obj->dev, obj->pd_dma, IOPGD_TABLE_SIZE, + DMA_TO_DEVICE); iommu_disable(obj); + obj->pd_dma = 0; obj->iopgd = NULL; spin_unlock(&obj->iommu_lock); @@ -1008,11 +1049,6 @@ static struct platform_driver omap_iommu_driver = { }, }; -static void iopte_cachep_ctor(void *iopte) -{ - clean_dcache_area(iopte, IOPTE_TABLE_SIZE); -} - static u32 iotlb_init_entry(struct iotlb_entry *e, u32 da, u32 pa, int pgsz) { memset(e, 0, sizeof(*e)); @@ -1159,7 +1195,6 @@ static struct iommu_domain *omap_iommu_domain_alloc(unsigned type) if (WARN_ON(!IS_ALIGNED((long)omap_domain->pgtable, IOPGD_TABLE_SIZE))) goto fail_align; - clean_dcache_area(omap_domain->pgtable, IOPGD_TABLE_SIZE); spin_lock_init(&omap_domain->lock); omap_domain->domain.geometry.aperture_start = 0; @@ -1309,7 +1344,7 @@ static void omap_iommu_remove_device(struct device *dev) static struct iommu_group *omap_iommu_device_group(struct device *dev) { struct omap_iommu_arch_data *arch_data = dev->archdata.iommu; - struct iommu_group *group = NULL; + struct iommu_group *group = ERR_PTR(-EINVAL); if (arch_data->iommu_dev) group = arch_data->iommu_dev->group; @@ -1347,7 +1382,7 @@ static int __init omap_iommu_init(void) of_node_put(np); p = kmem_cache_create("iopte_cache", IOPTE_TABLE_SIZE, align, flags, - iopte_cachep_ctor); + NULL); if (!p) return -ENOMEM; iopte_cachep = p; diff --git a/drivers/iommu/omap-iommu.h b/drivers/iommu/omap-iommu.h index 6e70515e6038..a675af29a6ec 100644 --- a/drivers/iommu/omap-iommu.h +++ b/drivers/iommu/omap-iommu.h @@ -61,6 +61,7 @@ struct omap_iommu { */ u32 *iopgd; spinlock_t page_table_lock; /* protect iopgd */ + dma_addr_t pd_dma; int nr_tlb_entries; diff --git a/drivers/iommu/qcom_iommu.c b/drivers/iommu/qcom_iommu.c new file mode 100644 index 000000000000..c8a587d034b0 --- /dev/null +++ b/drivers/iommu/qcom_iommu.c @@ -0,0 +1,930 @@ +/* + * IOMMU API for QCOM secure IOMMUs. Somewhat based on arm-smmu.c + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * Copyright (C) 2013 ARM Limited + * Copyright (C) 2017 Red Hat + */ + +#include <linux/atomic.h> +#include <linux/clk.h> +#include <linux/delay.h> +#include <linux/dma-iommu.h> +#include <linux/dma-mapping.h> +#include <linux/err.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/io-64-nonatomic-hi-lo.h> +#include <linux/iommu.h> +#include <linux/iopoll.h> +#include <linux/kconfig.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/of.h> +#include <linux/of_address.h> +#include <linux/of_device.h> +#include <linux/of_iommu.h> +#include <linux/platform_device.h> +#include <linux/pm.h> +#include <linux/pm_runtime.h> +#include <linux/qcom_scm.h> +#include <linux/slab.h> +#include <linux/spinlock.h> + +#include "io-pgtable.h" +#include "arm-smmu-regs.h" + +#define SMMU_INTR_SEL_NS 0x2000 + +struct qcom_iommu_ctx; + +struct qcom_iommu_dev { + /* IOMMU core code handle */ + struct iommu_device iommu; + struct device *dev; + struct clk *iface_clk; + struct clk *bus_clk; + void __iomem *local_base; + u32 sec_id; + u8 num_ctxs; + struct qcom_iommu_ctx *ctxs[0]; /* indexed by asid-1 */ +}; + +struct qcom_iommu_ctx { + struct device *dev; + void __iomem *base; + bool secure_init; + u8 asid; /* asid and ctx bank # are 1:1 */ +}; + +struct qcom_iommu_domain { + struct io_pgtable_ops *pgtbl_ops; + spinlock_t pgtbl_lock; + struct mutex init_mutex; /* Protects iommu pointer */ + struct iommu_domain domain; + struct qcom_iommu_dev *iommu; +}; + +static struct qcom_iommu_domain *to_qcom_iommu_domain(struct iommu_domain *dom) +{ + return container_of(dom, struct qcom_iommu_domain, domain); +} + +static const struct iommu_ops qcom_iommu_ops; + +static struct qcom_iommu_dev * to_iommu(struct iommu_fwspec *fwspec) +{ + if (!fwspec || fwspec->ops != &qcom_iommu_ops) + return NULL; + return fwspec->iommu_priv; +} + +static struct qcom_iommu_ctx * to_ctx(struct iommu_fwspec *fwspec, unsigned asid) +{ + struct qcom_iommu_dev *qcom_iommu = to_iommu(fwspec); + if (!qcom_iommu) + return NULL; + return qcom_iommu->ctxs[asid - 1]; +} + +static inline void +iommu_writel(struct qcom_iommu_ctx *ctx, unsigned reg, u32 val) +{ + writel_relaxed(val, ctx->base + reg); +} + +static inline void +iommu_writeq(struct qcom_iommu_ctx *ctx, unsigned reg, u64 val) +{ + writeq_relaxed(val, ctx->base + reg); +} + +static inline u32 +iommu_readl(struct qcom_iommu_ctx *ctx, unsigned reg) +{ + return readl_relaxed(ctx->base + reg); +} + +static inline u64 +iommu_readq(struct qcom_iommu_ctx *ctx, unsigned reg) +{ + return readq_relaxed(ctx->base + reg); +} + +static void qcom_iommu_tlb_sync(void *cookie) +{ + struct iommu_fwspec *fwspec = cookie; + unsigned i; + + for (i = 0; i < fwspec->num_ids; i++) { + struct qcom_iommu_ctx *ctx = to_ctx(fwspec, fwspec->ids[i]); + unsigned int val, ret; + + iommu_writel(ctx, ARM_SMMU_CB_TLBSYNC, 0); + + ret = readl_poll_timeout(ctx->base + ARM_SMMU_CB_TLBSTATUS, val, + (val & 0x1) == 0, 0, 5000000); + if (ret) + dev_err(ctx->dev, "timeout waiting for TLB SYNC\n"); + } +} + +static void qcom_iommu_tlb_inv_context(void *cookie) +{ + struct iommu_fwspec *fwspec = cookie; + unsigned i; + + for (i = 0; i < fwspec->num_ids; i++) { + struct qcom_iommu_ctx *ctx = to_ctx(fwspec, fwspec->ids[i]); + iommu_writel(ctx, ARM_SMMU_CB_S1_TLBIASID, ctx->asid); + } + + qcom_iommu_tlb_sync(cookie); +} + +static void qcom_iommu_tlb_inv_range_nosync(unsigned long iova, size_t size, + size_t granule, bool leaf, void *cookie) +{ + struct iommu_fwspec *fwspec = cookie; + unsigned i, reg; + + reg = leaf ? ARM_SMMU_CB_S1_TLBIVAL : ARM_SMMU_CB_S1_TLBIVA; + + for (i = 0; i < fwspec->num_ids; i++) { + struct qcom_iommu_ctx *ctx = to_ctx(fwspec, fwspec->ids[i]); + size_t s = size; + + iova &= ~12UL; + iova |= ctx->asid; + do { + iommu_writel(ctx, reg, iova); + iova += granule; + } while (s -= granule); + } +} + +static const struct iommu_gather_ops qcom_gather_ops = { + .tlb_flush_all = qcom_iommu_tlb_inv_context, + .tlb_add_flush = qcom_iommu_tlb_inv_range_nosync, + .tlb_sync = qcom_iommu_tlb_sync, +}; + +static irqreturn_t qcom_iommu_fault(int irq, void *dev) +{ + struct qcom_iommu_ctx *ctx = dev; + u32 fsr, fsynr; + u64 iova; + + fsr = iommu_readl(ctx, ARM_SMMU_CB_FSR); + + if (!(fsr & FSR_FAULT)) + return IRQ_NONE; + + fsynr = iommu_readl(ctx, ARM_SMMU_CB_FSYNR0); + iova = iommu_readq(ctx, ARM_SMMU_CB_FAR); + + dev_err_ratelimited(ctx->dev, + "Unhandled context fault: fsr=0x%x, " + "iova=0x%016llx, fsynr=0x%x, cb=%d\n", + fsr, iova, fsynr, ctx->asid); + + iommu_writel(ctx, ARM_SMMU_CB_FSR, fsr); + + return IRQ_HANDLED; +} + +static int qcom_iommu_init_domain(struct iommu_domain *domain, + struct qcom_iommu_dev *qcom_iommu, + struct iommu_fwspec *fwspec) +{ + struct qcom_iommu_domain *qcom_domain = to_qcom_iommu_domain(domain); + struct io_pgtable_ops *pgtbl_ops; + struct io_pgtable_cfg pgtbl_cfg; + int i, ret = 0; + u32 reg; + + mutex_lock(&qcom_domain->init_mutex); + if (qcom_domain->iommu) + goto out_unlock; + + pgtbl_cfg = (struct io_pgtable_cfg) { + .pgsize_bitmap = qcom_iommu_ops.pgsize_bitmap, + .ias = 32, + .oas = 40, + .tlb = &qcom_gather_ops, + .iommu_dev = qcom_iommu->dev, + }; + + qcom_domain->iommu = qcom_iommu; + pgtbl_ops = alloc_io_pgtable_ops(ARM_32_LPAE_S1, &pgtbl_cfg, fwspec); + if (!pgtbl_ops) { + dev_err(qcom_iommu->dev, "failed to allocate pagetable ops\n"); + ret = -ENOMEM; + goto out_clear_iommu; + } + + /* Update the domain's page sizes to reflect the page table format */ + domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap; + domain->geometry.aperture_end = (1ULL << pgtbl_cfg.ias) - 1; + domain->geometry.force_aperture = true; + + for (i = 0; i < fwspec->num_ids; i++) { + struct qcom_iommu_ctx *ctx = to_ctx(fwspec, fwspec->ids[i]); + + if (!ctx->secure_init) { + ret = qcom_scm_restore_sec_cfg(qcom_iommu->sec_id, ctx->asid); + if (ret) { + dev_err(qcom_iommu->dev, "secure init failed: %d\n", ret); + goto out_clear_iommu; + } + ctx->secure_init = true; + } + + /* TTBRs */ + iommu_writeq(ctx, ARM_SMMU_CB_TTBR0, + pgtbl_cfg.arm_lpae_s1_cfg.ttbr[0] | + ((u64)ctx->asid << TTBRn_ASID_SHIFT)); + iommu_writeq(ctx, ARM_SMMU_CB_TTBR1, + pgtbl_cfg.arm_lpae_s1_cfg.ttbr[1] | + ((u64)ctx->asid << TTBRn_ASID_SHIFT)); + + /* TTBCR */ + iommu_writel(ctx, ARM_SMMU_CB_TTBCR2, + (pgtbl_cfg.arm_lpae_s1_cfg.tcr >> 32) | + TTBCR2_SEP_UPSTREAM); + iommu_writel(ctx, ARM_SMMU_CB_TTBCR, + pgtbl_cfg.arm_lpae_s1_cfg.tcr); + + /* MAIRs (stage-1 only) */ + iommu_writel(ctx, ARM_SMMU_CB_S1_MAIR0, + pgtbl_cfg.arm_lpae_s1_cfg.mair[0]); + iommu_writel(ctx, ARM_SMMU_CB_S1_MAIR1, + pgtbl_cfg.arm_lpae_s1_cfg.mair[1]); + + /* SCTLR */ + reg = SCTLR_CFIE | SCTLR_CFRE | SCTLR_AFE | SCTLR_TRE | + SCTLR_M | SCTLR_S1_ASIDPNE; + + if (IS_ENABLED(CONFIG_BIG_ENDIAN)) + reg |= SCTLR_E; + + iommu_writel(ctx, ARM_SMMU_CB_SCTLR, reg); + } + + mutex_unlock(&qcom_domain->init_mutex); + + /* Publish page table ops for map/unmap */ + qcom_domain->pgtbl_ops = pgtbl_ops; + + return 0; + +out_clear_iommu: + qcom_domain->iommu = NULL; +out_unlock: + mutex_unlock(&qcom_domain->init_mutex); + return ret; +} + +static struct iommu_domain *qcom_iommu_domain_alloc(unsigned type) +{ + struct qcom_iommu_domain *qcom_domain; + + if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_DMA) + return NULL; + /* + * Allocate the domain and initialise some of its data structures. + * We can't really do anything meaningful until we've added a + * master. + */ + qcom_domain = kzalloc(sizeof(*qcom_domain), GFP_KERNEL); + if (!qcom_domain) + return NULL; + + if (type == IOMMU_DOMAIN_DMA && + iommu_get_dma_cookie(&qcom_domain->domain)) { + kfree(qcom_domain); + return NULL; + } + + mutex_init(&qcom_domain->init_mutex); + spin_lock_init(&qcom_domain->pgtbl_lock); + + return &qcom_domain->domain; +} + +static void qcom_iommu_domain_free(struct iommu_domain *domain) +{ + struct qcom_iommu_domain *qcom_domain = to_qcom_iommu_domain(domain); + + if (WARN_ON(qcom_domain->iommu)) /* forgot to detach? */ + return; + + iommu_put_dma_cookie(domain); + + /* NOTE: unmap can be called after client device is powered off, + * for example, with GPUs or anything involving dma-buf. So we + * cannot rely on the device_link. Make sure the IOMMU is on to + * avoid unclocked accesses in the TLB inv path: + */ + pm_runtime_get_sync(qcom_domain->iommu->dev); + + free_io_pgtable_ops(qcom_domain->pgtbl_ops); + + pm_runtime_put_sync(qcom_domain->iommu->dev); + + kfree(qcom_domain); +} + +static int qcom_iommu_attach_dev(struct iommu_domain *domain, struct device *dev) +{ + struct qcom_iommu_dev *qcom_iommu = to_iommu(dev->iommu_fwspec); + struct qcom_iommu_domain *qcom_domain = to_qcom_iommu_domain(domain); + int ret; + + if (!qcom_iommu) { + dev_err(dev, "cannot attach to IOMMU, is it on the same bus?\n"); + return -ENXIO; + } + + /* Ensure that the domain is finalized */ + pm_runtime_get_sync(qcom_iommu->dev); + ret = qcom_iommu_init_domain(domain, qcom_iommu, dev->iommu_fwspec); + pm_runtime_put_sync(qcom_iommu->dev); + if (ret < 0) + return ret; + + /* + * Sanity check the domain. We don't support domains across + * different IOMMUs. + */ + if (qcom_domain->iommu != qcom_iommu) { + dev_err(dev, "cannot attach to IOMMU %s while already " + "attached to domain on IOMMU %s\n", + dev_name(qcom_domain->iommu->dev), + dev_name(qcom_iommu->dev)); + return -EINVAL; + } + + return 0; +} + +static void qcom_iommu_detach_dev(struct iommu_domain *domain, struct device *dev) +{ + struct iommu_fwspec *fwspec = dev->iommu_fwspec; + struct qcom_iommu_dev *qcom_iommu = to_iommu(fwspec); + struct qcom_iommu_domain *qcom_domain = to_qcom_iommu_domain(domain); + unsigned i; + + if (!qcom_domain->iommu) + return; + + pm_runtime_get_sync(qcom_iommu->dev); + for (i = 0; i < fwspec->num_ids; i++) { + struct qcom_iommu_ctx *ctx = to_ctx(fwspec, fwspec->ids[i]); + + /* Disable the context bank: */ + iommu_writel(ctx, ARM_SMMU_CB_SCTLR, 0); + } + pm_runtime_put_sync(qcom_iommu->dev); + + qcom_domain->iommu = NULL; +} + +static int qcom_iommu_map(struct iommu_domain *domain, unsigned long iova, + phys_addr_t paddr, size_t size, int prot) +{ + int ret; + unsigned long flags; + struct qcom_iommu_domain *qcom_domain = to_qcom_iommu_domain(domain); + struct io_pgtable_ops *ops = qcom_domain->pgtbl_ops; + + if (!ops) + return -ENODEV; + + spin_lock_irqsave(&qcom_domain->pgtbl_lock, flags); + ret = ops->map(ops, iova, paddr, size, prot); + spin_unlock_irqrestore(&qcom_domain->pgtbl_lock, flags); + return ret; +} + +static size_t qcom_iommu_unmap(struct iommu_domain *domain, unsigned long iova, + size_t size) +{ + size_t ret; + unsigned long flags; + struct qcom_iommu_domain *qcom_domain = to_qcom_iommu_domain(domain); + struct io_pgtable_ops *ops = qcom_domain->pgtbl_ops; + + if (!ops) + return 0; + + /* NOTE: unmap can be called after client device is powered off, + * for example, with GPUs or anything involving dma-buf. So we + * cannot rely on the device_link. Make sure the IOMMU is on to + * avoid unclocked accesses in the TLB inv path: + */ + pm_runtime_get_sync(qcom_domain->iommu->dev); + spin_lock_irqsave(&qcom_domain->pgtbl_lock, flags); + ret = ops->unmap(ops, iova, size); + spin_unlock_irqrestore(&qcom_domain->pgtbl_lock, flags); + pm_runtime_put_sync(qcom_domain->iommu->dev); + + return ret; +} + +static phys_addr_t qcom_iommu_iova_to_phys(struct iommu_domain *domain, + dma_addr_t iova) +{ + phys_addr_t ret; + unsigned long flags; + struct qcom_iommu_domain *qcom_domain = to_qcom_iommu_domain(domain); + struct io_pgtable_ops *ops = qcom_domain->pgtbl_ops; + + if (!ops) + return 0; + + spin_lock_irqsave(&qcom_domain->pgtbl_lock, flags); + ret = ops->iova_to_phys(ops, iova); + spin_unlock_irqrestore(&qcom_domain->pgtbl_lock, flags); + + return ret; +} + +static bool qcom_iommu_capable(enum iommu_cap cap) +{ + switch (cap) { + case IOMMU_CAP_CACHE_COHERENCY: + /* + * Return true here as the SMMU can always send out coherent + * requests. + */ + return true; + case IOMMU_CAP_NOEXEC: + return true; + default: + return false; + } +} + +static int qcom_iommu_add_device(struct device *dev) +{ + struct qcom_iommu_dev *qcom_iommu = to_iommu(dev->iommu_fwspec); + struct iommu_group *group; + struct device_link *link; + + if (!qcom_iommu) + return -ENODEV; + + /* + * Establish the link between iommu and master, so that the + * iommu gets runtime enabled/disabled as per the master's + * needs. + */ + link = device_link_add(dev, qcom_iommu->dev, DL_FLAG_PM_RUNTIME); + if (!link) { + dev_err(qcom_iommu->dev, "Unable to create device link between %s and %s\n", + dev_name(qcom_iommu->dev), dev_name(dev)); + return -ENODEV; + } + + group = iommu_group_get_for_dev(dev); + if (IS_ERR_OR_NULL(group)) + return PTR_ERR_OR_ZERO(group); + + iommu_group_put(group); + iommu_device_link(&qcom_iommu->iommu, dev); + + return 0; +} + +static void qcom_iommu_remove_device(struct device *dev) +{ + struct qcom_iommu_dev *qcom_iommu = to_iommu(dev->iommu_fwspec); + + if (!qcom_iommu) + return; + + iommu_device_unlink(&qcom_iommu->iommu, dev); + iommu_group_remove_device(dev); + iommu_fwspec_free(dev); +} + +static int qcom_iommu_of_xlate(struct device *dev, struct of_phandle_args *args) +{ + struct qcom_iommu_dev *qcom_iommu; + struct platform_device *iommu_pdev; + unsigned asid = args->args[0]; + + if (args->args_count != 1) { + dev_err(dev, "incorrect number of iommu params found for %s " + "(found %d, expected 1)\n", + args->np->full_name, args->args_count); + return -EINVAL; + } + + iommu_pdev = of_find_device_by_node(args->np); + if (WARN_ON(!iommu_pdev)) + return -EINVAL; + + qcom_iommu = platform_get_drvdata(iommu_pdev); + + /* make sure the asid specified in dt is valid, so we don't have + * to sanity check this elsewhere, since 'asid - 1' is used to + * index into qcom_iommu->ctxs: + */ + if (WARN_ON(asid < 1) || + WARN_ON(asid > qcom_iommu->num_ctxs)) + return -EINVAL; + + if (!dev->iommu_fwspec->iommu_priv) { + dev->iommu_fwspec->iommu_priv = qcom_iommu; + } else { + /* make sure devices iommus dt node isn't referring to + * multiple different iommu devices. Multiple context + * banks are ok, but multiple devices are not: + */ + if (WARN_ON(qcom_iommu != dev->iommu_fwspec->iommu_priv)) + return -EINVAL; + } + + return iommu_fwspec_add_ids(dev, &asid, 1); +} + +static const struct iommu_ops qcom_iommu_ops = { + .capable = qcom_iommu_capable, + .domain_alloc = qcom_iommu_domain_alloc, + .domain_free = qcom_iommu_domain_free, + .attach_dev = qcom_iommu_attach_dev, + .detach_dev = qcom_iommu_detach_dev, + .map = qcom_iommu_map, + .unmap = qcom_iommu_unmap, + .map_sg = default_iommu_map_sg, + .iova_to_phys = qcom_iommu_iova_to_phys, + .add_device = qcom_iommu_add_device, + .remove_device = qcom_iommu_remove_device, + .device_group = generic_device_group, + .of_xlate = qcom_iommu_of_xlate, + .pgsize_bitmap = SZ_4K | SZ_64K | SZ_1M | SZ_16M, +}; + +static int qcom_iommu_enable_clocks(struct qcom_iommu_dev *qcom_iommu) +{ + int ret; + + ret = clk_prepare_enable(qcom_iommu->iface_clk); + if (ret) { + dev_err(qcom_iommu->dev, "Couldn't enable iface_clk\n"); + return ret; + } + + ret = clk_prepare_enable(qcom_iommu->bus_clk); + if (ret) { + dev_err(qcom_iommu->dev, "Couldn't enable bus_clk\n"); + clk_disable_unprepare(qcom_iommu->iface_clk); + return ret; + } + + return 0; +} + +static void qcom_iommu_disable_clocks(struct qcom_iommu_dev *qcom_iommu) +{ + clk_disable_unprepare(qcom_iommu->bus_clk); + clk_disable_unprepare(qcom_iommu->iface_clk); +} + +static int qcom_iommu_sec_ptbl_init(struct device *dev) +{ + size_t psize = 0; + unsigned int spare = 0; + void *cpu_addr; + dma_addr_t paddr; + unsigned long attrs; + static bool allocated = false; + int ret; + + if (allocated) + return 0; + + ret = qcom_scm_iommu_secure_ptbl_size(spare, &psize); + if (ret) { + dev_err(dev, "failed to get iommu secure pgtable size (%d)\n", + ret); + return ret; + } + + dev_info(dev, "iommu sec: pgtable size: %zu\n", psize); + + attrs = DMA_ATTR_NO_KERNEL_MAPPING; + + cpu_addr = dma_alloc_attrs(dev, psize, &paddr, GFP_KERNEL, attrs); + if (!cpu_addr) { + dev_err(dev, "failed to allocate %zu bytes for pgtable\n", + psize); + return -ENOMEM; + } + + ret = qcom_scm_iommu_secure_ptbl_init(paddr, psize, spare); + if (ret) { + dev_err(dev, "failed to init iommu pgtable (%d)\n", ret); + goto free_mem; + } + + allocated = true; + return 0; + +free_mem: + dma_free_attrs(dev, psize, cpu_addr, paddr, attrs); + return ret; +} + +static int get_asid(const struct device_node *np) +{ + u32 reg; + + /* read the "reg" property directly to get the relative address + * of the context bank, and calculate the asid from that: + */ + if (of_property_read_u32_index(np, "reg", 0, ®)) + return -ENODEV; + + return reg / 0x1000; /* context banks are 0x1000 apart */ +} + +static int qcom_iommu_ctx_probe(struct platform_device *pdev) +{ + struct qcom_iommu_ctx *ctx; + struct device *dev = &pdev->dev; + struct qcom_iommu_dev *qcom_iommu = dev_get_drvdata(dev->parent); + struct resource *res; + int ret, irq; + + ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + ctx->dev = dev; + platform_set_drvdata(pdev, ctx); + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + ctx->base = devm_ioremap_resource(dev, res); + if (IS_ERR(ctx->base)) + return PTR_ERR(ctx->base); + + irq = platform_get_irq(pdev, 0); + if (irq < 0) { + dev_err(dev, "failed to get irq\n"); + return -ENODEV; + } + + /* clear IRQs before registering fault handler, just in case the + * boot-loader left us a surprise: + */ + iommu_writel(ctx, ARM_SMMU_CB_FSR, iommu_readl(ctx, ARM_SMMU_CB_FSR)); + + ret = devm_request_irq(dev, irq, + qcom_iommu_fault, + IRQF_SHARED, + "qcom-iommu-fault", + ctx); + if (ret) { + dev_err(dev, "failed to request IRQ %u\n", irq); + return ret; + } + + ret = get_asid(dev->of_node); + if (ret < 0) { + dev_err(dev, "missing reg property\n"); + return ret; + } + + ctx->asid = ret; + + dev_dbg(dev, "found asid %u\n", ctx->asid); + + qcom_iommu->ctxs[ctx->asid - 1] = ctx; + + return 0; +} + +static int qcom_iommu_ctx_remove(struct platform_device *pdev) +{ + struct qcom_iommu_dev *qcom_iommu = dev_get_drvdata(pdev->dev.parent); + struct qcom_iommu_ctx *ctx = platform_get_drvdata(pdev); + + platform_set_drvdata(pdev, NULL); + + qcom_iommu->ctxs[ctx->asid - 1] = NULL; + + return 0; +} + +static const struct of_device_id ctx_of_match[] = { + { .compatible = "qcom,msm-iommu-v1-ns" }, + { .compatible = "qcom,msm-iommu-v1-sec" }, + { /* sentinel */ } +}; + +static struct platform_driver qcom_iommu_ctx_driver = { + .driver = { + .name = "qcom-iommu-ctx", + .of_match_table = of_match_ptr(ctx_of_match), + }, + .probe = qcom_iommu_ctx_probe, + .remove = qcom_iommu_ctx_remove, +}; + +static bool qcom_iommu_has_secure_context(struct qcom_iommu_dev *qcom_iommu) +{ + struct device_node *child; + + for_each_child_of_node(qcom_iommu->dev->of_node, child) + if (of_device_is_compatible(child, "qcom,msm-iommu-v1-sec")) + return true; + + return false; +} + +static int qcom_iommu_device_probe(struct platform_device *pdev) +{ + struct device_node *child; + struct qcom_iommu_dev *qcom_iommu; + struct device *dev = &pdev->dev; + struct resource *res; + int ret, sz, max_asid = 0; + + /* find the max asid (which is 1:1 to ctx bank idx), so we know how + * many child ctx devices we have: + */ + for_each_child_of_node(dev->of_node, child) + max_asid = max(max_asid, get_asid(child)); + + sz = sizeof(*qcom_iommu) + (max_asid * sizeof(qcom_iommu->ctxs[0])); + + qcom_iommu = devm_kzalloc(dev, sz, GFP_KERNEL); + if (!qcom_iommu) + return -ENOMEM; + qcom_iommu->num_ctxs = max_asid; + qcom_iommu->dev = dev; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (res) + qcom_iommu->local_base = devm_ioremap_resource(dev, res); + + qcom_iommu->iface_clk = devm_clk_get(dev, "iface"); + if (IS_ERR(qcom_iommu->iface_clk)) { + dev_err(dev, "failed to get iface clock\n"); + return PTR_ERR(qcom_iommu->iface_clk); + } + + qcom_iommu->bus_clk = devm_clk_get(dev, "bus"); + if (IS_ERR(qcom_iommu->bus_clk)) { + dev_err(dev, "failed to get bus clock\n"); + return PTR_ERR(qcom_iommu->bus_clk); + } + + if (of_property_read_u32(dev->of_node, "qcom,iommu-secure-id", + &qcom_iommu->sec_id)) { + dev_err(dev, "missing qcom,iommu-secure-id property\n"); + return -ENODEV; + } + + if (qcom_iommu_has_secure_context(qcom_iommu)) { + ret = qcom_iommu_sec_ptbl_init(dev); + if (ret) { + dev_err(dev, "cannot init secure pg table(%d)\n", ret); + return ret; + } + } + + platform_set_drvdata(pdev, qcom_iommu); + + pm_runtime_enable(dev); + + /* register context bank devices, which are child nodes: */ + ret = devm_of_platform_populate(dev); + if (ret) { + dev_err(dev, "Failed to populate iommu contexts\n"); + return ret; + } + + ret = iommu_device_sysfs_add(&qcom_iommu->iommu, dev, NULL, + dev_name(dev)); + if (ret) { + dev_err(dev, "Failed to register iommu in sysfs\n"); + return ret; + } + + iommu_device_set_ops(&qcom_iommu->iommu, &qcom_iommu_ops); + iommu_device_set_fwnode(&qcom_iommu->iommu, dev->fwnode); + + ret = iommu_device_register(&qcom_iommu->iommu); + if (ret) { + dev_err(dev, "Failed to register iommu\n"); + return ret; + } + + bus_set_iommu(&platform_bus_type, &qcom_iommu_ops); + + if (qcom_iommu->local_base) { + pm_runtime_get_sync(dev); + writel_relaxed(0xffffffff, qcom_iommu->local_base + SMMU_INTR_SEL_NS); + pm_runtime_put_sync(dev); + } + + return 0; +} + +static int qcom_iommu_device_remove(struct platform_device *pdev) +{ + struct qcom_iommu_dev *qcom_iommu = platform_get_drvdata(pdev); + + bus_set_iommu(&platform_bus_type, NULL); + + pm_runtime_force_suspend(&pdev->dev); + platform_set_drvdata(pdev, NULL); + iommu_device_sysfs_remove(&qcom_iommu->iommu); + iommu_device_unregister(&qcom_iommu->iommu); + + return 0; +} + +static int __maybe_unused qcom_iommu_resume(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct qcom_iommu_dev *qcom_iommu = platform_get_drvdata(pdev); + + return qcom_iommu_enable_clocks(qcom_iommu); +} + +static int __maybe_unused qcom_iommu_suspend(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct qcom_iommu_dev *qcom_iommu = platform_get_drvdata(pdev); + + qcom_iommu_disable_clocks(qcom_iommu); + + return 0; +} + +static const struct dev_pm_ops qcom_iommu_pm_ops = { + SET_RUNTIME_PM_OPS(qcom_iommu_suspend, qcom_iommu_resume, NULL) + SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, + pm_runtime_force_resume) +}; + +static const struct of_device_id qcom_iommu_of_match[] = { + { .compatible = "qcom,msm-iommu-v1" }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, qcom_iommu_of_match); + +static struct platform_driver qcom_iommu_driver = { + .driver = { + .name = "qcom-iommu", + .of_match_table = of_match_ptr(qcom_iommu_of_match), + .pm = &qcom_iommu_pm_ops, + }, + .probe = qcom_iommu_device_probe, + .remove = qcom_iommu_device_remove, +}; + +static int __init qcom_iommu_init(void) +{ + int ret; + + ret = platform_driver_register(&qcom_iommu_ctx_driver); + if (ret) + return ret; + + ret = platform_driver_register(&qcom_iommu_driver); + if (ret) + platform_driver_unregister(&qcom_iommu_ctx_driver); + + return ret; +} + +static void __exit qcom_iommu_exit(void) +{ + platform_driver_unregister(&qcom_iommu_driver); + platform_driver_unregister(&qcom_iommu_ctx_driver); +} + +module_init(qcom_iommu_init); +module_exit(qcom_iommu_exit); + +IOMMU_OF_DECLARE(qcom_iommu_dev, "qcom,msm-iommu-v1", NULL); + +MODULE_DESCRIPTION("IOMMU API for QCOM IOMMU v1 implementations"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c index 4ba48a26b389..9d991c2d8767 100644 --- a/drivers/iommu/rockchip-iommu.c +++ b/drivers/iommu/rockchip-iommu.c @@ -90,7 +90,9 @@ struct rk_iommu { struct device *dev; void __iomem **bases; int num_mmu; - int irq; + int *irq; + int num_irq; + bool reset_disabled; struct iommu_device iommu; struct list_head node; /* entry in rk_iommu_domain.iommus */ struct iommu_domain *domain; /* domain to which iommu is attached */ @@ -414,6 +416,9 @@ static int rk_iommu_force_reset(struct rk_iommu *iommu) int ret, i; u32 dte_addr; + if (iommu->reset_disabled) + return 0; + /* * Check if register DTE_ADDR is working by writing DTE_ADDR_DUMMY * and verifying that upper 5 nybbles are read back. @@ -825,10 +830,12 @@ static int rk_iommu_attach_device(struct iommu_domain *domain, iommu->domain = domain; - ret = devm_request_irq(iommu->dev, iommu->irq, rk_iommu_irq, - IRQF_SHARED, dev_name(dev), iommu); - if (ret) - return ret; + for (i = 0; i < iommu->num_irq; i++) { + ret = devm_request_irq(iommu->dev, iommu->irq[i], rk_iommu_irq, + IRQF_SHARED, dev_name(dev), iommu); + if (ret) + return ret; + } for (i = 0; i < iommu->num_mmu; i++) { rk_iommu_write(iommu->bases[i], RK_MMU_DTE_ADDR, @@ -878,7 +885,8 @@ static void rk_iommu_detach_device(struct iommu_domain *domain, } rk_iommu_disable_stall(iommu); - devm_free_irq(iommu->dev, iommu->irq, iommu); + for (i = 0; i < iommu->num_irq; i++) + devm_free_irq(iommu->dev, iommu->irq[i], iommu); iommu->domain = NULL; @@ -1008,20 +1016,20 @@ static int rk_iommu_group_set_iommudata(struct iommu_group *group, ret = of_parse_phandle_with_args(np, "iommus", "#iommu-cells", 0, &args); if (ret) { - dev_err(dev, "of_parse_phandle_with_args(%s) => %d\n", - np->full_name, ret); + dev_err(dev, "of_parse_phandle_with_args(%pOF) => %d\n", + np, ret); return ret; } if (args.args_count != 0) { - dev_err(dev, "incorrect number of iommu params found for %s (found %d, expected 0)\n", - args.np->full_name, args.args_count); + dev_err(dev, "incorrect number of iommu params found for %pOF (found %d, expected 0)\n", + args.np, args.args_count); return -EINVAL; } pd = of_find_device_by_node(args.np); of_node_put(args.np); if (!pd) { - dev_err(dev, "iommu %s not found\n", args.np->full_name); + dev_err(dev, "iommu %pOF not found\n", args.np); return -EPROBE_DEFER; } @@ -1157,12 +1165,28 @@ static int rk_iommu_probe(struct platform_device *pdev) if (iommu->num_mmu == 0) return PTR_ERR(iommu->bases[0]); - iommu->irq = platform_get_irq(pdev, 0); - if (iommu->irq < 0) { - dev_err(dev, "Failed to get IRQ, %d\n", iommu->irq); + iommu->num_irq = platform_irq_count(pdev); + if (iommu->num_irq < 0) + return iommu->num_irq; + if (iommu->num_irq == 0) return -ENXIO; + + iommu->irq = devm_kcalloc(dev, iommu->num_irq, sizeof(*iommu->irq), + GFP_KERNEL); + if (!iommu->irq) + return -ENOMEM; + + for (i = 0; i < iommu->num_irq; i++) { + iommu->irq[i] = platform_get_irq(pdev, i); + if (iommu->irq[i] < 0) { + dev_err(dev, "Failed to get IRQ, %d\n", iommu->irq[i]); + return -ENXIO; + } } + iommu->reset_disabled = device_property_read_bool(dev, + "rockchip,disable-mmu-reset"); + err = iommu_device_sysfs_add(&iommu->iommu, dev, NULL, dev_name(dev)); if (err) return err; diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c index 179e636a4d91..0e2f31f9032b 100644 --- a/drivers/iommu/s390-iommu.c +++ b/drivers/iommu/s390-iommu.c @@ -18,6 +18,8 @@ */ #define S390_IOMMU_PGSIZES (~0xFFFUL) +static const struct iommu_ops s390_iommu_ops; + struct s390_domain { struct iommu_domain domain; struct list_head devices; @@ -165,20 +167,16 @@ static void s390_iommu_detach_device(struct iommu_domain *domain, static int s390_iommu_add_device(struct device *dev) { - struct iommu_group *group; - int rc; + struct iommu_group *group = iommu_group_get_for_dev(dev); + struct zpci_dev *zdev = to_pci_dev(dev)->sysdata; - group = iommu_group_get(dev); - if (!group) { - group = iommu_group_alloc(); - if (IS_ERR(group)) - return PTR_ERR(group); - } + if (IS_ERR(group)) + return PTR_ERR(group); - rc = iommu_group_add_device(group, dev); iommu_group_put(group); + iommu_device_link(&zdev->iommu_dev, dev); - return rc; + return 0; } static void s390_iommu_remove_device(struct device *dev) @@ -203,6 +201,7 @@ static void s390_iommu_remove_device(struct device *dev) s390_iommu_detach_device(domain, dev); } + iommu_device_unlink(&zdev->iommu_dev, dev); iommu_group_remove_device(dev); } @@ -333,7 +332,37 @@ static size_t s390_iommu_unmap(struct iommu_domain *domain, return size; } -static struct iommu_ops s390_iommu_ops = { +int zpci_init_iommu(struct zpci_dev *zdev) +{ + int rc = 0; + + rc = iommu_device_sysfs_add(&zdev->iommu_dev, NULL, NULL, + "s390-iommu.%08x", zdev->fid); + if (rc) + goto out_err; + + iommu_device_set_ops(&zdev->iommu_dev, &s390_iommu_ops); + + rc = iommu_device_register(&zdev->iommu_dev); + if (rc) + goto out_sysfs; + + return 0; + +out_sysfs: + iommu_device_sysfs_remove(&zdev->iommu_dev); + +out_err: + return rc; +} + +void zpci_destroy_iommu(struct zpci_dev *zdev) +{ + iommu_device_unregister(&zdev->iommu_dev); + iommu_device_sysfs_remove(&zdev->iommu_dev); +} + +static const struct iommu_ops s390_iommu_ops = { .capable = s390_iommu_capable, .domain_alloc = s390_domain_alloc, .domain_free = s390_domain_free, @@ -344,6 +373,7 @@ static struct iommu_ops s390_iommu_ops = { .iova_to_phys = s390_iommu_iova_to_phys, .add_device = s390_iommu_add_device, .remove_device = s390_iommu_remove_device, + .device_group = generic_device_group, .pgsize_bitmap = S390_IOMMU_PGSIZES, }; diff --git a/drivers/iommu/tegra-gart.c b/drivers/iommu/tegra-gart.c index 37e708fdbb5a..b62f790ad1ba 100644 --- a/drivers/iommu/tegra-gart.c +++ b/drivers/iommu/tegra-gart.c @@ -61,6 +61,8 @@ struct gart_device { struct list_head client; spinlock_t client_lock; /* for client list */ struct device *dev; + + struct iommu_device iommu; /* IOMMU Core handle */ }; struct gart_domain { @@ -334,12 +336,35 @@ static bool gart_iommu_capable(enum iommu_cap cap) return false; } +static int gart_iommu_add_device(struct device *dev) +{ + struct iommu_group *group = iommu_group_get_for_dev(dev); + + if (IS_ERR(group)) + return PTR_ERR(group); + + iommu_group_put(group); + + iommu_device_link(&gart_handle->iommu, dev); + + return 0; +} + +static void gart_iommu_remove_device(struct device *dev) +{ + iommu_group_remove_device(dev); + iommu_device_unlink(&gart_handle->iommu, dev); +} + static const struct iommu_ops gart_iommu_ops = { .capable = gart_iommu_capable, .domain_alloc = gart_iommu_domain_alloc, .domain_free = gart_iommu_domain_free, .attach_dev = gart_iommu_attach_dev, .detach_dev = gart_iommu_detach_dev, + .add_device = gart_iommu_add_device, + .remove_device = gart_iommu_remove_device, + .device_group = generic_device_group, .map = gart_iommu_map, .map_sg = default_iommu_map_sg, .unmap = gart_iommu_unmap, @@ -378,6 +403,7 @@ static int tegra_gart_probe(struct platform_device *pdev) struct resource *res, *res_remap; void __iomem *gart_regs; struct device *dev = &pdev->dev; + int ret; if (gart_handle) return -EIO; @@ -404,6 +430,22 @@ static int tegra_gart_probe(struct platform_device *pdev) return -ENXIO; } + ret = iommu_device_sysfs_add(&gart->iommu, &pdev->dev, NULL, + dev_name(&pdev->dev)); + if (ret) { + dev_err(dev, "Failed to register IOMMU in sysfs\n"); + return ret; + } + + iommu_device_set_ops(&gart->iommu, &gart_iommu_ops); + + ret = iommu_device_register(&gart->iommu); + if (ret) { + dev_err(dev, "Failed to register IOMMU\n"); + iommu_device_sysfs_remove(&gart->iommu); + return ret; + } + gart->dev = &pdev->dev; spin_lock_init(&gart->pte_lock); spin_lock_init(&gart->client_lock); @@ -430,6 +472,9 @@ static int tegra_gart_remove(struct platform_device *pdev) { struct gart_device *gart = platform_get_drvdata(pdev); + iommu_device_unregister(&gart->iommu); + iommu_device_sysfs_remove(&gart->iommu); + writel(0, gart->regs + GART_CONFIG); if (gart->savedata) vfree(gart->savedata); diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index eeb19f560a05..3b6449e2cbf1 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -36,6 +36,8 @@ struct tegra_smmu { struct list_head list; struct dentry *debugfs; + + struct iommu_device iommu; /* IOMMU Core code handle */ }; struct tegra_smmu_as { @@ -704,6 +706,7 @@ static struct tegra_smmu *tegra_smmu_find(struct device_node *np) static int tegra_smmu_add_device(struct device *dev) { struct device_node *np = dev->of_node; + struct iommu_group *group; struct of_phandle_args args; unsigned int index = 0; @@ -719,18 +722,33 @@ static int tegra_smmu_add_device(struct device *dev) * first match. */ dev->archdata.iommu = smmu; + + iommu_device_link(&smmu->iommu, dev); + break; } index++; } + group = iommu_group_get_for_dev(dev); + if (IS_ERR(group)) + return PTR_ERR(group); + + iommu_group_put(group); + return 0; } static void tegra_smmu_remove_device(struct device *dev) { + struct tegra_smmu *smmu = dev->archdata.iommu; + + if (smmu) + iommu_device_unlink(&smmu->iommu, dev); + dev->archdata.iommu = NULL; + iommu_group_remove_device(dev); } static const struct iommu_ops tegra_smmu_ops = { @@ -741,6 +759,7 @@ static const struct iommu_ops tegra_smmu_ops = { .detach_dev = tegra_smmu_detach_dev, .add_device = tegra_smmu_add_device, .remove_device = tegra_smmu_remove_device, + .device_group = generic_device_group, .map = tegra_smmu_map, .unmap = tegra_smmu_unmap, .map_sg = default_iommu_map_sg, @@ -930,9 +949,24 @@ struct tegra_smmu *tegra_smmu_probe(struct device *dev, tegra_smmu_ahb_enable(); + err = iommu_device_sysfs_add(&smmu->iommu, dev, NULL, dev_name(dev)); + if (err) + return ERR_PTR(err); + + iommu_device_set_ops(&smmu->iommu, &tegra_smmu_ops); + + err = iommu_device_register(&smmu->iommu); + if (err) { + iommu_device_sysfs_remove(&smmu->iommu); + return ERR_PTR(err); + } + err = bus_set_iommu(&platform_bus_type, &tegra_smmu_ops); - if (err < 0) + if (err < 0) { + iommu_device_unregister(&smmu->iommu); + iommu_device_sysfs_remove(&smmu->iommu); return ERR_PTR(err); + } if (IS_ENABLED(CONFIG_DEBUG_FS)) tegra_smmu_debugfs_init(smmu); @@ -942,6 +976,9 @@ struct tegra_smmu *tegra_smmu_probe(struct device *dev, void tegra_smmu_remove(struct tegra_smmu *smmu) { + iommu_device_unregister(&smmu->iommu); + iommu_device_sysfs_remove(&smmu->iommu); + if (IS_ENABLED(CONFIG_DEBUG_FS)) tegra_smmu_debugfs_exit(smmu); } |