Diffstat (limited to 'drivers/iommu')
-rw-r--r--  drivers/iommu/amd/Kconfig                        |    1
-rw-r--r--  drivers/iommu/amd/Makefile                       |    2
-rw-r--r--  drivers/iommu/amd/amd_iommu.h                    |   22
-rw-r--r--  drivers/iommu/amd/amd_iommu_types.h              |   43
-rw-r--r--  drivers/iommu/amd/init.c                         |   54
-rw-r--r--  drivers/iommu/amd/io_pgtable.c                   |  558
-rw-r--r--  drivers/iommu/amd/iommu.c                        |  672
-rw-r--r--  drivers/iommu/amd/iommu_v2.c                     |    4
-rw-r--r--  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c  |   10
-rw-r--r--  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c      |  154
-rw-r--r--  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h      |   14
-rw-r--r--  drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c       |    5
-rw-r--r--  drivers/iommu/dma-iommu.c                        |   29
-rw-r--r--  drivers/iommu/hyperv-iommu.c                     |  177
-rw-r--r--  drivers/iommu/intel/Makefile                     |    2
-rw-r--r--  drivers/iommu/intel/cap_audit.c                  |  205
-rw-r--r--  drivers/iommu/intel/cap_audit.h                  |  130
-rw-r--r--  drivers/iommu/intel/dmar.c                       |   11
-rw-r--r--  drivers/iommu/intel/iommu.c                      |  279
-rw-r--r--  drivers/iommu/intel/irq_remapping.c              |    8
-rw-r--r--  drivers/iommu/intel/pasid.c                      |   18
-rw-r--r--  drivers/iommu/intel/svm.c                        |   73
-rw-r--r--  drivers/iommu/io-pgtable-arm-v7s.c               |   65
-rw-r--r--  drivers/iommu/io-pgtable.c                       |    3
-rw-r--r--  drivers/iommu/iommu.c                            |   54
-rw-r--r--  drivers/iommu/iova.c                             |   35
-rw-r--r--  drivers/iommu/ipmmu-vmsa.c                       |   53
-rw-r--r--  drivers/iommu/msm_iommu.c                        |   10
-rw-r--r--  drivers/iommu/mtk_iommu.c                        |  410
-rw-r--r--  drivers/iommu/mtk_iommu.h                        |   12
-rw-r--r--  drivers/iommu/tegra-gart.c                       |    7
31 files changed, 1997 insertions(+), 1123 deletions(-)
diff --git a/drivers/iommu/amd/Kconfig b/drivers/iommu/amd/Kconfig
index 626b97d0dd21..a3cbafb603f5 100644
--- a/drivers/iommu/amd/Kconfig
+++ b/drivers/iommu/amd/Kconfig
@@ -10,6 +10,7 @@ config AMD_IOMMU
select IOMMU_API
select IOMMU_IOVA
select IOMMU_DMA
+ select IOMMU_IO_PGTABLE
depends on X86_64 && PCI && ACPI && HAVE_CMPXCHG_DOUBLE
help
With this option you can enable support for AMD IOMMU hardware in
diff --git a/drivers/iommu/amd/Makefile b/drivers/iommu/amd/Makefile
index dc5a2fa4fd37..a935f8f4b974 100644
--- a/drivers/iommu/amd/Makefile
+++ b/drivers/iommu/amd/Makefile
@@ -1,4 +1,4 @@
# SPDX-License-Identifier: GPL-2.0-only
-obj-$(CONFIG_AMD_IOMMU) += iommu.o init.o quirks.o
+obj-$(CONFIG_AMD_IOMMU) += iommu.o init.o quirks.o io_pgtable.o
obj-$(CONFIG_AMD_IOMMU_DEBUGFS) += debugfs.o
obj-$(CONFIG_AMD_IOMMU_V2) += iommu_v2.o
diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
index b4adab698563..026ce7f8d993 100644
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -36,6 +36,7 @@ extern void amd_iommu_disable(void);
extern int amd_iommu_reenable(int);
extern int amd_iommu_enable_faulting(void);
extern int amd_iommu_guest_ir;
+extern enum io_pgtable_fmt amd_iommu_pgtable;
/* IOMMUv2 specific functions */
struct iommu_domain;
@@ -56,6 +57,10 @@ extern void amd_iommu_domain_direct_map(struct iommu_domain *dom);
extern int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids);
extern int amd_iommu_flush_page(struct iommu_domain *dom, u32 pasid,
u64 address);
+extern void amd_iommu_update_and_flush_device_table(struct protection_domain *domain);
+extern void amd_iommu_domain_update(struct protection_domain *domain);
+extern void amd_iommu_domain_flush_complete(struct protection_domain *domain);
+extern void amd_iommu_domain_flush_tlb_pde(struct protection_domain *domain);
extern int amd_iommu_flush_tlb(struct iommu_domain *dom, u32 pasid);
extern int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, u32 pasid,
unsigned long cr3);
@@ -99,6 +104,21 @@ static inline void *iommu_phys_to_virt(unsigned long paddr)
return phys_to_virt(__sme_clr(paddr));
}
+static inline
+void amd_iommu_domain_set_pt_root(struct protection_domain *domain, u64 root)
+{
+ atomic64_set(&domain->iop.pt_root, root);
+ domain->iop.root = (u64 *)(root & PAGE_MASK);
+ domain->iop.mode = root & 7; /* lowest 3 bits encode pgtable mode */
+}
+
+static inline
+void amd_iommu_domain_clr_pt_root(struct protection_domain *domain)
+{
+ amd_iommu_domain_set_pt_root(domain, 0);
+}
+
+
extern bool translation_pre_enabled(struct amd_iommu *iommu);
extern bool amd_iommu_is_attach_deferred(struct iommu_domain *domain,
struct device *dev);
@@ -111,4 +131,6 @@ void amd_iommu_apply_ivrs_quirks(void);
static inline void amd_iommu_apply_ivrs_quirks(void) { }
#endif
+extern void amd_iommu_domain_set_pgtable(struct protection_domain *domain,
+ u64 *root, int mode);
#endif
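
[Annotation] The pt_root encoding introduced above works because the page-table root is page-aligned, so its low 12 bits are free and the low 3 can carry the paging mode; storing both in one atomic64_t lets readers observe root and mode change together. A minimal stand-alone sketch of the round-trip (user-space C, values illustrative, names hypothetical):

#include <assert.h>
#include <stdint.h>

#define SKETCH_PAGE_MASK (~0xfffULL)	/* 4K page alignment */

/* Pack a page-aligned root pointer and a 3-bit paging mode into one u64,
 * mirroring amd_iommu_domain_set_pgtable()/amd_iommu_domain_set_pt_root(). */
static uint64_t pack_pt_root(uint64_t root, int mode)
{
	return (root & SKETCH_PAGE_MASK) | (mode & 7);
}

int main(void)
{
	uint64_t v = pack_pt_root(0x12345000ULL, 3 /* PAGE_MODE_3_LEVEL */);

	assert((v & SKETCH_PAGE_MASK) == 0x12345000ULL);	/* root survives */
	assert((v & 7) == 3);					/* mode survives */
	return 0;
}
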
diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h
index 1a0495dd5fcb..6937e3674a16 100644
--- a/drivers/iommu/amd/amd_iommu_types.h
+++ b/drivers/iommu/amd/amd_iommu_types.h
@@ -15,6 +15,7 @@
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/irqreturn.h>
+#include <linux/io-pgtable.h>
/*
* Maximum number of IOMMUs supported
@@ -252,6 +253,19 @@
#define GA_GUEST_NR 0x1
+#define IOMMU_IN_ADDR_BIT_SIZE 52
+#define IOMMU_OUT_ADDR_BIT_SIZE 52
+
+/*
+ * This bitmap is used to advertise the page sizes our hardware supports
+ * to the IOMMU core, which will then use this information to split
+ * physically contiguous memory regions it is mapping into page sizes
+ * that we support.
+ *
+ * 512GB Pages are not supported due to a hardware bug
+ */
+#define AMD_IOMMU_PGSIZES ((~0xFFFUL) & ~(2ULL << 38))
+
/* Bit value definition for dte irq remapping fields*/
#define DTE_IRQ_PHYS_ADDR_MASK (((1ULL << 45)-1) << 6)
#define DTE_IRQ_REMAP_INTCTL_MASK (0x3ULL << 60)
@@ -470,6 +484,27 @@ struct amd_irte_ops;
#define AMD_IOMMU_FLAG_TRANS_PRE_ENABLED (1 << 0)
+#define io_pgtable_to_data(x) \
+ container_of((x), struct amd_io_pgtable, iop)
+
+#define io_pgtable_ops_to_data(x) \
+ io_pgtable_to_data(io_pgtable_ops_to_pgtable(x))
+
+#define io_pgtable_ops_to_domain(x) \
+ container_of(io_pgtable_ops_to_data(x), \
+ struct protection_domain, iop)
+
+#define io_pgtable_cfg_to_data(x) \
+ container_of((x), struct amd_io_pgtable, pgtbl_cfg)
+
+struct amd_io_pgtable {
+ struct io_pgtable_cfg pgtbl_cfg;
+ struct io_pgtable iop;
+ int mode;
+ u64 *root;
+ atomic64_t pt_root; /* pgtable root and pgtable mode */
+};
+
/*
* This structure contains generic data for IOMMU protection domains
* independent of their use.
@@ -478,9 +513,9 @@ struct protection_domain {
struct list_head dev_list; /* List of all devices in this domain */
struct iommu_domain domain; /* generic domain handle used by
iommu core code */
+ struct amd_io_pgtable iop;
spinlock_t lock; /* mostly used to lock the page table*/
u16 id; /* the domain id written to the device table */
- atomic64_t pt_root; /* pgtable root and pgtable mode */
int glx; /* Number of levels for GCR3 table */
u64 *gcr3_tbl; /* Guest CR3 table */
unsigned long flags; /* flags to find out type of domain */
@@ -488,12 +523,6 @@ struct protection_domain {
unsigned dev_iommu[MAX_IOMMUS]; /* per-IOMMU reference count */
};
-/* For decocded pt_root */
-struct domain_pgtable {
- int mode;
- u64 *root;
-};
-
/*
* Structure where we save information about one hardware AMD IOMMU in the
* system.
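
[Annotation] To unpack AMD_IOMMU_PGSIZES above: ~0xFFFUL sets every bit from 2^12 upward, i.e. all power-of-two sizes of 4K and larger, while 2ULL << 38 is the 2^39 (512GB) bit, cleared because of the hardware bug the comment mentions. A quick stand-alone sketch enumerating what the mask advertises (illustrative only):

#include <stdio.h>

#define AMD_IOMMU_PGSIZES ((~0xFFFUL) & ~(2ULL << 38))

int main(void)
{
	/* Bit n set => a 2^n-byte page size is advertised to the IOMMU core. */
	for (int n = 12; n <= 48; n++) {
		if (AMD_IOMMU_PGSIZES & (1ULL << n))
			printf("2^%-2d bytes: supported\n", n);
		else
			printf("2^%-2d bytes: not supported (512GB bug)\n", n);
	}
	return 0;
}
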
diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index 83d8ab2aed9f..9126efcbaf2c 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -12,6 +12,7 @@
#include <linux/acpi.h>
#include <linux/list.h>
#include <linux/bitmap.h>
+#include <linux/delay.h>
#include <linux/slab.h>
#include <linux/syscore_ops.h>
#include <linux/interrupt.h>
@@ -147,6 +148,8 @@ struct ivmd_header {
bool amd_iommu_dump;
bool amd_iommu_irq_remap __read_mostly;
+enum io_pgtable_fmt amd_iommu_pgtable = AMD_IOMMU_V1;
+
int amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_VAPIC;
static int amd_iommu_xt_mode = IRQ_REMAP_XAPIC_MODE;
@@ -254,6 +257,8 @@ static enum iommu_init_state init_state = IOMMU_START_STATE;
static int amd_iommu_enable_interrupts(void);
static int __init iommu_go_to_state(enum iommu_init_state state);
static void init_device_table_dma(void);
+static int iommu_pc_get_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr,
+ u8 fxn, u64 *value, bool is_write);
static bool amd_iommu_pre_enabled = true;
@@ -1712,13 +1717,11 @@ static int __init init_iommu_all(struct acpi_table_header *table)
return 0;
}
-static int iommu_pc_get_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr,
- u8 fxn, u64 *value, bool is_write);
-
-static void init_iommu_perf_ctr(struct amd_iommu *iommu)
+static void __init init_iommu_perf_ctr(struct amd_iommu *iommu)
{
+ int retry;
struct pci_dev *pdev = iommu->dev;
- u64 val = 0xabcd, val2 = 0, save_reg = 0;
+ u64 val = 0xabcd, val2 = 0, save_reg, save_src;
if (!iommu_feature(iommu, FEATURE_PC))
return;
@@ -1726,17 +1729,39 @@ static void init_iommu_perf_ctr(struct amd_iommu *iommu)
amd_iommu_pc_present = true;
/* save the value to restore, if writable */
- if (iommu_pc_get_set_reg(iommu, 0, 0, 0, &save_reg, false))
+ if (iommu_pc_get_set_reg(iommu, 0, 0, 0, &save_reg, false) ||
+ iommu_pc_get_set_reg(iommu, 0, 0, 8, &save_src, false))
goto pc_false;
- /* Check if the performance counters can be written to */
- if ((iommu_pc_get_set_reg(iommu, 0, 0, 0, &val, true)) ||
- (iommu_pc_get_set_reg(iommu, 0, 0, 0, &val2, false)) ||
- (val != val2))
+ /*
+ * Disable power gating by programming the performance counter
+ * source to 20, i.e. reads and writes of the IOMMU Reserved
+ * Register [MMIO Offset 1FF8h]. Those accesses are ignored, so
+ * the counter never increments during this init phase.
+ * (Note: The event is also deprecated.)
+ */
+ val = 20;
+ if (iommu_pc_get_set_reg(iommu, 0, 0, 8, &val, true))
goto pc_false;
+ /* Check if the performance counters can be written to */
+ val = 0xabcd;
+ for (retry = 5; retry; retry--) {
+ if (iommu_pc_get_set_reg(iommu, 0, 0, 0, &val, true) ||
+ iommu_pc_get_set_reg(iommu, 0, 0, 0, &val2, false) ||
+ val2)
+ break;
+
+ /* Wait about 20 msec for power gating to disable and retry. */
+ msleep(20);
+ }
+
/* restore */
- if (iommu_pc_get_set_reg(iommu, 0, 0, 0, &save_reg, true))
+ if (iommu_pc_get_set_reg(iommu, 0, 0, 0, &save_reg, true) ||
+ iommu_pc_get_set_reg(iommu, 0, 0, 8, &save_src, true))
+ goto pc_false;
+
+ if (val != val2)
goto pc_false;
pci_info(pdev, "IOMMU performance counters supported\n");
@@ -1928,7 +1953,7 @@ static void print_iommu_info(void)
struct pci_dev *pdev = iommu->dev;
int i;
- pci_info(pdev, "Found IOMMU cap 0x%hx\n", iommu->cap_ptr);
+ pci_info(pdev, "Found IOMMU cap 0x%x\n", iommu->cap_ptr);
if (iommu->cap & (1 << IOMMU_CAP_EFR)) {
pci_info(pdev, "Extended features (%#llx):",
@@ -1956,7 +1981,7 @@ static void print_iommu_info(void)
static int __init amd_iommu_init_pci(void)
{
struct amd_iommu *iommu;
- int ret = 0;
+ int ret;
for_each_iommu(iommu) {
ret = iommu_init_pci(iommu);
@@ -2687,8 +2712,8 @@ static void __init ivinfo_init(void *ivrs)
static int __init early_amd_iommu_init(void)
{
struct acpi_table_header *ivrs_base;
+ int i, remap_cache_sz, ret;
acpi_status status;
- int i, remap_cache_sz, ret = 0;
u32 pci_id;
if (!amd_iommu_detected)
@@ -2832,7 +2857,6 @@ static int __init early_amd_iommu_init(void)
out:
/* Don't leak any ACPI memory */
acpi_put_table(ivrs_base);
- ivrs_base = NULL;
return ret;
}
diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c
new file mode 100644
index 000000000000..1c4961e05c12
--- /dev/null
+++ b/drivers/iommu/amd/io_pgtable.c
@@ -0,0 +1,558 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * CPU-agnostic AMD IO page table allocator.
+ *
+ * Copyright (C) 2020 Advanced Micro Devices, Inc.
+ * Author: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
+ */
+
+#define pr_fmt(fmt) "AMD-Vi: " fmt
+#define dev_fmt(fmt) pr_fmt(fmt)
+
+#include <linux/atomic.h>
+#include <linux/bitops.h>
+#include <linux/io-pgtable.h>
+#include <linux/kernel.h>
+#include <linux/sizes.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/dma-mapping.h>
+
+#include <asm/barrier.h>
+
+#include "amd_iommu_types.h"
+#include "amd_iommu.h"
+
+static void v1_tlb_flush_all(void *cookie)
+{
+}
+
+static void v1_tlb_flush_walk(unsigned long iova, size_t size,
+ size_t granule, void *cookie)
+{
+}
+
+static void v1_tlb_add_page(struct iommu_iotlb_gather *gather,
+ unsigned long iova, size_t granule,
+ void *cookie)
+{
+}
+
+static const struct iommu_flush_ops v1_flush_ops = {
+ .tlb_flush_all = v1_tlb_flush_all,
+ .tlb_flush_walk = v1_tlb_flush_walk,
+ .tlb_add_page = v1_tlb_add_page,
+};
+
+/*
+ * Helper function to get the first pte of a large mapping
+ */
+static u64 *first_pte_l7(u64 *pte, unsigned long *page_size,
+ unsigned long *count)
+{
+ unsigned long pte_mask, pg_size, cnt;
+ u64 *fpte;
+
+ pg_size = PTE_PAGE_SIZE(*pte);
+ cnt = PAGE_SIZE_PTE_COUNT(pg_size);
+ pte_mask = ~((cnt << 3) - 1);
+ fpte = (u64 *)(((unsigned long)pte) & pte_mask);
+
+ if (page_size)
+ *page_size = pg_size;
+
+ if (count)
+ *count = cnt;
+
+ return fpte;
+}
+
+/****************************************************************************
+ *
+ * The functions below are used to create the page table mappings for
+ * unity mapped regions.
+ *
+ ****************************************************************************/
+
+static void free_page_list(struct page *freelist)
+{
+ while (freelist != NULL) {
+ unsigned long p = (unsigned long)page_address(freelist);
+
+ freelist = freelist->freelist;
+ free_page(p);
+ }
+}
+
+static struct page *free_pt_page(unsigned long pt, struct page *freelist)
+{
+ struct page *p = virt_to_page((void *)pt);
+
+ p->freelist = freelist;
+
+ return p;
+}
+
+#define DEFINE_FREE_PT_FN(LVL, FN) \
+static struct page *free_pt_##LVL (unsigned long __pt, struct page *freelist) \
+{ \
+ unsigned long p; \
+ u64 *pt; \
+ int i; \
+ \
+ pt = (u64 *)__pt; \
+ \
+ for (i = 0; i < 512; ++i) { \
+ /* PTE present? */ \
+ if (!IOMMU_PTE_PRESENT(pt[i])) \
+ continue; \
+ \
+ /* Large PTE? */ \
+ if (PM_PTE_LEVEL(pt[i]) == 0 || \
+ PM_PTE_LEVEL(pt[i]) == 7) \
+ continue; \
+ \
+ p = (unsigned long)IOMMU_PTE_PAGE(pt[i]); \
+ freelist = FN(p, freelist); \
+ } \
+ \
+ return free_pt_page((unsigned long)pt, freelist); \
+}
+
+DEFINE_FREE_PT_FN(l2, free_pt_page)
+DEFINE_FREE_PT_FN(l3, free_pt_l2)
+DEFINE_FREE_PT_FN(l4, free_pt_l3)
+DEFINE_FREE_PT_FN(l5, free_pt_l4)
+DEFINE_FREE_PT_FN(l6, free_pt_l5)
+
+static struct page *free_sub_pt(unsigned long root, int mode,
+ struct page *freelist)
+{
+ switch (mode) {
+ case PAGE_MODE_NONE:
+ case PAGE_MODE_7_LEVEL:
+ break;
+ case PAGE_MODE_1_LEVEL:
+ freelist = free_pt_page(root, freelist);
+ break;
+ case PAGE_MODE_2_LEVEL:
+ freelist = free_pt_l2(root, freelist);
+ break;
+ case PAGE_MODE_3_LEVEL:
+ freelist = free_pt_l3(root, freelist);
+ break;
+ case PAGE_MODE_4_LEVEL:
+ freelist = free_pt_l4(root, freelist);
+ break;
+ case PAGE_MODE_5_LEVEL:
+ freelist = free_pt_l5(root, freelist);
+ break;
+ case PAGE_MODE_6_LEVEL:
+ freelist = free_pt_l6(root, freelist);
+ break;
+ default:
+ BUG();
+ }
+
+ return freelist;
+}
+
+void amd_iommu_domain_set_pgtable(struct protection_domain *domain,
+ u64 *root, int mode)
+{
+ u64 pt_root;
+
+ /* lowest 3 bits encode pgtable mode */
+ pt_root = mode & 7;
+ pt_root |= (u64)root;
+
+ amd_iommu_domain_set_pt_root(domain, pt_root);
+}
+
+/*
+ * This function is used to add another level to an IO page table. Adding
+ * another level increases the size of the address space by 9 bits to a size up
+ * to 64 bits.
+ */
+static bool increase_address_space(struct protection_domain *domain,
+ unsigned long address,
+ gfp_t gfp)
+{
+ unsigned long flags;
+ bool ret = true;
+ u64 *pte;
+
+ spin_lock_irqsave(&domain->lock, flags);
+
+ if (address <= PM_LEVEL_SIZE(domain->iop.mode))
+ goto out;
+
+ ret = false;
+ if (WARN_ON_ONCE(domain->iop.mode == PAGE_MODE_6_LEVEL))
+ goto out;
+
+ pte = (void *)get_zeroed_page(gfp);
+ if (!pte)
+ goto out;
+
+ *pte = PM_LEVEL_PDE(domain->iop.mode, iommu_virt_to_phys(domain->iop.root));
+
+ domain->iop.root = pte;
+ domain->iop.mode += 1;
+ amd_iommu_update_and_flush_device_table(domain);
+ amd_iommu_domain_flush_complete(domain);
+
+ /*
+ * Device Table needs to be updated and flushed before the new root can
+ * be published.
+ */
+ amd_iommu_domain_set_pgtable(domain, pte, domain->iop.mode);
+
+ ret = true;
+
+out:
+ spin_unlock_irqrestore(&domain->lock, flags);
+
+ return ret;
+}
+
+static u64 *alloc_pte(struct protection_domain *domain,
+ unsigned long address,
+ unsigned long page_size,
+ u64 **pte_page,
+ gfp_t gfp,
+ bool *updated)
+{
+ int level, end_lvl;
+ u64 *pte, *page;
+
+ BUG_ON(!is_power_of_2(page_size));
+
+ while (address > PM_LEVEL_SIZE(domain->iop.mode)) {
+ /*
+ * Return an error if there is no memory to update the
+ * page-table.
+ */
+ if (!increase_address_space(domain, address, gfp))
+ return NULL;
+ }
+
+
+ level = domain->iop.mode - 1;
+ pte = &domain->iop.root[PM_LEVEL_INDEX(level, address)];
+ address = PAGE_SIZE_ALIGN(address, page_size);
+ end_lvl = PAGE_SIZE_LEVEL(page_size);
+
+ while (level > end_lvl) {
+ u64 __pte, __npte;
+ int pte_level;
+
+ __pte = *pte;
+ pte_level = PM_PTE_LEVEL(__pte);
+
+ /*
+ * If we replace a series of large PTEs, we need
+ * to tear down all of them.
+ */
+ if (IOMMU_PTE_PRESENT(__pte) &&
+ pte_level == PAGE_MODE_7_LEVEL) {
+ unsigned long count, i;
+ u64 *lpte;
+
+ lpte = first_pte_l7(pte, NULL, &count);
+
+ /*
+ * Unmap the replicated PTEs that still match the
+ * original large mapping
+ */
+ for (i = 0; i < count; ++i)
+ cmpxchg64(&lpte[i], __pte, 0ULL);
+
+ *updated = true;
+ continue;
+ }
+
+ if (!IOMMU_PTE_PRESENT(__pte) ||
+ pte_level == PAGE_MODE_NONE) {
+ page = (u64 *)get_zeroed_page(gfp);
+
+ if (!page)
+ return NULL;
+
+ __npte = PM_LEVEL_PDE(level, iommu_virt_to_phys(page));
+
+ /* pte could have been changed somewhere. */
+ if (cmpxchg64(pte, __pte, __npte) != __pte)
+ free_page((unsigned long)page);
+ else if (IOMMU_PTE_PRESENT(__pte))
+ *updated = true;
+
+ continue;
+ }
+
+ /* No level skipping support yet */
+ if (pte_level != level)
+ return NULL;
+
+ level -= 1;
+
+ pte = IOMMU_PTE_PAGE(__pte);
+
+ if (pte_page && level == end_lvl)
+ *pte_page = pte;
+
+ pte = &pte[PM_LEVEL_INDEX(level, address)];
+ }
+
+ return pte;
+}
+
+/*
+ * This function checks if there is a PTE for a given dma address. If
+ * there is one, it returns the pointer to it.
+ */
+static u64 *fetch_pte(struct amd_io_pgtable *pgtable,
+ unsigned long address,
+ unsigned long *page_size)
+{
+ int level;
+ u64 *pte;
+
+ *page_size = 0;
+
+ if (address > PM_LEVEL_SIZE(pgtable->mode))
+ return NULL;
+
+ level = pgtable->mode - 1;
+ pte = &pgtable->root[PM_LEVEL_INDEX(level, address)];
+ *page_size = PTE_LEVEL_PAGE_SIZE(level);
+
+ while (level > 0) {
+
+ /* Not Present */
+ if (!IOMMU_PTE_PRESENT(*pte))
+ return NULL;
+
+ /* Large PTE */
+ if (PM_PTE_LEVEL(*pte) == 7 ||
+ PM_PTE_LEVEL(*pte) == 0)
+ break;
+
+ /* No level skipping support yet */
+ if (PM_PTE_LEVEL(*pte) != level)
+ return NULL;
+
+ level -= 1;
+
+ /* Walk to the next level */
+ pte = IOMMU_PTE_PAGE(*pte);
+ pte = &pte[PM_LEVEL_INDEX(level, address)];
+ *page_size = PTE_LEVEL_PAGE_SIZE(level);
+ }
+
+ /*
+ * If we have a series of large PTEs, make
+ * sure to return a pointer to the first one.
+ */
+ if (PM_PTE_LEVEL(*pte) == PAGE_MODE_7_LEVEL)
+ pte = first_pte_l7(pte, page_size, NULL);
+
+ return pte;
+}
+
+static struct page *free_clear_pte(u64 *pte, u64 pteval, struct page *freelist)
+{
+ unsigned long pt;
+ int mode;
+
+ while (cmpxchg64(pte, pteval, 0) != pteval) {
+ pr_warn("AMD-Vi: IOMMU pte changed since we read it\n");
+ pteval = *pte;
+ }
+
+ if (!IOMMU_PTE_PRESENT(pteval))
+ return freelist;
+
+ pt = (unsigned long)IOMMU_PTE_PAGE(pteval);
+ mode = IOMMU_PTE_MODE(pteval);
+
+ return free_sub_pt(pt, mode, freelist);
+}
+
+/*
+ * Generic mapping function. It maps a physical address into a DMA
+ * address space. It allocates the page table pages if necessary.
+ * In the future it can be extended to a generic mapping function
+ * supporting all features of AMD IOMMU page tables like level skipping
+ * and full 64 bit address spaces.
+ */
+static int iommu_v1_map_page(struct io_pgtable_ops *ops, unsigned long iova,
+ phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
+{
+ struct protection_domain *dom = io_pgtable_ops_to_domain(ops);
+ struct page *freelist = NULL;
+ bool updated = false;
+ u64 __pte, *pte;
+ int ret, i, count;
+
+ BUG_ON(!IS_ALIGNED(iova, size));
+ BUG_ON(!IS_ALIGNED(paddr, size));
+
+ ret = -EINVAL;
+ if (!(prot & IOMMU_PROT_MASK))
+ goto out;
+
+ count = PAGE_SIZE_PTE_COUNT(size);
+ pte = alloc_pte(dom, iova, size, NULL, gfp, &updated);
+
+ ret = -ENOMEM;
+ if (!pte)
+ goto out;
+
+ for (i = 0; i < count; ++i)
+ freelist = free_clear_pte(&pte[i], pte[i], freelist);
+
+ if (freelist != NULL)
+ updated = true;
+
+ if (count > 1) {
+ __pte = PAGE_SIZE_PTE(__sme_set(paddr), size);
+ __pte |= PM_LEVEL_ENC(7) | IOMMU_PTE_PR | IOMMU_PTE_FC;
+ } else
+ __pte = __sme_set(paddr) | IOMMU_PTE_PR | IOMMU_PTE_FC;
+
+ if (prot & IOMMU_PROT_IR)
+ __pte |= IOMMU_PTE_IR;
+ if (prot & IOMMU_PROT_IW)
+ __pte |= IOMMU_PTE_IW;
+
+ for (i = 0; i < count; ++i)
+ pte[i] = __pte;
+
+ ret = 0;
+
+out:
+ if (updated) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&dom->lock, flags);
+ /*
+ * Flush domain TLB(s) and wait for completion. Any Device-Table
+ * Updates and flushing already happened in
+ * increase_address_space().
+ */
+ amd_iommu_domain_flush_tlb_pde(dom);
+ amd_iommu_domain_flush_complete(dom);
+ spin_unlock_irqrestore(&dom->lock, flags);
+ }
+
+ /* Everything flushed out, free pages now */
+ free_page_list(freelist);
+
+ return ret;
+}
+
+static unsigned long iommu_v1_unmap_page(struct io_pgtable_ops *ops,
+ unsigned long iova,
+ size_t size,
+ struct iommu_iotlb_gather *gather)
+{
+ struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
+ unsigned long long unmapped;
+ unsigned long unmap_size;
+ u64 *pte;
+
+ BUG_ON(!is_power_of_2(size));
+
+ unmapped = 0;
+
+ while (unmapped < size) {
+ pte = fetch_pte(pgtable, iova, &unmap_size);
+ if (pte) {
+ int i, count;
+
+ count = PAGE_SIZE_PTE_COUNT(unmap_size);
+ for (i = 0; i < count; i++)
+ pte[i] = 0ULL;
+ }
+
+ iova = (iova & ~(unmap_size - 1)) + unmap_size;
+ unmapped += unmap_size;
+ }
+
+ BUG_ON(unmapped && !is_power_of_2(unmapped));
+
+ return unmapped;
+}
+
+static phys_addr_t iommu_v1_iova_to_phys(struct io_pgtable_ops *ops, unsigned long iova)
+{
+ struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
+ unsigned long offset_mask, pte_pgsize;
+ u64 *pte, __pte;
+
+ if (pgtable->mode == PAGE_MODE_NONE)
+ return iova;
+
+ pte = fetch_pte(pgtable, iova, &pte_pgsize);
+
+ if (!pte || !IOMMU_PTE_PRESENT(*pte))
+ return 0;
+
+ offset_mask = pte_pgsize - 1;
+ __pte = __sme_clr(*pte & PM_ADDR_MASK);
+
+ return (__pte & ~offset_mask) | (iova & offset_mask);
+}
+
+/*
+ * ----------------------------------------------------
+ */
+static void v1_free_pgtable(struct io_pgtable *iop)
+{
+ struct amd_io_pgtable *pgtable = container_of(iop, struct amd_io_pgtable, iop);
+ struct protection_domain *dom;
+ struct page *freelist = NULL;
+ unsigned long root;
+
+ if (pgtable->mode == PAGE_MODE_NONE)
+ return;
+
+ dom = container_of(pgtable, struct protection_domain, iop);
+
+ /* Update data structure */
+ amd_iommu_domain_clr_pt_root(dom);
+
+ /* Make changes visible to IOMMUs */
+ amd_iommu_domain_update(dom);
+
+ /* Page-table is not visible to IOMMU anymore, so free it */
+ BUG_ON(pgtable->mode < PAGE_MODE_NONE ||
+ pgtable->mode > PAGE_MODE_6_LEVEL);
+
+ root = (unsigned long)pgtable->root;
+ freelist = free_sub_pt(root, pgtable->mode, freelist);
+
+ free_page_list(freelist);
+}
+
+static struct io_pgtable *v1_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie)
+{
+ struct amd_io_pgtable *pgtable = io_pgtable_cfg_to_data(cfg);
+
+ cfg->pgsize_bitmap = AMD_IOMMU_PGSIZES,
+ cfg->ias = IOMMU_IN_ADDR_BIT_SIZE,
+ cfg->oas = IOMMU_OUT_ADDR_BIT_SIZE,
+ cfg->tlb = &v1_flush_ops;
+
+ pgtable->iop.ops.map = iommu_v1_map_page;
+ pgtable->iop.ops.unmap = iommu_v1_unmap_page;
+ pgtable->iop.ops.iova_to_phys = iommu_v1_iova_to_phys;
+
+ return &pgtable->iop;
+}
+
+struct io_pgtable_init_fns io_pgtable_amd_iommu_v1_init_fns = {
+ .alloc = v1_alloc_pgtable,
+ .free = v1_free_pgtable,
+};
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index f0adbc48fd17..a69a8b573e40 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -31,6 +31,7 @@
#include <linux/irqdomain.h>
#include <linux/percpu.h>
#include <linux/iova.h>
+#include <linux/io-pgtable.h>
#include <asm/irq_remapping.h>
#include <asm/io_apic.h>
#include <asm/apic.h>
@@ -57,16 +58,6 @@
#define HT_RANGE_START (0xfd00000000ULL)
#define HT_RANGE_END (0xffffffffffULL)
-/*
- * This bitmap is used to advertise the page sizes our hardware support
- * to the IOMMU core, which will then use this information to split
- * physically contiguous memory regions it is mapping into page sizes
- * that we support.
- *
- * 512GB Pages are not supported due to a hardware bug
- */
-#define AMD_IOMMU_PGSIZES ((~0xFFFUL) & ~(2ULL << 38))
-
#define DEFAULT_PGTABLE_LEVEL PAGE_MODE_3_LEVEL
static DEFINE_SPINLOCK(pd_bitmap_lock);
@@ -96,10 +87,7 @@ struct iommu_cmd {
struct kmem_cache *amd_iommu_irq_cache;
-static void update_domain(struct protection_domain *domain);
static void detach_device(struct device *dev);
-static void update_and_flush_device_table(struct protection_domain *domain,
- struct domain_pgtable *pgtable);
/****************************************************************************
*
@@ -151,37 +139,6 @@ static struct protection_domain *to_pdomain(struct iommu_domain *dom)
return container_of(dom, struct protection_domain, domain);
}
-static void amd_iommu_domain_get_pgtable(struct protection_domain *domain,
- struct domain_pgtable *pgtable)
-{
- u64 pt_root = atomic64_read(&domain->pt_root);
-
- pgtable->root = (u64 *)(pt_root & PAGE_MASK);
- pgtable->mode = pt_root & 7; /* lowest 3 bits encode pgtable mode */
-}
-
-static void amd_iommu_domain_set_pt_root(struct protection_domain *domain, u64 root)
-{
- atomic64_set(&domain->pt_root, root);
-}
-
-static void amd_iommu_domain_clr_pt_root(struct protection_domain *domain)
-{
- amd_iommu_domain_set_pt_root(domain, 0);
-}
-
-static void amd_iommu_domain_set_pgtable(struct protection_domain *domain,
- u64 *root, int mode)
-{
- u64 pt_root;
-
- /* lowest 3 bits encode pgtable mode */
- pt_root = mode & 7;
- pt_root |= (u64)root;
-
- amd_iommu_domain_set_pt_root(domain, pt_root);
-}
-
static struct iommu_dev_data *alloc_dev_data(u16 devid)
{
struct iommu_dev_data *dev_data;
@@ -437,29 +394,6 @@ static void amd_iommu_uninit_device(struct device *dev)
*/
}
-/*
- * Helper function to get the first pte of a large mapping
- */
-static u64 *first_pte_l7(u64 *pte, unsigned long *page_size,
- unsigned long *count)
-{
- unsigned long pte_mask, pg_size, cnt;
- u64 *fpte;
-
- pg_size = PTE_PAGE_SIZE(*pte);
- cnt = PAGE_SIZE_PTE_COUNT(pg_size);
- pte_mask = ~((cnt << 3) - 1);
- fpte = (u64 *)(((unsigned long)pte) & pte_mask);
-
- if (page_size)
- *page_size = pg_size;
-
- if (count)
- *count = cnt;
-
- return fpte;
-}
-
/****************************************************************************
*
* Interrupt handling functions
@@ -1335,12 +1269,12 @@ static void domain_flush_pages(struct protection_domain *domain,
}
/* Flush the whole IO/TLB for a given protection domain - including PDE */
-static void domain_flush_tlb_pde(struct protection_domain *domain)
+void amd_iommu_domain_flush_tlb_pde(struct protection_domain *domain)
{
__domain_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 1);
}
-static void domain_flush_complete(struct protection_domain *domain)
+void amd_iommu_domain_flush_complete(struct protection_domain *domain)
{
int i;
@@ -1365,7 +1299,7 @@ static void domain_flush_np_cache(struct protection_domain *domain,
spin_lock_irqsave(&domain->lock, flags);
domain_flush_pages(domain, iova, size);
- domain_flush_complete(domain);
+ amd_iommu_domain_flush_complete(domain);
spin_unlock_irqrestore(&domain->lock, flags);
}
}
@@ -1384,443 +1318,6 @@ static void domain_flush_devices(struct protection_domain *domain)
/****************************************************************************
*
- * The functions below are used the create the page table mappings for
- * unity mapped regions.
- *
- ****************************************************************************/
-
-static void free_page_list(struct page *freelist)
-{
- while (freelist != NULL) {
- unsigned long p = (unsigned long)page_address(freelist);
- freelist = freelist->freelist;
- free_page(p);
- }
-}
-
-static struct page *free_pt_page(unsigned long pt, struct page *freelist)
-{
- struct page *p = virt_to_page((void *)pt);
-
- p->freelist = freelist;
-
- return p;
-}
-
-#define DEFINE_FREE_PT_FN(LVL, FN) \
-static struct page *free_pt_##LVL (unsigned long __pt, struct page *freelist) \
-{ \
- unsigned long p; \
- u64 *pt; \
- int i; \
- \
- pt = (u64 *)__pt; \
- \
- for (i = 0; i < 512; ++i) { \
- /* PTE present? */ \
- if (!IOMMU_PTE_PRESENT(pt[i])) \
- continue; \
- \
- /* Large PTE? */ \
- if (PM_PTE_LEVEL(pt[i]) == 0 || \
- PM_PTE_LEVEL(pt[i]) == 7) \
- continue; \
- \
- p = (unsigned long)IOMMU_PTE_PAGE(pt[i]); \
- freelist = FN(p, freelist); \
- } \
- \
- return free_pt_page((unsigned long)pt, freelist); \
-}
-
-DEFINE_FREE_PT_FN(l2, free_pt_page)
-DEFINE_FREE_PT_FN(l3, free_pt_l2)
-DEFINE_FREE_PT_FN(l4, free_pt_l3)
-DEFINE_FREE_PT_FN(l5, free_pt_l4)
-DEFINE_FREE_PT_FN(l6, free_pt_l5)
-
-static struct page *free_sub_pt(unsigned long root, int mode,
- struct page *freelist)
-{
- switch (mode) {
- case PAGE_MODE_NONE:
- case PAGE_MODE_7_LEVEL:
- break;
- case PAGE_MODE_1_LEVEL:
- freelist = free_pt_page(root, freelist);
- break;
- case PAGE_MODE_2_LEVEL:
- freelist = free_pt_l2(root, freelist);
- break;
- case PAGE_MODE_3_LEVEL:
- freelist = free_pt_l3(root, freelist);
- break;
- case PAGE_MODE_4_LEVEL:
- freelist = free_pt_l4(root, freelist);
- break;
- case PAGE_MODE_5_LEVEL:
- freelist = free_pt_l5(root, freelist);
- break;
- case PAGE_MODE_6_LEVEL:
- freelist = free_pt_l6(root, freelist);
- break;
- default:
- BUG();
- }
-
- return freelist;
-}
-
-static void free_pagetable(struct domain_pgtable *pgtable)
-{
- struct page *freelist = NULL;
- unsigned long root;
-
- if (pgtable->mode == PAGE_MODE_NONE)
- return;
-
- BUG_ON(pgtable->mode < PAGE_MODE_NONE ||
- pgtable->mode > PAGE_MODE_6_LEVEL);
-
- root = (unsigned long)pgtable->root;
- freelist = free_sub_pt(root, pgtable->mode, freelist);
-
- free_page_list(freelist);
-}
-
-/*
- * This function is used to add another level to an IO page table. Adding
- * another level increases the size of the address space by 9 bits to a size up
- * to 64 bits.
- */
-static bool increase_address_space(struct protection_domain *domain,
- unsigned long address,
- gfp_t gfp)
-{
- struct domain_pgtable pgtable;
- unsigned long flags;
- bool ret = true;
- u64 *pte;
-
- spin_lock_irqsave(&domain->lock, flags);
-
- amd_iommu_domain_get_pgtable(domain, &pgtable);
-
- if (address <= PM_LEVEL_SIZE(pgtable.mode))
- goto out;
-
- ret = false;
- if (WARN_ON_ONCE(pgtable.mode == PAGE_MODE_6_LEVEL))
- goto out;
-
- pte = (void *)get_zeroed_page(gfp);
- if (!pte)
- goto out;
-
- *pte = PM_LEVEL_PDE(pgtable.mode, iommu_virt_to_phys(pgtable.root));
-
- pgtable.root = pte;
- pgtable.mode += 1;
- update_and_flush_device_table(domain, &pgtable);
- domain_flush_complete(domain);
-
- /*
- * Device Table needs to be updated and flushed before the new root can
- * be published.
- */
- amd_iommu_domain_set_pgtable(domain, pte, pgtable.mode);
-
- ret = true;
-
-out:
- spin_unlock_irqrestore(&domain->lock, flags);
-
- return ret;
-}
-
-static u64 *alloc_pte(struct protection_domain *domain,
- unsigned long address,
- unsigned long page_size,
- u64 **pte_page,
- gfp_t gfp,
- bool *updated)
-{
- struct domain_pgtable pgtable;
- int level, end_lvl;
- u64 *pte, *page;
-
- BUG_ON(!is_power_of_2(page_size));
-
- amd_iommu_domain_get_pgtable(domain, &pgtable);
-
- while (address > PM_LEVEL_SIZE(pgtable.mode)) {
- /*
- * Return an error if there is no memory to update the
- * page-table.
- */
- if (!increase_address_space(domain, address, gfp))
- return NULL;
-
- /* Read new values to check if update was successful */
- amd_iommu_domain_get_pgtable(domain, &pgtable);
- }
-
-
- level = pgtable.mode - 1;
- pte = &pgtable.root[PM_LEVEL_INDEX(level, address)];
- address = PAGE_SIZE_ALIGN(address, page_size);
- end_lvl = PAGE_SIZE_LEVEL(page_size);
-
- while (level > end_lvl) {
- u64 __pte, __npte;
- int pte_level;
-
- __pte = *pte;
- pte_level = PM_PTE_LEVEL(__pte);
-
- /*
- * If we replace a series of large PTEs, we need
- * to tear down all of them.
- */
- if (IOMMU_PTE_PRESENT(__pte) &&
- pte_level == PAGE_MODE_7_LEVEL) {
- unsigned long count, i;
- u64 *lpte;
-
- lpte = first_pte_l7(pte, NULL, &count);
-
- /*
- * Unmap the replicated PTEs that still match the
- * original large mapping
- */
- for (i = 0; i < count; ++i)
- cmpxchg64(&lpte[i], __pte, 0ULL);
-
- *updated = true;
- continue;
- }
-
- if (!IOMMU_PTE_PRESENT(__pte) ||
- pte_level == PAGE_MODE_NONE) {
- page = (u64 *)get_zeroed_page(gfp);
-
- if (!page)
- return NULL;
-
- __npte = PM_LEVEL_PDE(level, iommu_virt_to_phys(page));
-
- /* pte could have been changed somewhere. */
- if (cmpxchg64(pte, __pte, __npte) != __pte)
- free_page((unsigned long)page);
- else if (IOMMU_PTE_PRESENT(__pte))
- *updated = true;
-
- continue;
- }
-
- /* No level skipping support yet */
- if (pte_level != level)
- return NULL;
-
- level -= 1;
-
- pte = IOMMU_PTE_PAGE(__pte);
-
- if (pte_page && level == end_lvl)
- *pte_page = pte;
-
- pte = &pte[PM_LEVEL_INDEX(level, address)];
- }
-
- return pte;
-}
-
-/*
- * This function checks if there is a PTE for a given dma address. If
- * there is one, it returns the pointer to it.
- */
-static u64 *fetch_pte(struct protection_domain *domain,
- unsigned long address,
- unsigned long *page_size)
-{
- struct domain_pgtable pgtable;
- int level;
- u64 *pte;
-
- *page_size = 0;
-
- amd_iommu_domain_get_pgtable(domain, &pgtable);
-
- if (address > PM_LEVEL_SIZE(pgtable.mode))
- return NULL;
-
- level = pgtable.mode - 1;
- pte = &pgtable.root[PM_LEVEL_INDEX(level, address)];
- *page_size = PTE_LEVEL_PAGE_SIZE(level);
-
- while (level > 0) {
-
- /* Not Present */
- if (!IOMMU_PTE_PRESENT(*pte))
- return NULL;
-
- /* Large PTE */
- if (PM_PTE_LEVEL(*pte) == 7 ||
- PM_PTE_LEVEL(*pte) == 0)
- break;
-
- /* No level skipping support yet */
- if (PM_PTE_LEVEL(*pte) != level)
- return NULL;
-
- level -= 1;
-
- /* Walk to the next level */
- pte = IOMMU_PTE_PAGE(*pte);
- pte = &pte[PM_LEVEL_INDEX(level, address)];
- *page_size = PTE_LEVEL_PAGE_SIZE(level);
- }
-
- /*
- * If we have a series of large PTEs, make
- * sure to return a pointer to the first one.
- */
- if (PM_PTE_LEVEL(*pte) == PAGE_MODE_7_LEVEL)
- pte = first_pte_l7(pte, page_size, NULL);
-
- return pte;
-}
-
-static struct page *free_clear_pte(u64 *pte, u64 pteval, struct page *freelist)
-{
- unsigned long pt;
- int mode;
-
- while (cmpxchg64(pte, pteval, 0) != pteval) {
- pr_warn("AMD-Vi: IOMMU pte changed since we read it\n");
- pteval = *pte;
- }
-
- if (!IOMMU_PTE_PRESENT(pteval))
- return freelist;
-
- pt = (unsigned long)IOMMU_PTE_PAGE(pteval);
- mode = IOMMU_PTE_MODE(pteval);
-
- return free_sub_pt(pt, mode, freelist);
-}
-
-/*
- * Generic mapping functions. It maps a physical address into a DMA
- * address space. It allocates the page table pages if necessary.
- * In the future it can be extended to a generic mapping function
- * supporting all features of AMD IOMMU page tables like level skipping
- * and full 64 bit address spaces.
- */
-static int iommu_map_page(struct protection_domain *dom,
- unsigned long bus_addr,
- unsigned long phys_addr,
- unsigned long page_size,
- int prot,
- gfp_t gfp)
-{
- struct page *freelist = NULL;
- bool updated = false;
- u64 __pte, *pte;
- int ret, i, count;
-
- BUG_ON(!IS_ALIGNED(bus_addr, page_size));
- BUG_ON(!IS_ALIGNED(phys_addr, page_size));
-
- ret = -EINVAL;
- if (!(prot & IOMMU_PROT_MASK))
- goto out;
-
- count = PAGE_SIZE_PTE_COUNT(page_size);
- pte = alloc_pte(dom, bus_addr, page_size, NULL, gfp, &updated);
-
- ret = -ENOMEM;
- if (!pte)
- goto out;
-
- for (i = 0; i < count; ++i)
- freelist = free_clear_pte(&pte[i], pte[i], freelist);
-
- if (freelist != NULL)
- updated = true;
-
- if (count > 1) {
- __pte = PAGE_SIZE_PTE(__sme_set(phys_addr), page_size);
- __pte |= PM_LEVEL_ENC(7) | IOMMU_PTE_PR | IOMMU_PTE_FC;
- } else
- __pte = __sme_set(phys_addr) | IOMMU_PTE_PR | IOMMU_PTE_FC;
-
- if (prot & IOMMU_PROT_IR)
- __pte |= IOMMU_PTE_IR;
- if (prot & IOMMU_PROT_IW)
- __pte |= IOMMU_PTE_IW;
-
- for (i = 0; i < count; ++i)
- pte[i] = __pte;
-
- ret = 0;
-
-out:
- if (updated) {
- unsigned long flags;
-
- spin_lock_irqsave(&dom->lock, flags);
- /*
- * Flush domain TLB(s) and wait for completion. Any Device-Table
- * Updates and flushing already happened in
- * increase_address_space().
- */
- domain_flush_tlb_pde(dom);
- domain_flush_complete(dom);
- spin_unlock_irqrestore(&dom->lock, flags);
- }
-
- /* Everything flushed out, free pages now */
- free_page_list(freelist);
-
- return ret;
-}
-
-static unsigned long iommu_unmap_page(struct protection_domain *dom,
- unsigned long bus_addr,
- unsigned long page_size)
-{
- unsigned long long unmapped;
- unsigned long unmap_size;
- u64 *pte;
-
- BUG_ON(!is_power_of_2(page_size));
-
- unmapped = 0;
-
- while (unmapped < page_size) {
-
- pte = fetch_pte(dom, bus_addr, &unmap_size);
-
- if (pte) {
- int i, count;
-
- count = PAGE_SIZE_PTE_COUNT(unmap_size);
- for (i = 0; i < count; i++)
- pte[i] = 0ULL;
- }
-
- bus_addr = (bus_addr & ~(unmap_size - 1)) + unmap_size;
- unmapped += unmap_size;
- }
-
- BUG_ON(unmapped && !is_power_of_2(unmapped));
-
- return unmapped;
-}
-
-/****************************************************************************
- *
* The next functions belong to the domain allocation. A domain is
* allocated for every IOMMU as the default domain. If device isolation
* is enabled, every device get its own domain. The most important thing
@@ -1896,17 +1393,16 @@ static void free_gcr3_table(struct protection_domain *domain)
}
static void set_dte_entry(u16 devid, struct protection_domain *domain,
- struct domain_pgtable *pgtable,
bool ats, bool ppr)
{
u64 pte_root = 0;
u64 flags = 0;
u32 old_domid;
- if (pgtable->mode != PAGE_MODE_NONE)
- pte_root = iommu_virt_to_phys(pgtable->root);
+ if (domain->iop.mode != PAGE_MODE_NONE)
+ pte_root = iommu_virt_to_phys(domain->iop.root);
- pte_root |= (pgtable->mode & DEV_ENTRY_MODE_MASK)
+ pte_root |= (domain->iop.mode & DEV_ENTRY_MODE_MASK)
<< DEV_ENTRY_MODE_SHIFT;
pte_root |= DTE_FLAG_IR | DTE_FLAG_IW | DTE_FLAG_V | DTE_FLAG_TV;
@@ -1979,7 +1475,6 @@ static void clear_dte_entry(u16 devid)
static void do_attach(struct iommu_dev_data *dev_data,
struct protection_domain *domain)
{
- struct domain_pgtable pgtable;
struct amd_iommu *iommu;
bool ats;
@@ -1995,8 +1490,7 @@ static void do_attach(struct iommu_dev_data *dev_data,
domain->dev_cnt += 1;
/* Update device table */
- amd_iommu_domain_get_pgtable(domain, &pgtable);
- set_dte_entry(dev_data->devid, domain, &pgtable,
+ set_dte_entry(dev_data->devid, domain,
ats, dev_data->iommu_v2);
clone_aliases(dev_data->pdev);
@@ -2020,10 +1514,10 @@ static void do_detach(struct iommu_dev_data *dev_data)
device_flush_dte(dev_data);
/* Flush IOTLB */
- domain_flush_tlb_pde(domain);
+ amd_iommu_domain_flush_tlb_pde(domain);
/* Wait for the flushes to finish */
- domain_flush_complete(domain);
+ amd_iommu_domain_flush_complete(domain);
/* decrease reference counters - needs to happen after the flushes */
domain->dev_iommu[iommu->index] -= 1;
@@ -2156,9 +1650,9 @@ skip_ats_check:
* left the caches in the IOMMU dirty. So we have to flush
* here to evict all dirty stuff.
*/
- domain_flush_tlb_pde(domain);
+ amd_iommu_domain_flush_tlb_pde(domain);
- domain_flush_complete(domain);
+ amd_iommu_domain_flush_complete(domain);
out:
spin_unlock(&dev_data->lock);
@@ -2303,36 +1797,31 @@ static int amd_iommu_domain_get_attr(struct iommu_domain *domain,
*
*****************************************************************************/
-static void update_device_table(struct protection_domain *domain,
- struct domain_pgtable *pgtable)
+static void update_device_table(struct protection_domain *domain)
{
struct iommu_dev_data *dev_data;
list_for_each_entry(dev_data, &domain->dev_list, list) {
- set_dte_entry(dev_data->devid, domain, pgtable,
+ set_dte_entry(dev_data->devid, domain,
dev_data->ats.enabled, dev_data->iommu_v2);
clone_aliases(dev_data->pdev);
}
}
-static void update_and_flush_device_table(struct protection_domain *domain,
- struct domain_pgtable *pgtable)
+void amd_iommu_update_and_flush_device_table(struct protection_domain *domain)
{
- update_device_table(domain, pgtable);
+ update_device_table(domain);
domain_flush_devices(domain);
}
-static void update_domain(struct protection_domain *domain)
+void amd_iommu_domain_update(struct protection_domain *domain)
{
- struct domain_pgtable pgtable;
-
/* Update device table */
- amd_iommu_domain_get_pgtable(domain, &pgtable);
- update_and_flush_device_table(domain, &pgtable);
+ amd_iommu_update_and_flush_device_table(domain);
/* Flush domain TLB(s) and wait for completion */
- domain_flush_tlb_pde(domain);
- domain_flush_complete(domain);
+ amd_iommu_domain_flush_tlb_pde(domain);
+ amd_iommu_domain_flush_complete(domain);
}
int __init amd_iommu_init_api(void)
@@ -2400,22 +1889,19 @@ static void cleanup_domain(struct protection_domain *domain)
static void protection_domain_free(struct protection_domain *domain)
{
- struct domain_pgtable pgtable;
-
if (!domain)
return;
if (domain->id)
domain_id_free(domain->id);
- amd_iommu_domain_get_pgtable(domain, &pgtable);
- amd_iommu_domain_clr_pt_root(domain);
- free_pagetable(&pgtable);
+ if (domain->iop.pgtbl_cfg.tlb)
+ free_io_pgtable_ops(&domain->iop.iop.ops);
kfree(domain);
}
-static int protection_domain_init(struct protection_domain *domain, int mode)
+static int protection_domain_init_v1(struct protection_domain *domain, int mode)
{
u64 *pt_root = NULL;
@@ -2438,34 +1924,55 @@ static int protection_domain_init(struct protection_domain *domain, int mode)
return 0;
}
-static struct protection_domain *protection_domain_alloc(int mode)
+static struct protection_domain *protection_domain_alloc(unsigned int type)
{
+ struct io_pgtable_ops *pgtbl_ops;
struct protection_domain *domain;
+ int pgtable = amd_iommu_pgtable;
+ int mode = DEFAULT_PGTABLE_LEVEL;
+ int ret;
domain = kzalloc(sizeof(*domain), GFP_KERNEL);
if (!domain)
return NULL;
- if (protection_domain_init(domain, mode))
+ /*
+ * Force IOMMU v1 page table when iommu=pt and
+ * when allocating domain for pass-through devices.
+ */
+ if (type == IOMMU_DOMAIN_IDENTITY) {
+ pgtable = AMD_IOMMU_V1;
+ mode = PAGE_MODE_NONE;
+ } else if (type == IOMMU_DOMAIN_UNMANAGED) {
+ pgtable = AMD_IOMMU_V1;
+ }
+
+ switch (pgtable) {
+ case AMD_IOMMU_V1:
+ ret = protection_domain_init_v1(domain, mode);
+ break;
+ default:
+ ret = -EINVAL;
+ }
+
+ if (ret)
goto out_err;
- return domain;
+ pgtbl_ops = alloc_io_pgtable_ops(pgtable, &domain->iop.pgtbl_cfg, domain);
+ if (!pgtbl_ops)
+ goto out_err;
+ return domain;
out_err:
kfree(domain);
-
return NULL;
}
static struct iommu_domain *amd_iommu_domain_alloc(unsigned type)
{
struct protection_domain *domain;
- int mode = DEFAULT_PGTABLE_LEVEL;
-
- if (type == IOMMU_DOMAIN_IDENTITY)
- mode = PAGE_MODE_NONE;
- domain = protection_domain_alloc(mode);
+ domain = protection_domain_alloc(type);
if (!domain)
return NULL;
@@ -2580,12 +2087,12 @@ static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova,
gfp_t gfp)
{
struct protection_domain *domain = to_pdomain(dom);
- struct domain_pgtable pgtable;
+ struct io_pgtable_ops *ops = &domain->iop.iop.ops;
int prot = 0;
- int ret;
+ int ret = -EINVAL;
- amd_iommu_domain_get_pgtable(domain, &pgtable);
- if (pgtable.mode == PAGE_MODE_NONE)
+ if ((amd_iommu_pgtable == AMD_IOMMU_V1) &&
+ (domain->iop.mode == PAGE_MODE_NONE))
return -EINVAL;
if (iommu_prot & IOMMU_READ)
@@ -2593,9 +2100,10 @@ static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova,
if (iommu_prot & IOMMU_WRITE)
prot |= IOMMU_PROT_IW;
- ret = iommu_map_page(domain, iova, paddr, page_size, prot, gfp);
-
- domain_flush_np_cache(domain, iova, page_size);
+ if (ops->map) {
+ ret = ops->map(ops, iova, paddr, page_size, prot, gfp);
+ domain_flush_np_cache(domain, iova, page_size);
+ }
return ret;
}
@@ -2605,36 +2113,22 @@ static size_t amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova,
struct iommu_iotlb_gather *gather)
{
struct protection_domain *domain = to_pdomain(dom);
- struct domain_pgtable pgtable;
+ struct io_pgtable_ops *ops = &domain->iop.iop.ops;
- amd_iommu_domain_get_pgtable(domain, &pgtable);
- if (pgtable.mode == PAGE_MODE_NONE)
+ if ((amd_iommu_pgtable == AMD_IOMMU_V1) &&
+ (domain->iop.mode == PAGE_MODE_NONE))
return 0;
- return iommu_unmap_page(domain, iova, page_size);
+ return (ops->unmap) ? ops->unmap(ops, iova, page_size, gather) : 0;
}
static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom,
dma_addr_t iova)
{
struct protection_domain *domain = to_pdomain(dom);
- unsigned long offset_mask, pte_pgsize;
- struct domain_pgtable pgtable;
- u64 *pte, __pte;
-
- amd_iommu_domain_get_pgtable(domain, &pgtable);
- if (pgtable.mode == PAGE_MODE_NONE)
- return iova;
+ struct io_pgtable_ops *ops = &domain->iop.iop.ops;
- pte = fetch_pte(domain, iova, &pte_pgsize);
-
- if (!pte || !IOMMU_PTE_PRESENT(*pte))
- return 0;
-
- offset_mask = pte_pgsize - 1;
- __pte = __sme_clr(*pte & PM_ADDR_MASK);
-
- return (__pte & ~offset_mask) | (iova & offset_mask);
+ return ops->iova_to_phys(ops, iova);
}
static bool amd_iommu_capable(enum iommu_cap cap)
@@ -2720,8 +2214,8 @@ static void amd_iommu_flush_iotlb_all(struct iommu_domain *domain)
unsigned long flags;
spin_lock_irqsave(&dom->lock, flags);
- domain_flush_tlb_pde(dom);
- domain_flush_complete(dom);
+ amd_iommu_domain_flush_tlb_pde(dom);
+ amd_iommu_domain_flush_complete(dom);
spin_unlock_irqrestore(&dom->lock, flags);
}
@@ -2799,22 +2293,12 @@ EXPORT_SYMBOL(amd_iommu_unregister_ppr_notifier);
void amd_iommu_domain_direct_map(struct iommu_domain *dom)
{
struct protection_domain *domain = to_pdomain(dom);
- struct domain_pgtable pgtable;
unsigned long flags;
spin_lock_irqsave(&domain->lock, flags);
- /* First save pgtable configuration*/
- amd_iommu_domain_get_pgtable(domain, &pgtable);
-
- /* Remove page-table from domain */
- amd_iommu_domain_clr_pt_root(domain);
-
- /* Make changes visible to IOMMUs */
- update_domain(domain);
-
- /* Page-table is not visible to IOMMU anymore, so free it */
- free_pagetable(&pgtable);
+ if (domain->iop.pgtbl_cfg.tlb)
+ free_io_pgtable_ops(&domain->iop.iop.ops);
spin_unlock_irqrestore(&domain->lock, flags);
}
@@ -2855,7 +2339,7 @@ int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids)
domain->glx = levels;
domain->flags |= PD_IOMMUV2_MASK;
- update_domain(domain);
+ amd_iommu_domain_update(domain);
ret = 0;
@@ -2892,7 +2376,7 @@ static int __flush_pasid(struct protection_domain *domain, u32 pasid,
}
/* Wait until IOMMU TLB flushes are complete */
- domain_flush_complete(domain);
+ amd_iommu_domain_flush_complete(domain);
/* Now flush device TLBs */
list_for_each_entry(dev_data, &domain->dev_list, list) {
@@ -2918,7 +2402,7 @@ static int __flush_pasid(struct protection_domain *domain, u32 pasid,
}
/* Wait until all device TLBs are flushed */
- domain_flush_complete(domain);
+ amd_iommu_domain_flush_complete(domain);
ret = 0;
@@ -3003,11 +2487,9 @@ static u64 *__get_gcr3_pte(u64 *root, int level, u32 pasid, bool alloc)
static int __set_gcr3(struct protection_domain *domain, u32 pasid,
unsigned long cr3)
{
- struct domain_pgtable pgtable;
u64 *pte;
- amd_iommu_domain_get_pgtable(domain, &pgtable);
- if (pgtable.mode != PAGE_MODE_NONE)
+ if (domain->iop.mode != PAGE_MODE_NONE)
return -EINVAL;
pte = __get_gcr3_pte(domain->gcr3_tbl, domain->glx, pasid, true);
@@ -3021,11 +2503,9 @@ static int __set_gcr3(struct protection_domain *domain, u32 pasid,
static int __clear_gcr3(struct protection_domain *domain, u32 pasid)
{
- struct domain_pgtable pgtable;
u64 *pte;
- amd_iommu_domain_get_pgtable(domain, &pgtable);
- if (pgtable.mode != PAGE_MODE_NONE)
+ if (domain->iop.mode != PAGE_MODE_NONE)
return -EINVAL;
pte = __get_gcr3_pte(domain->gcr3_tbl, domain->glx, pasid, false);
diff --git a/drivers/iommu/amd/iommu_v2.c b/drivers/iommu/amd/iommu_v2.c
index 5ecc0bc608ec..f8d4ad421e07 100644
--- a/drivers/iommu/amd/iommu_v2.c
+++ b/drivers/iommu/amd/iommu_v2.c
@@ -77,7 +77,7 @@ struct fault {
};
static LIST_HEAD(state_list);
-static spinlock_t state_lock;
+static DEFINE_SPINLOCK(state_lock);
static struct workqueue_struct *iommu_wq;
@@ -938,8 +938,6 @@ static int __init amd_iommu_v2_init(void)
return 0;
}
- spin_lock_init(&state_lock);
-
ret = -ENOMEM;
iommu_wq = alloc_workqueue("amd_iommu_v2", WQ_MEM_RECLAIM, 0);
if (iommu_wq == NULL)
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
index e13b092e6004..bb251cab61f3 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
@@ -182,9 +182,13 @@ static void arm_smmu_mm_invalidate_range(struct mmu_notifier *mn,
unsigned long start, unsigned long end)
{
struct arm_smmu_mmu_notifier *smmu_mn = mn_to_smmu(mn);
+ struct arm_smmu_domain *smmu_domain = smmu_mn->domain;
+ size_t size = end - start + 1;
- arm_smmu_atc_inv_domain(smmu_mn->domain, mm->pasid, start,
- end - start + 1);
+ if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_BTM))
+ arm_smmu_tlb_inv_range_asid(start, size, smmu_mn->cd->asid,
+ PAGE_SIZE, false, smmu_domain);
+ arm_smmu_atc_inv_domain(smmu_domain, mm->pasid, start, size);
}
static void arm_smmu_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
@@ -391,7 +395,7 @@ bool arm_smmu_sva_supported(struct arm_smmu_device *smmu)
unsigned long reg, fld;
unsigned long oas;
unsigned long asid_bits;
- u32 feat_mask = ARM_SMMU_FEAT_BTM | ARM_SMMU_FEAT_COHERENCY;
+ u32 feat_mask = ARM_SMMU_FEAT_COHERENCY;
if (vabits_actual == 52)
feat_mask |= ARM_SMMU_FEAT_VAX;
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index 8ca7415d785d..8594b4a83043 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -88,15 +88,6 @@ static struct arm_smmu_option_prop arm_smmu_options[] = {
{ 0, NULL},
};
-static inline void __iomem *arm_smmu_page1_fixup(unsigned long offset,
- struct arm_smmu_device *smmu)
-{
- if (offset > SZ_64K)
- return smmu->page1 + offset - SZ_64K;
-
- return smmu->base + offset;
-}
-
static void parse_driver_options(struct arm_smmu_device *smmu)
{
int i = 0;
@@ -272,9 +263,11 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
break;
case CMDQ_OP_TLBI_NH_VA:
+ cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
+ fallthrough;
+ case CMDQ_OP_TLBI_EL2_VA:
cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
- cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
@@ -296,6 +289,9 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
case CMDQ_OP_TLBI_S12_VMALL:
cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
break;
+ case CMDQ_OP_TLBI_EL2_ASID:
+ cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
+ break;
case CMDQ_OP_ATC_INV:
cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
cmd[0] |= FIELD_PREP(CMDQ_ATC_0_GLOBAL, ent->atc.global);
@@ -886,7 +882,8 @@ static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu,
void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid)
{
struct arm_smmu_cmdq_ent cmd = {
- .opcode = CMDQ_OP_TLBI_NH_ASID,
+ .opcode = smmu->features & ARM_SMMU_FEAT_E2H ?
+ CMDQ_OP_TLBI_EL2_ASID : CMDQ_OP_TLBI_NH_ASID,
.tlbi.asid = asid,
};
@@ -1269,13 +1266,16 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
}
if (s1_cfg) {
+ u64 strw = smmu->features & ARM_SMMU_FEAT_E2H ?
+ STRTAB_STE_1_STRW_EL2 : STRTAB_STE_1_STRW_NSEL1;
+
BUG_ON(ste_live);
dst[1] = cpu_to_le64(
FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0) |
FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
- FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_NSEL1));
+ FIELD_PREP(STRTAB_STE_1_STRW, strw));
if (smmu->features & ARM_SMMU_FEAT_STALLS &&
!(smmu->features & ARM_SMMU_FEAT_STALL_FORCE))
@@ -1667,40 +1667,28 @@ static void arm_smmu_tlb_inv_context(void *cookie)
arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
}
-static void arm_smmu_tlb_inv_range(unsigned long iova, size_t size,
- size_t granule, bool leaf,
- struct arm_smmu_domain *smmu_domain)
+static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd,
+ unsigned long iova, size_t size,
+ size_t granule,
+ struct arm_smmu_domain *smmu_domain)
{
struct arm_smmu_device *smmu = smmu_domain->smmu;
- unsigned long start = iova, end = iova + size, num_pages = 0, tg = 0;
+ unsigned long end = iova + size, num_pages = 0, tg = 0;
size_t inv_range = granule;
struct arm_smmu_cmdq_batch cmds = {};
- struct arm_smmu_cmdq_ent cmd = {
- .tlbi = {
- .leaf = leaf,
- },
- };
if (!size)
return;
- if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
- cmd.opcode = CMDQ_OP_TLBI_NH_VA;
- cmd.tlbi.asid = smmu_domain->s1_cfg.cd.asid;
- } else {
- cmd.opcode = CMDQ_OP_TLBI_S2_IPA;
- cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid;
- }
-
if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
/* Get the leaf page size */
tg = __ffs(smmu_domain->domain.pgsize_bitmap);
/* Convert page size of 12,14,16 (log2) to 1,2,3 */
- cmd.tlbi.tg = (tg - 10) / 2;
+ cmd->tlbi.tg = (tg - 10) / 2;
/* Determine what level the granule is at */
- cmd.tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3));
+ cmd->tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3));
num_pages = size >> tg;
}
@@ -1718,11 +1706,11 @@ static void arm_smmu_tlb_inv_range(unsigned long iova, size_t size,
/* Determine the power of 2 multiple number of pages */
scale = __ffs(num_pages);
- cmd.tlbi.scale = scale;
+ cmd->tlbi.scale = scale;
/* Determine how many chunks of 2^scale size we have */
num = (num_pages >> scale) & CMDQ_TLBI_RANGE_NUM_MAX;
- cmd.tlbi.num = num - 1;
+ cmd->tlbi.num = num - 1;
/* range is num * 2^scale * pgsize */
inv_range = num << (scale + tg);
@@ -1731,17 +1719,54 @@ static void arm_smmu_tlb_inv_range(unsigned long iova, size_t size,
num_pages -= num << scale;
}
- cmd.tlbi.addr = iova;
- arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
+ cmd->tlbi.addr = iova;
+ arm_smmu_cmdq_batch_add(smmu, &cmds, cmd);
iova += inv_range;
}
arm_smmu_cmdq_batch_submit(smmu, &cmds);
+}
+
+static void arm_smmu_tlb_inv_range_domain(unsigned long iova, size_t size,
+ size_t granule, bool leaf,
+ struct arm_smmu_domain *smmu_domain)
+{
+ struct arm_smmu_cmdq_ent cmd = {
+ .tlbi = {
+ .leaf = leaf,
+ },
+ };
+
+ if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
+ cmd.opcode = smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
+ CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA;
+ cmd.tlbi.asid = smmu_domain->s1_cfg.cd.asid;
+ } else {
+ cmd.opcode = CMDQ_OP_TLBI_S2_IPA;
+ cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid;
+ }
+ __arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
/*
* Unfortunately, this can't be leaf-only since we may have
* zapped an entire table.
*/
- arm_smmu_atc_inv_domain(smmu_domain, 0, start, size);
+ arm_smmu_atc_inv_domain(smmu_domain, 0, iova, size);
+}
+
+void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid,
+ size_t granule, bool leaf,
+ struct arm_smmu_domain *smmu_domain)
+{
+ struct arm_smmu_cmdq_ent cmd = {
+ .opcode = smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
+ CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA,
+ .tlbi = {
+ .asid = asid,
+ .leaf = leaf,
+ },
+ };
+
+ __arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
}
static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather,
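
[Annotation] The range-invalidation encoding in __arm_smmu_tlb_inv_range() above is pure shift arithmetic: TG maps the leaf page-size log2 (12, 14, 16) to 1, 2, 3, and TTL derives which table level holds the invalidated granule. A stand-alone sketch of that arithmetic (user-space C, values illustrative):

#include <stdio.h>

/* Reproduce the ARM SMMUv3 range-TLBI field encoding used above. */
static void encode(int pg_shift, int granule_shift)
{
	int tg  = (pg_shift - 10) / 2;			/* 12,14,16 -> 1,2,3 */
	int ttl = 4 - ((granule_shift - 3) / (pg_shift - 3));

	printf("pgsize=2^%d granule=2^%d -> TG=%d TTL=%d\n",
	       pg_shift, granule_shift, tg, ttl);
}

int main(void)
{
	encode(12, 12);	/* 4K pages,  4K leaf entries  -> TG=1 TTL=3 */
	encode(12, 21);	/* 4K pages,  2M block entries -> TG=1 TTL=2 */
	encode(16, 16);	/* 64K pages, 64K leaf entries -> TG=3 TTL=3 */
	return 0;
}
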
@@ -1757,7 +1782,7 @@ static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather,
static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size,
size_t granule, void *cookie)
{
- arm_smmu_tlb_inv_range(iova, size, granule, false, cookie);
+ arm_smmu_tlb_inv_range_domain(iova, size, granule, false, cookie);
}
static const struct iommu_flush_ops arm_smmu_flush_ops = {
@@ -2280,8 +2305,9 @@ static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
{
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
- arm_smmu_tlb_inv_range(gather->start, gather->end - gather->start,
- gather->pgsize, true, smmu_domain);
+ arm_smmu_tlb_inv_range_domain(gather->start,
+ gather->end - gather->start + 1,
+ gather->pgsize, true, smmu_domain);
}
static phys_addr_t
@@ -2611,6 +2637,7 @@ static struct iommu_ops arm_smmu_ops = {
/* Probing and initialisation functions */
static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
struct arm_smmu_queue *q,
+ void __iomem *page,
unsigned long prod_off,
unsigned long cons_off,
size_t dwords, const char *name)
@@ -2639,8 +2666,8 @@ static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
1 << q->llq.max_n_shift, name);
}
- q->prod_reg = arm_smmu_page1_fixup(prod_off, smmu);
- q->cons_reg = arm_smmu_page1_fixup(cons_off, smmu);
+ q->prod_reg = page + prod_off;
+ q->cons_reg = page + cons_off;
q->ent_dwords = dwords;
q->q_base = Q_BASE_RWA;
@@ -2684,9 +2711,9 @@ static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
int ret;
/* cmdq */
- ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, ARM_SMMU_CMDQ_PROD,
- ARM_SMMU_CMDQ_CONS, CMDQ_ENT_DWORDS,
- "cmdq");
+ ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, smmu->base,
+ ARM_SMMU_CMDQ_PROD, ARM_SMMU_CMDQ_CONS,
+ CMDQ_ENT_DWORDS, "cmdq");
if (ret)
return ret;
@@ -2695,9 +2722,9 @@ static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
return ret;
/* evtq */
- ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, ARM_SMMU_EVTQ_PROD,
- ARM_SMMU_EVTQ_CONS, EVTQ_ENT_DWORDS,
- "evtq");
+ ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, smmu->page1,
+ ARM_SMMU_EVTQ_PROD, ARM_SMMU_EVTQ_CONS,
+ EVTQ_ENT_DWORDS, "evtq");
if (ret)
return ret;
@@ -2705,9 +2732,9 @@ static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
if (!(smmu->features & ARM_SMMU_FEAT_PRI))
return 0;
- return arm_smmu_init_one_queue(smmu, &smmu->priq.q, ARM_SMMU_PRIQ_PROD,
- ARM_SMMU_PRIQ_CONS, PRIQ_ENT_DWORDS,
- "priq");
+ return arm_smmu_init_one_queue(smmu, &smmu->priq.q, smmu->page1,
+ ARM_SMMU_PRIQ_PROD, ARM_SMMU_PRIQ_CONS,
+ PRIQ_ENT_DWORDS, "priq");
}
static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu)
@@ -3060,7 +3087,11 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);
/* CR2 (random crap) */
- reg = CR2_PTM | CR2_RECINVSID | CR2_E2H;
+ reg = CR2_PTM | CR2_RECINVSID;
+
+ if (smmu->features & ARM_SMMU_FEAT_E2H)
+ reg |= CR2_E2H;
+
writel_relaxed(reg, smmu->base + ARM_SMMU_CR2);
/* Stream table */
@@ -3099,10 +3130,8 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
/* Event queue */
writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
- writel_relaxed(smmu->evtq.q.llq.prod,
- arm_smmu_page1_fixup(ARM_SMMU_EVTQ_PROD, smmu));
- writel_relaxed(smmu->evtq.q.llq.cons,
- arm_smmu_page1_fixup(ARM_SMMU_EVTQ_CONS, smmu));
+ writel_relaxed(smmu->evtq.q.llq.prod, smmu->page1 + ARM_SMMU_EVTQ_PROD);
+ writel_relaxed(smmu->evtq.q.llq.cons, smmu->page1 + ARM_SMMU_EVTQ_CONS);
enables |= CR0_EVTQEN;
ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
@@ -3117,9 +3146,9 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
writeq_relaxed(smmu->priq.q.q_base,
smmu->base + ARM_SMMU_PRIQ_BASE);
writel_relaxed(smmu->priq.q.llq.prod,
- arm_smmu_page1_fixup(ARM_SMMU_PRIQ_PROD, smmu));
+ smmu->page1 + ARM_SMMU_PRIQ_PROD);
writel_relaxed(smmu->priq.q.llq.cons,
- arm_smmu_page1_fixup(ARM_SMMU_PRIQ_CONS, smmu));
+ smmu->page1 + ARM_SMMU_PRIQ_CONS);
enables |= CR0_PRIQEN;
ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
@@ -3221,8 +3250,11 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
smmu->options |= ARM_SMMU_OPT_MSIPOLL;
}
- if (reg & IDR0_HYP)
+ if (reg & IDR0_HYP) {
smmu->features |= ARM_SMMU_FEAT_HYP;
+ if (cpus_have_cap(ARM64_HAS_VIRT_HOST_EXTN))
+ smmu->features |= ARM_SMMU_FEAT_E2H;
+ }
/*
* The coherency feature as set by FW is used in preference to the ID
@@ -3489,11 +3521,7 @@ err_reset_pci_ops: __maybe_unused;
static void __iomem *arm_smmu_ioremap(struct device *dev, resource_size_t start,
resource_size_t size)
{
- struct resource res = {
- .flags = IORESOURCE_MEM,
- .start = start,
- .end = start + size - 1,
- };
+ struct resource res = DEFINE_RES_MEM(start, size);
return devm_ioremap_resource(dev, &res);
}
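For reference, DEFINE_RES_MEM() from <linux/ioport.h> produces the same initializer that was open-coded before, so the two forms are equivalent:

	/* roughly what DEFINE_RES_MEM(start, size) expands to */
	struct resource res = {
		.start	= start,
		.end	= start + size - 1,
		.flags	= IORESOURCE_MEM,
	};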
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index 96c2e9565e00..f985817c967a 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -139,15 +139,15 @@
#define ARM_SMMU_CMDQ_CONS 0x9c
#define ARM_SMMU_EVTQ_BASE 0xa0
-#define ARM_SMMU_EVTQ_PROD 0x100a8
-#define ARM_SMMU_EVTQ_CONS 0x100ac
+#define ARM_SMMU_EVTQ_PROD 0xa8
+#define ARM_SMMU_EVTQ_CONS 0xac
#define ARM_SMMU_EVTQ_IRQ_CFG0 0xb0
#define ARM_SMMU_EVTQ_IRQ_CFG1 0xb8
#define ARM_SMMU_EVTQ_IRQ_CFG2 0xbc
#define ARM_SMMU_PRIQ_BASE 0xc0
-#define ARM_SMMU_PRIQ_PROD 0x100c8
-#define ARM_SMMU_PRIQ_CONS 0x100cc
+#define ARM_SMMU_PRIQ_PROD 0xc8
+#define ARM_SMMU_PRIQ_CONS 0xcc
#define ARM_SMMU_PRIQ_IRQ_CFG0 0xd0
#define ARM_SMMU_PRIQ_IRQ_CFG1 0xd8
#define ARM_SMMU_PRIQ_IRQ_CFG2 0xdc
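Note that the EVTQ/PRIQ PROD and CONS offsets above have dropped their former 0x10000 bias: arm_smmu_init_one_queue() now takes the register page explicitly (smmu->base for the command queue, smmu->page1 for the event and PRI queues), so these offsets are relative to the chosen page instead of being fixed up at runtime by arm_smmu_page1_fixup().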
@@ -430,6 +430,8 @@ struct arm_smmu_cmdq_ent {
#define CMDQ_OP_TLBI_NH_ASID 0x11
#define CMDQ_OP_TLBI_NH_VA 0x12
#define CMDQ_OP_TLBI_EL2_ALL 0x20
+ #define CMDQ_OP_TLBI_EL2_ASID 0x21
+ #define CMDQ_OP_TLBI_EL2_VA 0x22
#define CMDQ_OP_TLBI_S12_VMALL 0x28
#define CMDQ_OP_TLBI_S2_IPA 0x2a
#define CMDQ_OP_TLBI_NSNH_ALL 0x30
@@ -604,6 +606,7 @@ struct arm_smmu_device {
#define ARM_SMMU_FEAT_RANGE_INV (1 << 15)
#define ARM_SMMU_FEAT_BTM (1 << 16)
#define ARM_SMMU_FEAT_SVA (1 << 17)
+#define ARM_SMMU_FEAT_E2H (1 << 18)
u32 features;
#define ARM_SMMU_OPT_SKIP_PREFETCH (1 << 0)
@@ -694,6 +697,9 @@ extern struct arm_smmu_ctx_desc quiet_cd;
int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
struct arm_smmu_ctx_desc *cd);
void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid);
+void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid,
+ size_t granule, bool leaf,
+ struct arm_smmu_domain *smmu_domain);
bool arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd);
int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid,
unsigned long iova, size_t size);
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
index bcda17012aee..98b3a1c2a181 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
@@ -166,6 +166,7 @@ static const struct of_device_id qcom_smmu_client_of_match[] __maybe_unused = {
{ .compatible = "qcom,mdss" },
{ .compatible = "qcom,sc7180-mdss" },
{ .compatible = "qcom,sc7180-mss-pil" },
+ { .compatible = "qcom,sc8180x-mdss" },
{ .compatible = "qcom,sdm845-mdss" },
{ .compatible = "qcom,sdm845-mss-pil" },
{ }
@@ -206,6 +207,8 @@ static int qcom_smmu_cfg_probe(struct arm_smmu_device *smmu)
smr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_SMR(i));
if (FIELD_GET(ARM_SMMU_SMR_VALID, smr)) {
+ /* Ignore valid bit for SMR mask extraction. */
+ smr &= ~ARM_SMMU_SMR_VALID;
smmu->smrs[i].id = FIELD_GET(ARM_SMMU_SMR_ID, smr);
smmu->smrs[i].mask = FIELD_GET(ARM_SMMU_SMR_MASK, smr);
smmu->smrs[i].valid = true;
@@ -327,10 +330,12 @@ static struct arm_smmu_device *qcom_smmu_create(struct arm_smmu_device *smmu,
static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = {
{ .compatible = "qcom,msm8998-smmu-v2" },
{ .compatible = "qcom,sc7180-smmu-500" },
+ { .compatible = "qcom,sc8180x-smmu-500" },
{ .compatible = "qcom,sdm630-smmu-v2" },
{ .compatible = "qcom,sdm845-smmu-500" },
{ .compatible = "qcom,sm8150-smmu-500" },
{ .compatible = "qcom,sm8250-smmu-500" },
+ { .compatible = "qcom,sm8350-smmu-500" },
{ }
};
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 4078358ed66e..f659395e7959 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -51,6 +51,8 @@ struct iommu_dma_cookie {
struct iommu_domain *fq_domain;
};
+static DEFINE_STATIC_KEY_FALSE(iommu_deferred_attach_enabled);
+
void iommu_dma_free_cpu_cached_iovas(unsigned int cpu,
struct iommu_domain *domain)
{
@@ -378,21 +380,6 @@ static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
return iova_reserve_iommu_regions(dev, domain);
}
-static int iommu_dma_deferred_attach(struct device *dev,
- struct iommu_domain *domain)
-{
- const struct iommu_ops *ops = domain->ops;
-
- if (!is_kdump_kernel())
- return 0;
-
- if (unlikely(ops->is_attach_deferred &&
- ops->is_attach_deferred(domain, dev)))
- return iommu_attach_device(domain, dev);
-
- return 0;
-}
-
/**
* dma_info_to_prot - Translate DMA API directions and attributes to IOMMU API
* page flags.
@@ -535,7 +522,8 @@ static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys,
size_t iova_off = iova_offset(iovad, phys);
dma_addr_t iova;
- if (unlikely(iommu_dma_deferred_attach(dev, domain)))
+ if (static_branch_unlikely(&iommu_deferred_attach_enabled) &&
+ iommu_deferred_attach(dev, domain))
return DMA_MAPPING_ERROR;
size = iova_align(iovad, size + iova_off);
@@ -693,7 +681,8 @@ static void *iommu_dma_alloc_remap(struct device *dev, size_t size,
*dma_handle = DMA_MAPPING_ERROR;
- if (unlikely(iommu_dma_deferred_attach(dev, domain)))
+ if (static_branch_unlikely(&iommu_deferred_attach_enabled) &&
+ iommu_deferred_attach(dev, domain))
return NULL;
min_size = alloc_sizes & -alloc_sizes;
@@ -976,7 +965,8 @@ static int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg,
unsigned long mask = dma_get_seg_boundary(dev);
int i;
- if (unlikely(iommu_dma_deferred_attach(dev, domain)))
+ if (static_branch_unlikely(&iommu_deferred_attach_enabled) &&
+ iommu_deferred_attach(dev, domain))
return 0;
if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
@@ -1424,6 +1414,9 @@ void iommu_dma_compose_msi_msg(struct msi_desc *desc,
static int iommu_dma_init(void)
{
+ if (is_kdump_kernel())
+ static_branch_enable(&iommu_deferred_attach_enabled);
+
return iova_cache_get();
}
arch_initcall(iommu_dma_init);
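The kdump deferred-attach test is now gated by a static key flipped once at init time, so the common (non-kdump) case costs a patched-out branch per mapping instead of a function call. A minimal sketch of the pattern, with hypothetical names:

	#include <linux/jump_label.h>

	static DEFINE_STATIC_KEY_FALSE(my_feature_enabled);

	static int __init my_feature_init(void)
	{
		if (my_feature_wanted())	/* hypothetical predicate */
			static_branch_enable(&my_feature_enabled);
		return 0;
	}

	void my_hot_path(void)
	{
		/* compiles to a no-op branch until the key is enabled */
		if (static_branch_unlikely(&my_feature_enabled))
			my_slow_work();		/* hypothetical */
	}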
diff --git a/drivers/iommu/hyperv-iommu.c b/drivers/iommu/hyperv-iommu.c
index 1d21a0b5f724..e285a220c913 100644
--- a/drivers/iommu/hyperv-iommu.c
+++ b/drivers/iommu/hyperv-iommu.c
@@ -20,6 +20,7 @@
#include <asm/io_apic.h>
#include <asm/irq_remapping.h>
#include <asm/hypervisor.h>
+#include <asm/mshyperv.h>
#include "irq_remapping.h"
@@ -115,30 +116,43 @@ static const struct irq_domain_ops hyperv_ir_domain_ops = {
.free = hyperv_irq_remapping_free,
};
+static const struct irq_domain_ops hyperv_root_ir_domain_ops;
static int __init hyperv_prepare_irq_remapping(void)
{
struct fwnode_handle *fn;
int i;
+ const char *name;
+ const struct irq_domain_ops *ops;
if (!hypervisor_is_type(X86_HYPER_MS_HYPERV) ||
x86_init.hyper.msi_ext_dest_id() ||
!x2apic_supported())
return -ENODEV;
- fn = irq_domain_alloc_named_id_fwnode("HYPERV-IR", 0);
+ if (hv_root_partition) {
+ name = "HYPERV-ROOT-IR";
+ ops = &hyperv_root_ir_domain_ops;
+ } else {
+ name = "HYPERV-IR";
+ ops = &hyperv_ir_domain_ops;
+ }
+
+ fn = irq_domain_alloc_named_id_fwnode(name, 0);
if (!fn)
return -ENOMEM;
ioapic_ir_domain =
irq_domain_create_hierarchy(arch_get_ir_parent_domain(),
- 0, IOAPIC_REMAPPING_ENTRY, fn,
- &hyperv_ir_domain_ops, NULL);
+ 0, IOAPIC_REMAPPING_ENTRY, fn, ops, NULL);
if (!ioapic_ir_domain) {
irq_domain_free_fwnode(fn);
return -ENOMEM;
}
+ if (hv_root_partition)
+ return 0; /* The rest is only relevant to guests */
+
/*
 * Hyper-V doesn't provide an irq remapping function for the
 * IO-APIC, so the IO-APIC only accepts 8-bit APIC IDs.
@@ -166,4 +180,161 @@ struct irq_remap_ops hyperv_irq_remap_ops = {
.enable = hyperv_enable_irq_remapping,
};
+/* IRQ remapping domain when Linux runs as the root partition */
+struct hyperv_root_ir_data {
+ u8 ioapic_id;
+ bool is_level;
+ struct hv_interrupt_entry entry;
+};
+
+static void
+hyperv_root_ir_compose_msi_msg(struct irq_data *irq_data, struct msi_msg *msg)
+{
+ u64 status;
+ u32 vector;
+ struct irq_cfg *cfg;
+ int ioapic_id;
+ struct cpumask *affinity;
+ int cpu;
+ struct hv_interrupt_entry entry;
+ struct hyperv_root_ir_data *data = irq_data->chip_data;
+ struct IO_APIC_route_entry e;
+
+ cfg = irqd_cfg(irq_data);
+ affinity = irq_data_get_effective_affinity_mask(irq_data);
+ cpu = cpumask_first_and(affinity, cpu_online_mask);
+
+ vector = cfg->vector;
+ ioapic_id = data->ioapic_id;
+
+ if (data->entry.source == HV_DEVICE_TYPE_IOAPIC
+ && data->entry.ioapic_rte.as_uint64) {
+ entry = data->entry;
+
+ status = hv_unmap_ioapic_interrupt(ioapic_id, &entry);
+
+ if (status != HV_STATUS_SUCCESS)
+ pr_debug("%s: unexpected unmap status %lld\n", __func__, status);
+
+ data->entry.ioapic_rte.as_uint64 = 0;
+ data->entry.source = 0; /* Invalid source */
+ }
+
+ status = hv_map_ioapic_interrupt(ioapic_id, data->is_level, cpu,
+ vector, &entry);
+
+ if (status != HV_STATUS_SUCCESS) {
+ pr_err("%s: map hypercall failed, status %lld\n", __func__, status);
+ return;
+ }
+
+ data->entry = entry;
+
+ /* Turn it into an IO_APIC_route_entry, and generate MSI MSG. */
+ e.w1 = entry.ioapic_rte.low_uint32;
+ e.w2 = entry.ioapic_rte.high_uint32;
+
+ memset(msg, 0, sizeof(*msg));
+ msg->arch_data.vector = e.vector;
+ msg->arch_data.delivery_mode = e.delivery_mode;
+ msg->arch_addr_lo.dest_mode_logical = e.dest_mode_logical;
+ msg->arch_addr_lo.dmar_format = e.ir_format;
+ msg->arch_addr_lo.dmar_index_0_14 = e.ir_index_0_14;
+}
+
+static int hyperv_root_ir_set_affinity(struct irq_data *data,
+ const struct cpumask *mask, bool force)
+{
+ struct irq_data *parent = data->parent_data;
+ struct irq_cfg *cfg = irqd_cfg(data);
+ int ret;
+
+ ret = parent->chip->irq_set_affinity(parent, mask, force);
+ if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE)
+ return ret;
+
+ send_cleanup_vector(cfg);
+
+ return 0;
+}
+
+static struct irq_chip hyperv_root_ir_chip = {
+ .name = "HYPERV-ROOT-IR",
+ .irq_ack = apic_ack_irq,
+ .irq_set_affinity = hyperv_root_ir_set_affinity,
+ .irq_compose_msi_msg = hyperv_root_ir_compose_msi_msg,
+};
+
+static int hyperv_root_irq_remapping_alloc(struct irq_domain *domain,
+ unsigned int virq, unsigned int nr_irqs,
+ void *arg)
+{
+ struct irq_alloc_info *info = arg;
+ struct irq_data *irq_data;
+ struct hyperv_root_ir_data *data;
+ int ret = 0;
+
+ if (!info || info->type != X86_IRQ_ALLOC_TYPE_IOAPIC || nr_irqs > 1)
+ return -EINVAL;
+
+ ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg);
+ if (ret < 0)
+ return ret;
+
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ if (!data) {
+ irq_domain_free_irqs_common(domain, virq, nr_irqs);
+ return -ENOMEM;
+ }
+
+ irq_data = irq_domain_get_irq_data(domain, virq);
+ if (!irq_data) {
+ kfree(data);
+ irq_domain_free_irqs_common(domain, virq, nr_irqs);
+ return -EINVAL;
+ }
+
+ data->ioapic_id = info->devid;
+ data->is_level = info->ioapic.is_level;
+
+ irq_data->chip = &hyperv_root_ir_chip;
+ irq_data->chip_data = data;
+
+ return 0;
+}
+
+static void hyperv_root_irq_remapping_free(struct irq_domain *domain,
+ unsigned int virq, unsigned int nr_irqs)
+{
+ struct irq_data *irq_data;
+ struct hyperv_root_ir_data *data;
+ struct hv_interrupt_entry *e;
+ int i;
+
+ for (i = 0; i < nr_irqs; i++) {
+ irq_data = irq_domain_get_irq_data(domain, virq + i);
+
+ if (irq_data && irq_data->chip_data) {
+ data = irq_data->chip_data;
+ e = &data->entry;
+
+ if (e->source == HV_DEVICE_TYPE_IOAPIC
+ && e->ioapic_rte.as_uint64)
+ hv_unmap_ioapic_interrupt(data->ioapic_id,
+ &data->entry);
+
+ kfree(data);
+ }
+ }
+
+ irq_domain_free_irqs_common(domain, virq, nr_irqs);
+}
+
+static const struct irq_domain_ops hyperv_root_ir_domain_ops = {
+ .select = hyperv_irq_remapping_select,
+ .alloc = hyperv_root_irq_remapping_alloc,
+ .free = hyperv_root_irq_remapping_free,
+};
+
#endif
diff --git a/drivers/iommu/intel/Makefile b/drivers/iommu/intel/Makefile
index fb8e1e8c8029..ae236ec7d219 100644
--- a/drivers/iommu/intel/Makefile
+++ b/drivers/iommu/intel/Makefile
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_DMAR_TABLE) += dmar.o
obj-$(CONFIG_INTEL_IOMMU) += iommu.o pasid.o
-obj-$(CONFIG_INTEL_IOMMU) += trace.o
+obj-$(CONFIG_DMAR_TABLE) += trace.o cap_audit.o
obj-$(CONFIG_INTEL_IOMMU_DEBUGFS) += debugfs.o
obj-$(CONFIG_INTEL_IOMMU_SVM) += svm.o
obj-$(CONFIG_IRQ_REMAP) += irq_remapping.o
diff --git a/drivers/iommu/intel/cap_audit.c b/drivers/iommu/intel/cap_audit.c
new file mode 100644
index 000000000000..b12e421a2f1a
--- /dev/null
+++ b/drivers/iommu/intel/cap_audit.c
@@ -0,0 +1,205 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * cap_audit.c - audit IOMMU capabilities at boot time and on hot plug
+ *
+ * Copyright (C) 2021 Intel Corporation
+ *
+ * Author: Kyung Min Park <kyung.min.park@intel.com>
+ * Lu Baolu <baolu.lu@linux.intel.com>
+ */
+
+#define pr_fmt(fmt) "DMAR: " fmt
+
+#include <linux/intel-iommu.h>
+#include "cap_audit.h"
+
+static u64 intel_iommu_cap_sanity;
+static u64 intel_iommu_ecap_sanity;
+
+static inline void check_irq_capabilities(struct intel_iommu *a,
+ struct intel_iommu *b)
+{
+ CHECK_FEATURE_MISMATCH(a, b, cap, pi_support, CAP_PI_MASK);
+ CHECK_FEATURE_MISMATCH(a, b, ecap, eim_support, ECAP_EIM_MASK);
+}
+
+static inline void check_dmar_capabilities(struct intel_iommu *a,
+ struct intel_iommu *b)
+{
+ MINIMAL_FEATURE_IOMMU(b, cap, CAP_MAMV_MASK);
+ MINIMAL_FEATURE_IOMMU(b, cap, CAP_NFR_MASK);
+ MINIMAL_FEATURE_IOMMU(b, cap, CAP_SLLPS_MASK);
+ MINIMAL_FEATURE_IOMMU(b, cap, CAP_FRO_MASK);
+ MINIMAL_FEATURE_IOMMU(b, cap, CAP_MGAW_MASK);
+ MINIMAL_FEATURE_IOMMU(b, cap, CAP_SAGAW_MASK);
+ MINIMAL_FEATURE_IOMMU(b, cap, CAP_NDOMS_MASK);
+ MINIMAL_FEATURE_IOMMU(b, ecap, ECAP_PSS_MASK);
+ MINIMAL_FEATURE_IOMMU(b, ecap, ECAP_MHMV_MASK);
+ MINIMAL_FEATURE_IOMMU(b, ecap, ECAP_IRO_MASK);
+
+ CHECK_FEATURE_MISMATCH(a, b, cap, 5lp_support, CAP_FL5LP_MASK);
+ CHECK_FEATURE_MISMATCH(a, b, cap, fl1gp_support, CAP_FL1GP_MASK);
+ CHECK_FEATURE_MISMATCH(a, b, cap, read_drain, CAP_RD_MASK);
+ CHECK_FEATURE_MISMATCH(a, b, cap, write_drain, CAP_WD_MASK);
+ CHECK_FEATURE_MISMATCH(a, b, cap, pgsel_inv, CAP_PSI_MASK);
+ CHECK_FEATURE_MISMATCH(a, b, cap, zlr, CAP_ZLR_MASK);
+ CHECK_FEATURE_MISMATCH(a, b, cap, caching_mode, CAP_CM_MASK);
+ CHECK_FEATURE_MISMATCH(a, b, cap, phmr, CAP_PHMR_MASK);
+ CHECK_FEATURE_MISMATCH(a, b, cap, plmr, CAP_PLMR_MASK);
+ CHECK_FEATURE_MISMATCH(a, b, cap, rwbf, CAP_RWBF_MASK);
+ CHECK_FEATURE_MISMATCH(a, b, cap, afl, CAP_AFL_MASK);
+ CHECK_FEATURE_MISMATCH(a, b, ecap, rps, ECAP_RPS_MASK);
+ CHECK_FEATURE_MISMATCH(a, b, ecap, smpwc, ECAP_SMPWC_MASK);
+ CHECK_FEATURE_MISMATCH(a, b, ecap, flts, ECAP_FLTS_MASK);
+ CHECK_FEATURE_MISMATCH(a, b, ecap, slts, ECAP_SLTS_MASK);
+ CHECK_FEATURE_MISMATCH(a, b, ecap, nwfs, ECAP_NWFS_MASK);
+ CHECK_FEATURE_MISMATCH(a, b, ecap, slads, ECAP_SLADS_MASK);
+ CHECK_FEATURE_MISMATCH(a, b, ecap, vcs, ECAP_VCS_MASK);
+ CHECK_FEATURE_MISMATCH(a, b, ecap, smts, ECAP_SMTS_MASK);
+ CHECK_FEATURE_MISMATCH(a, b, ecap, pds, ECAP_PDS_MASK);
+ CHECK_FEATURE_MISMATCH(a, b, ecap, dit, ECAP_DIT_MASK);
+ CHECK_FEATURE_MISMATCH(a, b, ecap, pasid, ECAP_PASID_MASK);
+ CHECK_FEATURE_MISMATCH(a, b, ecap, eafs, ECAP_EAFS_MASK);
+ CHECK_FEATURE_MISMATCH(a, b, ecap, srs, ECAP_SRS_MASK);
+ CHECK_FEATURE_MISMATCH(a, b, ecap, ers, ECAP_ERS_MASK);
+ CHECK_FEATURE_MISMATCH(a, b, ecap, prs, ECAP_PRS_MASK);
+ CHECK_FEATURE_MISMATCH(a, b, ecap, nest, ECAP_NEST_MASK);
+ CHECK_FEATURE_MISMATCH(a, b, ecap, mts, ECAP_MTS_MASK);
+ CHECK_FEATURE_MISMATCH(a, b, ecap, sc_support, ECAP_SC_MASK);
+ CHECK_FEATURE_MISMATCH(a, b, ecap, pass_through, ECAP_PT_MASK);
+ CHECK_FEATURE_MISMATCH(a, b, ecap, dev_iotlb_support, ECAP_DT_MASK);
+ CHECK_FEATURE_MISMATCH(a, b, ecap, qis, ECAP_QI_MASK);
+ CHECK_FEATURE_MISMATCH(a, b, ecap, coherent, ECAP_C_MASK);
+}
+
+static int cap_audit_hotplug(struct intel_iommu *iommu, enum cap_audit_type type)
+{
+ bool mismatch = false;
+ u64 old_cap = intel_iommu_cap_sanity;
+ u64 old_ecap = intel_iommu_ecap_sanity;
+
+ if (type == CAP_AUDIT_HOTPLUG_IRQR) {
+ CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, pi_support, CAP_PI_MASK);
+ CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, eim_support, ECAP_EIM_MASK);
+ goto out;
+ }
+
+ CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, 5lp_support, CAP_FL5LP_MASK);
+ CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, fl1gp_support, CAP_FL1GP_MASK);
+ CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, read_drain, CAP_RD_MASK);
+ CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, write_drain, CAP_WD_MASK);
+ CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, pgsel_inv, CAP_PSI_MASK);
+ CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, zlr, CAP_ZLR_MASK);
+ CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, caching_mode, CAP_CM_MASK);
+ CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, phmr, CAP_PHMR_MASK);
+ CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, plmr, CAP_PLMR_MASK);
+ CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, rwbf, CAP_RWBF_MASK);
+ CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, afl, CAP_AFL_MASK);
+ CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, rps, ECAP_RPS_MASK);
+ CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, smpwc, ECAP_SMPWC_MASK);
+ CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, flts, ECAP_FLTS_MASK);
+ CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, slts, ECAP_SLTS_MASK);
+ CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, nwfs, ECAP_NWFS_MASK);
+ CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, slads, ECAP_SLADS_MASK);
+ CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, vcs, ECAP_VCS_MASK);
+ CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, smts, ECAP_SMTS_MASK);
+ CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, pds, ECAP_PDS_MASK);
+ CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, dit, ECAP_DIT_MASK);
+ CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, pasid, ECAP_PASID_MASK);
+ CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, eafs, ECAP_EAFS_MASK);
+ CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, srs, ECAP_SRS_MASK);
+ CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, ers, ECAP_ERS_MASK);
+ CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, prs, ECAP_PRS_MASK);
+ CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, nest, ECAP_NEST_MASK);
+ CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, mts, ECAP_MTS_MASK);
+ CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, sc_support, ECAP_SC_MASK);
+ CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, pass_through, ECAP_PT_MASK);
+ CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, dev_iotlb_support, ECAP_DT_MASK);
+ CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, qis, ECAP_QI_MASK);
+ CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, coherent, ECAP_C_MASK);
+
+	/* Abort hot plug if a feature of the hot-added IOMMU is smaller than the global minimum */
+ MINIMAL_FEATURE_HOTPLUG(iommu, cap, max_amask_val, CAP_MAMV_MASK, mismatch);
+ MINIMAL_FEATURE_HOTPLUG(iommu, cap, num_fault_regs, CAP_NFR_MASK, mismatch);
+ MINIMAL_FEATURE_HOTPLUG(iommu, cap, super_page_val, CAP_SLLPS_MASK, mismatch);
+ MINIMAL_FEATURE_HOTPLUG(iommu, cap, fault_reg_offset, CAP_FRO_MASK, mismatch);
+ MINIMAL_FEATURE_HOTPLUG(iommu, cap, mgaw, CAP_MGAW_MASK, mismatch);
+ MINIMAL_FEATURE_HOTPLUG(iommu, cap, sagaw, CAP_SAGAW_MASK, mismatch);
+ MINIMAL_FEATURE_HOTPLUG(iommu, cap, ndoms, CAP_NDOMS_MASK, mismatch);
+ MINIMAL_FEATURE_HOTPLUG(iommu, ecap, pss, ECAP_PSS_MASK, mismatch);
+ MINIMAL_FEATURE_HOTPLUG(iommu, ecap, max_handle_mask, ECAP_MHMV_MASK, mismatch);
+ MINIMAL_FEATURE_HOTPLUG(iommu, ecap, iotlb_offset, ECAP_IRO_MASK, mismatch);
+
+out:
+ if (mismatch) {
+ intel_iommu_cap_sanity = old_cap;
+ intel_iommu_ecap_sanity = old_ecap;
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
+static int cap_audit_static(struct intel_iommu *iommu, enum cap_audit_type type)
+{
+ struct dmar_drhd_unit *d;
+ struct intel_iommu *i;
+
+ rcu_read_lock();
+ if (list_empty(&dmar_drhd_units))
+ goto out;
+
+ for_each_active_iommu(i, d) {
+ if (!iommu) {
+ intel_iommu_ecap_sanity = i->ecap;
+ intel_iommu_cap_sanity = i->cap;
+ iommu = i;
+ continue;
+ }
+
+ if (type == CAP_AUDIT_STATIC_DMAR)
+ check_dmar_capabilities(iommu, i);
+ else
+ check_irq_capabilities(iommu, i);
+ }
+
+out:
+ rcu_read_unlock();
+ return 0;
+}
+
+int intel_cap_audit(enum cap_audit_type type, struct intel_iommu *iommu)
+{
+ switch (type) {
+ case CAP_AUDIT_STATIC_DMAR:
+ case CAP_AUDIT_STATIC_IRQR:
+ return cap_audit_static(iommu, type);
+ case CAP_AUDIT_HOTPLUG_DMAR:
+ case CAP_AUDIT_HOTPLUG_IRQR:
+ return cap_audit_hotplug(iommu, type);
+ default:
+ break;
+ }
+
+ return -EFAULT;
+}
+
+bool intel_cap_smts_sanity(void)
+{
+ return ecap_smts(intel_iommu_ecap_sanity);
+}
+
+bool intel_cap_pasid_sanity(void)
+{
+ return ecap_pasid(intel_iommu_ecap_sanity);
+}
+
+bool intel_cap_nest_sanity(void)
+{
+ return ecap_nest(intel_iommu_ecap_sanity);
+}
+
+bool intel_cap_flts_sanity(void)
+{
+ return ecap_flts(intel_iommu_ecap_sanity);
+}
diff --git a/drivers/iommu/intel/cap_audit.h b/drivers/iommu/intel/cap_audit.h
new file mode 100644
index 000000000000..74cfccae0e81
--- /dev/null
+++ b/drivers/iommu/intel/cap_audit.h
@@ -0,0 +1,130 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * cap_audit.h - audit iommu capabilities header
+ *
+ * Copyright (C) 2021 Intel Corporation
+ *
+ * Author: Kyung Min Park <kyung.min.park@intel.com>
+ */
+
+/*
+ * Capability Register Mask
+ */
+#define CAP_FL5LP_MASK BIT_ULL(60)
+#define CAP_PI_MASK BIT_ULL(59)
+#define CAP_FL1GP_MASK BIT_ULL(56)
+#define CAP_RD_MASK BIT_ULL(55)
+#define CAP_WD_MASK BIT_ULL(54)
+#define CAP_MAMV_MASK GENMASK_ULL(53, 48)
+#define CAP_NFR_MASK GENMASK_ULL(47, 40)
+#define CAP_PSI_MASK BIT_ULL(39)
+#define CAP_SLLPS_MASK GENMASK_ULL(37, 34)
+#define CAP_FRO_MASK GENMASK_ULL(33, 24)
+#define CAP_ZLR_MASK BIT_ULL(22)
+#define CAP_MGAW_MASK GENMASK_ULL(21, 16)
+#define CAP_SAGAW_MASK GENMASK_ULL(12, 8)
+#define CAP_CM_MASK BIT_ULL(7)
+#define CAP_PHMR_MASK BIT_ULL(6)
+#define CAP_PLMR_MASK BIT_ULL(5)
+#define CAP_RWBF_MASK BIT_ULL(4)
+#define CAP_AFL_MASK BIT_ULL(3)
+#define CAP_NDOMS_MASK GENMASK_ULL(2, 0)
+
+/*
+ * Extended Capability Register Mask
+ */
+#define ECAP_RPS_MASK BIT_ULL(49)
+#define ECAP_SMPWC_MASK BIT_ULL(48)
+#define ECAP_FLTS_MASK BIT_ULL(47)
+#define ECAP_SLTS_MASK BIT_ULL(46)
+#define ECAP_SLADS_MASK BIT_ULL(45)
+#define ECAP_VCS_MASK BIT_ULL(44)
+#define ECAP_SMTS_MASK BIT_ULL(43)
+#define ECAP_PDS_MASK BIT_ULL(42)
+#define ECAP_DIT_MASK BIT_ULL(41)
+#define ECAP_PASID_MASK BIT_ULL(40)
+#define ECAP_PSS_MASK GENMASK_ULL(39, 35)
+#define ECAP_EAFS_MASK BIT_ULL(34)
+#define ECAP_NWFS_MASK BIT_ULL(33)
+#define ECAP_SRS_MASK BIT_ULL(31)
+#define ECAP_ERS_MASK BIT_ULL(30)
+#define ECAP_PRS_MASK BIT_ULL(29)
+#define ECAP_NEST_MASK BIT_ULL(26)
+#define ECAP_MTS_MASK BIT_ULL(25)
+#define ECAP_MHMV_MASK GENMASK_ULL(23, 20)
+#define ECAP_IRO_MASK GENMASK_ULL(17, 8)
+#define ECAP_SC_MASK BIT_ULL(7)
+#define ECAP_PT_MASK BIT_ULL(6)
+#define ECAP_EIM_MASK BIT_ULL(4)
+#define ECAP_DT_MASK BIT_ULL(2)
+#define ECAP_QI_MASK BIT_ULL(1)
+#define ECAP_C_MASK BIT_ULL(0)
+
+/*
+ * u64 intel_iommu_cap_sanity, intel_iommu_ecap_sanity will be adjusted as each
+ * IOMMU gets audited.
+ */
+#define DO_CHECK_FEATURE_MISMATCH(a, b, cap, feature, MASK) \
+do { \
+ if (cap##_##feature(a) != cap##_##feature(b)) { \
+ intel_iommu_##cap##_sanity &= ~(MASK); \
+		pr_info("IOMMU feature %s inconsistent\n", #feature); \
+ } \
+} while (0)
+
+#define CHECK_FEATURE_MISMATCH(a, b, cap, feature, MASK) \
+ DO_CHECK_FEATURE_MISMATCH((a)->cap, (b)->cap, cap, feature, MASK)
+
+#define CHECK_FEATURE_MISMATCH_HOTPLUG(b, cap, feature, MASK) \
+do { \
+ if (cap##_##feature(intel_iommu_##cap##_sanity)) \
+ DO_CHECK_FEATURE_MISMATCH(intel_iommu_##cap##_sanity, \
+ (b)->cap, cap, feature, MASK); \
+} while (0)
+
+#define MINIMAL_FEATURE_IOMMU(iommu, cap, MASK) \
+do { \
+ u64 min_feature = intel_iommu_##cap##_sanity & (MASK); \
+ min_feature = min_t(u64, min_feature, (iommu)->cap & (MASK)); \
+ intel_iommu_##cap##_sanity = (intel_iommu_##cap##_sanity & ~(MASK)) | \
+ min_feature; \
+} while (0)
+
+#define MINIMAL_FEATURE_HOTPLUG(iommu, cap, feature, MASK, mismatch) \
+do { \
+ if ((intel_iommu_##cap##_sanity & (MASK)) > \
+ (cap##_##feature((iommu)->cap))) \
+ mismatch = true; \
+ else \
+ (iommu)->cap = ((iommu)->cap & ~(MASK)) | \
+ (intel_iommu_##cap##_sanity & (MASK)); \
+} while (0)
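Expanded by hand for one feature (for illustration only), CHECK_FEATURE_MISMATCH(a, b, ecap, smts, ECAP_SMTS_MASK) reduces to roughly:

	if (ecap_smts((a)->ecap) != ecap_smts((b)->ecap)) {
		intel_iommu_ecap_sanity &= ~(ECAP_SMTS_MASK);
		pr_info("IOMMU feature %s inconsistent\n", "smts");
	}

A disagreeing feature bit is simply cleared from the global sanity value, which the intel_cap_*_sanity() helpers then consult.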
+
+enum cap_audit_type {
+ CAP_AUDIT_STATIC_DMAR,
+ CAP_AUDIT_STATIC_IRQR,
+ CAP_AUDIT_HOTPLUG_DMAR,
+ CAP_AUDIT_HOTPLUG_IRQR,
+};
+
+bool intel_cap_smts_sanity(void);
+bool intel_cap_pasid_sanity(void);
+bool intel_cap_nest_sanity(void);
+bool intel_cap_flts_sanity(void);
+
+static inline bool scalable_mode_support(void)
+{
+ return (intel_iommu_sm && intel_cap_smts_sanity());
+}
+
+static inline bool pasid_mode_support(void)
+{
+ return scalable_mode_support() && intel_cap_pasid_sanity();
+}
+
+static inline bool nested_mode_support(void)
+{
+ return scalable_mode_support() && intel_cap_nest_sanity();
+}
+
+int intel_cap_audit(enum cap_audit_type type, struct intel_iommu *iommu);
diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c
index 02e7c10a4224..d5c51b5c20af 100644
--- a/drivers/iommu/intel/dmar.c
+++ b/drivers/iommu/intel/dmar.c
@@ -31,6 +31,7 @@
#include <linux/limits.h>
#include <asm/irq_remapping.h>
#include <asm/iommu_table.h>
+#include <trace/events/intel_iommu.h>
#include "../irq_remapping.h"
@@ -525,6 +526,7 @@ dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
struct acpi_dmar_reserved_memory *rmrr;
struct acpi_dmar_atsr *atsr;
struct acpi_dmar_rhsa *rhsa;
+ struct acpi_dmar_satc *satc;
switch (header->type) {
case ACPI_DMAR_TYPE_HARDWARE_UNIT:
@@ -554,6 +556,10 @@ dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
/* We don't print this here because we need to sanity-check
it first. So print it in dmar_parse_one_andd() instead. */
break;
+ case ACPI_DMAR_TYPE_SATC:
+ satc = container_of(header, struct acpi_dmar_satc, header);
+ pr_info("SATC flags: 0x%x\n", satc->flags);
+ break;
}
}
@@ -641,6 +647,7 @@ parse_dmar_table(void)
.cb[ACPI_DMAR_TYPE_ROOT_ATS] = &dmar_parse_one_atsr,
.cb[ACPI_DMAR_TYPE_HARDWARE_AFFINITY] = &dmar_parse_one_rhsa,
.cb[ACPI_DMAR_TYPE_NAMESPACE] = &dmar_parse_one_andd,
+ .cb[ACPI_DMAR_TYPE_SATC] = &dmar_parse_one_satc,
};
/*
@@ -1307,6 +1314,8 @@ restart:
offset = ((index + i) % QI_LENGTH) << shift;
memcpy(qi->desc + offset, &desc[i], 1 << shift);
qi->desc_status[(index + i) % QI_LENGTH] = QI_IN_USE;
+ trace_qi_submit(iommu, desc[i].qw0, desc[i].qw1,
+ desc[i].qw2, desc[i].qw3);
}
qi->desc_status[wait_index] = QI_IN_USE;
@@ -2074,6 +2083,7 @@ static guid_t dmar_hp_guid =
#define DMAR_DSM_FUNC_DRHD 1
#define DMAR_DSM_FUNC_ATSR 2
#define DMAR_DSM_FUNC_RHSA 3
+#define DMAR_DSM_FUNC_SATC 4
static inline bool dmar_detect_dsm(acpi_handle handle, int func)
{
@@ -2091,6 +2101,7 @@ static int dmar_walk_dsm_resource(acpi_handle handle, int func,
[DMAR_DSM_FUNC_DRHD] = ACPI_DMAR_TYPE_HARDWARE_UNIT,
[DMAR_DSM_FUNC_ATSR] = ACPI_DMAR_TYPE_ROOT_ATS,
[DMAR_DSM_FUNC_RHSA] = ACPI_DMAR_TYPE_HARDWARE_AFFINITY,
+ [DMAR_DSM_FUNC_SATC] = ACPI_DMAR_TYPE_SATC,
};
if (!dmar_detect_dsm(handle, func))
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 06b00b5363d8..ee0932307d64 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -44,10 +44,10 @@
#include <asm/irq_remapping.h>
#include <asm/cacheflush.h>
#include <asm/iommu.h>
-#include <trace/events/intel_iommu.h>
#include "../irq_remapping.h"
#include "pasid.h"
+#include "cap_audit.h"
#define ROOT_SIZE VTD_PAGE_SIZE
#define CONTEXT_SIZE VTD_PAGE_SIZE
@@ -316,8 +316,18 @@ struct dmar_atsr_unit {
u8 include_all:1; /* include all ports */
};
+struct dmar_satc_unit {
+ struct list_head list; /* list of SATC units */
+ struct acpi_dmar_header *hdr; /* ACPI header */
+ struct dmar_dev_scope *devices; /* target devices */
+ struct intel_iommu *iommu; /* the corresponding iommu */
+ int devices_cnt; /* target device count */
+ u8 atc_required:1; /* ATS is required */
+};
+
static LIST_HEAD(dmar_atsr_units);
static LIST_HEAD(dmar_rmrr_units);
+static LIST_HEAD(dmar_satc_units);
#define for_each_rmrr_units(rmrr) \
list_for_each_entry(rmrr, &dmar_rmrr_units, list)
@@ -1017,8 +1027,11 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
- if (domain_use_first_level(domain))
+ if (domain_use_first_level(domain)) {
pteval |= DMA_FL_PTE_XD | DMA_FL_PTE_US;
+ if (domain->domain.type == IOMMU_DOMAIN_DMA)
+ pteval |= DMA_FL_PTE_ACCESS;
+ }
if (cmpxchg64(&pte->val, 0ULL, pteval))
/* Someone else set it while we were thinking; use theirs. */
free_pgtable_page(tmp_page);
@@ -1861,25 +1874,7 @@ static void free_dmar_iommu(struct intel_iommu *iommu)
*/
static bool first_level_by_default(void)
{
- struct dmar_drhd_unit *drhd;
- struct intel_iommu *iommu;
- static int first_level_support = -1;
-
- if (likely(first_level_support != -1))
- return first_level_support;
-
- first_level_support = 1;
-
- rcu_read_lock();
- for_each_active_iommu(iommu, drhd) {
- if (!sm_supported(iommu) || !ecap_flts(iommu->ecap)) {
- first_level_support = 0;
- break;
- }
- }
- rcu_read_unlock();
-
- return first_level_support;
+ return scalable_mode_support() && intel_cap_flts_sanity();
}
static struct dmar_domain *alloc_domain(int flags)
@@ -2298,9 +2293,9 @@ static int
__domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
unsigned long phys_pfn, unsigned long nr_pages, int prot)
{
- struct dma_pte *first_pte = NULL, *pte = NULL;
unsigned int largepage_lvl = 0;
unsigned long lvl_pages = 0;
+ struct dma_pte *pte = NULL;
phys_addr_t pteval;
u64 attr;
@@ -2310,9 +2305,16 @@ __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
return -EINVAL;
attr = prot & (DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP);
- if (domain_use_first_level(domain))
+ if (domain_use_first_level(domain)) {
attr |= DMA_FL_PTE_PRESENT | DMA_FL_PTE_XD | DMA_FL_PTE_US;
+ if (domain->domain.type == IOMMU_DOMAIN_DMA) {
+ attr |= DMA_FL_PTE_ACCESS;
+ if (prot & DMA_PTE_WRITE)
+ attr |= DMA_FL_PTE_DIRTY;
+ }
+ }
+
pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | attr;
while (nr_pages > 0) {
@@ -2322,7 +2324,7 @@ __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
largepage_lvl = hardware_largepage_caps(domain, iov_pfn,
phys_pfn, nr_pages);
- first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
+ pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
if (!pte)
return -ENOMEM;
/* It is a large page */
@@ -2383,34 +2385,14 @@ __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
* recalculate 'pte' and switch back to smaller pages for the
* end of the mapping, if the trailing size is not enough to
* use another superpage (i.e. nr_pages < lvl_pages).
+ *
+ * We leave the clflush of leaf PTE changes to the
+ * iotlb_sync_map() callback.
*/
pte++;
if (!nr_pages || first_pte_in_page(pte) ||
- (largepage_lvl > 1 && nr_pages < lvl_pages)) {
- domain_flush_cache(domain, first_pte,
- (void *)pte - (void *)first_pte);
+ (largepage_lvl > 1 && nr_pages < lvl_pages))
pte = NULL;
- }
- }
-
- return 0;
-}
-
-static int
-domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
- unsigned long phys_pfn, unsigned long nr_pages, int prot)
-{
- int iommu_id, ret;
- struct intel_iommu *iommu;
-
- /* Do the real mapping first */
- ret = __domain_mapping(domain, iov_pfn, phys_pfn, nr_pages, prot);
- if (ret)
- return ret;
-
- for_each_domain_iommu(iommu_id, domain) {
- iommu = g_iommus[iommu_id];
- __mapping_notify_one(iommu, domain, iov_pfn, nr_pages);
}
return 0;
@@ -3197,6 +3179,10 @@ static int __init init_dmars(void)
goto error;
}
+ ret = intel_cap_audit(CAP_AUDIT_STATIC_DMAR, NULL);
+ if (ret)
+ goto free_iommu;
+
for_each_iommu(iommu, drhd) {
if (drhd->ignored) {
iommu_disable_translation(iommu);
@@ -3740,6 +3726,57 @@ int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg)
return 0;
}
+static struct dmar_satc_unit *dmar_find_satc(struct acpi_dmar_satc *satc)
+{
+ struct dmar_satc_unit *satcu;
+ struct acpi_dmar_satc *tmp;
+
+ list_for_each_entry_rcu(satcu, &dmar_satc_units, list,
+ dmar_rcu_check()) {
+ tmp = (struct acpi_dmar_satc *)satcu->hdr;
+ if (satc->segment != tmp->segment)
+ continue;
+ if (satc->header.length != tmp->header.length)
+ continue;
+ if (memcmp(satc, tmp, satc->header.length) == 0)
+ return satcu;
+ }
+
+ return NULL;
+}
+
+int dmar_parse_one_satc(struct acpi_dmar_header *hdr, void *arg)
+{
+ struct acpi_dmar_satc *satc;
+ struct dmar_satc_unit *satcu;
+
+ if (system_state >= SYSTEM_RUNNING && !intel_iommu_enabled)
+ return 0;
+
+ satc = container_of(hdr, struct acpi_dmar_satc, header);
+ satcu = dmar_find_satc(satc);
+ if (satcu)
+ return 0;
+
+ satcu = kzalloc(sizeof(*satcu) + hdr->length, GFP_KERNEL);
+ if (!satcu)
+ return -ENOMEM;
+
+ satcu->hdr = (void *)(satcu + 1);
+ memcpy(satcu->hdr, hdr, hdr->length);
+ satcu->atc_required = satc->flags & 0x1;
+ satcu->devices = dmar_alloc_dev_scope((void *)(satc + 1),
+ (void *)satc + satc->header.length,
+ &satcu->devices_cnt);
+ if (satcu->devices_cnt && !satcu->devices) {
+ kfree(satcu);
+ return -ENOMEM;
+ }
+ list_add_rcu(&satcu->list, &dmar_satc_units);
+
+ return 0;
+}
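The scope walk above relies on the fixed SATC header layout; per ACPICA's actbl1.h (stated here as an assumption, not part of this patch) it is:

	struct acpi_dmar_satc {
		struct acpi_dmar_header	header;
		u8			flags;		/* bit 0: ATC required */
		u8			reserved;
		u16			segment;	/* PCI segment number */
	};

The variable-length device scope list follows the fixed part, which is why parsing starts at (void *)(satc + 1).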
+
static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
{
int sp, ret;
@@ -3748,6 +3785,10 @@ static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
if (g_iommus[iommu->seq_id])
return 0;
+ ret = intel_cap_audit(CAP_AUDIT_HOTPLUG_DMAR, iommu);
+ if (ret)
+ goto out;
+
if (hw_pass_through && !ecap_pass_through(iommu->ecap)) {
pr_warn("%s: Doesn't support hardware pass through.\n",
iommu->name);
@@ -3843,6 +3884,7 @@ static void intel_iommu_free_dmars(void)
{
struct dmar_rmrr_unit *rmrru, *rmrr_n;
struct dmar_atsr_unit *atsru, *atsr_n;
+ struct dmar_satc_unit *satcu, *satc_n;
list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) {
list_del(&rmrru->list);
@@ -3854,6 +3896,11 @@ static void intel_iommu_free_dmars(void)
list_del(&atsru->list);
intel_iommu_free_atsr(atsru);
}
+ list_for_each_entry_safe(satcu, satc_n, &dmar_satc_units, list) {
+ list_del(&satcu->list);
+ dmar_free_dev_scope(&satcu->devices, &satcu->devices_cnt);
+ kfree(satcu);
+ }
}
int dmar_find_matched_atsr_unit(struct pci_dev *dev)
@@ -3905,8 +3952,10 @@ int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
int ret;
struct dmar_rmrr_unit *rmrru;
struct dmar_atsr_unit *atsru;
+ struct dmar_satc_unit *satcu;
struct acpi_dmar_atsr *atsr;
struct acpi_dmar_reserved_memory *rmrr;
+ struct acpi_dmar_satc *satc;
if (!intel_iommu_enabled && system_state >= SYSTEM_RUNNING)
return 0;
@@ -3947,6 +3996,23 @@ int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
break;
}
}
+ list_for_each_entry(satcu, &dmar_satc_units, list) {
+ satc = container_of(satcu->hdr, struct acpi_dmar_satc, header);
+ if (info->event == BUS_NOTIFY_ADD_DEVICE) {
+ ret = dmar_insert_dev_scope(info, (void *)(satc + 1),
+ (void *)satc + satc->header.length,
+ satc->segment, satcu->devices,
+ satcu->devices_cnt);
+ if (ret > 0)
+ break;
+ else if (ret < 0)
+ return ret;
+ } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
+ if (dmar_remove_dev_scope(info, satc->segment,
+ satcu->devices, satcu->devices_cnt))
+ break;
+ }
+ }
return 0;
}
@@ -4290,6 +4356,9 @@ int __init intel_iommu_init(void)
if (list_empty(&dmar_atsr_units))
pr_info("No ATSR found\n");
+ if (list_empty(&dmar_satc_units))
+ pr_info("No SATC found\n");
+
if (dmar_map_gfx)
intel_iommu_gfx_mapped = 1;
@@ -4943,7 +5012,6 @@ static int intel_iommu_map(struct iommu_domain *domain,
struct dmar_domain *dmar_domain = to_dmar_domain(domain);
u64 max_addr;
int prot = 0;
- int ret;
if (iommu_prot & IOMMU_READ)
prot |= DMA_PTE_READ;
@@ -4969,9 +5037,8 @@ static int intel_iommu_map(struct iommu_domain *domain,
/* Round up size to next multiple of PAGE_SIZE, if it and
the low bits of hpa would take us onto the next page */
size = aligned_nrpages(hpa, size);
- ret = domain_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
- hpa >> VTD_PAGE_SHIFT, size, prot);
- return ret;
+ return __domain_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
+ hpa >> VTD_PAGE_SHIFT, size, prot);
}
static size_t intel_iommu_unmap(struct iommu_domain *domain,
@@ -5040,60 +5107,6 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
return phys;
}
-static inline bool scalable_mode_support(void)
-{
- struct dmar_drhd_unit *drhd;
- struct intel_iommu *iommu;
- bool ret = true;
-
- rcu_read_lock();
- for_each_active_iommu(iommu, drhd) {
- if (!sm_supported(iommu)) {
- ret = false;
- break;
- }
- }
- rcu_read_unlock();
-
- return ret;
-}
-
-static inline bool iommu_pasid_support(void)
-{
- struct dmar_drhd_unit *drhd;
- struct intel_iommu *iommu;
- bool ret = true;
-
- rcu_read_lock();
- for_each_active_iommu(iommu, drhd) {
- if (!pasid_supported(iommu)) {
- ret = false;
- break;
- }
- }
- rcu_read_unlock();
-
- return ret;
-}
-
-static inline bool nested_mode_support(void)
-{
- struct dmar_drhd_unit *drhd;
- struct intel_iommu *iommu;
- bool ret = true;
-
- rcu_read_lock();
- for_each_active_iommu(iommu, drhd) {
- if (!sm_supported(iommu) || !ecap_nest(iommu->ecap)) {
- ret = false;
- break;
- }
- }
- rcu_read_unlock();
-
- return ret;
-}
-
static bool intel_iommu_capable(enum iommu_cap cap)
{
if (cap == IOMMU_CAP_CACHE_COHERENCY)
@@ -5334,7 +5347,7 @@ intel_iommu_dev_has_feat(struct device *dev, enum iommu_dev_features feat)
int ret;
if (!dev_is_pci(dev) || dmar_disabled ||
- !scalable_mode_support() || !iommu_pasid_support())
+ !scalable_mode_support() || !pasid_mode_support())
return false;
ret = pci_pasid_features(to_pci_dev(dev));
@@ -5508,6 +5521,57 @@ static bool risky_device(struct pci_dev *pdev)
return false;
}
+static void clflush_sync_map(struct dmar_domain *domain, unsigned long clf_pfn,
+ unsigned long clf_pages)
+{
+ struct dma_pte *first_pte = NULL, *pte = NULL;
+ unsigned long lvl_pages = 0;
+ int level = 0;
+
+ while (clf_pages > 0) {
+ if (!pte) {
+ level = 0;
+ pte = pfn_to_dma_pte(domain, clf_pfn, &level);
+ if (WARN_ON(!pte))
+ return;
+ first_pte = pte;
+ lvl_pages = lvl_to_nr_pages(level);
+ }
+
+ if (WARN_ON(!lvl_pages || clf_pages < lvl_pages))
+ return;
+
+ clf_pages -= lvl_pages;
+ clf_pfn += lvl_pages;
+ pte++;
+
+ if (!clf_pages || first_pte_in_page(pte) ||
+ (level > 1 && clf_pages < lvl_pages)) {
+ domain_flush_cache(domain, first_pte,
+ (void *)pte - (void *)first_pte);
+ pte = NULL;
+ }
+ }
+}
+
+static void intel_iommu_iotlb_sync_map(struct iommu_domain *domain,
+ unsigned long iova, size_t size)
+{
+ struct dmar_domain *dmar_domain = to_dmar_domain(domain);
+ unsigned long pages = aligned_nrpages(iova, size);
+ unsigned long pfn = iova >> VTD_PAGE_SHIFT;
+ struct intel_iommu *iommu;
+ int iommu_id;
+
+ if (!dmar_domain->iommu_coherency)
+ clflush_sync_map(dmar_domain, pfn, pages);
+
+ for_each_domain_iommu(iommu_id, dmar_domain) {
+ iommu = g_iommus[iommu_id];
+ __mapping_notify_one(iommu, dmar_domain, pfn, pages);
+ }
+}
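The net effect is a two-phase map on Intel: __domain_mapping() writes the PTEs without flushing, and the core then invokes the new callback, which clflushes the dirtied leaf PTEs (only on non-coherent hardware) and notifies each IOMMU in the domain, replacing the old domain_mapping() wrapper. As a sketch of the resulting call order (the core-side helper is added in iommu.c further below):

	iommu_map()
	  -> __iommu_map()		/* fills PTEs, no cache flush */
	  -> ops->iotlb_sync_map()	/* intel_iommu_iotlb_sync_map():
					   clflush leaf PTEs, notify IOMMUs */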
+
const struct iommu_ops intel_iommu_ops = {
.capable = intel_iommu_capable,
.domain_alloc = intel_iommu_domain_alloc,
@@ -5520,6 +5584,7 @@ const struct iommu_ops intel_iommu_ops = {
.aux_detach_dev = intel_iommu_aux_detach_device,
.aux_get_pasid = intel_iommu_aux_get_pasid,
.map = intel_iommu_map,
+ .iotlb_sync_map = intel_iommu_iotlb_sync_map,
.unmap = intel_iommu_unmap,
.flush_iotlb_all = intel_flush_iotlb_all,
.iotlb_sync = intel_iommu_tlb_sync,
diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c
index 685200a5cff0..611ef5243cb6 100644
--- a/drivers/iommu/intel/irq_remapping.c
+++ b/drivers/iommu/intel/irq_remapping.c
@@ -22,6 +22,7 @@
#include <asm/pci-direct.h>
#include "../irq_remapping.h"
+#include "cap_audit.h"
enum irq_mode {
IRQ_REMAPPING,
@@ -734,6 +735,9 @@ static int __init intel_prepare_irq_remapping(void)
if (dmar_table_init() < 0)
return -ENODEV;
+ if (intel_cap_audit(CAP_AUDIT_STATIC_IRQR, NULL))
+ goto error;
+
if (!dmar_ir_support())
return -ENODEV;
@@ -1439,6 +1443,10 @@ static int dmar_ir_add(struct dmar_drhd_unit *dmaru, struct intel_iommu *iommu)
int ret;
int eim = x2apic_enabled();
+ ret = intel_cap_audit(CAP_AUDIT_HOTPLUG_IRQR, iommu);
+ if (ret)
+ return ret;
+
if (eim && !ecap_eim_support(iommu->ecap)) {
pr_info("DRHD %Lx: EIM not supported by DRHD, ecap %Lx\n",
iommu->reg_phys, iommu->ecap);
diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c
index b92af83b79bd..f26cb6195b2c 100644
--- a/drivers/iommu/intel/pasid.c
+++ b/drivers/iommu/intel/pasid.c
@@ -457,20 +457,6 @@ pasid_cache_invalidation_with_pasid(struct intel_iommu *iommu,
}
static void
-iotlb_invalidation_with_pasid(struct intel_iommu *iommu, u16 did, u32 pasid)
-{
- struct qi_desc desc;
-
- desc.qw0 = QI_EIOTLB_PASID(pasid) | QI_EIOTLB_DID(did) |
- QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) | QI_EIOTLB_TYPE;
- desc.qw1 = 0;
- desc.qw2 = 0;
- desc.qw3 = 0;
-
- qi_submit_sync(iommu, &desc, 1, 0);
-}
-
-static void
devtlb_invalidation_with_pasid(struct intel_iommu *iommu,
struct device *dev, u32 pasid)
{
@@ -514,7 +500,7 @@ void intel_pasid_tear_down_entry(struct intel_iommu *iommu, struct device *dev,
clflush_cache_range(pte, sizeof(*pte));
pasid_cache_invalidation_with_pasid(iommu, did, pasid);
- iotlb_invalidation_with_pasid(iommu, did, pasid);
+ qi_flush_piotlb(iommu, did, pasid, 0, -1, 0);
/* Device IOTLB doesn't need to be flushed in caching mode. */
if (!cap_caching_mode(iommu->cap))
@@ -530,7 +516,7 @@ static void pasid_flush_caches(struct intel_iommu *iommu,
if (cap_caching_mode(iommu->cap)) {
pasid_cache_invalidation_with_pasid(iommu, did, pasid);
- iotlb_invalidation_with_pasid(iommu, did, pasid);
+ qi_flush_piotlb(iommu, did, pasid, 0, -1, 0);
} else {
iommu_flush_write_buffer(iommu);
}
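For readers unfamiliar with the helper, qi_flush_piotlb()'s signature in dmar.c is (iommu, did, pasid, addr, npages, ih); the "0, -1, 0" tail used above therefore requests a PASID-wide, non-global IOTLB flush rather than a page-selective one:

	/* equivalent intent of the removed open-coded descriptor */
	qi_flush_piotlb(iommu, did, pasid, 0 /* addr */,
			-1 /* npages: flush all */, 0 /* no inv. hint */);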
diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index 18a9f05df407..574a7e657a9a 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -123,53 +123,16 @@ static void __flush_svm_range_dev(struct intel_svm *svm,
unsigned long address,
unsigned long pages, int ih)
{
- struct qi_desc desc;
+ struct device_domain_info *info = get_domain_info(sdev->dev);
- if (pages == -1) {
- desc.qw0 = QI_EIOTLB_PASID(svm->pasid) |
- QI_EIOTLB_DID(sdev->did) |
- QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) |
- QI_EIOTLB_TYPE;
- desc.qw1 = 0;
- } else {
- int mask = ilog2(__roundup_pow_of_two(pages));
-
- desc.qw0 = QI_EIOTLB_PASID(svm->pasid) |
- QI_EIOTLB_DID(sdev->did) |
- QI_EIOTLB_GRAN(QI_GRAN_PSI_PASID) |
- QI_EIOTLB_TYPE;
- desc.qw1 = QI_EIOTLB_ADDR(address) |
- QI_EIOTLB_IH(ih) |
- QI_EIOTLB_AM(mask);
- }
- desc.qw2 = 0;
- desc.qw3 = 0;
- qi_submit_sync(sdev->iommu, &desc, 1, 0);
-
- if (sdev->dev_iotlb) {
- desc.qw0 = QI_DEV_EIOTLB_PASID(svm->pasid) |
- QI_DEV_EIOTLB_SID(sdev->sid) |
- QI_DEV_EIOTLB_QDEP(sdev->qdep) |
- QI_DEIOTLB_TYPE;
- if (pages == -1) {
- desc.qw1 = QI_DEV_EIOTLB_ADDR(-1ULL >> 1) |
- QI_DEV_EIOTLB_SIZE;
- } else if (pages > 1) {
- /* The least significant zero bit indicates the size. So,
- * for example, an "address" value of 0x12345f000 will
- * flush from 0x123440000 to 0x12347ffff (256KiB). */
- unsigned long last = address + ((unsigned long)(pages - 1) << VTD_PAGE_SHIFT);
- unsigned long mask = __rounddown_pow_of_two(address ^ last);
-
- desc.qw1 = QI_DEV_EIOTLB_ADDR((address & ~mask) |
- (mask - 1)) | QI_DEV_EIOTLB_SIZE;
- } else {
- desc.qw1 = QI_DEV_EIOTLB_ADDR(address);
- }
- desc.qw2 = 0;
- desc.qw3 = 0;
- qi_submit_sync(sdev->iommu, &desc, 1, 0);
- }
+ if (WARN_ON(!pages))
+ return;
+
+ qi_flush_piotlb(sdev->iommu, sdev->did, svm->pasid, address, pages, ih);
+ if (info->ats_enabled)
+ qi_flush_dev_iotlb_pasid(sdev->iommu, sdev->sid, info->pfsid,
+ svm->pasid, sdev->qdep, address,
+ order_base_2(pages));
}
static void intel_flush_svm_range_dev(struct intel_svm *svm,
@@ -948,10 +911,8 @@ static irqreturn_t prq_event_thread(int irq, void *d)
u64 address;
handled = 1;
-
req = &iommu->prq[head / sizeof(*req)];
-
- result = QI_RESP_FAILURE;
+ result = QI_RESP_INVALID;
address = (u64)req->addr << VTD_PAGE_SHIFT;
if (!req->pasid_present) {
pr_err("%s: Page request without PASID: %08llx %08llx\n",
@@ -989,7 +950,6 @@ static irqreturn_t prq_event_thread(int irq, void *d)
rcu_read_unlock();
}
- result = QI_RESP_INVALID;
/* Since we're using init_mm.pgd directly, we should never take
* any faults on kernel addresses. */
if (!svm->mm)
@@ -1079,8 +1039,17 @@ prq_advance:
* Clear the page request overflow bit and wake up all threads that
* are waiting for the completion of this handling.
*/
- if (readl(iommu->reg + DMAR_PRS_REG) & DMA_PRS_PRO)
- writel(DMA_PRS_PRO, iommu->reg + DMAR_PRS_REG);
+ if (readl(iommu->reg + DMAR_PRS_REG) & DMA_PRS_PRO) {
+ pr_info_ratelimited("IOMMU: %s: PRQ overflow detected\n",
+ iommu->name);
+ head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
+ tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
+ if (head == tail) {
+ writel(DMA_PRS_PRO, iommu->reg + DMAR_PRS_REG);
+			pr_info_ratelimited("IOMMU: %s: PRQ overflow cleared\n",
+ iommu->name);
+ }
+ }
if (!completion_done(&iommu->prq_complete))
complete(&iommu->prq_complete);
diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c
index 1d92ac948db7..d4004bcf333a 100644
--- a/drivers/iommu/io-pgtable-arm-v7s.c
+++ b/drivers/iommu/io-pgtable-arm-v7s.c
@@ -44,26 +44,25 @@
/*
* We have 32 bits total; 12 bits resolved at level 1, 8 bits at level 2,
- * and 12 bits in a page. With some carefully-chosen coefficients we can
- * hide the ugly inconsistencies behind these macros and at least let the
- * rest of the code pretend to be somewhat sane.
+ * and 12 bits in a page.
+ * MediaTek extends this by 2 bits to reach 34 bits: 14 bits at level 1 and 8 bits at level 2.
*/
#define ARM_V7S_ADDR_BITS 32
-#define _ARM_V7S_LVL_BITS(lvl) (16 - (lvl) * 4)
-#define ARM_V7S_LVL_SHIFT(lvl) (ARM_V7S_ADDR_BITS - (4 + 8 * (lvl)))
+#define _ARM_V7S_LVL_BITS(lvl, cfg) ((lvl) == 1 ? ((cfg)->ias - 20) : 8)
+#define ARM_V7S_LVL_SHIFT(lvl) ((lvl) == 1 ? 20 : 12)
#define ARM_V7S_TABLE_SHIFT 10
-#define ARM_V7S_PTES_PER_LVL(lvl) (1 << _ARM_V7S_LVL_BITS(lvl))
-#define ARM_V7S_TABLE_SIZE(lvl) \
- (ARM_V7S_PTES_PER_LVL(lvl) * sizeof(arm_v7s_iopte))
+#define ARM_V7S_PTES_PER_LVL(lvl, cfg) (1 << _ARM_V7S_LVL_BITS(lvl, cfg))
+#define ARM_V7S_TABLE_SIZE(lvl, cfg) \
+ (ARM_V7S_PTES_PER_LVL(lvl, cfg) * sizeof(arm_v7s_iopte))
#define ARM_V7S_BLOCK_SIZE(lvl) (1UL << ARM_V7S_LVL_SHIFT(lvl))
#define ARM_V7S_LVL_MASK(lvl) ((u32)(~0U << ARM_V7S_LVL_SHIFT(lvl)))
#define ARM_V7S_TABLE_MASK ((u32)(~0U << ARM_V7S_TABLE_SHIFT))
-#define _ARM_V7S_IDX_MASK(lvl) (ARM_V7S_PTES_PER_LVL(lvl) - 1)
-#define ARM_V7S_LVL_IDX(addr, lvl) ({ \
+#define _ARM_V7S_IDX_MASK(lvl, cfg) (ARM_V7S_PTES_PER_LVL(lvl, cfg) - 1)
+#define ARM_V7S_LVL_IDX(addr, lvl, cfg) ({ \
int _l = lvl; \
- ((u32)(addr) >> ARM_V7S_LVL_SHIFT(_l)) & _ARM_V7S_IDX_MASK(_l); \
+ ((addr) >> ARM_V7S_LVL_SHIFT(_l)) & _ARM_V7S_IDX_MASK(_l, cfg); \
})
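A hand-worked example of the reworked macros for a MediaTek configuration with cfg->ias == 34 (values chosen for illustration):

	/*
	 * _ARM_V7S_LVL_BITS(1, cfg) = 34 - 20 = 14  ->  16384 l1 entries
	 * _ARM_V7S_LVL_BITS(2, cfg) = 8             ->    256 l2 entries
	 *
	 * For iova = 0x345678000ULL:
	 *   ARM_V7S_LVL_IDX(iova, 1, cfg) = (iova >> 20) & 0x3fff = 0x3456
	 *   ARM_V7S_LVL_IDX(iova, 2, cfg) = (iova >> 12) & 0xff   = 0x78
	 */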
/*
@@ -112,9 +111,10 @@
#define ARM_V7S_TEX_MASK 0x7
#define ARM_V7S_ATTR_TEX(val) (((val) & ARM_V7S_TEX_MASK) << ARM_V7S_TEX_SHIFT)
-/* MediaTek extend the two bits for PA 32bit/33bit */
+/* MediaTek extends the bits below for PA bits 32/33/34 */
#define ARM_V7S_ATTR_MTK_PA_BIT32 BIT(9)
#define ARM_V7S_ATTR_MTK_PA_BIT33 BIT(4)
+#define ARM_V7S_ATTR_MTK_PA_BIT34 BIT(5)
/* *well, except for TEX on level 2 large pages, of course :( */
#define ARM_V7S_CONT_PAGE_TEX_SHIFT 6
@@ -194,6 +194,8 @@ static arm_v7s_iopte paddr_to_iopte(phys_addr_t paddr, int lvl,
pte |= ARM_V7S_ATTR_MTK_PA_BIT32;
if (paddr & BIT_ULL(33))
pte |= ARM_V7S_ATTR_MTK_PA_BIT33;
+ if (paddr & BIT_ULL(34))
+ pte |= ARM_V7S_ATTR_MTK_PA_BIT34;
return pte;
}
@@ -218,6 +220,8 @@ static phys_addr_t iopte_to_paddr(arm_v7s_iopte pte, int lvl,
paddr |= BIT_ULL(32);
if (pte & ARM_V7S_ATTR_MTK_PA_BIT33)
paddr |= BIT_ULL(33);
+ if (pte & ARM_V7S_ATTR_MTK_PA_BIT34)
+ paddr |= BIT_ULL(34);
return paddr;
}
@@ -234,7 +238,7 @@ static void *__arm_v7s_alloc_table(int lvl, gfp_t gfp,
struct device *dev = cfg->iommu_dev;
phys_addr_t phys;
dma_addr_t dma;
- size_t size = ARM_V7S_TABLE_SIZE(lvl);
+ size_t size = ARM_V7S_TABLE_SIZE(lvl, cfg);
void *table = NULL;
if (lvl == 1)
@@ -280,7 +284,7 @@ static void __arm_v7s_free_table(void *table, int lvl,
{
struct io_pgtable_cfg *cfg = &data->iop.cfg;
struct device *dev = cfg->iommu_dev;
- size_t size = ARM_V7S_TABLE_SIZE(lvl);
+ size_t size = ARM_V7S_TABLE_SIZE(lvl, cfg);
if (!cfg->coherent_walk)
dma_unmap_single(dev, __arm_v7s_dma_addr(table), size,
@@ -424,7 +428,7 @@ static int arm_v7s_init_pte(struct arm_v7s_io_pgtable *data,
arm_v7s_iopte *tblp;
size_t sz = ARM_V7S_BLOCK_SIZE(lvl);
- tblp = ptep - ARM_V7S_LVL_IDX(iova, lvl);
+ tblp = ptep - ARM_V7S_LVL_IDX(iova, lvl, cfg);
if (WARN_ON(__arm_v7s_unmap(data, NULL, iova + i * sz,
sz, lvl, tblp) != sz))
return -EINVAL;
@@ -477,7 +481,7 @@ static int __arm_v7s_map(struct arm_v7s_io_pgtable *data, unsigned long iova,
int num_entries = size >> ARM_V7S_LVL_SHIFT(lvl);
/* Find our entry at the current level */
- ptep += ARM_V7S_LVL_IDX(iova, lvl);
+ ptep += ARM_V7S_LVL_IDX(iova, lvl, cfg);
/* If we can install a leaf entry at this level, then do so */
if (num_entries)
@@ -519,7 +523,6 @@ static int arm_v7s_map(struct io_pgtable_ops *ops, unsigned long iova,
phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
{
struct arm_v7s_io_pgtable *data = io_pgtable_ops_to_data(ops);
- struct io_pgtable *iop = &data->iop;
int ret;
if (WARN_ON(iova >= (1ULL << data->iop.cfg.ias) ||
@@ -535,12 +538,7 @@ static int arm_v7s_map(struct io_pgtable_ops *ops, unsigned long iova,
* Synchronise all PTE updates for the new mapping before there's
* a chance for anything to kick off a table walk for the new iova.
*/
- if (iop->cfg.quirks & IO_PGTABLE_QUIRK_TLBI_ON_MAP) {
- io_pgtable_tlb_flush_walk(iop, iova, size,
- ARM_V7S_BLOCK_SIZE(2));
- } else {
- wmb();
- }
+ wmb();
return ret;
}
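With the explicit flush-on-map gone, arm_v7s_map() relies on the wmb() plus the iotlb_sync_map() callback introduced in this series; accordingly, IO_PGTABLE_QUIRK_TLBI_ON_MAP is dropped from the accepted quirks below, its user (the MediaTek driver, per the diffstat) having moved to the per-map sync callback.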
@@ -550,7 +548,7 @@ static void arm_v7s_free_pgtable(struct io_pgtable *iop)
struct arm_v7s_io_pgtable *data = io_pgtable_to_data(iop);
int i;
- for (i = 0; i < ARM_V7S_PTES_PER_LVL(1); i++) {
+ for (i = 0; i < ARM_V7S_PTES_PER_LVL(1, &data->iop.cfg); i++) {
arm_v7s_iopte pte = data->pgd[i];
if (ARM_V7S_PTE_IS_TABLE(pte, 1))
@@ -602,9 +600,9 @@ static size_t arm_v7s_split_blk_unmap(struct arm_v7s_io_pgtable *data,
if (!tablep)
return 0; /* Bytes unmapped */
- num_ptes = ARM_V7S_PTES_PER_LVL(2);
+ num_ptes = ARM_V7S_PTES_PER_LVL(2, cfg);
num_entries = size >> ARM_V7S_LVL_SHIFT(2);
- unmap_idx = ARM_V7S_LVL_IDX(iova, 2);
+ unmap_idx = ARM_V7S_LVL_IDX(iova, 2, cfg);
pte = arm_v7s_prot_to_pte(arm_v7s_pte_to_prot(blk_pte, 1), 2, cfg);
if (num_entries > 1)
@@ -646,7 +644,7 @@ static size_t __arm_v7s_unmap(struct arm_v7s_io_pgtable *data,
if (WARN_ON(lvl > 2))
return 0;
- idx = ARM_V7S_LVL_IDX(iova, lvl);
+ idx = ARM_V7S_LVL_IDX(iova, lvl, &iop->cfg);
ptep += idx;
do {
pte[i] = READ_ONCE(ptep[i]);
@@ -717,7 +715,7 @@ static size_t arm_v7s_unmap(struct io_pgtable_ops *ops, unsigned long iova,
{
struct arm_v7s_io_pgtable *data = io_pgtable_ops_to_data(ops);
- if (WARN_ON(upper_32_bits(iova)))
+ if (WARN_ON(iova >= (1ULL << data->iop.cfg.ias)))
return 0;
return __arm_v7s_unmap(data, gather, iova, size, 1, data->pgd);
@@ -732,7 +730,7 @@ static phys_addr_t arm_v7s_iova_to_phys(struct io_pgtable_ops *ops,
u32 mask;
do {
- ptep += ARM_V7S_LVL_IDX(iova, ++lvl);
+ ptep += ARM_V7S_LVL_IDX(iova, ++lvl, &data->iop.cfg);
pte = READ_ONCE(*ptep);
ptep = iopte_deref(pte, lvl, data);
} while (ARM_V7S_PTE_IS_TABLE(pte, lvl));
@@ -751,15 +749,14 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg,
{
struct arm_v7s_io_pgtable *data;
- if (cfg->ias > ARM_V7S_ADDR_BITS)
+ if (cfg->ias > (arm_v7s_is_mtk_enabled(cfg) ? 34 : ARM_V7S_ADDR_BITS))
return NULL;
- if (cfg->oas > (arm_v7s_is_mtk_enabled(cfg) ? 34 : ARM_V7S_ADDR_BITS))
+ if (cfg->oas > (arm_v7s_is_mtk_enabled(cfg) ? 35 : ARM_V7S_ADDR_BITS))
return NULL;
if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS |
IO_PGTABLE_QUIRK_NO_PERMS |
- IO_PGTABLE_QUIRK_TLBI_ON_MAP |
IO_PGTABLE_QUIRK_ARM_MTK_EXT |
IO_PGTABLE_QUIRK_NON_STRICT))
return NULL;
@@ -775,8 +772,8 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg,
spin_lock_init(&data->split_lock);
data->l2_tables = kmem_cache_create("io-pgtable_armv7s_l2",
- ARM_V7S_TABLE_SIZE(2),
- ARM_V7S_TABLE_SIZE(2),
+ ARM_V7S_TABLE_SIZE(2, cfg),
+ ARM_V7S_TABLE_SIZE(2, cfg),
ARM_V7S_TABLE_SLAB_FLAGS, NULL);
if (!data->l2_tables)
goto out_free_data;
diff --git a/drivers/iommu/io-pgtable.c b/drivers/iommu/io-pgtable.c
index 94394c81468f..6e9917ce980f 100644
--- a/drivers/iommu/io-pgtable.c
+++ b/drivers/iommu/io-pgtable.c
@@ -24,6 +24,9 @@ io_pgtable_init_table[IO_PGTABLE_NUM_FMTS] = {
#ifdef CONFIG_IOMMU_IO_PGTABLE_ARMV7S
[ARM_V7S] = &io_pgtable_arm_v7s_init_fns,
#endif
+#ifdef CONFIG_AMD_IOMMU
+ [AMD_IOMMU_V1] = &io_pgtable_amd_iommu_v1_init_fns,
+#endif
};
struct io_pgtable_ops *alloc_io_pgtable_ops(enum io_pgtable_fmt fmt,
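For context, each entry in io_pgtable_init_table points at a struct io_pgtable_init_fns pair, which is all a format has to provide to be selectable through alloc_io_pgtable_ops(). A sketch of what the AMD v1 hook-up plausibly looks like; the callback names are illustrative, the struct shape is the generic io-pgtable contract:

	struct io_pgtable_init_fns io_pgtable_amd_iommu_v1_init_fns = {
		.alloc	= v1_alloc_pgtable,	/* build the AMD v1 page table */
		.free	= v1_free_pgtable,	/* release it on domain teardown */
	};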
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index ffeebda8d6de..d0b0a15dba84 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -1980,6 +1980,16 @@ out_unlock:
}
EXPORT_SYMBOL_GPL(iommu_attach_device);
+int iommu_deferred_attach(struct device *dev, struct iommu_domain *domain)
+{
+ const struct iommu_ops *ops = domain->ops;
+
+ if (ops->is_attach_deferred && ops->is_attach_deferred(domain, dev))
+ return __iommu_attach_device(domain, dev);
+
+ return 0;
+}
+
/*
* Check flags and other user provided data for valid combinations. We also
* make sure no reserved fields or unused flags are set. This is to ensure
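A minimal sketch of the intended caller of iommu_deferred_attach(), assuming a dma-iommu style fast path where the attach happens lazily on the first DMA operation; the function name and the surrounding mapping logic are illustrative:

	static dma_addr_t example_dma_map(struct device *dev,
					  struct iommu_domain *domain,
					  unsigned long iova, phys_addr_t paddr,
					  size_t size, int prot)
	{
		/* Complete an attach that was deferred at probe time */
		if (iommu_deferred_attach(dev, domain))
			return DMA_MAPPING_ERROR;

		if (iommu_map_atomic(domain, iova, paddr, size, prot))
			return DMA_MAPPING_ERROR;

		return (dma_addr_t)iova;
	}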
@@ -2426,9 +2436,6 @@ static int __iommu_map(struct iommu_domain *domain, unsigned long iova,
size -= pgsize;
}
- if (ops->iotlb_sync_map)
- ops->iotlb_sync_map(domain);
-
/* unroll mapping in case something went wrong */
if (ret)
iommu_unmap(domain, orig_iova, orig_size - size);
@@ -2438,18 +2445,31 @@ static int __iommu_map(struct iommu_domain *domain, unsigned long iova,
return ret;
}
+static int _iommu_map(struct iommu_domain *domain, unsigned long iova,
+ phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
+{
+ const struct iommu_ops *ops = domain->ops;
+ int ret;
+
+ ret = __iommu_map(domain, iova, paddr, size, prot, gfp);
+ if (ret == 0 && ops->iotlb_sync_map)
+ ops->iotlb_sync_map(domain, iova, size);
+
+ return ret;
+}
+
int iommu_map(struct iommu_domain *domain, unsigned long iova,
phys_addr_t paddr, size_t size, int prot)
{
might_sleep();
- return __iommu_map(domain, iova, paddr, size, prot, GFP_KERNEL);
+ return _iommu_map(domain, iova, paddr, size, prot, GFP_KERNEL);
}
EXPORT_SYMBOL_GPL(iommu_map);
int iommu_map_atomic(struct iommu_domain *domain, unsigned long iova,
phys_addr_t paddr, size_t size, int prot)
{
- return __iommu_map(domain, iova, paddr, size, prot, GFP_ATOMIC);
+ return _iommu_map(domain, iova, paddr, size, prot, GFP_ATOMIC);
}
EXPORT_SYMBOL_GPL(iommu_map_atomic);
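The split between __iommu_map() and the _iommu_map() wrapper means a driver's iotlb_sync_map() now fires once per API call with the full range, rather than once per internal page-size chunk. A hedged sketch of a driver-side implementation under the new signature; the register helper is an illustrative stand-in:

	static void example_hw_inv_range(unsigned long iova, size_t size)
	{
		/* Illustrative stand-in for a ranged MMIO invalidation */
	}

	static void example_iotlb_sync_map(struct iommu_domain *domain,
					   unsigned long iova, size_t size)
	{
		/* One ranged flush for everything just mapped */
		example_hw_inv_range(iova, size);
	}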
@@ -2533,6 +2553,7 @@ static size_t __iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
struct scatterlist *sg, unsigned int nents, int prot,
gfp_t gfp)
{
+ const struct iommu_ops *ops = domain->ops;
size_t len = 0, mapped = 0;
phys_addr_t start;
unsigned int i = 0;
@@ -2563,6 +2584,8 @@ static size_t __iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
sg = sg_next(sg);
}
+ if (ops->iotlb_sync_map)
+ ops->iotlb_sync_map(domain, iova, mapped);
return mapped;
out_err:
@@ -2586,7 +2609,6 @@ size_t iommu_map_sg_atomic(struct iommu_domain *domain, unsigned long iova,
{
return __iommu_map_sg(domain, iova, sg, nents, prot, GFP_ATOMIC);
}
-EXPORT_SYMBOL_GPL(iommu_map_sg_atomic);
int iommu_domain_window_enable(struct iommu_domain *domain, u32 wnd_nr,
phys_addr_t paddr, u64 size, int prot)
@@ -2599,15 +2621,6 @@ int iommu_domain_window_enable(struct iommu_domain *domain, u32 wnd_nr,
}
EXPORT_SYMBOL_GPL(iommu_domain_window_enable);
-void iommu_domain_window_disable(struct iommu_domain *domain, u32 wnd_nr)
-{
- if (unlikely(domain->ops->domain_window_disable == NULL))
- return;
-
- return domain->ops->domain_window_disable(domain, wnd_nr);
-}
-EXPORT_SYMBOL_GPL(iommu_domain_window_disable);
-
/**
* report_iommu_fault() - report about an IOMMU fault to the IOMMU framework
* @domain: the iommu domain where the fault has happened
@@ -2863,17 +2876,6 @@ EXPORT_SYMBOL_GPL(iommu_fwspec_add_ids);
/*
* Per device IOMMU features.
*/
-bool iommu_dev_has_feature(struct device *dev, enum iommu_dev_features feat)
-{
- const struct iommu_ops *ops = dev->bus->iommu_ops;
-
- if (ops && ops->dev_has_feat)
- return ops->dev_has_feat(dev, feat);
-
- return false;
-}
-EXPORT_SYMBOL_GPL(iommu_dev_has_feature);
-
int iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features feat)
{
const struct iommu_ops *ops = dev->bus->iommu_ops;
diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
index d20b8b333d30..e6e2fa85271c 100644
--- a/drivers/iommu/iova.c
+++ b/drivers/iommu/iova.c
@@ -55,7 +55,7 @@ init_iova_domain(struct iova_domain *iovad, unsigned long granule,
}
EXPORT_SYMBOL_GPL(init_iova_domain);
-bool has_iova_flush_queue(struct iova_domain *iovad)
+static bool has_iova_flush_queue(struct iova_domain *iovad)
{
return !!iovad->fq;
}
@@ -112,7 +112,6 @@ int init_iova_flush_queue(struct iova_domain *iovad,
return 0;
}
-EXPORT_SYMBOL_GPL(init_iova_flush_queue);
static struct rb_node *
__get_cached_rbnode(struct iova_domain *iovad, unsigned long limit_pfn)
@@ -451,7 +450,6 @@ retry:
return new_iova->pfn_lo;
}
-EXPORT_SYMBOL_GPL(alloc_iova_fast);
/**
* free_iova_fast - free iova pfn range into rcache
@@ -598,7 +596,6 @@ void queue_iova(struct iova_domain *iovad,
mod_timer(&iovad->fq_timer,
jiffies + msecs_to_jiffies(IOVA_FQ_TIMEOUT));
}
-EXPORT_SYMBOL_GPL(queue_iova);
/**
* put_iova_domain - destroys the iova domain
@@ -710,36 +707,6 @@ finish:
}
EXPORT_SYMBOL_GPL(reserve_iova);
-/**
- * copy_reserved_iova - copies the reserved between domains
- * @from: - source domain from where to copy
- * @to: - destination domin where to copy
- * This function copies reserved iova's from one domain to
- * other.
- */
-void
-copy_reserved_iova(struct iova_domain *from, struct iova_domain *to)
-{
- unsigned long flags;
- struct rb_node *node;
-
- spin_lock_irqsave(&from->iova_rbtree_lock, flags);
- for (node = rb_first(&from->rbroot); node; node = rb_next(node)) {
- struct iova *iova = rb_entry(node, struct iova, node);
- struct iova *new_iova;
-
- if (iova->pfn_lo == IOVA_ANCHOR)
- continue;
-
- new_iova = reserve_iova(to, iova->pfn_lo, iova->pfn_hi);
- if (!new_iova)
- pr_err("Reserve iova range %lx@%lx failed\n",
- iova->pfn_lo, iova->pfn_lo);
- }
- spin_unlock_irqrestore(&from->iova_rbtree_lock, flags);
-}
-EXPORT_SYMBOL_GPL(copy_reserved_iova);
-
/*
* Magazine caches for IOVA ranges. For an introduction to magazines,
* see the USENIX 2001 paper "Magazines and Vmem: Extending the Slab
diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c
index d71f10257f15..eaaec0a55cc6 100644
--- a/drivers/iommu/ipmmu-vmsa.c
+++ b/drivers/iommu/ipmmu-vmsa.c
@@ -734,54 +734,45 @@ static int ipmmu_init_platform_device(struct device *dev,
return 0;
}
-static const struct soc_device_attribute soc_rcar_gen3[] = {
- { .soc_id = "r8a774a1", },
- { .soc_id = "r8a774b1", },
- { .soc_id = "r8a774c0", },
- { .soc_id = "r8a774e1", },
- { .soc_id = "r8a7795", },
- { .soc_id = "r8a77961", },
- { .soc_id = "r8a7796", },
- { .soc_id = "r8a77965", },
- { .soc_id = "r8a77970", },
- { .soc_id = "r8a77990", },
- { .soc_id = "r8a77995", },
+static const struct soc_device_attribute soc_needs_opt_in[] = {
+ { .family = "R-Car Gen3", },
+ { .family = "RZ/G2", },
{ /* sentinel */ }
};
-static const struct soc_device_attribute soc_rcar_gen3_whitelist[] = {
- { .soc_id = "r8a774b1", },
- { .soc_id = "r8a774c0", },
- { .soc_id = "r8a774e1", },
- { .soc_id = "r8a7795", .revision = "ES3.*" },
- { .soc_id = "r8a77961", },
- { .soc_id = "r8a77965", },
- { .soc_id = "r8a77990", },
- { .soc_id = "r8a77995", },
+static const struct soc_device_attribute soc_denylist[] = {
+ { .soc_id = "r8a774a1", },
+ { .soc_id = "r8a7795", .revision = "ES1.*" },
+ { .soc_id = "r8a7795", .revision = "ES2.*" },
+ { .soc_id = "r8a7796", },
{ /* sentinel */ }
};
-static const char * const rcar_gen3_slave_whitelist[] = {
+static const char * const devices_allowlist[] = {
+ "ee100000.mmc",
+ "ee120000.mmc",
+ "ee140000.mmc",
+ "ee160000.mmc"
};
-static bool ipmmu_slave_whitelist(struct device *dev)
+static bool ipmmu_device_is_allowed(struct device *dev)
{
unsigned int i;
/*
- * For R-Car Gen3 use a white list to opt-in slave devices.
+	 * R-Car Gen3 and RZ/G2 use the allow list to opt in devices.
	 * For other SoCs, this returns true anyway.
PLACEHOLDER
*/
- if (!soc_device_match(soc_rcar_gen3))
+ if (!soc_device_match(soc_needs_opt_in))
return true;
- /* Check whether this R-Car Gen3 can use the IPMMU correctly or not */
- if (!soc_device_match(soc_rcar_gen3_whitelist))
+ /* Check whether this SoC can use the IPMMU correctly or not */
+ if (soc_device_match(soc_denylist))
return false;
- /* Check whether this slave device can work with the IPMMU */
- for (i = 0; i < ARRAY_SIZE(rcar_gen3_slave_whitelist); i++) {
- if (!strcmp(dev_name(dev), rcar_gen3_slave_whitelist[i]))
+ /* Check whether this device can work with the IPMMU */
+ for (i = 0; i < ARRAY_SIZE(devices_allowlist); i++) {
+ if (!strcmp(dev_name(dev), devices_allowlist[i]))
return true;
}
@@ -792,7 +783,7 @@ static bool ipmmu_slave_whitelist(struct device *dev)
static int ipmmu_of_xlate(struct device *dev,
struct of_phandle_args *spec)
{
- if (!ipmmu_slave_whitelist(dev))
+ if (!ipmmu_device_is_allowed(dev))
return -ENODEV;
iommu_fwspec_add_ids(dev, spec->args, 1);
diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c
index 040e85f70861..f0ba6a09b434 100644
--- a/drivers/iommu/msm_iommu.c
+++ b/drivers/iommu/msm_iommu.c
@@ -343,7 +343,6 @@ static int msm_iommu_domain_config(struct msm_priv *priv)
spin_lock_init(&priv->pgtlock);
priv->cfg = (struct io_pgtable_cfg) {
- .quirks = IO_PGTABLE_QUIRK_TLBI_ON_MAP,
.pgsize_bitmap = msm_iommu_ops.pgsize_bitmap,
.ias = 32,
.oas = 32,
@@ -490,6 +489,14 @@ static int msm_iommu_map(struct iommu_domain *domain, unsigned long iova,
return ret;
}
+static void msm_iommu_sync_map(struct iommu_domain *domain, unsigned long iova,
+ size_t size)
+{
+ struct msm_priv *priv = to_msm_priv(domain);
+
+ __flush_iotlb_range(iova, size, SZ_4K, false, priv);
+}
+
static size_t msm_iommu_unmap(struct iommu_domain *domain, unsigned long iova,
size_t len, struct iommu_iotlb_gather *gather)
{
@@ -680,6 +687,7 @@ static struct iommu_ops msm_iommu_ops = {
* kick starting the other master.
*/
.iotlb_sync = NULL,
+ .iotlb_sync_map = msm_iommu_sync_map,
.iova_to_phys = msm_iommu_iova_to_phys,
.probe_device = msm_iommu_probe_device,
.release_device = msm_iommu_release_device,
diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index 8e56cec532e7..6ecc007f07cd 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -3,10 +3,12 @@
* Copyright (c) 2015-2016 MediaTek Inc.
* Author: Yong Wu <yong.wu@mediatek.com>
*/
+#include <linux/bitfield.h>
#include <linux/bug.h>
#include <linux/clk.h>
#include <linux/component.h>
#include <linux/device.h>
+#include <linux/dma-direct.h>
#include <linux/dma-iommu.h>
#include <linux/err.h>
#include <linux/interrupt.h>
@@ -20,6 +22,7 @@
#include <linux/of_irq.h>
#include <linux/of_platform.h>
#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
#include <linux/regmap.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
@@ -88,6 +91,9 @@
#define F_REG_MMU1_FAULT_MASK GENMASK(13, 7)
#define REG_MMU0_FAULT_VA 0x13c
+#define F_MMU_INVAL_VA_31_12_MASK GENMASK(31, 12)
+#define F_MMU_INVAL_VA_34_32_MASK GENMASK(11, 9)
+#define F_MMU_INVAL_PA_34_32_MASK GENMASK(8, 6)
#define F_MMU_FAULT_VA_WRITE_BIT BIT(1)
#define F_MMU_FAULT_VA_LAYER_BIT BIT(0)
@@ -103,13 +109,6 @@
#define MTK_PROTECT_PA_ALIGN 256
-/*
- * Get the local arbiter ID and the portid within the larb arbiter
- * from mtk_m4u_id which is defined by MTK_M4U_ID.
- */
-#define MTK_M4U_TO_LARB(id) (((id) >> 5) & 0xf)
-#define MTK_M4U_TO_PORT(id) ((id) & 0x1f)
-
#define HAS_4GB_MODE BIT(0)
/* HW will use the EMI clock if there isn't the "bclk". */
#define HAS_BCLK BIT(1)
@@ -119,6 +118,7 @@
#define HAS_SUB_COMM BIT(5)
#define WR_THROT_EN BIT(6)
#define HAS_LEGACY_IVRP_PADDR BIT(7)
+#define IOVA_34_EN BIT(8)
#define MTK_IOMMU_HAS_FLAG(pdata, _x) \
((((pdata)->flags) & (_x)) == (_x))
@@ -127,11 +127,19 @@ struct mtk_iommu_domain {
struct io_pgtable_cfg cfg;
struct io_pgtable_ops *iop;
+ struct mtk_iommu_data *data;
struct iommu_domain domain;
};
static const struct iommu_ops mtk_iommu_ops;
+static int mtk_iommu_hw_init(const struct mtk_iommu_data *data);
+
+#define MTK_IOMMU_TLB_ADDR(iova) ({ \
+ dma_addr_t _addr = iova; \
+ ((lower_32_bits(_addr) & GENMASK(31, 12)) | upper_32_bits(_addr));\
+})
+
/*
* In M4U 4GB mode, the physical address is remapped as below:
*
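A worked example of the MTK_IOMMU_TLB_ADDR() packing above: the invalidation registers keep IOVA bits [31:12] in place and reuse the low bits for IOVA bits [34:32]. The standalone helper below mirrors the macro, with an illustrative input value:

	#include <stdint.h>

	static uint32_t tlb_addr(uint64_t iova)
	{
		return ((uint32_t)iova & 0xfffff000u) | (uint32_t)(iova >> 32);
	}

	/*
	 * tlb_addr(0x240000000ULL) == 0x40000002:
	 * bits [31:12] = 0x40000 stay put, bits [34:32] = 0x2 land in bits [2:0].
	 */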
@@ -160,6 +168,25 @@ static LIST_HEAD(m4ulist); /* List all the M4U HWs */
#define for_each_m4u(data) list_for_each_entry(data, &m4ulist, list)
+struct mtk_iommu_iova_region {
+ dma_addr_t iova_base;
+ unsigned long long size;
+};
+
+static const struct mtk_iommu_iova_region single_domain[] = {
+ {.iova_base = 0, .size = SZ_4G},
+};
+
+static const struct mtk_iommu_iova_region mt8192_multi_dom[] = {
+ { .iova_base = 0x0, .size = SZ_4G}, /* disp: 0 ~ 4G */
+ #if IS_ENABLED(CONFIG_ARCH_DMA_ADDR_T_64BIT)
+ { .iova_base = SZ_4G, .size = SZ_4G}, /* vdec: 4G ~ 8G */
+ { .iova_base = SZ_4G * 2, .size = SZ_4G}, /* CAM/MDP: 8G ~ 12G */
+ { .iova_base = 0x240000000ULL, .size = 0x4000000}, /* CCU0 */
+ { .iova_base = 0x244000000ULL, .size = 0x4000000}, /* CCU1 */
+ #endif
+};
+
/*
* There may be 1 or 2 M4U HWs, but we always expect them to be in the same
* domain for performance.
@@ -182,33 +209,43 @@ static struct mtk_iommu_domain *to_mtk_domain(struct iommu_domain *dom)
return container_of(dom, struct mtk_iommu_domain, domain);
}
-static void mtk_iommu_tlb_flush_all(void *cookie)
+static void mtk_iommu_tlb_flush_all(struct mtk_iommu_data *data)
{
- struct mtk_iommu_data *data = cookie;
-
for_each_m4u(data) {
+ if (pm_runtime_get_if_in_use(data->dev) <= 0)
+ continue;
+
writel_relaxed(F_INVLD_EN1 | F_INVLD_EN0,
data->base + data->plat_data->inv_sel_reg);
writel_relaxed(F_ALL_INVLD, data->base + REG_MMU_INVALIDATE);
wmb(); /* Make sure the tlb flush all done */
+
+ pm_runtime_put(data->dev);
}
}
static void mtk_iommu_tlb_flush_range_sync(unsigned long iova, size_t size,
- size_t granule, void *cookie)
+ size_t granule,
+ struct mtk_iommu_data *data)
{
- struct mtk_iommu_data *data = cookie;
+ bool has_pm = !!data->dev->pm_domain;
unsigned long flags;
int ret;
u32 tmp;
for_each_m4u(data) {
+ if (has_pm) {
+ if (pm_runtime_get_if_in_use(data->dev) <= 0)
+ continue;
+ }
+
spin_lock_irqsave(&data->tlb_lock, flags);
writel_relaxed(F_INVLD_EN1 | F_INVLD_EN0,
data->base + data->plat_data->inv_sel_reg);
- writel_relaxed(iova, data->base + REG_MMU_INVLD_START_A);
- writel_relaxed(iova + size - 1,
+ writel_relaxed(MTK_IOMMU_TLB_ADDR(iova),
+ data->base + REG_MMU_INVLD_START_A);
+ writel_relaxed(MTK_IOMMU_TLB_ADDR(iova + size - 1),
data->base + REG_MMU_INVLD_END_A);
writel_relaxed(F_MMU_INV_RANGE,
data->base + REG_MMU_INVALIDATE);
@@ -219,36 +256,24 @@ static void mtk_iommu_tlb_flush_range_sync(unsigned long iova, size_t size,
if (ret) {
dev_warn(data->dev,
"Partial TLB flush timed out, falling back to full flush\n");
- mtk_iommu_tlb_flush_all(cookie);
+ mtk_iommu_tlb_flush_all(data);
}
/* Clear the CPE status */
writel_relaxed(0, data->base + REG_MMU_CPE_DONE);
spin_unlock_irqrestore(&data->tlb_lock, flags);
- }
-}
-
-static void mtk_iommu_tlb_flush_page_nosync(struct iommu_iotlb_gather *gather,
- unsigned long iova, size_t granule,
- void *cookie)
-{
- struct mtk_iommu_data *data = cookie;
- struct iommu_domain *domain = &data->m4u_dom->domain;
- iommu_iotlb_gather_add_page(domain, gather, iova, granule);
+ if (has_pm)
+ pm_runtime_put(data->dev);
+ }
}
-static const struct iommu_flush_ops mtk_iommu_flush_ops = {
- .tlb_flush_all = mtk_iommu_tlb_flush_all,
- .tlb_flush_walk = mtk_iommu_tlb_flush_range_sync,
- .tlb_add_page = mtk_iommu_tlb_flush_page_nosync,
-};
-
static irqreturn_t mtk_iommu_isr(int irq, void *dev_id)
{
struct mtk_iommu_data *data = dev_id;
struct mtk_iommu_domain *dom = data->m4u_dom;
- u32 int_state, regval, fault_iova, fault_pa;
unsigned int fault_larb, fault_port, sub_comm = 0;
+ u32 int_state, regval, va34_32, pa34_32;
+ u64 fault_iova, fault_pa;
bool layer, write;
/* Read error info from registers */
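The flush paths above only touch the invalidation registers while the IOMMU is powered: pm_runtime_get_if_in_use() returns <= 0 when it is not, in which case the TLB contents are lost with the power domain anyway. A minimal sketch of the pattern; the MMIO helper is illustrative:

	static void example_write_inv_regs(struct mtk_iommu_data *data)
	{
		/* Illustrative stand-in for the REG_MMU_INVALIDATE writes */
	}

	static void example_flush_all(struct mtk_iommu_data *data)
	{
		if (pm_runtime_get_if_in_use(data->dev) <= 0)
			return;		/* powered down: nothing to invalidate */

		example_write_inv_regs(data);
		pm_runtime_put(data->dev);
	}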
@@ -264,6 +289,14 @@ static irqreturn_t mtk_iommu_isr(int irq, void *dev_id)
}
layer = fault_iova & F_MMU_FAULT_VA_LAYER_BIT;
write = fault_iova & F_MMU_FAULT_VA_WRITE_BIT;
+ if (MTK_IOMMU_HAS_FLAG(data->plat_data, IOVA_34_EN)) {
+ va34_32 = FIELD_GET(F_MMU_INVAL_VA_34_32_MASK, fault_iova);
+ pa34_32 = FIELD_GET(F_MMU_INVAL_PA_34_32_MASK, fault_iova);
+ fault_iova = fault_iova & F_MMU_INVAL_VA_31_12_MASK;
+ fault_iova |= (u64)va34_32 << 32;
+ fault_pa |= (u64)pa34_32 << 32;
+ }
+
fault_port = F_MMU_INT_ID_PORT_ID(regval);
if (MTK_IOMMU_HAS_FLAG(data->plat_data, HAS_SUB_COMM)) {
fault_larb = F_MMU_INT_ID_COMM_ID(regval);
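A worked decode of the 34-bit fault layout above, with an illustrative raw register value: VA[34:32] sits in bits [11:9] and PA[34:32] in bits [8:6] of the fault VA register, while VA[31:12] stays in place.

	#include <stdint.h>

	static uint64_t decode_fault_va(uint32_t raw)
	{
		uint64_t va34_32 = (raw >> 9) & 0x7;	/* GENMASK(11, 9) */

		return (raw & 0xfffff000u) | (va34_32 << 32);
	}

	/*
	 * decode_fault_va(0x12345280) == 0x112345000:
	 * VA[31:12] = 0x12345, bits [11:9] = 1 -> VA[34:32] = 0x1.
	 */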
@@ -277,7 +310,7 @@ static irqreturn_t mtk_iommu_isr(int irq, void *dev_id)
write ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ)) {
dev_err_ratelimited(
data->dev,
- "fault type=0x%x iova=0x%x pa=0x%x larb=%d port=%d layer=%d %s\n",
+ "fault type=0x%x iova=0x%llx pa=0x%llx larb=%d port=%d layer=%d %s\n",
int_state, fault_iova, fault_pa, fault_larb, fault_port,
layer, write ? "write" : "read");
}
@@ -292,21 +325,57 @@ static irqreturn_t mtk_iommu_isr(int irq, void *dev_id)
return IRQ_HANDLED;
}
-static void mtk_iommu_config(struct mtk_iommu_data *data,
- struct device *dev, bool enable)
+static int mtk_iommu_get_domain_id(struct device *dev,
+ const struct mtk_iommu_plat_data *plat_data)
+{
+ const struct mtk_iommu_iova_region *rgn = plat_data->iova_region;
+ const struct bus_dma_region *dma_rgn = dev->dma_range_map;
+ int i, candidate = -1;
+ dma_addr_t dma_end;
+
+ if (!dma_rgn || plat_data->iova_region_nr == 1)
+ return 0;
+
+ dma_end = dma_rgn->dma_start + dma_rgn->size - 1;
+ for (i = 0; i < plat_data->iova_region_nr; i++, rgn++) {
+ /* Best fit. */
+ if (dma_rgn->dma_start == rgn->iova_base &&
+ dma_end == rgn->iova_base + rgn->size - 1)
+ return i;
+ /* OK if it is inside this region. */
+ if (dma_rgn->dma_start >= rgn->iova_base &&
+ dma_end < rgn->iova_base + rgn->size)
+ candidate = i;
+ }
+
+ if (candidate >= 0)
+ return candidate;
+ dev_err(dev, "Cannot find the iommu domain id (%pad 0x%llx).\n",
+ &dma_rgn->dma_start, dma_rgn->size);
+ return -EINVAL;
+}
+
+static void mtk_iommu_config(struct mtk_iommu_data *data, struct device *dev,
+ bool enable, unsigned int domid)
{
struct mtk_smi_larb_iommu *larb_mmu;
unsigned int larbid, portid;
struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
+ const struct mtk_iommu_iova_region *region;
int i;
for (i = 0; i < fwspec->num_ids; ++i) {
larbid = MTK_M4U_TO_LARB(fwspec->ids[i]);
portid = MTK_M4U_TO_PORT(fwspec->ids[i]);
+
larb_mmu = &data->larb_imu[larbid];
- dev_dbg(dev, "%s iommu port: %d\n",
- enable ? "enable" : "disable", portid);
+ region = data->plat_data->iova_region + domid;
+ larb_mmu->bank[portid] = upper_32_bits(region->iova_base);
+
+ dev_dbg(dev, "%s iommu for larb(%s) port %d dom %d bank %d.\n",
+ enable ? "enable" : "disable", dev_name(larb_mmu->dev),
+ portid, domid, larb_mmu->bank[portid]);
if (enable)
larb_mmu->mmu |= MTK_SMI_MMU_EN(portid);
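To make the mtk_iommu_get_domain_id() selection above concrete: an exact match of the device's dma-range against a region wins immediately, otherwise the last region that fully contains the range is kept as a candidate. A standalone sketch with illustrative values; on mt8192, a device limited to 4G..8G would land in region 1, the vdec slot:

	#include <stdint.h>

	struct rgn { uint64_t base, size; };

	static int match_region(uint64_t start, uint64_t size,
				const struct rgn *r, int nr)
	{
		uint64_t end = start + size - 1;
		int i, candidate = -1;

		for (i = 0; i < nr; i++) {
			uint64_t rend = r[i].base + r[i].size - 1;

			if (start == r[i].base && end == rend)
				return i;		/* best fit */
			if (start >= r[i].base && end <= rend)
				candidate = i;		/* contained */
		}
		return candidate;	/* -1 if nothing covers the range */
	}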
@@ -315,22 +384,34 @@ static void mtk_iommu_config(struct mtk_iommu_data *data,
}
}
-static int mtk_iommu_domain_finalise(struct mtk_iommu_domain *dom)
+static int mtk_iommu_domain_finalise(struct mtk_iommu_domain *dom,
+ struct mtk_iommu_data *data,
+ unsigned int domid)
{
- struct mtk_iommu_data *data = mtk_iommu_get_m4u_data();
+ const struct mtk_iommu_iova_region *region;
+
+ /* Use the existing domain as there is only one pgtable here. */
+ if (data->m4u_dom) {
+ dom->iop = data->m4u_dom->iop;
+ dom->cfg = data->m4u_dom->cfg;
+ dom->domain.pgsize_bitmap = data->m4u_dom->cfg.pgsize_bitmap;
+ goto update_iova_region;
+ }
dom->cfg = (struct io_pgtable_cfg) {
.quirks = IO_PGTABLE_QUIRK_ARM_NS |
IO_PGTABLE_QUIRK_NO_PERMS |
- IO_PGTABLE_QUIRK_TLBI_ON_MAP |
IO_PGTABLE_QUIRK_ARM_MTK_EXT,
.pgsize_bitmap = mtk_iommu_ops.pgsize_bitmap,
- .ias = 32,
- .oas = 34,
- .tlb = &mtk_iommu_flush_ops,
+ .ias = MTK_IOMMU_HAS_FLAG(data->plat_data, IOVA_34_EN) ? 34 : 32,
.iommu_dev = data->dev,
};
+ if (MTK_IOMMU_HAS_FLAG(data->plat_data, HAS_4GB_MODE))
+ dom->cfg.oas = data->enable_4GB ? 33 : 32;
+ else
+ dom->cfg.oas = 35;
+
dom->iop = alloc_io_pgtable_ops(ARM_V7S, &dom->cfg, data);
if (!dom->iop) {
dev_err(data->dev, "Failed to alloc io pgtable\n");
@@ -339,6 +420,13 @@ static int mtk_iommu_domain_finalise(struct mtk_iommu_domain *dom)
/* Update our support page sizes bitmap */
dom->domain.pgsize_bitmap = dom->cfg.pgsize_bitmap;
+
+update_iova_region:
+ /* Update the iova region for this domain */
+ region = data->plat_data->iova_region + domid;
+ dom->domain.geometry.aperture_start = region->iova_base;
+ dom->domain.geometry.aperture_end = region->iova_base + region->size - 1;
+ dom->domain.geometry.force_aperture = true;
return 0;
}
@@ -353,30 +441,16 @@ static struct iommu_domain *mtk_iommu_domain_alloc(unsigned type)
if (!dom)
return NULL;
- if (iommu_get_dma_cookie(&dom->domain))
- goto free_dom;
-
- if (mtk_iommu_domain_finalise(dom))
- goto put_dma_cookie;
-
- dom->domain.geometry.aperture_start = 0;
- dom->domain.geometry.aperture_end = DMA_BIT_MASK(32);
- dom->domain.geometry.force_aperture = true;
+ if (iommu_get_dma_cookie(&dom->domain)) {
+ kfree(dom);
+ return NULL;
+ }
return &dom->domain;
-
-put_dma_cookie:
- iommu_put_dma_cookie(&dom->domain);
-free_dom:
- kfree(dom);
- return NULL;
}
static void mtk_iommu_domain_free(struct iommu_domain *domain)
{
- struct mtk_iommu_domain *dom = to_mtk_domain(domain);
-
- free_io_pgtable_ops(dom->iop);
iommu_put_dma_cookie(domain);
kfree(to_mtk_domain(domain));
}
@@ -386,18 +460,37 @@ static int mtk_iommu_attach_device(struct iommu_domain *domain,
{
struct mtk_iommu_data *data = dev_iommu_priv_get(dev);
struct mtk_iommu_domain *dom = to_mtk_domain(domain);
+ struct device *m4udev = data->dev;
+ int ret, domid;
- if (!data)
- return -ENODEV;
+ domid = mtk_iommu_get_domain_id(dev, data->plat_data);
+ if (domid < 0)
+ return domid;
+
+ if (!dom->data) {
+ if (mtk_iommu_domain_finalise(dom, data, domid))
+ return -ENODEV;
+ dom->data = data;
+ }
+
+ if (!data->m4u_dom) { /* Initialize the M4U HW */
+ ret = pm_runtime_resume_and_get(m4udev);
+ if (ret < 0)
+ return ret;
- /* Update the pgtable base address register of the M4U HW */
- if (!data->m4u_dom) {
+ ret = mtk_iommu_hw_init(data);
+ if (ret) {
+ pm_runtime_put(m4udev);
+ return ret;
+ }
data->m4u_dom = dom;
writel(dom->cfg.arm_v7s_cfg.ttbr & MMU_PT_ADDR_MASK,
data->base + REG_MMU_PT_BASE_ADDR);
+
+ pm_runtime_put(m4udev);
}
- mtk_iommu_config(data, dev, true);
+ mtk_iommu_config(data, dev, true, domid);
return 0;
}
@@ -406,20 +499,16 @@ static void mtk_iommu_detach_device(struct iommu_domain *domain,
{
struct mtk_iommu_data *data = dev_iommu_priv_get(dev);
- if (!data)
- return;
-
- mtk_iommu_config(data, dev, false);
+ mtk_iommu_config(data, dev, false, 0);
}
static int mtk_iommu_map(struct iommu_domain *domain, unsigned long iova,
phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
{
struct mtk_iommu_domain *dom = to_mtk_domain(domain);
- struct mtk_iommu_data *data = mtk_iommu_get_m4u_data();
/* The "4GB mode" M4U physically cannot use the lower remap of DRAM. */
- if (data->enable_4GB)
+ if (dom->data->enable_4GB)
paddr |= BIT_ULL(32);
/* Synchronize with the tlb_lock */
@@ -431,37 +520,48 @@ static size_t mtk_iommu_unmap(struct iommu_domain *domain,
struct iommu_iotlb_gather *gather)
{
struct mtk_iommu_domain *dom = to_mtk_domain(domain);
+ unsigned long end = iova + size - 1;
+ if (gather->start > iova)
+ gather->start = iova;
+ if (gather->end < end)
+ gather->end = end;
return dom->iop->unmap(dom->iop, iova, size, gather);
}
static void mtk_iommu_flush_iotlb_all(struct iommu_domain *domain)
{
- mtk_iommu_tlb_flush_all(mtk_iommu_get_m4u_data());
+ struct mtk_iommu_domain *dom = to_mtk_domain(domain);
+
+ mtk_iommu_tlb_flush_all(dom->data);
}
static void mtk_iommu_iotlb_sync(struct iommu_domain *domain,
struct iommu_iotlb_gather *gather)
{
- struct mtk_iommu_data *data = mtk_iommu_get_m4u_data();
- size_t length = gather->end - gather->start;
-
- if (gather->start == ULONG_MAX)
- return;
+ struct mtk_iommu_domain *dom = to_mtk_domain(domain);
+ size_t length = gather->end - gather->start + 1;
mtk_iommu_tlb_flush_range_sync(gather->start, length, gather->pgsize,
- data);
+ dom->data);
+}
+
+static void mtk_iommu_sync_map(struct iommu_domain *domain, unsigned long iova,
+ size_t size)
+{
+ struct mtk_iommu_domain *dom = to_mtk_domain(domain);
+
+ mtk_iommu_tlb_flush_range_sync(iova, size, size, dom->data);
}
static phys_addr_t mtk_iommu_iova_to_phys(struct iommu_domain *domain,
dma_addr_t iova)
{
struct mtk_iommu_domain *dom = to_mtk_domain(domain);
- struct mtk_iommu_data *data = mtk_iommu_get_m4u_data();
phys_addr_t pa;
pa = dom->iop->iova_to_phys(dom->iop, iova);
- if (data->enable_4GB && pa >= MTK_IOMMU_4GB_MODE_REMAP_BASE)
+ if (dom->data->enable_4GB && pa >= MTK_IOMMU_4GB_MODE_REMAP_BASE)
pa &= ~BIT_ULL(32);
return pa;
@@ -493,19 +593,25 @@ static void mtk_iommu_release_device(struct device *dev)
static struct iommu_group *mtk_iommu_device_group(struct device *dev)
{
struct mtk_iommu_data *data = mtk_iommu_get_m4u_data();
+ struct iommu_group *group;
+ int domid;
if (!data)
return ERR_PTR(-ENODEV);
- /* All the client devices are in the same m4u iommu-group */
- if (!data->m4u_group) {
- data->m4u_group = iommu_group_alloc();
- if (IS_ERR(data->m4u_group))
- dev_err(dev, "Failed to allocate M4U IOMMU group\n");
+ domid = mtk_iommu_get_domain_id(dev, data->plat_data);
+ if (domid < 0)
+ return ERR_PTR(domid);
+
+ group = data->m4u_group[domid];
+ if (!group) {
+ group = iommu_group_alloc();
+ if (!IS_ERR(group))
+ data->m4u_group[domid] = group;
} else {
- iommu_group_ref_get(data->m4u_group);
+ iommu_group_ref_get(group);
}
- return data->m4u_group;
+ return group;
}
static int mtk_iommu_of_xlate(struct device *dev, struct of_phandle_args *args)
@@ -530,6 +636,35 @@ static int mtk_iommu_of_xlate(struct device *dev, struct of_phandle_args *args)
return iommu_fwspec_add_ids(dev, args->args, 1);
}
+static void mtk_iommu_get_resv_regions(struct device *dev,
+ struct list_head *head)
+{
+ struct mtk_iommu_data *data = dev_iommu_priv_get(dev);
+ unsigned int domid = mtk_iommu_get_domain_id(dev, data->plat_data), i;
+ const struct mtk_iommu_iova_region *resv, *curdom;
+ struct iommu_resv_region *region;
+ int prot = IOMMU_WRITE | IOMMU_READ;
+
+ if ((int)domid < 0)
+ return;
+ curdom = data->plat_data->iova_region + domid;
+ for (i = 0; i < data->plat_data->iova_region_nr; i++) {
+ resv = data->plat_data->iova_region + i;
+
+ /* Only reserve when the region is inside the current domain */
+ if (resv->iova_base <= curdom->iova_base ||
+ resv->iova_base + resv->size >= curdom->iova_base + curdom->size)
+ continue;
+
+ region = iommu_alloc_resv_region(resv->iova_base, resv->size,
+ prot, IOMMU_RESV_RESERVED);
+ if (!region)
+ return;
+
+ list_add_tail(&region->list, head);
+ }
+}
+
static const struct iommu_ops mtk_iommu_ops = {
.domain_alloc = mtk_iommu_domain_alloc,
.domain_free = mtk_iommu_domain_free,
@@ -539,11 +674,14 @@ static const struct iommu_ops mtk_iommu_ops = {
.unmap = mtk_iommu_unmap,
.flush_iotlb_all = mtk_iommu_flush_iotlb_all,
.iotlb_sync = mtk_iommu_iotlb_sync,
+ .iotlb_sync_map = mtk_iommu_sync_map,
.iova_to_phys = mtk_iommu_iova_to_phys,
.probe_device = mtk_iommu_probe_device,
.release_device = mtk_iommu_release_device,
.device_group = mtk_iommu_device_group,
.of_xlate = mtk_iommu_of_xlate,
+ .get_resv_regions = mtk_iommu_get_resv_regions,
+ .put_resv_regions = generic_iommu_put_resv_regions,
.pgsize_bitmap = SZ_4K | SZ_64K | SZ_1M | SZ_16M,
};
@@ -639,6 +777,9 @@ static int mtk_iommu_probe(struct platform_device *pdev)
{
struct mtk_iommu_data *data;
struct device *dev = &pdev->dev;
+ struct device_node *larbnode, *smicomm_node;
+ struct platform_device *plarbdev;
+ struct device_link *link;
struct resource *res;
resource_size_t ioaddr;
struct component_match *match = NULL;
@@ -705,8 +846,6 @@ static int mtk_iommu_probe(struct platform_device *pdev)
return larb_nr;
for (i = 0; i < larb_nr; i++) {
- struct device_node *larbnode;
- struct platform_device *plarbdev;
u32 id;
larbnode = of_parse_phandle(dev->of_node, "mediatek,larbs", i);
@@ -733,31 +872,65 @@ static int mtk_iommu_probe(struct platform_device *pdev)
compare_of, larbnode);
}
- platform_set_drvdata(pdev, data);
+ /* Get smi-common dev from the last larb. */
+ smicomm_node = of_parse_phandle(larbnode, "mediatek,smi", 0);
+ if (!smicomm_node)
+ return -EINVAL;
- ret = mtk_iommu_hw_init(data);
- if (ret)
- return ret;
+ plarbdev = of_find_device_by_node(smicomm_node);
+ of_node_put(smicomm_node);
+ data->smicomm_dev = &plarbdev->dev;
+
+ pm_runtime_enable(dev);
+
+ link = device_link_add(data->smicomm_dev, dev,
+ DL_FLAG_STATELESS | DL_FLAG_PM_RUNTIME);
+ if (!link) {
+ dev_err(dev, "Unable to link %s.\n", dev_name(data->smicomm_dev));
+ ret = -EINVAL;
+ goto out_runtime_disable;
+ }
+
+ platform_set_drvdata(pdev, data);
ret = iommu_device_sysfs_add(&data->iommu, dev, NULL,
"mtk-iommu.%pa", &ioaddr);
if (ret)
- return ret;
+ goto out_link_remove;
iommu_device_set_ops(&data->iommu, &mtk_iommu_ops);
iommu_device_set_fwnode(&data->iommu, &pdev->dev.of_node->fwnode);
ret = iommu_device_register(&data->iommu);
if (ret)
- return ret;
+ goto out_sysfs_remove;
spin_lock_init(&data->tlb_lock);
list_add_tail(&data->list, &m4ulist);
- if (!iommu_present(&platform_bus_type))
- bus_set_iommu(&platform_bus_type, &mtk_iommu_ops);
+ if (!iommu_present(&platform_bus_type)) {
+ ret = bus_set_iommu(&platform_bus_type, &mtk_iommu_ops);
+ if (ret)
+ goto out_list_del;
+ }
- return component_master_add_with_match(dev, &mtk_iommu_com_ops, match);
+ ret = component_master_add_with_match(dev, &mtk_iommu_com_ops, match);
+ if (ret)
+ goto out_bus_set_null;
+ return ret;
+
+out_bus_set_null:
+ bus_set_iommu(&platform_bus_type, NULL);
+out_list_del:
+ list_del(&data->list);
+ iommu_device_unregister(&data->iommu);
+out_sysfs_remove:
+ iommu_device_sysfs_remove(&data->iommu);
+out_link_remove:
+ device_link_remove(data->smicomm_dev, dev);
+out_runtime_disable:
+ pm_runtime_disable(dev);
+ return ret;
}
static int mtk_iommu_remove(struct platform_device *pdev)
@@ -771,12 +944,14 @@ static int mtk_iommu_remove(struct platform_device *pdev)
bus_set_iommu(&platform_bus_type, NULL);
clk_disable_unprepare(data->bclk);
+ device_link_remove(data->smicomm_dev, &pdev->dev);
+ pm_runtime_disable(&pdev->dev);
devm_free_irq(&pdev->dev, data->irq, data);
component_master_del(&pdev->dev, &mtk_iommu_com_ops);
return 0;
}
-static int __maybe_unused mtk_iommu_suspend(struct device *dev)
+static int __maybe_unused mtk_iommu_runtime_suspend(struct device *dev)
{
struct mtk_iommu_data *data = dev_get_drvdata(dev);
struct mtk_iommu_suspend_reg *reg = &data->reg;
@@ -794,7 +969,7 @@ static int __maybe_unused mtk_iommu_suspend(struct device *dev)
return 0;
}
-static int __maybe_unused mtk_iommu_resume(struct device *dev)
+static int __maybe_unused mtk_iommu_runtime_resume(struct device *dev)
{
struct mtk_iommu_data *data = dev_get_drvdata(dev);
struct mtk_iommu_suspend_reg *reg = &data->reg;
@@ -802,6 +977,9 @@ static int __maybe_unused mtk_iommu_resume(struct device *dev)
void __iomem *base = data->base;
int ret;
+ /* Avoid the first resume clobbering the default values of the registers below. */
+ if (!m4u_dom)
+ return 0;
ret = clk_prepare_enable(data->bclk);
if (ret) {
dev_err(data->dev, "Failed to enable clk(%d) in resume\n", ret);
@@ -815,20 +993,22 @@ static int __maybe_unused mtk_iommu_resume(struct device *dev)
writel_relaxed(reg->int_main_control, base + REG_MMU_INT_MAIN_CONTROL);
writel_relaxed(reg->ivrp_paddr, base + REG_MMU_IVRP_PADDR);
writel_relaxed(reg->vld_pa_rng, base + REG_MMU_VLD_PA_RNG);
- if (m4u_dom)
- writel(m4u_dom->cfg.arm_v7s_cfg.ttbr & MMU_PT_ADDR_MASK,
- base + REG_MMU_PT_BASE_ADDR);
+ writel(m4u_dom->cfg.arm_v7s_cfg.ttbr & MMU_PT_ADDR_MASK, base + REG_MMU_PT_BASE_ADDR);
return 0;
}
static const struct dev_pm_ops mtk_iommu_pm_ops = {
- SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(mtk_iommu_suspend, mtk_iommu_resume)
+ SET_RUNTIME_PM_OPS(mtk_iommu_runtime_suspend, mtk_iommu_runtime_resume, NULL)
+ SET_LATE_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend,
+ pm_runtime_force_resume)
};
static const struct mtk_iommu_plat_data mt2712_data = {
.m4u_plat = M4U_MT2712,
.flags = HAS_4GB_MODE | HAS_BCLK | HAS_VLD_PA_RNG,
.inv_sel_reg = REG_MMU_INV_SEL_GEN1,
+ .iova_region = single_domain,
+ .iova_region_nr = ARRAY_SIZE(single_domain),
.larbid_remap = {{0}, {1}, {2}, {3}, {4}, {5}, {6}, {7}},
};
@@ -836,6 +1016,8 @@ static const struct mtk_iommu_plat_data mt6779_data = {
.m4u_plat = M4U_MT6779,
.flags = HAS_SUB_COMM | OUT_ORDER_WR_EN | WR_THROT_EN,
.inv_sel_reg = REG_MMU_INV_SEL_GEN2,
+ .iova_region = single_domain,
+ .iova_region_nr = ARRAY_SIZE(single_domain),
.larbid_remap = {{0}, {1}, {2}, {3}, {5}, {7, 8}, {10}, {9}},
};
@@ -843,6 +1025,8 @@ static const struct mtk_iommu_plat_data mt8167_data = {
.m4u_plat = M4U_MT8167,
.flags = RESET_AXI | HAS_LEGACY_IVRP_PADDR,
.inv_sel_reg = REG_MMU_INV_SEL_GEN1,
+ .iova_region = single_domain,
+ .iova_region_nr = ARRAY_SIZE(single_domain),
.larbid_remap = {{0}, {1}, {2}}, /* Linear mapping. */
};
@@ -851,6 +1035,8 @@ static const struct mtk_iommu_plat_data mt8173_data = {
.flags = HAS_4GB_MODE | HAS_BCLK | RESET_AXI |
HAS_LEGACY_IVRP_PADDR,
.inv_sel_reg = REG_MMU_INV_SEL_GEN1,
+ .iova_region = single_domain,
+ .iova_region_nr = ARRAY_SIZE(single_domain),
.larbid_remap = {{0}, {1}, {2}, {3}, {4}, {5}}, /* Linear mapping. */
};
@@ -858,15 +1044,29 @@ static const struct mtk_iommu_plat_data mt8183_data = {
.m4u_plat = M4U_MT8183,
.flags = RESET_AXI,
.inv_sel_reg = REG_MMU_INV_SEL_GEN1,
+ .iova_region = single_domain,
+ .iova_region_nr = ARRAY_SIZE(single_domain),
.larbid_remap = {{0}, {4}, {5}, {6}, {7}, {2}, {3}, {1}},
};
+static const struct mtk_iommu_plat_data mt8192_data = {
+ .m4u_plat = M4U_MT8192,
+ .flags = HAS_BCLK | HAS_SUB_COMM | OUT_ORDER_WR_EN |
+ WR_THROT_EN | IOVA_34_EN,
+ .inv_sel_reg = REG_MMU_INV_SEL_GEN2,
+ .iova_region = mt8192_multi_dom,
+ .iova_region_nr = ARRAY_SIZE(mt8192_multi_dom),
+ .larbid_remap = {{0}, {1}, {4, 5}, {7}, {2}, {9, 11, 19, 20},
+ {0, 14, 16}, {0, 13, 18, 17}},
+};
+
static const struct of_device_id mtk_iommu_of_ids[] = {
{ .compatible = "mediatek,mt2712-m4u", .data = &mt2712_data},
{ .compatible = "mediatek,mt6779-m4u", .data = &mt6779_data},
{ .compatible = "mediatek,mt8167-m4u", .data = &mt8167_data},
{ .compatible = "mediatek,mt8173-m4u", .data = &mt8173_data},
{ .compatible = "mediatek,mt8183-m4u", .data = &mt8183_data},
+ { .compatible = "mediatek,mt8192-m4u", .data = &mt8192_data},
{}
};
diff --git a/drivers/iommu/mtk_iommu.h b/drivers/iommu/mtk_iommu.h
index df32b3e3408b..f81fa8862ed0 100644
--- a/drivers/iommu/mtk_iommu.h
+++ b/drivers/iommu/mtk_iommu.h
@@ -17,10 +17,13 @@
#include <linux/spinlock.h>
#include <linux/dma-mapping.h>
#include <soc/mediatek/smi.h>
+#include <dt-bindings/memory/mtk-memory-port.h>
#define MTK_LARB_COM_MAX 8
#define MTK_LARB_SUBCOM_MAX 4
+#define MTK_IOMMU_GROUP_MAX 8
+
struct mtk_iommu_suspend_reg {
union {
u32 standard_axi_mode;/* v1 */
@@ -42,12 +45,18 @@ enum mtk_iommu_plat {
M4U_MT8167,
M4U_MT8173,
M4U_MT8183,
+ M4U_MT8192,
};
+struct mtk_iommu_iova_region;
+
struct mtk_iommu_plat_data {
enum mtk_iommu_plat m4u_plat;
u32 flags;
u32 inv_sel_reg;
+
+ unsigned int iova_region_nr;
+ const struct mtk_iommu_iova_region *iova_region;
unsigned char larbid_remap[MTK_LARB_COM_MAX][MTK_LARB_SUBCOM_MAX];
};
@@ -61,12 +70,13 @@ struct mtk_iommu_data {
phys_addr_t protect_base; /* protect memory base */
struct mtk_iommu_suspend_reg reg;
struct mtk_iommu_domain *m4u_dom;
- struct iommu_group *m4u_group;
+ struct iommu_group *m4u_group[MTK_IOMMU_GROUP_MAX];
bool enable_4GB;
spinlock_t tlb_lock; /* lock for tlb range flush */
struct iommu_device iommu;
const struct mtk_iommu_plat_data *plat_data;
+ struct device *smicomm_dev;
struct dma_iommu_mapping *mapping; /* For mtk_iommu_v1.c */
diff --git a/drivers/iommu/tegra-gart.c b/drivers/iommu/tegra-gart.c
index fac720273889..6f130e51f072 100644
--- a/drivers/iommu/tegra-gart.c
+++ b/drivers/iommu/tegra-gart.c
@@ -261,7 +261,8 @@ static int gart_iommu_of_xlate(struct device *dev,
return 0;
}
-static void gart_iommu_sync_map(struct iommu_domain *domain)
+static void gart_iommu_sync_map(struct iommu_domain *domain, unsigned long iova,
+ size_t size)
{
FLUSH_GART_REGS(gart_handle);
}
@@ -269,7 +270,9 @@ static void gart_iommu_sync_map(struct iommu_domain *domain)
static void gart_iommu_sync(struct iommu_domain *domain,
struct iommu_iotlb_gather *gather)
{
- gart_iommu_sync_map(domain);
+ size_t length = gather->end - gather->start + 1;
+
+ gart_iommu_sync_map(domain, gather->start, length);
}
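Both the mtk and gart sync paths now treat the gather range as inclusive of its end, hence the end - start + 1 length. A minimal sketch of that convention; the ranged-flush helper is illustrative:

	static void example_sync_map(struct iommu_domain *domain,
				     unsigned long iova, size_t size)
	{
		/* Illustrative stand-in for a ranged hardware flush */
	}

	static void example_iotlb_sync(struct iommu_domain *domain,
				       struct iommu_iotlb_gather *gather)
	{
		/* gather->end is the last covered byte, so the length adds one */
		example_sync_map(domain, gather->start,
				 gather->end - gather->start + 1);
	}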
static const struct iommu_ops gart_iommu_ops = {