summaryrefslogtreecommitdiff
path: root/drivers/iommu/amd
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-10-10 23:20:53 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2022-10-10 23:20:53 +0300
commitf23cdfcd04f7c044ee47dac04484b8d289088776 (patch)
treeab4c292b83cf4534b08cdd258cda38893f2cfa39 /drivers/iommu/amd
parent706eacadd5c5cc13510ba69eea2917c2ce5ffa99 (diff)
parent38713c6028a3172c4c256512c3fbcfc799fe2d43 (diff)
downloadlinux-f23cdfcd04f7c044ee47dac04484b8d289088776.tar.xz
Merge tag 'iommu-updates-v6.1' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu
Pull iommu updates from Joerg Roedel: - remove the bus_set_iommu() interface which became unnecesary because of IOMMU per-device probing - make the dma-iommu.h header private - Intel VT-d changes from Lu Baolu: - Decouple PASID and PRI from SVA - Add ESRTPS & ESIRTPS capability check - Cleanups - Apple DART support for the M1 Pro/MAX SOCs - support for AMD IOMMUv2 page-tables for the DMA-API layer. The v2 page-tables are compatible with the x86 CPU page-tables. Using them for DMA-API prepares support for hardware-assisted IOMMU virtualization - support for MT6795 Helio X10 M4Us in the Mediatek IOMMU driver - some smaller fixes and cleanups * tag 'iommu-updates-v6.1' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu: (59 commits) iommu/vt-d: Avoid unnecessary global DMA cache invalidation iommu/vt-d: Avoid unnecessary global IRTE cache invalidation iommu/vt-d: Rename cap_5lp_support to cap_fl5lp_support iommu/vt-d: Remove pasid_set_eafe() iommu/vt-d: Decouple PASID & PRI enabling from SVA iommu/vt-d: Remove unnecessary SVA data accesses in page fault path dt-bindings: iommu: arm,smmu-v3: Relax order of interrupt names iommu: dart: Support t6000 variant iommu/io-pgtable-dart: Add DART PTE support for t6000 iommu/io-pgtable: Add DART subpage protection support iommu/io-pgtable: Move Apple DART support to its own file iommu/mediatek: Add support for MT6795 Helio X10 M4Us iommu/mediatek: Introduce new flag TF_PORT_TO_ADDR_MT8173 dt-bindings: mediatek: Add bindings for MT6795 M4U iommu/iova: Fix module config properly iommu/amd: Fix sparse warning iommu/amd: Remove outdated comment iommu/amd: Free domain ID after domain_flush_pages iommu/amd: Free domain id in error path iommu/virtio: Fix compile error with viommu_capable() ...
Diffstat (limited to 'drivers/iommu/amd')
-rw-r--r--drivers/iommu/amd/Kconfig1
-rw-r--r--drivers/iommu/amd/Makefile2
-rw-r--r--drivers/iommu/amd/amd_iommu.h1
-rw-r--r--drivers/iommu/amd/amd_iommu_types.h10
-rw-r--r--drivers/iommu/amd/init.c47
-rw-r--r--drivers/iommu/amd/io_pgtable.c76
-rw-r--r--drivers/iommu/amd/io_pgtable_v2.c415
-rw-r--r--drivers/iommu/amd/iommu.c164
8 files changed, 597 insertions, 119 deletions
diff --git a/drivers/iommu/amd/Kconfig b/drivers/iommu/amd/Kconfig
index a3cbafb603f5..9b5fc3356bf2 100644
--- a/drivers/iommu/amd/Kconfig
+++ b/drivers/iommu/amd/Kconfig
@@ -9,7 +9,6 @@ config AMD_IOMMU
select PCI_PASID
select IOMMU_API
select IOMMU_IOVA
- select IOMMU_DMA
select IOMMU_IO_PGTABLE
depends on X86_64 && PCI && ACPI && HAVE_CMPXCHG_DOUBLE
help
diff --git a/drivers/iommu/amd/Makefile b/drivers/iommu/amd/Makefile
index a935f8f4b974..773d8aa00283 100644
--- a/drivers/iommu/amd/Makefile
+++ b/drivers/iommu/amd/Makefile
@@ -1,4 +1,4 @@
# SPDX-License-Identifier: GPL-2.0-only
-obj-$(CONFIG_AMD_IOMMU) += iommu.o init.o quirks.o io_pgtable.o
+obj-$(CONFIG_AMD_IOMMU) += iommu.o init.o quirks.o io_pgtable.o io_pgtable_v2.o
obj-$(CONFIG_AMD_IOMMU_DEBUGFS) += debugfs.o
obj-$(CONFIG_AMD_IOMMU_V2) += iommu_v2.o
diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
index 84e5bb1bf01b..c160a332ce33 100644
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -18,7 +18,6 @@ extern void amd_iommu_restart_event_logging(struct amd_iommu *iommu);
extern int amd_iommu_init_devices(void);
extern void amd_iommu_uninit_devices(void);
extern void amd_iommu_init_notifier(void);
-extern int amd_iommu_init_api(void);
extern void amd_iommu_set_rlookup_table(struct amd_iommu *iommu, u16 devid);
#ifdef CONFIG_AMD_IOMMU_DEBUGFS
diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h
index 5b1019dab328..1d0a70c85333 100644
--- a/drivers/iommu/amd/amd_iommu_types.h
+++ b/drivers/iommu/amd/amd_iommu_types.h
@@ -94,6 +94,7 @@
#define FEATURE_HE (1ULL<<8)
#define FEATURE_PC (1ULL<<9)
#define FEATURE_GAM_VAPIC (1ULL<<21)
+#define FEATURE_GIOSUP (1ULL<<48)
#define FEATURE_EPHSUP (1ULL<<50)
#define FEATURE_SNP (1ULL<<63)
@@ -276,6 +277,8 @@
* 512GB Pages are not supported due to a hardware bug
*/
#define AMD_IOMMU_PGSIZES ((~0xFFFUL) & ~(2ULL << 38))
+/* 4K, 2MB, 1G page sizes are supported */
+#define AMD_IOMMU_PGSIZES_V2 (PAGE_SIZE | (1ULL << 21) | (1ULL << 30))
/* Bit value definition for dte irq remapping fields*/
#define DTE_IRQ_PHYS_ADDR_MASK (((1ULL << 45)-1) << 6)
@@ -376,6 +379,7 @@
#define DTE_FLAG_IW (1ULL << 62)
#define DTE_FLAG_IOTLB (1ULL << 32)
+#define DTE_FLAG_GIOV (1ULL << 54)
#define DTE_FLAG_GV (1ULL << 55)
#define DTE_FLAG_MASK (0x3ffULL << 32)
#define DTE_GLX_SHIFT (56)
@@ -434,6 +438,7 @@
#define PD_PASSTHROUGH_MASK (1UL << 2) /* domain has no page
translation */
#define PD_IOMMUV2_MASK (1UL << 3) /* domain has gcr3 table */
+#define PD_GIOV_MASK (1UL << 4) /* domain enable GIOV support */
extern bool amd_iommu_dump;
#define DUMP_printk(format, arg...) \
@@ -456,6 +461,8 @@ struct irq_remap_table {
/* Interrupt remapping feature used? */
extern bool amd_iommu_irq_remap;
+extern const struct iommu_ops amd_iommu_ops;
+
/* IVRS indicates that pre-boot remapping was enabled */
extern bool amdr_ivrs_remap_support;
@@ -526,7 +533,8 @@ struct amd_io_pgtable {
struct io_pgtable iop;
int mode;
u64 *root;
- atomic64_t pt_root; /* pgtable root and pgtable mode */
+ atomic64_t pt_root; /* pgtable root and pgtable mode */
+ u64 *pgd; /* v2 pgtable pgd pointer */
};
/*
diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index fdc642362c14..1a2d425bf568 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -95,8 +95,6 @@
* out of it.
*/
-extern const struct iommu_ops amd_iommu_ops;
-
/*
* structure describing one IOMMU in the ACPI table. Typically followed by one
* or more ivhd_entrys.
@@ -2068,6 +2066,17 @@ static int __init iommu_init_pci(struct amd_iommu *iommu)
init_iommu_perf_ctr(iommu);
+ if (amd_iommu_pgtable == AMD_IOMMU_V2) {
+ if (!iommu_feature(iommu, FEATURE_GIOSUP) ||
+ !iommu_feature(iommu, FEATURE_GT)) {
+ pr_warn("Cannot enable v2 page table for DMA-API. Fallback to v1.\n");
+ amd_iommu_pgtable = AMD_IOMMU_V1;
+ } else if (iommu_default_passthrough()) {
+ pr_warn("V2 page table doesn't support passthrough mode. Fallback to v1.\n");
+ amd_iommu_pgtable = AMD_IOMMU_V1;
+ }
+ }
+
if (is_rd890_iommu(iommu->dev)) {
int i, j;
@@ -2146,6 +2155,8 @@ static void print_iommu_info(void)
if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE)
pr_info("X2APIC enabled\n");
}
+ if (amd_iommu_pgtable == AMD_IOMMU_V2)
+ pr_info("V2 page table enabled\n");
}
static int __init amd_iommu_init_pci(void)
@@ -2168,20 +2179,13 @@ static int __init amd_iommu_init_pci(void)
/*
* Order is important here to make sure any unity map requirements are
* fulfilled. The unity mappings are created and written to the device
- * table during the amd_iommu_init_api() call.
+ * table during the iommu_init_pci() call.
*
* After that we call init_device_table_dma() to make sure any
* uninitialized DTE will block DMA, and in the end we flush the caches
* of all IOMMUs to make sure the changes to the device table are
* active.
*/
- ret = amd_iommu_init_api();
- if (ret) {
- pr_err("IOMMU: Failed to initialize IOMMU-API interface (error=%d)!\n",
- ret);
- goto out;
- }
-
for_each_pci_segment(pci_seg)
init_device_table_dma(pci_seg);
@@ -3366,17 +3370,30 @@ static int __init parse_amd_iommu_intr(char *str)
static int __init parse_amd_iommu_options(char *str)
{
- for (; *str; ++str) {
+ if (!str)
+ return -EINVAL;
+
+ while (*str) {
if (strncmp(str, "fullflush", 9) == 0) {
pr_warn("amd_iommu=fullflush deprecated; use iommu.strict=1 instead\n");
iommu_set_dma_strict();
- }
- if (strncmp(str, "force_enable", 12) == 0)
+ } else if (strncmp(str, "force_enable", 12) == 0) {
amd_iommu_force_enable = true;
- if (strncmp(str, "off", 3) == 0)
+ } else if (strncmp(str, "off", 3) == 0) {
amd_iommu_disabled = true;
- if (strncmp(str, "force_isolation", 15) == 0)
+ } else if (strncmp(str, "force_isolation", 15) == 0) {
amd_iommu_force_isolation = true;
+ } else if (strncmp(str, "pgtbl_v1", 8) == 0) {
+ amd_iommu_pgtable = AMD_IOMMU_V1;
+ } else if (strncmp(str, "pgtbl_v2", 8) == 0) {
+ amd_iommu_pgtable = AMD_IOMMU_V2;
+ } else {
+ pr_notice("Unknown option - '%s'\n", str);
+ }
+
+ str += strcspn(str, ",");
+ while (*str == ',')
+ str++;
}
return 1;
diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c
index 7d4b61e5db47..ace0e9b8b913 100644
--- a/drivers/iommu/amd/io_pgtable.c
+++ b/drivers/iommu/amd/io_pgtable.c
@@ -360,8 +360,9 @@ static void free_clear_pte(u64 *pte, u64 pteval, struct list_head *freelist)
* supporting all features of AMD IOMMU page tables like level skipping
* and full 64 bit address spaces.
*/
-static int iommu_v1_map_page(struct io_pgtable_ops *ops, unsigned long iova,
- phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
+static int iommu_v1_map_pages(struct io_pgtable_ops *ops, unsigned long iova,
+ phys_addr_t paddr, size_t pgsize, size_t pgcount,
+ int prot, gfp_t gfp, size_t *mapped)
{
struct protection_domain *dom = io_pgtable_ops_to_domain(ops);
LIST_HEAD(freelist);
@@ -369,39 +370,47 @@ static int iommu_v1_map_page(struct io_pgtable_ops *ops, unsigned long iova,
u64 __pte, *pte;
int ret, i, count;
- BUG_ON(!IS_ALIGNED(iova, size));
- BUG_ON(!IS_ALIGNED(paddr, size));
+ BUG_ON(!IS_ALIGNED(iova, pgsize));
+ BUG_ON(!IS_ALIGNED(paddr, pgsize));
ret = -EINVAL;
if (!(prot & IOMMU_PROT_MASK))
goto out;
- count = PAGE_SIZE_PTE_COUNT(size);
- pte = alloc_pte(dom, iova, size, NULL, gfp, &updated);
+ while (pgcount > 0) {
+ count = PAGE_SIZE_PTE_COUNT(pgsize);
+ pte = alloc_pte(dom, iova, pgsize, NULL, gfp, &updated);
- ret = -ENOMEM;
- if (!pte)
- goto out;
+ ret = -ENOMEM;
+ if (!pte)
+ goto out;
- for (i = 0; i < count; ++i)
- free_clear_pte(&pte[i], pte[i], &freelist);
+ for (i = 0; i < count; ++i)
+ free_clear_pte(&pte[i], pte[i], &freelist);
- if (!list_empty(&freelist))
- updated = true;
+ if (!list_empty(&freelist))
+ updated = true;
- if (count > 1) {
- __pte = PAGE_SIZE_PTE(__sme_set(paddr), size);
- __pte |= PM_LEVEL_ENC(7) | IOMMU_PTE_PR | IOMMU_PTE_FC;
- } else
- __pte = __sme_set(paddr) | IOMMU_PTE_PR | IOMMU_PTE_FC;
+ if (count > 1) {
+ __pte = PAGE_SIZE_PTE(__sme_set(paddr), pgsize);
+ __pte |= PM_LEVEL_ENC(7) | IOMMU_PTE_PR | IOMMU_PTE_FC;
+ } else
+ __pte = __sme_set(paddr) | IOMMU_PTE_PR | IOMMU_PTE_FC;
- if (prot & IOMMU_PROT_IR)
- __pte |= IOMMU_PTE_IR;
- if (prot & IOMMU_PROT_IW)
- __pte |= IOMMU_PTE_IW;
+ if (prot & IOMMU_PROT_IR)
+ __pte |= IOMMU_PTE_IR;
+ if (prot & IOMMU_PROT_IW)
+ __pte |= IOMMU_PTE_IW;
- for (i = 0; i < count; ++i)
- pte[i] = __pte;
+ for (i = 0; i < count; ++i)
+ pte[i] = __pte;
+
+ iova += pgsize;
+ paddr += pgsize;
+ pgcount--;
+ if (mapped)
+ *mapped += pgsize;
+ }
ret = 0;
@@ -426,17 +435,18 @@ out:
return ret;
}
-static unsigned long iommu_v1_unmap_page(struct io_pgtable_ops *ops,
- unsigned long iova,
- size_t size,
- struct iommu_iotlb_gather *gather)
+static unsigned long iommu_v1_unmap_pages(struct io_pgtable_ops *ops,
+ unsigned long iova,
+ size_t pgsize, size_t pgcount,
+ struct iommu_iotlb_gather *gather)
{
struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
unsigned long long unmapped;
unsigned long unmap_size;
u64 *pte;
+ size_t size = pgcount << __ffs(pgsize);
- BUG_ON(!is_power_of_2(size));
+ BUG_ON(!is_power_of_2(pgsize));
unmapped = 0;
@@ -448,14 +458,14 @@ static unsigned long iommu_v1_unmap_page(struct io_pgtable_ops *ops,
count = PAGE_SIZE_PTE_COUNT(unmap_size);
for (i = 0; i < count; i++)
pte[i] = 0ULL;
+ } else {
+ return unmapped;
}
iova = (iova & ~(unmap_size - 1)) + unmap_size;
unmapped += unmap_size;
}
- BUG_ON(unmapped && !is_power_of_2(unmapped));
-
return unmapped;
}
@@ -514,8 +524,8 @@ static struct io_pgtable *v1_alloc_pgtable(struct io_pgtable_cfg *cfg, void *coo
cfg->oas = IOMMU_OUT_ADDR_BIT_SIZE,
cfg->tlb = &v1_flush_ops;
- pgtable->iop.ops.map = iommu_v1_map_page;
- pgtable->iop.ops.unmap = iommu_v1_unmap_page;
+ pgtable->iop.ops.map_pages = iommu_v1_map_pages;
+ pgtable->iop.ops.unmap_pages = iommu_v1_unmap_pages;
pgtable->iop.ops.iova_to_phys = iommu_v1_iova_to_phys;
return &pgtable->iop;
diff --git a/drivers/iommu/amd/io_pgtable_v2.c b/drivers/iommu/amd/io_pgtable_v2.c
new file mode 100644
index 000000000000..8638ddf6fb3b
--- /dev/null
+++ b/drivers/iommu/amd/io_pgtable_v2.c
@@ -0,0 +1,415 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * CPU-agnostic AMD IO page table v2 allocator.
+ *
+ * Copyright (C) 2022 Advanced Micro Devices, Inc.
+ * Author: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
+ * Author: Vasant Hegde <vasant.hegde@amd.com>
+ */
+
+#define pr_fmt(fmt) "AMD-Vi: " fmt
+#define dev_fmt(fmt) pr_fmt(fmt)
+
+#include <linux/bitops.h>
+#include <linux/io-pgtable.h>
+#include <linux/kernel.h>
+
+#include <asm/barrier.h>
+
+#include "amd_iommu_types.h"
+#include "amd_iommu.h"
+
+#define IOMMU_PAGE_PRESENT BIT_ULL(0) /* Is present */
+#define IOMMU_PAGE_RW BIT_ULL(1) /* Writeable */
+#define IOMMU_PAGE_USER BIT_ULL(2) /* Userspace addressable */
+#define IOMMU_PAGE_PWT BIT_ULL(3) /* Page write through */
+#define IOMMU_PAGE_PCD BIT_ULL(4) /* Page cache disabled */
+#define IOMMU_PAGE_ACCESS BIT_ULL(5) /* Was accessed (updated by IOMMU) */
+#define IOMMU_PAGE_DIRTY BIT_ULL(6) /* Was written to (updated by IOMMU) */
+#define IOMMU_PAGE_PSE BIT_ULL(7) /* Page Size Extensions */
+#define IOMMU_PAGE_NX BIT_ULL(63) /* No execute */
+
+#define MAX_PTRS_PER_PAGE 512
+
+#define IOMMU_PAGE_SIZE_2M BIT_ULL(21)
+#define IOMMU_PAGE_SIZE_1G BIT_ULL(30)
+
+
+static inline int get_pgtable_level(void)
+{
+ /* 5 level page table is not supported */
+ return PAGE_MODE_4_LEVEL;
+}
+
+static inline bool is_large_pte(u64 pte)
+{
+ return (pte & IOMMU_PAGE_PSE);
+}
+
+static inline void *alloc_pgtable_page(void)
+{
+ return (void *)get_zeroed_page(GFP_KERNEL);
+}
+
+static inline u64 set_pgtable_attr(u64 *page)
+{
+ u64 prot;
+
+ prot = IOMMU_PAGE_PRESENT | IOMMU_PAGE_RW | IOMMU_PAGE_USER;
+ prot |= IOMMU_PAGE_ACCESS | IOMMU_PAGE_DIRTY;
+
+ return (iommu_virt_to_phys(page) | prot);
+}
+
+static inline void *get_pgtable_pte(u64 pte)
+{
+ return iommu_phys_to_virt(pte & PM_ADDR_MASK);
+}
+
+static u64 set_pte_attr(u64 paddr, u64 pg_size, int prot)
+{
+ u64 pte;
+
+ pte = __sme_set(paddr & PM_ADDR_MASK);
+ pte |= IOMMU_PAGE_PRESENT | IOMMU_PAGE_USER;
+ pte |= IOMMU_PAGE_ACCESS | IOMMU_PAGE_DIRTY;
+
+ if (prot & IOMMU_PROT_IW)
+ pte |= IOMMU_PAGE_RW;
+
+ /* Large page */
+ if (pg_size == IOMMU_PAGE_SIZE_1G || pg_size == IOMMU_PAGE_SIZE_2M)
+ pte |= IOMMU_PAGE_PSE;
+
+ return pte;
+}
+
+static inline u64 get_alloc_page_size(u64 size)
+{
+ if (size >= IOMMU_PAGE_SIZE_1G)
+ return IOMMU_PAGE_SIZE_1G;
+
+ if (size >= IOMMU_PAGE_SIZE_2M)
+ return IOMMU_PAGE_SIZE_2M;
+
+ return PAGE_SIZE;
+}
+
+static inline int page_size_to_level(u64 pg_size)
+{
+ if (pg_size == IOMMU_PAGE_SIZE_1G)
+ return PAGE_MODE_3_LEVEL;
+ if (pg_size == IOMMU_PAGE_SIZE_2M)
+ return PAGE_MODE_2_LEVEL;
+
+ return PAGE_MODE_1_LEVEL;
+}
+
+static inline void free_pgtable_page(u64 *pt)
+{
+ free_page((unsigned long)pt);
+}
+
+static void free_pgtable(u64 *pt, int level)
+{
+ u64 *p;
+ int i;
+
+ for (i = 0; i < MAX_PTRS_PER_PAGE; i++) {
+ /* PTE present? */
+ if (!IOMMU_PTE_PRESENT(pt[i]))
+ continue;
+
+ if (is_large_pte(pt[i]))
+ continue;
+
+ /*
+ * Free the next level. No need to look at l1 tables here since
+ * they can only contain leaf PTEs; just free them directly.
+ */
+ p = get_pgtable_pte(pt[i]);
+ if (level > 2)
+ free_pgtable(p, level - 1);
+ else
+ free_pgtable_page(p);
+ }
+
+ free_pgtable_page(pt);
+}
+
+/* Allocate page table */
+static u64 *v2_alloc_pte(u64 *pgd, unsigned long iova,
+ unsigned long pg_size, bool *updated)
+{
+ u64 *pte, *page;
+ int level, end_level;
+
+ level = get_pgtable_level() - 1;
+ end_level = page_size_to_level(pg_size);
+ pte = &pgd[PM_LEVEL_INDEX(level, iova)];
+ iova = PAGE_SIZE_ALIGN(iova, PAGE_SIZE);
+
+ while (level >= end_level) {
+ u64 __pte, __npte;
+
+ __pte = *pte;
+
+ if (IOMMU_PTE_PRESENT(__pte) && is_large_pte(__pte)) {
+ /* Unmap large pte */
+ cmpxchg64(pte, *pte, 0ULL);
+ *updated = true;
+ continue;
+ }
+
+ if (!IOMMU_PTE_PRESENT(__pte)) {
+ page = alloc_pgtable_page();
+ if (!page)
+ return NULL;
+
+ __npte = set_pgtable_attr(page);
+ /* pte could have been changed somewhere. */
+ if (cmpxchg64(pte, __pte, __npte) != __pte)
+ free_pgtable_page(page);
+ else if (IOMMU_PTE_PRESENT(__pte))
+ *updated = true;
+
+ continue;
+ }
+
+ level -= 1;
+ pte = get_pgtable_pte(__pte);
+ pte = &pte[PM_LEVEL_INDEX(level, iova)];
+ }
+
+ /* Tear down existing pte entries */
+ if (IOMMU_PTE_PRESENT(*pte)) {
+ u64 *__pte;
+
+ *updated = true;
+ __pte = get_pgtable_pte(*pte);
+ cmpxchg64(pte, *pte, 0ULL);
+ if (pg_size == IOMMU_PAGE_SIZE_1G)
+ free_pgtable(__pte, end_level - 1);
+ else if (pg_size == IOMMU_PAGE_SIZE_2M)
+ free_pgtable_page(__pte);
+ }
+
+ return pte;
+}
+
+/*
+ * This function checks if there is a PTE for a given dma address.
+ * If there is one, it returns the pointer to it.
+ */
+static u64 *fetch_pte(struct amd_io_pgtable *pgtable,
+ unsigned long iova, unsigned long *page_size)
+{
+ u64 *pte;
+ int level;
+
+ level = get_pgtable_level() - 1;
+ pte = &pgtable->pgd[PM_LEVEL_INDEX(level, iova)];
+ /* Default page size is 4K */
+ *page_size = PAGE_SIZE;
+
+ while (level) {
+ /* Not present */
+ if (!IOMMU_PTE_PRESENT(*pte))
+ return NULL;
+
+ /* Walk to the next level */
+ pte = get_pgtable_pte(*pte);
+ pte = &pte[PM_LEVEL_INDEX(level - 1, iova)];
+
+ /* Large page */
+ if (is_large_pte(*pte)) {
+ if (level == PAGE_MODE_3_LEVEL)
+ *page_size = IOMMU_PAGE_SIZE_1G;
+ else if (level == PAGE_MODE_2_LEVEL)
+ *page_size = IOMMU_PAGE_SIZE_2M;
+ else
+ return NULL; /* Wrongly set PSE bit in PTE */
+
+ break;
+ }
+
+ level -= 1;
+ }
+
+ return pte;
+}
+
+static int iommu_v2_map_pages(struct io_pgtable_ops *ops, unsigned long iova,
+ phys_addr_t paddr, size_t pgsize, size_t pgcount,
+ int prot, gfp_t gfp, size_t *mapped)
+{
+ struct protection_domain *pdom = io_pgtable_ops_to_domain(ops);
+ struct io_pgtable_cfg *cfg = &pdom->iop.iop.cfg;
+ u64 *pte;
+ unsigned long map_size;
+ unsigned long mapped_size = 0;
+ unsigned long o_iova = iova;
+ size_t size = pgcount << __ffs(pgsize);
+ int count = 0;
+ int ret = 0;
+ bool updated = false;
+
+ if (WARN_ON(!pgsize || (pgsize & cfg->pgsize_bitmap) != pgsize) || !pgcount)
+ return -EINVAL;
+
+ if (!(prot & IOMMU_PROT_MASK))
+ return -EINVAL;
+
+ while (mapped_size < size) {
+ map_size = get_alloc_page_size(pgsize);
+ pte = v2_alloc_pte(pdom->iop.pgd, iova, map_size, &updated);
+ if (!pte) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ *pte = set_pte_attr(paddr, map_size, prot);
+
+ count++;
+ iova += map_size;
+ paddr += map_size;
+ mapped_size += map_size;
+ }
+
+out:
+ if (updated) {
+ if (count > 1)
+ amd_iommu_flush_tlb(&pdom->domain, 0);
+ else
+ amd_iommu_flush_page(&pdom->domain, 0, o_iova);
+ }
+
+ if (mapped)
+ *mapped += mapped_size;
+
+ return ret;
+}
+
+static unsigned long iommu_v2_unmap_pages(struct io_pgtable_ops *ops,
+ unsigned long iova,
+ size_t pgsize, size_t pgcount,
+ struct iommu_iotlb_gather *gather)
+{
+ struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
+ struct io_pgtable_cfg *cfg = &pgtable->iop.cfg;
+ unsigned long unmap_size;
+ unsigned long unmapped = 0;
+ size_t size = pgcount << __ffs(pgsize);
+ u64 *pte;
+
+ if (WARN_ON(!pgsize || (pgsize & cfg->pgsize_bitmap) != pgsize || !pgcount))
+ return 0;
+
+ while (unmapped < size) {
+ pte = fetch_pte(pgtable, iova, &unmap_size);
+ if (!pte)
+ return unmapped;
+
+ *pte = 0ULL;
+
+ iova = (iova & ~(unmap_size - 1)) + unmap_size;
+ unmapped += unmap_size;
+ }
+
+ return unmapped;
+}
+
+static phys_addr_t iommu_v2_iova_to_phys(struct io_pgtable_ops *ops, unsigned long iova)
+{
+ struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
+ unsigned long offset_mask, pte_pgsize;
+ u64 *pte, __pte;
+
+ pte = fetch_pte(pgtable, iova, &pte_pgsize);
+ if (!pte || !IOMMU_PTE_PRESENT(*pte))
+ return 0;
+
+ offset_mask = pte_pgsize - 1;
+ __pte = __sme_clr(*pte & PM_ADDR_MASK);
+
+ return (__pte & ~offset_mask) | (iova & offset_mask);
+}
+
+/*
+ * ----------------------------------------------------
+ */
+static void v2_tlb_flush_all(void *cookie)
+{
+}
+
+static void v2_tlb_flush_walk(unsigned long iova, size_t size,
+ size_t granule, void *cookie)
+{
+}
+
+static void v2_tlb_add_page(struct iommu_iotlb_gather *gather,
+ unsigned long iova, size_t granule,
+ void *cookie)
+{
+}
+
+static const struct iommu_flush_ops v2_flush_ops = {
+ .tlb_flush_all = v2_tlb_flush_all,
+ .tlb_flush_walk = v2_tlb_flush_walk,
+ .tlb_add_page = v2_tlb_add_page,
+};
+
+static void v2_free_pgtable(struct io_pgtable *iop)
+{
+ struct protection_domain *pdom;
+ struct amd_io_pgtable *pgtable = container_of(iop, struct amd_io_pgtable, iop);
+
+ pdom = container_of(pgtable, struct protection_domain, iop);
+ if (!(pdom->flags & PD_IOMMUV2_MASK))
+ return;
+
+ /*
+ * Make changes visible to IOMMUs. No need to clear gcr3 entry
+ * as gcr3 table is already freed.
+ */
+ amd_iommu_domain_update(pdom);
+
+ /* Free page table */
+ free_pgtable(pgtable->pgd, get_pgtable_level());
+}
+
+static struct io_pgtable *v2_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie)
+{
+ struct amd_io_pgtable *pgtable = io_pgtable_cfg_to_data(cfg);
+ struct protection_domain *pdom = (struct protection_domain *)cookie;
+ int ret;
+
+ pgtable->pgd = alloc_pgtable_page();
+ if (!pgtable->pgd)
+ return NULL;
+
+ ret = amd_iommu_domain_set_gcr3(&pdom->domain, 0, iommu_virt_to_phys(pgtable->pgd));
+ if (ret)
+ goto err_free_pgd;
+
+ pgtable->iop.ops.map_pages = iommu_v2_map_pages;
+ pgtable->iop.ops.unmap_pages = iommu_v2_unmap_pages;
+ pgtable->iop.ops.iova_to_phys = iommu_v2_iova_to_phys;
+
+ cfg->pgsize_bitmap = AMD_IOMMU_PGSIZES_V2,
+ cfg->ias = IOMMU_IN_ADDR_BIT_SIZE,
+ cfg->oas = IOMMU_OUT_ADDR_BIT_SIZE,
+ cfg->tlb = &v2_flush_ops;
+
+ return &pgtable->iop;
+
+err_free_pgd:
+ free_pgtable_page(pgtable->pgd);
+
+ return NULL;
+}
+
+struct io_pgtable_init_fns io_pgtable_amd_iommu_v2_init_fns = {
+ .alloc = v2_alloc_pgtable,
+ .free = v2_free_pgtable,
+};
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 828672a46a3d..65856e401949 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -11,8 +11,6 @@
#include <linux/ratelimit.h>
#include <linux/pci.h>
#include <linux/acpi.h>
-#include <linux/amba/bus.h>
-#include <linux/platform_device.h>
#include <linux/pci-ats.h>
#include <linux/bitmap.h>
#include <linux/slab.h>
@@ -20,7 +18,6 @@
#include <linux/scatterlist.h>
#include <linux/dma-map-ops.h>
#include <linux/dma-direct.h>
-#include <linux/dma-iommu.h>
#include <linux/iommu-helper.h>
#include <linux/delay.h>
#include <linux/amd-iommu.h>
@@ -42,6 +39,7 @@
#include <asm/dma.h>
#include "amd_iommu.h"
+#include "../dma-iommu.h"
#include "../irq_remapping.h"
#define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28))
@@ -66,10 +64,6 @@ LIST_HEAD(ioapic_map);
LIST_HEAD(hpet_map);
LIST_HEAD(acpihid_map);
-/*
- * Domain for untranslated devices - only allocated
- * if iommu=pt passed on kernel cmd line.
- */
const struct iommu_ops amd_iommu_ops;
static ATOMIC_NOTIFIER_HEAD(ppr_notifier);
@@ -85,6 +79,7 @@ struct iommu_cmd {
struct kmem_cache *amd_iommu_irq_cache;
static void detach_device(struct device *dev);
+static int domain_enable_v2(struct protection_domain *domain, int pasids);
/****************************************************************************
*
@@ -1597,6 +1592,9 @@ static void set_dte_entry(struct amd_iommu *iommu, u16 devid,
tmp = DTE_GCR3_VAL_C(gcr3) << DTE_GCR3_SHIFT_C;
flags |= tmp;
+
+ if (domain->flags & PD_GIOV_MASK)
+ pte_root |= DTE_FLAG_GIOV;
}
flags &= ~DEV_DOMID_MASK;
@@ -1650,6 +1648,10 @@ static void do_attach(struct iommu_dev_data *dev_data,
domain->dev_iommu[iommu->index] += 1;
domain->dev_cnt += 1;
+ /* Override supported page sizes */
+ if (domain->flags & PD_GIOV_MASK)
+ domain->domain.pgsize_bitmap = AMD_IOMMU_PGSIZES_V2;
+
/* Update device table */
set_dte_entry(iommu, dev_data->devid, domain,
ats, dev_data->iommu_v2);
@@ -1694,7 +1696,7 @@ static void pdev_iommuv2_disable(struct pci_dev *pdev)
pci_disable_pasid(pdev);
}
-static int pdev_iommuv2_enable(struct pci_dev *pdev)
+static int pdev_pri_ats_enable(struct pci_dev *pdev)
{
int ret;
@@ -1757,11 +1759,19 @@ static int attach_device(struct device *dev,
struct iommu_domain *def_domain = iommu_get_dma_domain(dev);
ret = -EINVAL;
- if (def_domain->type != IOMMU_DOMAIN_IDENTITY)
+
+ /*
+ * In case of using AMD_IOMMU_V1 page table mode and the device
+ * is enabling for PPR/ATS support (using v2 table),
+ * we need to make sure that the domain type is identity map.
+ */
+ if ((amd_iommu_pgtable == AMD_IOMMU_V1) &&
+ def_domain->type != IOMMU_DOMAIN_IDENTITY) {
goto out;
+ }
if (dev_data->iommu_v2) {
- if (pdev_iommuv2_enable(pdev) != 0)
+ if (pdev_pri_ats_enable(pdev) != 0)
goto out;
dev_data->ats.enabled = true;
@@ -1852,6 +1862,10 @@ static struct iommu_device *amd_iommu_probe_device(struct device *dev)
if (!iommu)
return ERR_PTR(-ENODEV);
+ /* Not registered yet? */
+ if (!iommu->iommu.ops)
+ return ERR_PTR(-ENODEV);
+
if (dev_iommu_priv_get(dev))
return &iommu->iommu;
@@ -1938,25 +1952,6 @@ void amd_iommu_domain_update(struct protection_domain *domain)
amd_iommu_domain_flush_complete(domain);
}
-int __init amd_iommu_init_api(void)
-{
- int err;
-
- err = bus_set_iommu(&pci_bus_type, &amd_iommu_ops);
- if (err)
- return err;
-#ifdef CONFIG_ARM_AMBA
- err = bus_set_iommu(&amba_bustype, &amd_iommu_ops);
- if (err)
- return err;
-#endif
- err = bus_set_iommu(&platform_bus_type, &amd_iommu_ops);
- if (err)
- return err;
-
- return 0;
-}
-
/*****************************************************************************
*
* The following functions belong to the exported interface of AMD IOMMU
@@ -1989,12 +1984,12 @@ static void protection_domain_free(struct protection_domain *domain)
if (!domain)
return;
- if (domain->id)
- domain_id_free(domain->id);
-
if (domain->iop.pgtbl_cfg.tlb)
free_io_pgtable_ops(&domain->iop.iop.ops);
+ if (domain->id)
+ domain_id_free(domain->id);
+
kfree(domain);
}
@@ -2012,8 +2007,10 @@ static int protection_domain_init_v1(struct protection_domain *domain, int mode)
if (mode != PAGE_MODE_NONE) {
pt_root = (void *)get_zeroed_page(GFP_KERNEL);
- if (!pt_root)
+ if (!pt_root) {
+ domain_id_free(domain->id);
return -ENOMEM;
+ }
}
amd_iommu_domain_set_pgtable(domain, pt_root, mode);
@@ -2021,6 +2018,24 @@ static int protection_domain_init_v1(struct protection_domain *domain, int mode)
return 0;
}
+static int protection_domain_init_v2(struct protection_domain *domain)
+{
+ spin_lock_init(&domain->lock);
+ domain->id = domain_id_alloc();
+ if (!domain->id)
+ return -ENOMEM;
+ INIT_LIST_HEAD(&domain->dev_list);
+
+ domain->flags |= PD_GIOV_MASK;
+
+ if (domain_enable_v2(domain, 1)) {
+ domain_id_free(domain->id);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
static struct protection_domain *protection_domain_alloc(unsigned int type)
{
struct io_pgtable_ops *pgtbl_ops;
@@ -2048,6 +2063,9 @@ static struct protection_domain *protection_domain_alloc(unsigned int type)
case AMD_IOMMU_V1:
ret = protection_domain_init_v1(domain, mode);
break;
+ case AMD_IOMMU_V2:
+ ret = protection_domain_init_v2(domain);
+ break;
default:
ret = -EINVAL;
}
@@ -2056,8 +2074,10 @@ static struct protection_domain *protection_domain_alloc(unsigned int type)
goto out_err;
pgtbl_ops = alloc_io_pgtable_ops(pgtable, &domain->iop.pgtbl_cfg, domain);
- if (!pgtbl_ops)
+ if (!pgtbl_ops) {
+ domain_id_free(domain->id);
goto out_err;
+ }
return domain;
out_err:
@@ -2175,13 +2195,13 @@ static void amd_iommu_iotlb_sync_map(struct iommu_domain *dom,
struct protection_domain *domain = to_pdomain(dom);
struct io_pgtable_ops *ops = &domain->iop.iop.ops;
- if (ops->map)
+ if (ops->map_pages)
domain_flush_np_cache(domain, iova, size);
}
-static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova,
- phys_addr_t paddr, size_t page_size, int iommu_prot,
- gfp_t gfp)
+static int amd_iommu_map_pages(struct iommu_domain *dom, unsigned long iova,
+ phys_addr_t paddr, size_t pgsize, size_t pgcount,
+ int iommu_prot, gfp_t gfp, size_t *mapped)
{
struct protection_domain *domain = to_pdomain(dom);
struct io_pgtable_ops *ops = &domain->iop.iop.ops;
@@ -2197,8 +2217,10 @@ static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova,
if (iommu_prot & IOMMU_WRITE)
prot |= IOMMU_PROT_IW;
- if (ops->map)
- ret = ops->map(ops, iova, paddr, page_size, prot, gfp);
+ if (ops->map_pages) {
+ ret = ops->map_pages(ops, iova, paddr, pgsize,
+ pgcount, prot, gfp, mapped);
+ }
return ret;
}
@@ -2224,9 +2246,9 @@ static void amd_iommu_iotlb_gather_add_page(struct iommu_domain *domain,
iommu_iotlb_gather_add_range(gather, iova, size);
}
-static size_t amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova,
- size_t page_size,
- struct iommu_iotlb_gather *gather)
+static size_t amd_iommu_unmap_pages(struct iommu_domain *dom, unsigned long iova,
+ size_t pgsize, size_t pgcount,
+ struct iommu_iotlb_gather *gather)
{
struct protection_domain *domain = to_pdomain(dom);
struct io_pgtable_ops *ops = &domain->iop.iop.ops;
@@ -2236,9 +2258,10 @@ static size_t amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova,
(domain->iop.mode == PAGE_MODE_NONE))
return 0;
- r = (ops->unmap) ? ops->unmap(ops, iova, page_size, gather) : 0;
+ r = (ops->unmap_pages) ? ops->unmap_pages(ops, iova, pgsize, pgcount, NULL) : 0;
- amd_iommu_iotlb_gather_add_page(dom, gather, iova, page_size);
+ if (r)
+ amd_iommu_iotlb_gather_add_page(dom, gather, iova, r);
return r;
}
@@ -2252,7 +2275,7 @@ static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom,
return ops->iova_to_phys(ops, iova);
}
-static bool amd_iommu_capable(enum iommu_cap cap)
+static bool amd_iommu_capable(struct device *dev, enum iommu_cap cap)
{
switch (cap) {
case IOMMU_CAP_CACHE_COHERENCY:
@@ -2400,8 +2423,8 @@ const struct iommu_ops amd_iommu_ops = {
.default_domain_ops = &(const struct iommu_domain_ops) {
.attach_dev = amd_iommu_attach_device,
.detach_dev = amd_iommu_detach_device,
- .map = amd_iommu_map,
- .unmap = amd_iommu_unmap,
+ .map_pages = amd_iommu_map_pages,
+ .unmap_pages = amd_iommu_unmap_pages,
.iotlb_sync_map = amd_iommu_iotlb_sync_map,
.iova_to_phys = amd_iommu_iova_to_phys,
.flush_iotlb_all = amd_iommu_flush_iotlb_all,
@@ -2448,11 +2471,10 @@ void amd_iommu_domain_direct_map(struct iommu_domain *dom)
}
EXPORT_SYMBOL(amd_iommu_domain_direct_map);
-int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids)
+/* Note: This function expects iommu_domain->lock to be held prior calling the function. */
+static int domain_enable_v2(struct protection_domain *domain, int pasids)
{
- struct protection_domain *domain = to_pdomain(dom);
- unsigned long flags;
- int levels, ret;
+ int levels;
/* Number of GCR3 table levels required */
for (levels = 0; (pasids - 1) & ~0x1ff; pasids >>= 9)
@@ -2461,7 +2483,25 @@ int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids)
if (levels > amd_iommu_max_glx_val)
return -EINVAL;
- spin_lock_irqsave(&domain->lock, flags);
+ domain->gcr3_tbl = (void *)get_zeroed_page(GFP_ATOMIC);
+ if (domain->gcr3_tbl == NULL)
+ return -ENOMEM;
+
+ domain->glx = levels;
+ domain->flags |= PD_IOMMUV2_MASK;
+
+ amd_iommu_domain_update(domain);
+
+ return 0;
+}
+
+int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids)
+{
+ struct protection_domain *pdom = to_pdomain(dom);
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(&pdom->lock, flags);
/*
* Save us all sanity checks whether devices already in the
@@ -2469,24 +2509,14 @@ int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids)
* devices attached when it is switched into IOMMUv2 mode.
*/
ret = -EBUSY;
- if (domain->dev_cnt > 0 || domain->flags & PD_IOMMUV2_MASK)
+ if (pdom->dev_cnt > 0 || pdom->flags & PD_IOMMUV2_MASK)
goto out;
- ret = -ENOMEM;
- domain->gcr3_tbl = (void *)get_zeroed_page(GFP_ATOMIC);
- if (domain->gcr3_tbl == NULL)
- goto out;
-
- domain->glx = levels;
- domain->flags |= PD_IOMMUV2_MASK;
-
- amd_iommu_domain_update(domain);
-
- ret = 0;
+ if (!pdom->gcr3_tbl)
+ ret = domain_enable_v2(pdom, pasids);
out:
- spin_unlock_irqrestore(&domain->lock, flags);
-
+ spin_unlock_irqrestore(&pdom->lock, flags);
return ret;
}
EXPORT_SYMBOL(amd_iommu_domain_enable_v2);