diff options
Diffstat (limited to 'drivers/iommu/amd')
-rw-r--r-- | drivers/iommu/amd/Makefile | 2 | ||||
-rw-r--r-- | drivers/iommu/amd/amd_iommu_types.h | 10 | ||||
-rw-r--r-- | drivers/iommu/amd/init.c | 38 | ||||
-rw-r--r-- | drivers/iommu/amd/io_pgtable.c | 76 | ||||
-rw-r--r-- | drivers/iommu/amd/io_pgtable_v2.c | 415 | ||||
-rw-r--r-- | drivers/iommu/amd/iommu.c | 135 | ||||
-rw-r--r-- | drivers/iommu/amd/iommu_v2.c | 2 |
7 files changed, 592 insertions, 86 deletions
diff --git a/drivers/iommu/amd/Makefile b/drivers/iommu/amd/Makefile index a935f8f4b974..773d8aa00283 100644 --- a/drivers/iommu/amd/Makefile +++ b/drivers/iommu/amd/Makefile @@ -1,4 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-$(CONFIG_AMD_IOMMU) += iommu.o init.o quirks.o io_pgtable.o +obj-$(CONFIG_AMD_IOMMU) += iommu.o init.o quirks.o io_pgtable.o io_pgtable_v2.o obj-$(CONFIG_AMD_IOMMU_DEBUGFS) += debugfs.o obj-$(CONFIG_AMD_IOMMU_V2) += iommu_v2.o diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index 5b1019dab328..1d0a70c85333 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -94,6 +94,7 @@ #define FEATURE_HE (1ULL<<8) #define FEATURE_PC (1ULL<<9) #define FEATURE_GAM_VAPIC (1ULL<<21) +#define FEATURE_GIOSUP (1ULL<<48) #define FEATURE_EPHSUP (1ULL<<50) #define FEATURE_SNP (1ULL<<63) @@ -276,6 +277,8 @@ * 512GB Pages are not supported due to a hardware bug */ #define AMD_IOMMU_PGSIZES ((~0xFFFUL) & ~(2ULL << 38)) +/* 4K, 2MB, 1G page sizes are supported */ +#define AMD_IOMMU_PGSIZES_V2 (PAGE_SIZE | (1ULL << 21) | (1ULL << 30)) /* Bit value definition for dte irq remapping fields*/ #define DTE_IRQ_PHYS_ADDR_MASK (((1ULL << 45)-1) << 6) @@ -376,6 +379,7 @@ #define DTE_FLAG_IW (1ULL << 62) #define DTE_FLAG_IOTLB (1ULL << 32) +#define DTE_FLAG_GIOV (1ULL << 54) #define DTE_FLAG_GV (1ULL << 55) #define DTE_FLAG_MASK (0x3ffULL << 32) #define DTE_GLX_SHIFT (56) @@ -434,6 +438,7 @@ #define PD_PASSTHROUGH_MASK (1UL << 2) /* domain has no page translation */ #define PD_IOMMUV2_MASK (1UL << 3) /* domain has gcr3 table */ +#define PD_GIOV_MASK (1UL << 4) /* domain enable GIOV support */ extern bool amd_iommu_dump; #define DUMP_printk(format, arg...) \ @@ -456,6 +461,8 @@ struct irq_remap_table { /* Interrupt remapping feature used? */ extern bool amd_iommu_irq_remap; +extern const struct iommu_ops amd_iommu_ops; + /* IVRS indicates that pre-boot remapping was enabled */ extern bool amdr_ivrs_remap_support; @@ -526,7 +533,8 @@ struct amd_io_pgtable { struct io_pgtable iop; int mode; u64 *root; - atomic64_t pt_root; /* pgtable root and pgtable mode */ + atomic64_t pt_root; /* pgtable root and pgtable mode */ + u64 *pgd; /* v2 pgtable pgd pointer */ }; /* diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 79e0286da6ce..1a2d425bf568 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -95,8 +95,6 @@ * out of it. */ -extern const struct iommu_ops amd_iommu_ops; - /* * structure describing one IOMMU in the ACPI table. Typically followed by one * or more ivhd_entrys. @@ -2068,6 +2066,17 @@ static int __init iommu_init_pci(struct amd_iommu *iommu) init_iommu_perf_ctr(iommu); + if (amd_iommu_pgtable == AMD_IOMMU_V2) { + if (!iommu_feature(iommu, FEATURE_GIOSUP) || + !iommu_feature(iommu, FEATURE_GT)) { + pr_warn("Cannot enable v2 page table for DMA-API. Fallback to v1.\n"); + amd_iommu_pgtable = AMD_IOMMU_V1; + } else if (iommu_default_passthrough()) { + pr_warn("V2 page table doesn't support passthrough mode. Fallback to v1.\n"); + amd_iommu_pgtable = AMD_IOMMU_V1; + } + } + if (is_rd890_iommu(iommu->dev)) { int i, j; @@ -2146,6 +2155,8 @@ static void print_iommu_info(void) if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE) pr_info("X2APIC enabled\n"); } + if (amd_iommu_pgtable == AMD_IOMMU_V2) + pr_info("V2 page table enabled\n"); } static int __init amd_iommu_init_pci(void) @@ -3359,17 +3370,30 @@ static int __init parse_amd_iommu_intr(char *str) static int __init parse_amd_iommu_options(char *str) { - for (; *str; ++str) { + if (!str) + return -EINVAL; + + while (*str) { if (strncmp(str, "fullflush", 9) == 0) { pr_warn("amd_iommu=fullflush deprecated; use iommu.strict=1 instead\n"); iommu_set_dma_strict(); - } - if (strncmp(str, "force_enable", 12) == 0) + } else if (strncmp(str, "force_enable", 12) == 0) { amd_iommu_force_enable = true; - if (strncmp(str, "off", 3) == 0) + } else if (strncmp(str, "off", 3) == 0) { amd_iommu_disabled = true; - if (strncmp(str, "force_isolation", 15) == 0) + } else if (strncmp(str, "force_isolation", 15) == 0) { amd_iommu_force_isolation = true; + } else if (strncmp(str, "pgtbl_v1", 8) == 0) { + amd_iommu_pgtable = AMD_IOMMU_V1; + } else if (strncmp(str, "pgtbl_v2", 8) == 0) { + amd_iommu_pgtable = AMD_IOMMU_V2; + } else { + pr_notice("Unknown option - '%s'\n", str); + } + + str += strcspn(str, ","); + while (*str == ',') + str++; } return 1; diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c index 7d4b61e5db47..ace0e9b8b913 100644 --- a/drivers/iommu/amd/io_pgtable.c +++ b/drivers/iommu/amd/io_pgtable.c @@ -360,8 +360,9 @@ static void free_clear_pte(u64 *pte, u64 pteval, struct list_head *freelist) * supporting all features of AMD IOMMU page tables like level skipping * and full 64 bit address spaces. */ -static int iommu_v1_map_page(struct io_pgtable_ops *ops, unsigned long iova, - phys_addr_t paddr, size_t size, int prot, gfp_t gfp) +static int iommu_v1_map_pages(struct io_pgtable_ops *ops, unsigned long iova, + phys_addr_t paddr, size_t pgsize, size_t pgcount, + int prot, gfp_t gfp, size_t *mapped) { struct protection_domain *dom = io_pgtable_ops_to_domain(ops); LIST_HEAD(freelist); @@ -369,39 +370,47 @@ static int iommu_v1_map_page(struct io_pgtable_ops *ops, unsigned long iova, u64 __pte, *pte; int ret, i, count; - BUG_ON(!IS_ALIGNED(iova, size)); - BUG_ON(!IS_ALIGNED(paddr, size)); + BUG_ON(!IS_ALIGNED(iova, pgsize)); + BUG_ON(!IS_ALIGNED(paddr, pgsize)); ret = -EINVAL; if (!(prot & IOMMU_PROT_MASK)) goto out; - count = PAGE_SIZE_PTE_COUNT(size); - pte = alloc_pte(dom, iova, size, NULL, gfp, &updated); + while (pgcount > 0) { + count = PAGE_SIZE_PTE_COUNT(pgsize); + pte = alloc_pte(dom, iova, pgsize, NULL, gfp, &updated); - ret = -ENOMEM; - if (!pte) - goto out; + ret = -ENOMEM; + if (!pte) + goto out; - for (i = 0; i < count; ++i) - free_clear_pte(&pte[i], pte[i], &freelist); + for (i = 0; i < count; ++i) + free_clear_pte(&pte[i], pte[i], &freelist); - if (!list_empty(&freelist)) - updated = true; + if (!list_empty(&freelist)) + updated = true; - if (count > 1) { - __pte = PAGE_SIZE_PTE(__sme_set(paddr), size); - __pte |= PM_LEVEL_ENC(7) | IOMMU_PTE_PR | IOMMU_PTE_FC; - } else - __pte = __sme_set(paddr) | IOMMU_PTE_PR | IOMMU_PTE_FC; + if (count > 1) { + __pte = PAGE_SIZE_PTE(__sme_set(paddr), pgsize); + __pte |= PM_LEVEL_ENC(7) | IOMMU_PTE_PR | IOMMU_PTE_FC; + } else + __pte = __sme_set(paddr) | IOMMU_PTE_PR | IOMMU_PTE_FC; - if (prot & IOMMU_PROT_IR) - __pte |= IOMMU_PTE_IR; - if (prot & IOMMU_PROT_IW) - __pte |= IOMMU_PTE_IW; + if (prot & IOMMU_PROT_IR) + __pte |= IOMMU_PTE_IR; + if (prot & IOMMU_PROT_IW) + __pte |= IOMMU_PTE_IW; - for (i = 0; i < count; ++i) - pte[i] = __pte; + for (i = 0; i < count; ++i) + pte[i] = __pte; + + iova += pgsize; + paddr += pgsize; + pgcount--; + if (mapped) + *mapped += pgsize; + } ret = 0; @@ -426,17 +435,18 @@ out: return ret; } -static unsigned long iommu_v1_unmap_page(struct io_pgtable_ops *ops, - unsigned long iova, - size_t size, - struct iommu_iotlb_gather *gather) +static unsigned long iommu_v1_unmap_pages(struct io_pgtable_ops *ops, + unsigned long iova, + size_t pgsize, size_t pgcount, + struct iommu_iotlb_gather *gather) { struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops); unsigned long long unmapped; unsigned long unmap_size; u64 *pte; + size_t size = pgcount << __ffs(pgsize); - BUG_ON(!is_power_of_2(size)); + BUG_ON(!is_power_of_2(pgsize)); unmapped = 0; @@ -448,14 +458,14 @@ static unsigned long iommu_v1_unmap_page(struct io_pgtable_ops *ops, count = PAGE_SIZE_PTE_COUNT(unmap_size); for (i = 0; i < count; i++) pte[i] = 0ULL; + } else { + return unmapped; } iova = (iova & ~(unmap_size - 1)) + unmap_size; unmapped += unmap_size; } - BUG_ON(unmapped && !is_power_of_2(unmapped)); - return unmapped; } @@ -514,8 +524,8 @@ static struct io_pgtable *v1_alloc_pgtable(struct io_pgtable_cfg *cfg, void *coo cfg->oas = IOMMU_OUT_ADDR_BIT_SIZE, cfg->tlb = &v1_flush_ops; - pgtable->iop.ops.map = iommu_v1_map_page; - pgtable->iop.ops.unmap = iommu_v1_unmap_page; + pgtable->iop.ops.map_pages = iommu_v1_map_pages; + pgtable->iop.ops.unmap_pages = iommu_v1_unmap_pages; pgtable->iop.ops.iova_to_phys = iommu_v1_iova_to_phys; return &pgtable->iop; diff --git a/drivers/iommu/amd/io_pgtable_v2.c b/drivers/iommu/amd/io_pgtable_v2.c new file mode 100644 index 000000000000..8638ddf6fb3b --- /dev/null +++ b/drivers/iommu/amd/io_pgtable_v2.c @@ -0,0 +1,415 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * CPU-agnostic AMD IO page table v2 allocator. + * + * Copyright (C) 2022 Advanced Micro Devices, Inc. + * Author: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com> + * Author: Vasant Hegde <vasant.hegde@amd.com> + */ + +#define pr_fmt(fmt) "AMD-Vi: " fmt +#define dev_fmt(fmt) pr_fmt(fmt) + +#include <linux/bitops.h> +#include <linux/io-pgtable.h> +#include <linux/kernel.h> + +#include <asm/barrier.h> + +#include "amd_iommu_types.h" +#include "amd_iommu.h" + +#define IOMMU_PAGE_PRESENT BIT_ULL(0) /* Is present */ +#define IOMMU_PAGE_RW BIT_ULL(1) /* Writeable */ +#define IOMMU_PAGE_USER BIT_ULL(2) /* Userspace addressable */ +#define IOMMU_PAGE_PWT BIT_ULL(3) /* Page write through */ +#define IOMMU_PAGE_PCD BIT_ULL(4) /* Page cache disabled */ +#define IOMMU_PAGE_ACCESS BIT_ULL(5) /* Was accessed (updated by IOMMU) */ +#define IOMMU_PAGE_DIRTY BIT_ULL(6) /* Was written to (updated by IOMMU) */ +#define IOMMU_PAGE_PSE BIT_ULL(7) /* Page Size Extensions */ +#define IOMMU_PAGE_NX BIT_ULL(63) /* No execute */ + +#define MAX_PTRS_PER_PAGE 512 + +#define IOMMU_PAGE_SIZE_2M BIT_ULL(21) +#define IOMMU_PAGE_SIZE_1G BIT_ULL(30) + + +static inline int get_pgtable_level(void) +{ + /* 5 level page table is not supported */ + return PAGE_MODE_4_LEVEL; +} + +static inline bool is_large_pte(u64 pte) +{ + return (pte & IOMMU_PAGE_PSE); +} + +static inline void *alloc_pgtable_page(void) +{ + return (void *)get_zeroed_page(GFP_KERNEL); +} + +static inline u64 set_pgtable_attr(u64 *page) +{ + u64 prot; + + prot = IOMMU_PAGE_PRESENT | IOMMU_PAGE_RW | IOMMU_PAGE_USER; + prot |= IOMMU_PAGE_ACCESS | IOMMU_PAGE_DIRTY; + + return (iommu_virt_to_phys(page) | prot); +} + +static inline void *get_pgtable_pte(u64 pte) +{ + return iommu_phys_to_virt(pte & PM_ADDR_MASK); +} + +static u64 set_pte_attr(u64 paddr, u64 pg_size, int prot) +{ + u64 pte; + + pte = __sme_set(paddr & PM_ADDR_MASK); + pte |= IOMMU_PAGE_PRESENT | IOMMU_PAGE_USER; + pte |= IOMMU_PAGE_ACCESS | IOMMU_PAGE_DIRTY; + + if (prot & IOMMU_PROT_IW) + pte |= IOMMU_PAGE_RW; + + /* Large page */ + if (pg_size == IOMMU_PAGE_SIZE_1G || pg_size == IOMMU_PAGE_SIZE_2M) + pte |= IOMMU_PAGE_PSE; + + return pte; +} + +static inline u64 get_alloc_page_size(u64 size) +{ + if (size >= IOMMU_PAGE_SIZE_1G) + return IOMMU_PAGE_SIZE_1G; + + if (size >= IOMMU_PAGE_SIZE_2M) + return IOMMU_PAGE_SIZE_2M; + + return PAGE_SIZE; +} + +static inline int page_size_to_level(u64 pg_size) +{ + if (pg_size == IOMMU_PAGE_SIZE_1G) + return PAGE_MODE_3_LEVEL; + if (pg_size == IOMMU_PAGE_SIZE_2M) + return PAGE_MODE_2_LEVEL; + + return PAGE_MODE_1_LEVEL; +} + +static inline void free_pgtable_page(u64 *pt) +{ + free_page((unsigned long)pt); +} + +static void free_pgtable(u64 *pt, int level) +{ + u64 *p; + int i; + + for (i = 0; i < MAX_PTRS_PER_PAGE; i++) { + /* PTE present? */ + if (!IOMMU_PTE_PRESENT(pt[i])) + continue; + + if (is_large_pte(pt[i])) + continue; + + /* + * Free the next level. No need to look at l1 tables here since + * they can only contain leaf PTEs; just free them directly. + */ + p = get_pgtable_pte(pt[i]); + if (level > 2) + free_pgtable(p, level - 1); + else + free_pgtable_page(p); + } + + free_pgtable_page(pt); +} + +/* Allocate page table */ +static u64 *v2_alloc_pte(u64 *pgd, unsigned long iova, + unsigned long pg_size, bool *updated) +{ + u64 *pte, *page; + int level, end_level; + + level = get_pgtable_level() - 1; + end_level = page_size_to_level(pg_size); + pte = &pgd[PM_LEVEL_INDEX(level, iova)]; + iova = PAGE_SIZE_ALIGN(iova, PAGE_SIZE); + + while (level >= end_level) { + u64 __pte, __npte; + + __pte = *pte; + + if (IOMMU_PTE_PRESENT(__pte) && is_large_pte(__pte)) { + /* Unmap large pte */ + cmpxchg64(pte, *pte, 0ULL); + *updated = true; + continue; + } + + if (!IOMMU_PTE_PRESENT(__pte)) { + page = alloc_pgtable_page(); + if (!page) + return NULL; + + __npte = set_pgtable_attr(page); + /* pte could have been changed somewhere. */ + if (cmpxchg64(pte, __pte, __npte) != __pte) + free_pgtable_page(page); + else if (IOMMU_PTE_PRESENT(__pte)) + *updated = true; + + continue; + } + + level -= 1; + pte = get_pgtable_pte(__pte); + pte = &pte[PM_LEVEL_INDEX(level, iova)]; + } + + /* Tear down existing pte entries */ + if (IOMMU_PTE_PRESENT(*pte)) { + u64 *__pte; + + *updated = true; + __pte = get_pgtable_pte(*pte); + cmpxchg64(pte, *pte, 0ULL); + if (pg_size == IOMMU_PAGE_SIZE_1G) + free_pgtable(__pte, end_level - 1); + else if (pg_size == IOMMU_PAGE_SIZE_2M) + free_pgtable_page(__pte); + } + + return pte; +} + +/* + * This function checks if there is a PTE for a given dma address. + * If there is one, it returns the pointer to it. + */ +static u64 *fetch_pte(struct amd_io_pgtable *pgtable, + unsigned long iova, unsigned long *page_size) +{ + u64 *pte; + int level; + + level = get_pgtable_level() - 1; + pte = &pgtable->pgd[PM_LEVEL_INDEX(level, iova)]; + /* Default page size is 4K */ + *page_size = PAGE_SIZE; + + while (level) { + /* Not present */ + if (!IOMMU_PTE_PRESENT(*pte)) + return NULL; + + /* Walk to the next level */ + pte = get_pgtable_pte(*pte); + pte = &pte[PM_LEVEL_INDEX(level - 1, iova)]; + + /* Large page */ + if (is_large_pte(*pte)) { + if (level == PAGE_MODE_3_LEVEL) + *page_size = IOMMU_PAGE_SIZE_1G; + else if (level == PAGE_MODE_2_LEVEL) + *page_size = IOMMU_PAGE_SIZE_2M; + else + return NULL; /* Wrongly set PSE bit in PTE */ + + break; + } + + level -= 1; + } + + return pte; +} + +static int iommu_v2_map_pages(struct io_pgtable_ops *ops, unsigned long iova, + phys_addr_t paddr, size_t pgsize, size_t pgcount, + int prot, gfp_t gfp, size_t *mapped) +{ + struct protection_domain *pdom = io_pgtable_ops_to_domain(ops); + struct io_pgtable_cfg *cfg = &pdom->iop.iop.cfg; + u64 *pte; + unsigned long map_size; + unsigned long mapped_size = 0; + unsigned long o_iova = iova; + size_t size = pgcount << __ffs(pgsize); + int count = 0; + int ret = 0; + bool updated = false; + + if (WARN_ON(!pgsize || (pgsize & cfg->pgsize_bitmap) != pgsize) || !pgcount) + return -EINVAL; + + if (!(prot & IOMMU_PROT_MASK)) + return -EINVAL; + + while (mapped_size < size) { + map_size = get_alloc_page_size(pgsize); + pte = v2_alloc_pte(pdom->iop.pgd, iova, map_size, &updated); + if (!pte) { + ret = -EINVAL; + goto out; + } + + *pte = set_pte_attr(paddr, map_size, prot); + + count++; + iova += map_size; + paddr += map_size; + mapped_size += map_size; + } + +out: + if (updated) { + if (count > 1) + amd_iommu_flush_tlb(&pdom->domain, 0); + else + amd_iommu_flush_page(&pdom->domain, 0, o_iova); + } + + if (mapped) + *mapped += mapped_size; + + return ret; +} + +static unsigned long iommu_v2_unmap_pages(struct io_pgtable_ops *ops, + unsigned long iova, + size_t pgsize, size_t pgcount, + struct iommu_iotlb_gather *gather) +{ + struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops); + struct io_pgtable_cfg *cfg = &pgtable->iop.cfg; + unsigned long unmap_size; + unsigned long unmapped = 0; + size_t size = pgcount << __ffs(pgsize); + u64 *pte; + + if (WARN_ON(!pgsize || (pgsize & cfg->pgsize_bitmap) != pgsize || !pgcount)) + return 0; + + while (unmapped < size) { + pte = fetch_pte(pgtable, iova, &unmap_size); + if (!pte) + return unmapped; + + *pte = 0ULL; + + iova = (iova & ~(unmap_size - 1)) + unmap_size; + unmapped += unmap_size; + } + + return unmapped; +} + +static phys_addr_t iommu_v2_iova_to_phys(struct io_pgtable_ops *ops, unsigned long iova) +{ + struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops); + unsigned long offset_mask, pte_pgsize; + u64 *pte, __pte; + + pte = fetch_pte(pgtable, iova, &pte_pgsize); + if (!pte || !IOMMU_PTE_PRESENT(*pte)) + return 0; + + offset_mask = pte_pgsize - 1; + __pte = __sme_clr(*pte & PM_ADDR_MASK); + + return (__pte & ~offset_mask) | (iova & offset_mask); +} + +/* + * ---------------------------------------------------- + */ +static void v2_tlb_flush_all(void *cookie) +{ +} + +static void v2_tlb_flush_walk(unsigned long iova, size_t size, + size_t granule, void *cookie) +{ +} + +static void v2_tlb_add_page(struct iommu_iotlb_gather *gather, + unsigned long iova, size_t granule, + void *cookie) +{ +} + +static const struct iommu_flush_ops v2_flush_ops = { + .tlb_flush_all = v2_tlb_flush_all, + .tlb_flush_walk = v2_tlb_flush_walk, + .tlb_add_page = v2_tlb_add_page, +}; + +static void v2_free_pgtable(struct io_pgtable *iop) +{ + struct protection_domain *pdom; + struct amd_io_pgtable *pgtable = container_of(iop, struct amd_io_pgtable, iop); + + pdom = container_of(pgtable, struct protection_domain, iop); + if (!(pdom->flags & PD_IOMMUV2_MASK)) + return; + + /* + * Make changes visible to IOMMUs. No need to clear gcr3 entry + * as gcr3 table is already freed. + */ + amd_iommu_domain_update(pdom); + + /* Free page table */ + free_pgtable(pgtable->pgd, get_pgtable_level()); +} + +static struct io_pgtable *v2_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie) +{ + struct amd_io_pgtable *pgtable = io_pgtable_cfg_to_data(cfg); + struct protection_domain *pdom = (struct protection_domain *)cookie; + int ret; + + pgtable->pgd = alloc_pgtable_page(); + if (!pgtable->pgd) + return NULL; + + ret = amd_iommu_domain_set_gcr3(&pdom->domain, 0, iommu_virt_to_phys(pgtable->pgd)); + if (ret) + goto err_free_pgd; + + pgtable->iop.ops.map_pages = iommu_v2_map_pages; + pgtable->iop.ops.unmap_pages = iommu_v2_unmap_pages; + pgtable->iop.ops.iova_to_phys = iommu_v2_iova_to_phys; + + cfg->pgsize_bitmap = AMD_IOMMU_PGSIZES_V2, + cfg->ias = IOMMU_IN_ADDR_BIT_SIZE, + cfg->oas = IOMMU_OUT_ADDR_BIT_SIZE, + cfg->tlb = &v2_flush_ops; + + return &pgtable->iop; + +err_free_pgd: + free_pgtable_page(pgtable->pgd); + + return NULL; +} + +struct io_pgtable_init_fns io_pgtable_amd_iommu_v2_init_fns = { + .alloc = v2_alloc_pgtable, + .free = v2_free_pgtable, +}; diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 67de9c42205a..65856e401949 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -64,10 +64,6 @@ LIST_HEAD(ioapic_map); LIST_HEAD(hpet_map); LIST_HEAD(acpihid_map); -/* - * Domain for untranslated devices - only allocated - * if iommu=pt passed on kernel cmd line. - */ const struct iommu_ops amd_iommu_ops; static ATOMIC_NOTIFIER_HEAD(ppr_notifier); @@ -83,6 +79,7 @@ struct iommu_cmd { struct kmem_cache *amd_iommu_irq_cache; static void detach_device(struct device *dev); +static int domain_enable_v2(struct protection_domain *domain, int pasids); /**************************************************************************** * @@ -1595,6 +1592,9 @@ static void set_dte_entry(struct amd_iommu *iommu, u16 devid, tmp = DTE_GCR3_VAL_C(gcr3) << DTE_GCR3_SHIFT_C; flags |= tmp; + + if (domain->flags & PD_GIOV_MASK) + pte_root |= DTE_FLAG_GIOV; } flags &= ~DEV_DOMID_MASK; @@ -1648,6 +1648,10 @@ static void do_attach(struct iommu_dev_data *dev_data, domain->dev_iommu[iommu->index] += 1; domain->dev_cnt += 1; + /* Override supported page sizes */ + if (domain->flags & PD_GIOV_MASK) + domain->domain.pgsize_bitmap = AMD_IOMMU_PGSIZES_V2; + /* Update device table */ set_dte_entry(iommu, dev_data->devid, domain, ats, dev_data->iommu_v2); @@ -1692,7 +1696,7 @@ static void pdev_iommuv2_disable(struct pci_dev *pdev) pci_disable_pasid(pdev); } -static int pdev_iommuv2_enable(struct pci_dev *pdev) +static int pdev_pri_ats_enable(struct pci_dev *pdev) { int ret; @@ -1755,11 +1759,19 @@ static int attach_device(struct device *dev, struct iommu_domain *def_domain = iommu_get_dma_domain(dev); ret = -EINVAL; - if (def_domain->type != IOMMU_DOMAIN_IDENTITY) + + /* + * In case of using AMD_IOMMU_V1 page table mode and the device + * is enabling for PPR/ATS support (using v2 table), + * we need to make sure that the domain type is identity map. + */ + if ((amd_iommu_pgtable == AMD_IOMMU_V1) && + def_domain->type != IOMMU_DOMAIN_IDENTITY) { goto out; + } if (dev_data->iommu_v2) { - if (pdev_iommuv2_enable(pdev) != 0) + if (pdev_pri_ats_enable(pdev) != 0) goto out; dev_data->ats.enabled = true; @@ -1972,12 +1984,12 @@ static void protection_domain_free(struct protection_domain *domain) if (!domain) return; - if (domain->id) - domain_id_free(domain->id); - if (domain->iop.pgtbl_cfg.tlb) free_io_pgtable_ops(&domain->iop.iop.ops); + if (domain->id) + domain_id_free(domain->id); + kfree(domain); } @@ -1995,8 +2007,10 @@ static int protection_domain_init_v1(struct protection_domain *domain, int mode) if (mode != PAGE_MODE_NONE) { pt_root = (void *)get_zeroed_page(GFP_KERNEL); - if (!pt_root) + if (!pt_root) { + domain_id_free(domain->id); return -ENOMEM; + } } amd_iommu_domain_set_pgtable(domain, pt_root, mode); @@ -2004,6 +2018,24 @@ static int protection_domain_init_v1(struct protection_domain *domain, int mode) return 0; } +static int protection_domain_init_v2(struct protection_domain *domain) +{ + spin_lock_init(&domain->lock); + domain->id = domain_id_alloc(); + if (!domain->id) + return -ENOMEM; + INIT_LIST_HEAD(&domain->dev_list); + + domain->flags |= PD_GIOV_MASK; + + if (domain_enable_v2(domain, 1)) { + domain_id_free(domain->id); + return -ENOMEM; + } + + return 0; +} + static struct protection_domain *protection_domain_alloc(unsigned int type) { struct io_pgtable_ops *pgtbl_ops; @@ -2031,6 +2063,9 @@ static struct protection_domain *protection_domain_alloc(unsigned int type) case AMD_IOMMU_V1: ret = protection_domain_init_v1(domain, mode); break; + case AMD_IOMMU_V2: + ret = protection_domain_init_v2(domain); + break; default: ret = -EINVAL; } @@ -2039,8 +2074,10 @@ static struct protection_domain *protection_domain_alloc(unsigned int type) goto out_err; pgtbl_ops = alloc_io_pgtable_ops(pgtable, &domain->iop.pgtbl_cfg, domain); - if (!pgtbl_ops) + if (!pgtbl_ops) { + domain_id_free(domain->id); goto out_err; + } return domain; out_err: @@ -2158,13 +2195,13 @@ static void amd_iommu_iotlb_sync_map(struct iommu_domain *dom, struct protection_domain *domain = to_pdomain(dom); struct io_pgtable_ops *ops = &domain->iop.iop.ops; - if (ops->map) + if (ops->map_pages) domain_flush_np_cache(domain, iova, size); } -static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova, - phys_addr_t paddr, size_t page_size, int iommu_prot, - gfp_t gfp) +static int amd_iommu_map_pages(struct iommu_domain *dom, unsigned long iova, + phys_addr_t paddr, size_t pgsize, size_t pgcount, + int iommu_prot, gfp_t gfp, size_t *mapped) { struct protection_domain *domain = to_pdomain(dom); struct io_pgtable_ops *ops = &domain->iop.iop.ops; @@ -2180,8 +2217,10 @@ static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova, if (iommu_prot & IOMMU_WRITE) prot |= IOMMU_PROT_IW; - if (ops->map) - ret = ops->map(ops, iova, paddr, page_size, prot, gfp); + if (ops->map_pages) { + ret = ops->map_pages(ops, iova, paddr, pgsize, + pgcount, prot, gfp, mapped); + } return ret; } @@ -2207,9 +2246,9 @@ static void amd_iommu_iotlb_gather_add_page(struct iommu_domain *domain, iommu_iotlb_gather_add_range(gather, iova, size); } -static size_t amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova, - size_t page_size, - struct iommu_iotlb_gather *gather) +static size_t amd_iommu_unmap_pages(struct iommu_domain *dom, unsigned long iova, + size_t pgsize, size_t pgcount, + struct iommu_iotlb_gather *gather) { struct protection_domain *domain = to_pdomain(dom); struct io_pgtable_ops *ops = &domain->iop.iop.ops; @@ -2219,9 +2258,10 @@ static size_t amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova, (domain->iop.mode == PAGE_MODE_NONE)) return 0; - r = (ops->unmap) ? ops->unmap(ops, iova, page_size, gather) : 0; + r = (ops->unmap_pages) ? ops->unmap_pages(ops, iova, pgsize, pgcount, NULL) : 0; - amd_iommu_iotlb_gather_add_page(dom, gather, iova, page_size); + if (r) + amd_iommu_iotlb_gather_add_page(dom, gather, iova, r); return r; } @@ -2383,8 +2423,8 @@ const struct iommu_ops amd_iommu_ops = { .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = amd_iommu_attach_device, .detach_dev = amd_iommu_detach_device, - .map = amd_iommu_map, - .unmap = amd_iommu_unmap, + .map_pages = amd_iommu_map_pages, + .unmap_pages = amd_iommu_unmap_pages, .iotlb_sync_map = amd_iommu_iotlb_sync_map, .iova_to_phys = amd_iommu_iova_to_phys, .flush_iotlb_all = amd_iommu_flush_iotlb_all, @@ -2431,11 +2471,10 @@ void amd_iommu_domain_direct_map(struct iommu_domain *dom) } EXPORT_SYMBOL(amd_iommu_domain_direct_map); -int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids) +/* Note: This function expects iommu_domain->lock to be held prior calling the function. */ +static int domain_enable_v2(struct protection_domain *domain, int pasids) { - struct protection_domain *domain = to_pdomain(dom); - unsigned long flags; - int levels, ret; + int levels; /* Number of GCR3 table levels required */ for (levels = 0; (pasids - 1) & ~0x1ff; pasids >>= 9) @@ -2444,7 +2483,25 @@ int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids) if (levels > amd_iommu_max_glx_val) return -EINVAL; - spin_lock_irqsave(&domain->lock, flags); + domain->gcr3_tbl = (void *)get_zeroed_page(GFP_ATOMIC); + if (domain->gcr3_tbl == NULL) + return -ENOMEM; + + domain->glx = levels; + domain->flags |= PD_IOMMUV2_MASK; + + amd_iommu_domain_update(domain); + + return 0; +} + +int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids) +{ + struct protection_domain *pdom = to_pdomain(dom); + unsigned long flags; + int ret; + + spin_lock_irqsave(&pdom->lock, flags); /* * Save us all sanity checks whether devices already in the @@ -2452,24 +2509,14 @@ int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids) * devices attached when it is switched into IOMMUv2 mode. */ ret = -EBUSY; - if (domain->dev_cnt > 0 || domain->flags & PD_IOMMUV2_MASK) - goto out; - - ret = -ENOMEM; - domain->gcr3_tbl = (void *)get_zeroed_page(GFP_ATOMIC); - if (domain->gcr3_tbl == NULL) + if (pdom->dev_cnt > 0 || pdom->flags & PD_IOMMUV2_MASK) goto out; - domain->glx = levels; - domain->flags |= PD_IOMMUV2_MASK; - - amd_iommu_domain_update(domain); - - ret = 0; + if (!pdom->gcr3_tbl) + ret = domain_enable_v2(pdom, pasids); out: - spin_unlock_irqrestore(&domain->lock, flags); - + spin_unlock_irqrestore(&pdom->lock, flags); return ret; } EXPORT_SYMBOL(amd_iommu_domain_enable_v2); diff --git a/drivers/iommu/amd/iommu_v2.c b/drivers/iommu/amd/iommu_v2.c index 696d5555be57..6a1f02c62dff 100644 --- a/drivers/iommu/amd/iommu_v2.c +++ b/drivers/iommu/amd/iommu_v2.c @@ -777,6 +777,8 @@ int amd_iommu_init_device(struct pci_dev *pdev, int pasids) if (dev_state->domain == NULL) goto out_free_states; + /* See iommu_is_default_domain() */ + dev_state->domain->type = IOMMU_DOMAIN_IDENTITY; amd_iommu_domain_direct_map(dev_state->domain); ret = amd_iommu_domain_enable_v2(dev_state->domain, pasids); |