diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2022-10-10 23:20:53 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2022-10-10 23:20:53 +0300 |
commit | f23cdfcd04f7c044ee47dac04484b8d289088776 (patch) | |
tree | ab4c292b83cf4534b08cdd258cda38893f2cfa39 /drivers/iommu/amd/io_pgtable_v2.c | |
parent | 706eacadd5c5cc13510ba69eea2917c2ce5ffa99 (diff) | |
parent | 38713c6028a3172c4c256512c3fbcfc799fe2d43 (diff) | |
download | linux-f23cdfcd04f7c044ee47dac04484b8d289088776.tar.xz |
Merge tag 'iommu-updates-v6.1' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu
Pull iommu updates from Joerg Roedel:
- remove the bus_set_iommu() interface which became unnecesary because
of IOMMU per-device probing
- make the dma-iommu.h header private
- Intel VT-d changes from Lu Baolu:
- Decouple PASID and PRI from SVA
- Add ESRTPS & ESIRTPS capability check
- Cleanups
- Apple DART support for the M1 Pro/MAX SOCs
- support for AMD IOMMUv2 page-tables for the DMA-API layer.
The v2 page-tables are compatible with the x86 CPU page-tables. Using
them for DMA-API prepares support for hardware-assisted IOMMU
virtualization
- support for MT6795 Helio X10 M4Us in the Mediatek IOMMU driver
- some smaller fixes and cleanups
* tag 'iommu-updates-v6.1' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu: (59 commits)
iommu/vt-d: Avoid unnecessary global DMA cache invalidation
iommu/vt-d: Avoid unnecessary global IRTE cache invalidation
iommu/vt-d: Rename cap_5lp_support to cap_fl5lp_support
iommu/vt-d: Remove pasid_set_eafe()
iommu/vt-d: Decouple PASID & PRI enabling from SVA
iommu/vt-d: Remove unnecessary SVA data accesses in page fault path
dt-bindings: iommu: arm,smmu-v3: Relax order of interrupt names
iommu: dart: Support t6000 variant
iommu/io-pgtable-dart: Add DART PTE support for t6000
iommu/io-pgtable: Add DART subpage protection support
iommu/io-pgtable: Move Apple DART support to its own file
iommu/mediatek: Add support for MT6795 Helio X10 M4Us
iommu/mediatek: Introduce new flag TF_PORT_TO_ADDR_MT8173
dt-bindings: mediatek: Add bindings for MT6795 M4U
iommu/iova: Fix module config properly
iommu/amd: Fix sparse warning
iommu/amd: Remove outdated comment
iommu/amd: Free domain ID after domain_flush_pages
iommu/amd: Free domain id in error path
iommu/virtio: Fix compile error with viommu_capable()
...
Diffstat (limited to 'drivers/iommu/amd/io_pgtable_v2.c')
-rw-r--r-- | drivers/iommu/amd/io_pgtable_v2.c | 415 |
1 files changed, 415 insertions, 0 deletions
diff --git a/drivers/iommu/amd/io_pgtable_v2.c b/drivers/iommu/amd/io_pgtable_v2.c new file mode 100644 index 000000000000..8638ddf6fb3b --- /dev/null +++ b/drivers/iommu/amd/io_pgtable_v2.c @@ -0,0 +1,415 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * CPU-agnostic AMD IO page table v2 allocator. + * + * Copyright (C) 2022 Advanced Micro Devices, Inc. + * Author: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com> + * Author: Vasant Hegde <vasant.hegde@amd.com> + */ + +#define pr_fmt(fmt) "AMD-Vi: " fmt +#define dev_fmt(fmt) pr_fmt(fmt) + +#include <linux/bitops.h> +#include <linux/io-pgtable.h> +#include <linux/kernel.h> + +#include <asm/barrier.h> + +#include "amd_iommu_types.h" +#include "amd_iommu.h" + +#define IOMMU_PAGE_PRESENT BIT_ULL(0) /* Is present */ +#define IOMMU_PAGE_RW BIT_ULL(1) /* Writeable */ +#define IOMMU_PAGE_USER BIT_ULL(2) /* Userspace addressable */ +#define IOMMU_PAGE_PWT BIT_ULL(3) /* Page write through */ +#define IOMMU_PAGE_PCD BIT_ULL(4) /* Page cache disabled */ +#define IOMMU_PAGE_ACCESS BIT_ULL(5) /* Was accessed (updated by IOMMU) */ +#define IOMMU_PAGE_DIRTY BIT_ULL(6) /* Was written to (updated by IOMMU) */ +#define IOMMU_PAGE_PSE BIT_ULL(7) /* Page Size Extensions */ +#define IOMMU_PAGE_NX BIT_ULL(63) /* No execute */ + +#define MAX_PTRS_PER_PAGE 512 + +#define IOMMU_PAGE_SIZE_2M BIT_ULL(21) +#define IOMMU_PAGE_SIZE_1G BIT_ULL(30) + + +static inline int get_pgtable_level(void) +{ + /* 5 level page table is not supported */ + return PAGE_MODE_4_LEVEL; +} + +static inline bool is_large_pte(u64 pte) +{ + return (pte & IOMMU_PAGE_PSE); +} + +static inline void *alloc_pgtable_page(void) +{ + return (void *)get_zeroed_page(GFP_KERNEL); +} + +static inline u64 set_pgtable_attr(u64 *page) +{ + u64 prot; + + prot = IOMMU_PAGE_PRESENT | IOMMU_PAGE_RW | IOMMU_PAGE_USER; + prot |= IOMMU_PAGE_ACCESS | IOMMU_PAGE_DIRTY; + + return (iommu_virt_to_phys(page) | prot); +} + +static inline void *get_pgtable_pte(u64 pte) +{ + return iommu_phys_to_virt(pte & PM_ADDR_MASK); +} + +static u64 set_pte_attr(u64 paddr, u64 pg_size, int prot) +{ + u64 pte; + + pte = __sme_set(paddr & PM_ADDR_MASK); + pte |= IOMMU_PAGE_PRESENT | IOMMU_PAGE_USER; + pte |= IOMMU_PAGE_ACCESS | IOMMU_PAGE_DIRTY; + + if (prot & IOMMU_PROT_IW) + pte |= IOMMU_PAGE_RW; + + /* Large page */ + if (pg_size == IOMMU_PAGE_SIZE_1G || pg_size == IOMMU_PAGE_SIZE_2M) + pte |= IOMMU_PAGE_PSE; + + return pte; +} + +static inline u64 get_alloc_page_size(u64 size) +{ + if (size >= IOMMU_PAGE_SIZE_1G) + return IOMMU_PAGE_SIZE_1G; + + if (size >= IOMMU_PAGE_SIZE_2M) + return IOMMU_PAGE_SIZE_2M; + + return PAGE_SIZE; +} + +static inline int page_size_to_level(u64 pg_size) +{ + if (pg_size == IOMMU_PAGE_SIZE_1G) + return PAGE_MODE_3_LEVEL; + if (pg_size == IOMMU_PAGE_SIZE_2M) + return PAGE_MODE_2_LEVEL; + + return PAGE_MODE_1_LEVEL; +} + +static inline void free_pgtable_page(u64 *pt) +{ + free_page((unsigned long)pt); +} + +static void free_pgtable(u64 *pt, int level) +{ + u64 *p; + int i; + + for (i = 0; i < MAX_PTRS_PER_PAGE; i++) { + /* PTE present? */ + if (!IOMMU_PTE_PRESENT(pt[i])) + continue; + + if (is_large_pte(pt[i])) + continue; + + /* + * Free the next level. No need to look at l1 tables here since + * they can only contain leaf PTEs; just free them directly. + */ + p = get_pgtable_pte(pt[i]); + if (level > 2) + free_pgtable(p, level - 1); + else + free_pgtable_page(p); + } + + free_pgtable_page(pt); +} + +/* Allocate page table */ +static u64 *v2_alloc_pte(u64 *pgd, unsigned long iova, + unsigned long pg_size, bool *updated) +{ + u64 *pte, *page; + int level, end_level; + + level = get_pgtable_level() - 1; + end_level = page_size_to_level(pg_size); + pte = &pgd[PM_LEVEL_INDEX(level, iova)]; + iova = PAGE_SIZE_ALIGN(iova, PAGE_SIZE); + + while (level >= end_level) { + u64 __pte, __npte; + + __pte = *pte; + + if (IOMMU_PTE_PRESENT(__pte) && is_large_pte(__pte)) { + /* Unmap large pte */ + cmpxchg64(pte, *pte, 0ULL); + *updated = true; + continue; + } + + if (!IOMMU_PTE_PRESENT(__pte)) { + page = alloc_pgtable_page(); + if (!page) + return NULL; + + __npte = set_pgtable_attr(page); + /* pte could have been changed somewhere. */ + if (cmpxchg64(pte, __pte, __npte) != __pte) + free_pgtable_page(page); + else if (IOMMU_PTE_PRESENT(__pte)) + *updated = true; + + continue; + } + + level -= 1; + pte = get_pgtable_pte(__pte); + pte = &pte[PM_LEVEL_INDEX(level, iova)]; + } + + /* Tear down existing pte entries */ + if (IOMMU_PTE_PRESENT(*pte)) { + u64 *__pte; + + *updated = true; + __pte = get_pgtable_pte(*pte); + cmpxchg64(pte, *pte, 0ULL); + if (pg_size == IOMMU_PAGE_SIZE_1G) + free_pgtable(__pte, end_level - 1); + else if (pg_size == IOMMU_PAGE_SIZE_2M) + free_pgtable_page(__pte); + } + + return pte; +} + +/* + * This function checks if there is a PTE for a given dma address. + * If there is one, it returns the pointer to it. + */ +static u64 *fetch_pte(struct amd_io_pgtable *pgtable, + unsigned long iova, unsigned long *page_size) +{ + u64 *pte; + int level; + + level = get_pgtable_level() - 1; + pte = &pgtable->pgd[PM_LEVEL_INDEX(level, iova)]; + /* Default page size is 4K */ + *page_size = PAGE_SIZE; + + while (level) { + /* Not present */ + if (!IOMMU_PTE_PRESENT(*pte)) + return NULL; + + /* Walk to the next level */ + pte = get_pgtable_pte(*pte); + pte = &pte[PM_LEVEL_INDEX(level - 1, iova)]; + + /* Large page */ + if (is_large_pte(*pte)) { + if (level == PAGE_MODE_3_LEVEL) + *page_size = IOMMU_PAGE_SIZE_1G; + else if (level == PAGE_MODE_2_LEVEL) + *page_size = IOMMU_PAGE_SIZE_2M; + else + return NULL; /* Wrongly set PSE bit in PTE */ + + break; + } + + level -= 1; + } + + return pte; +} + +static int iommu_v2_map_pages(struct io_pgtable_ops *ops, unsigned long iova, + phys_addr_t paddr, size_t pgsize, size_t pgcount, + int prot, gfp_t gfp, size_t *mapped) +{ + struct protection_domain *pdom = io_pgtable_ops_to_domain(ops); + struct io_pgtable_cfg *cfg = &pdom->iop.iop.cfg; + u64 *pte; + unsigned long map_size; + unsigned long mapped_size = 0; + unsigned long o_iova = iova; + size_t size = pgcount << __ffs(pgsize); + int count = 0; + int ret = 0; + bool updated = false; + + if (WARN_ON(!pgsize || (pgsize & cfg->pgsize_bitmap) != pgsize) || !pgcount) + return -EINVAL; + + if (!(prot & IOMMU_PROT_MASK)) + return -EINVAL; + + while (mapped_size < size) { + map_size = get_alloc_page_size(pgsize); + pte = v2_alloc_pte(pdom->iop.pgd, iova, map_size, &updated); + if (!pte) { + ret = -EINVAL; + goto out; + } + + *pte = set_pte_attr(paddr, map_size, prot); + + count++; + iova += map_size; + paddr += map_size; + mapped_size += map_size; + } + +out: + if (updated) { + if (count > 1) + amd_iommu_flush_tlb(&pdom->domain, 0); + else + amd_iommu_flush_page(&pdom->domain, 0, o_iova); + } + + if (mapped) + *mapped += mapped_size; + + return ret; +} + +static unsigned long iommu_v2_unmap_pages(struct io_pgtable_ops *ops, + unsigned long iova, + size_t pgsize, size_t pgcount, + struct iommu_iotlb_gather *gather) +{ + struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops); + struct io_pgtable_cfg *cfg = &pgtable->iop.cfg; + unsigned long unmap_size; + unsigned long unmapped = 0; + size_t size = pgcount << __ffs(pgsize); + u64 *pte; + + if (WARN_ON(!pgsize || (pgsize & cfg->pgsize_bitmap) != pgsize || !pgcount)) + return 0; + + while (unmapped < size) { + pte = fetch_pte(pgtable, iova, &unmap_size); + if (!pte) + return unmapped; + + *pte = 0ULL; + + iova = (iova & ~(unmap_size - 1)) + unmap_size; + unmapped += unmap_size; + } + + return unmapped; +} + +static phys_addr_t iommu_v2_iova_to_phys(struct io_pgtable_ops *ops, unsigned long iova) +{ + struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops); + unsigned long offset_mask, pte_pgsize; + u64 *pte, __pte; + + pte = fetch_pte(pgtable, iova, &pte_pgsize); + if (!pte || !IOMMU_PTE_PRESENT(*pte)) + return 0; + + offset_mask = pte_pgsize - 1; + __pte = __sme_clr(*pte & PM_ADDR_MASK); + + return (__pte & ~offset_mask) | (iova & offset_mask); +} + +/* + * ---------------------------------------------------- + */ +static void v2_tlb_flush_all(void *cookie) +{ +} + +static void v2_tlb_flush_walk(unsigned long iova, size_t size, + size_t granule, void *cookie) +{ +} + +static void v2_tlb_add_page(struct iommu_iotlb_gather *gather, + unsigned long iova, size_t granule, + void *cookie) +{ +} + +static const struct iommu_flush_ops v2_flush_ops = { + .tlb_flush_all = v2_tlb_flush_all, + .tlb_flush_walk = v2_tlb_flush_walk, + .tlb_add_page = v2_tlb_add_page, +}; + +static void v2_free_pgtable(struct io_pgtable *iop) +{ + struct protection_domain *pdom; + struct amd_io_pgtable *pgtable = container_of(iop, struct amd_io_pgtable, iop); + + pdom = container_of(pgtable, struct protection_domain, iop); + if (!(pdom->flags & PD_IOMMUV2_MASK)) + return; + + /* + * Make changes visible to IOMMUs. No need to clear gcr3 entry + * as gcr3 table is already freed. + */ + amd_iommu_domain_update(pdom); + + /* Free page table */ + free_pgtable(pgtable->pgd, get_pgtable_level()); +} + +static struct io_pgtable *v2_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie) +{ + struct amd_io_pgtable *pgtable = io_pgtable_cfg_to_data(cfg); + struct protection_domain *pdom = (struct protection_domain *)cookie; + int ret; + + pgtable->pgd = alloc_pgtable_page(); + if (!pgtable->pgd) + return NULL; + + ret = amd_iommu_domain_set_gcr3(&pdom->domain, 0, iommu_virt_to_phys(pgtable->pgd)); + if (ret) + goto err_free_pgd; + + pgtable->iop.ops.map_pages = iommu_v2_map_pages; + pgtable->iop.ops.unmap_pages = iommu_v2_unmap_pages; + pgtable->iop.ops.iova_to_phys = iommu_v2_iova_to_phys; + + cfg->pgsize_bitmap = AMD_IOMMU_PGSIZES_V2, + cfg->ias = IOMMU_IN_ADDR_BIT_SIZE, + cfg->oas = IOMMU_OUT_ADDR_BIT_SIZE, + cfg->tlb = &v2_flush_ops; + + return &pgtable->iop; + +err_free_pgd: + free_pgtable_page(pgtable->pgd); + + return NULL; +} + +struct io_pgtable_init_fns io_pgtable_amd_iommu_v2_init_fns = { + .alloc = v2_alloc_pgtable, + .free = v2_free_pgtable, +}; |