author | Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com> | 2023-01-17 12:27:18 +0300
committer | Daniel Vetter <daniel.vetter@ffwll.ch> | 2023-01-19 13:07:22 +0300
commit | 263b2ba5fc93c875129e0d2b4034d7d8a34b3d39 (patch)
tree | 2526b4b3c8f76eba922575cf4087a31e93d8bdbd /drivers/accel/ivpu/ivpu_mmu_context.c
parent | 35b137630f08d913fc2e33df33ccc2570dff3f7d (diff)
download | linux-263b2ba5fc93c875129e0d2b4034d7d8a34b3d39.tar.xz
accel/ivpu: Add Intel VPU MMU support
The VPU Memory Management Unit is based on the ARM MMU-600.
It allows the creation of multiple virtual address spaces for the
device and the mapping of non-contiguous host memory (there is no
dedicated memory on the VPU).
An address space is implemented as a struct ivpu_mmu_context. It has
an ID, a drm_mm allocator for VPU addresses and a struct
ivpu_mmu_pgtable that holds the actual 3-level, 4KB page table.
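The structure declarations themselves live in ivpu_mmu_context.h, which is outside this diffstat; the following is only a rough sketch of their likely shape, inferred from the fields used in ivpu_mmu_context.c below (the field order and the 512-entry count are assumptions):

```c
/* Sketch inferred from usage in ivpu_mmu_context.c; the real definitions
 * live in ivpu_mmu_context.h. 512 = 4 KB table / sizeof(u64) (assumed).
 */
#define IVPU_MMU_PGTABLE_ENTRIES 512

struct ivpu_mmu_pgtable {
	u64		**pgd_cpu_entries[IVPU_MMU_PGTABLE_ENTRIES]; /* CPU pointers to PTE tables */
	u64		*pgd_entries[IVPU_MMU_PGTABLE_ENTRIES];      /* CPU pointers to PMD tables */
	u64		*pgd;		/* first-level table, CPU view */
	dma_addr_t	pgd_dma;	/* first-level table, DMA view */
};

struct ivpu_mmu_context {
	struct mutex		lock;	 /* protects the page table and drm_mm */
	struct ivpu_mmu_pgtable	pgtable; /* 3-level, 4KB page table */
	struct drm_mm		mm;	 /* VPU virtual address allocator */
	struct list_head	bo_list; /* buffers mapped in this context */
	u32			id;	 /* 0 = global, non-zero = user SSID */
};
```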
The context with ID 0 (the global context) is created upon driver
initialization and is mainly used for mapping memory required to
execute the firmware.
Contexts with non-zero IDs are user contexts, allocated each time
the device is open()-ed; they map command buffers and other
workload-related memory.
Workloads executing in a given context have access only
to the memory mapped in that context.
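As an illustrative sketch only, this is how the rest of the driver is expected to wire these contexts up; the probe/open helper names and the file_priv->ctx member are assumptions, only the ivpu_mmu_*_context_init() entry points come from this patch:

```c
/* Illustrative call sites only; the real ones live elsewhere in the series. */

/* Driver probe: create the global context (ID 0) for firmware mappings. */
static int example_init_global_mmu(struct ivpu_device *vdev)
{
	return ivpu_mmu_global_context_init(vdev);
}

/* open(): create a user context with a non-zero SSID for this file. */
static int example_init_user_mmu(struct ivpu_device *vdev,
				 struct ivpu_file_priv *file_priv, u32 ctx_id)
{
	return ivpu_mmu_user_context_init(vdev, &file_priv->ctx, ctx_id);
}
```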
This patch has two main files:
- ivpu_mmu_context.c handles MMU page tables and memory mapping
  (see the indexing sketch below)
- ivpu_mmu.c implements a driver that programs the MMU device
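The indexing sketch referenced above: ivpu_mmu_context.c splits a VPU virtual address into three 9-bit table indices plus a 12-bit page offset, using the masks defined in the patch. A minimal standalone illustration follows; the helper function and the example address are made up for the demonstration:

```c
#include <linux/bitfield.h>
#include <linux/bits.h>
#include <linux/printk.h>

#define IVPU_MMU_PGD_INDEX_MASK GENMASK(38, 30)	/* level 1: 512 entries */
#define IVPU_MMU_PMD_INDEX_MASK GENMASK(29, 21)	/* level 2: 512 entries */
#define IVPU_MMU_PTE_INDEX_MASK GENMASK(20, 12)	/* level 3: 512 entries */

/* Decompose a VPU address the same way ivpu_mmu_context_map_page() does. */
static void example_decompose(u64 vpu_addr)
{
	int pgd_index = FIELD_GET(IVPU_MMU_PGD_INDEX_MASK, vpu_addr);
	int pmd_index = FIELD_GET(IVPU_MMU_PMD_INDEX_MASK, vpu_addr);
	int pte_index = FIELD_GET(IVPU_MMU_PTE_INDEX_MASK, vpu_addr);

	/* For vpu_addr = 0x123456000: pgd 4, pmd 282, pte 86, offset 0. */
	pr_info("pgd %d, pmd %d, pte %d, offset 0x%llx\n",
		pgd_index, pmd_index, pte_index, vpu_addr & GENMASK_ULL(11, 0));
}
```

Each level thus selects one of 512 u64 entries in a 4 KB table, which matches the IVPU_MMU_PGTABLE_SIZE and *_MAP_SIZE definitions in the patch.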
Co-developed-by: Karol Wachowski <karol.wachowski@linux.intel.com>
Signed-off-by: Karol Wachowski <karol.wachowski@linux.intel.com>
Co-developed-by: Krystian Pradzynski <krystian.pradzynski@linux.intel.com>
Signed-off-by: Krystian Pradzynski <krystian.pradzynski@linux.intel.com>
Signed-off-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Reviewed-by: Jeffrey Hugo <quic_jhugo@quicinc.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: https://patchwork.freedesktop.org/patch/msgid/20230117092723.60441-3-jacek.lawrynowicz@linux.intel.com
Diffstat (limited to 'drivers/accel/ivpu/ivpu_mmu_context.c')
-rw-r--r-- | drivers/accel/ivpu/ivpu_mmu_context.c | 398 |
1 file changed, 398 insertions, 0 deletions
diff --git a/drivers/accel/ivpu/ivpu_mmu_context.c b/drivers/accel/ivpu/ivpu_mmu_context.c
new file mode 100644
index 000000000000..8ce9b12ac356
--- /dev/null
+++ b/drivers/accel/ivpu/ivpu_mmu_context.c
@@ -0,0 +1,398 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020-2023 Intel Corporation
+ */
+
+#include <linux/bitfield.h>
+#include <linux/highmem.h>
+
+#include "ivpu_drv.h"
+#include "ivpu_hw.h"
+#include "ivpu_mmu.h"
+#include "ivpu_mmu_context.h"
+
+#define IVPU_MMU_PGD_INDEX_MASK          GENMASK(38, 30)
+#define IVPU_MMU_PMD_INDEX_MASK          GENMASK(29, 21)
+#define IVPU_MMU_PTE_INDEX_MASK          GENMASK(20, 12)
+#define IVPU_MMU_ENTRY_FLAGS_MASK        GENMASK(11, 0)
+#define IVPU_MMU_ENTRY_FLAG_NG           BIT(11)
+#define IVPU_MMU_ENTRY_FLAG_AF           BIT(10)
+#define IVPU_MMU_ENTRY_FLAG_USER         BIT(6)
+#define IVPU_MMU_ENTRY_FLAG_LLC_COHERENT BIT(2)
+#define IVPU_MMU_ENTRY_FLAG_TYPE_PAGE    BIT(1)
+#define IVPU_MMU_ENTRY_FLAG_VALID        BIT(0)
+
+#define IVPU_MMU_PAGE_SIZE    SZ_4K
+#define IVPU_MMU_PTE_MAP_SIZE (IVPU_MMU_PGTABLE_ENTRIES * IVPU_MMU_PAGE_SIZE)
+#define IVPU_MMU_PMD_MAP_SIZE (IVPU_MMU_PGTABLE_ENTRIES * IVPU_MMU_PTE_MAP_SIZE)
+#define IVPU_MMU_PGTABLE_SIZE (IVPU_MMU_PGTABLE_ENTRIES * sizeof(u64))
+
+#define IVPU_MMU_DUMMY_ADDRESS 0xdeadb000
+#define IVPU_MMU_ENTRY_VALID   (IVPU_MMU_ENTRY_FLAG_TYPE_PAGE | IVPU_MMU_ENTRY_FLAG_VALID)
+#define IVPU_MMU_ENTRY_INVALID (IVPU_MMU_DUMMY_ADDRESS & ~IVPU_MMU_ENTRY_FLAGS_MASK)
+#define IVPU_MMU_ENTRY_MAPPED  (IVPU_MMU_ENTRY_FLAG_AF | IVPU_MMU_ENTRY_FLAG_USER | \
+				IVPU_MMU_ENTRY_FLAG_NG | IVPU_MMU_ENTRY_VALID)
+
+static int ivpu_mmu_pgtable_init(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable)
+{
+	dma_addr_t pgd_dma;
+	u64 *pgd;
+
+	pgd = dma_alloc_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, &pgd_dma, GFP_KERNEL);
+	if (!pgd)
+		return -ENOMEM;
+
+	pgtable->pgd = pgd;
+	pgtable->pgd_dma = pgd_dma;
+
+	return 0;
+}
+
+static void ivpu_mmu_pgtable_free(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable)
+{
+	int pgd_index, pmd_index;
+
+	for (pgd_index = 0; pgd_index < IVPU_MMU_PGTABLE_ENTRIES; ++pgd_index) {
+		u64 **pmd_entries = pgtable->pgd_cpu_entries[pgd_index];
+		u64 *pmd = pgtable->pgd_entries[pgd_index];
+
+		if (!pmd_entries)
+			continue;
+
+		for (pmd_index = 0; pmd_index < IVPU_MMU_PGTABLE_ENTRIES; ++pmd_index) {
+			if (pmd_entries[pmd_index])
+				dma_free_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE,
+					    pmd_entries[pmd_index],
+					    pmd[pmd_index] & ~IVPU_MMU_ENTRY_FLAGS_MASK);
+		}
+
+		kfree(pmd_entries);
+		dma_free_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, pgtable->pgd_entries[pgd_index],
+			    pgtable->pgd[pgd_index] & ~IVPU_MMU_ENTRY_FLAGS_MASK);
+	}
+
+	dma_free_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, pgtable->pgd,
+		    pgtable->pgd_dma & ~IVPU_MMU_ENTRY_FLAGS_MASK);
+}
+
+static u64*
+ivpu_mmu_ensure_pmd(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable, u64 pgd_index)
+{
+	u64 **pmd_entries;
+	dma_addr_t pmd_dma;
+	u64 *pmd;
+
+	if (pgtable->pgd_entries[pgd_index])
+		return pgtable->pgd_entries[pgd_index];
+
+	pmd = dma_alloc_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, &pmd_dma, GFP_KERNEL);
+	if (!pmd)
+		return NULL;
+
+	pmd_entries = kzalloc(IVPU_MMU_PGTABLE_SIZE, GFP_KERNEL);
+	if (!pmd_entries)
+		goto err_free_pgd;
+
+	pgtable->pgd_entries[pgd_index] = pmd;
+	pgtable->pgd_cpu_entries[pgd_index] = pmd_entries;
+	pgtable->pgd[pgd_index] = pmd_dma | IVPU_MMU_ENTRY_VALID;
+
+	return pmd;
+
+err_free_pgd:
+	dma_free_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, pmd, pmd_dma);
+	return NULL;
+}
+
+static u64*
+ivpu_mmu_ensure_pte(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable,
+		    int pgd_index, int pmd_index)
+{
+	dma_addr_t pte_dma;
+	u64 *pte;
+
+	if (pgtable->pgd_cpu_entries[pgd_index][pmd_index])
+		return pgtable->pgd_cpu_entries[pgd_index][pmd_index];
+
+	pte = dma_alloc_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, &pte_dma, GFP_KERNEL);
+	if (!pte)
+		return NULL;
+
+	pgtable->pgd_cpu_entries[pgd_index][pmd_index] = pte;
+	pgtable->pgd_entries[pgd_index][pmd_index] = pte_dma | IVPU_MMU_ENTRY_VALID;
+
+	return pte;
+}
+
+static int
+ivpu_mmu_context_map_page(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
+			  u64 vpu_addr, dma_addr_t dma_addr, int prot)
+{
+	u64 *pte;
+	int pgd_index = FIELD_GET(IVPU_MMU_PGD_INDEX_MASK, vpu_addr);
+	int pmd_index = FIELD_GET(IVPU_MMU_PMD_INDEX_MASK, vpu_addr);
+	int pte_index = FIELD_GET(IVPU_MMU_PTE_INDEX_MASK, vpu_addr);
+
+	/* Allocate PMD - second level page table if needed */
+	if (!ivpu_mmu_ensure_pmd(vdev, &ctx->pgtable, pgd_index))
+		return -ENOMEM;
+
+	/* Allocate PTE - third level page table if needed */
+	pte = ivpu_mmu_ensure_pte(vdev, &ctx->pgtable, pgd_index, pmd_index);
+	if (!pte)
+		return -ENOMEM;
+
+	/* Update PTE - third level page table with DMA address */
+	pte[pte_index] = dma_addr | prot;
+
+	return 0;
+}
+
+static void ivpu_mmu_context_unmap_page(struct ivpu_mmu_context *ctx, u64 vpu_addr)
+{
+	int pgd_index = FIELD_GET(IVPU_MMU_PGD_INDEX_MASK, vpu_addr);
+	int pmd_index = FIELD_GET(IVPU_MMU_PMD_INDEX_MASK, vpu_addr);
+	int pte_index = FIELD_GET(IVPU_MMU_PTE_INDEX_MASK, vpu_addr);
+
+	/* Update PTE with dummy physical address and clear flags */
+	ctx->pgtable.pgd_cpu_entries[pgd_index][pmd_index][pte_index] = IVPU_MMU_ENTRY_INVALID;
+}
+
+static void
+ivpu_mmu_context_flush_page_tables(struct ivpu_mmu_context *ctx, u64 vpu_addr, size_t size)
+{
+	u64 end_addr = vpu_addr + size;
+	u64 *pgd = ctx->pgtable.pgd;
+
+	/* Align to PMD entry (2 MB) */
+	vpu_addr &= ~(IVPU_MMU_PTE_MAP_SIZE - 1);
+
+	while (vpu_addr < end_addr) {
+		int pgd_index = FIELD_GET(IVPU_MMU_PGD_INDEX_MASK, vpu_addr);
+		u64 pmd_end = (pgd_index + 1) * (u64)IVPU_MMU_PMD_MAP_SIZE;
+		u64 *pmd = ctx->pgtable.pgd_entries[pgd_index];
+
+		while (vpu_addr < end_addr && vpu_addr < pmd_end) {
+			int pmd_index = FIELD_GET(IVPU_MMU_PMD_INDEX_MASK, vpu_addr);
+			u64 *pte = ctx->pgtable.pgd_cpu_entries[pgd_index][pmd_index];
+
+			clflush_cache_range(pte, IVPU_MMU_PGTABLE_SIZE);
+			vpu_addr += IVPU_MMU_PTE_MAP_SIZE;
+		}
+		clflush_cache_range(pmd, IVPU_MMU_PGTABLE_SIZE);
+	}
+	clflush_cache_range(pgd, IVPU_MMU_PGTABLE_SIZE);
+}
+
+static int
+ivpu_mmu_context_map_pages(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
+			   u64 vpu_addr, dma_addr_t dma_addr, size_t size, int prot)
+{
+	while (size) {
+		int ret = ivpu_mmu_context_map_page(vdev, ctx, vpu_addr, dma_addr, prot);
+
+		if (ret)
+			return ret;
+
+		vpu_addr += IVPU_MMU_PAGE_SIZE;
+		dma_addr += IVPU_MMU_PAGE_SIZE;
+		size -= IVPU_MMU_PAGE_SIZE;
+	}
+
+	return 0;
+}
+
+static void ivpu_mmu_context_unmap_pages(struct ivpu_mmu_context *ctx, u64 vpu_addr, size_t size)
+{
+	while (size) {
+		ivpu_mmu_context_unmap_page(ctx, vpu_addr);
+		vpu_addr += IVPU_MMU_PAGE_SIZE;
+		size -= IVPU_MMU_PAGE_SIZE;
+	}
+}
+
+int
+ivpu_mmu_context_map_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
+			 u64 vpu_addr, struct sg_table *sgt,  bool llc_coherent)
+{
+	struct scatterlist *sg;
+	int prot;
+	int ret;
+	u64 i;
+
+	if (!IS_ALIGNED(vpu_addr, IVPU_MMU_PAGE_SIZE))
+		return -EINVAL;
+	/*
+	 * VPU is only 32 bit, but DMA engine is 38 bit
+	 * Ranges < 2 GB are reserved for VPU internal registers
+	 * Limit range to 8 GB
+	 */
+	if (vpu_addr < SZ_2G || vpu_addr > SZ_8G)
+		return -EINVAL;
+
+	prot = IVPU_MMU_ENTRY_MAPPED;
+	if (llc_coherent)
+		prot |= IVPU_MMU_ENTRY_FLAG_LLC_COHERENT;
+
+	mutex_lock(&ctx->lock);
+
+	for_each_sgtable_dma_sg(sgt, sg, i) {
+		u64 dma_addr = sg_dma_address(sg) - sg->offset;
+		size_t size = sg_dma_len(sg) + sg->offset;
+
+		ret = ivpu_mmu_context_map_pages(vdev, ctx, vpu_addr, dma_addr, size, prot);
+		if (ret) {
+			ivpu_err(vdev, "Failed to map context pages\n");
+			mutex_unlock(&ctx->lock);
+			return ret;
+		}
+		ivpu_mmu_context_flush_page_tables(ctx, vpu_addr, size);
+		vpu_addr += size;
+	}
+
+	mutex_unlock(&ctx->lock);
+
+	ret = ivpu_mmu_invalidate_tlb(vdev, ctx->id);
+	if (ret)
+		ivpu_err(vdev, "Failed to invalidate TLB for ctx %u: %d\n", ctx->id, ret);
+	return ret;
+}
+
+void
+ivpu_mmu_context_unmap_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
+			   u64 vpu_addr, struct sg_table *sgt)
+{
+	struct scatterlist *sg;
+	int ret;
+	u64 i;
+
+	if (!IS_ALIGNED(vpu_addr, IVPU_MMU_PAGE_SIZE))
+		ivpu_warn(vdev, "Unaligned vpu_addr: 0x%llx\n", vpu_addr);
+
+	mutex_lock(&ctx->lock);
+
+	for_each_sgtable_dma_sg(sgt, sg, i) {
+		size_t size = sg_dma_len(sg) + sg->offset;
+
+		ivpu_mmu_context_unmap_pages(ctx, vpu_addr, size);
+		ivpu_mmu_context_flush_page_tables(ctx, vpu_addr, size);
+		vpu_addr += size;
+	}
+
+	mutex_unlock(&ctx->lock);
+
+	ret = ivpu_mmu_invalidate_tlb(vdev, ctx->id);
+	if (ret)
+		ivpu_warn(vdev, "Failed to invalidate TLB for ctx %u: %d\n", ctx->id, ret);
+}
+
+int
+ivpu_mmu_context_insert_node_locked(struct ivpu_mmu_context *ctx,
+				    const struct ivpu_addr_range *range,
+				    u64 size, struct drm_mm_node *node)
+{
+	lockdep_assert_held(&ctx->lock);
+
+	return drm_mm_insert_node_in_range(&ctx->mm, node, size, IVPU_MMU_PAGE_SIZE,
+					   0, range->start, range->end, DRM_MM_INSERT_BEST);
+}
+
+void
+ivpu_mmu_context_remove_node_locked(struct ivpu_mmu_context *ctx, struct drm_mm_node *node)
+{
+	lockdep_assert_held(&ctx->lock);
+
+	drm_mm_remove_node(node);
+}
+
+static int
+ivpu_mmu_context_init(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u32 context_id)
+{
+	u64 start, end;
+	int ret;
+
+	mutex_init(&ctx->lock);
+	INIT_LIST_HEAD(&ctx->bo_list);
+
+	ret = ivpu_mmu_pgtable_init(vdev, &ctx->pgtable);
+	if (ret)
+		return ret;
+
+	if (!context_id) {
+		start = vdev->hw->ranges.global_low.start;
+		end = vdev->hw->ranges.global_high.end;
+	} else {
+		start = vdev->hw->ranges.user_low.start;
+		end = vdev->hw->ranges.user_high.end;
+	}
+
+	drm_mm_init(&ctx->mm, start, end - start);
+	ctx->id = context_id;
+
+	return 0;
+}
+
+static void ivpu_mmu_context_fini(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx)
+{
+	drm_WARN_ON(&vdev->drm, !ctx->pgtable.pgd);
+
+	mutex_destroy(&ctx->lock);
+	ivpu_mmu_pgtable_free(vdev, &ctx->pgtable);
+	drm_mm_takedown(&ctx->mm);
+}
+
+int ivpu_mmu_global_context_init(struct ivpu_device *vdev)
+{
+	return ivpu_mmu_context_init(vdev, &vdev->gctx, IVPU_GLOBAL_CONTEXT_MMU_SSID);
+}
+
+void ivpu_mmu_global_context_fini(struct ivpu_device *vdev)
+{
+	return ivpu_mmu_context_fini(vdev, &vdev->gctx);
+}
+
+void ivpu_mmu_user_context_mark_invalid(struct ivpu_device *vdev, u32 ssid)
+{
+	struct ivpu_file_priv *file_priv;
+
+	xa_lock(&vdev->context_xa);
+
+	file_priv = xa_load(&vdev->context_xa, ssid);
+	if (file_priv)
+		file_priv->has_mmu_faults = true;
+
+	xa_unlock(&vdev->context_xa);
+}
+
+int ivpu_mmu_user_context_init(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u32 ctx_id)
+{
+	int ret;
+
+	drm_WARN_ON(&vdev->drm, !ctx_id);
+
+	ret = ivpu_mmu_context_init(vdev, ctx, ctx_id);
+	if (ret) {
+		ivpu_err(vdev, "Failed to initialize context: %d\n", ret);
+		return ret;
+	}
+
+	ret = ivpu_mmu_set_pgtable(vdev, ctx_id, &ctx->pgtable);
+	if (ret) {
+		ivpu_err(vdev, "Failed to set page table: %d\n", ret);
+		goto err_context_fini;
+	}
+
+	return 0;
+
+err_context_fini:
+	ivpu_mmu_context_fini(vdev, ctx);
+	return ret;
+}
+
+void ivpu_mmu_user_context_fini(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx)
+{
+	drm_WARN_ON(&vdev->drm, !ctx->id);
+
+	ivpu_mmu_clear_pgtable(vdev, ctx->id);
+	ivpu_mmu_context_fini(vdev, ctx);
+}
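For reference, a hedged sketch of how a caller might use the exported map/unmap entry points for a buffer that is already DMA-mapped; the caller-side function name and the note about address reservation are illustrative, not part of this patch:

```c
/* Illustrative caller; assumes sgt is already DMA-mapped and vpu_addr was
 * reserved in ctx->mm (page-aligned, within the 2 GB..8 GB window).
 */
static int example_map_and_unmap(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
				 u64 vpu_addr, struct sg_table *sgt)
{
	int ret;

	ret = ivpu_mmu_context_map_sgt(vdev, ctx, vpu_addr, sgt, false /* llc_coherent */);
	if (ret)
		return ret;

	/* ... hand vpu_addr to the firmware / workload ... */

	ivpu_mmu_context_unmap_sgt(vdev, ctx, vpu_addr, sgt);
	return 0;
}
```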