Diffstat (limited to 'drivers')

 -rw-r--r--  drivers/gpu/drm/nouveau/Kconfig                      7
 -rw-r--r--  drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h   23
 -rw-r--r--  drivers/gpu/drm/nouveau/nvkm/subdev/mmu/base.c     117
 -rw-r--r--  drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c      580
 -rw-r--r--  drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h      112

 5 files changed, 836 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/nouveau/Kconfig b/drivers/gpu/drm/nouveau/Kconfig
index c02a13406a81..4b75ad40dd80 100644
--- a/drivers/gpu/drm/nouveau/Kconfig
+++ b/drivers/gpu/drm/nouveau/Kconfig
@@ -56,6 +56,13 @@ config NOUVEAU_DEBUG_DEFAULT
 	help
 	  Selects the default debug level
 
+config NOUVEAU_DEBUG_MMU
+	bool "Enable additional MMU debugging"
+	depends on DRM_NOUVEAU
+	default n
+	help
+	  Say Y here if you want to enable verbose MMU debug output.
+
 config DRM_NOUVEAU_BACKLIGHT
 	bool "Support for backlight control"
 	depends on DRM_NOUVEAU
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h
index eb6704a2fc8f..ccd87d508d04 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h
@@ -1,6 +1,7 @@
 #ifndef __NVKM_MMU_H__
 #define __NVKM_MMU_H__
 #include <core/subdev.h>
+#include <core/memory.h>
 #include <core/mm.h>
 struct nvkm_gpuobj;
 struct nvkm_mem;
@@ -11,6 +12,8 @@ struct nvkm_vm_pgt {
 };
 
 struct nvkm_vma {
+	struct nvkm_memory *memory;
+	struct nvkm_tags *tags;
 	struct nvkm_vm *vm;
 	struct nvkm_mm_node *node;
 	union {
@@ -24,6 +27,7 @@ struct nvkm_vm {
 	const struct nvkm_vmm_func *func;
 	struct nvkm_mmu *mmu;
 	const char *name;
+	u32 debug;
 	struct kref kref;
 	struct mutex mutex;
 
@@ -58,6 +62,25 @@ void nvkm_vm_map_at(struct nvkm_vma *, u64 offset, struct nvkm_mem *);
 void nvkm_vm_unmap(struct nvkm_vma *);
 void nvkm_vm_unmap_at(struct nvkm_vma *, u64 offset, u64 length);
 
+int nvkm_vmm_boot(struct nvkm_vmm *);
+
+struct nvkm_vmm_map {
+	struct nvkm_memory *memory;
+	u64 offset;
+
+	struct nvkm_mm_node *mem;
+	struct scatterlist *sgl;
+	dma_addr_t *dma;
+	u64 off;
+
+	const struct nvkm_vmm_page *page;
+
+	struct nvkm_tags *tags;
+	u64 next;
+	u64 type;
+	u64 ctag;
+};
+
 struct nvkm_mmu {
 	const struct nvkm_mmu_func *func;
 	struct nvkm_subdev subdev;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/base.c
index 22264d3db22f..536187952372 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/base.c
@@ -213,6 +213,36 @@ nvkm_mmu_ptc_get(struct nvkm_mmu *mmu, u32 size, u32 align, bool zero)
 	return pt;
 }
 
+static void
+nvkm_vm_map_(const struct nvkm_vmm_page *page, struct nvkm_vma *vma, u64 delta,
+	     struct nvkm_mem *mem, nvkm_vmm_pte_func fn,
+	     struct nvkm_vmm_map *map)
+{
+	struct nvkm_vmm *vmm = vma->vm;
+	void *argv = NULL;
+	u32 argc = 0;
+	int ret;
+
+	map->memory = mem->memory;
+	map->page = page;
+
+	if (vmm->func->valid) {
+		ret = vmm->func->valid(vmm, argv, argc, map);
+		if (WARN_ON(ret))
+			return;
+	}
+
+	mutex_lock(&vmm->mutex);
+	nvkm_vmm_ptes_map(vmm, page, ((u64)vma->node->offset << 12) + delta,
+			  (u64)vma->node->length << 12, map, fn);
+	mutex_unlock(&vmm->mutex);
+
+	nvkm_memory_tags_put(vma->memory, vmm->mmu->subdev.device, &vma->tags);
+	nvkm_memory_unref(&vma->memory);
+	vma->memory = nvkm_memory_ref(map->memory);
+	vma->tags = map->tags;
+}
+
 void
 nvkm_mmu_ptc_dump(struct nvkm_mmu *mmu)
 {
@@ -251,6 +281,7 @@ nvkm_mmu_ptc_init(struct nvkm_mmu *mmu)
 void
 nvkm_vm_map_at(struct nvkm_vma *vma, u64 delta, struct nvkm_mem *node)
 {
+	const struct nvkm_vmm_page *page = vma->vm->func->page;
 	struct nvkm_vm *vm = vma->vm;
 	struct nvkm_mmu *mmu = vm->mmu;
 	struct nvkm_mm_node *r = node->mem;
@@ -262,6 +293,14 @@ nvkm_vm_map_at(struct nvkm_vma *vma, u64 delta, struct nvkm_mem *node)
 	u32 max = 1 << (mmu->func->pgt_bits - bits);
 	u32 end, len;
 
+	if (page->desc->func->unmap) {
+		struct nvkm_vmm_map map = { .mem = node->mem };
+		while (page->shift != vma->node->type)
+			page++;
+		nvkm_vm_map_(page, vma, delta, node, page->desc->func->mem, &map);
+		return;
+	}
+
 	delta = 0;
 	while (r) {
 		u64 phys = (u64)r->offset << 12;
@@ -297,6 +336,7 @@ static void
 nvkm_vm_map_sg_table(struct nvkm_vma *vma, u64 delta, u64 length,
 		     struct nvkm_mem *mem)
 {
+	const struct nvkm_vmm_page *page = vma->vm->func->page;
 	struct nvkm_vm *vm = vma->vm;
 	struct nvkm_mmu *mmu = vm->mmu;
 	int big = vma->node->type != mmu->func->spg_shift;
@@ -311,6 +351,14 @@ nvkm_vm_map_sg_table(struct nvkm_vma *vma, u64 delta, u64 length,
 	int i;
 	struct scatterlist *sg;
 
+	if (page->desc->func->unmap) {
+		struct nvkm_vmm_map map = { .sgl = mem->sg->sgl };
+		while (page->shift != vma->node->type)
+			page++;
+		nvkm_vm_map_(page, vma, delta, mem, page->desc->func->sgl, &map);
+		return;
+	}
+
 	for_each_sg(mem->sg->sgl, sg, mem->sg->nents, i) {
 		struct nvkm_memory *pgt = vm->pgt[pde].mem[big];
 		sglen = sg_dma_len(sg) >> PAGE_SHIFT;
@@ -355,6 +403,7 @@ static void
 nvkm_vm_map_sg(struct nvkm_vma *vma, u64 delta, u64 length,
 	       struct nvkm_mem *mem)
 {
+	const struct nvkm_vmm_page *page = vma->vm->func->page;
 	struct nvkm_vm *vm = vma->vm;
 	struct nvkm_mmu *mmu = vm->mmu;
 	dma_addr_t *list = mem->pages;
@@ -367,6 +416,14 @@ nvkm_vm_map_sg(struct nvkm_vma *vma, u64 delta, u64 length,
 	u32 max = 1 << (mmu->func->pgt_bits - bits);
 	u32 end, len;
 
+	if (page->desc->func->unmap) {
+		struct nvkm_vmm_map map = { .dma = mem->pages };
+		while (page->shift != vma->node->type)
+			page++;
+		nvkm_vm_map_(page, vma, delta, mem, page->desc->func->dma, &map);
+		return;
+	}
+
 	while (num) {
 		struct nvkm_memory *pgt = vm->pgt[pde].mem[big];
 
@@ -415,6 +472,17 @@ nvkm_vm_unmap_at(struct nvkm_vma *vma, u64 delta, u64 length)
 	u32 max = 1 << (mmu->func->pgt_bits - bits);
 	u32 end, len;
 
+	if (vm->func->page->desc->func->unmap) {
+		const struct nvkm_vmm_page *page = vm->func->page;
+		while (page->shift != vma->node->type)
+			page++;
+		mutex_lock(&vm->mutex);
+		nvkm_vmm_ptes_unmap(vm, page, (vma->node->offset << 12) + delta,
+				    vma->node->length << 12, false);
+		mutex_unlock(&vm->mutex);
+		return;
+	}
+
 	while (num) {
 		struct nvkm_memory *pgt = vm->pgt[pde].mem[big];
 
@@ -440,6 +508,9 @@ void
 nvkm_vm_unmap(struct nvkm_vma *vma)
 {
 	nvkm_vm_unmap_at(vma, 0, (u64)vma->node->length << 12);
+
+	nvkm_memory_tags_put(vma->memory, vma->vm->mmu->subdev.device, &vma->tags);
+	nvkm_memory_unref(&vma->memory);
 }
 
 static void
@@ -509,6 +580,22 @@ nvkm_vm_get(struct nvkm_vm *vm, u64 size, u32 page_shift, u32 access,
 		return ret;
 	}
 
+	if (vm->func->page->desc->func->unmap) {
+		const struct nvkm_vmm_page *page = vm->func->page;
+		while (page->shift != page_shift)
+			page++;
+
+		ret = nvkm_vmm_ptes_get(vm, page, vma->node->offset << 12,
+					vma->node->length << 12);
+		if (ret) {
+			nvkm_mm_free(&vm->mm, &vma->node);
+			mutex_unlock(&vm->mutex);
+			return ret;
+		}
+
+		goto done;
+	}
+
 	fpde = (vma->node->offset >> mmu->func->pgt_bits);
 	lpde = (vma->node->offset + vma->node->length - 1) >> mmu->func->pgt_bits;
 
@@ -530,8 +617,11 @@ nvkm_vm_get(struct nvkm_vm *vm, u64 size, u32 page_shift, u32 access,
 			return ret;
 		}
 	}
+done:
 	mutex_unlock(&vm->mutex);
 
+	vma->memory = NULL;
+	vma->tags = NULL;
 	vma->vm = NULL;
 	nvkm_vm_ref(vm, &vma->vm, NULL);
 	vma->offset = (u64)vma->node->offset << 12;
@@ -551,11 +641,25 @@ nvkm_vm_put(struct nvkm_vma *vma)
 	vm = vma->vm;
 	mmu = vm->mmu;
 
+	nvkm_memory_tags_put(vma->memory, mmu->subdev.device, &vma->tags);
+	nvkm_memory_unref(&vma->memory);
+
 	fpde = (vma->node->offset >> mmu->func->pgt_bits);
 	lpde = (vma->node->offset + vma->node->length - 1) >> mmu->func->pgt_bits;
 
 	mutex_lock(&vm->mutex);
+	if (vm->func->page->desc->func->unmap) {
+		const struct nvkm_vmm_page *page = vm->func->page;
+		while (page->shift != vma->node->type)
+			page++;
+
+		nvkm_vmm_ptes_put(vm, page, vma->node->offset << 12,
+				  vma->node->length << 12);
+		goto done;
+	}
+
 	nvkm_vm_unmap_pgt(vm, vma->node->type != mmu->func->spg_shift, fpde, lpde);
+done:
 	nvkm_mm_free(&vm->mm, &vma->node);
 	mutex_unlock(&vm->mutex);
 
@@ -569,6 +673,9 @@ nvkm_vm_boot(struct nvkm_vm *vm, u64 size)
 	struct nvkm_memory *pgt;
 	int ret;
 
+	if (vm->func->page->desc->func->unmap)
+		return nvkm_vmm_boot(vm);
+
 	ret = nvkm_memory_new(mmu->subdev.device, NVKM_MEM_TARGET_INST,
 			      (size >> mmu->func->spg_shift) * 8, 0x1000, true, &pgt);
 	if (ret == 0) {
@@ -660,7 +767,7 @@ nvkm_vm_ref(struct nvkm_vm *ref, struct nvkm_vm **ptr, struct nvkm_memory *inst)
 		if (ret)
 			return ret;
 
-		if (ref->mmu->func->map_pgt) {
+		if (!ref->func->page->desc->func->unmap && ref->mmu->func->map_pgt) {
 			for (i = ref->fpde; i <= ref->lpde; i++)
 				ref->mmu->func->map_pgt(ref, i, ref->pgt[i - ref->fpde].mem);
 		}
@@ -672,8 +779,12 @@ nvkm_vm_ref(struct nvkm_vm *ref, struct nvkm_vm **ptr, struct nvkm_memory *inst)
 	if (*ptr) {
 		if ((*ptr)->func->part && inst)
 			(*ptr)->func->part(*ptr, inst);
-		if ((*ptr)->bootstrapped && inst)
-			nvkm_memory_unref(&(*ptr)->pgt[0].mem[0]);
+		if ((*ptr)->bootstrapped && inst) {
+			if (!(*ptr)->func->page->desc->func->unmap) {
+				nvkm_memory_unref(&(*ptr)->pgt[0].mem[0]);
+				(*ptr)->bootstrapped = false;
+			}
+		}
 		kref_put(&(*ptr)->refcount, nvkm_vm_del);
 	}
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c
index 7e00b9adca05..46c7fecf0054 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c
@@ -67,9 +67,559 @@ nvkm_vmm_pt_new(const struct nvkm_vmm_desc *desc, bool sparse,
 	return pgt;
 }
 
+struct nvkm_vmm_iter {
+	const struct nvkm_vmm_page *page;
+	const struct nvkm_vmm_desc *desc;
+	struct nvkm_vmm *vmm;
+	u64 cnt;
+	u16 max, lvl;
+	u32 pte[NVKM_VMM_LEVELS_MAX];
+	struct nvkm_vmm_pt *pt[NVKM_VMM_LEVELS_MAX];
+	int flush;
+};
+
+#ifdef CONFIG_NOUVEAU_DEBUG_MMU
+static const char *
+nvkm_vmm_desc_type(const struct nvkm_vmm_desc *desc)
+{
+	switch (desc->type) {
+	case PGD: return "PGD";
+	case PGT: return "PGT";
+	case SPT: return "SPT";
+	case LPT: return "LPT";
+	default:
+		return "UNKNOWN";
+	}
+}
+
+static void
+nvkm_vmm_trace(struct nvkm_vmm_iter *it, char *buf)
+{
+	int lvl;
+	for (lvl = it->max; lvl >= 0; lvl--) {
+		if (lvl >= it->lvl)
+			buf += sprintf(buf, "%05x:", it->pte[lvl]);
+		else
+			buf += sprintf(buf, "xxxxx:");
+	}
+}
+
+#define TRA(i,f,a...) do { \
+	char _buf[NVKM_VMM_LEVELS_MAX * 7]; \
+	struct nvkm_vmm_iter *_it = (i); \
+	nvkm_vmm_trace(_it, _buf); \
+	VMM_TRACE(_it->vmm, "%s "f, _buf, ##a); \
+} while(0)
+#else
+#define TRA(i,f,a...)
+#endif
+
+static inline void
+nvkm_vmm_flush_mark(struct nvkm_vmm_iter *it)
+{
+	it->flush = min(it->flush, it->max - it->lvl);
+}
+
+static inline void
+nvkm_vmm_flush(struct nvkm_vmm_iter *it)
+{
+	if (it->flush != NVKM_VMM_LEVELS_MAX) {
+		if (it->vmm->func->flush) {
+			TRA(it, "flush: %d", it->flush);
+			it->vmm->func->flush(it->vmm, it->flush);
+		}
+		it->flush = NVKM_VMM_LEVELS_MAX;
+	}
+}
+
+static void
+nvkm_vmm_unref_pdes(struct nvkm_vmm_iter *it)
+{
+	const struct nvkm_vmm_desc *desc = it->desc;
+	const int type = desc[it->lvl].type == SPT;
+	struct nvkm_vmm_pt *pgd = it->pt[it->lvl + 1];
+	struct nvkm_vmm_pt *pgt = it->pt[it->lvl];
+	struct nvkm_mmu_pt *pt = pgt->pt[type];
+	struct nvkm_vmm *vmm = it->vmm;
+	u32 pdei = it->pte[it->lvl + 1];
+
+	/* Recurse up the tree, unreferencing/destroying unneeded PDs. */
+	it->lvl++;
+	if (--pgd->refs[0]) {
+		const struct nvkm_vmm_desc_func *func = desc[it->lvl].func;
+		/* PD has other valid PDEs, so we need a proper update. */
+		TRA(it, "PDE unmap %s", nvkm_vmm_desc_type(&desc[it->lvl - 1]));
+		pgt->pt[type] = NULL;
+		if (!pgt->refs[!type]) {
+			/* PDE no longer required. */
+			if (pgd->pt[0]) {
+				if (pgt->sparse) {
+					func->sparse(vmm, pgd->pt[0], pdei, 1);
+					pgd->pde[pdei] = NVKM_VMM_PDE_SPARSE;
+				} else {
+					func->unmap(vmm, pgd->pt[0], pdei, 1);
+					pgd->pde[pdei] = NULL;
+				}
+			} else {
+				/* Special handling for Tesla-class GPUs,
+				 * where there's no central PD, but each
+				 * instance has its own embedded PD.
+				 */
+				func->pde(vmm, pgd, pdei);
+				pgd->pde[pdei] = NULL;
+			}
+		} else {
+			/* PDE was pointing at dual-PTs and we're removing
+			 * one of them, leaving the other in place.
+			 */
+			func->pde(vmm, pgd, pdei);
+		}
+
+		/* GPU may have cached the PTs; flush before freeing. */
+		nvkm_vmm_flush_mark(it);
+		nvkm_vmm_flush(it);
+	} else {
+		/* PD has no valid PDEs left, so we can just destroy it. */
+		nvkm_vmm_unref_pdes(it);
+	}
+
+	/* Destroy PD/PT. */
+	TRA(it, "PDE free %s", nvkm_vmm_desc_type(&desc[it->lvl - 1]));
+	nvkm_mmu_ptc_put(vmm->mmu, vmm->bootstrapped, &pt);
+	if (!pgt->refs[!type])
+		nvkm_vmm_pt_del(&pgt);
+	it->lvl--;
+}
+
+static void
+nvkm_vmm_unref_sptes(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgt,
+		     const struct nvkm_vmm_desc *desc, u32 ptei, u32 ptes)
+{
+	const struct nvkm_vmm_desc *pair = it->page[-1].desc;
+	const u32 sptb = desc->bits - pair->bits;
+	const u32 sptn = 1 << sptb;
+	struct nvkm_vmm *vmm = it->vmm;
+	u32 spti = ptei & (sptn - 1), lpti, pteb;
+
+	/* Determine how many SPTEs are being touched under each LPTE,
+	 * and drop reference counts.
+	 */
+	for (lpti = ptei >> sptb; ptes; spti = 0, lpti++) {
+		const u32 pten = min(sptn - spti, ptes);
+		pgt->pte[lpti] -= pten;
+		ptes -= pten;
+	}
+
+	/* We're done here if there's no corresponding LPT. */
+	if (!pgt->refs[0])
+		return;
+
+	for (ptei = pteb = ptei >> sptb; ptei < lpti; pteb = ptei) {
+		/* Skip over any LPTEs that still have valid SPTEs. */
+		if (pgt->pte[pteb] & NVKM_VMM_PTE_SPTES) {
+			for (ptes = 1, ptei++; ptei < lpti; ptes++, ptei++) {
+				if (!(pgt->pte[ptei] & NVKM_VMM_PTE_SPTES))
+					break;
+			}
+			continue;
+		}
+
+		/* As there are no more non-UNMAPPED SPTEs left in the range
+		 * covered by a number of LPTEs, the LPTEs once again take
+		 * control over their address range.
+		 *
+		 * Determine how many LPTEs need to transition state.
+		 */
+		pgt->pte[ptei] &= ~NVKM_VMM_PTE_VALID;
+		for (ptes = 1, ptei++; ptei < lpti; ptes++, ptei++) {
+			if (pgt->pte[ptei] & NVKM_VMM_PTE_SPTES)
+				break;
+			pgt->pte[ptei] &= ~NVKM_VMM_PTE_VALID;
+		}
+
+		if (pgt->pte[pteb] & NVKM_VMM_PTE_SPARSE) {
+			TRA(it, "LPTE %05x: U -> S %d PTEs", pteb, ptes);
+			pair->func->sparse(vmm, pgt->pt[0], pteb, ptes);
+		} else
+		if (pair->func->invalid) {
+			/* If the MMU supports it, restore the LPTE to the
+			 * INVALID state to tell the MMU there is no point
+			 * trying to fetch the corresponding SPTEs.
+			 */
+			TRA(it, "LPTE %05x: U -> I %d PTEs", pteb, ptes);
+			pair->func->invalid(vmm, pgt->pt[0], pteb, ptes);
+		}
+	}
+}
+
+static bool
+nvkm_vmm_unref_ptes(struct nvkm_vmm_iter *it, u32 ptei, u32 ptes)
+{
+	const struct nvkm_vmm_desc *desc = it->desc;
+	const int type = desc->type == SPT;
+	struct nvkm_vmm_pt *pgt = it->pt[0];
+
+	/* Drop PTE references. */
+	pgt->refs[type] -= ptes;
+
+	/* Dual-PTs need special handling, unless the PDE is becoming invalid. */
+	if (desc->type == SPT && (pgt->refs[0] || pgt->refs[1]))
+		nvkm_vmm_unref_sptes(it, pgt, desc, ptei, ptes);
+
+	/* PT no longer needed? Destroy it. */
+	if (!pgt->refs[type]) {
+		it->lvl++;
+		TRA(it, "%s empty", nvkm_vmm_desc_type(desc));
+		it->lvl--;
+		nvkm_vmm_unref_pdes(it);
+		return false; /* PTE writes for unmap() not necessary. */
+	}
+
+	return true;
+}
+
+static void
+nvkm_vmm_ref_sptes(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgt,
+		   const struct nvkm_vmm_desc *desc, u32 ptei, u32 ptes)
+{
+	const struct nvkm_vmm_desc *pair = it->page[-1].desc;
+	const u32 sptb = desc->bits - pair->bits;
+	const u32 sptn = 1 << sptb;
+	struct nvkm_vmm *vmm = it->vmm;
+	u32 spti = ptei & (sptn - 1), lpti, pteb;
+
+	/* Determine how many SPTEs are being touched under each LPTE,
+	 * and increase reference counts.
+	 */
+	for (lpti = ptei >> sptb; ptes; spti = 0, lpti++) {
+		const u32 pten = min(sptn - spti, ptes);
+		pgt->pte[lpti] += pten;
+		ptes -= pten;
+	}
+
+	/* We're done here if there's no corresponding LPT. */
+	if (!pgt->refs[0])
+		return;
+
+	for (ptei = pteb = ptei >> sptb; ptei < lpti; pteb = ptei) {
+		/* Skip over any LPTEs that already have valid SPTEs. */
+		if (pgt->pte[pteb] & NVKM_VMM_PTE_VALID) {
+			for (ptes = 1, ptei++; ptei < lpti; ptes++, ptei++) {
+				if (!(pgt->pte[ptei] & NVKM_VMM_PTE_VALID))
+					break;
+			}
+			continue;
+		}
+
+		/* As there are now non-UNMAPPED SPTEs in the range covered
+		 * by a number of LPTEs, we need to transfer control of the
+		 * address range to the SPTEs.
+		 *
+		 * Determine how many LPTEs need to transition state.
+		 */
+		pgt->pte[ptei] |= NVKM_VMM_PTE_VALID;
+		for (ptes = 1, ptei++; ptei < lpti; ptes++, ptei++) {
+			if (pgt->pte[ptei] & NVKM_VMM_PTE_VALID)
+				break;
+			pgt->pte[ptei] |= NVKM_VMM_PTE_VALID;
+		}
+
+		if (pgt->pte[pteb] & NVKM_VMM_PTE_SPARSE) {
+			const u32 spti = pteb * sptn;
+			const u32 sptc = ptes * sptn;
+			/* The entire LPTE is marked as sparse, so we need
+			 * to make sure that the SPTEs are too.
+			 */
+			TRA(it, "SPTE %05x: U -> S %d PTEs", spti, sptc);
+			desc->func->sparse(vmm, pgt->pt[1], spti, sptc);
+			/* Sparse LPTEs prevent SPTEs from being accessed. */
+			TRA(it, "LPTE %05x: S -> U %d PTEs", pteb, ptes);
+			pair->func->unmap(vmm, pgt->pt[0], pteb, ptes);
+		} else
+		if (pair->func->invalid) {
+			/* MMU supports blocking SPTEs by marking an LPTE
+			 * as INVALID. We need to reverse that here.
+			 */
+			TRA(it, "LPTE %05x: I -> U %d PTEs", pteb, ptes);
+			pair->func->unmap(vmm, pgt->pt[0], pteb, ptes);
+		}
+	}
+}
+
+static bool
+nvkm_vmm_ref_ptes(struct nvkm_vmm_iter *it, u32 ptei, u32 ptes)
+{
+	const struct nvkm_vmm_desc *desc = it->desc;
+	const int type = desc->type == SPT;
+	struct nvkm_vmm_pt *pgt = it->pt[0];
+
+	/* Take PTE references. */
+	pgt->refs[type] += ptes;
+
+	/* Dual-PTs need special handling. */
+	if (desc->type == SPT)
+		nvkm_vmm_ref_sptes(it, pgt, desc, ptei, ptes);
+
+	return true;
+}
+
+static void
+nvkm_vmm_sparse_ptes(const struct nvkm_vmm_desc *desc,
+		     struct nvkm_vmm_pt *pgt, u32 ptei, u32 ptes)
+{
+	if (desc->type == PGD) {
+		while (ptes--)
+			pgt->pde[ptei++] = NVKM_VMM_PDE_SPARSE;
+	} else
+	if (desc->type == LPT) {
+		memset(&pgt->pte[ptei], NVKM_VMM_PTE_SPARSE, ptes);
+	}
+}
+
+static bool
+nvkm_vmm_ref_hwpt(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgd, u32 pdei)
+{
+	const struct nvkm_vmm_desc *desc = &it->desc[it->lvl - 1];
+	const int type = desc->type == SPT;
+	struct nvkm_vmm_pt *pgt = pgd->pde[pdei];
+	const bool zero = !pgt->sparse && !desc->func->invalid;
+	struct nvkm_vmm *vmm = it->vmm;
+	struct nvkm_mmu *mmu = vmm->mmu;
+	struct nvkm_mmu_pt *pt;
+	u32 pten = 1 << desc->bits;
+	u32 pteb, ptei, ptes;
+	u32 size = desc->size * pten;
+
+	pgd->refs[0]++;
+
+	pgt->pt[type] = nvkm_mmu_ptc_get(mmu, size, desc->align, zero);
+	if (!pgt->pt[type]) {
+		it->lvl--;
+		nvkm_vmm_unref_pdes(it);
+		return false;
+	}
+
+	if (zero)
+		goto done;
+
+	pt = pgt->pt[type];
+
+	if (desc->type == LPT && pgt->refs[1]) {
+		/* SPT already exists covering the same range as this LPT,
+		 * which means we need to be careful that any LPTEs which
+		 * overlap valid SPTEs are unmapped as opposed to invalid
+		 * or sparse, which would prevent the MMU from looking at
+		 * the SPTEs on some GPUs.
+		 */
+		for (ptei = pteb = 0; ptei < pten; pteb = ptei) {
+			bool spte = pgt->pte[ptei] & NVKM_VMM_PTE_SPTES;
+			for (ptes = 1, ptei++; ptei < pten; ptes++, ptei++) {
+				bool next = pgt->pte[ptei] & NVKM_VMM_PTE_SPTES;
+				if (spte != next)
+					break;
+			}
+
+			if (!spte) {
+				if (pgt->sparse)
+					desc->func->sparse(vmm, pt, pteb, ptes);
+				else
+					desc->func->invalid(vmm, pt, pteb, ptes);
+				memset(&pgt->pte[pteb], 0x00, ptes);
+			} else {
+				desc->func->unmap(vmm, pt, pteb, ptes);
+				while (ptes--)
+					pgt->pte[pteb++] |= NVKM_VMM_PTE_VALID;
+			}
+		}
+	} else {
+		if (pgt->sparse) {
+			nvkm_vmm_sparse_ptes(desc, pgt, 0, pten);
+			desc->func->sparse(vmm, pt, 0, pten);
+		} else {
+			desc->func->invalid(vmm, pt, 0, pten);
+		}
+	}
+
+done:
+	TRA(it, "PDE write %s", nvkm_vmm_desc_type(desc));
+	it->desc[it->lvl].func->pde(it->vmm, pgd, pdei);
+	nvkm_vmm_flush_mark(it);
+	return true;
+}
+
+static bool
+nvkm_vmm_ref_swpt(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgd, u32 pdei)
+{
+	const struct nvkm_vmm_desc *desc = &it->desc[it->lvl - 1];
+	struct nvkm_vmm_pt *pgt = pgd->pde[pdei];
+
+	pgt = nvkm_vmm_pt_new(desc, NVKM_VMM_PDE_SPARSED(pgt), it->page);
+	if (!pgt) {
+		if (!pgd->refs[0])
+			nvkm_vmm_unref_pdes(it);
+		return false;
+	}
+
+	pgd->pde[pdei] = pgt;
+	return true;
+}
+
+static inline u64
+nvkm_vmm_iter(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page,
+	      u64 addr, u64 size, const char *name, bool ref,
+	      bool (*REF_PTES)(struct nvkm_vmm_iter *, u32, u32),
+	      nvkm_vmm_pte_func MAP_PTES, struct nvkm_vmm_map *map,
+	      nvkm_vmm_pxe_func CLR_PTES)
+{
+	const struct nvkm_vmm_desc *desc = page->desc;
+	struct nvkm_vmm_iter it;
+	u64 bits = addr >> page->shift;
+
+	it.page = page;
+	it.desc = desc;
+	it.vmm = vmm;
+	it.cnt = size >> page->shift;
+	it.flush = NVKM_VMM_LEVELS_MAX;
+
+	/* Deconstruct address into PTE indices for each mapping level. */
+	for (it.lvl = 0; desc[it.lvl].bits; it.lvl++) {
+		it.pte[it.lvl] = bits & ((1 << desc[it.lvl].bits) - 1);
+		bits >>= desc[it.lvl].bits;
+	}
+	it.max = --it.lvl;
+	it.pt[it.max] = vmm->pd;
+
+	it.lvl = 0;
+	TRA(&it, "%s: %016llx %016llx %d %lld PTEs", name,
+	    addr, size, page->shift, it.cnt);
+	it.lvl = it.max;
+
+	/* Depth-first traversal of page tables. */
+	while (it.cnt) {
+		struct nvkm_vmm_pt *pgt = it.pt[it.lvl];
+		const int type = desc->type == SPT;
+		const u32 pten = 1 << desc->bits;
+		const u32 ptei = it.pte[0];
+		const u32 ptes = min_t(u64, it.cnt, pten - ptei);
+
+		/* Walk down the tree, finding page tables for each level. */
+		for (; it.lvl; it.lvl--) {
+			const u32 pdei = it.pte[it.lvl];
+			struct nvkm_vmm_pt *pgd = pgt;
+
+			/* Software PT. */
+			if (ref && NVKM_VMM_PDE_INVALID(pgd->pde[pdei])) {
+				if (!nvkm_vmm_ref_swpt(&it, pgd, pdei))
+					goto fail;
+			}
+			it.pt[it.lvl - 1] = pgt = pgd->pde[pdei];
+
+			/* Hardware PT.
+			 *
+			 * This is a separate step from above due to GF100 and
+			 * newer having dual page tables at some levels, which
+			 * are refcounted independently.
+			 */
+			if (ref && !pgt->refs[desc[it.lvl - 1].type == SPT]) {
+				if (!nvkm_vmm_ref_hwpt(&it, pgd, pdei))
+					goto fail;
+			}
+		}
+
+		/* Handle PTE updates. */
+		if (!REF_PTES || REF_PTES(&it, ptei, ptes)) {
+			struct nvkm_mmu_pt *pt = pgt->pt[type];
+			if (MAP_PTES || CLR_PTES) {
+				if (MAP_PTES)
+					MAP_PTES(vmm, pt, ptei, ptes, map);
+				else
+					CLR_PTES(vmm, pt, ptei, ptes);
+				nvkm_vmm_flush_mark(&it);
+			}
+		}
+
+		/* Walk back up the tree to the next position. */
+		it.pte[it.lvl] += ptes;
+		it.cnt -= ptes;
+		if (it.cnt) {
+			while (it.pte[it.lvl] == (1 << desc[it.lvl].bits)) {
+				it.pte[it.lvl++] = 0;
+				it.pte[it.lvl]++;
+			}
+		}
+	};
+
+	nvkm_vmm_flush(&it);
+	return ~0ULL;
+
+fail:
+	/* Reconstruct the failure address so the caller is able to
+	 * reverse any partially completed operations.
+	 */
+	addr = it.pte[it.max--];
+	do {
+		addr  = addr << desc[it.max].bits;
+		addr |= it.pte[it.max];
+	} while (it.max--);
+
+	return addr << page->shift;
+}
+
+void
+nvkm_vmm_ptes_unmap(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page,
+		    u64 addr, u64 size, bool sparse)
+{
+	const struct nvkm_vmm_desc_func *func = page->desc->func;
+	nvkm_vmm_iter(vmm, page, addr, size, "unmap", false, NULL, NULL, NULL,
+		      sparse ? func->sparse : func->invalid ? func->invalid :
+			       func->unmap);
+}
+
+void
+nvkm_vmm_ptes_map(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page,
+		  u64 addr, u64 size, struct nvkm_vmm_map *map,
+		  nvkm_vmm_pte_func func)
+{
+	nvkm_vmm_iter(vmm, page, addr, size, "map", false,
+		      NULL, func, map, NULL);
+}
+
+void
+nvkm_vmm_ptes_put(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page,
+		  u64 addr, u64 size)
+{
+	nvkm_vmm_iter(vmm, page, addr, size, "unref", false,
+		      nvkm_vmm_unref_ptes, NULL, NULL, NULL);
+}
+
+int
+nvkm_vmm_ptes_get(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page,
+		  u64 addr, u64 size)
+{
+	u64 fail = nvkm_vmm_iter(vmm, page, addr, size, "ref", true,
+				 nvkm_vmm_ref_ptes, NULL, NULL, NULL);
+	if (fail != ~0ULL) {
+		if (fail != addr)
+			nvkm_vmm_ptes_put(vmm, page, addr, fail - addr);
+		return -ENOMEM;
+	}
+	return 0;
+}
+
 void
 nvkm_vmm_dtor(struct nvkm_vmm *vmm)
 {
+	if (vmm->bootstrapped) {
+		const struct nvkm_vmm_page *page = vmm->func->page;
+		const u64 limit = vmm->limit - vmm->start;
+
+		while (page[1].shift)
+			page++;
+
+		nvkm_mmu_ptc_dump(vmm->mmu);
+		nvkm_vmm_ptes_put(vmm, page, vmm->start, limit);
+	}
+
 	if (vmm->nullp) {
 		dma_free_coherent(vmm->mmu->subdev.device->dev, 16 * 1024,
 				  vmm->nullp, vmm->null);
@@ -94,6 +644,7 @@ nvkm_vmm_ctor(const struct nvkm_vmm_func *func, struct nvkm_mmu *mmu,
 	vmm->func = func;
 	vmm->mmu = mmu;
 	vmm->name = name;
+	vmm->debug = mmu->subdev.debug;
 	kref_init(&vmm->kref);
 
 	__mutex_init(&vmm->mutex, "&vmm->mutex", key ? key : &_key);
@@ -150,3 +701,32 @@ nvkm_vmm_new_(const struct nvkm_vmm_func *func, struct nvkm_mmu *mmu,
 		return -ENOMEM;
 	return nvkm_vmm_ctor(func, mmu, hdr, addr, size, key, name, *pvmm);
 }
+
+static bool
+nvkm_vmm_boot_ptes(struct nvkm_vmm_iter *it, u32 ptei, u32 ptes)
+{
+	const struct nvkm_vmm_desc *desc = it->desc;
+	const int type = desc->type == SPT;
+	nvkm_memory_boot(it->pt[0]->pt[type]->memory, it->vmm);
+	return false;
+}
+
+int
+nvkm_vmm_boot(struct nvkm_vmm *vmm)
+{
+	const struct nvkm_vmm_page *page = vmm->func->page;
+	const u64 limit = vmm->limit - vmm->start;
+	int ret;
+
+	while (page[1].shift)
+		page++;
+
+	ret = nvkm_vmm_ptes_get(vmm, page, vmm->start, limit);
+	if (ret)
+		return ret;
+
+	nvkm_vmm_iter(vmm, page, vmm->start, limit, "bootstrap", false,
+		      nvkm_vmm_boot_ptes, NULL, NULL, NULL);
+	vmm->bootstrapped = true;
+	return 0;
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h
index 504408d8014b..042d84c5e950 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h
@@ -2,6 +2,7 @@
 #define __NVKM_VMM_H__
 #include "priv.h"
 #include <core/memory.h>
+enum nvkm_memory_target;
 
 struct nvkm_vmm_pt {
 	/* Some GPUs have a mapping level with a dual page tables to
@@ -49,7 +50,23 @@ struct nvkm_vmm_pt {
 	u8 pte[];
 };
 
+typedef void (*nvkm_vmm_pxe_func)(struct nvkm_vmm *,
+				  struct nvkm_mmu_pt *, u32 ptei, u32 ptes);
+typedef void (*nvkm_vmm_pde_func)(struct nvkm_vmm *,
+				  struct nvkm_vmm_pt *, u32 pdei);
+typedef void (*nvkm_vmm_pte_func)(struct nvkm_vmm *, struct nvkm_mmu_pt *,
+				  u32 ptei, u32 ptes, struct nvkm_vmm_map *);
+
 struct nvkm_vmm_desc_func {
+	nvkm_vmm_pxe_func invalid;
+	nvkm_vmm_pxe_func unmap;
+	nvkm_vmm_pxe_func sparse;
+
+	nvkm_vmm_pde_func pde;
+
+	nvkm_vmm_pte_func mem;
+	nvkm_vmm_pte_func dma;
+	nvkm_vmm_pte_func sgl;
 };
 
 extern const struct nvkm_vmm_desc_func gf100_vmm_pgd;
@@ -106,6 +123,11 @@ struct nvkm_vmm_func {
 	int (*join)(struct nvkm_vmm *, struct nvkm_memory *inst);
 	void (*part)(struct nvkm_vmm *, struct nvkm_memory *inst);
 
+	int (*aper)(enum nvkm_memory_target);
+	int (*valid)(struct nvkm_vmm *, void *argv, u32 argc,
+		     struct nvkm_vmm_map *);
+	void (*flush)(struct nvkm_vmm *, int depth);
+
 	u64 page_block;
 	const struct nvkm_vmm_page page[];
 };
@@ -122,6 +144,15 @@ int nvkm_vmm_ctor(const struct nvkm_vmm_func *, struct nvkm_mmu *,
 		  u32 pd_header, u64 addr, u64 size, struct lock_class_key *,
 		  const char *name, struct nvkm_vmm *);
 void nvkm_vmm_dtor(struct nvkm_vmm *);
+void nvkm_vmm_ptes_put(struct nvkm_vmm *, const struct nvkm_vmm_page *,
+		       u64 addr, u64 size);
+int nvkm_vmm_ptes_get(struct nvkm_vmm *, const struct nvkm_vmm_page *,
+		      u64 addr, u64 size);
+void nvkm_vmm_ptes_map(struct nvkm_vmm *, const struct nvkm_vmm_page *,
+		       u64 addr, u64 size, struct nvkm_vmm_map *,
+		       nvkm_vmm_pte_func);
+void nvkm_vmm_ptes_unmap(struct nvkm_vmm *, const struct nvkm_vmm_page *,
+			 u64 addr, u64 size, bool sparse);
 
 int nv04_vmm_new_(const struct nvkm_vmm_func *, struct nvkm_mmu *, u32,
 		  u64, u64, void *, u32, struct lock_class_key *,
@@ -176,4 +207,85 @@ int gp100_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32,
 int gp10b_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32,
 		  struct lock_class_key *, const char *, struct nvkm_vmm **);
+
+#define VMM_PRINT(l,v,p,f,a...) do { \
+	struct nvkm_vmm *_vmm = (v); \
+	if (CONFIG_NOUVEAU_DEBUG >= (l) && _vmm->debug >= (l)) { \
+		nvkm_printk_(&_vmm->mmu->subdev, 0, p, "%s: "f"\n", \
+			     _vmm->name, ##a); \
+	} \
+} while(0)
+#define VMM_DEBUG(v,f,a...) VMM_PRINT(NV_DBG_DEBUG, (v), info, f, ##a)
+#define VMM_TRACE(v,f,a...) VMM_PRINT(NV_DBG_TRACE, (v), info, f, ##a)
+#define VMM_SPAM(v,f,a...)  VMM_PRINT(NV_DBG_SPAM , (v),  dbg, f, ##a)
+
+#define VMM_MAP_ITER(VMM,PT,PTEI,PTEN,MAP,FILL,BASE,SIZE,NEXT) do { \
+	nvkm_kmap((PT)->memory); \
+	while (PTEN) { \
+		u64 _ptes = ((SIZE) - MAP->off) >> MAP->page->shift; \
+		u64 _addr = ((BASE) + MAP->off); \
+		\
+		if (_ptes > PTEN) { \
+			MAP->off += PTEN << MAP->page->shift; \
+			_ptes = PTEN; \
+		} else { \
+			MAP->off = 0; \
+			NEXT; \
+		} \
+		\
+		VMM_SPAM(VMM, "ITER %08x %08x PTE(s)", PTEI, (u32)_ptes); \
+		\
+		FILL(VMM, PT, PTEI, _ptes, MAP, _addr); \
+		PTEI += _ptes; \
+		PTEN -= _ptes; \
+	}; \
+	nvkm_done((PT)->memory); \
+} while(0)
+
+#define VMM_MAP_ITER_MEM(VMM,PT,PTEI,PTEN,MAP,FILL) \
+	VMM_MAP_ITER(VMM,PT,PTEI,PTEN,MAP,FILL, \
+		     ((u64)MAP->mem->offset << NVKM_RAM_MM_SHIFT), \
+		     ((u64)MAP->mem->length << NVKM_RAM_MM_SHIFT), \
+		     (MAP->mem = MAP->mem->next))
+#define VMM_MAP_ITER_DMA(VMM,PT,PTEI,PTEN,MAP,FILL) \
+	VMM_MAP_ITER(VMM,PT,PTEI,PTEN,MAP,FILL, \
+		     *MAP->dma, PAGE_SIZE, MAP->dma++)
+#define VMM_MAP_ITER_SGL(VMM,PT,PTEI,PTEN,MAP,FILL) \
+	VMM_MAP_ITER(VMM,PT,PTEI,PTEN,MAP,FILL, \
+		     sg_dma_address(MAP->sgl), sg_dma_len(MAP->sgl), \
+		     (MAP->sgl = sg_next(MAP->sgl)))
+
+#define VMM_FO(m,o,d,c,b) nvkm_fo##b((m)->memory, (o), (d), (c))
+#define VMM_WO(m,o,d,c,b) nvkm_wo##b((m)->memory, (o), (d))
+#define VMM_XO(m,v,o,d,c,b,fn,f,a...) do { \
+	const u32 _pteo = (o); u##b _data = (d); \
+	VMM_SPAM((v), "   %010llx "f, (m)->addr + _pteo, _data, ##a); \
+	VMM_##fn((m), (m)->base + _pteo, _data, (c), b); \
+} while(0)
+
+#define VMM_WO032(m,v,o,d) VMM_XO((m),(v),(o),(d),  1, 32, WO, "%08x")
+#define VMM_FO032(m,v,o,d,c) \
+	VMM_XO((m),(v),(o),(d),(c), 32, FO, "%08x %08x", (c))
+
+#define VMM_WO064(m,v,o,d) VMM_XO((m),(v),(o),(d),  1, 64, WO, "%016llx")
+#define VMM_FO064(m,v,o,d,c) \
+	VMM_XO((m),(v),(o),(d),(c), 64, FO, "%016llx %08x", (c))
+
+#define VMM_XO128(m,v,o,lo,hi,c,f,a...) do { \
+	u32 _pteo = (o), _ptes = (c); \
+	const u64 _addr = (m)->addr + _pteo; \
+	VMM_SPAM((v), "   %010llx %016llx%016llx"f, _addr, (hi), (lo), ##a); \
+	while (_ptes--) { \
+		nvkm_wo64((m)->memory, (m)->base + _pteo + 0, (lo)); \
+		nvkm_wo64((m)->memory, (m)->base + _pteo + 8, (hi)); \
+		_pteo += 0x10; \
+	} \
+} while(0)
+
+#define VMM_WO128(m,v,o,lo,hi) VMM_XO128((m),(v),(o),(lo),(hi), 1, "")
+#define VMM_FO128(m,v,o,lo,hi,c) do { \
+	nvkm_kmap((m)->memory); \
+	VMM_XO128((m),(v),(o),(lo),(hi),(c), " %08x", (c)); \
+	nvkm_done((m)->memory); \
+} while(0)
 #endif
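Editor's note: nvkm_vmm_iter() above deconstructs a virtual address into one
PTE index per page-table level (lowest level first), and its failure path
folds the partial indices back into an address. The standalone sketch below
mirrors those two loops in userspace; the level widths and page shift are
illustrative values, not any real GPU layout.

/* Sketch of the address split/reassemble done by nvkm_vmm_iter().
 * Level bit widths below are made up for the demonstration.
 */
#include <stdio.h>
#include <stdint.h>

#define LEVELS_MAX 5

int
main(void)
{
	const int bits[LEVELS_MAX] = { 8, 10, 10, 2, 0 };	/* 0 terminates */
	const int page_shift = 16;				/* 64KiB pages */
	uint64_t addr = 0x0000001fdeadbeefULL;
	uint32_t pte[LEVELS_MAX];
	uint64_t va = addr >> page_shift;
	int lvl, max;

	/* Deconstruct address into PTE indices for each mapping level. */
	for (lvl = 0; bits[lvl]; lvl++) {
		pte[lvl] = va & ((1 << bits[lvl]) - 1);
		va >>= bits[lvl];
	}
	max = lvl - 1;

	for (lvl = max; lvl >= 0; lvl--)
		printf("level %d index %05x\n", lvl, (unsigned)pte[lvl]);

	/* Rebuild the address, as the iterator's failure path does. */
	va = pte[max];
	for (lvl = max - 1; lvl >= 0; lvl--)
		va = (va << bits[lvl]) | pte[lvl];
	printf("rebuilt %016llx (page offset cleared)\n",
	       (unsigned long long)(va << page_shift));
	return 0;
}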
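Editor's note: nvkm_vmm_unref_ptes()/nvkm_vmm_unref_pdes() free page tables
by reference counting: each PT counts live PTEs, each PD counts live PDEs,
and dropping the last PTE destroys the PT and propagates up. This is a
heavily simplified userspace sketch of that idea (one PD, fixed-size PTs,
no dual-PT or sparse handling).

/* Minimal refcounted teardown sketch; shapes simplified from the driver. */
#include <stdio.h>
#include <stdlib.h>

struct pt { unsigned int refs; };
struct pd { unsigned int refs; struct pt *pde[4]; };

static void
unref_ptes(struct pd *pd, unsigned int pdei, unsigned int ptes)
{
	struct pt *pt = pd->pde[pdei];

	pt->refs -= ptes;
	if (!pt->refs) {			/* PT empty: destroy it */
		printf("free PT %u\n", pdei);
		free(pt);
		pd->pde[pdei] = NULL;
		if (!--pd->refs)		/* last PDE just went away */
			printf("PD now empty\n");
	}
}

int
main(void)
{
	struct pd pd = { 0, { NULL } };
	unsigned int i;

	for (i = 0; i < 2; i++) {		/* populate two PTs */
		pd.pde[i] = calloc(1, sizeof(struct pt));
		pd.pde[i]->refs = 4;		/* four PTEs each */
		pd.refs++;
	}

	unref_ptes(&pd, 0, 4);			/* frees PT 0 */
	unref_ptes(&pd, 1, 2);			/* PT 1 keeps 2 PTEs */
	unref_ptes(&pd, 1, 2);			/* frees PT 1, PD empty */
	return 0;
}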
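Editor's note: on dual-PT levels, nvkm_vmm_ref_sptes() distributes a run of
small-page PTE references across per-LPTE counters, clamping the first and
last partially-covered large PTEs. The sketch below isolates just that
counting loop; the 3-bit SPTE-per-LPTE ratio is an example, not a real
hardware layout.

/* SPTE-per-LPTE bookkeeping from nvkm_vmm_ref_sptes(), in isolation. */
#include <stdio.h>

#define SPTB 3				/* log2(SPTEs per LPTE) */
#define SPTN (1u << SPTB)
#define LPTES 8

static unsigned int lpte[LPTES];	/* valid-SPTE count per LPTE */

static void
ref_sptes(unsigned int ptei, unsigned int ptes)
{
	unsigned int spti = ptei & (SPTN - 1), lpti;

	for (lpti = ptei >> SPTB; ptes; spti = 0, lpti++) {
		unsigned int pten = SPTN - spti;	/* room left in LPTE */
		if (pten > ptes)
			pten = ptes;
		lpte[lpti] += pten;
		ptes -= pten;
	}
}

int
main(void)
{
	unsigned int i;
	ref_sptes(5, 17);	/* crosses three LPTEs: 3 + 8 + 6 */
	for (i = 0; i < LPTES; i++)
		printf("LPTE %u: %u SPTEs\n", i, lpte[i]);
	return 0;
}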
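Editor's note: nvkm_vmm_ptes_get() relies on the iterator returning either
~0ULL (success) or the address it had reached when a page-table allocation
failed, so the caller can unwind exactly the completed prefix. The sketch
below shows the contract with a stand-in "iterator" that fails at a fixed,
hypothetical address.

/* Reference/rollback contract around nvkm_vmm_ptes_get(), simulated. */
#include <stdio.h>
#include <stdint.h>

#define FAIL_AT 0x30000ULL		/* hypothetical failure point */

static uint64_t
iter_ref(uint64_t addr, uint64_t size)
{
	uint64_t pos;
	for (pos = addr; pos < addr + size; pos += 0x1000) {
		if (pos == FAIL_AT)
			return pos;	/* report how far we got */
		/* ...take PTE references for this page... */
	}
	return ~0ULL;			/* whole range referenced */
}

static void
ptes_put(uint64_t addr, uint64_t size)
{
	printf("unref %05llx..%05llx\n", (unsigned long long)addr,
	       (unsigned long long)(addr + size));
}

static int
ptes_get(uint64_t addr, uint64_t size)
{
	uint64_t fail = iter_ref(addr, size);
	if (fail != ~0ULL) {
		if (fail != addr)	/* undo the completed prefix only */
			ptes_put(addr, fail - addr);
		return -1;		/* -ENOMEM in the driver */
	}
	return 0;
}

int
main(void)
{
	if (ptes_get(0x10000, 0x40000))
		printf("get failed, partial work rolled back\n");
	return 0;
}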
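Editor's note: the VMM_MAP_ITER macro in vmm.h walks a list of physical
segments, emitting at most the remainder of the current segment per step and
advancing to the next segment once it is consumed. This is a function-style
translation of the same loop over a plain array; the segment values are
made up for the demonstration.

/* Function-style sketch of the VMM_MAP_ITER walk. */
#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT_ 12

struct seg { uint64_t base, size; };

static void
map_iter(const struct seg *sgl, unsigned int nseg,
	 uint32_t ptei, uint32_t pten)
{
	uint64_t off = 0;

	while (pten && nseg) {
		uint64_t ptes = (sgl->size - off) >> PAGE_SHIFT_;
		uint64_t addr = sgl->base + off;

		if (ptes > pten) {		/* segment outlives request */
			off += (uint64_t)pten << PAGE_SHIFT_;
			ptes = pten;
		} else {			/* NEXT: advance segment */
			off = 0;
			sgl++;
			nseg--;
		}

		printf("PTE %03x: %llu pages @ %010llx\n", ptei,
		       (unsigned long long)ptes, (unsigned long long)addr);
		ptei += ptes;
		pten -= ptes;
	}
}

int
main(void)
{
	const struct seg sgl[] = {
		{ 0x10000000, 0x4000 },		/* 4 pages */
		{ 0x20000000, 0x2000 },		/* 2 pages */
		{ 0x30000000, 0x8000 },		/* 8 pages */
	};
	map_iter(sgl, 3, 0x10, 9);	/* map 9 pages from PTE 0x10 */
	return 0;
}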
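Editor's note: the new CONFIG_NOUVEAU_DEBUG_MMU option gates the TRA() macro
so that all trace formatting compiles away when the option is off. The tiny
sketch below shows the same pattern with a local DEBUG_MMU define standing in
for the Kconfig symbol; the GNU named-variadic form (a...) matches the style
used in the patch.

/* Compile-time trace gating, as used for TRA() in vmm.c. */
#include <stdio.h>

#define DEBUG_MMU 1	/* flip to 0 to compile the tracing out */

#if DEBUG_MMU
#define TRA(f, a...) printf("trace: " f "\n", ##a)
#else
#define TRA(f, a...)
#endif

int
main(void)
{
	TRA("PDE write %s", "SPT");	/* no-op unless DEBUG_MMU */
	return 0;
}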