diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 399 |
1 files changed, 283 insertions, 116 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 5afbc5e714d0..da55a78d7380 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -32,6 +32,7 @@ #include <drm/amdgpu_drm.h> #include "amdgpu.h" #include "amdgpu_trace.h" +#include "amdgpu_amdkfd.h" /* * GPUVM @@ -75,7 +76,8 @@ struct amdgpu_pte_update_params { /* indirect buffer to fill with commands */ struct amdgpu_ib *ib; /* Function which actually does the update */ - void (*func)(struct amdgpu_pte_update_params *params, uint64_t pe, + void (*func)(struct amdgpu_pte_update_params *params, + struct amdgpu_bo *bo, uint64_t pe, uint64_t addr, unsigned count, uint32_t incr, uint64_t flags); /* The next two are used during VM update by CPU @@ -257,6 +259,104 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm) } /** + * amdgpu_vm_clear_bo - initially clear the PDs/PTs + * + * @adev: amdgpu_device pointer + * @bo: BO to clear + * @level: level this BO is at + * + * Root PD needs to be reserved when calling this. 
+ */ +static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, + struct amdgpu_vm *vm, struct amdgpu_bo *bo, + unsigned level, bool pte_support_ats) +{ + struct ttm_operation_ctx ctx = { true, false }; + struct dma_fence *fence = NULL; + unsigned entries, ats_entries; + struct amdgpu_ring *ring; + struct amdgpu_job *job; + uint64_t addr; + int r; + + addr = amdgpu_bo_gpu_offset(bo); + entries = amdgpu_bo_size(bo) / 8; + + if (pte_support_ats) { + if (level == adev->vm_manager.root_level) { + ats_entries = amdgpu_vm_level_shift(adev, level); + ats_entries += AMDGPU_GPU_PAGE_SHIFT; + ats_entries = AMDGPU_VA_HOLE_START >> ats_entries; + ats_entries = min(ats_entries, entries); + entries -= ats_entries; + } else { + ats_entries = entries; + entries = 0; + } + } else { + ats_entries = 0; + } + + ring = container_of(vm->entity.sched, struct amdgpu_ring, sched); + + r = reservation_object_reserve_shared(bo->tbo.resv); + if (r) + return r; + + r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); + if (r) + goto error; + + r = amdgpu_job_alloc_with_ib(adev, 64, &job); + if (r) + goto error; + + if (ats_entries) { + uint64_t ats_value; + + ats_value = AMDGPU_PTE_DEFAULT_ATC; + if (level != AMDGPU_VM_PTB) + ats_value |= AMDGPU_PDE_PTE; + + amdgpu_vm_set_pte_pde(adev, &job->ibs[0], addr, 0, + ats_entries, 0, ats_value); + addr += ats_entries * 8; + } + + if (entries) + amdgpu_vm_set_pte_pde(adev, &job->ibs[0], addr, 0, + entries, 0, 0); + + amdgpu_ring_pad_ib(ring, &job->ibs[0]); + + WARN_ON(job->ibs[0].length_dw > 64); + r = amdgpu_sync_resv(adev, &job->sync, bo->tbo.resv, + AMDGPU_FENCE_OWNER_UNDEFINED, false); + if (r) + goto error_free; + + r = amdgpu_job_submit(job, ring, &vm->entity, + AMDGPU_FENCE_OWNER_UNDEFINED, &fence); + if (r) + goto error_free; + + amdgpu_bo_fence(bo, fence, true); + dma_fence_put(fence); + + if (bo->shadow) + return amdgpu_vm_clear_bo(adev, vm, bo->shadow, + level, pte_support_ats); + + return 0; + +error_free: + amdgpu_job_free(job); + +error: 
+ return r; +} + +/** * amdgpu_vm_alloc_levels - allocate the PD/PT levels * * @adev: amdgpu_device pointer @@ -270,13 +370,12 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct amdgpu_vm_pt *parent, uint64_t saddr, uint64_t eaddr, - unsigned level) + unsigned level, bool ats) { unsigned shift = amdgpu_vm_level_shift(adev, level); unsigned pt_idx, from, to; - int r; u64 flags; - uint64_t init_value = 0; + int r; if (!parent->entries) { unsigned num_entries = amdgpu_vm_num_entries(adev, level); @@ -299,21 +398,13 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, saddr = saddr & ((1 << shift) - 1); eaddr = eaddr & ((1 << shift) - 1); - flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | - AMDGPU_GEM_CREATE_VRAM_CLEARED; + flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; if (vm->use_cpu_for_update) flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; else flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS | AMDGPU_GEM_CREATE_SHADOW); - if (vm->pte_support_ats) { - init_value = AMDGPU_PTE_DEFAULT_ATC; - if (level != AMDGPU_VM_PTB) - init_value |= AMDGPU_PDE_PTE; - - } - /* walk over the address space and allocate the page tables */ for (pt_idx = from; pt_idx <= to; ++pt_idx) { struct reservation_object *resv = vm->root.base.bo->tbo.resv; @@ -323,16 +414,23 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, if (!entry->base.bo) { r = amdgpu_bo_create(adev, amdgpu_vm_bo_size(adev, level), - AMDGPU_GPU_PAGE_SIZE, true, - AMDGPU_GEM_DOMAIN_VRAM, - flags, - NULL, resv, init_value, &pt); + AMDGPU_GPU_PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, flags, + ttm_bo_type_kernel, resv, &pt); if (r) return r; + r = amdgpu_vm_clear_bo(adev, vm, pt, level, ats); + if (r) { + amdgpu_bo_unref(&pt->shadow); + amdgpu_bo_unref(&pt); + return r; + } + if (vm->use_cpu_for_update) { r = amdgpu_bo_kmap(pt, NULL); if (r) { + amdgpu_bo_unref(&pt->shadow); amdgpu_bo_unref(&pt); return r; } @@ -356,7 +454,7 @@ static int amdgpu_vm_alloc_levels(struct 
amdgpu_device *adev, uint64_t sub_eaddr = (pt_idx == to) ? eaddr : ((1 << shift) - 1); r = amdgpu_vm_alloc_levels(adev, vm, entry, sub_saddr, - sub_eaddr, level); + sub_eaddr, level, ats); if (r) return r; } @@ -379,26 +477,29 @@ int amdgpu_vm_alloc_pts(struct amdgpu_device *adev, struct amdgpu_vm *vm, uint64_t saddr, uint64_t size) { - uint64_t last_pfn; uint64_t eaddr; + bool ats = false; /* validate the parameters */ if (saddr & AMDGPU_GPU_PAGE_MASK || size & AMDGPU_GPU_PAGE_MASK) return -EINVAL; eaddr = saddr + size - 1; - last_pfn = eaddr / AMDGPU_GPU_PAGE_SIZE; - if (last_pfn >= adev->vm_manager.max_pfn) { - dev_err(adev->dev, "va above limit (0x%08llX >= 0x%08llX)\n", - last_pfn, adev->vm_manager.max_pfn); - return -EINVAL; - } + + if (vm->pte_support_ats) + ats = saddr < AMDGPU_VA_HOLE_START; saddr /= AMDGPU_GPU_PAGE_SIZE; eaddr /= AMDGPU_GPU_PAGE_SIZE; + if (eaddr >= adev->vm_manager.max_pfn) { + dev_err(adev->dev, "va above limit (0x%08llX >= 0x%08llX)\n", + eaddr, adev->vm_manager.max_pfn); + return -EINVAL; + } + return amdgpu_vm_alloc_levels(adev, vm, &vm->root, saddr, eaddr, - adev->vm_manager.root_level); + adev->vm_manager.root_level, ats); } /** @@ -465,7 +566,7 @@ bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring, static bool amdgpu_vm_is_large_bar(struct amdgpu_device *adev) { - return (adev->mc.real_vram_size == adev->mc.visible_vram_size); + return (adev->gmc.real_vram_size == adev->gmc.visible_vram_size); } /** @@ -491,14 +592,24 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_ id->oa_base != job->oa_base || id->oa_size != job->oa_size); bool vm_flush_needed = job->vm_needs_flush; + bool pasid_mapping_needed = id->pasid != job->pasid || + !id->pasid_mapping || + !dma_fence_is_signaled(id->pasid_mapping); + struct dma_fence *fence = NULL; unsigned patch_offset = 0; int r; if (amdgpu_vmid_had_gpu_reset(adev, id)) { gds_switch_needed = true; vm_flush_needed = true; + pasid_mapping_needed = true; } + 
gds_switch_needed &= !!ring->funcs->emit_gds_switch; + vm_flush_needed &= !!ring->funcs->emit_vm_flush; + pasid_mapping_needed &= adev->gmc.gmc_funcs->emit_pasid_mapping && + ring->funcs->emit_wreg; + if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync) return 0; @@ -508,23 +619,36 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_ if (need_pipe_sync) amdgpu_ring_emit_pipeline_sync(ring); - if (ring->funcs->emit_vm_flush && vm_flush_needed) { - struct dma_fence *fence; - + if (vm_flush_needed) { trace_amdgpu_vm_flush(ring, job->vmid, job->vm_pd_addr); amdgpu_ring_emit_vm_flush(ring, job->vmid, job->vm_pd_addr); + } + + if (pasid_mapping_needed) + amdgpu_gmc_emit_pasid_mapping(ring, job->vmid, job->pasid); + if (vm_flush_needed || pasid_mapping_needed) { r = amdgpu_fence_emit(ring, &fence); if (r) return r; + } + if (vm_flush_needed) { mutex_lock(&id_mgr->lock); dma_fence_put(id->last_flush); - id->last_flush = fence; - id->current_gpu_reset_count = atomic_read(&adev->gpu_reset_counter); + id->last_flush = dma_fence_get(fence); + id->current_gpu_reset_count = + atomic_read(&adev->gpu_reset_counter); mutex_unlock(&id_mgr->lock); } + if (pasid_mapping_needed) { + id->pasid = job->pasid; + dma_fence_put(id->pasid_mapping); + id->pasid_mapping = dma_fence_get(fence); + } + dma_fence_put(fence); + if (ring->funcs->emit_gds_switch && gds_switch_needed) { id->gds_base = job->gds_base; id->gds_size = job->gds_size; @@ -578,6 +702,7 @@ struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm, * amdgpu_vm_do_set_ptes - helper to call the right asic function * * @params: see amdgpu_pte_update_params definition + * @bo: PD/PT to update * @pe: addr of the page entry * @addr: dst addr to write into pe * @count: number of page entries to update @@ -588,10 +713,12 @@ struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm, * to setup the page table using the DMA. 
*/ static void amdgpu_vm_do_set_ptes(struct amdgpu_pte_update_params *params, + struct amdgpu_bo *bo, uint64_t pe, uint64_t addr, unsigned count, uint32_t incr, uint64_t flags) { + pe += amdgpu_bo_gpu_offset(bo); trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags); if (count < 3) { @@ -608,6 +735,7 @@ static void amdgpu_vm_do_set_ptes(struct amdgpu_pte_update_params *params, * amdgpu_vm_do_copy_ptes - copy the PTEs from the GART * * @params: see amdgpu_pte_update_params definition + * @bo: PD/PT to update * @pe: addr of the page entry * @addr: dst addr to write into pe * @count: number of page entries to update @@ -617,13 +745,14 @@ static void amdgpu_vm_do_set_ptes(struct amdgpu_pte_update_params *params, * Traces the parameters and calls the DMA function to copy the PTEs. */ static void amdgpu_vm_do_copy_ptes(struct amdgpu_pte_update_params *params, + struct amdgpu_bo *bo, uint64_t pe, uint64_t addr, unsigned count, uint32_t incr, uint64_t flags) { uint64_t src = (params->src + (addr >> 12) * 8); - + pe += amdgpu_bo_gpu_offset(bo); trace_amdgpu_vm_copy_ptes(pe, src, count); amdgpu_vm_copy_pte(params->adev, params->ib, pe, src, count); @@ -657,6 +786,7 @@ static uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr) * amdgpu_vm_cpu_set_ptes - helper to update page tables via CPU * * @params: see amdgpu_pte_update_params definition + * @bo: PD/PT to update * @pe: kmap addr of the page entry * @addr: dst addr to write into pe * @count: number of page entries to update @@ -666,6 +796,7 @@ static uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr) * Write count number of PT/PD entries directly. 
*/ static void amdgpu_vm_cpu_set_ptes(struct amdgpu_pte_update_params *params, + struct amdgpu_bo *bo, uint64_t pe, uint64_t addr, unsigned count, uint32_t incr, uint64_t flags) @@ -673,14 +804,16 @@ static void amdgpu_vm_cpu_set_ptes(struct amdgpu_pte_update_params *params, unsigned int i; uint64_t value; + pe += (unsigned long)amdgpu_bo_kptr(bo); + trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags); for (i = 0; i < count; i++) { value = params->pages_addr ? amdgpu_vm_map_gart(params->pages_addr, addr) : addr; - amdgpu_gart_set_pte_pde(params->adev, (void *)(uintptr_t)pe, - i, value, flags); + amdgpu_gmc_set_pte_pde(params->adev, (void *)(uintptr_t)pe, + i, value, flags); addr += incr; } } @@ -714,8 +847,7 @@ static void amdgpu_vm_update_pde(struct amdgpu_pte_update_params *params, struct amdgpu_vm_pt *parent, struct amdgpu_vm_pt *entry) { - struct amdgpu_bo *bo = entry->base.bo, *shadow = NULL, *pbo; - uint64_t pd_addr, shadow_addr = 0; + struct amdgpu_bo *bo = parent->base.bo, *pbo; uint64_t pde, pt, flags; unsigned level; @@ -723,29 +855,17 @@ static void amdgpu_vm_update_pde(struct amdgpu_pte_update_params *params, if (entry->huge) return; - if (vm->use_cpu_for_update) { - pd_addr = (unsigned long)amdgpu_bo_kptr(parent->base.bo); - } else { - pd_addr = amdgpu_bo_gpu_offset(parent->base.bo); - shadow = parent->base.bo->shadow; - if (shadow) - shadow_addr = amdgpu_bo_gpu_offset(shadow); - } - - for (level = 0, pbo = parent->base.bo->parent; pbo; ++level) + for (level = 0, pbo = bo->parent; pbo; ++level) pbo = pbo->parent; level += params->adev->vm_manager.root_level; - pt = amdgpu_bo_gpu_offset(bo); + pt = amdgpu_bo_gpu_offset(entry->base.bo); flags = AMDGPU_PTE_VALID; - amdgpu_gart_get_vm_pde(params->adev, level, &pt, &flags); - if (shadow) { - pde = shadow_addr + (entry - parent->entries) * 8; - params->func(params, pde, pt, 1, 0, flags); - } - - pde = pd_addr + (entry - parent->entries) * 8; - params->func(params, pde, pt, 1, 0, flags); + 
amdgpu_gmc_get_vm_pde(params->adev, level, &pt, &flags); + pde = (entry - parent->entries) * 8; + if (bo->shadow) + params->func(params, bo->shadow, pde, pt, 1, 0, flags); + params->func(params, bo, pde, pt, 1, 0, flags); } /* @@ -856,7 +976,7 @@ restart: if (vm->use_cpu_for_update) { /* Flush HDP */ mb(); - amdgpu_gart_flush_gpu_tlb(adev, 0); + amdgpu_asic_flush_hdp(adev, NULL); } else if (params.ib->length_dw == 0) { amdgpu_job_free(job); } else { @@ -870,11 +990,6 @@ restart: amdgpu_ring_pad_ib(ring, params.ib); amdgpu_sync_resv(adev, &job->sync, root->tbo.resv, AMDGPU_FENCE_OWNER_VM, false); - if (root->shadow) - amdgpu_sync_resv(adev, &job->sync, - root->shadow->tbo.resv, - AMDGPU_FENCE_OWNER_VM, false); - WARN_ON(params.ib->length_dw > ndw); r = amdgpu_job_submit(job, ring, &vm->entity, AMDGPU_FENCE_OWNER_VM, &fence); @@ -946,7 +1061,7 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p, unsigned nptes, uint64_t dst, uint64_t flags) { - uint64_t pd_addr, pde; + uint64_t pde; /* In the case of a mixed PT the PDE must point to it*/ if (p->adev->asic_type >= CHIP_VEGA10 && !p->src && @@ -967,21 +1082,12 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p, } entry->huge = true; - amdgpu_gart_get_vm_pde(p->adev, AMDGPU_VM_PDB0, - &dst, &flags); + amdgpu_gmc_get_vm_pde(p->adev, AMDGPU_VM_PDB0, &dst, &flags); - if (p->func == amdgpu_vm_cpu_set_ptes) { - pd_addr = (unsigned long)amdgpu_bo_kptr(parent->base.bo); - } else { - if (parent->base.bo->shadow) { - pd_addr = amdgpu_bo_gpu_offset(parent->base.bo->shadow); - pde = pd_addr + (entry - parent->entries) * 8; - p->func(p, pde, dst, 1, 0, flags); - } - pd_addr = amdgpu_bo_gpu_offset(parent->base.bo); - } - pde = pd_addr + (entry - parent->entries) * 8; - p->func(p, pde, dst, 1, 0, flags); + pde = (entry - parent->entries) * 8; + if (parent->base.bo->shadow) + p->func(p, parent->base.bo->shadow, pde, dst, 1, 0, flags); + p->func(p, parent->base.bo, pde, dst, 1, 0, 
flags); } /** @@ -1007,7 +1113,6 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, uint64_t addr, pe_start; struct amdgpu_bo *pt; unsigned nptes; - bool use_cpu_update = (params->func == amdgpu_vm_cpu_set_ptes); /* walk over the address space and update the page tables */ for (addr = start; addr < end; addr += nptes, @@ -1030,20 +1135,11 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, continue; pt = entry->base.bo; - if (use_cpu_update) { - pe_start = (unsigned long)amdgpu_bo_kptr(pt); - } else { - if (pt->shadow) { - pe_start = amdgpu_bo_gpu_offset(pt->shadow); - pe_start += (addr & mask) * 8; - params->func(params, pe_start, dst, nptes, - AMDGPU_GPU_PAGE_SIZE, flags); - } - pe_start = amdgpu_bo_gpu_offset(pt); - } - - pe_start += (addr & mask) * 8; - params->func(params, pe_start, dst, nptes, + pe_start = (addr & mask) * 8; + if (pt->shadow) + params->func(params, pt->shadow, pe_start, dst, nptes, + AMDGPU_GPU_PAGE_SIZE, flags); + params->func(params, pt, pe_start, dst, nptes, AMDGPU_GPU_PAGE_SIZE, flags); } @@ -1204,11 +1300,10 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, } else { /* set page commands needed */ - ndw += ncmds * adev->vm_manager.vm_pte_funcs->set_pte_pde_num_dw; + ndw += ncmds * 10; /* extra commands for begin/end fragments */ - ndw += 2 * adev->vm_manager.vm_pte_funcs->set_pte_pde_num_dw - * adev->vm_manager.fragment_size; + ndw += 2 * 10 * adev->vm_manager.fragment_size; params.func = amdgpu_vm_do_set_ptes; } @@ -1457,7 +1552,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, if (vm->use_cpu_for_update) { /* Flush HDP */ mb(); - amdgpu_gart_flush_gpu_tlb(adev, 0); + amdgpu_asic_flush_hdp(adev, NULL); } spin_lock(&vm->status_lock); @@ -1485,7 +1580,7 @@ static void amdgpu_vm_update_prt_state(struct amdgpu_device *adev) spin_lock_irqsave(&adev->vm_manager.prt_lock, flags); enable = !!atomic_read(&adev->vm_manager.num_prt_users); - 
adev->gart.gart_funcs->set_prt(adev, enable); + adev->gmc.gmc_funcs->set_prt(adev, enable); spin_unlock_irqrestore(&adev->vm_manager.prt_lock, flags); } @@ -1494,7 +1589,7 @@ static void amdgpu_vm_update_prt_state(struct amdgpu_device *adev) */ static void amdgpu_vm_prt_get(struct amdgpu_device *adev) { - if (!adev->gart.gart_funcs->set_prt) + if (!adev->gmc.gmc_funcs->set_prt) return; if (atomic_inc_return(&adev->vm_manager.num_prt_users) == 1) @@ -1529,7 +1624,7 @@ static void amdgpu_vm_add_prt_cb(struct amdgpu_device *adev, { struct amdgpu_prt_cb *cb; - if (!adev->gart.gart_funcs->set_prt) + if (!adev->gmc.gmc_funcs->set_prt) return; cb = kmalloc(sizeof(struct amdgpu_prt_cb), GFP_KERNEL); @@ -1623,16 +1718,16 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, struct dma_fence **fence) { struct amdgpu_bo_va_mapping *mapping; + uint64_t init_pte_value = 0; struct dma_fence *f = NULL; int r; - uint64_t init_pte_value = 0; while (!list_empty(&vm->freed)) { mapping = list_first_entry(&vm->freed, struct amdgpu_bo_va_mapping, list); list_del(&mapping->list); - if (vm->pte_support_ats) + if (vm->pte_support_ats && mapping->start < AMDGPU_VA_HOLE_START) init_pte_value = AMDGPU_PTE_DEFAULT_ATC; r = amdgpu_vm_bo_update_mapping(adev, NULL, NULL, vm, @@ -2262,11 +2357,11 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, { const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE, AMDGPU_VM_PTE_COUNT(adev) * 8); - uint64_t init_pde_value = 0, flags; unsigned ring_instance; struct amdgpu_ring *ring; struct drm_sched_rq *rq; unsigned long size; + uint64_t flags; int r, i; vm->va = RB_ROOT_CACHED; @@ -2295,33 +2390,27 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & AMDGPU_VM_USE_CPU_FOR_COMPUTE); - if (adev->asic_type == CHIP_RAVEN) { + if (adev->asic_type == CHIP_RAVEN) vm->pte_support_ats = true; - init_pde_value = AMDGPU_PTE_DEFAULT_ATC - | AMDGPU_PDE_PTE; - - } - } 
else + } else { vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & AMDGPU_VM_USE_CPU_FOR_GFX); + } DRM_DEBUG_DRIVER("VM update mode is %s\n", vm->use_cpu_for_update ? "CPU" : "SDMA"); WARN_ONCE((vm->use_cpu_for_update & !amdgpu_vm_is_large_bar(adev)), "CPU update of VM recommended only for large BAR system\n"); vm->last_update = NULL; - flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | - AMDGPU_GEM_CREATE_VRAM_CLEARED; + flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; if (vm->use_cpu_for_update) flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; else - flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS | - AMDGPU_GEM_CREATE_SHADOW); + flags |= AMDGPU_GEM_CREATE_SHADOW; size = amdgpu_vm_bo_size(adev, adev->vm_manager.root_level); - r = amdgpu_bo_create(adev, size, align, true, AMDGPU_GEM_DOMAIN_VRAM, - flags, NULL, NULL, init_pde_value, - &vm->root.base.bo); + r = amdgpu_bo_create(adev, size, align, AMDGPU_GEM_DOMAIN_VRAM, flags, + ttm_bo_type_kernel, NULL, &vm->root.base.bo); if (r) goto error_free_sched_entity; @@ -2329,6 +2418,12 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, if (r) goto error_free_root; + r = amdgpu_vm_clear_bo(adev, vm, vm->root.base.bo, + adev->vm_manager.root_level, + vm->pte_support_ats); + if (r) + goto error_unreserve; + vm->root.base.vm = vm; list_add_tail(&vm->root.base.bo_list, &vm->root.base.bo->va); list_add_tail(&vm->root.base.vm_status, &vm->evicted); @@ -2352,6 +2447,9 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, return 0; +error_unreserve: + amdgpu_bo_unreserve(vm->root.base.bo); + error_free_root: amdgpu_bo_unref(&vm->root.base.bo->shadow); amdgpu_bo_unref(&vm->root.base.bo); @@ -2364,6 +2462,73 @@ error_free_sched_entity: } /** + * amdgpu_vm_make_compute - Turn a GFX VM into a compute VM + * + * This only works on GFX VMs that don't have any BOs added and no + * page tables allocated yet. 
+ * + * Changes the following VM parameters: + * - use_cpu_for_update + * - pte_support_ats + * - pasid (old PASID is released, because compute manages its own PASIDs) + * + * Reinitializes the page directory to reflect the changed ATS + * setting. May leave behind an unused shadow BO for the page + * directory when switching from SDMA updates to CPU updates. + * + * Returns 0 for success, -errno for errors. + */ +int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm) +{ + bool pte_support_ats = (adev->asic_type == CHIP_RAVEN); + int r; + + r = amdgpu_bo_reserve(vm->root.base.bo, true); + if (r) + return r; + + /* Sanity checks */ + if (!RB_EMPTY_ROOT(&vm->va.rb_root) || vm->root.entries) { + r = -EINVAL; + goto error; + } + + /* Check if PD needs to be reinitialized and do it before + * changing any other state, in case it fails. + */ + if (pte_support_ats != vm->pte_support_ats) { + r = amdgpu_vm_clear_bo(adev, vm, vm->root.base.bo, + adev->vm_manager.root_level, + pte_support_ats); + if (r) + goto error; + } + + /* Update VM state */ + vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & + AMDGPU_VM_USE_CPU_FOR_COMPUTE); + vm->pte_support_ats = pte_support_ats; + DRM_DEBUG_DRIVER("VM update mode is %s\n", + vm->use_cpu_for_update ? 
"CPU" : "SDMA"); + WARN_ONCE((vm->use_cpu_for_update & !amdgpu_vm_is_large_bar(adev)), + "CPU update of VM recommended only for large BAR system\n"); + + if (vm->pasid) { + unsigned long flags; + + spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags); + idr_remove(&adev->vm_manager.pasid_idr, vm->pasid); + spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags); + + vm->pasid = 0; + } + +error: + amdgpu_bo_unreserve(vm->root.base.bo); + return r; +} + +/** * amdgpu_vm_free_levels - free PD/PT levels * * @adev: amdgpu device structure @@ -2405,11 +2570,13 @@ static void amdgpu_vm_free_levels(struct amdgpu_device *adev, void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) { struct amdgpu_bo_va_mapping *mapping, *tmp; - bool prt_fini_needed = !!adev->gart.gart_funcs->set_prt; + bool prt_fini_needed = !!adev->gmc.gmc_funcs->set_prt; struct amdgpu_bo *root; u64 fault; int i, r; + amdgpu_amdkfd_gpuvm_destroy_cb(adev, vm); + /* Clear pending page faults from IH when the VM is destroyed */ while (kfifo_get(&vm->faults, &fault)) amdgpu_ih_clear_fault(adev, fault); |