Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 305
1 file changed, 196 insertions, 109 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 9a4e3b63f1cb..f68b7cdc370a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -127,16 +127,16 @@ struct amdgpu_bo_list_entry *amdgpu_vm_get_bos(struct amdgpu_device *adev,
 /**
  * amdgpu_vm_grab_id - allocate the next free VMID
  *
- * @ring: ring we want to submit job to
  * @vm: vm to allocate id for
+ * @ring: ring we want to submit job to
+ * @sync: sync object where we add dependencies
  *
- * Allocate an id for the vm (cayman+).
- * Returns the fence we need to sync to (if any).
+ * Allocate an id for the vm, adding fences to the sync obj as necessary.
  *
- * Global and local mutex must be locked!
+ * Global mutex must be locked!
  */
-struct amdgpu_fence *amdgpu_vm_grab_id(struct amdgpu_ring *ring,
-				       struct amdgpu_vm *vm)
+int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
+		      struct amdgpu_sync *sync)
 {
 	struct amdgpu_fence *best[AMDGPU_MAX_RINGS] = {};
 	struct amdgpu_vm_id *vm_id = &vm->ids[ring->idx];
@@ -148,7 +148,7 @@ struct amdgpu_fence *amdgpu_vm_grab_id(struct amdgpu_ring *ring,
 	/* check if the id is still valid */
 	if (vm_id->id && vm_id->last_id_use &&
 	    vm_id->last_id_use == adev->vm_manager.active[vm_id->id])
-		return NULL;
+		return 0;
 
 	/* we definately need to flush */
 	vm_id->pd_gpu_addr = ~0ll;
@@ -161,7 +161,7 @@ struct amdgpu_fence *amdgpu_vm_grab_id(struct amdgpu_ring *ring,
 			/* found a free one */
 			vm_id->id = i;
 			trace_amdgpu_vm_grab_id(i, ring->idx);
-			return NULL;
+			return 0;
 		}
 
 		if (amdgpu_fence_is_earlier(fence, best[fence->ring->idx])) {
@@ -172,15 +172,19 @@ struct amdgpu_fence *amdgpu_vm_grab_id(struct amdgpu_ring *ring,
 
 	for (i = 0; i < 2; ++i) {
 		if (choices[i]) {
+			struct amdgpu_fence *fence;
+
+			fence = adev->vm_manager.active[choices[i]];
 			vm_id->id = choices[i];
+
 			trace_amdgpu_vm_grab_id(choices[i], ring->idx);
-			return adev->vm_manager.active[choices[i]];
+			return amdgpu_sync_fence(ring->adev, sync, &fence->base);
 		}
 	}
 
 	/* should never happen */
 	BUG();
-	return NULL;
+	return -EINVAL;
 }
 
 /**
@@ -196,17 +200,29 @@ struct amdgpu_fence *amdgpu_vm_grab_id(struct amdgpu_ring *ring,
  */
 void amdgpu_vm_flush(struct amdgpu_ring *ring,
 		     struct amdgpu_vm *vm,
-		     struct amdgpu_fence *updates)
+		     struct fence *updates)
 {
 	uint64_t pd_addr = amdgpu_bo_gpu_offset(vm->page_directory);
 	struct amdgpu_vm_id *vm_id = &vm->ids[ring->idx];
+	struct fence *flushed_updates = vm_id->flushed_updates;
+	bool is_earlier = false;
 
-	if (pd_addr != vm_id->pd_gpu_addr || !vm_id->flushed_updates ||
-	    amdgpu_fence_is_earlier(vm_id->flushed_updates, updates)) {
+	if (flushed_updates && updates) {
+		BUG_ON(flushed_updates->context != updates->context);
+		is_earlier = (updates->seqno - flushed_updates->seqno <=
+			      INT_MAX) ? true : false;
+	}
+
+	if (pd_addr != vm_id->pd_gpu_addr || !flushed_updates ||
+	    is_earlier) {
 
 		trace_amdgpu_vm_flush(pd_addr, ring->idx, vm_id->id);
-		amdgpu_fence_unref(&vm_id->flushed_updates);
-		vm_id->flushed_updates = amdgpu_fence_ref(updates);
+		if (is_earlier) {
+			vm_id->flushed_updates = fence_get(updates);
+			fence_put(flushed_updates);
+		}
+		if (!flushed_updates)
+			vm_id->flushed_updates = fence_get(updates);
 		vm_id->pd_gpu_addr = pd_addr;
 		amdgpu_ring_emit_vm_flush(ring, vm_id->id, vm_id->pd_gpu_addr);
 	}
@@ -300,6 +316,15 @@ static void amdgpu_vm_update_pages(struct amdgpu_device *adev,
 	}
 }
 
+int amdgpu_vm_free_job(struct amdgpu_job *sched_job)
+{
+	int i;
+	for (i = 0; i < sched_job->num_ibs; i++)
+		amdgpu_ib_free(sched_job->adev, &sched_job->ibs[i]);
+	kfree(sched_job->ibs);
+	return 0;
+}
+
 /**
  * amdgpu_vm_clear_bo - initially clear the page dir/table
  *
@@ -310,7 +335,8 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
 			      struct amdgpu_bo *bo)
 {
 	struct amdgpu_ring *ring = adev->vm_manager.vm_pte_funcs_ring;
-	struct amdgpu_ib ib;
+	struct fence *fence = NULL;
+	struct amdgpu_ib *ib;
 	unsigned entries;
 	uint64_t addr;
 	int r;
@@ -330,24 +356,33 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
 	addr = amdgpu_bo_gpu_offset(bo);
 	entries = amdgpu_bo_size(bo) / 8;
 
-	r = amdgpu_ib_get(ring, NULL, entries * 2 + 64, &ib);
-	if (r)
+	ib = kzalloc(sizeof(struct amdgpu_ib), GFP_KERNEL);
+	if (!ib)
 		goto error_unreserve;
 
-	ib.length_dw = 0;
-
-	amdgpu_vm_update_pages(adev, &ib, addr, 0, entries, 0, 0, 0);
-	amdgpu_vm_pad_ib(adev, &ib);
-	WARN_ON(ib.length_dw > 64);
-
-	r = amdgpu_ib_schedule(adev, 1, &ib, AMDGPU_FENCE_OWNER_VM);
+	r = amdgpu_ib_get(ring, NULL, entries * 2 + 64, ib);
 	if (r)
 		goto error_free;
 
-	amdgpu_bo_fence(bo, ib.fence, true);
-
+	ib->length_dw = 0;
+
+	amdgpu_vm_update_pages(adev, ib, addr, 0, entries, 0, 0, 0);
+	amdgpu_vm_pad_ib(adev, ib);
+	WARN_ON(ib->length_dw > 64);
+	r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, ib, 1,
+						 &amdgpu_vm_free_job,
+						 AMDGPU_FENCE_OWNER_VM,
+						 &fence);
+	if (!r)
+		amdgpu_bo_fence(bo, fence, true);
+	fence_put(fence);
+	if (amdgpu_enable_scheduler) {
+		amdgpu_bo_unreserve(bo);
+		return 0;
+	}
 error_free:
-	amdgpu_ib_free(adev, &ib);
+	amdgpu_ib_free(adev, ib);
+	kfree(ib);
 
 error_unreserve:
 	amdgpu_bo_unreserve(bo);
@@ -400,7 +435,9 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
 	uint32_t incr = AMDGPU_VM_PTE_COUNT * 8;
 	uint64_t last_pde = ~0, last_pt = ~0;
 	unsigned count = 0, pt_idx, ndw;
-	struct amdgpu_ib ib;
+	struct amdgpu_ib *ib;
+	struct fence *fence = NULL;
+
 	int r;
 
 	/* padding, etc. */
@@ -413,10 +450,14 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
 	if (ndw > 0xfffff)
 		return -ENOMEM;
 
-	r = amdgpu_ib_get(ring, NULL, ndw * 4, &ib);
+	ib = kzalloc(sizeof(struct amdgpu_ib), GFP_KERNEL);
+	if (!ib)
+		return -ENOMEM;
+
+	r = amdgpu_ib_get(ring, NULL, ndw * 4, ib);
 	if (r)
 		return r;
-	ib.length_dw = 0;
+	ib->length_dw = 0;
 
 	/* walk over the address space and update the page directory */
 	for (pt_idx = 0; pt_idx <= vm->max_pde_used; ++pt_idx) {
@@ -436,7 +477,7 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
 		    ((last_pt + incr * count) != pt)) {
 
 			if (count) {
-				amdgpu_vm_update_pages(adev, &ib, last_pde,
+				amdgpu_vm_update_pages(adev, ib, last_pde,
 						       last_pt, count, incr,
 						       AMDGPU_PTE_VALID, 0);
 			}
@@ -450,23 +491,37 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
 	}
 
 	if (count)
-		amdgpu_vm_update_pages(adev, &ib, last_pde, last_pt, count,
+		amdgpu_vm_update_pages(adev, ib, last_pde, last_pt, count,
 				       incr, AMDGPU_PTE_VALID, 0);
 
-	if (ib.length_dw != 0) {
-		amdgpu_vm_pad_ib(adev, &ib);
-		amdgpu_sync_resv(adev, &ib.sync, pd->tbo.resv, AMDGPU_FENCE_OWNER_VM);
-		WARN_ON(ib.length_dw > ndw);
-		r = amdgpu_ib_schedule(adev, 1, &ib, AMDGPU_FENCE_OWNER_VM);
-		if (r) {
-			amdgpu_ib_free(adev, &ib);
-			return r;
-		}
-		amdgpu_bo_fence(pd, ib.fence, true);
+	if (ib->length_dw != 0) {
+		amdgpu_vm_pad_ib(adev, ib);
+		amdgpu_sync_resv(adev, &ib->sync, pd->tbo.resv, AMDGPU_FENCE_OWNER_VM);
+		WARN_ON(ib->length_dw > ndw);
+		r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, ib, 1,
							 &amdgpu_vm_free_job,
							 AMDGPU_FENCE_OWNER_VM,
							 &fence);
+		if (r)
+			goto error_free;
+
+		amdgpu_bo_fence(pd, fence, true);
+		fence_put(vm->page_directory_fence);
+		vm->page_directory_fence = fence_get(fence);
+		fence_put(fence);
+	}
+
+	if (!amdgpu_enable_scheduler || ib->length_dw == 0) {
+		amdgpu_ib_free(adev, ib);
+		kfree(ib);
 	}
 
-	amdgpu_ib_free(adev, &ib);
 	return 0;
+
+error_free:
+	amdgpu_ib_free(adev, ib);
+	kfree(ib);
+	return r;
 }
 
 /**
@@ -572,9 +627,14 @@ static int amdgpu_vm_update_ptes(struct amdgpu_device *adev,
 {
 	uint64_t mask = AMDGPU_VM_PTE_COUNT - 1;
 	uint64_t last_pte = ~0, last_dst = ~0;
+	void *owner = AMDGPU_FENCE_OWNER_VM;
 	unsigned count = 0;
 	uint64_t addr;
 
+	/* sync to everything on unmapping */
+	if (!(flags & AMDGPU_PTE_VALID))
+		owner = AMDGPU_FENCE_OWNER_UNDEFINED;
+
 	/* walk over the address space and update the page tables */
 	for (addr = start; addr < end; ) {
 		uint64_t pt_idx = addr >> amdgpu_vm_block_size;
@@ -583,8 +643,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_device *adev,
 		uint64_t pte;
 		int r;
 
-		amdgpu_sync_resv(adev, &ib->sync, pt->tbo.resv,
-				 AMDGPU_FENCE_OWNER_VM);
+		amdgpu_sync_resv(adev, &ib->sync, pt->tbo.resv, owner);
 		r = reservation_object_reserve_shared(pt->tbo.resv);
 		if (r)
 			return r;
@@ -640,7 +699,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_device *adev,
  */
 static void amdgpu_vm_fence_pts(struct amdgpu_vm *vm,
 				uint64_t start, uint64_t end,
-				struct amdgpu_fence *fence)
+				struct fence *fence)
 {
 	unsigned i;
 
@@ -670,12 +729,13 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 				       struct amdgpu_vm *vm,
 				       struct amdgpu_bo_va_mapping *mapping,
 				       uint64_t addr, uint32_t gtt_flags,
-				       struct amdgpu_fence **fence)
+				       struct fence **fence)
 {
 	struct amdgpu_ring *ring = adev->vm_manager.vm_pte_funcs_ring;
 	unsigned nptes, ncmds, ndw;
 	uint32_t flags = gtt_flags;
-	struct amdgpu_ib ib;
+	struct amdgpu_ib *ib;
+	struct fence *f = NULL;
 	int r;
 
 	/* normally,bo_va->flags only contians READABLE and WIRTEABLE bit go here
@@ -722,46 +782,54 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 	if (ndw > 0xfffff)
 		return -ENOMEM;
 
-	r = amdgpu_ib_get(ring, NULL, ndw * 4, &ib);
-	if (r)
-		return r;
-	ib.length_dw = 0;
-
-	if (!(flags & AMDGPU_PTE_VALID)) {
-		unsigned i;
+	ib = kzalloc(sizeof(struct amdgpu_ib), GFP_KERNEL);
+	if (!ib)
+		return -ENOMEM;
 
-		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
-			struct amdgpu_fence *f = vm->ids[i].last_id_use;
-			amdgpu_sync_fence(&ib.sync, f);
-		}
+	r = amdgpu_ib_get(ring, NULL, ndw * 4, ib);
+	if (r) {
+		kfree(ib);
+		return r;
 	}
 
-	r = amdgpu_vm_update_ptes(adev, vm, &ib, mapping->it.start,
+	ib->length_dw = 0;
+
+	r = amdgpu_vm_update_ptes(adev, vm, ib, mapping->it.start,
 				  mapping->it.last + 1, addr + mapping->offset,
 				  flags, gtt_flags);
 
 	if (r) {
-		amdgpu_ib_free(adev, &ib);
+		amdgpu_ib_free(adev, ib);
+		kfree(ib);
 		return r;
 	}
 
-	amdgpu_vm_pad_ib(adev, &ib);
-	WARN_ON(ib.length_dw > ndw);
+	amdgpu_vm_pad_ib(adev, ib);
+	WARN_ON(ib->length_dw > ndw);
+	r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, ib, 1,
+						 &amdgpu_vm_free_job,
+						 AMDGPU_FENCE_OWNER_VM,
+						 &f);
+	if (r)
+		goto error_free;
 
-	r = amdgpu_ib_schedule(adev, 1, &ib, AMDGPU_FENCE_OWNER_VM);
-	if (r) {
-		amdgpu_ib_free(adev, &ib);
-		return r;
-	}
 	amdgpu_vm_fence_pts(vm, mapping->it.start,
-			    mapping->it.last + 1, ib.fence);
+			    mapping->it.last + 1, f);
 	if (fence) {
-		amdgpu_fence_unref(fence);
-		*fence = amdgpu_fence_ref(ib.fence);
+		fence_put(*fence);
+		*fence = fence_get(f);
+	}
+	fence_put(f);
+	if (!amdgpu_enable_scheduler) {
+		amdgpu_ib_free(adev, ib);
+		kfree(ib);
 	}
-	amdgpu_ib_free(adev, &ib);
-
 	return 0;
+
+error_free:
+	amdgpu_ib_free(adev, ib);
+	kfree(ib);
+	return r;
 }
 
 /**
@@ -794,21 +862,25 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 		addr = 0;
 	}
 
-	if (addr == bo_va->addr)
-		return 0;
-
 	flags = amdgpu_ttm_tt_pte_flags(adev, bo_va->bo->tbo.ttm, mem);
 
-	list_for_each_entry(mapping, &bo_va->mappings, list) {
+	spin_lock(&vm->status_lock);
+	if (!list_empty(&bo_va->vm_status))
+		list_splice_init(&bo_va->valids, &bo_va->invalids);
+	spin_unlock(&vm->status_lock);
+
+	list_for_each_entry(mapping, &bo_va->invalids, list) {
 		r = amdgpu_vm_bo_update_mapping(adev, vm, mapping, addr,
 						flags, &bo_va->last_pt_update);
 		if (r)
 			return r;
 	}
 
-	bo_va->addr = addr;
 	spin_lock(&vm->status_lock);
+	list_splice_init(&bo_va->invalids, &bo_va->valids);
 	list_del_init(&bo_va->vm_status);
+	if (!mem)
+		list_add(&bo_va->vm_status, &vm->cleared);
 	spin_unlock(&vm->status_lock);
 
 	return 0;
@@ -861,7 +933,7 @@ int amdgpu_vm_clear_invalids(struct amdgpu_device *adev,
 			     struct amdgpu_vm *vm, struct amdgpu_sync *sync)
 {
 	struct amdgpu_bo_va *bo_va = NULL;
-	int r;
+	int r = 0;
 
 	spin_lock(&vm->status_lock);
 	while (!list_empty(&vm->invalidated)) {
@@ -878,8 +950,9 @@ int amdgpu_vm_clear_invalids(struct amdgpu_device *adev,
 	spin_unlock(&vm->status_lock);
 
 	if (bo_va)
-		amdgpu_sync_fence(sync, bo_va->last_pt_update);
-	return 0;
+		r = amdgpu_sync_fence(adev, sync, bo_va->last_pt_update);
+
+	return r;
 }
 
 /**
@@ -907,10 +980,10 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
 	}
 	bo_va->vm = vm;
 	bo_va->bo = bo;
-	bo_va->addr = 0;
 	bo_va->ref_count = 1;
 	INIT_LIST_HEAD(&bo_va->bo_list);
-	INIT_LIST_HEAD(&bo_va->mappings);
+	INIT_LIST_HEAD(&bo_va->valids);
+	INIT_LIST_HEAD(&bo_va->invalids);
 	INIT_LIST_HEAD(&bo_va->vm_status);
 
 	mutex_lock(&vm->mutex);
@@ -999,12 +1072,10 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
 	mapping->offset = offset;
 	mapping->flags = flags;
 
-	list_add(&mapping->list, &bo_va->mappings);
+	list_add(&mapping->list, &bo_va->invalids);
 	interval_tree_insert(&mapping->it, &vm->va);
 	trace_amdgpu_vm_bo_map(bo_va, mapping);
 
-	bo_va->addr = 0;
-
 	/* Make sure the page tables are allocated */
 	saddr >>= amdgpu_vm_block_size;
 	eaddr >>= amdgpu_vm_block_size;
@@ -1028,7 +1099,9 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
 
 		r = amdgpu_bo_create(adev, AMDGPU_VM_PTE_COUNT * 8,
 				     AMDGPU_GPU_PAGE_SIZE, true,
-				     AMDGPU_GEM_DOMAIN_VRAM, 0, NULL, &pt);
+				     AMDGPU_GEM_DOMAIN_VRAM,
+				     AMDGPU_GEM_CREATE_NO_CPU_ACCESS,
+				     NULL, &pt);
 		if (r)
 			goto error_free;
 
@@ -1085,17 +1158,27 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
 {
 	struct amdgpu_bo_va_mapping *mapping;
 	struct amdgpu_vm *vm = bo_va->vm;
+	bool valid = true;
 
 	saddr /= AMDGPU_GPU_PAGE_SIZE;
 
-	list_for_each_entry(mapping, &bo_va->mappings, list) {
+	list_for_each_entry(mapping, &bo_va->valids, list) {
 		if (mapping->it.start == saddr)
 			break;
 	}
 
-	if (&mapping->list == &bo_va->mappings) {
-		amdgpu_bo_unreserve(bo_va->bo);
-		return -ENOENT;
+	if (&mapping->list == &bo_va->valids) {
+		valid = false;
+
+		list_for_each_entry(mapping, &bo_va->invalids, list) {
+			if (mapping->it.start == saddr)
+				break;
+		}
+
+		if (&mapping->list == &bo_va->invalids) {
+			amdgpu_bo_unreserve(bo_va->bo);
+			return -ENOENT;
+		}
 	}
 
 	mutex_lock(&vm->mutex);
@@ -1103,12 +1186,10 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
 	interval_tree_remove(&mapping->it, &vm->va);
 	trace_amdgpu_vm_bo_unmap(bo_va, mapping);
 
-	if (bo_va->addr) {
-		/* clear the old address */
+	if (valid)
 		list_add(&mapping->list, &vm->freed);
-	} else {
+	else
 		kfree(mapping);
-	}
 	mutex_unlock(&vm->mutex);
 	amdgpu_bo_unreserve(bo_va->bo);
 
@@ -1139,16 +1220,19 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
 	list_del(&bo_va->vm_status);
 	spin_unlock(&vm->status_lock);
 
-	list_for_each_entry_safe(mapping, next, &bo_va->mappings, list) {
+	list_for_each_entry_safe(mapping, next, &bo_va->valids, list) {
 		list_del(&mapping->list);
 		interval_tree_remove(&mapping->it, &vm->va);
 		trace_amdgpu_vm_bo_unmap(bo_va, mapping);
-		if (bo_va->addr)
-			list_add(&mapping->list, &vm->freed);
-		else
-			kfree(mapping);
+		list_add(&mapping->list, &vm->freed);
+	}
+	list_for_each_entry_safe(mapping, next, &bo_va->invalids, list) {
+		list_del(&mapping->list);
+		interval_tree_remove(&mapping->it, &vm->va);
+		kfree(mapping);
 	}
-	amdgpu_fence_unref(&bo_va->last_pt_update);
+
+	fence_put(bo_va->last_pt_update);
 	kfree(bo_va);
 
 	mutex_unlock(&vm->mutex);
@@ -1169,12 +1253,10 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
 	struct amdgpu_bo_va *bo_va;
 
 	list_for_each_entry(bo_va, &bo->va, bo_list) {
-		if (bo_va->addr) {
-			spin_lock(&bo_va->vm->status_lock);
-			list_del(&bo_va->vm_status);
+		spin_lock(&bo_va->vm->status_lock);
+		if (list_empty(&bo_va->vm_status))
 			list_add(&bo_va->vm_status, &bo_va->vm->invalidated);
-			spin_unlock(&bo_va->vm->status_lock);
-		}
+		spin_unlock(&bo_va->vm->status_lock);
 	}
 }
 
@@ -1202,6 +1284,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 	vm->va = RB_ROOT;
 	spin_lock_init(&vm->status_lock);
 	INIT_LIST_HEAD(&vm->invalidated);
+	INIT_LIST_HEAD(&vm->cleared);
 	INIT_LIST_HEAD(&vm->freed);
 
 	pd_size = amdgpu_vm_directory_size(adev);
@@ -1215,8 +1298,11 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 		return -ENOMEM;
 	}
 
+	vm->page_directory_fence = NULL;
+
 	r = amdgpu_bo_create(adev, pd_size, align, true,
-			     AMDGPU_GEM_DOMAIN_VRAM, 0,
+			     AMDGPU_GEM_DOMAIN_VRAM,
+			     AMDGPU_GEM_CREATE_NO_CPU_ACCESS,
 			     NULL, &vm->page_directory);
 	if (r)
 		return r;
@@ -1263,9 +1349,10 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 	kfree(vm->page_tables);
 
 	amdgpu_bo_unref(&vm->page_directory);
+	fence_put(vm->page_directory_fence);
 
 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
-		amdgpu_fence_unref(&vm->ids[i].flushed_updates);
+		fence_put(vm->ids[i].flushed_updates);
 		amdgpu_fence_unref(&vm->ids[i].last_id_use);
 	}
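
Note on the ordering check introduced in amdgpu_vm_flush() above: the patch decides whether updates is newer than the previously flushed fence by testing updates->seqno - flushed_updates->seqno <= INT_MAX instead of comparing the sequence numbers directly. The standalone sketch below (not part of the patch; the helper name is made up for illustration) shows why the unsigned subtraction still gives the right answer when the 32-bit sequence counter wraps around, assuming two live fences are never more than 2^31 steps apart:

/* Standalone illustration (not from the patch): wraparound-safe
 * "is not older" test mirroring the is_earlier computation above.
 */
#include <limits.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* hypothetical helper; mirrors (a - b <= INT_MAX) from the patch */
static bool seqno_not_older(uint32_t a, uint32_t b)
{
	/* unsigned subtraction: a small result means a is at or ahead
	 * of b, even if the counter wrapped past 0xffffffff */
	return (uint32_t)(a - b) <= (uint32_t)INT_MAX;
}

int main(void)
{
	printf("%d\n", seqno_not_older(105u, 100u));      /* 1: plainly newer */
	printf("%d\n", seqno_not_older(5u, 0xfffffffbu)); /* 1: newer across wraparound */
	printf("%d\n", seqno_not_older(100u, 105u));      /* 0: older */
	return 0;
}

The check only makes sense for fences from the same context, which is why the patch places BUG_ON(flushed_updates->context != updates->context) immediately before it.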