diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 259 |
1 files changed, 149 insertions, 110 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 9c85a90be293..502b94fb116a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -31,6 +31,7 @@ #include <drm/drm_syncobj.h> #include "amdgpu.h" #include "amdgpu_trace.h" +#include "amdgpu_gmc.h" static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p, struct drm_amdgpu_cs_chunk_fence *data, @@ -65,11 +66,35 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p, return 0; } -static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) +static int amdgpu_cs_bo_handles_chunk(struct amdgpu_cs_parser *p, + struct drm_amdgpu_bo_list_in *data) +{ + int r; + struct drm_amdgpu_bo_list_entry *info = NULL; + + r = amdgpu_bo_create_list_entry_array(data, &info); + if (r) + return r; + + r = amdgpu_bo_list_create(p->adev, p->filp, info, data->bo_number, + &p->bo_list); + if (r) + goto error_free; + + kvfree(info); + return 0; + +error_free: + if (info) + kvfree(info); + + return r; +} + +static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs *cs) { struct amdgpu_fpriv *fpriv = p->filp->driver_priv; struct amdgpu_vm *vm = &fpriv->vm; - union drm_amdgpu_cs *cs = data; uint64_t *chunk_array_user; uint64_t *chunk_array; unsigned size, num_ibs = 0; @@ -163,6 +188,19 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) break; + case AMDGPU_CHUNK_ID_BO_HANDLES: + size = sizeof(struct drm_amdgpu_bo_list_in); + if (p->chunks[i].length_dw * sizeof(uint32_t) < size) { + ret = -EINVAL; + goto free_partial_kdata; + } + + ret = amdgpu_cs_bo_handles_chunk(p, p->chunks[i].kdata); + if (ret) + goto free_partial_kdata; + + break; + case AMDGPU_CHUNK_ID_DEPENDENCIES: case AMDGPU_CHUNK_ID_SYNCOBJ_IN: case AMDGPU_CHUNK_ID_SYNCOBJ_OUT: @@ -186,6 +224,10 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) if (p->uf_entry.robj) p->job->uf_addr = uf_offset; kfree(chunk_array); + + /* Use this opportunity to fill in task info for the vm */ + amdgpu_vm_set_task_info(vm); + return 0; free_all_kdata: @@ -257,7 +299,7 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev, return; } - total_vram = adev->gmc.real_vram_size - adev->vram_pin_size; + total_vram = adev->gmc.real_vram_size - atomic64_read(&adev->vram_pin_size); used_vram = amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]); free_vram = used_vram >= total_vram ? 0 : total_vram - used_vram; @@ -302,7 +344,7 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev, *max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us); /* Do the same for visible VRAM if half of it is free */ - if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size) { + if (!amdgpu_gmc_vram_full_visible(&adev->gmc)) { u64 total_vis_vram = adev->gmc.visible_vram_size; u64 used_vis_vram = amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]); @@ -359,7 +401,7 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p, * to move it. Don't move anything if the threshold is zero. */ if (p->bytes_moved < p->bytes_moved_threshold) { - if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size && + if (!amdgpu_gmc_vram_full_visible(&adev->gmc) && (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) { /* And don't move a CPU_ACCESS_REQUIRED BO to limited * visible VRAM if we've depleted our allowance to do @@ -377,11 +419,11 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p, } retry: - amdgpu_ttm_placement_from_domain(bo, domain); + amdgpu_bo_placement_from_domain(bo, domain); r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); p->bytes_moved += ctx.bytes_moved; - if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size && + if (!amdgpu_gmc_vram_full_visible(&adev->gmc) && amdgpu_bo_in_cpu_visible_vram(bo)) p->bytes_moved_vis += ctx.bytes_moved; @@ -434,9 +476,9 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p, /* Good we can try to move this BO somewhere else */ update_bytes_moved_vis = - adev->gmc.visible_vram_size < adev->gmc.real_vram_size && - amdgpu_bo_in_cpu_visible_vram(bo); - amdgpu_ttm_placement_from_domain(bo, other); + !amdgpu_gmc_vram_full_visible(&adev->gmc) && + amdgpu_bo_in_cpu_visible_vram(bo); + amdgpu_bo_placement_from_domain(bo, other); r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); p->bytes_moved += ctx.bytes_moved; if (update_bytes_moved_vis) @@ -490,8 +532,8 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p, /* Check if we have user pages and nobody bound the BO already */ if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) && lobj->user_pages) { - amdgpu_ttm_placement_from_domain(bo, - AMDGPU_GEM_DOMAIN_CPU); + amdgpu_bo_placement_from_domain(bo, + AMDGPU_GEM_DOMAIN_CPU); r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); if (r) return r; @@ -519,23 +561,38 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, union drm_amdgpu_cs *cs) { struct amdgpu_fpriv *fpriv = p->filp->driver_priv; + struct amdgpu_vm *vm = &fpriv->vm; struct amdgpu_bo_list_entry *e; struct list_head duplicates; - unsigned i, tries = 10; struct amdgpu_bo *gds; struct amdgpu_bo *gws; struct amdgpu_bo *oa; + unsigned tries = 10; int r; INIT_LIST_HEAD(&p->validated); - p->bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle); - if (p->bo_list) { - amdgpu_bo_list_get_list(p->bo_list, &p->validated); - if (p->bo_list->first_userptr != p->bo_list->num_entries) - p->mn = amdgpu_mn_get(p->adev, AMDGPU_MN_TYPE_GFX); + /* p->bo_list could already be assigned if AMDGPU_CHUNK_ID_BO_HANDLES is present */ + if (cs->in.bo_list_handle) { + if (p->bo_list) + return -EINVAL; + + r = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle, + &p->bo_list); + if (r) + return r; + } else if (!p->bo_list) { + /* Create a empty bo_list when no handle is provided */ + r = amdgpu_bo_list_create(p->adev, p->filp, NULL, 0, + &p->bo_list); + if (r) + return r; } + amdgpu_bo_list_get_list(p->bo_list, &p->validated); + if (p->bo_list->first_userptr != p->bo_list->num_entries) + p->mn = amdgpu_mn_get(p->adev, AMDGPU_MN_TYPE_GFX); + INIT_LIST_HEAD(&duplicates); amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd); @@ -544,7 +601,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, while (1) { struct list_head need_pages; - unsigned i; r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true, &duplicates); @@ -554,17 +610,9 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, goto error_free_pages; } - /* Without a BO list we don't have userptr BOs */ - if (!p->bo_list) - break; - INIT_LIST_HEAD(&need_pages); - for (i = p->bo_list->first_userptr; - i < p->bo_list->num_entries; ++i) { - struct amdgpu_bo *bo; - - e = &p->bo_list->array[i]; - bo = e->robj; + amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { + struct amdgpu_bo *bo = e->robj; if (amdgpu_ttm_tt_userptr_invalidated(bo->tbo.ttm, &e->user_invalidated) && e->user_pages) { @@ -656,23 +704,12 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved, p->bytes_moved_vis); - if (p->bo_list) { - struct amdgpu_vm *vm = &fpriv->vm; - unsigned i; + gds = p->bo_list->gds_obj; + gws = p->bo_list->gws_obj; + oa = p->bo_list->oa_obj; - gds = p->bo_list->gds_obj; - gws = p->bo_list->gws_obj; - oa = p->bo_list->oa_obj; - for (i = 0; i < p->bo_list->num_entries; i++) { - struct amdgpu_bo *bo = p->bo_list->array[i].robj; - - p->bo_list->array[i].bo_va = amdgpu_vm_bo_find(vm, bo); - } - } else { - gds = p->adev->gds.gds_gfx_bo; - gws = p->adev->gds.gws_gfx_bo; - oa = p->adev->gds.oa_gfx_bo; - } + amdgpu_bo_list_for_each_entry(e, p->bo_list) + e->bo_va = amdgpu_vm_bo_find(vm, e->robj); if (gds) { p->job->gds_base = amdgpu_bo_gpu_offset(gds); @@ -700,18 +737,13 @@ error_validate: error_free_pages: - if (p->bo_list) { - for (i = p->bo_list->first_userptr; - i < p->bo_list->num_entries; ++i) { - e = &p->bo_list->array[i]; - - if (!e->user_pages) - continue; + amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { + if (!e->user_pages) + continue; - release_pages(e->user_pages, - e->robj->tbo.ttm->num_pages); - kvfree(e->user_pages); - } + release_pages(e->user_pages, + e->robj->tbo.ttm->num_pages); + kvfree(e->user_pages); } return r; @@ -773,12 +805,13 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p) { - struct amdgpu_device *adev = p->adev; struct amdgpu_fpriv *fpriv = p->filp->driver_priv; + struct amdgpu_device *adev = p->adev; struct amdgpu_vm *vm = &fpriv->vm; + struct amdgpu_bo_list_entry *e; struct amdgpu_bo_va *bo_va; struct amdgpu_bo *bo; - int i, r; + int r; r = amdgpu_vm_clear_freed(adev, vm, NULL); if (r) @@ -808,29 +841,26 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p) return r; } - if (p->bo_list) { - for (i = 0; i < p->bo_list->num_entries; i++) { - struct dma_fence *f; - - /* ignore duplicates */ - bo = p->bo_list->array[i].robj; - if (!bo) - continue; + amdgpu_bo_list_for_each_entry(e, p->bo_list) { + struct dma_fence *f; - bo_va = p->bo_list->array[i].bo_va; - if (bo_va == NULL) - continue; + /* ignore duplicates */ + bo = e->robj; + if (!bo) + continue; - r = amdgpu_vm_bo_update(adev, bo_va, false); - if (r) - return r; + bo_va = e->bo_va; + if (bo_va == NULL) + continue; - f = bo_va->last_pt_update; - r = amdgpu_sync_fence(adev, &p->job->sync, f, false); - if (r) - return r; - } + r = amdgpu_vm_bo_update(adev, bo_va, false); + if (r) + return r; + f = bo_va->last_pt_update; + r = amdgpu_sync_fence(adev, &p->job->sync, f, false); + if (r) + return r; } r = amdgpu_vm_handle_moved(adev, vm); @@ -845,15 +875,14 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p) if (r) return r; - if (amdgpu_vm_debug && p->bo_list) { + if (amdgpu_vm_debug) { /* Invalidate all BOs to test for userspace bugs */ - for (i = 0; i < p->bo_list->num_entries; i++) { + amdgpu_bo_list_for_each_entry(e, p->bo_list) { /* ignore duplicates */ - bo = p->bo_list->array[i].robj; - if (!bo) + if (!e->robj) continue; - amdgpu_vm_bo_invalidate(adev, bo, false); + amdgpu_vm_bo_invalidate(adev, e->robj, false); } } @@ -865,11 +894,11 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev, { struct amdgpu_fpriv *fpriv = p->filp->driver_priv; struct amdgpu_vm *vm = &fpriv->vm; - struct amdgpu_ring *ring = p->job->ring; + struct amdgpu_ring *ring = p->ring; int r; /* Only for UVD/VCE VM emulation */ - if (p->job->ring->funcs->parse_cs) { + if (p->ring->funcs->parse_cs || p->ring->funcs->patch_cs_in_place) { unsigned i, j; for (i = 0, j = 0; i < p->nchunks && j < p->job->num_ibs; i++) { @@ -910,12 +939,20 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev, offset = m->start * AMDGPU_GPU_PAGE_SIZE; kptr += va_start - offset; - memcpy(ib->ptr, kptr, chunk_ib->ib_bytes); - amdgpu_bo_kunmap(aobj); - - r = amdgpu_ring_parse_cs(ring, p, j); - if (r) - return r; + if (p->ring->funcs->parse_cs) { + memcpy(ib->ptr, kptr, chunk_ib->ib_bytes); + amdgpu_bo_kunmap(aobj); + + r = amdgpu_ring_parse_cs(ring, p, j); + if (r) + return r; + } else { + ib->ptr = (uint32_t *)kptr; + r = amdgpu_ring_patch_cs_in_place(ring, p, j); + amdgpu_bo_kunmap(aobj); + if (r) + return r; + } j++; } @@ -983,10 +1020,10 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, } } - if (parser->job->ring && parser->job->ring != ring) + if (parser->ring && parser->ring != ring) return -EINVAL; - parser->job->ring = ring; + parser->ring = ring; r = amdgpu_ib_get(adev, vm, ring->funcs->parse_cs ? chunk_ib->ib_bytes : 0, @@ -1005,11 +1042,11 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, /* UVD & VCE fw doesn't support user fences */ if (parser->job->uf_addr && ( - parser->job->ring->funcs->type == AMDGPU_RING_TYPE_UVD || - parser->job->ring->funcs->type == AMDGPU_RING_TYPE_VCE)) + parser->ring->funcs->type == AMDGPU_RING_TYPE_UVD || + parser->ring->funcs->type == AMDGPU_RING_TYPE_VCE)) return -EINVAL; - return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->job->ring->idx); + return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->ring->idx); } static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p, @@ -1160,31 +1197,30 @@ static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p) static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, union drm_amdgpu_cs *cs) { - struct amdgpu_ring *ring = p->job->ring; + struct amdgpu_fpriv *fpriv = p->filp->driver_priv; + struct amdgpu_ring *ring = p->ring; struct drm_sched_entity *entity = &p->ctx->rings[ring->idx].entity; + enum drm_sched_priority priority; + struct amdgpu_bo_list_entry *e; struct amdgpu_job *job; - unsigned i; uint64_t seq; int r; amdgpu_mn_lock(p->mn); - if (p->bo_list) { - for (i = p->bo_list->first_userptr; - i < p->bo_list->num_entries; ++i) { - struct amdgpu_bo *bo = p->bo_list->array[i].robj; - - if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) { - amdgpu_mn_unlock(p->mn); - return -ERESTARTSYS; - } + amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { + struct amdgpu_bo *bo = e->robj; + + if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) { + amdgpu_mn_unlock(p->mn); + return -ERESTARTSYS; } } job = p->job; p->job = NULL; - r = drm_sched_job_init(&job->base, &ring->sched, entity, p->filp); + r = drm_sched_job_init(&job->base, entity, p->filp); if (r) { amdgpu_job_free(job); amdgpu_mn_unlock(p->mn); @@ -1192,7 +1228,6 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, } job->owner = p->filp; - job->fence_ctx = entity->fence_context; p->fence = dma_fence_get(&job->base.s_fence->finished); r = amdgpu_ctx_add_fence(p->ctx, ring, p->fence, &seq); @@ -1210,11 +1245,15 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, job->uf_sequence = seq; amdgpu_job_free_resources(job); - amdgpu_ring_priority_get(job->ring, job->base.s_priority); trace_amdgpu_cs_ioctl(job); + amdgpu_vm_bo_trace_cs(&fpriv->vm, &p->ticket); + priority = job->base.s_priority; drm_sched_entity_push_job(&job->base, entity); + ring = to_amdgpu_ring(entity->rq->sched); + amdgpu_ring_priority_get(ring, priority); + ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence); amdgpu_mn_unlock(p->mn); @@ -1605,7 +1644,7 @@ int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser, if (!((*bo)->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)) { (*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; - amdgpu_ttm_placement_from_domain(*bo, (*bo)->allowed_domains); + amdgpu_bo_placement_from_domain(*bo, (*bo)->allowed_domains); r = ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, &ctx); if (r) return r; |