author		Thomas Gleixner <tglx@linutronix.de>	2018-09-15 21:50:42 +0300
committer	Thomas Gleixner <tglx@linutronix.de>	2018-09-15 21:50:42 +0300
commit		9ac669fc01dbfef707ecaa6b618c0d03294cca16 (patch)
tree		3207995766ab5664c60026daae5da268806a3262 /drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
parent		fbfa9260085b5b578a049a90135e5c51928c5f7f (diff)
parent		67314ec7b0250290cc85eaa7a2f88a8ddb9e8547 (diff)
download	linux-9ac669fc01dbfef707ecaa6b618c0d03294cca16.tar.xz
Merge tag 'y2038' of git://git.kernel.org/pub/scm/linux/kernel/git/arnd/playground into timers/core
Pull more y2038 work from Arnd Bergmann:

  y2038: convert more syscalls

  Here is another set of system call changes to prepare the change over
  to 64-bit time_t. As before, the strategy is to change system calls
  that take a 'struct timespec' argument over to 'struct __kernel_timespec',
  which for now is defined to be the same but will get redefined to use a
  64-bit time_t argument once we are ready to modify the system call tables.

  The major change from previous patches is that the plan is no longer to
  directly use the 'compat' system calls for providing compatibility with
  the existing 32-bit time_t based entry points. Instead, we rename the
  compat code to something that makes more sense on 32-bit architectures,
  e.g. compat_timespec becomes old_timespec32.

  With the renamed types in place, change over the 'stat' and 'utimes'
  families of system calls, sched_rr_get_interval, recvmmsg and
  rt_sigtimedwait. Another series for poll, select and io_pgetevents is
  currently being tested.
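As a concrete illustration of the rename described above, the two timespec flavours involved look roughly like this (a minimal sketch in C; the authoritative definitions live in include/uapi/linux/time_types.h and include/linux/time32.h in the tree):

/* New user-facing type: same layout as timespec on 64-bit, and the type
 * the syscall tables will eventually use everywhere. */
struct __kernel_timespec {
	__kernel_time64_t	tv_sec;		/* seconds, always 64-bit */
	long long		tv_nsec;	/* nanoseconds */
};

/* The former 'compat_timespec', renamed so the name also makes sense
 * when it backs the legacy 32-bit time_t entry points on 32-bit kernels. */
struct old_timespec32 {
	old_time32_t	tv_sec;
	s32		tv_nsec;
};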
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c')
-rw-r--r--	drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c	263
1 file changed, 153 insertions(+), 110 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 82312a7bc6ad..502b94fb116a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -31,6 +31,7 @@
#include <drm/drm_syncobj.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
+#include "amdgpu_gmc.h"
static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
struct drm_amdgpu_cs_chunk_fence *data,
@@ -65,11 +66,35 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
return 0;
}
-static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
+static int amdgpu_cs_bo_handles_chunk(struct amdgpu_cs_parser *p,
+ struct drm_amdgpu_bo_list_in *data)
+{
+ int r;
+ struct drm_amdgpu_bo_list_entry *info = NULL;
+
+ r = amdgpu_bo_create_list_entry_array(data, &info);
+ if (r)
+ return r;
+
+ r = amdgpu_bo_list_create(p->adev, p->filp, info, data->bo_number,
+ &p->bo_list);
+ if (r)
+ goto error_free;
+
+ kvfree(info);
+ return 0;
+
+error_free:
+ if (info)
+ kvfree(info);
+
+ return r;
+}
+
+static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs *cs)
{
struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
struct amdgpu_vm *vm = &fpriv->vm;
- union drm_amdgpu_cs *cs = data;
uint64_t *chunk_array_user;
uint64_t *chunk_array;
unsigned size, num_ibs = 0;
@@ -163,6 +188,19 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
break;
+ case AMDGPU_CHUNK_ID_BO_HANDLES:
+ size = sizeof(struct drm_amdgpu_bo_list_in);
+ if (p->chunks[i].length_dw * sizeof(uint32_t) < size) {
+ ret = -EINVAL;
+ goto free_partial_kdata;
+ }
+
+ ret = amdgpu_cs_bo_handles_chunk(p, p->chunks[i].kdata);
+ if (ret)
+ goto free_partial_kdata;
+
+ break;
+
case AMDGPU_CHUNK_ID_DEPENDENCIES:
case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
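For reference, the new AMDGPU_CHUNK_ID_BO_HANDLES chunk lets userspace pass the BO list inline with the submission instead of creating it first with the separate bo_list ioctl. A hedged sketch of what such a chunk payload might look like on the userspace side (the handle variables are hypothetical; the structs are from the amdgpu uapi header):

/* Hypothetical GEM handles obtained earlier via GEM create/import. */
struct drm_amdgpu_bo_list_entry entries[2] = {
	{ .bo_handle = vertex_bo_handle, .bo_priority = 0 },
	{ .bo_handle = index_bo_handle,  .bo_priority = 0 },
};

struct drm_amdgpu_bo_list_in list_in = {
	.bo_number    = 2,
	.bo_info_size = sizeof(entries[0]),
	.bo_info_ptr  = (__u64)(uintptr_t)entries,
};

/* list_in is then submitted as a CS chunk with
 * chunk_id = AMDGPU_CHUNK_ID_BO_HANDLES, which lands in
 * amdgpu_cs_bo_handles_chunk() above. */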
@@ -186,6 +224,10 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
if (p->uf_entry.robj)
p->job->uf_addr = uf_offset;
kfree(chunk_array);
+
+ /* Use this opportunity to fill in task info for the vm */
+ amdgpu_vm_set_task_info(vm);
+
return 0;
free_all_kdata:
@@ -257,7 +299,7 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
return;
}
- total_vram = adev->gmc.real_vram_size - adev->vram_pin_size;
+ total_vram = adev->gmc.real_vram_size - atomic64_read(&adev->vram_pin_size);
used_vram = amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
free_vram = used_vram >= total_vram ? 0 : total_vram - used_vram;
@@ -302,7 +344,7 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
*max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us);
/* Do the same for visible VRAM if half of it is free */
- if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size) {
+ if (!amdgpu_gmc_vram_full_visible(&adev->gmc)) {
u64 total_vis_vram = adev->gmc.visible_vram_size;
u64 used_vis_vram =
amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
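The open-coded 'visible_vram_size < real_vram_size' checks here and in the following hunks are replaced by a helper from the newly included amdgpu_gmc.h. Plausibly it is just the inverted comparison wrapped for readability, along these lines (a sketch, not the verbatim in-tree definition):

static inline bool amdgpu_gmc_vram_full_visible(struct amdgpu_gmc *gmc)
{
	/* All of VRAM is CPU-visible iff the visible aperture covers it. */
	return gmc->real_vram_size == gmc->visible_vram_size;
}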
@@ -359,7 +401,7 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
* to move it. Don't move anything if the threshold is zero.
*/
if (p->bytes_moved < p->bytes_moved_threshold) {
- if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size &&
+ if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
(bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) {
/* And don't move a CPU_ACCESS_REQUIRED BO to limited
* visible VRAM if we've depleted our allowance to do
@@ -377,11 +419,11 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
}
retry:
- amdgpu_ttm_placement_from_domain(bo, domain);
+ amdgpu_bo_placement_from_domain(bo, domain);
r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
p->bytes_moved += ctx.bytes_moved;
- if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size &&
+ if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
amdgpu_bo_in_cpu_visible_vram(bo))
p->bytes_moved_vis += ctx.bytes_moved;
@@ -434,9 +476,9 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p,
/* Good we can try to move this BO somewhere else */
update_bytes_moved_vis =
- adev->gmc.visible_vram_size < adev->gmc.real_vram_size &&
- amdgpu_bo_in_cpu_visible_vram(bo);
- amdgpu_ttm_placement_from_domain(bo, other);
+ !amdgpu_gmc_vram_full_visible(&adev->gmc) &&
+ amdgpu_bo_in_cpu_visible_vram(bo);
+ amdgpu_bo_placement_from_domain(bo, other);
r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
p->bytes_moved += ctx.bytes_moved;
if (update_bytes_moved_vis)
@@ -490,8 +532,8 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
/* Check if we have user pages and nobody bound the BO already */
if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) &&
lobj->user_pages) {
- amdgpu_ttm_placement_from_domain(bo,
- AMDGPU_GEM_DOMAIN_CPU);
+ amdgpu_bo_placement_from_domain(bo,
+ AMDGPU_GEM_DOMAIN_CPU);
r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
if (r)
return r;
@@ -519,23 +561,38 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
union drm_amdgpu_cs *cs)
{
struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+ struct amdgpu_vm *vm = &fpriv->vm;
struct amdgpu_bo_list_entry *e;
struct list_head duplicates;
- unsigned i, tries = 10;
struct amdgpu_bo *gds;
struct amdgpu_bo *gws;
struct amdgpu_bo *oa;
+ unsigned tries = 10;
int r;
INIT_LIST_HEAD(&p->validated);
- p->bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle);
- if (p->bo_list) {
- amdgpu_bo_list_get_list(p->bo_list, &p->validated);
- if (p->bo_list->first_userptr != p->bo_list->num_entries)
- p->mn = amdgpu_mn_get(p->adev, AMDGPU_MN_TYPE_GFX);
+ /* p->bo_list could already be assigned if AMDGPU_CHUNK_ID_BO_HANDLES is present */
+ if (cs->in.bo_list_handle) {
+ if (p->bo_list)
+ return -EINVAL;
+
+ r = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle,
+ &p->bo_list);
+ if (r)
+ return r;
+ } else if (!p->bo_list) {
+ /* Create an empty bo_list when no handle is provided */
+ r = amdgpu_bo_list_create(p->adev, p->filp, NULL, 0,
+ &p->bo_list);
+ if (r)
+ return r;
}
+ amdgpu_bo_list_get_list(p->bo_list, &p->validated);
+ if (p->bo_list->first_userptr != p->bo_list->num_entries)
+ p->mn = amdgpu_mn_get(p->adev, AMDGPU_MN_TYPE_GFX);
+
INIT_LIST_HEAD(&duplicates);
amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd);
@@ -544,7 +601,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
while (1) {
struct list_head need_pages;
- unsigned i;
r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
&duplicates);
@@ -554,17 +610,9 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
goto error_free_pages;
}
- /* Without a BO list we don't have userptr BOs */
- if (!p->bo_list)
- break;
-
INIT_LIST_HEAD(&need_pages);
- for (i = p->bo_list->first_userptr;
- i < p->bo_list->num_entries; ++i) {
- struct amdgpu_bo *bo;
-
- e = &p->bo_list->array[i];
- bo = e->robj;
+ amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
+ struct amdgpu_bo *bo = e->robj;
if (amdgpu_ttm_tt_userptr_invalidated(bo->tbo.ttm,
&e->user_invalidated) && e->user_pages) {
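The explicit index loops over p->bo_list->array are replaced throughout this patch by list iterators. Assuming the usual kernel pattern, they are presumably macros along these lines (a sketch; amdgpu_bo_list.h has the real definitions):

#define amdgpu_bo_list_for_each_entry(e, list) \
	for (e = &(list)->array[0]; \
	     e != &(list)->array[(list)->num_entries]; ++e)

#define amdgpu_bo_list_for_each_userptr_entry(e, list) \
	for (e = &(list)->array[(list)->first_userptr]; \
	     e != &(list)->array[(list)->num_entries]; ++e)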
@@ -656,23 +704,12 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
p->bytes_moved_vis);
- if (p->bo_list) {
- struct amdgpu_vm *vm = &fpriv->vm;
- unsigned i;
-
- gds = p->bo_list->gds_obj;
- gws = p->bo_list->gws_obj;
- oa = p->bo_list->oa_obj;
- for (i = 0; i < p->bo_list->num_entries; i++) {
- struct amdgpu_bo *bo = p->bo_list->array[i].robj;
+ gds = p->bo_list->gds_obj;
+ gws = p->bo_list->gws_obj;
+ oa = p->bo_list->oa_obj;
- p->bo_list->array[i].bo_va = amdgpu_vm_bo_find(vm, bo);
- }
- } else {
- gds = p->adev->gds.gds_gfx_bo;
- gws = p->adev->gds.gws_gfx_bo;
- oa = p->adev->gds.oa_gfx_bo;
- }
+ amdgpu_bo_list_for_each_entry(e, p->bo_list)
+ e->bo_va = amdgpu_vm_bo_find(vm, e->robj);
if (gds) {
p->job->gds_base = amdgpu_bo_gpu_offset(gds);
@@ -700,18 +737,13 @@ error_validate:
error_free_pages:
- if (p->bo_list) {
- for (i = p->bo_list->first_userptr;
- i < p->bo_list->num_entries; ++i) {
- e = &p->bo_list->array[i];
-
- if (!e->user_pages)
- continue;
+ amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
+ if (!e->user_pages)
+ continue;
- release_pages(e->user_pages,
- e->robj->tbo.ttm->num_pages);
- kvfree(e->user_pages);
- }
+ release_pages(e->user_pages,
+ e->robj->tbo.ttm->num_pages);
+ kvfree(e->user_pages);
}
return r;
@@ -773,12 +805,13 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
{
- struct amdgpu_device *adev = p->adev;
struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+ struct amdgpu_device *adev = p->adev;
struct amdgpu_vm *vm = &fpriv->vm;
+ struct amdgpu_bo_list_entry *e;
struct amdgpu_bo_va *bo_va;
struct amdgpu_bo *bo;
- int i, r;
+ int r;
r = amdgpu_vm_clear_freed(adev, vm, NULL);
if (r)
@@ -808,29 +841,26 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
return r;
}
- if (p->bo_list) {
- for (i = 0; i < p->bo_list->num_entries; i++) {
- struct dma_fence *f;
-
- /* ignore duplicates */
- bo = p->bo_list->array[i].robj;
- if (!bo)
- continue;
+ amdgpu_bo_list_for_each_entry(e, p->bo_list) {
+ struct dma_fence *f;
- bo_va = p->bo_list->array[i].bo_va;
- if (bo_va == NULL)
- continue;
+ /* ignore duplicates */
+ bo = e->robj;
+ if (!bo)
+ continue;
- r = amdgpu_vm_bo_update(adev, bo_va, false);
- if (r)
- return r;
+ bo_va = e->bo_va;
+ if (bo_va == NULL)
+ continue;
- f = bo_va->last_pt_update;
- r = amdgpu_sync_fence(adev, &p->job->sync, f, false);
- if (r)
- return r;
- }
+ r = amdgpu_vm_bo_update(adev, bo_va, false);
+ if (r)
+ return r;
+ f = bo_va->last_pt_update;
+ r = amdgpu_sync_fence(adev, &p->job->sync, f, false);
+ if (r)
+ return r;
}
r = amdgpu_vm_handle_moved(adev, vm);
@@ -845,15 +875,14 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
if (r)
return r;
- if (amdgpu_vm_debug && p->bo_list) {
+ if (amdgpu_vm_debug) {
/* Invalidate all BOs to test for userspace bugs */
- for (i = 0; i < p->bo_list->num_entries; i++) {
+ amdgpu_bo_list_for_each_entry(e, p->bo_list) {
/* ignore duplicates */
- bo = p->bo_list->array[i].robj;
- if (!bo)
+ if (!e->robj)
continue;
- amdgpu_vm_bo_invalidate(adev, bo, false);
+ amdgpu_vm_bo_invalidate(adev, e->robj, false);
}
}
@@ -865,11 +894,11 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
{
struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
struct amdgpu_vm *vm = &fpriv->vm;
- struct amdgpu_ring *ring = p->job->ring;
+ struct amdgpu_ring *ring = p->ring;
int r;
/* Only for UVD/VCE VM emulation */
- if (p->job->ring->funcs->parse_cs) {
+ if (p->ring->funcs->parse_cs || p->ring->funcs->patch_cs_in_place) {
unsigned i, j;
for (i = 0, j = 0; i < p->nchunks && j < p->job->num_ibs; i++) {
@@ -910,12 +939,20 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
offset = m->start * AMDGPU_GPU_PAGE_SIZE;
kptr += va_start - offset;
- memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
- amdgpu_bo_kunmap(aobj);
-
- r = amdgpu_ring_parse_cs(ring, p, j);
- if (r)
- return r;
+ if (p->ring->funcs->parse_cs) {
+ memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
+ amdgpu_bo_kunmap(aobj);
+
+ r = amdgpu_ring_parse_cs(ring, p, j);
+ if (r)
+ return r;
+ } else {
+ ib->ptr = (uint32_t *)kptr;
+ r = amdgpu_ring_patch_cs_in_place(ring, p, j);
+ amdgpu_bo_kunmap(aobj);
+ if (r)
+ return r;
+ }
j++;
}
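This hunk splits IB handling into two paths: rings with parse_cs validate a CPU copy of the IB, while rings with the new patch_cs_in_place hook rewrite the kmapped BO directly and skip the memcpy. Both hooks live in struct amdgpu_ring_funcs; sketched member declarations (assumed shapes, see amdgpu_ring.h for the real ones):

struct amdgpu_ring_funcs {
	/* ... */
	/* validate a CPU copy of the IB placed in ib->ptr */
	int (*parse_cs)(struct amdgpu_cs_parser *p, uint32_t ib_idx);
	/* validate and patch the IB directly in the mapped BO */
	int (*patch_cs_in_place)(struct amdgpu_cs_parser *p, uint32_t ib_idx);
	/* ... */
};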
@@ -927,6 +964,10 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
r = amdgpu_bo_vm_update_pte(p);
if (r)
return r;
+
+ r = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv);
+ if (r)
+ return r;
}
return amdgpu_cs_sync_rings(p);
@@ -979,10 +1020,10 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
}
}
- if (parser->job->ring && parser->job->ring != ring)
+ if (parser->ring && parser->ring != ring)
return -EINVAL;
- parser->job->ring = ring;
+ parser->ring = ring;
r = amdgpu_ib_get(adev, vm,
ring->funcs->parse_cs ? chunk_ib->ib_bytes : 0,
@@ -1001,11 +1042,11 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
/* UVD & VCE fw doesn't support user fences */
if (parser->job->uf_addr && (
- parser->job->ring->funcs->type == AMDGPU_RING_TYPE_UVD ||
- parser->job->ring->funcs->type == AMDGPU_RING_TYPE_VCE))
+ parser->ring->funcs->type == AMDGPU_RING_TYPE_UVD ||
+ parser->ring->funcs->type == AMDGPU_RING_TYPE_VCE))
return -EINVAL;
- return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->job->ring->idx);
+ return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->ring->idx);
}
static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
@@ -1156,31 +1197,30 @@ static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)
static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
union drm_amdgpu_cs *cs)
{
- struct amdgpu_ring *ring = p->job->ring;
+ struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+ struct amdgpu_ring *ring = p->ring;
struct drm_sched_entity *entity = &p->ctx->rings[ring->idx].entity;
+ enum drm_sched_priority priority;
+ struct amdgpu_bo_list_entry *e;
struct amdgpu_job *job;
- unsigned i;
uint64_t seq;
int r;
amdgpu_mn_lock(p->mn);
- if (p->bo_list) {
- for (i = p->bo_list->first_userptr;
- i < p->bo_list->num_entries; ++i) {
- struct amdgpu_bo *bo = p->bo_list->array[i].robj;
-
- if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) {
- amdgpu_mn_unlock(p->mn);
- return -ERESTARTSYS;
- }
+ amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
+ struct amdgpu_bo *bo = e->robj;
+
+ if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) {
+ amdgpu_mn_unlock(p->mn);
+ return -ERESTARTSYS;
}
}
job = p->job;
p->job = NULL;
- r = drm_sched_job_init(&job->base, &ring->sched, entity, p->filp);
+ r = drm_sched_job_init(&job->base, entity, p->filp);
if (r) {
amdgpu_job_free(job);
amdgpu_mn_unlock(p->mn);
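Note that drm_sched_job_init() above no longer takes the ring's scheduler: the scheduler is now derived from the entity's run queue, which is also why the ring is re-read from entity->rq->sched before amdgpu_ring_priority_get() further down. The assumed new prototype (see the DRM scheduler header):

int drm_sched_job_init(struct drm_sched_job *job,
		       struct drm_sched_entity *entity,
		       void *owner);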
@@ -1188,7 +1228,6 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
}
job->owner = p->filp;
- job->fence_ctx = entity->fence_context;
p->fence = dma_fence_get(&job->base.s_fence->finished);
r = amdgpu_ctx_add_fence(p->ctx, ring, p->fence, &seq);
@@ -1206,11 +1245,15 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
job->uf_sequence = seq;
amdgpu_job_free_resources(job);
- amdgpu_ring_priority_get(job->ring, job->base.s_priority);
trace_amdgpu_cs_ioctl(job);
+ amdgpu_vm_bo_trace_cs(&fpriv->vm, &p->ticket);
+ priority = job->base.s_priority;
drm_sched_entity_push_job(&job->base, entity);
+ ring = to_amdgpu_ring(entity->rq->sched);
+ amdgpu_ring_priority_get(ring, priority);
+
ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
amdgpu_mn_unlock(p->mn);
@@ -1601,7 +1644,7 @@ int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
if (!((*bo)->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)) {
(*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
- amdgpu_ttm_placement_from_domain(*bo, (*bo)->allowed_domains);
+ amdgpu_bo_placement_from_domain(*bo, (*bo)->allowed_domains);
r = ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, &ctx);
if (r)
return r;