path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c')
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  220
1 file changed, 128 insertions(+), 92 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 5ca905b4a0fb..0311d799a010 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -28,6 +28,7 @@
#include <linux/file.h>
#include <linux/pagemap.h>
#include <linux/sync_file.h>
+#include <linux/dma-buf.h>
#include <drm/amdgpu_drm.h>
#include <drm/drm_syncobj.h>
@@ -56,7 +57,7 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
/* One for TTM and one for the CS job */
p->uf_entry.tv.num_shared = 2;
- drm_gem_object_put_unlocked(gobj);
+ drm_gem_object_put(gobj);
size = amdgpu_bo_size(bo);
if (size != PAGE_SIZE || (data->offset + 8) > size) {
@@ -97,8 +98,7 @@ static int amdgpu_cs_bo_handles_chunk(struct amdgpu_cs_parser *p,
return 0;
error_free:
- if (info)
- kvfree(info);
+ kvfree(info);
return r;
}
@@ -117,7 +117,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs
if (cs->in.num_chunks == 0)
return 0;
- chunk_array = kmalloc_array(cs->in.num_chunks, sizeof(uint64_t), GFP_KERNEL);
+ chunk_array = kvmalloc_array(cs->in.num_chunks, sizeof(uint64_t), GFP_KERNEL);
if (!chunk_array)
return -ENOMEM;
@@ -144,7 +144,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs
}
p->nchunks = cs->in.num_chunks;
- p->chunks = kmalloc_array(p->nchunks, sizeof(struct amdgpu_cs_chunk),
+ p->chunks = kvmalloc_array(p->nchunks, sizeof(struct amdgpu_cs_chunk),
GFP_KERNEL);
if (!p->chunks) {
ret = -ENOMEM;
@@ -238,7 +238,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs
if (p->uf_entry.tv.bo)
p->job->uf_addr = uf_offset;
- kfree(chunk_array);
+ kvfree(chunk_array);
/* Use this opportunity to fill in task info for the vm */
amdgpu_vm_set_task_info(vm);
@@ -250,11 +250,11 @@ free_all_kdata:
free_partial_kdata:
for (; i >= 0; i--)
kvfree(p->chunks[i].kdata);
- kfree(p->chunks);
+ kvfree(p->chunks);
p->chunks = NULL;
p->nchunks = 0;
free_chunk:
- kfree(chunk_array);
+ kvfree(chunk_array);
return ret;
}
@@ -298,7 +298,7 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
{
s64 time_us, increment_us;
u64 free_vram, total_vram, used_vram;
-
+ struct ttm_resource_manager *vram_man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
/* Allow a maximum of 200 accumulated ms. This is basically per-IB
* throttling.
*
@@ -315,7 +315,7 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
}
total_vram = adev->gmc.real_vram_size - atomic64_read(&adev->vram_pin_size);
- used_vram = amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
+ used_vram = amdgpu_vram_mgr_usage(vram_man);
free_vram = used_vram >= total_vram ? 0 : total_vram - used_vram;
spin_lock(&adev->mm_stats.lock);
@@ -325,7 +325,7 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
increment_us = time_us - adev->mm_stats.last_update_us;
adev->mm_stats.last_update_us = time_us;
adev->mm_stats.accum_us = min(adev->mm_stats.accum_us + increment_us,
- us_upper_bound);
+ us_upper_bound);
/* This prevents the short period of low performance when the VRAM
* usage is low and the driver is in debt or doesn't have enough
@@ -362,7 +362,7 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
if (!amdgpu_gmc_vram_full_visible(&adev->gmc)) {
u64 total_vis_vram = adev->gmc.visible_vram_size;
u64 used_vis_vram =
- amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
+ amdgpu_vram_mgr_vis_usage(vram_man);
if (used_vis_vram < total_vis_vram) {
u64 free_vis_vram = total_vis_vram - used_vis_vram;
@@ -396,26 +396,27 @@ void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes,
spin_unlock(&adev->mm_stats.lock);
}
-static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
- struct amdgpu_bo *bo)
+static int amdgpu_cs_bo_validate(void *param, struct amdgpu_bo *bo)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+ struct amdgpu_cs_parser *p = param;
struct ttm_operation_ctx ctx = {
.interruptible = true,
.no_wait_gpu = false,
- .resv = bo->tbo.base.resv,
- .flags = 0
+ .resv = bo->tbo.base.resv
};
uint32_t domain;
int r;
- if (bo->pin_count)
+ if (bo->tbo.pin_count)
return 0;
/* Don't move this buffer if we have depleted our allowance
* to move it. Don't move anything if the threshold is zero.
*/
- if (p->bytes_moved < p->bytes_moved_threshold) {
+ if (p->bytes_moved < p->bytes_moved_threshold &&
+ (!bo->tbo.base.dma_buf ||
+ list_empty(&bo->tbo.base.dma_buf->attachments))) {
if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
(bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) {
/* And don't move a CPU_ACCESS_REQUIRED BO to limited
@@ -450,21 +451,6 @@ retry:
return r;
}
-static int amdgpu_cs_validate(void *param, struct amdgpu_bo *bo)
-{
- struct amdgpu_cs_parser *p = param;
- int r;
-
- r = amdgpu_cs_bo_validate(p, bo);
- if (r)
- return r;
-
- if (bo->shadow)
- r = amdgpu_cs_bo_validate(p, bo->shadow);
-
- return r;
-}
-
static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
struct list_head *validated)
{
@@ -492,7 +478,7 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
lobj->user_pages);
}
- r = amdgpu_cs_validate(p, bo);
+ r = amdgpu_cs_bo_validate(p, bo);
if (r)
return r;
@@ -558,7 +544,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
sizeof(struct page *),
GFP_KERNEL | __GFP_ZERO);
if (!e->user_pages) {
- DRM_ERROR("calloc failure\n");
+ DRM_ERROR("kvmalloc_array failure\n");
return -ENOMEM;
}
@@ -586,13 +572,27 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
goto out;
}
+ amdgpu_bo_list_for_each_entry(e, p->bo_list) {
+ struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
+
+ e->bo_va = amdgpu_vm_bo_find(vm, bo);
+
+ if (bo->tbo.base.dma_buf && !amdgpu_bo_explicit_sync(bo)) {
+ e->chain = dma_fence_chain_alloc();
+ if (!e->chain) {
+ r = -ENOMEM;
+ goto error_validate;
+ }
+ }
+ }
+
amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
&p->bytes_moved_vis_threshold);
p->bytes_moved = 0;
p->bytes_moved_vis = 0;
r = amdgpu_vm_validate_pt_bos(p->adev, &fpriv->vm,
- amdgpu_cs_validate, p);
+ amdgpu_cs_bo_validate, p);
if (r) {
DRM_ERROR("amdgpu_vm_validate_pt_bos() failed.\n");
goto error_validate;
@@ -613,15 +613,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
gws = p->bo_list->gws_obj;
oa = p->bo_list->oa_obj;
- amdgpu_bo_list_for_each_entry(e, p->bo_list) {
- struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
-
- /* Make sure we use the exclusive slot for shared BOs */
- if (bo->prime_shared_count)
- e->tv.num_shared = 0;
- e->bo_va = amdgpu_vm_bo_find(vm, bo);
- }
-
if (gds) {
p->job->gds_base = amdgpu_bo_gpu_offset(gds) >> PAGE_SHIFT;
p->job->gds_size = amdgpu_bo_size(gds) >> PAGE_SHIFT;
@@ -643,24 +634,32 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
}
error_validate:
- if (r)
+ if (r) {
+ amdgpu_bo_list_for_each_entry(e, p->bo_list) {
+ dma_fence_chain_free(e->chain);
+ e->chain = NULL;
+ }
ttm_eu_backoff_reservation(&p->ticket, &p->validated);
+ }
out:
return r;
}
static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
{
+ struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
struct amdgpu_bo_list_entry *e;
int r;
list_for_each_entry(e, &p->validated, tv.head) {
struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
struct dma_resv *resv = bo->tbo.base.resv;
+ enum amdgpu_sync_mode sync_mode;
- r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, p->filp,
- amdgpu_bo_explicit_sync(bo));
-
+ sync_mode = amdgpu_bo_explicit_sync(bo) ?
+ AMDGPU_SYNC_EXPLICIT : AMDGPU_SYNC_NE_OWNER;
+ r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, sync_mode,
+ &fpriv->vm);
if (r)
return r;
}
@@ -668,11 +667,12 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
}
/**
- * cs_parser_fini() - clean parser states
+ * amdgpu_cs_parser_fini() - clean parser states
* @parser: parser structure holding parsing context.
* @error: error number
+ * @backoff: indicator to backoff the reservation
*
- * If error is set than unvalidate buffer, otherwise just free memory
+ * If error is set then unvalidate buffer, otherwise just free memory
* used by parsing context.
**/
static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
@@ -680,9 +680,17 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
{
unsigned i;
- if (error && backoff)
+ if (error && backoff) {
+ struct amdgpu_bo_list_entry *e;
+
+ amdgpu_bo_list_for_each_entry(e, parser->bo_list) {
+ dma_fence_chain_free(e->chain);
+ e->chain = NULL;
+ }
+
ttm_eu_backoff_reservation(&parser->ticket,
&parser->validated);
+ }
for (i = 0; i < parser->num_post_deps; i++) {
drm_syncobj_put(parser->post_deps[i].syncobj);
@@ -701,7 +709,7 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
for (i = 0; i < parser->nchunks; i++)
kvfree(parser->chunks[i].kdata);
- kfree(parser->chunks);
+ kvfree(parser->chunks);
if (parser->job)
amdgpu_job_free(parser->job);
if (parser->uf_entry.tv.bo) {
@@ -791,33 +799,27 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
if (r)
return r;
- r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false);
+ r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false, NULL);
if (r)
return r;
- r = amdgpu_sync_fence(adev, &p->job->sync,
- fpriv->prt_va->last_pt_update, false);
+ r = amdgpu_sync_vm_fence(&p->job->sync, fpriv->prt_va->last_pt_update);
if (r)
return r;
if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
- struct dma_fence *f;
-
bo_va = fpriv->csa_va;
BUG_ON(!bo_va);
- r = amdgpu_vm_bo_update(adev, bo_va, false);
+ r = amdgpu_vm_bo_update(adev, bo_va, false, NULL);
if (r)
return r;
- f = bo_va->last_pt_update;
- r = amdgpu_sync_fence(adev, &p->job->sync, f, false);
+ r = amdgpu_sync_vm_fence(&p->job->sync, bo_va->last_pt_update);
if (r)
return r;
}
amdgpu_bo_list_for_each_entry(e, p->bo_list) {
- struct dma_fence *f;
-
/* ignore duplicates */
bo = ttm_to_amdgpu_bo(e->tv.bo);
if (!bo)
@@ -827,12 +829,11 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
if (bo_va == NULL)
continue;
- r = amdgpu_vm_bo_update(adev, bo_va, false);
+ r = amdgpu_vm_bo_update(adev, bo_va, false, NULL);
if (r)
return r;
- f = bo_va->last_pt_update;
- r = amdgpu_sync_fence(adev, &p->job->sync, f, false);
+ r = amdgpu_sync_vm_fence(&p->job->sync, bo_va->last_pt_update);
if (r)
return r;
}
@@ -845,11 +846,11 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
if (r)
return r;
- r = amdgpu_sync_fence(adev, &p->job->sync, vm->last_update, false);
+ r = amdgpu_sync_vm_fence(&p->job->sync, vm->last_update);
if (r)
return r;
- p->job->vm_pd_addr = amdgpu_gmc_pd_addr(vm->root.base.bo);
+ p->job->vm_pd_addr = amdgpu_gmc_pd_addr(vm->root.bo);
if (amdgpu_vm_debug) {
/* Invalidate all BOs to test for userspace bugs */
@@ -916,11 +917,17 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
if (parser->entity && parser->entity != entity)
return -EINVAL;
+ /* Return if there is no run queue associated with this entity.
+ * Possibly because of disabled HW IP. */
+ if (entity->rq == NULL)
+ return -EINVAL;
+
parser->entity = entity;
ring = to_amdgpu_ring(entity->rq->sched);
r = amdgpu_ib_get(adev, vm, ring->funcs->parse_cs ?
- chunk_ib->ib_bytes : 0, ib);
+ chunk_ib->ib_bytes : 0,
+ AMDGPU_IB_POOL_DELAYED, ib);
if (r) {
DRM_ERROR("Failed to get ib !\n");
return r;
@@ -987,7 +994,7 @@ static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
dma_fence_put(old);
}
- r = amdgpu_sync_fence(p->adev, &p->job->sync, fence, true);
+ r = amdgpu_sync_fence(&p->job->sync, fence);
dma_fence_put(fence);
if (r)
return r;
@@ -1009,7 +1016,7 @@ static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,
return r;
}
- r = amdgpu_sync_fence(p->adev, &p->job->sync, fence, true);
+ r = amdgpu_sync_fence(&p->job->sync, fence);
dma_fence_put(fence);
return r;
@@ -1120,7 +1127,7 @@ static int amdgpu_cs_process_syncobj_timeline_out_dep(struct amdgpu_cs_parser *p
dep->chain = NULL;
if (syncobj_deps[i].point) {
- dep->chain = kmalloc(sizeof(*dep->chain), GFP_KERNEL);
+ dep->chain = dma_fence_chain_alloc();
if (!dep->chain)
return -ENOMEM;
}
@@ -1128,7 +1135,7 @@ static int amdgpu_cs_process_syncobj_timeline_out_dep(struct amdgpu_cs_parser *p
dep->syncobj = drm_syncobj_find(p->filp,
syncobj_deps[i].handle);
if (!dep->syncobj) {
- kfree(dep->chain);
+ dma_fence_chain_free(dep->chain);
return -EINVAL;
}
dep->point = syncobj_deps[i].point;
@@ -1203,8 +1210,6 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
{
struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
struct drm_sched_entity *entity = p->entity;
- enum drm_sched_priority priority;
- struct amdgpu_ring *ring;
struct amdgpu_bo_list_entry *e;
struct amdgpu_job *job;
uint64_t seq;
@@ -1213,10 +1218,12 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
job = p->job;
p->job = NULL;
- r = drm_sched_job_init(&job->base, entity, p->filp);
+ r = drm_sched_job_init(&job->base, entity, &fpriv->vm);
if (r)
goto error_unlock;
+ drm_sched_job_arm(&job->base);
+
/* No memory allocation is allowed while holding the notifier lock.
* The lock is held until amdgpu_cs_submit is finished and fence is
* added to BOs.
@@ -1236,7 +1243,6 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
goto error_abort;
}
- job->owner = p->filp;
p->fence = dma_fence_get(&job->base.s_fence->finished);
amdgpu_ctx_add_fence(p->ctx, entity, p->fence, &seq);
@@ -1255,14 +1261,32 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
trace_amdgpu_cs_ioctl(job);
amdgpu_vm_bo_trace_cs(&fpriv->vm, &p->ticket);
- priority = job->base.s_priority;
- drm_sched_entity_push_job(&job->base, entity);
-
- ring = to_amdgpu_ring(entity->rq->sched);
- amdgpu_ring_priority_get(ring, priority);
+ drm_sched_entity_push_job(&job->base);
amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);
+ amdgpu_bo_list_for_each_entry(e, p->bo_list) {
+ struct dma_resv *resv = e->tv.bo->base.resv;
+ struct dma_fence_chain *chain = e->chain;
+
+ if (!chain)
+ continue;
+
+ /*
+ * Work around dma_resv shortcomings by wrapping up the
+ * submission in a dma_fence_chain and adding it as the exclusive
+ * fence, but first add the submission as a shared fence to make
+ * sure that shared fences never signal before the exclusive
+ * one.
+ */
+ dma_fence_chain_init(chain, dma_resv_excl_fence(resv),
+ dma_fence_get(p->fence), 1);
+
+ dma_resv_add_shared_fence(resv, p->fence);
+ rcu_assign_pointer(resv->fence_excl, &chain->base);
+ e->chain = NULL;
+ }
+
ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
mutex_unlock(&p->adev->notifier_lock);
@@ -1277,13 +1301,24 @@ error_unlock:
return r;
}
+static void trace_amdgpu_cs_ibs(struct amdgpu_cs_parser *parser)
+{
+ int i;
+
+ if (!trace_amdgpu_cs_enabled())
+ return;
+
+ for (i = 0; i < parser->job->num_ibs; i++)
+ trace_amdgpu_cs(parser, i);
+}
+
int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
{
- struct amdgpu_device *adev = dev->dev_private;
+ struct amdgpu_device *adev = drm_to_adev(dev);
union drm_amdgpu_cs *cs = data;
struct amdgpu_cs_parser parser = {};
bool reserved_buffers = false;
- int i, r;
+ int r;
if (amdgpu_ras_intr_triggered())
return -EHWPOISON;
@@ -1296,7 +1331,8 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
r = amdgpu_cs_parser_init(&parser, data);
if (r) {
- DRM_ERROR("Failed to initialize parser %d!\n", r);
+ if (printk_ratelimit())
+ DRM_ERROR("Failed to initialize parser %d!\n", r);
goto out;
}
@@ -1321,8 +1357,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
reserved_buffers = true;
- for (i = 0; i < parser.job->num_ibs; i++)
- trace_amdgpu_cs(&parser, i);
+ trace_amdgpu_cs_ibs(&parser);
r = amdgpu_cs_vm_handling(&parser);
if (r)
@@ -1423,7 +1458,7 @@ static struct dma_fence *amdgpu_cs_get_fence(struct amdgpu_device *adev,
int amdgpu_cs_fence_to_handle_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp)
{
- struct amdgpu_device *adev = dev->dev_private;
+ struct amdgpu_device *adev = drm_to_adev(dev);
union drm_amdgpu_fence_to_handle *info = data;
struct dma_fence *fence;
struct drm_syncobj *syncobj;
@@ -1452,7 +1487,7 @@ int amdgpu_cs_fence_to_handle_ioctl(struct drm_device *dev, void *data,
dma_fence_put(fence);
if (r)
return r;
- r = drm_syncobj_get_fd(syncobj, (int*)&info->out.handle);
+ r = drm_syncobj_get_fd(syncobj, (int *)&info->out.handle);
drm_syncobj_put(syncobj);
return r;
@@ -1480,7 +1515,7 @@ int amdgpu_cs_fence_to_handle_ioctl(struct drm_device *dev, void *data,
}
/**
- * amdgpu_cs_wait_all_fence - wait on all fences to signal
+ * amdgpu_cs_wait_all_fences - wait on all fences to signal
*
* @adev: amdgpu device
* @filp: file private
@@ -1599,7 +1634,7 @@ err_free_fence_array:
int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp)
{
- struct amdgpu_device *adev = dev->dev_private;
+ struct amdgpu_device *adev = drm_to_adev(dev);
union drm_amdgpu_wait_fences *wait = data;
uint32_t fence_count = wait->in.fence_count;
struct drm_amdgpu_fence *fences_user;
@@ -1631,11 +1666,12 @@ err_free_fences:
}
/**
- * amdgpu_cs_find_bo_va - find bo_va for VM address
+ * amdgpu_cs_find_mapping - find bo_va for VM address
*
* @parser: command submission parser context
* @addr: VM address
* @bo: resulting BO of the mapping found
+ * @map: Placeholder to return found BO mapping
*
* Search the buffer objects in the command submission context for a certain
* virtual memory address. Returns allocation structure when found, NULL