summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
diff options
context:
space:
mode:
authorMark Brown <broonie@kernel.org>2017-01-10 19:39:52 +0300
committerMark Brown <broonie@kernel.org>2017-01-10 19:39:52 +0300
commit9c1852b459f04f6309e40d1d167512b0a5598529 (patch)
treea74526f1fe3f9826b81327f0b9ba9b98a543f87c /drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
parent9b41da80e09128574f09bed8dc5a5fc6f72a8239 (diff)
parent7ce7d89f48834cefece7804d38fc5d85382edf77 (diff)
downloadlinux-9c1852b459f04f6309e40d1d167512b0a5598529.tar.xz
Merge tag 'v4.10-rc1' into asoc-samsung
Linux 4.10-rc1
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c307
1 files changed, 248 insertions, 59 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 82dc8d20e28a..29d6d84d1c28 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -355,6 +355,7 @@ static void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev,
static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
struct amdgpu_bo *bo)
{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
u64 initial_bytes_moved;
uint32_t domain;
int r;
@@ -372,9 +373,9 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
retry:
amdgpu_ttm_placement_from_domain(bo, domain);
- initial_bytes_moved = atomic64_read(&bo->adev->num_bytes_moved);
+ initial_bytes_moved = atomic64_read(&adev->num_bytes_moved);
r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
- p->bytes_moved += atomic64_read(&bo->adev->num_bytes_moved) -
+ p->bytes_moved += atomic64_read(&adev->num_bytes_moved) -
initial_bytes_moved;
if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) {
@@ -387,9 +388,9 @@ retry:
/* Last resort, try to evict something from the current working set */
static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p,
- struct amdgpu_bo_list_entry *lobj)
+ struct amdgpu_bo *validated)
{
- uint32_t domain = lobj->robj->allowed_domains;
+ uint32_t domain = validated->allowed_domains;
int r;
if (!p->evictable)
@@ -400,11 +401,12 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p,
struct amdgpu_bo_list_entry *candidate = p->evictable;
struct amdgpu_bo *bo = candidate->robj;
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
u64 initial_bytes_moved;
uint32_t other;
/* If we reached our current BO we can forget it */
- if (candidate == lobj)
+ if (candidate->robj == validated)
break;
other = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type);
@@ -420,9 +422,9 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p,
/* Good we can try to move this BO somewhere else */
amdgpu_ttm_placement_from_domain(bo, other);
- initial_bytes_moved = atomic64_read(&bo->adev->num_bytes_moved);
+ initial_bytes_moved = atomic64_read(&adev->num_bytes_moved);
r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
- p->bytes_moved += atomic64_read(&bo->adev->num_bytes_moved) -
+ p->bytes_moved += atomic64_read(&adev->num_bytes_moved) -
initial_bytes_moved;
if (unlikely(r))
@@ -437,6 +439,23 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p,
return false;
}
+static int amdgpu_cs_validate(void *param, struct amdgpu_bo *bo)
+{
+ struct amdgpu_cs_parser *p = param;
+ int r;
+
+ do {
+ r = amdgpu_cs_bo_validate(p, bo);
+ } while (r == -ENOMEM && amdgpu_cs_try_evict(p, bo));
+ if (r)
+ return r;
+
+ if (bo->shadow)
+ r = amdgpu_cs_bo_validate(p, bo->shadow);
+
+ return r;
+}
+
static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
struct list_head *validated)
{
@@ -464,18 +483,10 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
if (p->evictable == lobj)
p->evictable = NULL;
- do {
- r = amdgpu_cs_bo_validate(p, bo);
- } while (r == -ENOMEM && amdgpu_cs_try_evict(p, lobj));
+ r = amdgpu_cs_validate(p, bo);
if (r)
return r;
- if (bo->shadow) {
- r = amdgpu_cs_bo_validate(p, bo);
- if (r)
- return r;
- }
-
if (binding_userptr) {
drm_free_large(lobj->user_pages);
lobj->user_pages = NULL;
@@ -594,14 +605,19 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
list_splice(&need_pages, &p->validated);
}
- amdgpu_vm_get_pt_bos(p->adev, &fpriv->vm, &duplicates);
-
p->bytes_moved_threshold = amdgpu_cs_get_threshold_for_moves(p->adev);
p->bytes_moved = 0;
p->evictable = list_last_entry(&p->validated,
struct amdgpu_bo_list_entry,
tv.head);
+ r = amdgpu_vm_validate_pt_bos(p->adev, &fpriv->vm,
+ amdgpu_cs_validate, p);
+ if (r) {
+ DRM_ERROR("amdgpu_vm_validate_pt_bos() failed.\n");
+ goto error_validate;
+ }
+
r = amdgpu_cs_list_validate(p, &duplicates);
if (r) {
DRM_ERROR("amdgpu_cs_list_validate(duplicates) failed.\n");
@@ -720,7 +736,7 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bo
ttm_eu_backoff_reservation(&parser->ticket,
&parser->validated);
}
- fence_put(parser->fence);
+ dma_fence_put(parser->fence);
if (parser->ctx)
amdgpu_ctx_put(parser->ctx);
@@ -757,7 +773,7 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p,
if (p->bo_list) {
for (i = 0; i < p->bo_list->num_entries; i++) {
- struct fence *f;
+ struct dma_fence *f;
/* ignore duplicates */
bo = p->bo_list->array[i].robj;
@@ -807,13 +823,14 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
/* Only for UVD/VCE VM emulation */
if (ring->funcs->parse_cs) {
- p->job->vm = NULL;
for (i = 0; i < p->job->num_ibs; i++) {
r = amdgpu_ring_parse_cs(ring, p, i);
if (r)
return r;
}
- } else {
+ }
+
+ if (p->job->vm) {
p->job->vm_pd_addr = amdgpu_bo_gpu_offset(vm->page_directory);
r = amdgpu_bo_vm_update_pte(p, vm);
@@ -824,16 +841,6 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
return amdgpu_cs_sync_rings(p);
}
-static int amdgpu_cs_handle_lockup(struct amdgpu_device *adev, int r)
-{
- if (r == -EDEADLK) {
- r = amdgpu_gpu_reset(adev);
- if (!r)
- r = -EAGAIN;
- }
- return r;
-}
-
static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
struct amdgpu_cs_parser *parser)
{
@@ -902,7 +909,7 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
offset = ((uint64_t)m->it.start) * AMDGPU_GPU_PAGE_SIZE;
kptr += chunk_ib->va_start - offset;
- r = amdgpu_ib_get(adev, NULL, chunk_ib->ib_bytes, ib);
+ r = amdgpu_ib_get(adev, vm, chunk_ib->ib_bytes, ib);
if (r) {
DRM_ERROR("Failed to get ib !\n");
return r;
@@ -917,9 +924,9 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
return r;
}
- ib->gpu_addr = chunk_ib->va_start;
}
+ ib->gpu_addr = chunk_ib->va_start;
ib->length_dw = chunk_ib->ib_bytes / 4;
ib->flags = chunk_ib->flags;
j++;
@@ -927,8 +934,8 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
/* UVD & VCE fw doesn't support user fences */
if (parser->job->uf_addr && (
- parser->job->ring->type == AMDGPU_RING_TYPE_UVD ||
- parser->job->ring->type == AMDGPU_RING_TYPE_VCE))
+ parser->job->ring->funcs->type == AMDGPU_RING_TYPE_UVD ||
+ parser->job->ring->funcs->type == AMDGPU_RING_TYPE_VCE))
return -EINVAL;
return 0;
@@ -957,7 +964,7 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
for (j = 0; j < num_deps; ++j) {
struct amdgpu_ring *ring;
struct amdgpu_ctx *ctx;
- struct fence *fence;
+ struct dma_fence *fence;
r = amdgpu_cs_get_ring(adev, deps[j].ip_type,
deps[j].ip_instance,
@@ -979,7 +986,7 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
} else if (fence) {
r = amdgpu_sync_fence(adev, &p->job->sync,
fence);
- fence_put(fence);
+ dma_fence_put(fence);
amdgpu_ctx_put(ctx);
if (r)
return r;
@@ -1009,7 +1016,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
job->owner = p->filp;
job->fence_ctx = entity->fence_context;
- p->fence = fence_get(&job->base.s_fence->finished);
+ p->fence = dma_fence_get(&job->base.s_fence->finished);
cs->out.handle = amdgpu_ctx_add_fence(p->ctx, ring, p->fence);
job->uf_sequence = cs->out.handle;
amdgpu_job_free_resources(job);
@@ -1037,29 +1044,29 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
r = amdgpu_cs_parser_init(&parser, data);
if (r) {
DRM_ERROR("Failed to initialize parser !\n");
- amdgpu_cs_parser_fini(&parser, r, false);
- r = amdgpu_cs_handle_lockup(adev, r);
- return r;
- }
- r = amdgpu_cs_parser_bos(&parser, data);
- if (r == -ENOMEM)
- DRM_ERROR("Not enough memory for command submission!\n");
- else if (r && r != -ERESTARTSYS)
- DRM_ERROR("Failed to process the buffer list %d!\n", r);
- else if (!r) {
- reserved_buffers = true;
- r = amdgpu_cs_ib_fill(adev, &parser);
+ goto out;
}
- if (!r) {
- r = amdgpu_cs_dependencies(adev, &parser);
- if (r)
- DRM_ERROR("Failed in the dependencies handling %d!\n", r);
+ r = amdgpu_cs_parser_bos(&parser, data);
+ if (r) {
+ if (r == -ENOMEM)
+ DRM_ERROR("Not enough memory for command submission!\n");
+ else if (r != -ERESTARTSYS)
+ DRM_ERROR("Failed to process the buffer list %d!\n", r);
+ goto out;
}
+ reserved_buffers = true;
+ r = amdgpu_cs_ib_fill(adev, &parser);
if (r)
goto out;
+ r = amdgpu_cs_dependencies(adev, &parser);
+ if (r) {
+ DRM_ERROR("Failed in the dependencies handling %d!\n", r);
+ goto out;
+ }
+
for (i = 0; i < parser.job->num_ibs; i++)
trace_amdgpu_cs(&parser, i);
@@ -1071,7 +1078,6 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
out:
amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
- r = amdgpu_cs_handle_lockup(adev, r);
return r;
}
@@ -1092,7 +1098,7 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout);
struct amdgpu_ring *ring = NULL;
struct amdgpu_ctx *ctx;
- struct fence *fence;
+ struct dma_fence *fence;
long r;
r = amdgpu_cs_get_ring(adev, wait->in.ip_type, wait->in.ip_instance,
@@ -1108,8 +1114,8 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
if (IS_ERR(fence))
r = PTR_ERR(fence);
else if (fence) {
- r = fence_wait_timeout(fence, true, timeout);
- fence_put(fence);
+ r = dma_fence_wait_timeout(fence, true, timeout);
+ dma_fence_put(fence);
} else
r = 1;
@@ -1124,6 +1130,180 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
}
/**
+ * amdgpu_cs_get_fence - helper to get fence from drm_amdgpu_fence
+ *
+ * @adev: amdgpu device
+ * @filp: file private
+ * @user: drm_amdgpu_fence copied from user space
+ */
+static struct dma_fence *amdgpu_cs_get_fence(struct amdgpu_device *adev,
+ struct drm_file *filp,
+ struct drm_amdgpu_fence *user)
+{
+ struct amdgpu_ring *ring;
+ struct amdgpu_ctx *ctx;
+ struct dma_fence *fence;
+ int r;
+
+ r = amdgpu_cs_get_ring(adev, user->ip_type, user->ip_instance,
+ user->ring, &ring);
+ if (r)
+ return ERR_PTR(r);
+
+ ctx = amdgpu_ctx_get(filp->driver_priv, user->ctx_id);
+ if (ctx == NULL)
+ return ERR_PTR(-EINVAL);
+
+ fence = amdgpu_ctx_get_fence(ctx, ring, user->seq_no);
+ amdgpu_ctx_put(ctx);
+
+ return fence;
+}
+
+/**
+ * amdgpu_cs_wait_all_fence - wait on all fences to signal
+ *
+ * @adev: amdgpu device
+ * @filp: file private
+ * @wait: wait parameters
+ * @fences: array of drm_amdgpu_fence
+ */
+static int amdgpu_cs_wait_all_fences(struct amdgpu_device *adev,
+ struct drm_file *filp,
+ union drm_amdgpu_wait_fences *wait,
+ struct drm_amdgpu_fence *fences)
+{
+ uint32_t fence_count = wait->in.fence_count;
+ unsigned int i;
+ long r = 1;
+
+ for (i = 0; i < fence_count; i++) {
+ struct dma_fence *fence;
+ unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout_ns);
+
+ fence = amdgpu_cs_get_fence(adev, filp, &fences[i]);
+ if (IS_ERR(fence))
+ return PTR_ERR(fence);
+ else if (!fence)
+ continue;
+
+ r = dma_fence_wait_timeout(fence, true, timeout);
+ if (r < 0)
+ return r;
+
+ if (r == 0)
+ break;
+ }
+
+ memset(wait, 0, sizeof(*wait));
+ wait->out.status = (r > 0);
+
+ return 0;
+}
+
+/**
+ * amdgpu_cs_wait_any_fence - wait on any fence to signal
+ *
+ * @adev: amdgpu device
+ * @filp: file private
+ * @wait: wait parameters
+ * @fences: array of drm_amdgpu_fence
+ */
+static int amdgpu_cs_wait_any_fence(struct amdgpu_device *adev,
+ struct drm_file *filp,
+ union drm_amdgpu_wait_fences *wait,
+ struct drm_amdgpu_fence *fences)
+{
+ unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout_ns);
+ uint32_t fence_count = wait->in.fence_count;
+ uint32_t first = ~0;
+ struct dma_fence **array;
+ unsigned int i;
+ long r;
+
+ /* Prepare the fence array */
+ array = kcalloc(fence_count, sizeof(struct dma_fence *), GFP_KERNEL);
+
+ if (array == NULL)
+ return -ENOMEM;
+
+ for (i = 0; i < fence_count; i++) {
+ struct dma_fence *fence;
+
+ fence = amdgpu_cs_get_fence(adev, filp, &fences[i]);
+ if (IS_ERR(fence)) {
+ r = PTR_ERR(fence);
+ goto err_free_fence_array;
+ } else if (fence) {
+ array[i] = fence;
+ } else { /* NULL, the fence has been already signaled */
+ r = 1;
+ goto out;
+ }
+ }
+
+ r = dma_fence_wait_any_timeout(array, fence_count, true, timeout,
+ &first);
+ if (r < 0)
+ goto err_free_fence_array;
+
+out:
+ memset(wait, 0, sizeof(*wait));
+ wait->out.status = (r > 0);
+ wait->out.first_signaled = first;
+ /* set return value 0 to indicate success */
+ r = 0;
+
+err_free_fence_array:
+ for (i = 0; i < fence_count; i++)
+ dma_fence_put(array[i]);
+ kfree(array);
+
+ return r;
+}
+
+/**
+ * amdgpu_cs_wait_fences_ioctl - wait for multiple command submissions to finish
+ *
+ * @dev: drm device
+ * @data: data from userspace
+ * @filp: file private
+ */
+int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp)
+{
+ struct amdgpu_device *adev = dev->dev_private;
+ union drm_amdgpu_wait_fences *wait = data;
+ uint32_t fence_count = wait->in.fence_count;
+ struct drm_amdgpu_fence *fences_user;
+ struct drm_amdgpu_fence *fences;
+ int r;
+
+ /* Get the fences from userspace */
+ fences = kmalloc_array(fence_count, sizeof(struct drm_amdgpu_fence),
+ GFP_KERNEL);
+ if (fences == NULL)
+ return -ENOMEM;
+
+ fences_user = (void __user *)(unsigned long)(wait->in.fences);
+ if (copy_from_user(fences, fences_user,
+ sizeof(struct drm_amdgpu_fence) * fence_count)) {
+ r = -EFAULT;
+ goto err_free_fences;
+ }
+
+ if (wait->in.wait_all)
+ r = amdgpu_cs_wait_all_fences(adev, filp, wait, fences);
+ else
+ r = amdgpu_cs_wait_any_fence(adev, filp, wait, fences);
+
+err_free_fences:
+ kfree(fences);
+
+ return r;
+}
+
+/**
* amdgpu_cs_find_bo_va - find bo_va for VM address
*
* @parser: command submission parser context
@@ -1196,6 +1376,15 @@ int amdgpu_cs_sysvm_access_required(struct amdgpu_cs_parser *parser)
r = amdgpu_ttm_bind(&bo->tbo, &bo->tbo.mem);
if (unlikely(r))
return r;
+
+ if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)
+ continue;
+
+ bo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
+ amdgpu_ttm_placement_from_domain(bo, bo->allowed_domains);
+ r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
+ if (unlikely(r))
+ return r;
}
return 0;