Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c')
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 391
1 file changed, 260 insertions, 131 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 1f040d85ac47..fd16652aa277 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -126,46 +126,70 @@ int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type,
 	return 0;
 }
 
+struct amdgpu_cs_parser *amdgpu_cs_parser_create(struct amdgpu_device *adev,
+                                               struct drm_file *filp,
+                                               struct amdgpu_ctx *ctx,
+                                               struct amdgpu_ib *ibs,
+                                               uint32_t num_ibs)
+{
+	struct amdgpu_cs_parser *parser;
+	int i;
+
+	parser = kzalloc(sizeof(struct amdgpu_cs_parser), GFP_KERNEL);
+	if (!parser)
+		return NULL;
+
+	parser->adev = adev;
+	parser->filp = filp;
+	parser->ctx = ctx;
+	parser->ibs = ibs;
+	parser->num_ibs = num_ibs;
+	for (i = 0; i < num_ibs; i++)
+		ibs[i].ctx = ctx;
+
+	return parser;
+}
+
 int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 {
 	union drm_amdgpu_cs *cs = data;
 	uint64_t *chunk_array_user;
-	uint64_t *chunk_array = NULL;
+	uint64_t *chunk_array;
 	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
-	unsigned size, i;
-	int r = 0;
+	unsigned size;
+	int i;
+	int ret;
 
-	if (!cs->in.num_chunks)
-		goto out;
+	if (cs->in.num_chunks == 0)
+		return 0;
+
+	chunk_array = kmalloc_array(cs->in.num_chunks, sizeof(uint64_t), GFP_KERNEL);
+	if (!chunk_array)
+		return -ENOMEM;
 
 	p->ctx = amdgpu_ctx_get(fpriv, cs->in.ctx_id);
 	if (!p->ctx) {
-		r = -EINVAL;
-		goto out;
+		ret = -EINVAL;
+		goto free_chunk;
 	}
+
 	p->bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle);
 
 	/* get chunks */
 	INIT_LIST_HEAD(&p->validated);
-	chunk_array = kcalloc(cs->in.num_chunks, sizeof(uint64_t), GFP_KERNEL);
-	if (chunk_array == NULL) {
-		r = -ENOMEM;
-		goto out;
-	}
-
-	chunk_array_user = (uint64_t *)(unsigned long)(cs->in.chunks);
+	chunk_array_user = (uint64_t __user *)(unsigned long)(cs->in.chunks);
 	if (copy_from_user(chunk_array, chunk_array_user,
 			   sizeof(uint64_t)*cs->in.num_chunks)) {
-		r = -EFAULT;
-		goto out;
+		ret = -EFAULT;
+		goto put_bo_list;
 	}
 
 	p->nchunks = cs->in.num_chunks;
-	p->chunks = kcalloc(p->nchunks, sizeof(struct amdgpu_cs_chunk),
+	p->chunks = kmalloc_array(p->nchunks, sizeof(struct amdgpu_cs_chunk),
 			    GFP_KERNEL);
-	if (p->chunks == NULL) {
-		r = -ENOMEM;
-		goto out;
+	if (!p->chunks) {
+		ret = -ENOMEM;
+		goto put_bo_list;
 	}
 
 	for (i = 0; i < p->nchunks; i++) {
@@ -176,8 +200,9 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 		chunk_ptr = (void __user *)(unsigned long)chunk_array[i];
 		if (copy_from_user(&user_chunk, chunk_ptr,
 				       sizeof(struct drm_amdgpu_cs_chunk))) {
-			r = -EFAULT;
-			goto out;
+			ret = -EFAULT;
+			i--;
+			goto free_partial_kdata;
 		}
 		p->chunks[i].chunk_id = user_chunk.chunk_id;
 		p->chunks[i].length_dw = user_chunk.length_dw;
@@ -188,13 +213,14 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 
 		p->chunks[i].kdata = drm_malloc_ab(size, sizeof(uint32_t));
 		if (p->chunks[i].kdata == NULL) {
-			r = -ENOMEM;
-			goto out;
+			ret = -ENOMEM;
+			i--;
+			goto free_partial_kdata;
 		}
 		size *= sizeof(uint32_t);
 		if (copy_from_user(p->chunks[i].kdata, cdata, size)) {
-			r = -EFAULT;
-			goto out;
+			ret = -EFAULT;
+			goto free_partial_kdata;
 		}
 
 		switch (p->chunks[i].chunk_id) {
@@ -214,15 +240,15 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 				gobj = drm_gem_object_lookup(p->adev->ddev,
 							     p->filp, handle);
 				if (gobj == NULL) {
-					r = -EINVAL;
-					goto out;
+					ret = -EINVAL;
+					goto free_partial_kdata;
 				}
 
 				p->uf.bo = gem_to_amdgpu_bo(gobj);
 				p->uf.offset = fence_data->offset;
 			} else {
-				r = -EINVAL;
-				goto out;
+				ret = -EINVAL;
+				goto free_partial_kdata;
 			}
 			break;
 
@@ -230,20 +256,35 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 			break;
 
 		default:
-			r = -EINVAL;
-			goto out;
+			ret = -EINVAL;
+			goto free_partial_kdata;
 		}
 	}
+
 	p->ibs = kcalloc(p->num_ibs, sizeof(struct amdgpu_ib), GFP_KERNEL);
 	if (!p->ibs) {
-		r = -ENOMEM;
-		goto out;
+		ret = -ENOMEM;
+		goto free_all_kdata;
 	}
 
-out:
 	kfree(chunk_array);
-	return r;
+	return 0;
+
+free_all_kdata:
+	i = p->nchunks - 1;
+free_partial_kdata:
+	for (; i >= 0; i--)
+		drm_free_large(p->chunks[i].kdata);
+	kfree(p->chunks);
+put_bo_list:
+	if (p->bo_list)
+		amdgpu_bo_list_put(p->bo_list);
+	amdgpu_ctx_put(p->ctx);
+free_chunk:
+	kfree(chunk_array);
+
+	return ret;
 }
 
 /* Returns how many bytes TTM can move per IB.
@@ -298,25 +339,17 @@ static u64 amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev)
 	return max(bytes_moved_threshold, 1024*1024ull);
 }
 
-int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p)
+int amdgpu_cs_list_validate(struct amdgpu_device *adev,
+			    struct amdgpu_vm *vm,
+			    struct list_head *validated)
 {
-	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
-	struct amdgpu_vm *vm = &fpriv->vm;
-	struct amdgpu_device *adev = p->adev;
 	struct amdgpu_bo_list_entry *lobj;
-	struct list_head duplicates;
 	struct amdgpu_bo *bo;
 	u64 bytes_moved = 0, initial_bytes_moved;
 	u64 bytes_moved_threshold = amdgpu_cs_get_threshold_for_moves(adev);
 	int r;
 
-	INIT_LIST_HEAD(&duplicates);
-	r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true, &duplicates);
-	if (unlikely(r != 0)) {
-		return r;
-	}
-
-	list_for_each_entry(lobj, &p->validated, tv.head) {
+	list_for_each_entry(lobj, validated, tv.head) {
 		bo = lobj->robj;
 		if (!bo->pin_count) {
 			u32 domain = lobj->prefered_domains;
@@ -331,7 +364,7 @@ int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p)
 			 * into account. We don't want to disallow buffer moves
 			 * completely.
 			 */
-			if (current_domain != AMDGPU_GEM_DOMAIN_CPU &&
+			if ((lobj->allowed_domains & current_domain) != 0 &&
 			    (domain & current_domain) == 0 && /* will be moved */
 			    bytes_moved > bytes_moved_threshold) {
 				/* don't move it */
@@ -350,7 +383,6 @@ int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p)
 					domain = lobj->allowed_domains;
 					goto retry;
 				}
-				ttm_eu_backoff_reservation(&p->ticket, &p->validated);
 				return r;
 			}
 		}
@@ -363,6 +395,7 @@ static int amdgpu_cs_parser_relocs(struct amdgpu_cs_parser *p)
 {
 	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
 	struct amdgpu_cs_buckets buckets;
+	struct list_head duplicates;
 	bool need_mmap_lock = false;
 	int i, r;
 
@@ -382,8 +415,22 @@ static int amdgpu_cs_parser_relocs(struct amdgpu_cs_parser *p)
 	if (need_mmap_lock)
 		down_read(&current->mm->mmap_sem);
 
-	r = amdgpu_cs_list_validate(p);
+	INIT_LIST_HEAD(&duplicates);
+	r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true, &duplicates);
+	if (unlikely(r != 0))
+		goto error_reserve;
+
+	r = amdgpu_cs_list_validate(p->adev, &fpriv->vm, &p->validated);
+	if (r)
+		goto error_validate;
+
+	r = amdgpu_cs_list_validate(p->adev, &fpriv->vm, &duplicates);
+
+error_validate:
+	if (r)
+		ttm_eu_backoff_reservation(&p->ticket, &p->validated);
+
+error_reserve:
 	if (need_mmap_lock)
 		up_read(&current->mm->mmap_sem);
 
@@ -415,18 +462,8 @@ static int cmp_size_smaller_first(void *priv, struct list_head *a,
 	return (int)la->robj->tbo.num_pages - (int)lb->robj->tbo.num_pages;
 }
 
-/**
- * cs_parser_fini() - clean parser states
- * @parser:	parser structure holding parsing context.
- * @error:	error number
- *
- * If error is set than unvalidate buffer, otherwise just free memory
- * used by parsing context.
- **/
-static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bool backoff)
+static void amdgpu_cs_parser_fini_early(struct amdgpu_cs_parser *parser, int error, bool backoff)
 {
-	unsigned i;
-
 	if (!error) {
 		/* Sort the buffer list from the smallest to largest buffer,
 		 * which affects the order of buffers in the LRU list.
@@ -447,21 +484,45 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bo
 		ttm_eu_backoff_reservation(&parser->ticket,
 					   &parser->validated);
 	}
+}
 
+static void amdgpu_cs_parser_fini_late(struct amdgpu_cs_parser *parser)
+{
+	unsigned i;
 	if (parser->ctx)
 		amdgpu_ctx_put(parser->ctx);
 	if (parser->bo_list)
 		amdgpu_bo_list_put(parser->bo_list);
+
 	drm_free_large(parser->vm_bos);
 	for (i = 0; i < parser->nchunks; i++)
 		drm_free_large(parser->chunks[i].kdata);
 	kfree(parser->chunks);
-	if (parser->ibs)
-		for (i = 0; i < parser->num_ibs; i++)
-			amdgpu_ib_free(parser->adev, &parser->ibs[i]);
-	kfree(parser->ibs);
-	if (parser->uf.bo)
-		drm_gem_object_unreference_unlocked(&parser->uf.bo->gem_base);
+	if (!amdgpu_enable_scheduler)
+	{
+		if (parser->ibs)
+			for (i = 0; i < parser->num_ibs; i++)
+				amdgpu_ib_free(parser->adev, &parser->ibs[i]);
+		kfree(parser->ibs);
+		if (parser->uf.bo)
+			drm_gem_object_unreference_unlocked(&parser->uf.bo->gem_base);
+	}
+
+	kfree(parser);
+}
+
+/**
+ * cs_parser_fini() - clean parser states
+ * @parser:	parser structure holding parsing context.
+ * @error:	error number
+ *
+ * If error is set than unvalidate buffer, otherwise just free memory
+ * used by parsing context.
+ **/
+static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bool backoff)
+{
+       amdgpu_cs_parser_fini_early(parser, error, backoff);
+       amdgpu_cs_parser_fini_late(parser);
 }
 
 static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p,
@@ -476,12 +537,18 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p,
 	if (r)
 		return r;
 
+	r = amdgpu_sync_fence(adev, &p->ibs[0].sync, vm->page_directory_fence);
+	if (r)
+		return r;
+
 	r = amdgpu_vm_clear_freed(adev, vm);
 	if (r)
 		return r;
 
 	if (p->bo_list) {
 		for (i = 0; i < p->bo_list->num_entries; i++) {
+			struct fence *f;
+
 			/* ignore duplicates */
 			bo = p->bo_list->array[i].robj;
 			if (!bo)
@@ -495,7 +562,10 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p,
 			if (r)
 				return r;
 
-			amdgpu_sync_fence(&p->ibs[0].sync, bo_va->last_pt_update);
+			f = bo_va->last_pt_update;
+			r = amdgpu_sync_fence(adev, &p->ibs[0].sync, f);
+			if (r)
+				return r;
 		}
 	}
 
@@ -529,9 +599,9 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
 		goto out;
 	}
 	amdgpu_cs_sync_rings(parser);
-
-	r = amdgpu_ib_schedule(adev, parser->num_ibs, parser->ibs,
-			       parser->filp);
+	if (!amdgpu_enable_scheduler)
+		r = amdgpu_ib_schedule(adev, parser->num_ibs, parser->ibs,
+				       parser->filp);
 
 out:
 	mutex_unlock(&vm->mutex);
@@ -650,7 +720,6 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
 			ib->oa_size = amdgpu_bo_size(oa);
 		}
 	}
-
 	/* wrap the last IB with user fence */
 	if (parser->uf.bo) {
 		struct amdgpu_ib *ib = &parser->ibs[parser->num_ibs - 1];
@@ -693,9 +762,9 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
 			sizeof(struct drm_amdgpu_cs_chunk_dep);
 
 		for (j = 0; j < num_deps; ++j) {
-			struct amdgpu_fence *fence;
 			struct amdgpu_ring *ring;
 			struct amdgpu_ctx *ctx;
+			struct fence *fence;
 
 			r = amdgpu_cs_get_ring(adev, deps[j].ip_type,
 					       deps[j].ip_instance,
@@ -707,85 +776,137 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
 			if (ctx == NULL)
 				return -EINVAL;
 
-			r = amdgpu_fence_recreate(ring, p->filp,
-						  deps[j].handle,
-						  &fence);
-			if (r) {
+			fence = amdgpu_ctx_get_fence(ctx, ring,
+						     deps[j].handle);
+			if (IS_ERR(fence)) {
+				r = PTR_ERR(fence);
 				amdgpu_ctx_put(ctx);
 				return r;
-			}
 
-			amdgpu_sync_fence(&ib->sync, fence);
-			amdgpu_fence_unref(&fence);
-			amdgpu_ctx_put(ctx);
+			} else if (fence) {
+				r = amdgpu_sync_fence(adev, &ib->sync, fence);
+				fence_put(fence);
+				amdgpu_ctx_put(ctx);
+				if (r)
+					return r;
+			}
 		}
 	}
 
 	return 0;
 }
 
+static int amdgpu_cs_free_job(struct amdgpu_job *job)
+{
+	int i;
+	if (job->ibs)
+		for (i = 0; i < job->num_ibs; i++)
+			amdgpu_ib_free(job->adev, &job->ibs[i]);
+	kfree(job->ibs);
+	if (job->uf.bo)
+		drm_gem_object_unreference_unlocked(&job->uf.bo->gem_base);
+	return 0;
+}
+
 int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 {
 	struct amdgpu_device *adev = dev->dev_private;
 	union drm_amdgpu_cs *cs = data;
-	struct amdgpu_cs_parser parser;
-	int r, i;
+	struct amdgpu_cs_parser *parser;
 	bool reserved_buffers = false;
+	int i, r;
 
 	down_read(&adev->exclusive_lock);
 	if (!adev->accel_working) {
 		up_read(&adev->exclusive_lock);
 		return -EBUSY;
 	}
-	/* initialize parser */
-	memset(&parser, 0, sizeof(struct amdgpu_cs_parser));
-	parser.filp = filp;
-	parser.adev = adev;
-	r = amdgpu_cs_parser_init(&parser, data);
+
+	parser = amdgpu_cs_parser_create(adev, filp, NULL, NULL, 0);
+	if (!parser)
+		return -ENOMEM;
+	r = amdgpu_cs_parser_init(parser, data);
 	if (r) {
 		DRM_ERROR("Failed to initialize parser !\n");
-		amdgpu_cs_parser_fini(&parser, r, false);
+		kfree(parser);
 		up_read(&adev->exclusive_lock);
 		r = amdgpu_cs_handle_lockup(adev, r);
 		return r;
 	}
 
-	r = amdgpu_cs_parser_relocs(&parser);
-	if (r) {
-		if (r != -ERESTARTSYS) {
-			if (r == -ENOMEM)
-				DRM_ERROR("Not enough memory for command submission!\n");
-			else
-				DRM_ERROR("Failed to process the buffer list %d!\n", r);
-		}
+	r = amdgpu_cs_parser_relocs(parser);
+	if (r == -ENOMEM)
+		DRM_ERROR("Not enough memory for command submission!\n");
+	else if (r && r != -ERESTARTSYS)
+		DRM_ERROR("Failed to process the buffer list %d!\n", r);
+	else if (!r) {
+		reserved_buffers = true;
+		r = amdgpu_cs_ib_fill(adev, parser);
 	}
 
 	if (!r) {
-		reserved_buffers = true;
-		r = amdgpu_cs_ib_fill(adev, &parser);
+		r = amdgpu_cs_dependencies(adev, parser);
+		if (r)
+			DRM_ERROR("Failed in the dependencies handling %d!\n", r);
 	}
 
-	if (!r)
-		r = amdgpu_cs_dependencies(adev, &parser);
-
-	if (r) {
-		amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
-		up_read(&adev->exclusive_lock);
-		r = amdgpu_cs_handle_lockup(adev, r);
-		return r;
-	}
+	if (r)
+		goto out;
 
-	for (i = 0; i < parser.num_ibs; i++)
-		trace_amdgpu_cs(&parser, i);
+	for (i = 0; i < parser->num_ibs; i++)
+		trace_amdgpu_cs(parser, i);
 
-	r = amdgpu_cs_ib_vm_chunk(adev, &parser);
-	if (r) {
+	r = amdgpu_cs_ib_vm_chunk(adev, parser);
+	if (r)
 		goto out;
+
+	if (amdgpu_enable_scheduler && parser->num_ibs) {
+		struct amdgpu_job *job;
+		struct amdgpu_ring * ring =  parser->ibs->ring;
+		job = kzalloc(sizeof(struct amdgpu_job), GFP_KERNEL);
+		if (!job)
+			return -ENOMEM;
+		job->base.sched = &ring->sched;
+		job->base.s_entity = &parser->ctx->rings[ring->idx].entity;
+		job->adev = parser->adev;
+		job->ibs = parser->ibs;
+		job->num_ibs = parser->num_ibs;
+		job->base.owner = parser->filp;
+		mutex_init(&job->job_lock);
+		if (job->ibs[job->num_ibs - 1].user) {
+			memcpy(&job->uf,  &parser->uf,
+			       sizeof(struct amdgpu_user_fence));
+			job->ibs[job->num_ibs - 1].user = &job->uf;
+		}
+
+		job->free_job = amdgpu_cs_free_job;
+		mutex_lock(&job->job_lock);
+		r = amd_sched_entity_push_job(&job->base);
+		if (r) {
+			mutex_unlock(&job->job_lock);
+			amdgpu_cs_free_job(job);
+			kfree(job);
+			goto out;
+		}
+		cs->out.handle =
+			amdgpu_ctx_add_fence(parser->ctx, ring,
+					     &job->base.s_fence->base);
+		parser->ibs[parser->num_ibs - 1].sequence = cs->out.handle;
+
+		list_sort(NULL, &parser->validated, cmp_size_smaller_first);
+		ttm_eu_fence_buffer_objects(&parser->ticket,
+				&parser->validated,
+				&job->base.s_fence->base);
+
+		mutex_unlock(&job->job_lock);
+		amdgpu_cs_parser_fini_late(parser);
+		up_read(&adev->exclusive_lock);
+		return 0;
 	}
 
-	cs->out.handle = parser.ibs[parser.num_ibs - 1].fence->seq;
+	cs->out.handle = parser->ibs[parser->num_ibs - 1].sequence;
 out:
-	amdgpu_cs_parser_fini(&parser, r, true);
+	amdgpu_cs_parser_fini(parser, r, reserved_buffers);
 	up_read(&adev->exclusive_lock);
 	r = amdgpu_cs_handle_lockup(adev, r);
 	return r;
@@ -806,30 +927,29 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
 	union drm_amdgpu_wait_cs *wait = data;
 	struct amdgpu_device *adev = dev->dev_private;
 	unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout);
-	struct amdgpu_fence *fence = NULL;
 	struct amdgpu_ring *ring = NULL;
 	struct amdgpu_ctx *ctx;
+	struct fence *fence;
 	long r;
 
-	ctx = amdgpu_ctx_get(filp->driver_priv, wait->in.ctx_id);
-	if (ctx == NULL)
-		return -EINVAL;
-
 	r = amdgpu_cs_get_ring(adev, wait->in.ip_type, wait->in.ip_instance,
 			       wait->in.ring, &ring);
-	if (r) {
-		amdgpu_ctx_put(ctx);
+	if (r)
 		return r;
-	}
 
-	r = amdgpu_fence_recreate(ring, filp, wait->in.handle, &fence);
-	if (r) {
-		amdgpu_ctx_put(ctx);
-		return r;
-	}
+	ctx = amdgpu_ctx_get(filp->driver_priv, wait->in.ctx_id);
+	if (ctx == NULL)
+		return -EINVAL;
+
+	fence = amdgpu_ctx_get_fence(ctx, ring, wait->in.handle);
+	if (IS_ERR(fence))
+		r = PTR_ERR(fence);
+	else if (fence) {
+		r = fence_wait_timeout(fence, true, timeout);
+		fence_put(fence);
+	} else
+		r = 1;
 
-	r = fence_wait_timeout(&fence->base, true, timeout);
-	amdgpu_fence_unref(&fence);
 	amdgpu_ctx_put(ctx);
 	if (r < 0)
 		return r;
@@ -864,7 +984,16 @@ amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
 		if (!reloc->bo_va)
 			continue;
 
-		list_for_each_entry(mapping, &reloc->bo_va->mappings, list) {
+		list_for_each_entry(mapping, &reloc->bo_va->valids, list) {
+			if (mapping->it.start > addr ||
+			    addr > mapping->it.last)
+				continue;
+
+			*bo = reloc->bo_va->bo;
+			return mapping;
+		}
+
+		list_for_each_entry(mapping, &reloc->bo_va->invalids, list) {
 			if (mapping->it.start > addr ||
 			    addr > mapping->it.last)
 				continue;
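
The reworked error paths in amdgpu_cs_parser_init() above replace a single "goto out" with a chain of labels (free_all_kdata, free_partial_kdata, put_bo_list, free_chunk) that unwind exactly what has been set up so far. Below is a minimal, self-contained userspace sketch of that labeled-unwind pattern, not driver code: parse_chunks(), load_chunk() and the injected failure are hypothetical stand-ins, and unlike the driver the sketch also frees its buffers on the success path so it stays leak-free.

/*
 * Minimal userspace sketch of the labeled-unwind pattern adopted above in
 * amdgpu_cs_parser_init().  parse_chunks() and load_chunk() are hypothetical
 * stand-ins, not part of the driver.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct chunk {
	size_t len;
	void *kdata;
};

/* Stand-in for drm_malloc_ab() + copy_from_user(); fails on the third chunk. */
static int load_chunk(struct chunk *c, unsigned int idx)
{
	if (idx == 2)
		return -1;		/* simulate -EFAULT before any allocation */
	c->len = 16;
	c->kdata = malloc(c->len);
	if (!c->kdata)
		return -1;		/* simulate -ENOMEM */
	memset(c->kdata, 0, c->len);
	return 0;
}

static int parse_chunks(unsigned int nchunks)
{
	struct chunk *chunks;
	int i, ret;

	chunks = calloc(nchunks, sizeof(*chunks));
	if (!chunks)
		return -1;

	for (i = 0; i < (int)nchunks; i++) {
		ret = load_chunk(&chunks[i], i);
		if (ret) {
			/* chunks[i].kdata was never allocated, so unwinding
			 * starts at the previous entry - the same reason the
			 * patch decrements i before goto free_partial_kdata. */
			i--;
			goto free_partial;
		}
	}

	printf("all %u chunks loaded\n", nchunks);
	ret = 0;
	i = (int)nchunks - 1;	/* unlike the driver, free on success too, to keep the sketch leak-free */
free_partial:
	for (; i >= 0; i--)
		free(chunks[i].kdata);
	free(chunks);
	return ret;
}

int main(void)
{
	return parse_chunks(4) ? EXIT_FAILURE : EXIT_SUCCESS;
}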
