| author | Ingo Molnar <mingo@kernel.org> | 2024-03-25 13:32:29 +0300 | 
|---|---|---|
| committer | Ingo Molnar <mingo@kernel.org> | 2024-03-25 13:32:29 +0300 | 
| commit | f4566a1e73957800df75a3dd2dccee8a4697f327 (patch) | |
| tree | b043b875228c0b25988af66c680d60cae69d761d /drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | |
| parent | b9e6e28663928cab836a19abbdec3d036a07db3b (diff) | |
| parent | 4cece764965020c22cff7665b18a012006359095 (diff) | |
| download | linux-f4566a1e73957800df75a3dd2dccee8a4697f327.tar.xz | |
Merge tag 'v6.9-rc1' into sched/core, to pick up fixes and to refresh the branch
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 255 | 
1 file changed, 178 insertions, 77 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index b8fcb6c55698..4299ce386322 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -234,6 +234,22 @@ static void amdgpu_vm_bo_invalidated(struct amdgpu_vm_bo_base *vm_bo)
 }
 
 /**
+ * amdgpu_vm_bo_evicted_user - vm_bo is evicted
+ *
+ * @vm_bo: vm_bo which is evicted
+ *
+ * State for BOs used by user mode queues which are not at the location they
+ * should be.
+ */
+static void amdgpu_vm_bo_evicted_user(struct amdgpu_vm_bo_base *vm_bo)
+{
+	vm_bo->moved = true;
+	spin_lock(&vm_bo->vm->status_lock);
+	list_move(&vm_bo->vm_status, &vm_bo->vm->evicted_user);
+	spin_unlock(&vm_bo->vm->status_lock);
+}
+
+/**
  * amdgpu_vm_bo_relocated - vm_bo is reloacted
  *
  * @vm_bo: vm_bo which is relocated
@@ -427,21 +443,25 @@ uint64_t amdgpu_vm_generation(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 }
 
 /**
- * amdgpu_vm_validate_pt_bos - validate the page table BOs
+ * amdgpu_vm_validate - validate evicted BOs tracked in the VM
  *
  * @adev: amdgpu device pointer
  * @vm: vm providing the BOs
+ * @ticket: optional reservation ticket used to reserve the VM
  * @validate: callback to do the validation
  * @param: parameter for the validation callback
  *
- * Validate the page table BOs on command submission if neccessary.
+ * Validate the page table BOs and per-VM BOs on command submission if
+ * necessary. If a ticket is given, also try to validate evicted user queue
+ * BOs. They must already be reserved with the given ticket.
 *
 * Returns:
 * Validation result.
 */
-int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
-			      int (*validate)(void *p, struct amdgpu_bo *bo),
-			      void *param)
+int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+		       struct ww_acquire_ctx *ticket,
+		       int (*validate)(void *p, struct amdgpu_bo *bo),
+		       void *param)
 {
 	struct amdgpu_vm_bo_base *bo_base;
 	struct amdgpu_bo *shadow;
@@ -484,6 +504,34 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 		}
 		spin_lock(&vm->status_lock);
 	}
+	while (ticket && !list_empty(&vm->evicted_user)) {
+		bo_base = list_first_entry(&vm->evicted_user,
+					   struct amdgpu_vm_bo_base,
+					   vm_status);
+		spin_unlock(&vm->status_lock);
+
+		bo = bo_base->bo;
+
+		if (dma_resv_locking_ctx(bo->tbo.base.resv) != ticket) {
+			struct amdgpu_task_info *ti = amdgpu_vm_get_task_info_vm(vm);
+
+			pr_warn_ratelimited("Evicted user BO is not reserved\n");
+			if (ti) {
+				pr_warn_ratelimited("pid %d\n", ti->pid);
+				amdgpu_vm_put_task_info(ti);
+			}
+
+			return -EINVAL;
+		}
+
+		r = validate(param, bo);
+		if (r)
+			return r;
+
+		amdgpu_vm_bo_invalidated(bo_base);
+
+		spin_lock(&vm->status_lock);
+	}
 	spin_unlock(&vm->status_lock);
 
 	amdgpu_vm_eviction_lock(vm);
@@ -610,7 +658,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
 	bool vm_flush_needed = job->vm_needs_flush;
 	struct dma_fence *fence = NULL;
 	bool pasid_mapping_needed = false;
-	unsigned patch_offset = 0;
+	unsigned int patch;
 	int r;
 
 	if (amdgpu_vmid_had_gpu_reset(adev, id)) {
@@ -637,7 +685,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
 
 	amdgpu_ring_ib_begin(ring);
 	if (ring->funcs->init_cond_exec)
-		patch_offset = amdgpu_ring_init_cond_exec(ring);
+		patch = amdgpu_ring_init_cond_exec(ring,
+						   ring->cond_exe_gpu_addr);
 
 	if (need_pipe_sync)
 		amdgpu_ring_emit_pipeline_sync(ring);
@@ -651,7 +700,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
 		amdgpu_gmc_emit_pasid_mapping(ring, job->vmid, job->pasid);
 
 	if (spm_update_needed && adev->gfx.rlc.funcs->update_spm_vmid)
-		adev->gfx.rlc.funcs->update_spm_vmid(adev, job->vmid);
+		adev->gfx.rlc.funcs->update_spm_vmid(adev, ring, job->vmid);
 
 	if (!ring->is_mes_queue && ring->funcs->emit_gds_switch &&
 	    gds_switch_needed) {
@@ -685,8 +734,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
 	}
 	dma_fence_put(fence);
 
-	if (ring->funcs->patch_cond_exec)
-		amdgpu_ring_patch_cond_exec(ring, patch_offset);
+	amdgpu_ring_patch_cond_exec(ring, patch);
 
 	/* the double SWITCH_BUFFER here *cannot* be skipped by COND_EXEC */
 	if (ring->funcs->emit_switch_buffer) {
@@ -1343,10 +1391,6 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
 			struct amdgpu_bo_va_mapping, list);
 		list_del(&mapping->list);
 
-		if (vm->pte_support_ats &&
-		    mapping->start < AMDGPU_GMC_HOLE_START)
-			init_pte_value = AMDGPU_PTE_DEFAULT_ATC;
-
 		r = amdgpu_vm_update_range(adev, vm, false, false, true, false,
 					   resv, mapping->start, mapping->last,
 					   init_pte_value, 0, 0, NULL, NULL,
@@ -1426,11 +1470,21 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
 		}
 
 		r = amdgpu_vm_bo_update(adev, bo_va, clear);
-		if (r)
-			return r;
 
 		if (unlock)
 			dma_resv_unlock(resv);
+		if (r)
+			return r;
+
+		/* Remember evicted DMABuf imports in compute VMs for later
+		 * validation
+		 */
+		if (vm->is_compute_context &&
+		    bo_va->base.bo->tbo.base.import_attach &&
+		    (!bo_va->base.bo->tbo.resource ||
+		     bo_va->base.bo->tbo.resource->mem_type == TTM_PL_SYSTEM))
+			amdgpu_vm_bo_evicted_user(&bo_va->base);
+
 		spin_lock(&vm->status_lock);
 	}
 	spin_unlock(&vm->status_lock);
@@ -2173,6 +2227,108 @@ long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout)
 	return dma_fence_wait_timeout(vm->last_unlocked, true, timeout);
 }
 
+static void amdgpu_vm_destroy_task_info(struct kref *kref)
+{
+	struct amdgpu_task_info *ti = container_of(kref, struct amdgpu_task_info, refcount);
+
+	kfree(ti);
+}
+
+static inline struct amdgpu_vm *
+amdgpu_vm_get_vm_from_pasid(struct amdgpu_device *adev, u32 pasid)
+{
+	struct amdgpu_vm *vm;
+	unsigned long flags;
+
+	xa_lock_irqsave(&adev->vm_manager.pasids, flags);
+	vm = xa_load(&adev->vm_manager.pasids, pasid);
+	xa_unlock_irqrestore(&adev->vm_manager.pasids, flags);
+
+	return vm;
+}
+
+/**
+ * amdgpu_vm_put_task_info - reference down the vm task_info ptr
+ *
+ * @task_info: task_info struct under discussion.
+ *
+ * frees the vm task_info ptr at the last put
+ */
+void amdgpu_vm_put_task_info(struct amdgpu_task_info *task_info)
+{
+	kref_put(&task_info->refcount, amdgpu_vm_destroy_task_info);
+}
+
+/**
+ * amdgpu_vm_get_task_info_vm - Extracts task info for a vm.
+ *
+ * @vm: VM to get info from
+ *
+ * Returns the reference counted task_info structure, which must be
+ * referenced down with amdgpu_vm_put_task_info.
+ */
+struct amdgpu_task_info *
+amdgpu_vm_get_task_info_vm(struct amdgpu_vm *vm)
+{
+	struct amdgpu_task_info *ti = NULL;
+
+	if (vm) {
+		ti = vm->task_info;
+		kref_get(&vm->task_info->refcount);
+	}
+
+	return ti;
+}
+
+/**
+ * amdgpu_vm_get_task_info_pasid - Extracts task info for a PASID.
+ *
+ * @adev: drm device pointer
+ * @pasid: PASID identifier for VM
+ *
+ * Returns the reference counted task_info structure, which must be
+ * referenced down with amdgpu_vm_put_task_info.
+ */
+struct amdgpu_task_info *
+amdgpu_vm_get_task_info_pasid(struct amdgpu_device *adev, u32 pasid)
+{
+	return amdgpu_vm_get_task_info_vm(
+			amdgpu_vm_get_vm_from_pasid(adev, pasid));
+}
+
+static int amdgpu_vm_create_task_info(struct amdgpu_vm *vm)
+{
+	vm->task_info = kzalloc(sizeof(struct amdgpu_task_info), GFP_KERNEL);
+	if (!vm->task_info)
+		return -ENOMEM;
+
+	kref_init(&vm->task_info->refcount);
+	return 0;
+}
+
+/**
+ * amdgpu_vm_set_task_info - Sets VMs task info.
+ *
+ * @vm: vm for which to set the info
+ */
+void amdgpu_vm_set_task_info(struct amdgpu_vm *vm)
+{
+	if (!vm->task_info)
+		return;
+
+	if (vm->task_info->pid == current->pid)
+		return;
+
+	vm->task_info->pid = current->pid;
+	get_task_comm(vm->task_info->task_name, current);
+
+	if (current->group_leader->mm != current->mm)
+		return;
+
+	vm->task_info->tgid = current->group_leader->pid;
+	get_task_comm(vm->task_info->process_name, current->group_leader);
+}
+
 /**
  * amdgpu_vm_init - initialize a vm instance
  *
@@ -2196,6 +2352,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
 		vm->reserved_vmid[i] = NULL;
 	INIT_LIST_HEAD(&vm->evicted);
+	INIT_LIST_HEAD(&vm->evicted_user);
 	INIT_LIST_HEAD(&vm->relocated);
 	INIT_LIST_HEAD(&vm->moved);
 	INIT_LIST_HEAD(&vm->idle);
@@ -2211,7 +2368,6 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	if (r)
 		return r;
 
-	vm->pte_support_ats = false;
 	vm->is_compute_context = false;
 
 	vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
@@ -2258,6 +2414,10 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	if (r)
 		goto error_free_root;
 
+	r = amdgpu_vm_create_task_info(vm);
+	if (r)
+		DRM_DEBUG("Failed to create task info for VM\n");
+
 	amdgpu_bo_unreserve(vm->root.bo);
 	amdgpu_bo_unref(&root_bo);
 
@@ -2297,30 +2457,12 @@ error_free_delayed:
  */
 int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 {
-	bool pte_support_ats = (adev->asic_type == CHIP_RAVEN);
 	int r;
 
 	r = amdgpu_bo_reserve(vm->root.bo, true);
 	if (r)
 		return r;
 
-	/* Check if PD needs to be reinitialized and do it before
-	 * changing any other state, in case it fails.
-	 */
-	if (pte_support_ats != vm->pte_support_ats) {
-		/* Sanity checks */
-		if (!amdgpu_vm_pt_is_root_clean(adev, vm)) {
-			r = -EINVAL;
-			goto unreserve_bo;
-		}
-
-		vm->pte_support_ats = pte_support_ats;
-		r = amdgpu_vm_pt_clear(adev, vm, to_amdgpu_bo_vm(vm->root.bo),
-				       false);
-		if (r)
-			goto unreserve_bo;
-	}
-
 	/* Update VM state */
 	vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
 				    AMDGPU_VM_USE_CPU_FOR_COMPUTE);
@@ -2397,6 +2539,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 
 	root = amdgpu_bo_ref(vm->root.bo);
 	amdgpu_bo_reserve(root, true);
+	amdgpu_vm_put_task_info(vm->task_info);
 	amdgpu_vm_set_pasid(adev, vm, 0);
 	dma_fence_wait(vm->last_unlocked, false);
 	dma_fence_put(vm->last_unlocked);
@@ -2554,48 +2697,6 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 }
 
 /**
- * amdgpu_vm_get_task_info - Extracts task info for a PASID.
- *
- * @adev: drm device pointer
- * @pasid: PASID identifier for VM
- * @task_info: task_info to fill.
- */
-void amdgpu_vm_get_task_info(struct amdgpu_device *adev, u32 pasid,
-			 struct amdgpu_task_info *task_info)
-{
-	struct amdgpu_vm *vm;
-	unsigned long flags;
-
-	xa_lock_irqsave(&adev->vm_manager.pasids, flags);
-
-	vm = xa_load(&adev->vm_manager.pasids, pasid);
-	if (vm)
-		*task_info = vm->task_info;
-
-	xa_unlock_irqrestore(&adev->vm_manager.pasids, flags);
-}
-
-/**
- * amdgpu_vm_set_task_info - Sets VMs task info.
- *
- * @vm: vm for which to set the info
- */
-void amdgpu_vm_set_task_info(struct amdgpu_vm *vm)
-{
-	if (vm->task_info.pid)
-		return;
-
-	vm->task_info.pid = current->pid;
-	get_task_comm(vm->task_info.task_name, current);
-
-	if (current->group_leader->mm != current->mm)
-		return;
-
-	vm->task_info.tgid = current->group_leader->pid;
-	get_task_comm(vm->task_info.process_name, current->group_leader);
-}
-
-/**
  * amdgpu_vm_handle_fault - graceful handling of VM faults.
  * @adev: amdgpu device pointer
  * @pasid: PASID of the VM
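
The rename of amdgpu_vm_validate_pt_bos() to amdgpu_vm_validate() adds an optional ww_acquire_ctx ticket: when one is passed, evicted user-queue BOs (including the DMABuf imports queued by amdgpu_vm_handle_moved() above) are validated as well, provided they were reserved with that ticket. A minimal caller sketch, not taken from the patch: example_validate_cb and example_submit_validate are illustrative names and the placement policy in the callback is an assumption; amdgpu_vm_validate(), amdgpu_bo_placement_from_domain() and ttm_bo_validate() are the existing interfaces being exercised.

```c
#include "amdgpu.h"

/* Illustrative callback: try to move an evicted BO back into one of its
 * allowed placements. Real callers (CS, KFD) apply their own policies. */
static int example_validate_cb(void *param, struct amdgpu_bo *bo)
{
	struct ttm_operation_ctx ctx = { .interruptible = true };

	amdgpu_bo_placement_from_domain(bo, bo->allowed_domains);
	return ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
}

/* Assumes the page-table, per-VM and user-queue BOs were already reserved
 * with @ticket (e.g. the ticket of a drm_exec context). Passing a NULL
 * ticket skips the evicted_user list, matching the old behaviour. */
static int example_submit_validate(struct amdgpu_device *adev,
				   struct amdgpu_vm *vm,
				   struct ww_acquire_ctx *ticket)
{
	return amdgpu_vm_validate(adev, vm, ticket, example_validate_cb, NULL);
}
```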

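The task_info rework replaces the old by-value amdgpu_vm_get_task_info() copy with reference-counted lookups: amdgpu_vm_get_task_info_pasid() and amdgpu_vm_get_task_info_vm() take a kref reference, and every successful get must be paired with amdgpu_vm_put_task_info(), whose final put frees the structure through amdgpu_vm_destroy_task_info(). A hypothetical consumer sketch for a fault-reporting path; example_report_fault() is not a function from the patch, only the get/put helpers and the pid/tgid/process_name fields come from it.

```c
#include "amdgpu.h"

/* Illustrative fault reporting using the reference-counted task info. */
static void example_report_fault(struct amdgpu_device *adev, u32 pasid,
				 u64 addr)
{
	struct amdgpu_task_info *ti;

	ti = amdgpu_vm_get_task_info_pasid(adev, pasid);
	if (ti) {
		dev_err(adev->dev,
			"GPU fault at 0x%llx from process %s (pid %d, tgid %d)\n",
			addr, ti->process_name, ti->pid, ti->tgid);
		/* Drop the reference taken by the get; the last put frees ti. */
		amdgpu_vm_put_task_info(ti);
	} else {
		dev_err(adev->dev, "GPU fault at 0x%llx, unknown PASID %u\n",
			addr, pasid);
	}
}
```

Because the structure is reference counted, a reference taken this way stays valid even after amdgpu_vm_fini() drops the VM's own reference in the hunk above, which the removed by-value API achieved by copying the whole struct under the pasid xarray lock.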