summaryrefslogtreecommitdiff
path: root/drivers
diff options
context:
space:
mode:
authorYuBiao Wang <YuBiao.Wang@amd.com>2023-03-16 06:30:32 +0300
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>2023-04-20 13:36:57 +0300
commitd711999534c40d9d7501cf4d9a94d6256e521d37 (patch)
tree41c786e0fb1bf2b4d3e9baec435fc2dc9027ae1a /drivers
parent3283aaa2f16960c707141f27d80827842f933fe4 (diff)
downloadlinux-d711999534c40d9d7501cf4d9a94d6256e521d37.tar.xz
drm/amdgpu: Force signal hw_fences that are embedded in non-sched jobs
[ Upstream commit 033c56474acf567a450f8bafca50e0b610f2b716 ] [Why] For engines not supporting soft reset, i.e. VCN, there will be a failed ib test before mode 1 reset during asic reset. The fences in this case are never signaled and next time when we try to free the sa_bo, kernel will hang. [How] During pre_asic_reset, driver will clear job fences and afterwards the fences' refcount will be reduced to 1. For drm_sched_jobs it will be released in job_free_cb, and for non-sched jobs like ib_test, it's meant to be released in sa_bo_free but only when the fences are signaled. So we have to force signal the non_sched bad job's fence during pre_asic_reset or the clear is not complete. Signed-off-by: YuBiao Wang <YuBiao.Wang@amd.com> Acked-by: Luben Tuikov <luben.tuikov@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> Signed-off-by: Sasha Levin <sashal@kernel.org>
Diffstat (limited to 'drivers')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c9
1 files changed, 9 insertions, 0 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index faff4a3f96e6..f52d0ba91a77 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -678,6 +678,15 @@ void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring)
ptr = &ring->fence_drv.fences[i];
old = rcu_dereference_protected(*ptr, 1);
if (old && old->ops == &amdgpu_job_fence_ops) {
+ struct amdgpu_job *job;
+
+ /* For non-scheduler bad job, i.e. failed ib test, we need to signal
+ * it right here or we won't be able to track them in fence_drv
+ * and they will remain unsignaled during sa_bo free.
+ */
+ job = container_of(old, struct amdgpu_job, hw_fence);
+ if (!job->base.s_fence && !dma_fence_is_signaled(old))
+ dma_fence_signal(old);
RCU_INIT_POINTER(*ptr, NULL);
dma_fence_put(old);
}