From 70e33073d941b4fc1e773769bd7d72220c14d41d Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Mon, 21 Jul 2025 19:04:34 +0530 Subject: drm/amdgpu: Fix kdoc style in amdgpu_fence.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The initial comment block before amdgpu_fence_driver_guilty_force_completion() incorrectly used '/**' but is not a kernel-doc comment, causing build warnings. Fixes the below with gcc W=1: drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c:742: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Kernel queue reset handling Cc: Christian König Cc: Alex Deucher Signed-off-by: Srinivasan Shanmugam Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 9e7506965cab..bcb74286a78a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -738,7 +738,7 @@ void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring) } -/** +/* * Kernel queue reset handling * * The driver can reset individual queues for most engines, but those queues -- cgit v1.2.3 From ee6ba1e69dd7d969ab36f16d81c930b1a7fbf61c Mon Sep 17 00:00:00 2001 From: Liao Yuanhong Date: Tue, 19 Aug 2025 16:25:21 +0800 Subject: drm/amdgpu/fence: Remove redundant 0 value initialization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The amdgpu_fence struct is already zeroed by kzalloc(). It's redundant to initialize am_fence->context to 0. Reviewed-by: Christian König Signed-off-by: Liao Yuanhong Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index bcb74286a78a..fd8cca241da6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -120,7 +120,6 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, am_fence = kzalloc(sizeof(*am_fence), GFP_KERNEL); if (!am_fence) return -ENOMEM; - am_fence->context = 0; } else { am_fence = af; } -- cgit v1.2.3 From 1f22fcb88bfef26a966e9eb242c692c6bf253d47 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 15 Sep 2025 12:37:32 -0400 Subject: drm/amdgpu: handle wrap around in reemit handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Compare the sequence numbers directly. Fixes: 77cc0da39c7c ("drm/amdgpu: track ring state associated with a fence") Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index fd8cca241da6..e270df30c279 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -790,14 +790,19 @@ void amdgpu_ring_backup_unprocessed_commands(struct amdgpu_ring *ring, struct dma_fence *unprocessed; struct dma_fence __rcu **ptr; struct amdgpu_fence *fence; - u64 wptr, i, seqno; + u64 wptr; + u32 seq, last_seq; - seqno = amdgpu_fence_read(ring); + last_seq = amdgpu_fence_read(ring) & ring->fence_drv.num_fences_mask; + seq = ring->fence_drv.sync_seq & ring->fence_drv.num_fences_mask; wptr = ring->fence_drv.signalled_wptr; ring->ring_backup_entries_to_copy = 0; - for (i = seqno + 1; i <= ring->fence_drv.sync_seq; ++i) { - ptr = &ring->fence_drv.fences[i & ring->fence_drv.num_fences_mask]; + do { + last_seq++; + last_seq &= ring->fence_drv.num_fences_mask; + + ptr = &ring->fence_drv.fences[last_seq]; rcu_read_lock(); unprocessed = rcu_dereference(*ptr); @@ -813,7 +818,7 @@ void amdgpu_ring_backup_unprocessed_commands(struct amdgpu_ring *ring, wptr = fence->wptr; } rcu_read_unlock(); - } + } while (last_seq != seq); } /* -- cgit v1.2.3 From ff780f4f80323148d43198f2052c14160c8428d3 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 3 Sep 2025 13:48:23 -0400 Subject: drm/amdgpu: set an error on all fences from a bad context MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When we backup ring contents to reemit after a queue reset, we don't backup ring contents from the bad context. When we signal the fences, we should set an error on those fences as well. v2: misc cleanups v3: add locking for fence error, fix comment (Christian) v4: fix wrap around, locking (Christian) Fixes: 77cc0da39c7c ("drm/amdgpu: track ring state associated with a fence") Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 39 +++++++++++++++++++++++++++---- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 2 +- 3 files changed, 37 insertions(+), 6 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index e270df30c279..18a7829122d2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -758,11 +758,42 @@ void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring) * @fence: fence of the ring to signal * */ -void amdgpu_fence_driver_guilty_force_completion(struct amdgpu_fence *fence) +void amdgpu_fence_driver_guilty_force_completion(struct amdgpu_fence *af) { - dma_fence_set_error(&fence->base, -ETIME); - amdgpu_fence_write(fence->ring, fence->seq); - amdgpu_fence_process(fence->ring); + struct dma_fence *unprocessed; + struct dma_fence __rcu **ptr; + struct amdgpu_fence *fence; + struct amdgpu_ring *ring = af->ring; + unsigned long flags; + u32 seq, last_seq; + + last_seq = amdgpu_fence_read(ring) & ring->fence_drv.num_fences_mask; + seq = ring->fence_drv.sync_seq & ring->fence_drv.num_fences_mask; + + /* mark all fences from the guilty context with an error */ + spin_lock_irqsave(&ring->fence_drv.lock, flags); + do { + last_seq++; + last_seq &= ring->fence_drv.num_fences_mask; + + ptr = &ring->fence_drv.fences[last_seq]; + rcu_read_lock(); + unprocessed = rcu_dereference(*ptr); + + if (unprocessed && !dma_fence_is_signaled_locked(unprocessed)) { + fence = container_of(unprocessed, struct amdgpu_fence, base); + + if (fence == af) + dma_fence_set_error(&fence->base, -ETIME); + else if (fence->context == af->context) + dma_fence_set_error(&fence->base, -ECANCELED); + } + rcu_read_unlock(); + } while (last_seq != seq); + spin_unlock_irqrestore(&ring->fence_drv.lock, flags); + /* signal the guilty fence */ + amdgpu_fence_write(ring, af->seq); + amdgpu_fence_process(ring); } void amdgpu_fence_save_wptr(struct dma_fence *fence) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 8f6ce948c684..5ec5c3ff22bb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -811,7 +811,7 @@ int amdgpu_ring_reset_helper_end(struct amdgpu_ring *ring, if (r) return r; - /* signal the fence of the bad job */ + /* signal the guilty fence and set an error on all fences from the context */ if (guilty_fence) amdgpu_fence_driver_guilty_force_completion(guilty_fence); /* Re-emit the non-guilty commands */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index b6b649179776..4b46e3c26ff3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -155,7 +155,7 @@ extern const struct drm_sched_backend_ops amdgpu_sched_ops; void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring); void amdgpu_fence_driver_set_error(struct amdgpu_ring *ring, int error); void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring); -void amdgpu_fence_driver_guilty_force_completion(struct amdgpu_fence *fence); +void amdgpu_fence_driver_guilty_force_completion(struct amdgpu_fence *af); void amdgpu_fence_save_wptr(struct dma_fence *fence); int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring); -- cgit v1.2.3