diff options
| author | Boris Brezillon <boris.brezillon@collabora.com> | 2025-11-28 11:48:37 +0300 |
|---|---|---|
| committer | Boris Brezillon <boris.brezillon@collabora.com> | 2025-11-28 12:17:44 +0300 |
| commit | 3c0a60195b37af83bbbaf223cd3a78945bace49e (patch) | |
| tree | f5b81fa2dbf2a4b350fd5e948ba6b95a95f2b150 | |
| parent | 151df689fb75e46a6cafa9a2c407d44969f4bebe (diff) | |
| download | linux-3c0a60195b37af83bbbaf223cd3a78945bace49e.tar.xz | |
drm/panthor: Recover from panthor_gpu_flush_caches() failures
We have seen a few cases where the whole memory subsystem is blocked
and flush operations never complete. When that happens, we want to:
- schedule a reset, so we can recover from this situation
- in the reset path, we need to reset the pending_reqs so we can send
new commands after the reset
- if more panthor_gpu_flush_caches() operations are queued after
the timeout, we skip them and return -EIO directly to avoid needless
waits (the memory block won't miraculously work again)
Note that we drop the WARN_ON()s because these hangs can be triggered
with buggy GPU jobs created by the UMD, and there's no way we can
prevent it. We do keep the error messages though.
v2:
- New patch
v3:
- Collect R-b
- Explicitly mention the fact we dropped the WARN_ON()s in the commit
message
v4:
- No changes
Fixes: 5cd894e258c4 ("drm/panthor: Add the GPU logical block")
Reviewed-by: Steven Price <steven.price@arm.com>
Link: https://patch.msgid.link/20251128084841.3804658-4-boris.brezillon@collabora.com
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
| -rw-r--r-- | drivers/gpu/drm/panthor/panthor_gpu.c | 19 |
1 files changed, 12 insertions, 7 deletions
diff --git a/drivers/gpu/drm/panthor/panthor_gpu.c b/drivers/gpu/drm/panthor/panthor_gpu.c index 06b231b2460a..9cb5dee93212 100644 --- a/drivers/gpu/drm/panthor/panthor_gpu.c +++ b/drivers/gpu/drm/panthor/panthor_gpu.c @@ -289,38 +289,42 @@ int panthor_gpu_l2_power_on(struct panthor_device *ptdev) int panthor_gpu_flush_caches(struct panthor_device *ptdev, u32 l2, u32 lsc, u32 other) { - bool timedout = false; unsigned long flags; + int ret = 0; /* Serialize cache flush operations. */ guard(mutex)(&ptdev->gpu->cache_flush_lock); spin_lock_irqsave(&ptdev->gpu->reqs_lock, flags); - if (!drm_WARN_ON(&ptdev->base, - ptdev->gpu->pending_reqs & GPU_IRQ_CLEAN_CACHES_COMPLETED)) { + if (!(ptdev->gpu->pending_reqs & GPU_IRQ_CLEAN_CACHES_COMPLETED)) { ptdev->gpu->pending_reqs |= GPU_IRQ_CLEAN_CACHES_COMPLETED; gpu_write(ptdev, GPU_CMD, GPU_FLUSH_CACHES(l2, lsc, other)); + } else { + ret = -EIO; } spin_unlock_irqrestore(&ptdev->gpu->reqs_lock, flags); + if (ret) + return ret; + if (!wait_event_timeout(ptdev->gpu->reqs_acked, !(ptdev->gpu->pending_reqs & GPU_IRQ_CLEAN_CACHES_COMPLETED), msecs_to_jiffies(100))) { spin_lock_irqsave(&ptdev->gpu->reqs_lock, flags); if ((ptdev->gpu->pending_reqs & GPU_IRQ_CLEAN_CACHES_COMPLETED) != 0 && !(gpu_read(ptdev, GPU_INT_RAWSTAT) & GPU_IRQ_CLEAN_CACHES_COMPLETED)) - timedout = true; + ret = -ETIMEDOUT; else ptdev->gpu->pending_reqs &= ~GPU_IRQ_CLEAN_CACHES_COMPLETED; spin_unlock_irqrestore(&ptdev->gpu->reqs_lock, flags); } - if (timedout) { + if (ret) { + panthor_device_schedule_reset(ptdev); drm_err(&ptdev->base, "Flush caches timeout"); - return -ETIMEDOUT; } - return 0; + return ret; } /** @@ -360,6 +364,7 @@ int panthor_gpu_soft_reset(struct panthor_device *ptdev) return -ETIMEDOUT; } + ptdev->gpu->pending_reqs = 0; return 0; } |
