diff options
-rw-r--r-- | drivers/gpu/drm/xe/xe_devcoredump.c | 14 | ||||
-rw-r--r-- | drivers/gpu/drm/xe/xe_devcoredump.h | 6 | ||||
-rw-r--r-- | drivers/gpu/drm/xe/xe_guc_submit.c | 38 | ||||
-rw-r--r-- | drivers/gpu/drm/xe/xe_guc_submit.h | 4 |
4 files changed, 42 insertions, 20 deletions
diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c index 68abc0b195be..fd74dae29243 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump.c +++ b/drivers/gpu/drm/xe/xe_devcoredump.c @@ -16,6 +16,7 @@ #include "xe_guc_ct.h" #include "xe_guc_submit.h" #include "xe_hw_engine.h" +#include "xe_sched_job.h" /** * DOC: Xe device coredump @@ -123,9 +124,10 @@ static void xe_devcoredump_free(void *data) } static void devcoredump_snapshot(struct xe_devcoredump *coredump, - struct xe_exec_queue *q) + struct xe_sched_job *job) { struct xe_devcoredump_snapshot *ss = &coredump->snapshot; + struct xe_exec_queue *q = job->q; struct xe_guc *guc = exec_queue_to_guc(q); struct xe_hw_engine *hwe; enum xe_hw_engine_id id; @@ -150,7 +152,7 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump, xe_force_wake_get(gt_to_fw(q->gt), XE_FORCEWAKE_ALL); coredump->snapshot.ct = xe_guc_ct_snapshot_capture(&guc->ct, true); - coredump->snapshot.ge = xe_guc_exec_queue_snapshot_capture(q); + coredump->snapshot.ge = xe_guc_exec_queue_snapshot_capture(job); for_each_hw_engine(hwe, q->gt, id) { if (hwe->class != q->hwe->class || @@ -167,15 +169,15 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump, /** * xe_devcoredump - Take the required snapshots and initialize coredump device. - * @q: The faulty xe_exec_queue, where the issue was detected. + * @job: The faulty xe_sched_job, where the issue was detected. * * This function should be called at the crash time within the serialized * gt_reset. It is skipped if we still have the core dump device available * with the information of the 'first' snapshot. */ -void xe_devcoredump(struct xe_exec_queue *q) +void xe_devcoredump(struct xe_sched_job *job) { - struct xe_device *xe = gt_to_xe(q->gt); + struct xe_device *xe = gt_to_xe(job->q->gt); struct xe_devcoredump *coredump = &xe->devcoredump; if (coredump->captured) { @@ -184,7 +186,7 @@ void xe_devcoredump(struct xe_exec_queue *q) } coredump->captured = true; - devcoredump_snapshot(coredump, q); + devcoredump_snapshot(coredump, job); drm_info(&xe->drm, "Xe device coredump has been created\n"); drm_info(&xe->drm, "Check your /sys/class/drm/card%d/device/devcoredump/data\n", diff --git a/drivers/gpu/drm/xe/xe_devcoredump.h b/drivers/gpu/drm/xe/xe_devcoredump.h index 6ac218a5c194..df8671f0b5eb 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump.h +++ b/drivers/gpu/drm/xe/xe_devcoredump.h @@ -7,12 +7,12 @@ #define _XE_DEVCOREDUMP_H_ struct xe_device; -struct xe_exec_queue; +struct xe_sched_job; #ifdef CONFIG_DEV_COREDUMP -void xe_devcoredump(struct xe_exec_queue *q); +void xe_devcoredump(struct xe_sched_job *job); #else -static inline void xe_devcoredump(struct xe_exec_queue *q) +static inline void xe_devcoredump(struct xe_sched_job *job) { } #endif diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 7c29b8333c71..2b008ec1b6de 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -934,7 +934,7 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) drm_notice(&xe->drm, "Timedout job: seqno=%u, guc_id=%d, flags=0x%lx", xe_sched_job_seqno(job), q->guc->id, q->flags); simple_error_capture(q); - xe_devcoredump(q); + xe_devcoredump(job); } else { drm_dbg(&xe->drm, "Timedout signaled job: seqno=%u, guc_id=%d, flags=0x%lx", xe_sched_job_seqno(job), q->guc->id, q->flags); @@ -1780,7 +1780,7 @@ guc_exec_queue_wq_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snaps /** * xe_guc_exec_queue_snapshot_capture - Take a quick snapshot of the GuC Engine. - * @q: Xe exec queue. + * @job: faulty Xe scheduled job. * * This can be printed out in a later stage like during dev_coredump * analysis. @@ -1789,12 +1789,12 @@ guc_exec_queue_wq_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snaps * caller, using `xe_guc_exec_queue_snapshot_free`. */ struct xe_guc_submit_exec_queue_snapshot * -xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q) +xe_guc_exec_queue_snapshot_capture(struct xe_sched_job *job) { + struct xe_exec_queue *q = job->q; struct xe_guc *guc = exec_queue_to_guc(q); struct xe_device *xe = guc_to_xe(guc); struct xe_gpu_scheduler *sched = &q->guc->sched; - struct xe_sched_job *job; struct xe_guc_submit_exec_queue_snapshot *snapshot; int i; @@ -1852,14 +1852,16 @@ xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q) if (!snapshot->pending_list) { drm_err(&xe->drm, "Skipping GuC Engine pending_list snapshot.\n"); } else { + struct xe_sched_job *job_iter; + i = 0; - list_for_each_entry(job, &sched->base.pending_list, drm.list) { + list_for_each_entry(job_iter, &sched->base.pending_list, drm.list) { snapshot->pending_list[i].seqno = - xe_sched_job_seqno(job); + xe_sched_job_seqno(job_iter); snapshot->pending_list[i].fence = - dma_fence_is_signaled(job->fence) ? 1 : 0; + dma_fence_is_signaled(job_iter->fence) ? 1 : 0; snapshot->pending_list[i].finished = - dma_fence_is_signaled(&job->drm.s_fence->finished) + dma_fence_is_signaled(&job_iter->drm.s_fence->finished) ? 1 : 0; i++; } @@ -1945,10 +1947,28 @@ void xe_guc_exec_queue_snapshot_free(struct xe_guc_submit_exec_queue_snapshot *s static void guc_exec_queue_print(struct xe_exec_queue *q, struct drm_printer *p) { struct xe_guc_submit_exec_queue_snapshot *snapshot; + struct xe_gpu_scheduler *sched = &q->guc->sched; + struct xe_sched_job *job; + bool found = false; + + spin_lock(&sched->base.job_list_lock); + list_for_each_entry(job, &sched->base.pending_list, drm.list) { + if (job->q == q) { + xe_sched_job_get(job); + found = true; + break; + } + } + spin_unlock(&sched->base.job_list_lock); - snapshot = xe_guc_exec_queue_snapshot_capture(q); + if (!found) + return; + + snapshot = xe_guc_exec_queue_snapshot_capture(job); xe_guc_exec_queue_snapshot_print(snapshot, p); xe_guc_exec_queue_snapshot_free(snapshot); + + xe_sched_job_put(job); } /** diff --git a/drivers/gpu/drm/xe/xe_guc_submit.h b/drivers/gpu/drm/xe/xe_guc_submit.h index fc97869c5b86..723dc2bd8df9 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.h +++ b/drivers/gpu/drm/xe/xe_guc_submit.h @@ -9,8 +9,8 @@ #include <linux/types.h> struct drm_printer; -struct xe_exec_queue; struct xe_guc; +struct xe_sched_job; int xe_guc_submit_init(struct xe_guc *guc); @@ -27,7 +27,7 @@ int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg, int xe_guc_exec_queue_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len); struct xe_guc_submit_exec_queue_snapshot * -xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q); +xe_guc_exec_queue_snapshot_capture(struct xe_sched_job *job); void xe_guc_exec_queue_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot, struct drm_printer *p); |