summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/xe/xe_guc_submit.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/xe/xe_guc_submit.c')
-rw-r--r--drivers/gpu/drm/xe/xe_guc_submit.c88
1 files changed, 44 insertions, 44 deletions
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index f22ae717b0b2..ff77bc8da1b2 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -23,6 +23,7 @@
#include "xe_force_wake.h"
#include "xe_gpu_scheduler.h"
#include "xe_gt.h"
+#include "xe_gt_printk.h"
#include "xe_guc.h"
#include "xe_guc_ct.h"
#include "xe_guc_exec_queue_types.h"
@@ -311,7 +312,7 @@ static void __release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q, u32 xa
q->guc->id - GUC_ID_START_MLRC,
order_base_2(q->width));
else
- ida_simple_remove(&guc->submission_state.guc_ids, q->guc->id);
+ ida_free(&guc->submission_state.guc_ids, q->guc->id);
}
static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
@@ -335,8 +336,8 @@ static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
ret = bitmap_find_free_region(bitmap, GUC_ID_NUMBER_MLRC,
order_base_2(q->width));
} else {
- ret = ida_simple_get(&guc->submission_state.guc_ids, 0,
- GUC_ID_NUMBER_SLRC, GFP_NOWAIT);
+ ret = ida_alloc_max(&guc->submission_state.guc_ids,
+ GUC_ID_NUMBER_SLRC - 1, GFP_NOWAIT);
}
if (ret < 0)
return ret;
@@ -811,7 +812,8 @@ static void guc_exec_queue_print(struct xe_exec_queue *q, struct drm_printer *p)
static void simple_error_capture(struct xe_exec_queue *q)
{
struct xe_guc *guc = exec_queue_to_guc(q);
- struct drm_printer p = drm_err_printer("");
+ struct xe_device *xe = guc_to_xe(guc);
+ struct drm_printer p = drm_err_printer(&xe->drm, NULL);
struct xe_hw_engine *hwe;
enum xe_hw_engine_id id;
u32 adj_logical_mask = q->logical_mask;
@@ -928,13 +930,15 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
int i = 0;
if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags)) {
- xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_KERNEL));
- xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q)));
-
drm_notice(&xe->drm, "Timedout job: seqno=%u, guc_id=%d, flags=0x%lx",
xe_sched_job_seqno(job), q->guc->id, q->flags);
+ xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_KERNEL,
+ "Kernel-submitted job timed out\n");
+ xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q),
+ "VM job timed out on non-killed execqueue\n");
+
simple_error_capture(q);
- xe_devcoredump(q);
+ xe_devcoredump(job);
} else {
drm_dbg(&xe->drm, "Timedout signaled job: seqno=%u, guc_id=%d, flags=0x%lx",
xe_sched_job_seqno(job), q->guc->id, q->flags);
@@ -1216,7 +1220,7 @@ static int guc_exec_queue_init(struct xe_exec_queue *q)
init_waitqueue_head(&ge->suspend_wait);
timeout = (q->vm && xe_vm_in_lr_mode(q->vm)) ? MAX_SCHEDULE_TIMEOUT :
- q->hwe->eclass->sched_props.job_timeout_ms;
+ q->sched_props.job_timeout_ms;
err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops,
get_submit_wq(guc),
q->lrc[0].ring.size / MAX_JOB_SIZE_BYTES, 64,
@@ -1348,21 +1352,6 @@ static int guc_exec_queue_set_preempt_timeout(struct xe_exec_queue *q,
return 0;
}
-static int guc_exec_queue_set_job_timeout(struct xe_exec_queue *q, u32 job_timeout_ms)
-{
- struct xe_gpu_scheduler *sched = &q->guc->sched;
- struct xe_guc *guc = exec_queue_to_guc(q);
- struct xe_device *xe = guc_to_xe(guc);
-
- xe_assert(xe, !exec_queue_registered(q));
- xe_assert(xe, !exec_queue_banned(q));
- xe_assert(xe, !exec_queue_killed(q));
-
- sched->base.timeout = job_timeout_ms;
-
- return 0;
-}
-
static int guc_exec_queue_suspend(struct xe_exec_queue *q)
{
struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_SUSPEND;
@@ -1413,7 +1402,6 @@ static const struct xe_exec_queue_ops guc_exec_queue_ops = {
.set_priority = guc_exec_queue_set_priority,
.set_timeslice = guc_exec_queue_set_timeslice,
.set_preempt_timeout = guc_exec_queue_set_preempt_timeout,
- .set_job_timeout = guc_exec_queue_set_job_timeout,
.suspend = guc_exec_queue_suspend,
.suspend_wait = guc_exec_queue_suspend_wait,
.resume = guc_exec_queue_resume,
@@ -1794,7 +1782,7 @@ guc_exec_queue_wq_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snaps
/**
* xe_guc_exec_queue_snapshot_capture - Take a quick snapshot of the GuC Engine.
- * @q: Xe exec queue.
+ * @job: faulty Xe scheduled job.
*
* This can be printed out in a later stage like during dev_coredump
* analysis.
@@ -1803,21 +1791,17 @@ guc_exec_queue_wq_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snaps
* caller, using `xe_guc_exec_queue_snapshot_free`.
*/
struct xe_guc_submit_exec_queue_snapshot *
-xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q)
+xe_guc_exec_queue_snapshot_capture(struct xe_sched_job *job)
{
- struct xe_guc *guc = exec_queue_to_guc(q);
- struct xe_device *xe = guc_to_xe(guc);
+ struct xe_exec_queue *q = job->q;
struct xe_gpu_scheduler *sched = &q->guc->sched;
- struct xe_sched_job *job;
struct xe_guc_submit_exec_queue_snapshot *snapshot;
int i;
snapshot = kzalloc(sizeof(*snapshot), GFP_ATOMIC);
- if (!snapshot) {
- drm_err(&xe->drm, "Skipping GuC Engine snapshot entirely.\n");
+ if (!snapshot)
return NULL;
- }
snapshot->guc.id = q->guc->id;
memcpy(&snapshot->name, &q->name, sizeof(snapshot->name));
@@ -1833,9 +1817,7 @@ xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q)
snapshot->lrc = kmalloc_array(q->width, sizeof(struct lrc_snapshot),
GFP_ATOMIC);
- if (!snapshot->lrc) {
- drm_err(&xe->drm, "Skipping GuC Engine LRC snapshot.\n");
- } else {
+ if (snapshot->lrc) {
for (i = 0; i < q->width; ++i) {
struct xe_lrc *lrc = q->lrc + i;
@@ -1863,17 +1845,17 @@ xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q)
sizeof(struct pending_list_snapshot),
GFP_ATOMIC);
- if (!snapshot->pending_list) {
- drm_err(&xe->drm, "Skipping GuC Engine pending_list snapshot.\n");
- } else {
+ if (snapshot->pending_list) {
+ struct xe_sched_job *job_iter;
+
i = 0;
- list_for_each_entry(job, &sched->base.pending_list, drm.list) {
+ list_for_each_entry(job_iter, &sched->base.pending_list, drm.list) {
snapshot->pending_list[i].seqno =
- xe_sched_job_seqno(job);
+ xe_sched_job_seqno(job_iter);
snapshot->pending_list[i].fence =
- dma_fence_is_signaled(job->fence) ? 1 : 0;
+ dma_fence_is_signaled(job_iter->fence) ? 1 : 0;
snapshot->pending_list[i].finished =
- dma_fence_is_signaled(&job->drm.s_fence->finished)
+ dma_fence_is_signaled(&job_iter->drm.s_fence->finished)
? 1 : 0;
i++;
}
@@ -1959,10 +1941,28 @@ void xe_guc_exec_queue_snapshot_free(struct xe_guc_submit_exec_queue_snapshot *s
static void guc_exec_queue_print(struct xe_exec_queue *q, struct drm_printer *p)
{
struct xe_guc_submit_exec_queue_snapshot *snapshot;
+ struct xe_gpu_scheduler *sched = &q->guc->sched;
+ struct xe_sched_job *job;
+ bool found = false;
- snapshot = xe_guc_exec_queue_snapshot_capture(q);
+ spin_lock(&sched->base.job_list_lock);
+ list_for_each_entry(job, &sched->base.pending_list, drm.list) {
+ if (job->q == q) {
+ xe_sched_job_get(job);
+ found = true;
+ break;
+ }
+ }
+ spin_unlock(&sched->base.job_list_lock);
+
+ if (!found)
+ return;
+
+ snapshot = xe_guc_exec_queue_snapshot_capture(job);
xe_guc_exec_queue_snapshot_print(snapshot, p);
xe_guc_exec_queue_snapshot_free(snapshot);
+
+ xe_sched_job_put(job);
}
/**