summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNiranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>2025-12-11 04:02:59 +0300
committerNiranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>2025-12-12 06:21:41 +0300
commitc85285b32cc697d7612ee28a9ea9ded5e53d2b57 (patch)
treee51516906d337c67b692a3c6e7eab83c92045b5e
parent1b5d39e6672fdee158c3306f5cb2df8975c77e5a (diff)
downloadlinux-c85285b32cc697d7612ee28a9ea9ded5e53d2b57.tar.xz
drm/xe/multi_queue: Handle CGP context error
Trigger multi-queue context cleanup upon CGP context error notification from GuC.

v4: Fix error message

Signed-off-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Link: https://patch.msgid.link/20251211010249.1647839-30-niranjana.vishwanathapura@intel.com
-rw-r--r--drivers/gpu/drm/xe/abi/guc_actions_abi.h1
-rw-r--r--drivers/gpu/drm/xe/xe_guc_ct.c4
-rw-r--r--drivers/gpu/drm/xe/xe_guc_submit.c31
-rw-r--r--drivers/gpu/drm/xe/xe_guc_submit.h2
-rw-r--r--drivers/gpu/drm/xe/xe_trace.h5
5 files changed, 43 insertions, 0 deletions
diff --git a/drivers/gpu/drm/xe/abi/guc_actions_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_abi.h
index 3e9fbed9cda6..8af3691626bf 100644
--- a/drivers/gpu/drm/xe/abi/guc_actions_abi.h
+++ b/drivers/gpu/drm/xe/abi/guc_actions_abi.h
@@ -142,6 +142,7 @@ enum xe_guc_action {
XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_QUEUE = 0x4602,
XE_GUC_ACTION_MULTI_QUEUE_CONTEXT_CGP_SYNC = 0x4603,
XE_GUC_ACTION_NOTIFY_MULTI_QUEUE_CONTEXT_CGP_SYNC_DONE = 0x4604,
+ XE_GUC_ACTION_NOTIFY_MULTI_QUEUE_CGP_CONTEXT_ERROR = 0x4605,
XE_GUC_ACTION_CLIENT_SOFT_RESET = 0x5507,
XE_GUC_ACTION_SET_ENG_UTIL_BUFF = 0x550A,
XE_GUC_ACTION_SET_DEVICE_ENGINE_ACTIVITY_BUFFER = 0x550C,
diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
index 4d5b4ed357cc..3e49e7fd0031 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.c
+++ b/drivers/gpu/drm/xe/xe_guc_ct.c
@@ -1618,6 +1618,10 @@ static int process_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len)
case XE_GUC_ACTION_NOTIFY_MULTI_QUEUE_CONTEXT_CGP_SYNC_DONE:
ret = xe_guc_exec_queue_cgp_sync_done_handler(guc, payload, adj_len);
break;
+ case XE_GUC_ACTION_NOTIFY_MULTI_QUEUE_CGP_CONTEXT_ERROR:
+ ret = xe_guc_exec_queue_cgp_context_error_handler(guc, payload,
+ adj_len);
+ break;
default:
xe_gt_err(gt, "unexpected G2H action 0x%04x\n", action);
}
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index d38f5aab0a99..3be5e78485c7 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -48,6 +48,8 @@
#include "xe_uc_fw.h"
#include "xe_vm.h"
+#define XE_GUC_EXEC_QUEUE_CGP_CONTEXT_ERROR_LEN 6 /* exact message length the CGP context error handler accepts; presumably counted in u32 dwords - TODO confirm */
+
static struct xe_guc *
exec_queue_to_guc(struct xe_exec_queue *q)
{
@@ -3009,6 +3011,35 @@ int xe_guc_exec_queue_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 le
return 0;
}
+/**
+ * xe_guc_exec_queue_cgp_context_error_handler - CGP context error handler
+ * @guc: guc
+ * @msg: message indicating CGP context error
+ * @len: length of message
+ *
+ * Validates @len, looks up the exec queue identified by @msg, logs and traces
+ * the error, and then triggers the same cleanup that is used for an engine
+ * reset.
+ *
+ * Return: 0 on success, -EPROTO if @len is unexpected or no exec queue is
+ * found for the id carried in @msg.
+ */
+int xe_guc_exec_queue_cgp_context_error_handler(struct xe_guc *guc, u32 *msg,
+						u32 len)
+{
+	struct xe_gt *gt = guc_to_gt(guc);
+	struct xe_device *xe = guc_to_xe(guc);
+	struct xe_exec_queue *q;
+	u32 guc_id;
+
+	if (unlikely(len != XE_GUC_EXEC_QUEUE_CGP_CONTEXT_ERROR_LEN)) {
+		drm_err(&xe->drm, "Invalid length %u\n", len);
+		return -EPROTO;
+	}
+
+	/*
+	 * Only index @msg once @len has been validated; reading msg[2] before
+	 * the check would be an out-of-bounds access on a short message.
+	 */
+	guc_id = msg[2];
+
+	q = g2h_exec_queue_lookup(guc, guc_id);
+	if (unlikely(!q))
+		return -EPROTO;
+
+	xe_gt_dbg(gt,
+		  "CGP context error: [%s] err=0x%x, q0_id=0x%x LRCA=0x%x guc_id=0x%x\n",
+		  msg[0] & 1 ? "uc" : "kmd", msg[1], msg[2], msg[3], msg[4]);
+
+	trace_xe_exec_queue_cgp_context_error(q);
+
+	/* Treat the same as engine reset */
+	xe_guc_exec_queue_reset_trigger_cleanup(q);
+
+	return 0;
+}
+
/**
* xe_guc_exec_queue_cgp_sync_done_handler - CGP synchronization done handler
* @guc: guc
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.h b/drivers/gpu/drm/xe/xe_guc_submit.h
index ad8c0e8e0415..4d89b2975fe9 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.h
+++ b/drivers/gpu/drm/xe/xe_guc_submit.h
@@ -37,6 +37,8 @@ int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg,
int xe_guc_exec_queue_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len);
int xe_guc_error_capture_handler(struct xe_guc *guc, u32 *msg, u32 len);
int xe_guc_exec_queue_cgp_sync_done_handler(struct xe_guc *guc, u32 *msg, u32 len);
+int xe_guc_exec_queue_cgp_context_error_handler(struct xe_guc *guc, u32 *msg,
+ u32 len);
struct xe_guc_submit_exec_queue_snapshot *
xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q);
diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h
index 79a97b086cb2..c9d0748dae9d 100644
--- a/drivers/gpu/drm/xe/xe_trace.h
+++ b/drivers/gpu/drm/xe/xe_trace.h
@@ -172,6 +172,11 @@ DEFINE_EVENT(xe_exec_queue, xe_exec_queue_memory_cat_error,
TP_ARGS(q)
);
+DEFINE_EVENT(xe_exec_queue, xe_exec_queue_cgp_context_error, /* emitted by the CGP context error G2H handler */
+ TP_PROTO(struct xe_exec_queue *q),
+ TP_ARGS(q)
+);
+
DEFINE_EVENT(xe_exec_queue, xe_exec_queue_stop,
TP_PROTO(struct xe_exec_queue *q),
TP_ARGS(q)