summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMatthew Brost <matthew.brost@intel.com>2026-02-18 07:33:18 +0300
committerMatthew Brost <matthew.brost@intel.com>2026-02-26 21:16:45 +0300
commit74bbd87dcc5c102147e24058e8db97a228d6ee03 (patch)
treef67d88286decd9916da049a8a4ab7606286f66b8
parentaf3de6cf06f9497a60510b87bf77e8fb4124ba9f (diff)
downloadlinux-74bbd87dcc5c102147e24058e8db97a228d6ee03.tar.xz
drm/xe: Avoid unconditional VRAM reads in H2G path
desc_read() issues a VRAM read which serializes the CPU and drains posted writes on dGPU platforms. The H2G tracepoint evaluated its arguments unconditionally, so even with tracing disabled the submission path paid the full VRAM read latency. Guard the tracepoint with trace_xe_guc_ctb_h2g_enabled(). Also move the descriptor status verification under CONFIG_DRM_XE_DEBUG. This removes another unnecessary VRAM read in non-debug builds. This results in ~10× faster H2G submission and significantly reduces lock contention across the driver. Signed-off-by: Matthew Brost <matthew.brost@intel.com> Reviewed-by: Stuart Summers <stuart.summers@intel.com> Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com> Link: https://patch.msgid.link/20260218043319.809548-3-matthew.brost@intel.com
-rw-r--r--drivers/gpu/drm/xe/xe_guc_ct.c25
1 files changed, 16 insertions, 9 deletions
diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
index 018dd64ab1d5..10fbdeb0550c 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.c
+++ b/drivers/gpu/drm/xe/xe_guc_ct.c
@@ -939,22 +939,22 @@ static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len,
u32 full_len;
struct iosys_map map = IOSYS_MAP_INIT_OFFSET(&h2g->cmds,
tail * sizeof(u32));
- u32 desc_status;
full_len = len + GUC_CTB_HDR_LEN;
lockdep_assert_held(&ct->lock);
xe_gt_assert(gt, full_len <= GUC_CTB_MSG_MAX_LEN);
- desc_status = desc_read(xe, h2g, status);
- if (desc_status) {
- xe_gt_err(gt, "CT write: non-zero status: %u\n", desc_status);
- goto corrupted;
- }
-
if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) {
u32 desc_tail = desc_read(xe, h2g, tail);
u32 desc_head = desc_read(xe, h2g, head);
+ u32 desc_status;
+
+ desc_status = desc_read(xe, h2g, status);
+ if (desc_status) {
+ xe_gt_err(gt, "CT write: non-zero status: %u\n", desc_status);
+ goto corrupted;
+ }
if (tail != desc_tail) {
desc_write(xe, h2g, status, desc_status | GUC_CTB_STATUS_MISMATCH);
@@ -1023,8 +1023,15 @@ static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len,
/* Update descriptor */
desc_write(xe, h2g, tail, h2g->info.tail);
- trace_xe_guc_ctb_h2g(xe, gt->info.id, *(action - 1), full_len,
- desc_read(xe, h2g, head), h2g->info.tail);
+ /*
+ * desc_read() performs an VRAM read which serializes the CPU and drains
+ * posted writes on dGPU platforms. Tracepoints evaluate arguments even
+ * when disabled, so guard the event to avoid adding µs-scale latency to
+ * the fast H2G submission path when tracing is not active.
+ */
+ if (trace_xe_guc_ctb_h2g_enabled())
+ trace_xe_guc_ctb_h2g(xe, gt->info.id, *(action - 1), full_len,
+ desc_read(xe, h2g, head), h2g->info.tail);
return 0;