summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Max Zhen <max.zhen@amd.com> 2026-04-21 21:15:02 +0300
committer Lizhi Hou <lizhi.hou@amd.com> 2026-04-22 18:35:21 +0300
commit 8711eb2dde2ed44c98714b875dcf7329950c71ba (patch)
tree 494eaa33cd274e061de4756edf742b661511c409
parent 3a9f1d470619a7b0c0bdee564d312bb1424dfe01 (diff)
download linux-8711eb2dde2ed44c98714b875dcf7329950c71ba.tar.xz
accel/amdxdna: Improve tracing for job lifecycle and mailbox RX worker
Add more trace coverage to amdxdna job handling and mailbox receive
processing to make driver execution easier to debug.

Extend the xdna_job trace event to record the command opcode in addition
to the job sequence number. Use the enhanced tracepoint in the job run,
sent-to-device, signaled-fence, and job-free paths so that trace output
can be correlated with the command being executed.

Also add debug-point tracing when a command is received through the
submit ioctl path, and add a trace event when the mailbox RX worker runs.

These changes improve visibility into job lifetime transitions and
mailbox activity, which helps debug command flow and scheduler issues.

Signed-off-by: Max Zhen <max.zhen@amd.com>
Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
Link: https://patch.msgid.link/20260421181502.1970263-1-lizhi.hou@amd.com
-rw-r--r-- drivers/accel/amdxdna/aie2_ctx.c 14
-rw-r--r-- drivers/accel/amdxdna/amdxdna_ctx.c 3
-rw-r--r-- drivers/accel/amdxdna/amdxdna_ctx.h 1
-rw-r--r-- drivers/accel/amdxdna/amdxdna_mailbox.c 1
-rw-r--r-- include/trace/events/amdxdna.h 42
5 files changed, 42 insertions, 19 deletions
diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/amdxdna/aie2_ctx.c
index d37123d925b6..3b0feba448c4 100644
--- a/drivers/accel/amdxdna/aie2_ctx.c
+++ b/drivers/accel/amdxdna/aie2_ctx.c
@@ -64,6 +64,7 @@ static void aie2_job_release(struct kref *ref)
struct amdxdna_sched_job *job;
job = container_of(ref, struct amdxdna_sched_job, refcnt);
+
amdxdna_sched_job_cleanup(job);
atomic64_inc(&job->hwctx->job_free_cnt);
wake_up(&job->hwctx->priv->job_free_wq);
@@ -195,7 +196,8 @@ aie2_sched_notify(struct amdxdna_sched_job *job)
{
struct dma_fence *fence = job->fence;
- trace_xdna_job(&job->base, job->hwctx->name, "signaled fence", job->seq);
+ trace_xdna_job(&job->base, job->hwctx->name, "signaling fence",
+ job->seq, job->drv_cmd ? job->drv_cmd->opcode : DEFAULT_IO);
aie2_tdr_signal(job->hwctx->client->xdna);
job->hwctx->priv->completed++;
@@ -366,6 +368,9 @@ aie2_sched_job_run(struct drm_sched_job *sched_job)
struct dma_fence *fence;
int ret;
+ trace_xdna_job(sched_job, hwctx->name, "job run",
+ job->seq, job->drv_cmd ? job->drv_cmd->opcode : DEFAULT_IO);
+
if (!hwctx->priv->mbox_chann)
return NULL;
@@ -409,7 +414,8 @@ out:
} else {
aie2_tdr_signal(hwctx->client->xdna);
}
- trace_xdna_job(sched_job, hwctx->name, "sent to device", job->seq);
+ trace_xdna_job(sched_job, hwctx->name, "sent to device",
+ job->seq, job->drv_cmd ? job->drv_cmd->opcode : DEFAULT_IO);
return fence;
}
@@ -419,7 +425,8 @@ static void aie2_sched_job_free(struct drm_sched_job *sched_job)
struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job);
struct amdxdna_hwctx *hwctx = job->hwctx;
- trace_xdna_job(sched_job, hwctx->name, "job free", job->seq);
+ trace_xdna_job(sched_job, hwctx->name, "job free",
+ job->seq, job->drv_cmd ? job->drv_cmd->opcode : DEFAULT_IO);
if (!job->job_done)
up(&hwctx->priv->job_sem);
@@ -437,7 +444,6 @@ aie2_sched_job_timedout(struct drm_sched_job *sched_job)
int ret;
xdna = hwctx->client->xdna;
- trace_xdna_job(sched_job, hwctx->name, "job timedout", job->seq);
guard(mutex)(&xdna->dev_lock);
diff --git a/drivers/accel/amdxdna/amdxdna_ctx.c b/drivers/accel/amdxdna/amdxdna_ctx.c
index ff6c3e8e5a15..2c2c21992c87 100644
--- a/drivers/accel/amdxdna/amdxdna_ctx.c
+++ b/drivers/accel/amdxdna/amdxdna_ctx.c
@@ -514,7 +514,6 @@ int amdxdna_cmd_submit(struct amdxdna_client *client,
goto unlock_srcu;
}
-
job->hwctx = hwctx;
job->mm = current->mm;
@@ -612,6 +611,8 @@ int amdxdna_drm_submit_cmd_ioctl(struct drm_device *dev, void *data, struct drm_
if (args->ext || args->ext_flags)
return -EINVAL;
+ trace_amdxdna_debug_point(current->comm, args->type, "job received");
+
switch (args->type) {
case AMDXDNA_CMD_SUBMIT_EXEC_BUF:
return amdxdna_drm_submit_execbuf(client, args);
diff --git a/drivers/accel/amdxdna/amdxdna_ctx.h b/drivers/accel/amdxdna/amdxdna_ctx.h
index a8557d7e8923..355798687376 100644
--- a/drivers/accel/amdxdna/amdxdna_ctx.h
+++ b/drivers/accel/amdxdna/amdxdna_ctx.h
@@ -119,6 +119,7 @@ struct amdxdna_hwctx {
container_of(j, struct amdxdna_sched_job, base)
enum amdxdna_job_opcode {
+ DEFAULT_IO,
SYNC_DEBUG_BO,
ATTACH_DEBUG_BO,
DETACH_DEBUG_BO,
diff --git a/drivers/accel/amdxdna/amdxdna_mailbox.c b/drivers/accel/amdxdna/amdxdna_mailbox.c
index 37771bdb24a1..cc8865f4e79c 100644
--- a/drivers/accel/amdxdna/amdxdna_mailbox.c
+++ b/drivers/accel/amdxdna/amdxdna_mailbox.c
@@ -361,6 +361,7 @@ static void mailbox_rx_worker(struct work_struct *rx_work)
int ret;
mb_chann = container_of(rx_work, struct mailbox_channel, rx_work);
+ trace_mbox_rx_worker(MAILBOX_NAME, mb_chann->msix_irq);
if (READ_ONCE(mb_chann->bad_state)) {
MB_ERR(mb_chann, "Channel in bad state, work aborted");
diff --git a/include/trace/events/amdxdna.h b/include/trace/events/amdxdna.h
index c6cb2da7b706..71da24267e52 100644
--- a/include/trace/events/amdxdna.h
+++ b/include/trace/events/amdxdna.h
@@ -30,26 +30,30 @@ TRACE_EVENT(amdxdna_debug_point,
);
TRACE_EVENT(xdna_job,
- TP_PROTO(struct drm_sched_job *sched_job, const char *name, const char *str, u64 seq),
+ TP_PROTO(struct drm_sched_job *sched_job, const char *name,
+ const char *str, u64 seq, u32 op),
- TP_ARGS(sched_job, name, str, seq),
+ TP_ARGS(sched_job, name, str, seq, op),
TP_STRUCT__entry(__string(name, name)
__string(str, str)
__field(u64, fence_context)
__field(u64, fence_seqno)
- __field(u64, seq)),
+ __field(u64, seq)
+ __field(u32, op)),
TP_fast_assign(__assign_str(name);
__assign_str(str);
__entry->fence_context = sched_job->s_fence->finished.context;
__entry->fence_seqno = sched_job->s_fence->finished.seqno;
- __entry->seq = seq;),
+ __entry->seq = seq;
+ __entry->op = op;),
- TP_printk("fence=(context:%llu, seqno:%lld), %s seq#:%lld %s",
+ TP_printk("fence=(context:%llu, seqno:%llu), %s seq#:%llu %s, op=%u",
__entry->fence_context, __entry->fence_seqno,
__get_str(name), __entry->seq,
- __get_str(str))
+ __get_str(str),
+ __entry->op)
);
DECLARE_EVENT_CLASS(xdna_mbox_msg,
@@ -81,18 +85,28 @@ DEFINE_EVENT(xdna_mbox_msg, mbox_set_head,
TP_ARGS(name, chann_id, opcode, id)
);
-TRACE_EVENT(mbox_irq_handle,
- TP_PROTO(char *name, int irq),
+DECLARE_EVENT_CLASS(xdna_mbox_name_id,
+ TP_PROTO(char *name, int irq),
- TP_ARGS(name, irq),
+ TP_ARGS(name, irq),
- TP_STRUCT__entry(__string(name, name)
- __field(int, irq)),
+ TP_STRUCT__entry(__string(name, name)
+ __field(int, irq)),
- TP_fast_assign(__assign_str(name);
- __entry->irq = irq;),
+ TP_fast_assign(__assign_str(name);
+ __entry->irq = irq;),
+
+ TP_printk("%s.%d", __get_str(name), __entry->irq)
+);
+
+DEFINE_EVENT(xdna_mbox_name_id, mbox_irq_handle,
+ TP_PROTO(char *name, int irq),
+ TP_ARGS(name, irq)
+);
- TP_printk("%s.%d", __get_str(name), __entry->irq)
+DEFINE_EVENT(xdna_mbox_name_id, mbox_rx_worker,
+ TP_PROTO(char *name, int irq),
+ TP_ARGS(name, irq)
);
#endif /* !defined(_TRACE_AMDXDNA_H) || defined(TRACE_HEADER_MULTI_READ) */