From 35dc4ce200623fec8f8eda256cd8abb5befbfae2 Mon Sep 17 00:00:00 2001 From: André Almeida Date: Tue, 17 Jun 2025 09:49:48 -0300 Subject: drm: amdgpu: Use struct drm_wedge_task_info inside of struct amdgpu_task_info MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To avoid a cast when calling drm_dev_wedged_event(), replace pid and task name inside of struct amdgpu_task_info with struct drm_wedge_task_info. Reviewed-by: Christian König Link: https://lore.kernel.org/r/20250617124949.2151549-6-andrealmeid@igalia.com Signed-off-by: André Almeida --- drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 8e626f50b362..dac4b926e7be 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -1786,7 +1786,7 @@ static int amdgpu_debugfs_vm_info_show(struct seq_file *m, void *unused) ti = amdgpu_vm_get_task_info_vm(vm); if (ti) { - seq_printf(m, "pid:%d\tProcess:%s ----------\n", ti->pid, ti->process_name); + seq_printf(m, "pid:%d\tProcess:%s ----------\n", ti->task.pid, ti->process_name); amdgpu_vm_put_task_info(ti); } -- cgit v1.2.3 From bf1cd14f9e2e1fdf981eed273ddd595863f5288c Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 2 Jun 2025 11:31:52 -0400 Subject: drm/amdgpu: switch job hw_fence to amdgpu_fence MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use the amdgpu fence container so we can store additional data in the fence. This also fixes the start_time handling for MCBP since we were casting the fence to an amdgpu_fence and it wasn't. Fixes: 3f4c175d62d8 ("drm/amdgpu: MCBP based on DRM scheduler (v9)") Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 30 +++++++---------------------- drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 12 ++++++------ drivers/gpu/drm/amd/amdgpu/amdgpu_job.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 16 +++++++++++++++ 6 files changed, 32 insertions(+), 32 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 8e626f50b362..f81608330a3d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -1902,7 +1902,7 @@ no_preempt: continue; } job = to_amdgpu_job(s_job); - if (preempted && (&job->hw_fence) == fence) + if (preempted && (&job->hw_fence.base) == fence) /* mark the job as preempted */ job->preemption_status |= AMDGPU_IB_PREEMPTED; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index c2d10be53d3e..94db61c75fff 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -6422,7 +6422,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, * * job->base holds a reference to parent fence */ - if (job && dma_fence_is_signaled(&job->hw_fence)) { + if (job && dma_fence_is_signaled(&job->hw_fence.base)) { job_signaled = true; dev_info(adev->dev, "Guilty job already signaled, skipping HW reset"); goto skip_hw_reset; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 5f5c00ace96b..f5855c412321 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -41,22 +41,6 @@ #include "amdgpu_trace.h" #include "amdgpu_reset.h" -/* - * Fences mark an event in the GPUs pipeline and are used - * for GPU/CPU synchronization. When the fence is written, - * it is expected that all buffers associated with that fence - * are no longer in use by the associated ring on the GPU and - * that the relevant GPU caches have been flushed. - */ - -struct amdgpu_fence { - struct dma_fence base; - - /* RB, DMA, etc. */ - struct amdgpu_ring *ring; - ktime_t start_timestamp; -}; - static struct kmem_cache *amdgpu_fence_slab; int amdgpu_fence_slab_init(void) @@ -151,12 +135,12 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, struct amd am_fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_ATOMIC); if (am_fence == NULL) return -ENOMEM; - fence = &am_fence->base; - am_fence->ring = ring; } else { /* take use of job-embedded fence */ - fence = &job->hw_fence; + am_fence = &job->hw_fence; } + fence = &am_fence->base; + am_fence->ring = ring; seq = ++ring->fence_drv.sync_seq; if (job && job->job_run_counter) { @@ -718,7 +702,7 @@ void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring) * it right here or we won't be able to track them in fence_drv * and they will remain unsignaled during sa_bo free. */ - job = container_of(old, struct amdgpu_job, hw_fence); + job = container_of(old, struct amdgpu_job, hw_fence.base); if (!job->base.s_fence && !dma_fence_is_signaled(old)) dma_fence_signal(old); RCU_INIT_POINTER(*ptr, NULL); @@ -780,7 +764,7 @@ static const char *amdgpu_fence_get_timeline_name(struct dma_fence *f) static const char *amdgpu_job_fence_get_timeline_name(struct dma_fence *f) { - struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence); + struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence.base); return (const char *)to_amdgpu_ring(job->base.sched)->name; } @@ -810,7 +794,7 @@ static bool amdgpu_fence_enable_signaling(struct dma_fence *f) */ static bool amdgpu_job_fence_enable_signaling(struct dma_fence *f) { - struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence); + struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence.base); if (!timer_pending(&to_amdgpu_ring(job->base.sched)->fence_drv.fallback_timer)) amdgpu_fence_schedule_fallback(to_amdgpu_ring(job->base.sched)); @@ -845,7 +829,7 @@ static void amdgpu_job_fence_free(struct rcu_head *rcu) struct dma_fence *f = container_of(rcu, struct dma_fence, rcu); /* free job if fence has a parent job */ - kfree(container_of(f, struct amdgpu_job, hw_fence)); + kfree(container_of(f, struct amdgpu_job, hw_fence.base)); } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index acb21fc8b3ce..ddb9d3269357 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -272,8 +272,8 @@ void amdgpu_job_free_resources(struct amdgpu_job *job) /* Check if any fences where initialized */ if (job->base.s_fence && job->base.s_fence->finished.ops) f = &job->base.s_fence->finished; - else if (job->hw_fence.ops) - f = &job->hw_fence; + else if (job->hw_fence.base.ops) + f = &job->hw_fence.base; else f = NULL; @@ -290,10 +290,10 @@ static void amdgpu_job_free_cb(struct drm_sched_job *s_job) amdgpu_sync_free(&job->explicit_sync); /* only put the hw fence if has embedded fence */ - if (!job->hw_fence.ops) + if (!job->hw_fence.base.ops) kfree(job); else - dma_fence_put(&job->hw_fence); + dma_fence_put(&job->hw_fence.base); } void amdgpu_job_set_gang_leader(struct amdgpu_job *job, @@ -322,10 +322,10 @@ void amdgpu_job_free(struct amdgpu_job *job) if (job->gang_submit != &job->base.s_fence->scheduled) dma_fence_put(job->gang_submit); - if (!job->hw_fence.ops) + if (!job->hw_fence.base.ops) kfree(job); else - dma_fence_put(&job->hw_fence); + dma_fence_put(&job->hw_fence.base); } struct dma_fence *amdgpu_job_submit(struct amdgpu_job *job) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h index f2c049129661..931fed8892cc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h @@ -48,7 +48,7 @@ struct amdgpu_job { struct drm_sched_job base; struct amdgpu_vm *vm; struct amdgpu_sync explicit_sync; - struct dma_fence hw_fence; + struct amdgpu_fence hw_fence; struct dma_fence *gang_submit; uint32_t preamble_status; uint32_t preemption_status; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index b95b47110769..e1f25218943a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -127,6 +127,22 @@ struct amdgpu_fence_driver { struct dma_fence **fences; }; +/* + * Fences mark an event in the GPUs pipeline and are used + * for GPU/CPU synchronization. When the fence is written, + * it is expected that all buffers associated with that fence + * are no longer in use by the associated ring on the GPU and + * that the relevant GPU caches have been flushed. + */ + +struct amdgpu_fence { + struct dma_fence base; + + /* RB, DMA, etc. */ + struct amdgpu_ring *ring; + ktime_t start_timestamp; +}; + extern const struct drm_sched_backend_ops amdgpu_sched_ops; void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring); -- cgit v1.2.3 From 719b378d371899c8bbaf0ce5f42bf7db4be819f9 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Fri, 4 Jul 2025 13:25:47 +0530 Subject: drm/amdgpu: add debugfs support for VM pagetable per client MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a debugfs file under the client directory which shares the root page table base address of the VM. This address could be used to dump the pagetable for debug memory issues. Signed-off-by: Sunil Khatri Reviewed-by: Christian König Link: https://lore.kernel.org/r/20250704075548.1549849-4-sunil.khatri@amd.com Signed-off-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 52 +++++++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 2 ++ 3 files changed, 55 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index dac4b926e7be..e49890a23ef6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -2131,6 +2131,55 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev) return 0; } +static int amdgpu_pt_info_read(struct seq_file *m, void *unused) +{ + struct drm_file *file; + struct amdgpu_fpriv *fpriv; + struct amdgpu_bo *root_bo; + int r; + + file = m->private; + if (!file) + return -EINVAL; + + fpriv = file->driver_priv; + if (!fpriv && !fpriv->vm.root.bo) + return -ENODEV; + + root_bo = amdgpu_bo_ref(fpriv->vm.root.bo); + r = amdgpu_bo_reserve(root_bo, true); + if (r) { + amdgpu_bo_unref(&root_bo); + return -EINVAL; + } + + seq_printf(m, "gpu_address: 0x%llx\n", amdgpu_bo_gpu_offset(fpriv->vm.root.bo)); + + amdgpu_bo_unreserve(root_bo); + amdgpu_bo_unref(&root_bo); + + return 0; +} + +static int amdgpu_pt_info_open(struct inode *inode, struct file *file) +{ + return single_open(file, amdgpu_pt_info_read, inode->i_private); +} + +static const struct file_operations amdgpu_pt_info_fops = { + .owner = THIS_MODULE, + .open = amdgpu_pt_info_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +void amdgpu_debugfs_vm_init(struct drm_file *file) +{ + debugfs_create_file("vm_pagetable_info", 0444, file->debugfs_client, file, + &amdgpu_pt_info_fops); +} + #else int amdgpu_debugfs_init(struct amdgpu_device *adev) { @@ -2140,4 +2189,7 @@ int amdgpu_debugfs_regs_init(struct amdgpu_device *adev) { return 0; } +void amdgpu_debugfs_vm_init(struct drm_file *file) +{ +} #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h index 0425432d8659..e7b3c38e5186 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h @@ -33,4 +33,5 @@ void amdgpu_debugfs_fence_init(struct amdgpu_device *adev); void amdgpu_debugfs_firmware_init(struct amdgpu_device *adev); void amdgpu_debugfs_gem_init(struct amdgpu_device *adev); void amdgpu_debugfs_mes_event_log_init(struct amdgpu_device *adev); +void amdgpu_debugfs_vm_init(struct drm_file *file); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index d2ce7d86dbc8..d555853c5717 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -1395,6 +1395,8 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) if (r) goto error_pasid; + amdgpu_debugfs_vm_init(file_priv); + r = amdgpu_vm_init(adev, &fpriv->vm, fpriv->xcp_id); if (r) goto error_pasid; -- cgit v1.2.3 From 03d5236014a5d298c26b2ca80e5834aa844574b3 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Wed, 9 Jul 2025 12:46:18 +0530 Subject: drm/amdgpu: fix the logic to validate fpriv and root bo MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the smatch warning, smatch warnings: drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c:2146 amdgpu_pt_info_read() error: we previously assumed 'fpriv' could be null (see line 2146) "if (!fpriv && !fpriv->vm.root.bo)", It has to be an OR condition rather than an AND which makes an NULL dereference in case fpriv is NULL. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202507090525.9rDWGhz3-lkp@intel.com/ Signed-off-by: Sunil Khatri Link: https://lore.kernel.org/r/20250709071618.591866-1-sunil.khatri@amd.com Reviewed-by: Christian König Signed-off-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 2353455d4aff..0e6e2e2acf5b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -2143,7 +2143,7 @@ static int amdgpu_pt_info_read(struct seq_file *m, void *unused) return -EINVAL; fpriv = file->driver_priv; - if (!fpriv && !fpriv->vm.root.bo) + if (!fpriv || !fpriv->vm.root.bo) return -ENODEV; root_bo = amdgpu_bo_ref(fpriv->vm.root.bo); -- cgit v1.2.3