summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_device.c33
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_job.c2
3 files changed, 35 insertions, 2 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index b76c1cfad7f1..540a38fe5cd6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1298,6 +1298,8 @@ bool amdgpu_device_has_job_running(struct amdgpu_device *adev);
bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev);
int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
struct amdgpu_job* job);
+int amdgpu_device_gpu_recover_imp(struct amdgpu_device *adev,
+ struct amdgpu_job *job);
void amdgpu_device_pci_config_reset(struct amdgpu_device *adev);
int amdgpu_device_pci_reset(struct amdgpu_device *adev);
bool amdgpu_device_need_post(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 00123b0013d3..15e8fde3ac2d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -5033,7 +5033,7 @@ retry:
* Returns 0 for success or an error on failure.
*/
-int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
+int amdgpu_device_gpu_recover_imp(struct amdgpu_device *adev,
struct amdgpu_job *job)
{
struct list_head device_list, *device_list_handle = NULL;
@@ -5292,6 +5292,37 @@ skip_recovery:
return r;
}
+struct amdgpu_recover_work_struct {
+ struct work_struct base;
+ struct amdgpu_device *adev;
+ struct amdgpu_job *job;
+ int ret;
+};
+
+static void amdgpu_device_queue_gpu_recover_work(struct work_struct *work)
+{
+ struct amdgpu_recover_work_struct *recover_work = container_of(work, struct amdgpu_recover_work_struct, base);
+
+ recover_work->ret = amdgpu_device_gpu_recover_imp(recover_work->adev, recover_work->job);
+}
+/*
+ * Serialize gpu recover into reset domain single threaded wq
+ */
+int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
+ struct amdgpu_job *job)
+{
+ struct amdgpu_recover_work_struct work = {.adev = adev, .job = job};
+
+ INIT_WORK(&work.base, amdgpu_device_queue_gpu_recover_work);
+
+ if (!queue_work(adev->reset_domain.wq, &work.base))
+ return -EAGAIN;
+
+ flush_work(&work.base);
+
+ return work.ret;
+}
+
/**
* amdgpu_device_get_pcie_info - fence pcie info about the PCIE slot
*
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index bfc47bea23db..38c9fd7b7ad4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -63,7 +63,7 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
ti.process_name, ti.tgid, ti.task_name, ti.pid);
if (amdgpu_device_should_recover_gpu(ring->adev)) {
- amdgpu_device_gpu_recover(ring->adev, job);
+ amdgpu_device_gpu_recover_imp(ring->adev, job);
} else {
drm_sched_suspend_timeout(&ring->sched);
if (amdgpu_sriov_vf(adev))