diff options
| author | Karol Wachowski <karol.wachowski@intel.com> | 2025-01-07 20:32:34 +0300 |
|---|---|---|
| committer | Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com> | 2025-01-09 11:35:45 +0300 |
| commit | ab680dc6c78aa035e944ecc8c48a1caab9f39924 (patch) | |
| tree | fd4b311e201ed7c676eb9a2f199ddfab4b771bff | |
| parent | e52443608934952fc978234cf7d639d6aa3f1856 (diff) | |
| download | linux-ab680dc6c78aa035e944ecc8c48a1caab9f39924.tar.xz | |
accel/ivpu: Fix locking order in ivpu_job_submit
Fix deadlock in job submission and abort handling.
When a thread aborts currently executing jobs due to a fault,
it first locks the global lock protecting submitted_jobs (#1).
After the last job is destroyed, it proceeds to release the related context
and locks file_priv (#2). Meanwhile, in the job submission thread,
the file_priv lock (#2) is taken first, and then the submitted_jobs
lock (#1) is obtained when a job is added to the submitted jobs list.
CPU0 CPU1
---- ----
(for example due to a fault) (jobs submissions keep coming)
lock(&vdev->submitted_jobs_lock) #1
ivpu_jobs_abort_all()
job_destroy()
lock(&file_priv->lock) #2
lock(&vdev->submitted_jobs_lock) #1
file_priv_release()
lock(&vdev->context_list_lock)
lock(&file_priv->lock) #2
This order of locking causes a deadlock. To resolve this issue,
change the order of locking in ivpu_job_submit().
Signed-off-by: Karol Wachowski <karol.wachowski@intel.com>
Signed-off-by: Maciej Falkowski <maciej.falkowski@linux.intel.com>
Reviewed-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
Signed-off-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20250107173238.381120-12-maciej.falkowski@linux.intel.com
| -rw-r--r-- | drivers/accel/ivpu/ivpu_job.c | 17 |
1 files changed, 7 insertions, 10 deletions
diff --git a/drivers/accel/ivpu/ivpu_job.c b/drivers/accel/ivpu/ivpu_job.c index c694822a14bf..c93ea37062d7 100644 --- a/drivers/accel/ivpu/ivpu_job.c +++ b/drivers/accel/ivpu/ivpu_job.c @@ -597,6 +597,7 @@ static int ivpu_job_submit(struct ivpu_job *job, u8 priority, u32 cmdq_id) if (ret < 0) return ret; + mutex_lock(&vdev->submitted_jobs_lock); mutex_lock(&file_priv->lock); if (cmdq_id == 0) @@ -606,19 +607,17 @@ static int ivpu_job_submit(struct ivpu_job *job, u8 priority, u32 cmdq_id) if (!cmdq) { ivpu_warn_ratelimited(vdev, "Failed to get job queue, ctx %d\n", file_priv->ctx.id); ret = -EINVAL; - goto err_unlock_file_priv; + goto err_unlock; } ret = ivpu_cmdq_register(file_priv, cmdq); if (ret) { ivpu_err(vdev, "Failed to register command queue: %d\n", ret); - goto err_unlock_file_priv; + goto err_unlock; } job->cmdq_id = cmdq->id; - mutex_lock(&vdev->submitted_jobs_lock); - is_first_job = xa_empty(&vdev->submitted_jobs_xa); ret = xa_alloc_cyclic(&vdev->submitted_jobs_xa, &job->job_id, job, file_priv->job_limit, &file_priv->job_id_next, GFP_KERNEL); @@ -626,7 +625,7 @@ static int ivpu_job_submit(struct ivpu_job *job, u8 priority, u32 cmdq_id) ivpu_dbg(vdev, JOB, "Too many active jobs in ctx %d\n", file_priv->ctx.id); ret = -EBUSY; - goto err_unlock_submitted_jobs; + goto err_unlock; } ret = ivpu_cmdq_push_job(cmdq, job); @@ -649,22 +648,20 @@ static int ivpu_job_submit(struct ivpu_job *job, u8 priority, u32 cmdq_id) job->job_id, file_priv->ctx.id, job->engine_idx, cmdq->priority, job->cmd_buf_vpu_addr, cmdq->jobq->header.tail); - mutex_unlock(&vdev->submitted_jobs_lock); mutex_unlock(&file_priv->lock); if (unlikely(ivpu_test_mode & IVPU_TEST_MODE_NULL_HW)) { - mutex_lock(&vdev->submitted_jobs_lock); ivpu_job_signal_and_destroy(vdev, job->job_id, VPU_JSM_STATUS_SUCCESS); - mutex_unlock(&vdev->submitted_jobs_lock); } + mutex_unlock(&vdev->submitted_jobs_lock); + return 0; err_erase_xa: xa_erase(&vdev->submitted_jobs_xa, job->job_id); -err_unlock_submitted_jobs: +err_unlock: mutex_unlock(&vdev->submitted_jobs_lock); -err_unlock_file_priv: mutex_unlock(&file_priv->lock); ivpu_rpm_put(vdev); return ret; |
