diff options
Diffstat (limited to 'drivers/gpu/drm/scheduler/sched_main.c')
-rw-r--r-- | drivers/gpu/drm/scheduler/sched_main.c | 256 |
1 files changed, 159 insertions, 97 deletions
diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index 829579c41c6b..46119aacb809 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -66,6 +66,7 @@ * This implies waiting for previously executed jobs. */ +#include <linux/export.h> #include <linux/wait.h> #include <linux/sched.h> #include <linux/completion.h> @@ -83,12 +84,6 @@ #define CREATE_TRACE_POINTS #include "gpu_scheduler_trace.h" -#ifdef CONFIG_LOCKDEP -static struct lockdep_map drm_sched_lockdep_map = { - .name = "drm_sched_lockdep_map" -}; -#endif - int drm_sched_policy = DRM_SCHED_POLICY_FIFO; /** @@ -268,38 +263,14 @@ drm_sched_rq_select_entity_rr(struct drm_gpu_scheduler *sched, entity = rq->current_entity; if (entity) { list_for_each_entry_continue(entity, &rq->entities, list) { - if (drm_sched_entity_is_ready(entity)) { - /* If we can't queue yet, preserve the current - * entity in terms of fairness. - */ - if (!drm_sched_can_queue(sched, entity)) { - spin_unlock(&rq->lock); - return ERR_PTR(-ENOSPC); - } - - rq->current_entity = entity; - reinit_completion(&entity->entity_idle); - spin_unlock(&rq->lock); - return entity; - } + if (drm_sched_entity_is_ready(entity)) + goto found; } } list_for_each_entry(entity, &rq->entities, list) { - if (drm_sched_entity_is_ready(entity)) { - /* If we can't queue yet, preserve the current entity in - * terms of fairness. - */ - if (!drm_sched_can_queue(sched, entity)) { - spin_unlock(&rq->lock); - return ERR_PTR(-ENOSPC); - } - - rq->current_entity = entity; - reinit_completion(&entity->entity_idle); - spin_unlock(&rq->lock); - return entity; - } + if (drm_sched_entity_is_ready(entity)) + goto found; if (entity == rq->current_entity) break; @@ -308,6 +279,22 @@ drm_sched_rq_select_entity_rr(struct drm_gpu_scheduler *sched, spin_unlock(&rq->lock); return NULL; + +found: + if (!drm_sched_can_queue(sched, entity)) { + /* + * If scheduler cannot take more jobs signal the caller to not + * consider lower priority queues. + */ + entity = ERR_PTR(-ENOSPC); + } else { + rq->current_entity = entity; + reinit_completion(&entity->entity_idle); + } + + spin_unlock(&rq->lock); + + return entity; } /** @@ -362,36 +349,20 @@ static void drm_sched_run_job_queue(struct drm_gpu_scheduler *sched) } /** - * __drm_sched_run_free_queue - enqueue free-job work + * drm_sched_run_free_queue - enqueue free-job work * @sched: scheduler instance */ -static void __drm_sched_run_free_queue(struct drm_gpu_scheduler *sched) +static void drm_sched_run_free_queue(struct drm_gpu_scheduler *sched) { if (!READ_ONCE(sched->pause_submit)) queue_work(sched->submit_wq, &sched->work_free_job); } /** - * drm_sched_run_free_queue - enqueue free-job work if ready - * @sched: scheduler instance - */ -static void drm_sched_run_free_queue(struct drm_gpu_scheduler *sched) -{ - struct drm_sched_job *job; - - spin_lock(&sched->job_list_lock); - job = list_first_entry_or_null(&sched->pending_list, - struct drm_sched_job, list); - if (job && dma_fence_is_signaled(&job->s_fence->finished)) - __drm_sched_run_free_queue(sched); - spin_unlock(&sched->job_list_lock); -} - -/** * drm_sched_job_done - complete a job * @s_job: pointer to the job which is done * - * Finish the job's fence and wake up the worker thread. + * Finish the job's fence and resubmit the work items. */ static void drm_sched_job_done(struct drm_sched_job *s_job, int result) { @@ -401,12 +372,12 @@ static void drm_sched_job_done(struct drm_sched_job *s_job, int result) atomic_sub(s_job->credits, &sched->credit_count); atomic_dec(sched->score); - trace_drm_sched_process_job(s_fence); + trace_drm_sched_job_done(s_fence); dma_fence_get(&s_fence->finished); drm_sched_fence_finished(s_fence, result); dma_fence_put(&s_fence->finished); - __drm_sched_run_free_queue(sched); + drm_sched_run_free_queue(sched); } /** @@ -536,11 +507,37 @@ static void drm_sched_job_begin(struct drm_sched_job *s_job) spin_unlock(&sched->job_list_lock); } +/** + * drm_sched_job_reinsert_on_false_timeout - reinsert the job on a false timeout + * @sched: scheduler instance + * @job: job to be reinserted on the pending list + * + * In the case of a "false timeout" - when a timeout occurs but the GPU isn't + * hung and is making progress, the scheduler must reinsert the job back into + * @sched->pending_list. Otherwise, the job and its resources won't be freed + * through the &struct drm_sched_backend_ops.free_job callback. + * + * This function must be used in "false timeout" cases only. + */ +static void drm_sched_job_reinsert_on_false_timeout(struct drm_gpu_scheduler *sched, + struct drm_sched_job *job) +{ + spin_lock(&sched->job_list_lock); + list_add(&job->list, &sched->pending_list); + + /* After reinserting the job, the scheduler enqueues the free-job work + * again if ready. Otherwise, a signaled job could be added to the + * pending list, but never freed. + */ + drm_sched_run_free_queue(sched); + spin_unlock(&sched->job_list_lock); +} + static void drm_sched_job_timedout(struct work_struct *work) { struct drm_gpu_scheduler *sched; struct drm_sched_job *job; - enum drm_gpu_sched_stat status = DRM_GPU_SCHED_STAT_NOMINAL; + enum drm_gpu_sched_stat status = DRM_GPU_SCHED_STAT_RESET; sched = container_of(work, struct drm_gpu_scheduler, work_tdr.work); @@ -551,9 +548,10 @@ static void drm_sched_job_timedout(struct work_struct *work) if (job) { /* - * Remove the bad job so it cannot be freed by concurrent - * drm_sched_cleanup_jobs. It will be reinserted back after sched->thread - * is parked at which point it's safe. + * Remove the bad job so it cannot be freed by a concurrent + * &struct drm_sched_backend_ops.free_job. It will be + * reinserted after the scheduler's work items have been + * cancelled, at which point it's safe. */ list_del_init(&job->list); spin_unlock(&sched->job_list_lock); @@ -568,6 +566,9 @@ static void drm_sched_job_timedout(struct work_struct *work) job->sched->ops->free_job(job); sched->free_guilty = false; } + + if (status == DRM_GPU_SCHED_STAT_NO_HANG) + drm_sched_job_reinsert_on_false_timeout(sched, job); } else { spin_unlock(&sched->job_list_lock); } @@ -590,6 +591,10 @@ static void drm_sched_job_timedout(struct work_struct *work) * This function is typically used for reset recovery (see the docu of * drm_sched_backend_ops.timedout_job() for details). Do not call it for * scheduler teardown, i.e., before calling drm_sched_fini(). + * + * As it's only used for reset recovery, drivers must not call this function + * in their &struct drm_sched_backend_ops.timedout_job callback when they + * skip a reset using &enum drm_gpu_sched_stat.DRM_GPU_SCHED_STAT_NO_HANG. */ void drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad) { @@ -599,10 +604,10 @@ void drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad) /* * Reinsert back the bad job here - now it's safe as - * drm_sched_get_finished_job cannot race against us and release the + * drm_sched_get_finished_job() cannot race against us and release the * bad job at this point - we parked (waited for) any in progress - * (earlier) cleanups and drm_sched_get_finished_job will not be called - * now until the scheduler thread is unparked. + * (earlier) cleanups and drm_sched_get_finished_job() will not be + * called now until the scheduler's work items are submitted again. */ if (bad && bad->sched == sched) /* @@ -615,7 +620,8 @@ void drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad) * Iterate the job list from later to earlier one and either deactive * their HW callbacks or remove them from pending list if they already * signaled. - * This iteration is thread safe as sched thread is stopped. + * This iteration is thread safe as the scheduler's work items have been + * cancelled. */ list_for_each_entry_safe_reverse(s_job, tmp, &sched->pending_list, list) { @@ -674,15 +680,19 @@ EXPORT_SYMBOL(drm_sched_stop); * drm_sched_backend_ops.timedout_job() for details). Do not call it for * scheduler startup. The scheduler itself is fully operational after * drm_sched_init() succeeded. + * + * As it's only used for reset recovery, drivers must not call this function + * in their &struct drm_sched_backend_ops.timedout_job callback when they + * skip a reset using &enum drm_gpu_sched_stat.DRM_GPU_SCHED_STAT_NO_HANG. */ void drm_sched_start(struct drm_gpu_scheduler *sched, int errno) { struct drm_sched_job *s_job, *tmp; /* - * Locking the list is not required here as the sched thread is parked - * so no new jobs are being inserted or removed. Also concurrent - * GPU recovers can't run in parallel. + * Locking the list is not required here as the scheduler's work items + * are currently not running, so no new jobs are being inserted or + * removed. Also concurrent GPU recovers can't run in parallel. */ list_for_each_entry_safe(s_job, tmp, &sched->pending_list, list) { struct dma_fence *fence = s_job->s_fence->parent; @@ -764,6 +774,8 @@ EXPORT_SYMBOL(drm_sched_resubmit_jobs); * @credits: the number of credits this job contributes to the schedulers * credit limit * @owner: job owner for debugging + * @drm_client_id: &struct drm_file.client_id of the owner (used by trace + * events) * * Refer to drm_sched_entity_push_job() documentation * for locking considerations. @@ -784,7 +796,8 @@ EXPORT_SYMBOL(drm_sched_resubmit_jobs); */ int drm_sched_job_init(struct drm_sched_job *job, struct drm_sched_entity *entity, - u32 credits, void *owner) + u32 credits, void *owner, + uint64_t drm_client_id) { if (!entity->rq) { /* This will most likely be followed by missing frames @@ -810,7 +823,7 @@ int drm_sched_job_init(struct drm_sched_job *job, job->entity = entity; job->credits = credits; - job->s_fence = drm_sched_fence_alloc(entity, owner); + job->s_fence = drm_sched_fence_alloc(entity, owner, drm_client_id); if (!job->s_fence) return -ENOMEM; @@ -850,7 +863,6 @@ void drm_sched_job_arm(struct drm_sched_job *job) job->sched = sched; job->s_priority = entity->priority; - job->id = atomic64_inc_return(&sched->job_id_count); drm_sched_fence_init(job->s_fence, job->entity); } @@ -1101,12 +1113,16 @@ drm_sched_select_entity(struct drm_gpu_scheduler *sched) * drm_sched_get_finished_job - fetch the next finished job to be destroyed * * @sched: scheduler instance + * @have_more: are there more finished jobs on the list + * + * Informs the caller through @have_more whether there are more finished jobs + * besides the returned one. * * Returns the next finished job from the pending list (if there is one) * ready for it to be destroyed. */ static struct drm_sched_job * -drm_sched_get_finished_job(struct drm_gpu_scheduler *sched) +drm_sched_get_finished_job(struct drm_gpu_scheduler *sched, bool *have_more) { struct drm_sched_job *job, *next; @@ -1114,22 +1130,25 @@ drm_sched_get_finished_job(struct drm_gpu_scheduler *sched) job = list_first_entry_or_null(&sched->pending_list, struct drm_sched_job, list); - if (job && dma_fence_is_signaled(&job->s_fence->finished)) { /* remove job from pending_list */ list_del_init(&job->list); /* cancel this job's TO timer */ cancel_delayed_work(&sched->work_tdr); - /* make the scheduled timestamp more accurate */ + + *have_more = false; next = list_first_entry_or_null(&sched->pending_list, typeof(*next), list); - if (next) { + /* make the scheduled timestamp more accurate */ if (test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &next->s_fence->scheduled.flags)) next->s_fence->scheduled.timestamp = dma_fence_timestamp(&job->s_fence->finished); + + *have_more = dma_fence_is_signaled(&next->s_fence->finished); + /* start TO timer for next job */ drm_sched_start_timeout(sched); } @@ -1188,12 +1207,15 @@ static void drm_sched_free_job_work(struct work_struct *w) struct drm_gpu_scheduler *sched = container_of(w, struct drm_gpu_scheduler, work_free_job); struct drm_sched_job *job; + bool have_more; - job = drm_sched_get_finished_job(sched); - if (job) + job = drm_sched_get_finished_job(sched, &have_more); + if (job) { sched->ops->free_job(job); + if (have_more) + drm_sched_run_free_queue(sched); + } - drm_sched_run_free_queue(sched); drm_sched_run_job_queue(sched); } @@ -1229,7 +1251,7 @@ static void drm_sched_run_job_work(struct work_struct *w) atomic_add(sched_job->credits, &sched->credit_count); drm_sched_job_begin(sched_job); - trace_drm_run_job(sched_job, entity); + trace_drm_sched_job_run(sched_job, entity); /* * The run_job() callback must by definition return a fence whose * refcount has been incremented for the scheduler already. @@ -1256,6 +1278,25 @@ static void drm_sched_run_job_work(struct work_struct *w) drm_sched_run_job_queue(sched); } +static struct workqueue_struct *drm_sched_alloc_wq(const char *name) +{ +#if (IS_ENABLED(CONFIG_LOCKDEP)) + static struct lockdep_map map = { + .name = "drm_sched_lockdep_map" + }; + + /* + * Avoid leaking a lockdep map on each drm sched creation and + * destruction by using a single lockdep map for all drm sched + * allocated submit_wq. + */ + + return alloc_ordered_workqueue_lockdep_map(name, WQ_MEM_RECLAIM, &map); +#else + return alloc_ordered_workqueue(name, WQ_MEM_RECLAIM); +#endif +} + /** * drm_sched_init - Init a gpu scheduler instance * @@ -1296,13 +1337,7 @@ int drm_sched_init(struct drm_gpu_scheduler *sched, const struct drm_sched_init_ sched->submit_wq = args->submit_wq; sched->own_submit_wq = false; } else { -#ifdef CONFIG_LOCKDEP - sched->submit_wq = alloc_ordered_workqueue_lockdep_map(args->name, - WQ_MEM_RECLAIM, - &drm_sched_lockdep_map); -#else - sched->submit_wq = alloc_ordered_workqueue(args->name, WQ_MEM_RECLAIM); -#endif + sched->submit_wq = drm_sched_alloc_wq(args->name); if (!sched->submit_wq) return -ENOMEM; @@ -1348,6 +1383,18 @@ Out_check_own: } EXPORT_SYMBOL(drm_sched_init); +static void drm_sched_cancel_remaining_jobs(struct drm_gpu_scheduler *sched) +{ + struct drm_sched_job *job, *tmp; + + /* All other accessors are stopped. No locking necessary. */ + list_for_each_entry_safe_reverse(job, tmp, &sched->pending_list, list) { + sched->ops->cancel_job(job); + list_del(&job->list); + sched->ops->free_job(job); + } +} + /** * drm_sched_fini - Destroy a gpu scheduler * @@ -1355,19 +1402,11 @@ EXPORT_SYMBOL(drm_sched_init); * * Tears down and cleans up the scheduler. * - * This stops submission of new jobs to the hardware through - * drm_sched_backend_ops.run_job(). Consequently, drm_sched_backend_ops.free_job() - * will not be called for all jobs still in drm_gpu_scheduler.pending_list. - * There is no solution for this currently. Thus, it is up to the driver to make - * sure that: - * - * a) drm_sched_fini() is only called after for all submitted jobs - * drm_sched_backend_ops.free_job() has been called or that - * b) the jobs for which drm_sched_backend_ops.free_job() has not been called - * after drm_sched_fini() ran are freed manually. - * - * FIXME: Take care of the above problem and prevent this function from leaking - * the jobs in drm_gpu_scheduler.pending_list under any circumstances. + * This stops submission of new jobs to the hardware through &struct + * drm_sched_backend_ops.run_job. If &struct drm_sched_backend_ops.cancel_job + * is implemented, all jobs will be canceled through it and afterwards cleaned + * up through &struct drm_sched_backend_ops.free_job. If cancel_job is not + * implemented, memory could leak. */ void drm_sched_fini(struct drm_gpu_scheduler *sched) { @@ -1385,6 +1424,22 @@ void drm_sched_fini(struct drm_gpu_scheduler *sched) * Prevents reinsertion and marks job_queue as idle, * it will be removed from the rq in drm_sched_entity_fini() * eventually + * + * FIXME: + * This lacks the proper spin_lock(&s_entity->lock) and + * is, therefore, a race condition. Most notably, it + * can race with drm_sched_entity_push_job(). The lock + * cannot be taken here, however, because this would + * lead to lock inversion -> deadlock. + * + * The best solution probably is to enforce the life + * time rule of all entities having to be torn down + * before their scheduler. Then, however, locking could + * be dropped alltogether from this function. + * + * For now, this remains a potential race in all + * drivers that keep entities alive for longer than + * the scheduler. */ s_entity->stopped = true; spin_unlock(&rq->lock); @@ -1397,11 +1452,18 @@ void drm_sched_fini(struct drm_gpu_scheduler *sched) /* Confirm no work left behind accessing device structures */ cancel_delayed_work_sync(&sched->work_tdr); + /* Avoid memory leaks if supported by the driver. */ + if (sched->ops->cancel_job) + drm_sched_cancel_remaining_jobs(sched); + if (sched->own_submit_wq) destroy_workqueue(sched->submit_wq); sched->ready = false; kfree(sched->sched_rq); sched->sched_rq = NULL; + + if (!list_empty(&sched->pending_list)) + dev_warn(sched->dev, "Tearing down scheduler while jobs are pending!\n"); } EXPORT_SYMBOL(drm_sched_fini); |