diff options
Diffstat (limited to 'drivers/gpu/drm/nouveau/nouveau_sched.c')
-rw-r--r-- | drivers/gpu/drm/nouveau/nouveau_sched.c | 207 |
1 files changed, 116 insertions, 91 deletions
diff --git a/drivers/gpu/drm/nouveau/nouveau_sched.c b/drivers/gpu/drm/nouveau/nouveau_sched.c index e03fd2bc8a11..dd98f6910f9c 100644 --- a/drivers/gpu/drm/nouveau/nouveau_sched.c +++ b/drivers/gpu/drm/nouveau/nouveau_sched.c @@ -12,30 +12,28 @@ #include "nouveau_abi16.h" #include "nouveau_sched.h" -/* FIXME - * - * We want to make sure that jobs currently executing can't be deferred by - * other jobs competing for the hardware. Otherwise we might end up with job - * timeouts just because of too many clients submitting too many jobs. We don't - * want jobs to time out because of system load, but because of the job being - * too bulky. - * - * For now allow for up to 16 concurrent jobs in flight until we know how many - * rings the hardware can process in parallel. - */ -#define NOUVEAU_SCHED_HW_SUBMISSIONS 16 #define NOUVEAU_SCHED_JOB_TIMEOUT_MS 10000 +/* Starts at 0, since the DRM scheduler interprets those parameters as (initial) + * index to the run-queue array. + */ +enum nouveau_sched_priority { + NOUVEAU_SCHED_PRIORITY_SINGLE = DRM_SCHED_PRIORITY_KERNEL, + NOUVEAU_SCHED_PRIORITY_COUNT, +}; + int nouveau_job_init(struct nouveau_job *job, struct nouveau_job_args *args) { - struct nouveau_sched_entity *entity = args->sched_entity; + struct nouveau_sched *sched = args->sched; int ret; + INIT_LIST_HEAD(&job->entry); + job->file_priv = args->file_priv; job->cli = nouveau_cli(args->file_priv); - job->entity = entity; + job->sched = sched; job->sync = args->sync; job->resv_usage = args->resv_usage; @@ -86,10 +84,10 @@ nouveau_job_init(struct nouveau_job *job, ret = -ENOMEM; goto err_free_objs; } - } - ret = drm_sched_job_init(&job->base, &entity->base, 1, NULL); + ret = drm_sched_job_init(&job->base, &sched->entity, + args->credits, NULL); if (ret) goto err_free_chains; @@ -109,6 +107,27 @@ return ret; } void +nouveau_job_fini(struct nouveau_job *job) +{ + dma_fence_put(job->done_fence); + drm_sched_job_cleanup(&job->base); + + job->ops->free(job); +} + +void +nouveau_job_done(struct nouveau_job *job) +{ + struct nouveau_sched *sched = job->sched; + + spin_lock(&sched->job.list.lock); + list_del(&job->entry); + spin_unlock(&sched->job.list.lock); + + wake_up(&sched->job.wq); +} + +void nouveau_job_free(struct nouveau_job *job) { kfree(job->in_sync.data); @@ -117,13 +136,6 @@ nouveau_job_free(struct nouveau_job *job) kfree(job->out_sync.chains); } -void nouveau_job_fini(struct nouveau_job *job) -{ - dma_fence_put(job->done_fence); - drm_sched_job_cleanup(&job->base); - job->ops->free(job); -} - static int sync_find_fence(struct nouveau_job *job, struct drm_nouveau_sync *sync, @@ -261,8 +273,13 @@ nouveau_job_fence_attach(struct nouveau_job *job) int nouveau_job_submit(struct nouveau_job *job) { - struct nouveau_sched_entity *entity = to_nouveau_sched_entity(job->base.entity); + struct nouveau_sched *sched = job->sched; struct dma_fence *done_fence = NULL; + struct drm_gpuvm_exec vm_exec = { + .vm = &nouveau_cli_uvmm(job->cli)->base, + .flags = DRM_EXEC_IGNORE_DUPLICATES, + .num_fences = 1, + }; int ret; ret = nouveau_job_add_deps(job); @@ -276,46 +293,29 @@ nouveau_job_submit(struct nouveau_job *job) /* Make sure the job appears on the sched_entity's queue in the same * order as it was submitted. */ - mutex_lock(&entity->mutex); + mutex_lock(&sched->mutex); /* Guarantee we won't fail after the submit() callback returned * successfully. */ if (job->ops->submit) { - ret = job->ops->submit(job); + ret = job->ops->submit(job, &vm_exec); if (ret) goto err_cleanup; } + /* Submit was successful; add the job to the schedulers job list. */ + spin_lock(&sched->job.list.lock); + list_add(&job->entry, &sched->job.list.head); + spin_unlock(&sched->job.list.lock); + drm_sched_job_arm(&job->base); job->done_fence = dma_fence_get(&job->base.s_fence->finished); if (job->sync) done_fence = dma_fence_get(job->done_fence); - /* If a sched job depends on a dma-fence from a job from the same GPU - * scheduler instance, but a different scheduler entity, the GPU - * scheduler does only wait for the particular job to be scheduled, - * rather than for the job to fully complete. This is due to the GPU - * scheduler assuming that there is a scheduler instance per ring. - * However, the current implementation, in order to avoid arbitrary - * amounts of kthreads, has a single scheduler instance while scheduler - * entities represent rings. - * - * As a workaround, set the DRM_SCHED_FENCE_DONT_PIPELINE for all - * out-fences in order to force the scheduler to wait for full job - * completion for dependent jobs from different entities and same - * scheduler instance. - * - * There is some work in progress [1] to address the issues of firmware - * schedulers; once it is in-tree the scheduler topology in Nouveau - * should be re-worked accordingly. - * - * [1] https://lore.kernel.org/dri-devel/20230801205103.627779-1-matthew.brost@intel.com/ - */ - set_bit(DRM_SCHED_FENCE_DONT_PIPELINE, &job->done_fence->flags); - if (job->ops->armed_submit) - job->ops->armed_submit(job); + job->ops->armed_submit(job, &vm_exec); nouveau_job_fence_attach(job); @@ -326,7 +326,7 @@ nouveau_job_submit(struct nouveau_job *job) drm_sched_entity_push_job(&job->base); - mutex_unlock(&entity->mutex); + mutex_unlock(&sched->mutex); if (done_fence) { dma_fence_wait(done_fence, true); @@ -336,20 +336,13 @@ nouveau_job_submit(struct nouveau_job *job) return 0; err_cleanup: - mutex_unlock(&entity->mutex); + mutex_unlock(&sched->mutex); nouveau_job_fence_attach_cleanup(job); err: job->state = NOUVEAU_JOB_SUBMIT_FAILED; return ret; } -bool -nouveau_sched_entity_qwork(struct nouveau_sched_entity *entity, - struct work_struct *work) -{ - return queue_work(entity->sched_wq, work); -} - static struct dma_fence * nouveau_job_run(struct nouveau_job *job) { @@ -399,50 +392,82 @@ nouveau_sched_free_job(struct drm_sched_job *sched_job) nouveau_job_fini(job); } -int nouveau_sched_entity_init(struct nouveau_sched_entity *entity, - struct drm_gpu_scheduler *sched, - struct workqueue_struct *sched_wq) -{ - mutex_init(&entity->mutex); - spin_lock_init(&entity->job.list.lock); - INIT_LIST_HEAD(&entity->job.list.head); - init_waitqueue_head(&entity->job.wq); - - entity->sched_wq = sched_wq; - return drm_sched_entity_init(&entity->base, - DRM_SCHED_PRIORITY_NORMAL, - &sched, 1, NULL); -} - -void -nouveau_sched_entity_fini(struct nouveau_sched_entity *entity) -{ - drm_sched_entity_destroy(&entity->base); -} - static const struct drm_sched_backend_ops nouveau_sched_ops = { .run_job = nouveau_sched_run_job, .timedout_job = nouveau_sched_timedout_job, .free_job = nouveau_sched_free_job, }; -int nouveau_sched_init(struct nouveau_drm *drm) +int +nouveau_sched_init(struct nouveau_sched *sched, struct nouveau_drm *drm, + struct workqueue_struct *wq, u32 credit_limit) { - struct drm_gpu_scheduler *sched = &drm->sched; + struct drm_gpu_scheduler *drm_sched = &sched->base; + struct drm_sched_entity *entity = &sched->entity; long job_hang_limit = msecs_to_jiffies(NOUVEAU_SCHED_JOB_TIMEOUT_MS); + int ret; - drm->sched_wq = create_singlethread_workqueue("nouveau_sched_wq"); - if (!drm->sched_wq) - return -ENOMEM; + if (!wq) { + wq = alloc_workqueue("nouveau_sched_wq_%d", 0, WQ_MAX_ACTIVE, + current->pid); + if (!wq) + return -ENOMEM; + + sched->wq = wq; + } - return drm_sched_init(sched, &nouveau_sched_ops, NULL, - DRM_SCHED_PRIORITY_COUNT, - NOUVEAU_SCHED_HW_SUBMISSIONS, 0, job_hang_limit, - NULL, NULL, "nouveau_sched", drm->dev->dev); + ret = drm_sched_init(drm_sched, &nouveau_sched_ops, wq, + NOUVEAU_SCHED_PRIORITY_COUNT, + credit_limit, 0, job_hang_limit, + NULL, NULL, "nouveau_sched", drm->dev->dev); + if (ret) + goto fail_wq; + + /* Using DRM_SCHED_PRIORITY_KERNEL, since that's what we're required to use + * when we want to have a single run-queue only. + * + * It's not documented, but one will find out when trying to use any + * other priority running into faults, because the scheduler uses the + * priority as array index. + * + * Can't use NOUVEAU_SCHED_PRIORITY_SINGLE either, because it's not + * matching the enum type used in drm_sched_entity_init(). + */ + ret = drm_sched_entity_init(entity, DRM_SCHED_PRIORITY_KERNEL, + &drm_sched, 1, NULL); + if (ret) + goto fail_sched; + + mutex_init(&sched->mutex); + spin_lock_init(&sched->job.list.lock); + INIT_LIST_HEAD(&sched->job.list.head); + init_waitqueue_head(&sched->job.wq); + + return 0; + +fail_sched: + drm_sched_fini(drm_sched); +fail_wq: + if (sched->wq) + destroy_workqueue(sched->wq); + return ret; } -void nouveau_sched_fini(struct nouveau_drm *drm) +void +nouveau_sched_fini(struct nouveau_sched *sched) { - destroy_workqueue(drm->sched_wq); - drm_sched_fini(&drm->sched); + struct drm_gpu_scheduler *drm_sched = &sched->base; + struct drm_sched_entity *entity = &sched->entity; + + rmb(); /* for list_empty to work without lock */ + wait_event(sched->job.wq, list_empty(&sched->job.list.head)); + + drm_sched_entity_fini(entity); + drm_sched_fini(drm_sched); + + /* Destroy workqueue after scheduler tear down, otherwise it might still + * be in use. + */ + if (sched->wq) + destroy_workqueue(sched->wq); } |