diff options
Diffstat (limited to 'include/drm/gpu_scheduler.h')
-rw-r--r-- | include/drm/gpu_scheduler.h | 23 |
1 files changed, 22 insertions, 1 deletions
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index aa90ed1f1b2b..88ae7f331bb1 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -253,6 +253,24 @@ struct drm_sched_backend_ops { * 5. Restart the scheduler using drm_sched_start(). At that point, new * jobs can be queued, and the scheduler thread is unblocked * + * Note that some GPUs have distinct hardware queues but need to reset + * the GPU globally, which requires extra synchronization between the + * timeout handlers of the different &drm_gpu_scheduler instances. One + * way to achieve this synchronization is to create an ordered workqueue + * (using alloc_ordered_workqueue()) at the driver level, and pass this + * queue to drm_sched_init(), to guarantee that timeout handlers are + * executed sequentially. The above workflow needs to be slightly + * adjusted in that case: + * + * 1. Stop all schedulers impacted by the reset using drm_sched_stop() + * 2. Try to gracefully stop non-faulty jobs on all queues impacted by + * the reset (optional) + * 3. Issue a GPU reset on all faulty queues (driver-specific) + * 4. Re-submit jobs on all schedulers impacted by the reset using + * drm_sched_resubmit_jobs() + * 5. Restart all schedulers that were stopped in step #1 using + * drm_sched_start() + * * Return DRM_GPU_SCHED_STAT_NOMINAL, when all is normal, * and the underlying driver has started or completed recovery. * @@ -283,6 +301,7 @@ struct drm_sched_backend_ops { * finished. * @hw_rq_count: the number of jobs currently in the hardware queue. * @job_id_count: used to assign unique id to the each job. + * @timeout_wq: workqueue used to queue @work_tdr * @work_tdr: schedules a delayed call to @drm_sched_job_timedout after the * timeout interval is over. * @thread: the kthread on which the scheduler which run. 
@@ -307,6 +326,7 @@ struct drm_gpu_scheduler { wait_queue_head_t job_scheduled; atomic_t hw_rq_count; atomic64_t job_id_count; + struct workqueue_struct *timeout_wq; struct delayed_work work_tdr; struct task_struct *thread; struct list_head pending_list; @@ -320,7 +340,8 @@ struct drm_gpu_scheduler { int drm_sched_init(struct drm_gpu_scheduler *sched, const struct drm_sched_backend_ops *ops, - uint32_t hw_submission, unsigned hang_limit, long timeout, + uint32_t hw_submission, unsigned hang_limit, + long timeout, struct workqueue_struct *timeout_wq, atomic_t *score, const char *name); void drm_sched_fini(struct drm_gpu_scheduler *sched); |