From 5918045c4ed492fb5813f980dcf89a90fefd0a4e Mon Sep 17 00:00:00 2001 From: Christian König Date: Thu, 18 Apr 2019 11:00:21 -0400 Subject: drm/scheduler: rework job destruction MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We now destroy finished jobs from the worker thread to make sure that we never destroy a job currently in timeout processing. By this we avoid holding lock around ring mirror list in drm_sched_stop which should solve a deadlock reported by a user. v2: Remove unused variable. v4: Move guilty job free into sched code. v5: Move sched->hw_rq_count to drm_sched_start to account for counter decrement in drm_sched_stop even when we don't call resubmit jobs if guily job did signal. v6: remove unused variable Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=109692 Acked-by: Chunming Zhou Signed-off-by: Christian König Signed-off-by: Andrey Grodzovsky Signed-off-by: Alex Deucher Link: https://patchwork.freedesktop.org/patch/msgid/1555599624-12285-3-git-send-email-andrey.grodzovsky@amd.com --- drivers/gpu/drm/lima/lima_sched.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/lima/lima_sched.c') diff --git a/drivers/gpu/drm/lima/lima_sched.c b/drivers/gpu/drm/lima/lima_sched.c index d53bd45f8d96..58a15479d175 100644 --- a/drivers/gpu/drm/lima/lima_sched.c +++ b/drivers/gpu/drm/lima/lima_sched.c @@ -258,7 +258,7 @@ static struct dma_fence *lima_sched_run_job(struct drm_sched_job *job) static void lima_sched_handle_error_task(struct lima_sched_pipe *pipe, struct lima_sched_task *task) { - drm_sched_stop(&pipe->base); + drm_sched_stop(&pipe->base, &task->base); if (task) drm_sched_increase_karma(&task->base); -- cgit v1.2.3 From 47ab14578263571228ce63e9417d96847693c7b4 Mon Sep 17 00:00:00 2001 From: Erico Nunes Date: Tue, 21 May 2019 00:42:29 +0200 Subject: drm/lima: add timeout to drm scheduler init After "5918045c4ed4 drm/scheduler: rework job destruction", lima started to leak memory due to buffers not being destroyed after job execution in the drm scheduler. This started happening because the drm scheduler only destroyed buffers after cancelling the job timeout handler, and for lima this handler was never started as lima specified a MAX_SCHEDULE_TIMEOUT timeout. Lima seems to run well in its current state with a real timeout, so to make it more aligned with the other drivers from now on, let's use a real default timeout. This also fixes the observed memory leaks. The 500ms value was chosen as it is the current value for all other embedded gpu drivers using drm sched. Signed-off-by: Erico Nunes Signed-off-by: Qiang Yu Link: https://patchwork.freedesktop.org/patch/msgid/20190520224229.21111-1-nunes.erico@gmail.com --- drivers/gpu/drm/lima/lima_drv.c | 2 +- drivers/gpu/drm/lima/lima_sched.c | 11 ++++------- 2 files changed, 5 insertions(+), 8 deletions(-) (limited to 'drivers/gpu/drm/lima/lima_sched.c') diff --git a/drivers/gpu/drm/lima/lima_drv.c b/drivers/gpu/drm/lima/lima_drv.c index f9a281a62083..b29c26cd13b2 100644 --- a/drivers/gpu/drm/lima/lima_drv.c +++ b/drivers/gpu/drm/lima/lima_drv.c @@ -17,7 +17,7 @@ int lima_sched_timeout_ms; -MODULE_PARM_DESC(sched_timeout_ms, "task run timeout in ms (0 = no timeout (default))"); +MODULE_PARM_DESC(sched_timeout_ms, "task run timeout in ms"); module_param_named(sched_timeout_ms, lima_sched_timeout_ms, int, 0444); static int lima_ioctl_get_param(struct drm_device *dev, void *data, struct drm_file *file) diff --git a/drivers/gpu/drm/lima/lima_sched.c b/drivers/gpu/drm/lima/lima_sched.c index 58a15479d175..4127cacac454 100644 --- a/drivers/gpu/drm/lima/lima_sched.c +++ b/drivers/gpu/drm/lima/lima_sched.c @@ -329,19 +329,16 @@ static void lima_sched_error_work(struct work_struct *work) int lima_sched_pipe_init(struct lima_sched_pipe *pipe, const char *name) { - long timeout; - - if (lima_sched_timeout_ms <= 0) - timeout = MAX_SCHEDULE_TIMEOUT; - else - timeout = msecs_to_jiffies(lima_sched_timeout_ms); + unsigned int timeout = lima_sched_timeout_ms > 0 ? + lima_sched_timeout_ms : 500; pipe->fence_context = dma_fence_context_alloc(1); spin_lock_init(&pipe->fence_lock); INIT_WORK(&pipe->error_work, lima_sched_error_work); - return drm_sched_init(&pipe->base, &lima_sched_ops, 1, 0, timeout, name); + return drm_sched_init(&pipe->base, &lima_sched_ops, 1, 0, + msecs_to_jiffies(timeout), name); } void lima_sched_pipe_fini(struct lima_sched_pipe *pipe) -- cgit v1.2.3