From 48f05f2955e4a3183b219d6dfdb1c28e17d03da7 Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Wed, 25 Oct 2017 16:21:08 +0800 Subject: amd/scheduler:imple job skip feature(v3) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Jobs are skipped in two cases: 1) when the entity behind this job is marked guilty, the job popped from this entity's queue will be dropped in the sched_main loop. 2) in job_recovery(), skip scheduling the job if its karma is detected to be above the limit, and likewise skip other jobs sharing the same fence context. This approach is used because job_recovery() cannot access job->entity, since the entity may already be dead. v2: some logic fixes v3: when an entity is detected as guilty, don't drop the job in the popping stage; instead set its fence error to -ECANCELED. In run_job(), skip the scheduling if either: 1) fence->error < 0 or 2) a VRAM loss occurred on this job. This way we can unify the job skipping logic. With this feature we can introduce a new GPU recovery feature. 
Signed-off-by: Monk Liu Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index f60662e03761..0a90c768dbc1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -180,7 +180,7 @@ static struct dma_fence *amdgpu_job_dependency(struct amd_sched_job *sched_job, static struct dma_fence *amdgpu_job_run(struct amd_sched_job *sched_job) { - struct dma_fence *fence = NULL; + struct dma_fence *fence = NULL, *finished; struct amdgpu_device *adev; struct amdgpu_job *job; int r; @@ -190,15 +190,18 @@ static struct dma_fence *amdgpu_job_run(struct amd_sched_job *sched_job) return NULL; } job = to_amdgpu_job(sched_job); + finished = &job->base.s_fence->finished; adev = job->adev; BUG_ON(amdgpu_sync_peek_fence(&job->sync, NULL)); trace_amdgpu_sched_run_job(job); - /* skip ib schedule when vram is lost */ - if (job->vram_lost_counter != atomic_read(&adev->vram_lost_counter)) { - dma_fence_set_error(&job->base.s_fence->finished, -ECANCELED); - DRM_ERROR("Skip scheduling IBs!\n"); + + if (job->vram_lost_counter != atomic_read(&adev->vram_lost_counter)) + dma_fence_set_error(finished, -ECANCELED);/* skip IB as well if VRAM lost */ + + if (finished->error < 0) { + DRM_INFO("Skip scheduling IBs!\n"); } else { r = amdgpu_ib_schedule(job->ring, job->num_ibs, job->ibs, job, &fence); -- cgit v1.2.3