author      Dave Airlie <airlied@redhat.com>        2015-08-27 06:00:28 +0300
committer   Dave Airlie <airlied@redhat.com>        2015-08-27 06:00:28 +0300
commit      40b2dffbcc67e92d5df97785dffc68fe88605bfa (patch)
tree        91276b6ae4210791ad4494adaf69a56b16c7b0ac /drivers
parent      db56176025cee5e242dfeed5f4e304d095d29fa3 (diff)
parent      c2b6bd7e91aad8440a2f55bdbde6f5a8ae19fac5 (diff)
download    linux-40b2dffbcc67e92d5df97785dffc68fe88605bfa.tar.xz
Merge branch 'drm-next-4.3' of git://people.freedesktop.org/~agd5f/linux into drm-next
- DP fixes for radeon and amdgpu
- IH ring fix for tonga and fiji
- Lots of GPU scheduler fixes
- Misc additional fixes
* 'drm-next-4.3' of git://people.freedesktop.org/~agd5f/linux: (42 commits)
drm/amdgpu: fix wait queue handling in the scheduler
drm/amdgpu: remove extra parameters from scheduler callbacks
drm/amdgpu: wake up scheduler only when necessary
drm/amdgpu: remove entity idle timeout v2
drm/amdgpu: fix postclose order
drm/amdgpu: use IB for copy buffer of eviction
drm/amdgpu: adjust the judgement of removing fence callback
drm/amdgpu: fix no sync_wait in copy_buffer
drm/amdgpu: fix last_vm_update fence is not effective for sched fence
drm/amdgpu: add priv data to sched
drm/amdgpu: add owner for sched fence
drm/amdgpu: remove entity reference from sched fence
drm/amdgpu: fix and cleanup amd_sched_entity_push_job
drm/amdgpu: remove amdgpu_bo_list_clone
drm/amdgpu: remove the context from amdgpu_job
drm/amdgpu: remove unused parameters to amd_sched_create
drm/amdgpu: remove sched_lock
drm/amdgpu: remove prepare_job callback
drm/amdgpu: cleanup a scheduler function name
drm/amdgpu: reorder scheduler functions
...
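The scheduler work above converges on one pattern that recurs throughout the diff below: waits take an array of generic struct fence pointers plus a wait-any/wait-all flag (amdgpu_fence_wait_multiple()), instead of a fixed-size array of driver-private amdgpu_fence objects. The following is a condensed sketch of that wait loop, not the driver code itself: wait_fences, wait_cb and fences_done are illustrative names, the GPU-reset check of the real function is omitted, and struct fence is the pre-4.10 name of today's struct dma_fence.

    #include <linux/fence.h>
    #include <linux/sched.h>
    #include <linux/slab.h>

    struct wait_cb {
        struct fence_cb base;
        struct task_struct *task;
    };

    static void wait_cb_func(struct fence *f, struct fence_cb *cb)
    {
        /* a fence signaled: wake the sleeping waiter */
        wake_up_process(container_of(cb, struct wait_cb, base)->task);
    }

    static bool fences_done(struct fence **fences, u32 count, bool wait_all)
    {
        u32 i;

        for (i = 0; i < count; ++i) {
            if (!fences[i])
                continue;
            if (wait_all && !fence_is_signaled(fences[i]))
                return false;   /* wait-all: one pending fence keeps us sleeping */
            if (!wait_all && fence_is_signaled(fences[i]))
                return true;    /* wait-any: one signaled fence is enough */
        }
        return wait_all;
    }

    static signed long wait_fences(struct fence **fences, u32 count,
                                   bool wait_all, bool intr, signed long t)
    {
        struct wait_cb *cb;
        u32 i;

        /* one callback slot per fence; kcalloc zeroes ->base.func */
        cb = kcalloc(count, sizeof(*cb), GFP_KERNEL);
        if (!cb)
            return -ENOMEM;

        for (i = 0; i < count; ++i) {
            if (!fences[i])
                continue;
            cb[i].task = current;
            if (fence_add_callback(fences[i], &cb[i].base, wait_cb_func) &&
                !wait_all)
                goto cleanup;   /* already signaled: wait-any is satisfied */
        }

        while (t > 0) {
            set_current_state(intr ? TASK_INTERRUPTIBLE
                                   : TASK_UNINTERRUPTIBLE);
            /* re-check only after set_current_state() to close the
             * race with wake_up_process() from the callback */
            if (fences_done(fences, count, wait_all))
                break;
            if (intr && signal_pending(current)) {
                t = -ERESTARTSYS;
                break;
            }
            t = schedule_timeout(t);
        }
        __set_current_state(TASK_RUNNING);

    cleanup:
        for (i = 0; i < count; ++i)
            if (fences[i] && cb[i].base.func)
                fence_remove_callback(fences[i], &cb[i].base);
        kfree(cb);
        return t;
    }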
Diffstat (limited to 'drivers')
30 files changed, 671 insertions, 727 deletions
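Another recurring change is in amdgpu_sync: dependencies are now tracked in a small hash table of generic fences keyed by fence context, so a job keeps at most one fence (the latest) per context and can drain them with a plain wait loop; see amdgpu_sync_fence() and the new amdgpu_sync_wait() below. A minimal sketch of that scheme under the same 4.2-era fence API; sync_deps, sync_add_fence and sync_wait_all are illustrative names rather than the driver's.

    #include <linux/fence.h>
    #include <linux/hashtable.h>
    #include <linux/slab.h>

    struct sync_entry {
        struct hlist_node node;
        struct fence *fence;
    };

    struct sync_deps {
        DECLARE_HASHTABLE(fences, 4);   /* 16 buckets; hash_init() once at setup */
    };

    /* Remember f as a dependency, keeping at most one fence per context. */
    static int sync_add_fence(struct sync_deps *sync, struct fence *f)
    {
        struct sync_entry *e;

        hash_for_each_possible(sync->fences, e, node, f->context) {
            struct fence *later;

            if (e->fence->context != f->context)
                continue;   /* bucket collision, different context */

            /*
             * Same context: fences are ordered by seqno, so keeping
             * the later one is enough.  fence_later() returns NULL
             * when the later fence is already signaled.
             */
            later = fence_get(fence_later(e->fence, f));
            if (later) {
                fence_put(e->fence);
                e->fence = later;
            }
            return 0;
        }

        e = kmalloc(sizeof(*e), GFP_KERNEL);
        if (!e)
            return -ENOMEM;

        e->fence = fence_get(f);
        hash_add(sync->fences, &e->node, f->context);
        return 0;
    }

    /* Block until every remembered dependency has signaled. */
    static int sync_wait_all(struct sync_deps *sync)
    {
        struct sync_entry *e;
        struct hlist_node *tmp;
        int i, r;

        hash_for_each_safe(sync->fences, i, tmp, e, node) {
            r = fence_wait(e->fence, false);
            if (r)
                return r;

            hash_del(&e->node);
            fence_put(e->fence);
            kfree(e);
        }
        return 0;
    }

In the diff, amdgpu_ib_schedule() performs this drain (amdgpu_sync_wait()) before taking the ring lock, which is why foreign fences no longer need to be waited on synchronously inside amdgpu_sync_fence() itself.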
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 2fc58e658986..aa2dcf578dd6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -183,6 +183,7 @@ struct amdgpu_vm; struct amdgpu_ring; struct amdgpu_semaphore; struct amdgpu_cs_parser; +struct amdgpu_job; struct amdgpu_irq_src; struct amdgpu_fpriv; @@ -246,7 +247,7 @@ struct amdgpu_buffer_funcs { unsigned copy_num_dw; /* used for buffer migration */ - void (*emit_copy_buffer)(struct amdgpu_ring *ring, + void (*emit_copy_buffer)(struct amdgpu_ib *ib, /* src addr in bytes */ uint64_t src_offset, /* dst addr in bytes */ @@ -439,9 +440,12 @@ int amdgpu_fence_wait_next(struct amdgpu_ring *ring); int amdgpu_fence_wait_empty(struct amdgpu_ring *ring); unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring); -signed long amdgpu_fence_wait_any(struct amdgpu_device *adev, - struct amdgpu_fence **fences, - bool intr, long t); +signed long amdgpu_fence_wait_multiple(struct amdgpu_device *adev, + struct fence **array, + uint32_t count, + bool wait_all, + bool intr, + signed long t); struct amdgpu_fence *amdgpu_fence_ref(struct amdgpu_fence *fence); void amdgpu_fence_unref(struct amdgpu_fence **fence); @@ -514,7 +518,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t dst_offset, uint32_t byte_count, struct reservation_object *resv, - struct amdgpu_fence **fence); + struct fence **fence); int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma); struct amdgpu_bo_list_entry { @@ -650,7 +654,7 @@ struct amdgpu_sa_bo { struct amdgpu_sa_manager *manager; unsigned soffset; unsigned eoffset; - struct amdgpu_fence *fence; + struct fence *fence; }; /* @@ -692,7 +696,7 @@ bool amdgpu_semaphore_emit_wait(struct amdgpu_ring *ring, struct amdgpu_semaphore *semaphore); void amdgpu_semaphore_free(struct amdgpu_device *adev, struct amdgpu_semaphore **semaphore, - struct amdgpu_fence *fence); + struct fence *fence); /* * Synchronization @@ -700,7 +704,8 @@ void amdgpu_semaphore_free(struct amdgpu_device *adev, struct amdgpu_sync { struct amdgpu_semaphore *semaphores[AMDGPU_NUM_SYNCS]; struct amdgpu_fence *sync_to[AMDGPU_MAX_RINGS]; - struct amdgpu_fence *last_vm_update; + DECLARE_HASHTABLE(fences, 4); + struct fence *last_vm_update; }; void amdgpu_sync_create(struct amdgpu_sync *sync); @@ -712,8 +717,9 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, void *owner); int amdgpu_sync_rings(struct amdgpu_sync *sync, struct amdgpu_ring *ring); +int amdgpu_sync_wait(struct amdgpu_sync *sync); void amdgpu_sync_free(struct amdgpu_device *adev, struct amdgpu_sync *sync, - struct amdgpu_fence *fence); + struct fence *fence); /* * GART structures, functions & helpers @@ -871,7 +877,7 @@ int amdgpu_sched_ib_submit_kernel_helper(struct amdgpu_device *adev, struct amdgpu_ring *ring, struct amdgpu_ib *ibs, unsigned num_ibs, - int (*free_job)(struct amdgpu_cs_parser *), + int (*free_job)(struct amdgpu_job *), void *owner, struct fence **fence); @@ -957,7 +963,7 @@ struct amdgpu_vm_id { unsigned id; uint64_t pd_gpu_addr; /* last flushed PD/PT update */ - struct amdgpu_fence *flushed_updates; + struct fence *flushed_updates; /* last use of vmid */ struct amdgpu_fence *last_id_use; }; @@ -1042,7 +1048,7 @@ struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id); int amdgpu_ctx_put(struct amdgpu_ctx *ctx); uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, - struct fence *fence, uint64_t queued_seq); + struct fence *fence); struct 
fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, uint64_t seq); @@ -1078,8 +1084,6 @@ struct amdgpu_bo_list { }; struct amdgpu_bo_list * -amdgpu_bo_list_clone(struct amdgpu_bo_list *list); -struct amdgpu_bo_list * amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id); void amdgpu_bo_list_put(struct amdgpu_bo_list *list); void amdgpu_bo_list_free(struct amdgpu_bo_list *list); @@ -1255,14 +1259,16 @@ struct amdgpu_cs_parser { /* user fence */ struct amdgpu_user_fence uf; +}; - struct amdgpu_ring *ring; - struct mutex job_lock; - struct work_struct job_work; - int (*prepare_job)(struct amdgpu_cs_parser *sched_job); - int (*run_job)(struct amdgpu_cs_parser *sched_job); - int (*free_job)(struct amdgpu_cs_parser *sched_job); - struct amd_sched_fence *s_fence; +struct amdgpu_job { + struct amd_sched_job base; + struct amdgpu_device *adev; + struct amdgpu_ib *ibs; + uint32_t num_ibs; + struct mutex job_lock; + struct amdgpu_user_fence uf; + int (*free_job)(struct amdgpu_job *sched_job); }; static inline u32 amdgpu_get_ib_value(struct amdgpu_cs_parser *p, uint32_t ib_idx, int idx) @@ -2241,7 +2247,7 @@ static inline void amdgpu_ring_write(struct amdgpu_ring *ring, uint32_t v) #define amdgpu_display_add_connector(adev, ci, sd, ct, ib, coi, h, r) (adev)->mode_info.funcs->add_connector((adev), (ci), (sd), (ct), (ib), (coi), (h), (r)) #define amdgpu_display_stop_mc_access(adev, s) (adev)->mode_info.funcs->stop_mc_access((adev), (s)) #define amdgpu_display_resume_mc_access(adev, s) (adev)->mode_info.funcs->resume_mc_access((adev), (s)) -#define amdgpu_emit_copy_buffer(adev, r, s, d, b) (adev)->mman.buffer_funcs->emit_copy_buffer((r), (s), (d), (b)) +#define amdgpu_emit_copy_buffer(adev, ib, s, d, b) (adev)->mman.buffer_funcs->emit_copy_buffer((ib), (s), (d), (b)) #define amdgpu_emit_fill_buffer(adev, r, s, d, b) (adev)->mman.buffer_funcs->emit_fill_buffer((r), (s), (d), (b)) #define amdgpu_dpm_get_temperature(adev) (adev)->pm.funcs->get_temperature((adev)) #define amdgpu_dpm_pre_set_power_state(adev) (adev)->pm.funcs->pre_set_power_state((adev)) @@ -2343,7 +2349,7 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, struct amdgpu_sync *sync); void amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_vm *vm, - struct amdgpu_fence *updates); + struct fence *updates); void amdgpu_vm_fence(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct amdgpu_fence *fence); @@ -2373,7 +2379,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev, uint64_t addr); void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va); - +int amdgpu_vm_free_job(struct amdgpu_job *job); /* * functions used by amdgpu_encoder.c */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c index 759482e4300d..98d59ee640ce 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c @@ -33,7 +33,7 @@ static int amdgpu_benchmark_do_move(struct amdgpu_device *adev, unsigned size, { unsigned long start_jiffies; unsigned long end_jiffies; - struct amdgpu_fence *fence = NULL; + struct fence *fence = NULL; int i, r; start_jiffies = jiffies; @@ -42,17 +42,17 @@ static int amdgpu_benchmark_do_move(struct amdgpu_device *adev, unsigned size, r = amdgpu_copy_buffer(ring, saddr, daddr, size, NULL, &fence); if (r) goto exit_do_move; - r = fence_wait(&fence->base, false); + r = fence_wait(fence, false); if (r) goto exit_do_move; - amdgpu_fence_unref(&fence); + fence_put(fence); } 
end_jiffies = jiffies; r = jiffies_to_msecs(end_jiffies - start_jiffies); exit_do_move: if (fence) - amdgpu_fence_unref(&fence); + fence_put(fence); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c index 7eed523bf28f..f82a2dd83874 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c @@ -62,39 +62,6 @@ static int amdgpu_bo_list_create(struct amdgpu_fpriv *fpriv, return 0; } -struct amdgpu_bo_list * -amdgpu_bo_list_clone(struct amdgpu_bo_list *list) -{ - struct amdgpu_bo_list *result; - unsigned i; - - result = kmalloc(sizeof(struct amdgpu_bo_list), GFP_KERNEL); - if (!result) - return NULL; - - result->array = drm_calloc_large(list->num_entries, - sizeof(struct amdgpu_bo_list_entry)); - if (!result->array) { - kfree(result); - return NULL; - } - - mutex_init(&result->lock); - result->gds_obj = list->gds_obj; - result->gws_obj = list->gws_obj; - result->oa_obj = list->oa_obj; - result->has_userptr = list->has_userptr; - result->num_entries = list->num_entries; - - memcpy(result->array, list->array, list->num_entries * - sizeof(struct amdgpu_bo_list_entry)); - - for (i = 0; i < result->num_entries; ++i) - amdgpu_bo_ref(result->array[i].robj); - - return result; -} - static void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int id) { struct amdgpu_bo_list *list; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c index 27df17a0e620..89c3dd62ba21 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c @@ -75,6 +75,11 @@ void amdgpu_connector_hotplug(struct drm_connector *connector) if (!amdgpu_display_hpd_sense(adev, amdgpu_connector->hpd.hpd)) { drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF); } else if (amdgpu_atombios_dp_needs_link_train(amdgpu_connector)) { + /* Don't try to start link training before we + * have the dpcd */ + if (!amdgpu_atombios_dp_get_dpcd(amdgpu_connector)) + return; + /* set it to OFF so that drm_helper_connector_dpms() * won't return immediately since the current state * is ON at this point. 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index e4424b4db5d3..6a206f15635f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -126,19 +126,6 @@ int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type, return 0; } -static void amdgpu_job_work_func(struct work_struct *work) -{ - struct amdgpu_cs_parser *sched_job = - container_of(work, struct amdgpu_cs_parser, - job_work); - mutex_lock(&sched_job->job_lock); - if (sched_job->free_job) - sched_job->free_job(sched_job); - mutex_unlock(&sched_job->job_lock); - /* after processing job, free memory */ - fence_put(&sched_job->s_fence->base); - kfree(sched_job); -} struct amdgpu_cs_parser *amdgpu_cs_parser_create(struct amdgpu_device *adev, struct drm_file *filp, struct amdgpu_ctx *ctx, @@ -157,10 +144,6 @@ struct amdgpu_cs_parser *amdgpu_cs_parser_create(struct amdgpu_device *adev, parser->ctx = ctx; parser->ibs = ibs; parser->num_ibs = num_ibs; - if (amdgpu_enable_scheduler) { - mutex_init(&parser->job_lock); - INIT_WORK(&parser->job_work, amdgpu_job_work_func); - } for (i = 0; i < num_ibs; i++) ibs[i].ctx = ctx; @@ -173,7 +156,6 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) uint64_t *chunk_array_user; uint64_t *chunk_array = NULL; struct amdgpu_fpriv *fpriv = p->filp->driver_priv; - struct amdgpu_bo_list *bo_list = NULL; unsigned size, i; int r = 0; @@ -185,20 +167,7 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) r = -EINVAL; goto out; } - bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle); - if (!amdgpu_enable_scheduler) - p->bo_list = bo_list; - else { - if (bo_list && !bo_list->has_userptr) { - p->bo_list = amdgpu_bo_list_clone(bo_list); - amdgpu_bo_list_put(bo_list); - if (!p->bo_list) - return -ENOMEM; - } else if (bo_list && bo_list->has_userptr) - p->bo_list = bo_list; - else - p->bo_list = NULL; - } + p->bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle); /* get chunks */ INIT_LIST_HEAD(&p->validated); @@ -291,7 +260,7 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) } - p->ibs = kmalloc_array(p->num_ibs, sizeof(struct amdgpu_ib), GFP_KERNEL); + p->ibs = kcalloc(p->num_ibs, sizeof(struct amdgpu_ib), GFP_KERNEL); if (!p->ibs) r = -ENOMEM; @@ -498,25 +467,24 @@ static void amdgpu_cs_parser_fini_late(struct amdgpu_cs_parser *parser) unsigned i; if (parser->ctx) amdgpu_ctx_put(parser->ctx); - if (parser->bo_list) { - if (amdgpu_enable_scheduler && !parser->bo_list->has_userptr) - amdgpu_bo_list_free(parser->bo_list); - else - amdgpu_bo_list_put(parser->bo_list); - } + if (parser->bo_list) + amdgpu_bo_list_put(parser->bo_list); + drm_free_large(parser->vm_bos); for (i = 0; i < parser->nchunks; i++) drm_free_large(parser->chunks[i].kdata); kfree(parser->chunks); - if (parser->ibs) - for (i = 0; i < parser->num_ibs; i++) - amdgpu_ib_free(parser->adev, &parser->ibs[i]); - kfree(parser->ibs); - if (parser->uf.bo) - drm_gem_object_unreference_unlocked(&parser->uf.bo->gem_base); - if (!amdgpu_enable_scheduler) - kfree(parser); + { + if (parser->ibs) + for (i = 0; i < parser->num_ibs; i++) + amdgpu_ib_free(parser->adev, &parser->ibs[i]); + kfree(parser->ibs); + if (parser->uf.bo) + drm_gem_object_unreference_unlocked(&parser->uf.bo->gem_base); + } + + kfree(parser); } /** @@ -533,12 +501,6 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bo amdgpu_cs_parser_fini_late(parser); } -static int amdgpu_cs_parser_free_job(struct 
amdgpu_cs_parser *sched_job) -{ - amdgpu_cs_parser_fini_late(sched_job); - return 0; -} - static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p, struct amdgpu_vm *vm) { @@ -810,68 +772,16 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev, return 0; } -static int amdgpu_cs_parser_prepare_job(struct amdgpu_cs_parser *sched_job) +static int amdgpu_cs_free_job(struct amdgpu_job *sched_job) { - int r, i; - struct amdgpu_cs_parser *parser = sched_job; - struct amdgpu_device *adev = sched_job->adev; - bool reserved_buffers = false; - - r = amdgpu_cs_parser_relocs(parser); - if (r) { - if (r != -ERESTARTSYS) { - if (r == -ENOMEM) - DRM_ERROR("Not enough memory for command submission!\n"); - else - DRM_ERROR("Failed to process the buffer list %d!\n", r); - } - } - - if (!r) { - reserved_buffers = true; - r = amdgpu_cs_ib_fill(adev, parser); - } - if (!r) { - r = amdgpu_cs_dependencies(adev, parser); - if (r) - DRM_ERROR("Failed in the dependencies handling %d!\n", r); - } - if (r) { - amdgpu_cs_parser_fini(parser, r, reserved_buffers); - return r; - } - - for (i = 0; i < parser->num_ibs; i++) - trace_amdgpu_cs(parser, i); - - r = amdgpu_cs_ib_vm_chunk(adev, parser); - return r; -} - -static struct amdgpu_ring *amdgpu_cs_parser_get_ring( - struct amdgpu_device *adev, - struct amdgpu_cs_parser *parser) -{ - int i, r; - - struct amdgpu_cs_chunk *chunk; - struct drm_amdgpu_cs_chunk_ib *chunk_ib; - struct amdgpu_ring *ring; - for (i = 0; i < parser->nchunks; i++) { - chunk = &parser->chunks[i]; - chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata; - - if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB) - continue; - - r = amdgpu_cs_get_ring(adev, chunk_ib->ip_type, - chunk_ib->ip_instance, chunk_ib->ring, - &ring); - if (r) - return NULL; - break; - } - return ring; + int i; + if (sched_job->ibs) + for (i = 0; i < sched_job->num_ibs; i++) + amdgpu_ib_free(sched_job->adev, &sched_job->ibs[i]); + kfree(sched_job->ibs); + if (sched_job->uf.bo) + drm_gem_object_unreference_unlocked(&sched_job->uf.bo->gem_base); + return 0; } int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) @@ -879,7 +789,8 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) struct amdgpu_device *adev = dev->dev_private; union drm_amdgpu_cs *cs = data; struct amdgpu_cs_parser *parser; - int r; + bool reserved_buffers = false; + int i, r; down_read(&adev->exclusive_lock); if (!adev->accel_working) { @@ -899,44 +810,79 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) return r; } - if (amdgpu_enable_scheduler && parser->num_ibs) { - struct amdgpu_ring * ring = - amdgpu_cs_parser_get_ring(adev, parser); - r = amdgpu_cs_parser_prepare_job(parser); + r = amdgpu_cs_parser_relocs(parser); + if (r == -ENOMEM) + DRM_ERROR("Not enough memory for command submission!\n"); + else if (r && r != -ERESTARTSYS) + DRM_ERROR("Failed to process the buffer list %d!\n", r); + else if (!r) { + reserved_buffers = true; + r = amdgpu_cs_ib_fill(adev, parser); + } + + if (!r) { + r = amdgpu_cs_dependencies(adev, parser); if (r) - goto out; - parser->ring = ring; - parser->free_job = amdgpu_cs_parser_free_job; - mutex_lock(&parser->job_lock); - r = amd_sched_push_job(ring->scheduler, - &parser->ctx->rings[ring->idx].entity, - parser, - &parser->s_fence); + DRM_ERROR("Failed in the dependencies handling %d!\n", r); + } + + if (r) + goto out; + + for (i = 0; i < parser->num_ibs; i++) + trace_amdgpu_cs(parser, i); + + r = amdgpu_cs_ib_vm_chunk(adev, parser); + if 
(r) + goto out; + + if (amdgpu_enable_scheduler && parser->num_ibs) { + struct amdgpu_job *job; + struct amdgpu_ring * ring = parser->ibs->ring; + job = kzalloc(sizeof(struct amdgpu_job), GFP_KERNEL); + if (!job) + return -ENOMEM; + job->base.sched = ring->scheduler; + job->base.s_entity = &parser->ctx->rings[ring->idx].entity; + job->adev = parser->adev; + job->ibs = parser->ibs; + job->num_ibs = parser->num_ibs; + job->base.owner = parser->filp; + mutex_init(&job->job_lock); + if (job->ibs[job->num_ibs - 1].user) { + memcpy(&job->uf, &parser->uf, + sizeof(struct amdgpu_user_fence)); + job->ibs[job->num_ibs - 1].user = &job->uf; + } + + job->free_job = amdgpu_cs_free_job; + mutex_lock(&job->job_lock); + r = amd_sched_entity_push_job((struct amd_sched_job *)job); if (r) { - mutex_unlock(&parser->job_lock); + mutex_unlock(&job->job_lock); + amdgpu_cs_free_job(job); + kfree(job); goto out; } - parser->ibs[parser->num_ibs - 1].sequence = + cs->out.handle = amdgpu_ctx_add_fence(parser->ctx, ring, - &parser->s_fence->base, - parser->s_fence->v_seq); - cs->out.handle = parser->s_fence->v_seq; + &job->base.s_fence->base); + parser->ibs[parser->num_ibs - 1].sequence = cs->out.handle; + list_sort(NULL, &parser->validated, cmp_size_smaller_first); ttm_eu_fence_buffer_objects(&parser->ticket, &parser->validated, - &parser->s_fence->base); + &job->base.s_fence->base); - mutex_unlock(&parser->job_lock); + mutex_unlock(&job->job_lock); + amdgpu_cs_parser_fini_late(parser); up_read(&adev->exclusive_lock); return 0; } - r = amdgpu_cs_parser_prepare_job(parser); - if (r) - goto out; cs->out.handle = parser->ibs[parser->num_ibs - 1].sequence; out: - amdgpu_cs_parser_fini(parser, r, true); + amdgpu_cs_parser_fini(parser, r, reserved_buffers); up_read(&adev->exclusive_lock); r = amdgpu_cs_handle_lockup(adev, r); return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 08bc7722ddb8..20cbc4eb5a6f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -229,17 +229,13 @@ int amdgpu_ctx_put(struct amdgpu_ctx *ctx) } uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, - struct fence *fence, uint64_t queued_seq) + struct fence *fence) { struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx]; - uint64_t seq = 0; + uint64_t seq = cring->sequence; unsigned idx = 0; struct fence *other = NULL; - if (amdgpu_enable_scheduler) - seq = queued_seq; - else - seq = cring->sequence; idx = seq % AMDGPU_CTX_MAX_CS_PENDING; other = cring->fences[idx]; if (other) { @@ -253,8 +249,7 @@ uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, spin_lock(&ctx->ring_lock); cring->fences[idx] = fence; - if (!amdgpu_enable_scheduler) - cring->sequence++; + cring->sequence++; spin_unlock(&ctx->ring_lock); fence_put(other); @@ -267,21 +262,16 @@ struct fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx, { struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx]; struct fence *fence; - uint64_t queued_seq; spin_lock(&ctx->ring_lock); - if (amdgpu_enable_scheduler) - queued_seq = amd_sched_next_queued_seq(&cring->entity); - else - queued_seq = cring->sequence; - if (seq >= queued_seq) { + if (seq >= cring->sequence) { spin_unlock(&ctx->ring_lock); return ERR_PTR(-EINVAL); } - if (seq + AMDGPU_CTX_MAX_CS_PENDING < queued_seq) { + if (seq + AMDGPU_CTX_MAX_CS_PENDING < cring->sequence) { spin_unlock(&ctx->ring_lock); return NULL; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index e6fa27805207..0fcc0bd1622c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -49,9 +49,10 @@ /* * KMS wrapper. * - 3.0.0 - initial driver + * - 3.1.0 - allow reading more status registers (GRBM, SRBM, SDMA, CP) */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 0 +#define KMS_DRIVER_MINOR 1 #define KMS_DRIVER_PATCHLEVEL 0 int amdgpu_vram_limit = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 98500f1756f7..f446bf2fedc9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -626,10 +626,10 @@ void amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring) ring->fence_drv.ring = ring; if (amdgpu_enable_scheduler) { - ring->scheduler = amd_sched_create((void *)ring->adev, - &amdgpu_sched_ops, - ring->idx, 5, 0, - amdgpu_sched_hw_submission); + ring->scheduler = amd_sched_create(&amdgpu_sched_ops, + ring->idx, + amdgpu_sched_hw_submission, + (void *)ring->adev); if (!ring->scheduler) DRM_ERROR("Failed to create scheduler on ring %d.\n", ring->idx); @@ -836,22 +836,37 @@ static inline bool amdgpu_test_signaled(struct amdgpu_fence *fence) return test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->base.flags); } -static inline bool amdgpu_test_signaled_any(struct amdgpu_fence **fences) +static bool amdgpu_test_signaled_any(struct fence **fences, uint32_t count) { int idx; - struct amdgpu_fence *fence; + struct fence *fence; - idx = 0; - for (idx = 0; idx < AMDGPU_MAX_RINGS; ++idx) { + for (idx = 0; idx < count; ++idx) { fence = fences[idx]; if (fence) { - if (test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->base.flags)) + if (test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->flags)) return true; } } return false; } +static bool amdgpu_test_signaled_all(struct fence **fences, uint32_t count) +{ + int idx; + struct fence *fence; + + for (idx = 0; idx < count; ++idx) { + fence = fences[idx]; + if (fence) { + if (!test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->flags)) + return false; + } + } + + return true; +} + struct amdgpu_wait_cb { struct fence_cb base; struct task_struct *task; @@ -867,33 +882,56 @@ static void amdgpu_fence_wait_cb(struct fence *fence, struct fence_cb *cb) static signed long amdgpu_fence_default_wait(struct fence *f, bool intr, signed long t) { - struct amdgpu_fence *array[AMDGPU_MAX_RINGS]; struct amdgpu_fence *fence = to_amdgpu_fence(f); struct amdgpu_device *adev = fence->ring->adev; - memset(&array[0], 0, sizeof(array)); - array[0] = fence; - - return amdgpu_fence_wait_any(adev, array, intr, t); + return amdgpu_fence_wait_multiple(adev, &f, 1, false, intr, t); } -/* wait until any fence in array signaled */ -signed long amdgpu_fence_wait_any(struct amdgpu_device *adev, - struct amdgpu_fence **array, bool intr, signed long t) +/** + * Wait the fence array with timeout + * + * @adev: amdgpu device + * @array: the fence array with amdgpu fence pointer + * @count: the number of the fence array + * @wait_all: the flag of wait all(true) or wait any(false) + * @intr: when sleep, set the current task interruptable or not + * @t: timeout to wait + * + * If wait_all is true, it will return when all fences are signaled or timeout. + * If wait_all is false, it will return when any fence is signaled or timeout. 
+ */ +signed long amdgpu_fence_wait_multiple(struct amdgpu_device *adev, + struct fence **array, + uint32_t count, + bool wait_all, + bool intr, + signed long t) { long idx = 0; - struct amdgpu_wait_cb cb[AMDGPU_MAX_RINGS]; - struct amdgpu_fence *fence; + struct amdgpu_wait_cb *cb; + struct fence *fence; BUG_ON(!array); - for (idx = 0; idx < AMDGPU_MAX_RINGS; ++idx) { + cb = kcalloc(count, sizeof(struct amdgpu_wait_cb), GFP_KERNEL); + if (cb == NULL) { + t = -ENOMEM; + goto err_free_cb; + } + + for (idx = 0; idx < count; ++idx) { fence = array[idx]; if (fence) { cb[idx].task = current; - if (fence_add_callback(&fence->base, - &cb[idx].base, amdgpu_fence_wait_cb)) - return t; /* return if fence is already signaled */ + if (fence_add_callback(fence, + &cb[idx].base, amdgpu_fence_wait_cb)) { + /* The fence is already signaled */ + if (wait_all) + continue; + else + goto fence_rm_cb; + } } } @@ -907,7 +945,9 @@ signed long amdgpu_fence_wait_any(struct amdgpu_device *adev, * amdgpu_test_signaled_any must be called after * set_current_state to prevent a race with wake_up_process */ - if (amdgpu_test_signaled_any(array)) + if (!wait_all && amdgpu_test_signaled_any(array, count)) + break; + if (wait_all && amdgpu_test_signaled_all(array, count)) break; if (adev->needs_reset) { @@ -923,13 +963,16 @@ signed long amdgpu_fence_wait_any(struct amdgpu_device *adev, __set_current_state(TASK_RUNNING); - idx = 0; - for (idx = 0; idx < AMDGPU_MAX_RINGS; ++idx) { +fence_rm_cb: + for (idx = 0; idx < count; ++idx) { fence = array[idx]; - if (fence) - fence_remove_callback(&fence->base, &cb[idx].base); + if (fence && cb[idx].base.func) + fence_remove_callback(fence, &cb[idx].base); } +err_free_cb: + kfree(cb); + return t; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 5104e64e9ad8..c439735ee670 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -73,29 +73,12 @@ int amdgpu_ib_get(struct amdgpu_ring *ring, struct amdgpu_vm *vm, if (!vm) ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo); - else - ib->gpu_addr = 0; - - } else { - ib->sa_bo = NULL; - ib->ptr = NULL; - ib->gpu_addr = 0; } amdgpu_sync_create(&ib->sync); ib->ring = ring; - ib->fence = NULL; - ib->user = NULL; ib->vm = vm; - ib->ctx = NULL; - ib->gds_base = 0; - ib->gds_size = 0; - ib->gws_base = 0; - ib->gws_size = 0; - ib->oa_base = 0; - ib->oa_size = 0; - ib->flags = 0; return 0; } @@ -110,8 +93,8 @@ int amdgpu_ib_get(struct amdgpu_ring *ring, struct amdgpu_vm *vm, */ void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib) { - amdgpu_sync_free(adev, &ib->sync, ib->fence); - amdgpu_sa_bo_free(adev, &ib->sa_bo, ib->fence); + amdgpu_sync_free(adev, &ib->sync, &ib->fence->base); + amdgpu_sa_bo_free(adev, &ib->sa_bo, &ib->fence->base); amdgpu_fence_unref(&ib->fence); } @@ -143,7 +126,6 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs, struct amdgpu_ring *ring; struct amdgpu_ctx *ctx, *old_ctx; struct amdgpu_vm *vm; - uint64_t sequence; unsigned i; int r = 0; @@ -158,7 +140,11 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs, dev_err(adev->dev, "couldn't schedule ib\n"); return -EINVAL; } - + r = amdgpu_sync_wait(&ibs->sync); + if (r) { + dev_err(adev->dev, "IB sync failed (%d).\n", r); + return r; + } r = amdgpu_ring_lock(ring, (256 + AMDGPU_NUM_SYNCS * 8) * num_ibs); if (r) { dev_err(adev->dev, "scheduling IB failed (%d).\n", r); @@ -216,12 +202,9 @@ int amdgpu_ib_schedule(struct amdgpu_device 
*adev, unsigned num_ibs, return r; } - sequence = amdgpu_enable_scheduler ? ib->sequence : 0; - if (!amdgpu_enable_scheduler && ib->ctx) ib->sequence = amdgpu_ctx_add_fence(ib->ctx, ring, - &ib->fence->base, - sequence); + &ib->fence->base); /* wrap the last IB with fence */ if (ib->user) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c index 90044b254404..5c8a803acedc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c @@ -98,18 +98,12 @@ int amdgpu_ih_ring_init(struct amdgpu_device *adev, unsigned ring_size, /* add 8 bytes for the rptr/wptr shadows and * add them to the end of the ring allocation. */ - adev->irq.ih.ring = kzalloc(adev->irq.ih.ring_size + 8, GFP_KERNEL); + adev->irq.ih.ring = pci_alloc_consistent(adev->pdev, + adev->irq.ih.ring_size + 8, + &adev->irq.ih.rb_dma_addr); if (adev->irq.ih.ring == NULL) return -ENOMEM; - adev->irq.ih.rb_dma_addr = pci_map_single(adev->pdev, - (void *)adev->irq.ih.ring, - adev->irq.ih.ring_size, - PCI_DMA_BIDIRECTIONAL); - if (pci_dma_mapping_error(adev->pdev, adev->irq.ih.rb_dma_addr)) { - dev_err(&adev->pdev->dev, "Failed to DMA MAP the IH RB page\n"); - kfree((void *)adev->irq.ih.ring); - return -ENOMEM; - } + memset((void *)adev->irq.ih.ring, 0, adev->irq.ih.ring_size + 8); adev->irq.ih.wptr_offs = (adev->irq.ih.ring_size / 4) + 0; adev->irq.ih.rptr_offs = (adev->irq.ih.ring_size / 4) + 1; } @@ -149,9 +143,9 @@ void amdgpu_ih_ring_fini(struct amdgpu_device *adev) /* add 8 bytes for the rptr/wptr shadows and * add them to the end of the ring allocation. */ - pci_unmap_single(adev->pdev, adev->irq.ih.rb_dma_addr, - adev->irq.ih.ring_size + 8, PCI_DMA_BIDIRECTIONAL); - kfree((void *)adev->irq.ih.ring); + pci_free_consistent(adev->pdev, adev->irq.ih.ring_size + 8, + (void *)adev->irq.ih.ring, + adev->irq.ih.rb_dma_addr); adev->irq.ih.ring = NULL; } } else { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 87da6b1848fd..22367939ebf1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -560,6 +560,8 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev, if (!fpriv) return; + amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr); + amdgpu_vm_fini(adev, &fpriv->vm); idr_for_each_entry(&fpriv->bo_list_handles, list, handle) @@ -568,8 +570,6 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev, idr_destroy(&fpriv->bo_list_handles); mutex_destroy(&fpriv->bo_list_lock); - amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr); - kfree(fpriv); file_priv->driver_priv = NULL; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index 238465a9ac55..6ea18dcec561 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -193,7 +193,7 @@ int amdgpu_sa_bo_new(struct amdgpu_device *adev, unsigned size, unsigned align); void amdgpu_sa_bo_free(struct amdgpu_device *adev, struct amdgpu_sa_bo **sa_bo, - struct amdgpu_fence *fence); + struct fence *fence); #if defined(CONFIG_DEBUG_FS) void amdgpu_sa_bo_dump_debug_info(struct amdgpu_sa_manager *sa_manager, struct seq_file *m); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c index d6398cf45f24..b92525329d6c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c @@ -139,6 +139,20 @@ int amdgpu_sa_bo_manager_suspend(struct amdgpu_device *adev, return r; } +static uint32_t 
amdgpu_sa_get_ring_from_fence(struct fence *f) +{ + struct amdgpu_fence *a_fence; + struct amd_sched_fence *s_fence; + + s_fence = to_amd_sched_fence(f); + if (s_fence) + return s_fence->scheduler->ring_id; + a_fence = to_amdgpu_fence(f); + if (a_fence) + return a_fence->ring->idx; + return 0; +} + static void amdgpu_sa_bo_remove_locked(struct amdgpu_sa_bo *sa_bo) { struct amdgpu_sa_manager *sa_manager = sa_bo->manager; @@ -147,7 +161,7 @@ static void amdgpu_sa_bo_remove_locked(struct amdgpu_sa_bo *sa_bo) } list_del_init(&sa_bo->olist); list_del_init(&sa_bo->flist); - amdgpu_fence_unref(&sa_bo->fence); + fence_put(sa_bo->fence); kfree(sa_bo); } @@ -161,7 +175,7 @@ static void amdgpu_sa_bo_try_free(struct amdgpu_sa_manager *sa_manager) sa_bo = list_entry(sa_manager->hole->next, struct amdgpu_sa_bo, olist); list_for_each_entry_safe_from(sa_bo, tmp, &sa_manager->olist, olist) { if (sa_bo->fence == NULL || - !fence_is_signaled(&sa_bo->fence->base)) { + !fence_is_signaled(sa_bo->fence)) { return; } amdgpu_sa_bo_remove_locked(sa_bo); @@ -246,7 +260,7 @@ static bool amdgpu_sa_event(struct amdgpu_sa_manager *sa_manager, } static bool amdgpu_sa_bo_next_hole(struct amdgpu_sa_manager *sa_manager, - struct amdgpu_fence **fences, + struct fence **fences, unsigned *tries) { struct amdgpu_sa_bo *best_bo = NULL; @@ -275,7 +289,7 @@ static bool amdgpu_sa_bo_next_hole(struct amdgpu_sa_manager *sa_manager, sa_bo = list_first_entry(&sa_manager->flist[i], struct amdgpu_sa_bo, flist); - if (!fence_is_signaled(&sa_bo->fence->base)) { + if (!fence_is_signaled(sa_bo->fence)) { fences[i] = sa_bo->fence; continue; } @@ -299,7 +313,8 @@ static bool amdgpu_sa_bo_next_hole(struct amdgpu_sa_manager *sa_manager, } if (best_bo) { - ++tries[best_bo->fence->ring->idx]; + uint32_t idx = amdgpu_sa_get_ring_from_fence(best_bo->fence); + ++tries[idx]; sa_manager->hole = best_bo->olist.prev; /* we knew that this one is signaled, @@ -315,7 +330,7 @@ int amdgpu_sa_bo_new(struct amdgpu_device *adev, struct amdgpu_sa_bo **sa_bo, unsigned size, unsigned align) { - struct amdgpu_fence *fences[AMDGPU_MAX_RINGS]; + struct fence *fences[AMDGPU_MAX_RINGS]; unsigned tries[AMDGPU_MAX_RINGS]; int i, r; signed long t; @@ -352,7 +367,8 @@ int amdgpu_sa_bo_new(struct amdgpu_device *adev, } while (amdgpu_sa_bo_next_hole(sa_manager, fences, tries)); spin_unlock(&sa_manager->wq.lock); - t = amdgpu_fence_wait_any(adev, fences, false, MAX_SCHEDULE_TIMEOUT); + t = amdgpu_fence_wait_multiple(adev, fences, AMDGPU_MAX_RINGS, false, false, + MAX_SCHEDULE_TIMEOUT); r = (t > 0) ? 
0 : t; spin_lock(&sa_manager->wq.lock); /* if we have nothing to wait for block */ @@ -372,7 +388,7 @@ int amdgpu_sa_bo_new(struct amdgpu_device *adev, } void amdgpu_sa_bo_free(struct amdgpu_device *adev, struct amdgpu_sa_bo **sa_bo, - struct amdgpu_fence *fence) + struct fence *fence) { struct amdgpu_sa_manager *sa_manager; @@ -382,10 +398,11 @@ void amdgpu_sa_bo_free(struct amdgpu_device *adev, struct amdgpu_sa_bo **sa_bo, sa_manager = (*sa_bo)->manager; spin_lock(&sa_manager->wq.lock); - if (fence && !fence_is_signaled(&fence->base)) { - (*sa_bo)->fence = amdgpu_fence_ref(fence); - list_add_tail(&(*sa_bo)->flist, - &sa_manager->flist[fence->ring->idx]); + if (fence && !fence_is_signaled(fence)) { + uint32_t idx; + (*sa_bo)->fence = fence_get(fence); + idx = amdgpu_sa_get_ring_from_fence(fence); + list_add_tail(&(*sa_bo)->flist, &sa_manager->flist[idx]); } else { amdgpu_sa_bo_remove_locked(*sa_bo); } @@ -412,8 +429,16 @@ void amdgpu_sa_bo_dump_debug_info(struct amdgpu_sa_manager *sa_manager, seq_printf(m, "[0x%010llx 0x%010llx] size %8lld", soffset, eoffset, eoffset - soffset); if (i->fence) { - seq_printf(m, " protected by 0x%016llx on ring %d", - i->fence->seq, i->fence->ring->idx); + struct amdgpu_fence *a_fence = to_amdgpu_fence(i->fence); + struct amd_sched_fence *s_fence = to_amd_sched_fence(i->fence); + if (a_fence) + seq_printf(m, " protected by 0x%016llx on ring %d", + a_fence->seq, a_fence->ring->idx); + if (s_fence) + seq_printf(m, " protected by 0x%016x on ring %d", + s_fence->base.seqno, + s_fence->scheduler->ring_id); + } seq_printf(m, "\n"); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c index a86e38158afa..f93fb3541488 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c @@ -27,55 +27,28 @@ #include <drm/drmP.h> #include "amdgpu.h" -static int amdgpu_sched_prepare_job(struct amd_gpu_scheduler *sched, - struct amd_sched_entity *entity, - struct amd_sched_job *job) +static struct fence *amdgpu_sched_run_job(struct amd_sched_job *job) { - int r = 0; - struct amdgpu_cs_parser *sched_job; - if (!job || !job->data) { - DRM_ERROR("job is null\n"); - return -EINVAL; - } - - sched_job = (struct amdgpu_cs_parser *)job->data; - if (sched_job->prepare_job) { - r = sched_job->prepare_job(sched_job); - if (r) { - DRM_ERROR("Prepare job error\n"); - schedule_work(&sched_job->job_work); - } - } - return r; -} - -static struct fence *amdgpu_sched_run_job(struct amd_gpu_scheduler *sched, - struct amd_sched_entity *entity, - struct amd_sched_job *job) -{ - int r = 0; - struct amdgpu_cs_parser *sched_job; + struct amdgpu_job *sched_job; struct amdgpu_fence *fence; + int r; - if (!job || !job->data) { + if (!job) { DRM_ERROR("job is null\n"); return NULL; } - sched_job = (struct amdgpu_cs_parser *)job->data; + sched_job = (struct amdgpu_job *)job; mutex_lock(&sched_job->job_lock); r = amdgpu_ib_schedule(sched_job->adev, sched_job->num_ibs, sched_job->ibs, - sched_job->filp); + sched_job->base.owner); if (r) goto err; fence = amdgpu_fence_ref(sched_job->ibs[sched_job->num_ibs - 1].fence); - if (sched_job->run_job) { - r = sched_job->run_job(sched_job); - if (r) - goto err; - } + if (sched_job->free_job) + sched_job->free_job(sched_job); mutex_unlock(&sched_job->job_lock); return &fence->base; @@ -83,25 +56,25 @@ static struct fence *amdgpu_sched_run_job(struct amd_gpu_scheduler *sched, err: DRM_ERROR("Run job error\n"); mutex_unlock(&sched_job->job_lock); - 
schedule_work(&sched_job->job_work); + job->sched->ops->process_job(job); return NULL; } -static void amdgpu_sched_process_job(struct amd_gpu_scheduler *sched, - struct amd_sched_job *job) +static void amdgpu_sched_process_job(struct amd_sched_job *job) { - struct amdgpu_cs_parser *sched_job; + struct amdgpu_job *sched_job; - if (!job || !job->data) { + if (!job) { DRM_ERROR("job is null\n"); return; } - sched_job = (struct amdgpu_cs_parser *)job->data; - schedule_work(&sched_job->job_work); + sched_job = (struct amdgpu_job *)job; + /* after processing job, free memory */ + fence_put(&sched_job->base.s_fence->base); + kfree(sched_job); } struct amd_sched_backend_ops amdgpu_sched_ops = { - .prepare_job = amdgpu_sched_prepare_job, .run_job = amdgpu_sched_run_job, .process_job = amdgpu_sched_process_job }; @@ -110,36 +83,39 @@ int amdgpu_sched_ib_submit_kernel_helper(struct amdgpu_device *adev, struct amdgpu_ring *ring, struct amdgpu_ib *ibs, unsigned num_ibs, - int (*free_job)(struct amdgpu_cs_parser *), + int (*free_job)(struct amdgpu_job *), void *owner, struct fence **f) { int r = 0; if (amdgpu_enable_scheduler) { - struct amdgpu_cs_parser *sched_job = - amdgpu_cs_parser_create(adev, owner, &adev->kernel_ctx, - ibs, num_ibs); - if(!sched_job) { + struct amdgpu_job *job = + kzalloc(sizeof(struct amdgpu_job), GFP_KERNEL); + if (!job) return -ENOMEM; - } - sched_job->free_job = free_job; - mutex_lock(&sched_job->job_lock); - r = amd_sched_push_job(ring->scheduler, - &adev->kernel_ctx.rings[ring->idx].entity, - sched_job, &sched_job->s_fence); + job->base.sched = ring->scheduler; + job->base.s_entity = &adev->kernel_ctx.rings[ring->idx].entity; + job->adev = adev; + job->ibs = ibs; + job->num_ibs = num_ibs; + job->base.owner = owner; + mutex_init(&job->job_lock); + job->free_job = free_job; + mutex_lock(&job->job_lock); + r = amd_sched_entity_push_job((struct amd_sched_job *)job); if (r) { - mutex_unlock(&sched_job->job_lock); - kfree(sched_job); + mutex_unlock(&job->job_lock); + kfree(job); return r; } - ibs[num_ibs - 1].sequence = sched_job->s_fence->v_seq; - *f = fence_get(&sched_job->s_fence->base); - mutex_unlock(&sched_job->job_lock); + *f = fence_get(&job->base.s_fence->base); + mutex_unlock(&job->job_lock); } else { r = amdgpu_ib_schedule(adev, num_ibs, ibs, owner); if (r) return r; *f = fence_get(&ibs[num_ibs - 1].fence->base); } + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_semaphore.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_semaphore.c index d6d41a42ab65..ff3ca52ec6fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_semaphore.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_semaphore.c @@ -87,7 +87,7 @@ bool amdgpu_semaphore_emit_wait(struct amdgpu_ring *ring, void amdgpu_semaphore_free(struct amdgpu_device *adev, struct amdgpu_semaphore **semaphore, - struct amdgpu_fence *fence) + struct fence *fence) { if (semaphore == NULL || *semaphore == NULL) { return; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index 7cb711fc1ee2..4fffb2539331 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -32,6 +32,11 @@ #include "amdgpu.h" #include "amdgpu_trace.h" +struct amdgpu_sync_entry { + struct hlist_node node; + struct fence *fence; +}; + /** * amdgpu_sync_create - zero init sync object * @@ -49,9 +54,33 @@ void amdgpu_sync_create(struct amdgpu_sync *sync) for (i = 0; i < AMDGPU_MAX_RINGS; ++i) sync->sync_to[i] = NULL; + hash_init(sync->fences); sync->last_vm_update = NULL; } +static bool 
amdgpu_sync_same_dev(struct amdgpu_device *adev, struct fence *f) +{ + struct amdgpu_fence *a_fence = to_amdgpu_fence(f); + struct amd_sched_fence *s_fence = to_amd_sched_fence(f); + + if (a_fence) + return a_fence->ring->adev == adev; + if (s_fence) + return (struct amdgpu_device *)s_fence->scheduler->priv == adev; + return false; +} + +static bool amdgpu_sync_test_owner(struct fence *f, void *owner) +{ + struct amdgpu_fence *a_fence = to_amdgpu_fence(f); + struct amd_sched_fence *s_fence = to_amd_sched_fence(f); + if (s_fence) + return s_fence->owner == owner; + if (a_fence) + return a_fence->owner == owner; + return false; +} + /** * amdgpu_sync_fence - remember to sync to this fence * @@ -62,28 +91,54 @@ void amdgpu_sync_create(struct amdgpu_sync *sync) int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync, struct fence *f) { + struct amdgpu_sync_entry *e; struct amdgpu_fence *fence; struct amdgpu_fence *other; + struct fence *tmp, *later; if (!f) return 0; + if (amdgpu_sync_same_dev(adev, f) && + amdgpu_sync_test_owner(f, AMDGPU_FENCE_OWNER_VM)) { + if (sync->last_vm_update) { + tmp = sync->last_vm_update; + BUG_ON(f->context != tmp->context); + later = (f->seqno - tmp->seqno <= INT_MAX) ? f : tmp; + sync->last_vm_update = fence_get(later); + fence_put(tmp); + } else + sync->last_vm_update = fence_get(f); + } + fence = to_amdgpu_fence(f); - if (!fence || fence->ring->adev != adev) - return fence_wait(f, true); + if (!fence || fence->ring->adev != adev) { + hash_for_each_possible(sync->fences, e, node, f->context) { + struct fence *new; + if (unlikely(e->fence->context != f->context)) + continue; + new = fence_get(fence_later(e->fence, f)); + if (new) { + fence_put(e->fence); + e->fence = new; + } + return 0; + } + + e = kmalloc(sizeof(struct amdgpu_sync_entry), GFP_KERNEL); + if (!e) + return -ENOMEM; + + hash_add(sync->fences, &e->node, f->context); + e->fence = fence_get(f); + return 0; + } other = sync->sync_to[fence->ring->idx]; sync->sync_to[fence->ring->idx] = amdgpu_fence_ref( amdgpu_fence_later(fence, other)); amdgpu_fence_unref(&other); - if (fence->owner == AMDGPU_FENCE_OWNER_VM) { - other = sync->last_vm_update; - sync->last_vm_update = amdgpu_fence_ref( - amdgpu_fence_later(fence, other)); - amdgpu_fence_unref(&other); - } - return 0; } @@ -147,6 +202,24 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, return r; } +int amdgpu_sync_wait(struct amdgpu_sync *sync) +{ + struct amdgpu_sync_entry *e; + struct hlist_node *tmp; + int i, r; + + hash_for_each_safe(sync->fences, i, tmp, e, node) { + r = fence_wait(e->fence, false); + if (r) + return r; + + hash_del(&e->node); + fence_put(e->fence); + kfree(e); + } + return 0; +} + /** * amdgpu_sync_rings - sync ring to all registered fences * @@ -234,15 +307,23 @@ int amdgpu_sync_rings(struct amdgpu_sync *sync, */ void amdgpu_sync_free(struct amdgpu_device *adev, struct amdgpu_sync *sync, - struct amdgpu_fence *fence) + struct fence *fence) { + struct amdgpu_sync_entry *e; + struct hlist_node *tmp; unsigned i; + hash_for_each_safe(sync->fences, i, tmp, e, node) { + hash_del(&e->node); + fence_put(e->fence); + kfree(e); + } + for (i = 0; i < AMDGPU_NUM_SYNCS; ++i) amdgpu_semaphore_free(adev, &sync->semaphores[i], fence); for (i = 0; i < AMDGPU_MAX_RINGS; ++i) amdgpu_fence_unref(&sync->sync_to[i]); - amdgpu_fence_unref(&sync->last_vm_update); + fence_put(sync->last_vm_update); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c index 962dd5552137..f80b1a43be8a 
100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c @@ -77,7 +77,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) void *gtt_map, *vram_map; void **gtt_start, **gtt_end; void **vram_start, **vram_end; - struct amdgpu_fence *fence = NULL; + struct fence *fence = NULL; r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_GTT, 0, NULL, gtt_obj + i); @@ -116,13 +116,13 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) goto out_lclean_unpin; } - r = fence_wait(&fence->base, false); + r = fence_wait(fence, false); if (r) { DRM_ERROR("Failed to wait for GTT->VRAM fence %d\n", i); goto out_lclean_unpin; } - amdgpu_fence_unref(&fence); + fence_put(fence); r = amdgpu_bo_kmap(vram_obj, &vram_map); if (r) { @@ -161,13 +161,13 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) goto out_lclean_unpin; } - r = fence_wait(&fence->base, false); + r = fence_wait(fence, false); if (r) { DRM_ERROR("Failed to wait for VRAM->GTT fence %d\n", i); goto out_lclean_unpin; } - amdgpu_fence_unref(&fence); + fence_put(fence); r = amdgpu_bo_kmap(gtt_obj[i], &gtt_map); if (r) { @@ -214,7 +214,7 @@ out_lclean: amdgpu_bo_unref(&gtt_obj[i]); } if (fence) - amdgpu_fence_unref(&fence); + fence_put(fence); break; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index dd3415d2e45d..399143541d8a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -228,7 +228,7 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo, struct amdgpu_device *adev; struct amdgpu_ring *ring; uint64_t old_start, new_start; - struct amdgpu_fence *fence; + struct fence *fence; int r; adev = amdgpu_get_adev(bo->bdev); @@ -269,9 +269,9 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo, new_mem->num_pages * PAGE_SIZE, /* bytes */ bo->resv, &fence); /* FIXME: handle copy error */ - r = ttm_bo_move_accel_cleanup(bo, &fence->base, + r = ttm_bo_move_accel_cleanup(bo, fence, evict, no_wait_gpu, new_mem); - amdgpu_fence_unref(&fence); + fence_put(fence); return r; } @@ -987,46 +987,48 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t dst_offset, uint32_t byte_count, struct reservation_object *resv, - struct amdgpu_fence **fence) + struct fence **fence) { struct amdgpu_device *adev = ring->adev; - struct amdgpu_sync sync; uint32_t max_bytes; unsigned num_loops, num_dw; + struct amdgpu_ib *ib; unsigned i; int r; - /* sync other rings */ - amdgpu_sync_create(&sync); - if (resv) { - r = amdgpu_sync_resv(adev, &sync, resv, false); - if (r) { - DRM_ERROR("sync failed (%d).\n", r); - amdgpu_sync_free(adev, &sync, NULL); - return r; - } - } - max_bytes = adev->mman.buffer_funcs->copy_max_bytes; num_loops = DIV_ROUND_UP(byte_count, max_bytes); num_dw = num_loops * adev->mman.buffer_funcs->copy_num_dw; - /* for fence and sync */ - num_dw += 64 + AMDGPU_NUM_SYNCS * 8; + /* for IB padding */ + while (num_dw & 0x7) + num_dw++; + + ib = kzalloc(sizeof(struct amdgpu_ib), GFP_KERNEL); + if (!ib) + return -ENOMEM; - r = amdgpu_ring_lock(ring, num_dw); + r = amdgpu_ib_get(ring, NULL, num_dw * 4, ib); if (r) { - DRM_ERROR("ring lock failed (%d).\n", r); - amdgpu_sync_free(adev, &sync, NULL); + kfree(ib); return r; } - amdgpu_sync_rings(&sync, ring); + ib->length_dw = 0; + + if (resv) { + r = amdgpu_sync_resv(adev, &ib->sync, resv, + AMDGPU_FENCE_OWNER_UNDEFINED); + if (r) { + DRM_ERROR("sync failed (%d).\n", r); + goto error_free; + } + } for (i = 0; i < 
num_loops; i++) { uint32_t cur_size_in_bytes = min(byte_count, max_bytes); - amdgpu_emit_copy_buffer(adev, ring, src_offset, dst_offset, + amdgpu_emit_copy_buffer(adev, ib, src_offset, dst_offset, cur_size_in_bytes); src_offset += cur_size_in_bytes; @@ -1034,17 +1036,24 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, byte_count -= cur_size_in_bytes; } - r = amdgpu_fence_emit(ring, AMDGPU_FENCE_OWNER_MOVE, fence); - if (r) { - amdgpu_ring_unlock_undo(ring); - amdgpu_sync_free(adev, &sync, NULL); - return r; - } - - amdgpu_ring_unlock_commit(ring); - amdgpu_sync_free(adev, &sync, *fence); + amdgpu_vm_pad_ib(adev, ib); + WARN_ON(ib->length_dw > num_dw); + r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, ib, 1, + &amdgpu_vm_free_job, + AMDGPU_FENCE_OWNER_MOVE, + fence); + if (r) + goto error_free; + if (!amdgpu_enable_scheduler) { + amdgpu_ib_free(adev, ib); + kfree(ib); + } return 0; +error_free: + amdgpu_ib_free(adev, ib); + kfree(ib); + return r; } #if defined(CONFIG_DEBUG_FS) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index 68369cf1e318..b87355ccfb1d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -807,7 +807,7 @@ int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx) } static int amdgpu_uvd_free_job( - struct amdgpu_cs_parser *sched_job) + struct amdgpu_job *sched_job) { amdgpu_ib_free(sched_job->adev, sched_job->ibs); kfree(sched_job->ibs); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index 33ee6ae28f37..1a984c934b1f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -340,7 +340,7 @@ void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp) } static int amdgpu_vce_free_job( - struct amdgpu_cs_parser *sched_job) + struct amdgpu_job *sched_job) { amdgpu_ib_free(sched_job->adev, sched_job->ibs); kfree(sched_job->ibs); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index a78a206e176e..83b7ce6f5f72 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -200,19 +200,29 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, */ void amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_vm *vm, - struct amdgpu_fence *updates) + struct fence *updates) { uint64_t pd_addr = amdgpu_bo_gpu_offset(vm->page_directory); struct amdgpu_vm_id *vm_id = &vm->ids[ring->idx]; - struct amdgpu_fence *flushed_updates = vm_id->flushed_updates; + struct fence *flushed_updates = vm_id->flushed_updates; + bool is_earlier = false; + + if (flushed_updates && updates) { + BUG_ON(flushed_updates->context != updates->context); + is_earlier = (updates->seqno - flushed_updates->seqno <= + INT_MAX) ? 
true : false; + } if (pd_addr != vm_id->pd_gpu_addr || !flushed_updates || - (updates && amdgpu_fence_is_earlier(flushed_updates, updates))) { + is_earlier) { trace_amdgpu_vm_flush(pd_addr, ring->idx, vm_id->id); - vm_id->flushed_updates = amdgpu_fence_ref( - amdgpu_fence_later(flushed_updates, updates)); - amdgpu_fence_unref(&flushed_updates); + if (is_earlier) { + vm_id->flushed_updates = fence_get(updates); + fence_put(flushed_updates); + } + if (!flushed_updates) + vm_id->flushed_updates = fence_get(updates); vm_id->pd_gpu_addr = pd_addr; amdgpu_ring_emit_vm_flush(ring, vm_id->id, vm_id->pd_gpu_addr); } @@ -306,8 +316,7 @@ static void amdgpu_vm_update_pages(struct amdgpu_device *adev, } } -static int amdgpu_vm_free_job( - struct amdgpu_cs_parser *sched_job) +int amdgpu_vm_free_job(struct amdgpu_job *sched_job) { int i; for (i = 0; i < sched_job->num_ibs; i++) @@ -1347,7 +1356,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) fence_put(vm->page_directory_fence); for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { - amdgpu_fence_unref(&vm->ids[i].flushed_updates); + fence_put(vm->ids[i].flushed_updates); amdgpu_fence_unref(&vm->ids[i].last_id_use); } diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index 2b4242b39b0a..3920c1e346f8 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -630,6 +630,7 @@ static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring) gpu_addr = adev->wb.gpu_addr + (index * 4); tmp = 0xCAFEDEAD; adev->wb.wb[index] = cpu_to_le32(tmp); + memset(&ib, 0, sizeof(ib)); r = amdgpu_ib_get(ring, NULL, 256, &ib); if (r) { DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); @@ -1338,18 +1339,18 @@ static void cik_sdma_set_irq_funcs(struct amdgpu_device *adev) * Used by the amdgpu ttm implementation to move pages if * registered as the asic copy callback. 
*/ -static void cik_sdma_emit_copy_buffer(struct amdgpu_ring *ring, +static void cik_sdma_emit_copy_buffer(struct amdgpu_ib *ib, uint64_t src_offset, uint64_t dst_offset, uint32_t byte_count) { - amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0)); - amdgpu_ring_write(ring, byte_count); - amdgpu_ring_write(ring, 0); /* src/dst endian swap */ - amdgpu_ring_write(ring, lower_32_bits(src_offset)); - amdgpu_ring_write(ring, upper_32_bits(src_offset)); - amdgpu_ring_write(ring, lower_32_bits(dst_offset)); - amdgpu_ring_write(ring, upper_32_bits(dst_offset)); + ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0); + ib->ptr[ib->length_dw++] = byte_count; + ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */ + ib->ptr[ib->length_dw++] = lower_32_bits(src_offset); + ib->ptr[ib->length_dw++] = upper_32_bits(src_offset); + ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset); + ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset); } /** diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 9b0cab413677..fab7b236f37f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -2660,6 +2660,7 @@ static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring) return r; } WREG32(scratch, 0xCAFEDEAD); + memset(&ib, 0, sizeof(ib)); r = amdgpu_ib_get(ring, NULL, 256, &ib); if (r) { DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 4b68e6306f40..818edb37fa9c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -622,6 +622,7 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring) return r; } WREG32(scratch, 0xCAFEDEAD); + memset(&ib, 0, sizeof(ib)); r = amdgpu_ib_get(ring, NULL, 256, &ib); if (r) { DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index 9de8104eddeb..715e02d3bfba 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -689,6 +689,7 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring) gpu_addr = adev->wb.gpu_addr + (index * 4); tmp = 0xCAFEDEAD; adev->wb.wb[index] = cpu_to_le32(tmp); + memset(&ib, 0, sizeof(ib)); r = amdgpu_ib_get(ring, NULL, 256, &ib); if (r) { DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); @@ -1349,19 +1350,19 @@ static void sdma_v2_4_set_irq_funcs(struct amdgpu_device *adev) * Used by the amdgpu ttm implementation to move pages if * registered as the asic copy callback. 
*/ -static void sdma_v2_4_emit_copy_buffer(struct amdgpu_ring *ring, +static void sdma_v2_4_emit_copy_buffer(struct amdgpu_ib *ib, uint64_t src_offset, uint64_t dst_offset, uint32_t byte_count) { - amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_COPY) | - SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR)); - amdgpu_ring_write(ring, byte_count); - amdgpu_ring_write(ring, 0); /* src/dst endian swap */ - amdgpu_ring_write(ring, lower_32_bits(src_offset)); - amdgpu_ring_write(ring, upper_32_bits(src_offset)); - amdgpu_ring_write(ring, lower_32_bits(dst_offset)); - amdgpu_ring_write(ring, upper_32_bits(dst_offset)); + ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) | + SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR); + ib->ptr[ib->length_dw++] = byte_count; + ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */ + ib->ptr[ib->length_dw++] = lower_32_bits(src_offset); + ib->ptr[ib->length_dw++] = upper_32_bits(src_offset); + ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset); + ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset); } /** diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index 029f3455f9f9..67128c8e78b8 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -810,6 +810,7 @@ static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring) gpu_addr = adev->wb.gpu_addr + (index * 4); tmp = 0xCAFEDEAD; adev->wb.wb[index] = cpu_to_le32(tmp); + memset(&ib, 0, sizeof(ib)); r = amdgpu_ib_get(ring, NULL, 256, &ib); if (r) { DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); @@ -1473,19 +1474,19 @@ static void sdma_v3_0_set_irq_funcs(struct amdgpu_device *adev) * Used by the amdgpu ttm implementation to move pages if * registered as the asic copy callback. 
*/ -static void sdma_v3_0_emit_copy_buffer(struct amdgpu_ring *ring, +static void sdma_v3_0_emit_copy_buffer(struct amdgpu_ib *ib, uint64_t src_offset, uint64_t dst_offset, uint32_t byte_count) { - amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_COPY) | - SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR)); - amdgpu_ring_write(ring, byte_count); - amdgpu_ring_write(ring, 0); /* src/dst endian swap */ - amdgpu_ring_write(ring, lower_32_bits(src_offset)); - amdgpu_ring_write(ring, upper_32_bits(src_offset)); - amdgpu_ring_write(ring, lower_32_bits(dst_offset)); - amdgpu_ring_write(ring, upper_32_bits(dst_offset)); + ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) | + SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR); + ib->ptr[ib->length_dw++] = byte_count; + ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */ + ib->ptr[ib->length_dw++] = lower_32_bits(src_offset); + ib->ptr[ib->length_dw++] = upper_32_bits(src_offset); + ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset); + ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset); } /** diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c index 265d3e2f63cc..d99fe90991dc 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c @@ -27,30 +27,32 @@ #include <drm/drmP.h> #include "gpu_scheduler.h" +static void amd_sched_wakeup(struct amd_gpu_scheduler *sched); + /* Initialize a given run queue struct */ static void amd_sched_rq_init(struct amd_sched_rq *rq) { + spin_lock_init(&rq->lock); INIT_LIST_HEAD(&rq->entities); - mutex_init(&rq->lock); rq->current_entity = NULL; } static void amd_sched_rq_add_entity(struct amd_sched_rq *rq, struct amd_sched_entity *entity) { - mutex_lock(&rq->lock); + spin_lock(&rq->lock); list_add_tail(&entity->list, &rq->entities); - mutex_unlock(&rq->lock); + spin_unlock(&rq->lock); } static void amd_sched_rq_remove_entity(struct amd_sched_rq *rq, struct amd_sched_entity *entity) { - mutex_lock(&rq->lock); + spin_lock(&rq->lock); list_del_init(&entity->list); if (rq->current_entity == entity) rq->current_entity = NULL; - mutex_unlock(&rq->lock); + spin_unlock(&rq->lock); } /** @@ -61,12 +63,16 @@ static void amd_sched_rq_remove_entity(struct amd_sched_rq *rq, static struct amd_sched_entity * amd_sched_rq_select_entity(struct amd_sched_rq *rq) { - struct amd_sched_entity *entity = rq->current_entity; + struct amd_sched_entity *entity; + + spin_lock(&rq->lock); + entity = rq->current_entity; if (entity) { list_for_each_entry_continue(entity, &rq->entities, list) { if (!kfifo_is_empty(&entity->job_queue)) { rq->current_entity = entity; + spin_unlock(&rq->lock); return rq->current_entity; } } @@ -76,6 +82,7 @@ amd_sched_rq_select_entity(struct amd_sched_rq *rq) if (!kfifo_is_empty(&entity->job_queue)) { rq->current_entity = entity; + spin_unlock(&rq->lock); return rq->current_entity; } @@ -83,76 +90,9 @@ amd_sched_rq_select_entity(struct amd_sched_rq *rq) break; } - return NULL; -} + spin_unlock(&rq->lock); -/** - * Note: This function should only been called inside scheduler main - * function for thread safety, there is no other protection here. - * return ture if scheduler has something ready to run. - * - * For active_hw_rq, there is only one producer(scheduler thread) and - * one consumer(ISR). It should be safe to use this function in scheduler - * main thread to decide whether to continue emit more IBs. 
-*/ -static bool is_scheduler_ready(struct amd_gpu_scheduler *sched) -{ - unsigned long flags; - bool full; - - spin_lock_irqsave(&sched->queue_lock, flags); - full = atomic64_read(&sched->hw_rq_count) < - sched->hw_submission_limit ? true : false; - spin_unlock_irqrestore(&sched->queue_lock, flags); - - return full; -} - -/** - * Select next entity from the kernel run queue, if not available, - * return null. -*/ -static struct amd_sched_entity * -kernel_rq_select_context(struct amd_gpu_scheduler *sched) -{ - struct amd_sched_entity *sched_entity; - struct amd_sched_rq *rq = &sched->kernel_rq; - - mutex_lock(&rq->lock); - sched_entity = amd_sched_rq_select_entity(rq); - mutex_unlock(&rq->lock); - return sched_entity; -} - -/** - * Select next entity containing real IB submissions -*/ -static struct amd_sched_entity * -select_context(struct amd_gpu_scheduler *sched) -{ - struct amd_sched_entity *wake_entity = NULL; - struct amd_sched_entity *tmp; - struct amd_sched_rq *rq; - - if (!is_scheduler_ready(sched)) - return NULL; - - /* Kernel run queue has higher priority than normal run queue*/ - tmp = kernel_rq_select_context(sched); - if (tmp != NULL) - goto exit; - - rq = &sched->sched_rq; - mutex_lock(&rq->lock); - tmp = amd_sched_rq_select_entity(rq); - mutex_unlock(&rq->lock); -exit: - if (sched->current_entity && (sched->current_entity != tmp)) - wake_entity = sched->current_entity; - sched->current_entity = tmp; - if (wake_entity && wake_entity->need_wakeup) - wake_up(&wake_entity->wait_queue); - return tmp; + return NULL; } /** @@ -171,31 +111,20 @@ int amd_sched_entity_init(struct amd_gpu_scheduler *sched, struct amd_sched_rq *rq, uint32_t jobs) { - uint64_t seq_ring = 0; - char name[20]; - if (!(sched && entity && rq)) return -EINVAL; memset(entity, 0, sizeof(struct amd_sched_entity)); - seq_ring = ((uint64_t)sched->ring_id) << 60; - spin_lock_init(&entity->lock); entity->belongto_rq = rq; entity->scheduler = sched; - init_waitqueue_head(&entity->wait_queue); - init_waitqueue_head(&entity->wait_emit); entity->fence_context = fence_context_alloc(1); - snprintf(name, sizeof(name), "c_entity[%llu]", entity->fence_context); - memcpy(entity->name, name, 20); - entity->need_wakeup = false; if(kfifo_alloc(&entity->job_queue, jobs * sizeof(void *), GFP_KERNEL)) return -EINVAL; spin_lock_init(&entity->queue_lock); - atomic64_set(&entity->last_queued_v_seq, seq_ring); - atomic64_set(&entity->last_signaled_v_seq, seq_ring); + atomic_set(&entity->fence_seq, 0); /* Add the entity to the run queue */ amd_sched_rq_add_entity(rq, entity); @@ -210,23 +139,24 @@ int amd_sched_entity_init(struct amd_gpu_scheduler *sched, * * return true if entity is initialized, false otherwise */ -static bool is_context_entity_initialized(struct amd_gpu_scheduler *sched, - struct amd_sched_entity *entity) +static bool amd_sched_entity_is_initialized(struct amd_gpu_scheduler *sched, + struct amd_sched_entity *entity) { return entity->scheduler == sched && entity->belongto_rq != NULL; } -static bool is_context_entity_idle(struct amd_gpu_scheduler *sched, - struct amd_sched_entity *entity) +/** + * Check if entity is idle + * + * @entity The pointer to a valid scheduler entity + * + * Return true if the entity has no unscheduled jobs. + */ +static bool amd_sched_entity_is_idle(struct amd_sched_entity *entity) { - /** - * Idle means no pending IBs, and the entity is not - * currently being used.
- */ - barrier(); - if ((sched->current_entity != entity) && - kfifo_is_empty(&entity->job_queue)) + rmb(); + if (kfifo_is_empty(&entity->job_queue)) return true; return false; @@ -238,84 +168,114 @@ static bool is_context_entity_idle(struct amd_gpu_scheduler *sched, * @sched Pointer to scheduler instance * @entity The pointer to a valid scheduler entity * - * return 0 if succeed. negative error code on failure + * Cleanup and free the allocated resources. */ -int amd_sched_entity_fini(struct amd_gpu_scheduler *sched, - struct amd_sched_entity *entity) +void amd_sched_entity_fini(struct amd_gpu_scheduler *sched, + struct amd_sched_entity *entity) { - int r = 0; struct amd_sched_rq *rq = entity->belongto_rq; - if (!is_context_entity_initialized(sched, entity)) - return 0; - entity->need_wakeup = true; + if (!amd_sched_entity_is_initialized(sched, entity)) + return; + /** * The client will not queue more IBs during this fini, consume existing * queued IBs */ - r = wait_event_timeout( - entity->wait_queue, - is_context_entity_idle(sched, entity), - msecs_to_jiffies(AMD_GPU_WAIT_IDLE_TIMEOUT_IN_MS) - ) ? 0 : -1; - - if (r) { - if (entity->is_pending) - DRM_INFO("Entity %p is in waiting state during fini,\ - all pending ibs will be canceled.\n", - entity); - } + wait_event(sched->job_scheduled, amd_sched_entity_is_idle(entity)); amd_sched_rq_remove_entity(rq, entity); kfifo_free(&entity->job_queue); - return r; } /** - * Submit a normal job to the job queue + * Helper to submit a job to the job queue * - * @sched The pointer to the scheduler - * @c_entity The pointer to amd_sched_entity * @job The pointer to job required to submit - * return 0 if succeed. -1 if failed. - * -2 indicate queue is full for this client, client should wait untill - * scheduler consum some queued command. - * -1 other fail. -*/ -int amd_sched_push_job(struct amd_gpu_scheduler *sched, - struct amd_sched_entity *c_entity, - void *data, - struct amd_sched_fence **fence) + * + * Returns true if we could submit the job. + */ +static bool amd_sched_entity_in(struct amd_sched_job *job) +{ + struct amd_sched_entity *entity = job->s_entity; + bool added, first = false; + + spin_lock(&entity->queue_lock); + added = kfifo_in(&entity->job_queue, &job, sizeof(job)) == sizeof(job); + + if (added && kfifo_len(&entity->job_queue) == sizeof(job)) + first = true; + + spin_unlock(&entity->queue_lock); + + /* first job wakes up scheduler */ + if (first) + amd_sched_wakeup(job->sched); + + return added; +} + +/** + * Submit a job to the job queue + * + * @job The pointer to job required to submit + * + * Returns 0 for success, negative error code otherwise. 
+ */ +int amd_sched_entity_push_job(struct amd_sched_job *sched_job) { - struct amd_sched_job *job; + struct amd_sched_entity *entity = sched_job->s_entity; + struct amd_sched_fence *fence = amd_sched_fence_create( + entity, sched_job->owner); + int r; if (!fence) - return -EINVAL; - job = kzalloc(sizeof(struct amd_sched_job), GFP_KERNEL); - if (!job) return -ENOMEM; - job->sched = sched; - job->s_entity = c_entity; - job->data = data; - *fence = amd_sched_fence_create(c_entity); - if ((*fence) == NULL) { - kfree(job); - return -EINVAL; - } - fence_get(&(*fence)->base); - job->s_fence = *fence; - while (kfifo_in_spinlocked(&c_entity->job_queue, &job, sizeof(void *), - &c_entity->queue_lock) != sizeof(void *)) { - /** - * Current context used up all its IB slots - * wait here, or need to check whether GPU is hung - */ - schedule(); - } - /* first job wake up scheduler */ - if ((kfifo_len(&c_entity->job_queue) / sizeof(void *)) == 1) - wake_up_interruptible(&sched->wait_queue); - return 0; + + fence_get(&fence->base); + sched_job->s_fence = fence; + + r = wait_event_interruptible(entity->scheduler->job_scheduled, + amd_sched_entity_in(sched_job)); + + return r; +} + +/** + * Return true if we can push more jobs to the hw. + */ +static bool amd_sched_ready(struct amd_gpu_scheduler *sched) +{ + return atomic_read(&sched->hw_rq_count) < + sched->hw_submission_limit; +} + +/** + * Wake up the scheduler when it is ready + */ +static void amd_sched_wakeup(struct amd_gpu_scheduler *sched) +{ + if (amd_sched_ready(sched)) + wake_up_interruptible(&sched->wake_up_worker); +} + +/** + * Select next entity containing real IB submissions + */ +static struct amd_sched_entity * +amd_sched_select_context(struct amd_gpu_scheduler *sched) +{ + struct amd_sched_entity *tmp; + + if (!amd_sched_ready(sched)) + return NULL; + + /* Kernel run queue has higher priority than normal run queue */ + tmp = amd_sched_rq_select_entity(&sched->kernel_rq); + if (tmp == NULL) + tmp = amd_sched_rq_select_entity(&sched->sched_rq); + + return tmp; } static void amd_sched_process_job(struct fence *f, struct fence_cb *cb) @@ -323,52 +283,41 @@ static void amd_sched_process_job(struct fence *f, struct fence_cb *cb) struct amd_sched_job *sched_job = container_of(cb, struct amd_sched_job, cb); struct amd_gpu_scheduler *sched; - unsigned long flags; sched = sched_job->sched; - atomic64_set(&sched_job->s_entity->last_signaled_v_seq, - sched_job->s_fence->v_seq); amd_sched_fence_signal(sched_job->s_fence); - spin_lock_irqsave(&sched->queue_lock, flags); - list_del(&sched_job->list); - atomic64_dec(&sched->hw_rq_count); - spin_unlock_irqrestore(&sched->queue_lock, flags); - - sched->ops->process_job(sched, sched_job); + atomic_dec(&sched->hw_rq_count); fence_put(&sched_job->s_fence->base); - kfree(sched_job); - wake_up_interruptible(&sched->wait_queue); + sched->ops->process_job(sched_job); + wake_up_interruptible(&sched->wake_up_worker); } static int amd_sched_main(void *param) { - int r; - struct amd_sched_job *job; struct sched_param sparam = {.sched_priority = 1}; - struct amd_sched_entity *c_entity = NULL; struct amd_gpu_scheduler *sched = (struct amd_gpu_scheduler *)param; + int r; sched_setscheduler(current, SCHED_FIFO, &sparam); while (!kthread_should_stop()) { + struct amd_sched_entity *c_entity = NULL; + struct amd_sched_job *job; struct fence *fence; - wait_event_interruptible(sched->wait_queue, - is_scheduler_ready(sched) && - (c_entity = select_context(sched))); + wait_event_interruptible(sched->wake_up_worker, 
kthread_should_stop() || + (c_entity = amd_sched_select_context(sched))); + + if (!c_entity) + continue; + r = kfifo_out(&c_entity->job_queue, &job, sizeof(void *)); if (r != sizeof(void *)) continue; - r = sched->ops->prepare_job(sched, c_entity, job); - if (!r) { - unsigned long flags; - spin_lock_irqsave(&sched->queue_lock, flags); - list_add_tail(&job->list, &sched->active_hw_rq); - atomic64_inc(&sched->hw_rq_count); - spin_unlock_irqrestore(&sched->queue_lock, flags); - } - mutex_lock(&sched->sched_lock); - fence = sched->ops->run_job(sched, c_entity, job); + atomic_inc(&sched->hw_rq_count); + + fence = sched->ops->run_job(job); if (fence) { r = fence_add_callback(fence, &job->cb, amd_sched_process_job); @@ -378,7 +327,8 @@ static int amd_sched_main(void *param) DRM_ERROR("fence add callback failed (%d)\n", r); fence_put(fence); } - mutex_unlock(&sched->sched_lock); + + wake_up(&sched->job_scheduled); } return 0; } @@ -386,53 +336,42 @@ static int amd_sched_main(void *param) /** * Create a gpu scheduler * - * @device The device context for this scheduler - * @ops The backend operations for this scheduler. - * @id The scheduler is per ring, here is ring id. - * @granularity The minumum ms unit the scheduler will scheduled. - * @preemption Indicate whether this ring support preemption, 0 is no. + * @ops The backend operations for this scheduler. + * @ring The ring id for the scheduler. + * @hw_submission Number of hw submissions to do. * - * return the pointer to scheduler for success, otherwise return NULL + * Return the pointer to the scheduler on success, otherwise NULL */ -struct amd_gpu_scheduler *amd_sched_create(void *device, - struct amd_sched_backend_ops *ops, - unsigned ring, - unsigned granularity, - unsigned preemption, - unsigned hw_submission) +struct amd_gpu_scheduler *amd_sched_create(struct amd_sched_backend_ops *ops, + unsigned ring, unsigned hw_submission, + void *priv) { struct amd_gpu_scheduler *sched; - char name[20]; sched = kzalloc(sizeof(struct amd_gpu_scheduler), GFP_KERNEL); if (!sched) return NULL; - sched->device = device; sched->ops = ops; - sched->granularity = granularity; sched->ring_id = ring; - sched->preemption = preemption; sched->hw_submission_limit = hw_submission; - snprintf(name, sizeof(name), "gpu_sched[%d]", ring); - mutex_init(&sched->sched_lock); - spin_lock_init(&sched->queue_lock); + sched->priv = priv; + snprintf(sched->name, sizeof(sched->name), "amdgpu[%d]", ring); amd_sched_rq_init(&sched->sched_rq); amd_sched_rq_init(&sched->kernel_rq); - init_waitqueue_head(&sched->wait_queue); - INIT_LIST_HEAD(&sched->active_hw_rq); - atomic64_set(&sched->hw_rq_count, 0); + init_waitqueue_head(&sched->wake_up_worker); + init_waitqueue_head(&sched->job_scheduled); + atomic_set(&sched->hw_rq_count, 0); /* Each scheduler will run on a separate kernel thread */ - sched->thread = kthread_create(amd_sched_main, sched, name); - if (sched->thread) { - wake_up_process(sched->thread); - return sched; + sched->thread = kthread_run(amd_sched_main, sched, sched->name); + if (IS_ERR(sched->thread)) { + DRM_ERROR("Failed to create scheduler for id %d.\n", ring); + kfree(sched); + return NULL; } - DRM_ERROR("Failed to create scheduler for id %d.\n", ring); - kfree(sched); - return NULL; + return sched; } /** @@ -448,15 +387,3 @@ int amd_sched_destroy(struct amd_gpu_scheduler *sched) kfree(sched); return 0; } - -/** - * Get next queued sequence number - * - * @entity The context entity - * - * return the next queued sequence number -*/ -uint64_t 
amd_sched_next_queued_seq(struct amd_sched_entity *c_entity) -{ - return atomic64_read(&c_entity->last_queued_v_seq) + 1; -} diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h index ceb5918bfbeb..e797796dcad7 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h @@ -27,8 +27,6 @@ #include <linux/kfifo.h> #include <linux/fence.h> -#define AMD_GPU_WAIT_IDLE_TIMEOUT_IN_MS 3000 - struct amd_gpu_scheduler; struct amd_sched_rq; @@ -41,20 +39,12 @@ struct amd_sched_rq; struct amd_sched_entity { struct list_head list; struct amd_sched_rq *belongto_rq; - spinlock_t lock; - /* the virtual_seq is unique per context per ring */ - atomic64_t last_queued_v_seq; - atomic64_t last_signaled_v_seq; + atomic_t fence_seq; /* the job_queue maintains the jobs submitted by clients */ struct kfifo job_queue; spinlock_t queue_lock; struct amd_gpu_scheduler *scheduler; - wait_queue_head_t wait_queue; - wait_queue_head_t wait_emit; - bool is_pending; uint64_t fence_context; - char name[20]; - bool need_wakeup; }; /** @@ -63,26 +53,24 @@ struct amd_sched_entity { * the next entity to emit commands from. */ struct amd_sched_rq { - struct mutex lock; + spinlock_t lock; struct list_head entities; struct amd_sched_entity *current_entity; }; struct amd_sched_fence { struct fence base; - struct fence_cb cb; - struct amd_sched_entity *entity; - uint64_t v_seq; + struct amd_gpu_scheduler *scheduler; spinlock_t lock; + void *owner; }; struct amd_sched_job { - struct list_head list; struct fence_cb cb; struct amd_gpu_scheduler *sched; struct amd_sched_entity *s_entity; - void *data; struct amd_sched_fence *s_fence; + void *owner; }; extern const struct fence_ops amd_sched_fence_ops; @@ -101,61 +89,42 @@ static inline struct amd_sched_fence *to_amd_sched_fence(struct fence *f) * these functions should be implemented on the driver side */ struct amd_sched_backend_ops { - int (*prepare_job)(struct amd_gpu_scheduler *sched, - struct amd_sched_entity *c_entity, - struct amd_sched_job *job); - struct fence *(*run_job)(struct amd_gpu_scheduler *sched, - struct amd_sched_entity *c_entity, - struct amd_sched_job *job); - void (*process_job)(struct amd_gpu_scheduler *sched, - struct amd_sched_job *job); + struct fence *(*run_job)(struct amd_sched_job *job); + void (*process_job)(struct amd_sched_job *job); }; /** * One scheduler is implemented for each hardware ring */ struct amd_gpu_scheduler { - void *device; struct task_struct *thread; struct amd_sched_rq sched_rq; struct amd_sched_rq kernel_rq; - struct list_head active_hw_rq; - atomic64_t hw_rq_count; + atomic_t hw_rq_count; struct amd_sched_backend_ops *ops; uint32_t ring_id; - uint32_t granularity; /* in ms unit */ - uint32_t preemption; - wait_queue_head_t wait_queue; - struct amd_sched_entity *current_entity; - struct mutex sched_lock; - spinlock_t queue_lock; + wait_queue_head_t wake_up_worker; + wait_queue_head_t job_scheduled; uint32_t hw_submission_limit; + char name[20]; + void *priv; }; -struct amd_gpu_scheduler *amd_sched_create(void *device, - struct amd_sched_backend_ops *ops, - uint32_t ring, - uint32_t granularity, - uint32_t preemption, - uint32_t hw_submission); +struct amd_gpu_scheduler * +amd_sched_create(struct amd_sched_backend_ops *ops, + uint32_t ring, uint32_t hw_submission, void *priv); int amd_sched_destroy(struct amd_gpu_scheduler *sched); -int amd_sched_push_job(struct amd_gpu_scheduler *sched, - struct amd_sched_entity *c_entity, - void *data, - 
struct amd_sched_fence **fence); - int amd_sched_entity_init(struct amd_gpu_scheduler *sched, struct amd_sched_entity *entity, struct amd_sched_rq *rq, uint32_t jobs); -int amd_sched_entity_fini(struct amd_gpu_scheduler *sched, - struct amd_sched_entity *entity); - -uint64_t amd_sched_next_queued_seq(struct amd_sched_entity *c_entity); +void amd_sched_entity_fini(struct amd_gpu_scheduler *sched, + struct amd_sched_entity *entity); +int amd_sched_entity_push_job(struct amd_sched_job *sched_job); struct amd_sched_fence *amd_sched_fence_create( - struct amd_sched_entity *s_entity); + struct amd_sched_entity *s_entity, void *owner); void amd_sched_fence_signal(struct amd_sched_fence *fence); diff --git a/drivers/gpu/drm/amd/scheduler/sched_fence.c b/drivers/gpu/drm/amd/scheduler/sched_fence.c index a4751598c0b4..e62c37920e11 100644 --- a/drivers/gpu/drm/amd/scheduler/sched_fence.c +++ b/drivers/gpu/drm/amd/scheduler/sched_fence.c @@ -27,19 +27,22 @@ #include <drm/drmP.h> #include "gpu_scheduler.h" -struct amd_sched_fence *amd_sched_fence_create(struct amd_sched_entity *s_entity) +struct amd_sched_fence *amd_sched_fence_create(struct amd_sched_entity *s_entity, void *owner) { struct amd_sched_fence *fence = NULL; + unsigned seq; + fence = kzalloc(sizeof(struct amd_sched_fence), GFP_KERNEL); if (fence == NULL) return NULL; - fence->v_seq = atomic64_inc_return(&s_entity->last_queued_v_seq); - fence->entity = s_entity; + fence->owner = owner; + fence->scheduler = s_entity->scheduler; spin_lock_init(&fence->lock); - fence_init(&fence->base, &amd_sched_fence_ops, - &fence->lock, - s_entity->fence_context, - fence->v_seq); + + seq = atomic_inc_return(&s_entity->fence_seq); + fence_init(&fence->base, &amd_sched_fence_ops, &fence->lock, + s_entity->fence_context, seq); + return fence; } @@ -60,7 +63,7 @@ static const char *amd_sched_fence_get_driver_name(struct fence *fence) static const char *amd_sched_fence_get_timeline_name(struct fence *f) { struct amd_sched_fence *fence = to_amd_sched_fence(f); - return (const char *)fence->entity->name; + return (const char *)fence->scheduler->name; } static bool amd_sched_fence_enable_signaling(struct fence *f) diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c index 94b21ae70ef7..5a2cafb4f1bc 100644 --- a/drivers/gpu/drm/radeon/radeon_connectors.c +++ b/drivers/gpu/drm/radeon/radeon_connectors.c @@ -95,6 +95,11 @@ void radeon_connector_hotplug(struct drm_connector *connector) if (!radeon_hpd_sense(rdev, radeon_connector->hpd.hpd)) { drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF); } else if (radeon_dp_needs_link_train(radeon_connector)) { + /* Don't try to start link training before we + * have the dpcd */ + if (!radeon_dp_getdpcd(radeon_connector)) + return; + /* set it to OFF so that drm_helper_connector_dpms() * won't return immediately since the current state * is ON at this point.
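Editor's note on the copy-buffer hunks above: emit_copy_buffer() now writes its seven-dword packet into an indirect buffer instead of directly onto the ring, which is what lets eviction copies be submitted as IBs through the scheduler. A minimal sketch of the resulting flow, assuming it lives in cik_sdma.c next to the (static) callback; example_emit_copy() and the trimmed submission step are illustrative, everything else is taken from the hunks above:

static int example_emit_copy(struct amdgpu_ring *ring, uint64_t src,
			     uint64_t dst, uint32_t bytes)
{
	struct amdgpu_ib ib;
	int r;

	/*
	 * amdgpu_ib_get() does not clear the struct; the memset added
	 * to the ring_test_ib() functions above keeps stack garbage in
	 * the unused fields from being misread at submission time.
	 */
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(ring, NULL, 256, &ib);
	if (r)
		return r;

	ib.length_dw = 0;
	/* writes 7 dwords and advances ib.length_dw accordingly */
	cik_sdma_emit_copy_buffer(&ib, src, dst, bytes);

	/* ... schedule and fence the IB as amdgpu_copy_buffer() does ... */

	amdgpu_ib_free(ring->adev, &ib);
	return 0;
}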
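The entity rework also changes the driver-facing submission path: amd_sched_push_job() is gone, the caller allocates the amd_sched_job itself and hands it to amd_sched_entity_push_job(), which creates and attaches the scheduler fence. A hedged sketch using only functions and fields declared in this patch; example_submit_one() is hypothetical and error handling is trimmed:

static int example_submit_one(struct amd_gpu_scheduler *sched,
			      struct amd_sched_entity *entity,
			      void *owner)
{
	struct amd_sched_job *job;
	int r;

	job = kzalloc(sizeof(*job), GFP_KERNEL);
	if (!job)
		return -ENOMEM;

	job->sched = sched;
	job->s_entity = entity;
	job->owner = owner;	/* ends up in the scheduler fence */

	/*
	 * Sleeps interruptibly while the entity's kfifo is full; the
	 * first job queued also wakes the scheduler thread.
	 */
	r = amd_sched_entity_push_job(job);
	if (r)
		kfree(job);	/* trimmed: job->s_fence would need a put too */

	return r;
}

Setup and teardown bracket this with amd_sched_entity_init(sched, &entity, &sched->sched_rq, depth) and amd_sched_entity_fini(sched, &entity); note that fini now waits on job_scheduled until the entity drains instead of giving up after the old 3-second AMD_GPU_WAIT_IDLE_TIMEOUT_IN_MS.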
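With prepare_job() removed and the remaining callbacks reduced to the job pointer, a backend now implements just two hooks, and amd_sched_create() drops the unused granularity/preemption arguments in favor of a priv pointer. An illustrative skeleton; all example_* names are hypothetical and run_job() is stubbed:

static struct fence *example_run_job(struct amd_sched_job *job)
{
	/*
	 * Submit the job's IBs to the hardware and return the hw fence;
	 * the scheduler attaches amd_sched_process_job() to it. Stubbed
	 * here -- a real backend must return a valid fence.
	 */
	return NULL;
}

static void example_process_job(struct amd_sched_job *job)
{
	/*
	 * Called once the hw fence signals. The scheduler no longer
	 * kfree()s jobs, so the backend owns the job's lifetime.
	 */
	kfree(job);
}

static struct amd_sched_backend_ops example_ops = {
	.run_job	= example_run_job,
	.process_job	= example_process_job,
};

static int example_create(struct amdgpu_device *adev, unsigned ring_id)
{
	/* 16 is an arbitrary hw_submission limit; adev is handed back
	 * to the backend via sched->priv */
	struct amd_gpu_scheduler *sched =
		amd_sched_create(&example_ops, ring_id, 16, adev);

	return sched ? 0 : -ENOMEM;
}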
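Finally, on the fence side: the 64-bit v_seq (with the ring id packed into bits 63:60) is replaced by a 32-bit fence_seq scoped to the entity's fence context, and the timeline name now comes from the scheduler rather than the entity. A small sketch of the ordering this buys, assuming only the structures from this patch; example_fence_order() is hypothetical:

static void example_fence_order(struct amd_sched_entity *entity, void *owner)
{
	struct amd_sched_fence *a = amd_sched_fence_create(entity, owner);
	struct amd_sched_fence *b = amd_sched_fence_create(entity, owner);

	if (a && b) {
		/* one timeline per entity, strictly increasing seqno */
		WARN_ON(a->base.context != b->base.context);
		WARN_ON(b->base.seqno != a->base.seqno + 1);
	}

	if (b)
		fence_put(&b->base);
	if (a)
		fence_put(&a->base);
}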