diff options
Diffstat (limited to 'drivers/gpu/drm/i915/i915_request.c')
-rw-r--r-- | drivers/gpu/drm/i915/i915_request.c | 381 |
1 files changed, 198 insertions, 183 deletions
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index a195a92d0105..a53777dd371c 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -35,6 +35,7 @@ #include "i915_active.h" #include "i915_drv.h" #include "i915_globals.h" +#include "i915_trace.h" #include "intel_pm.h" struct execute_cb { @@ -119,12 +120,56 @@ const struct dma_fence_ops i915_fence_ops = { .release = i915_fence_release, }; +static void irq_execute_cb(struct irq_work *wrk) +{ + struct execute_cb *cb = container_of(wrk, typeof(*cb), work); + + i915_sw_fence_complete(cb->fence); + kmem_cache_free(global.slab_execute_cbs, cb); +} + +static void irq_execute_cb_hook(struct irq_work *wrk) +{ + struct execute_cb *cb = container_of(wrk, typeof(*cb), work); + + cb->hook(container_of(cb->fence, struct i915_request, submit), + &cb->signal->fence); + i915_request_put(cb->signal); + + irq_execute_cb(wrk); +} + +static void __notify_execute_cb(struct i915_request *rq) +{ + struct execute_cb *cb; + + lockdep_assert_held(&rq->lock); + + if (list_empty(&rq->execute_cb)) + return; + + list_for_each_entry(cb, &rq->execute_cb, link) + irq_work_queue(&cb->work); + + /* + * XXX Rollback on __i915_request_unsubmit() + * + * In the future, perhaps when we have an active time-slicing scheduler, + * it will be interesting to unsubmit parallel execution and remove + * busywaits from the GPU until their master is restarted. This is + * quite hairy, we have to carefully rollback the fence and do a + * preempt-to-idle cycle on the target engine, all the while the + * master execute_cb may refire. + */ + INIT_LIST_HEAD(&rq->execute_cb); +} + static inline void -i915_request_remove_from_client(struct i915_request *request) +remove_from_client(struct i915_request *request) { struct drm_i915_file_private *file_priv; - file_priv = request->file_priv; + file_priv = READ_ONCE(request->file_priv); if (!file_priv) return; @@ -136,40 +181,6 @@ i915_request_remove_from_client(struct i915_request *request) spin_unlock(&file_priv->mm.lock); } -static void advance_ring(struct i915_request *request) -{ - struct intel_ring *ring = request->ring; - unsigned int tail; - - /* - * We know the GPU must have read the request to have - * sent us the seqno + interrupt, so use the position - * of tail of the request to update the last known position - * of the GPU head. - * - * Note this requires that we are always called in request - * completion order. - */ - GEM_BUG_ON(!list_is_first(&request->ring_link, &ring->request_list)); - if (list_is_last(&request->ring_link, &ring->request_list)) { - /* - * We may race here with execlists resubmitting this request - * as we retire it. The resubmission will move the ring->tail - * forwards (to request->wa_tail). We either read the - * current value that was written to hw, or the value that - * is just about to be. Either works, if we miss the last two - * noops - they are safe to be replayed on a reset. - */ - tail = READ_ONCE(request->tail); - list_del(&ring->active_link); - } else { - tail = request->postfix; - } - list_del_init(&request->ring_link); - - ring->head = tail; -} - static void free_capture_list(struct i915_request *request) { struct i915_capture_list *capture; @@ -187,7 +198,7 @@ static bool i915_request_retire(struct i915_request *rq) { struct i915_active_request *active, *next; - lockdep_assert_held(&rq->i915->drm.struct_mutex); + lockdep_assert_held(&rq->timeline->mutex); if (!i915_request_completed(rq)) return false; @@ -199,7 +210,17 @@ static bool i915_request_retire(struct i915_request *rq) GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit)); trace_i915_request_retire(rq); - advance_ring(rq); + /* + * We know the GPU must have read the request to have + * sent us the seqno + interrupt, so use the position + * of tail of the request to update the last known position + * of the GPU head. + * + * Note this requires that we are always called in request + * completion order. + */ + GEM_BUG_ON(!list_is_first(&rq->link, &rq->timeline->requests)); + rq->ring->head = rq->postfix; /* * Walk through the active list, calling retire on each. This allows @@ -232,6 +253,12 @@ static bool i915_request_retire(struct i915_request *rq) local_irq_disable(); + /* + * We only loosely track inflight requests across preemption, + * and so we may find ourselves attempting to retire a _completed_ + * request that we have removed from the HW and put back on a run + * queue. + */ spin_lock(&rq->engine->active.lock); list_del(&rq->sched.link); spin_unlock(&rq->engine->active.lock); @@ -242,20 +269,25 @@ static bool i915_request_retire(struct i915_request *rq) dma_fence_signal_locked(&rq->fence); if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags)) i915_request_cancel_breadcrumb(rq); - if (rq->waitboost) { + if (i915_request_has_waitboost(rq)) { GEM_BUG_ON(!atomic_read(&rq->i915->gt_pm.rps.num_waiters)); atomic_dec(&rq->i915->gt_pm.rps.num_waiters); } + if (!test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags)) { + set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags); + __notify_execute_cb(rq); + } + GEM_BUG_ON(!list_empty(&rq->execute_cb)); spin_unlock(&rq->lock); local_irq_enable(); + remove_from_client(rq); + list_del(&rq->link); + intel_context_exit(rq->hw_context); intel_context_unpin(rq->hw_context); - i915_request_remove_from_client(rq); - list_del(&rq->link); - free_capture_list(rq); i915_sched_node_fini(&rq->sched); i915_request_put(rq); @@ -265,7 +297,7 @@ static bool i915_request_retire(struct i915_request *rq) void i915_request_retire_upto(struct i915_request *rq) { - struct intel_ring *ring = rq->ring; + struct intel_timeline * const tl = rq->timeline; struct i915_request *tmp; GEM_TRACE("%s fence %llx:%lld, current %d\n", @@ -273,62 +305,14 @@ void i915_request_retire_upto(struct i915_request *rq) rq->fence.context, rq->fence.seqno, hwsp_seqno(rq)); - lockdep_assert_held(&rq->i915->drm.struct_mutex); + lockdep_assert_held(&tl->mutex); GEM_BUG_ON(!i915_request_completed(rq)); - if (list_empty(&rq->ring_link)) - return; - do { - tmp = list_first_entry(&ring->request_list, - typeof(*tmp), ring_link); + tmp = list_first_entry(&tl->requests, typeof(*tmp), link); } while (i915_request_retire(tmp) && tmp != rq); } -static void irq_execute_cb(struct irq_work *wrk) -{ - struct execute_cb *cb = container_of(wrk, typeof(*cb), work); - - i915_sw_fence_complete(cb->fence); - kmem_cache_free(global.slab_execute_cbs, cb); -} - -static void irq_execute_cb_hook(struct irq_work *wrk) -{ - struct execute_cb *cb = container_of(wrk, typeof(*cb), work); - - cb->hook(container_of(cb->fence, struct i915_request, submit), - &cb->signal->fence); - i915_request_put(cb->signal); - - irq_execute_cb(wrk); -} - -static void __notify_execute_cb(struct i915_request *rq) -{ - struct execute_cb *cb; - - lockdep_assert_held(&rq->lock); - - if (list_empty(&rq->execute_cb)) - return; - - list_for_each_entry(cb, &rq->execute_cb, link) - irq_work_queue(&cb->work); - - /* - * XXX Rollback on __i915_request_unsubmit() - * - * In the future, perhaps when we have an active time-slicing scheduler, - * it will be interesting to unsubmit parallel execution and remove - * busywaits from the GPU until their master is restarted. This is - * quite hairy, we have to carefully rollback the fence and do a - * preempt-to-idle cycle on the target engine, all the while the - * master execute_cb may refire. - */ - INIT_LIST_HEAD(&rq->execute_cb); -} - static int __i915_request_await_execution(struct i915_request *rq, struct i915_request *signal, @@ -512,6 +496,10 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) switch (state) { case FENCE_COMPLETE: trace_i915_request_submit(request); + + if (unlikely(fence->error)) + i915_request_skip(request, fence->error); + /* * We need to serialize use of the submit_request() callback * with its hotplugging performed during an emergency @@ -552,29 +540,28 @@ semaphore_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) return NOTIFY_DONE; } -static void ring_retire_requests(struct intel_ring *ring) +static void retire_requests(struct intel_timeline *tl) { struct i915_request *rq, *rn; - list_for_each_entry_safe(rq, rn, &ring->request_list, ring_link) + list_for_each_entry_safe(rq, rn, &tl->requests, link) if (!i915_request_retire(rq)) break; } static noinline struct i915_request * -request_alloc_slow(struct intel_context *ce, gfp_t gfp) +request_alloc_slow(struct intel_timeline *tl, gfp_t gfp) { - struct intel_ring *ring = ce->ring; struct i915_request *rq; - if (list_empty(&ring->request_list)) + if (list_empty(&tl->requests)) goto out; if (!gfpflags_allow_blocking(gfp)) goto out; /* Move our oldest request to the slab-cache (if not in use!) */ - rq = list_first_entry(&ring->request_list, typeof(*rq), ring_link); + rq = list_first_entry(&tl->requests, typeof(*rq), link); i915_request_retire(rq); rq = kmem_cache_alloc(global.slab_requests, @@ -583,11 +570,11 @@ request_alloc_slow(struct intel_context *ce, gfp_t gfp) return rq; /* Ratelimit ourselves to prevent oom from malicious clients */ - rq = list_last_entry(&ring->request_list, typeof(*rq), ring_link); + rq = list_last_entry(&tl->requests, typeof(*rq), link); cond_synchronize_rcu(rq->rcustate); /* Retire our old requests in the hope that we free some */ - ring_retire_requests(ring); + retire_requests(tl); out: return kmem_cache_alloc(global.slab_requests, gfp); @@ -596,7 +583,7 @@ out: struct i915_request * __i915_request_create(struct intel_context *ce, gfp_t gfp) { - struct i915_timeline *tl = ce->ring->timeline; + struct intel_timeline *tl = ce->timeline; struct i915_request *rq; u32 seqno; int ret; @@ -638,14 +625,14 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp) rq = kmem_cache_alloc(global.slab_requests, gfp | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); if (unlikely(!rq)) { - rq = request_alloc_slow(ce, gfp); + rq = request_alloc_slow(tl, gfp); if (!rq) { ret = -ENOMEM; goto err_unreserve; } } - ret = i915_timeline_get_seqno(tl, rq, &seqno); + ret = intel_timeline_get_seqno(tl, rq, &seqno); if (ret) goto err_free; @@ -673,7 +660,7 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp) rq->file_priv = NULL; rq->batch = NULL; rq->capture_list = NULL; - rq->waitboost = false; + rq->flags = 0; rq->execution_mask = ALL_ENGINES; INIT_LIST_HEAD(&rq->active_list); @@ -730,15 +717,15 @@ struct i915_request * i915_request_create(struct intel_context *ce) { struct i915_request *rq; - int err; + struct intel_timeline *tl; - err = intel_context_timeline_lock(ce); - if (err) - return ERR_PTR(err); + tl = intel_context_timeline_lock(ce); + if (IS_ERR(tl)) + return ERR_CAST(tl); /* Move our oldest request to the slab-cache (if not in use!) */ - rq = list_first_entry(&ce->ring->request_list, typeof(*rq), ring_link); - if (!list_is_last(&rq->ring_link, &ce->ring->request_list)) + rq = list_first_entry(&tl->requests, typeof(*rq), link); + if (!list_is_last(&rq->link, &tl->requests)) i915_request_retire(rq); intel_context_enter(ce); @@ -748,23 +735,23 @@ i915_request_create(struct intel_context *ce) goto err_unlock; /* Check that we do not interrupt ourselves with a new request */ - rq->cookie = lockdep_pin_lock(&ce->ring->timeline->mutex); + rq->cookie = lockdep_pin_lock(&tl->mutex); return rq; err_unlock: - intel_context_timeline_unlock(ce); + intel_context_timeline_unlock(tl); return rq; } static int i915_request_await_start(struct i915_request *rq, struct i915_request *signal) { - if (list_is_first(&signal->ring_link, &signal->ring->request_list)) + if (list_is_first(&signal->link, &signal->timeline->requests)) return 0; - signal = list_prev_entry(signal, ring_link); - if (i915_timeline_sync_is_later(rq->timeline, &signal->fence)) + signal = list_prev_entry(signal, link); + if (intel_timeline_sync_is_later(rq->timeline, &signal->fence)) return 0; return i915_sw_fence_await_dma_fence(&rq->submit, @@ -818,7 +805,7 @@ emit_semaphore_wait(struct i915_request *to, return err; /* We need to pin the signaler's HWSP until we are finished reading. */ - err = i915_timeline_read_hwsp(from, to, &hwsp_offset); + err = intel_timeline_read_hwsp(from, to, &hwsp_offset); if (err) return err; @@ -928,8 +915,8 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence) continue; /* Squash repeated waits to the same timelines */ - if (fence->context != rq->i915->mm.unordered_timeline && - i915_timeline_sync_is_later(rq->timeline, fence)) + if (fence->context && + intel_timeline_sync_is_later(rq->timeline, fence)) continue; if (dma_fence_is_i915(fence)) @@ -942,8 +929,8 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence) return ret; /* Record the latest fence used against each timeline */ - if (fence->context != rq->i915->mm.unordered_timeline) - i915_timeline_sync_set(rq->timeline, fence); + if (fence->context) + intel_timeline_sync_set(rq->timeline, fence); } while (--nchild); return 0; @@ -1027,7 +1014,7 @@ i915_request_await_object(struct i915_request *to, struct dma_fence **shared; unsigned int count, i; - ret = reservation_object_get_fences_rcu(obj->base.resv, + ret = dma_resv_get_fences_rcu(obj->base.resv, &excl, &count, &shared); if (ret) return ret; @@ -1044,7 +1031,7 @@ i915_request_await_object(struct i915_request *to, dma_fence_put(shared[i]); kfree(shared); } else { - excl = reservation_object_get_excl_rcu(obj->base.resv); + excl = dma_resv_get_excl_rcu(obj->base.resv); } if (excl) { @@ -1065,6 +1052,9 @@ void i915_request_skip(struct i915_request *rq, int error) GEM_BUG_ON(!IS_ERR_VALUE((long)error)); dma_fence_set_error(&rq->fence, error); + if (rq->infix == rq->postfix) + return; + /* * As this request likely depends on state from the lost * context, clear out all the user operations leaving the @@ -1076,12 +1066,13 @@ void i915_request_skip(struct i915_request *rq, int error) head = 0; } memset(vaddr + head, 0, rq->postfix - head); + rq->infix = rq->postfix; } static struct i915_request * __i915_request_add_to_timeline(struct i915_request *rq) { - struct i915_timeline *timeline = rq->timeline; + struct intel_timeline *timeline = rq->timeline; struct i915_request *prev; /* @@ -1104,7 +1095,8 @@ __i915_request_add_to_timeline(struct i915_request *rq) * precludes optimising to use semaphores serialisation of a single * timeline across engines. */ - prev = rcu_dereference_protected(timeline->last_request.request, 1); + prev = rcu_dereference_protected(timeline->last_request.request, + lockdep_is_held(&timeline->mutex)); if (prev && !i915_request_completed(prev)) { if (is_power_of_2(prev->engine->mask | rq->engine->mask)) i915_sw_fence_await_sw_fence(&rq->submit, @@ -1143,7 +1135,6 @@ struct i915_request *__i915_request_commit(struct i915_request *rq) { struct intel_engine_cs *engine = rq->engine; struct intel_ring *ring = rq->ring; - struct i915_request *prev; u32 *cs; GEM_TRACE("%s fence %llx:%lld\n", @@ -1156,6 +1147,7 @@ struct i915_request *__i915_request_commit(struct i915_request *rq) */ GEM_BUG_ON(rq->reserved_space > ring->space); rq->reserved_space = 0; + rq->emitted_jiffies = jiffies; /* * Record the position of the start of the breadcrumb so that @@ -1167,13 +1159,12 @@ struct i915_request *__i915_request_commit(struct i915_request *rq) GEM_BUG_ON(IS_ERR(cs)); rq->postfix = intel_ring_offset(rq, cs); - prev = __i915_request_add_to_timeline(rq); - - list_add_tail(&rq->ring_link, &ring->request_list); - if (list_is_first(&rq->ring_link, &ring->request_list)) - list_add(&ring->active_link, &rq->i915->gt.active_rings); - rq->emitted_jiffies = jiffies; + return __i915_request_add_to_timeline(rq); +} +void __i915_request_queue(struct i915_request *rq, + const struct i915_sched_attr *attr) +{ /* * Let the backend know a new request has arrived that may need * to adjust the existing execution schedule due to a high priority @@ -1185,57 +1176,54 @@ struct i915_request *__i915_request_commit(struct i915_request *rq) * decide whether to preempt the entire chain so that it is ready to * run at the earliest possible convenience. */ - local_bh_disable(); i915_sw_fence_commit(&rq->semaphore); - rcu_read_lock(); /* RCU serialisation for set-wedged protection */ - if (engine->schedule) { - struct i915_sched_attr attr = rq->gem_context->sched; - - /* - * Boost actual workloads past semaphores! - * - * With semaphores we spin on one engine waiting for another, - * simply to reduce the latency of starting our work when - * the signaler completes. However, if there is any other - * work that we could be doing on this engine instead, that - * is better utilisation and will reduce the overall duration - * of the current work. To avoid PI boosting a semaphore - * far in the distance past over useful work, we keep a history - * of any semaphore use along our dependency chain. - */ - if (!(rq->sched.flags & I915_SCHED_HAS_SEMAPHORE_CHAIN)) - attr.priority |= I915_PRIORITY_NOSEMAPHORE; - - /* - * Boost priorities to new clients (new request flows). - * - * Allow interactive/synchronous clients to jump ahead of - * the bulk clients. (FQ_CODEL) - */ - if (list_empty(&rq->sched.signalers_list)) - attr.priority |= I915_PRIORITY_WAIT; - - engine->schedule(rq, &attr); - } - rcu_read_unlock(); + if (attr && rq->engine->schedule) + rq->engine->schedule(rq, attr); i915_sw_fence_commit(&rq->submit); - local_bh_enable(); /* Kick the execlists tasklet if just scheduled */ - - return prev; } void i915_request_add(struct i915_request *rq) { + struct i915_sched_attr attr = rq->gem_context->sched; + struct intel_timeline * const tl = rq->timeline; struct i915_request *prev; - lockdep_assert_held(&rq->timeline->mutex); - lockdep_unpin_lock(&rq->timeline->mutex, rq->cookie); + lockdep_assert_held(&tl->mutex); + lockdep_unpin_lock(&tl->mutex, rq->cookie); trace_i915_request_add(rq); prev = __i915_request_commit(rq); /* + * Boost actual workloads past semaphores! + * + * With semaphores we spin on one engine waiting for another, + * simply to reduce the latency of starting our work when + * the signaler completes. However, if there is any other + * work that we could be doing on this engine instead, that + * is better utilisation and will reduce the overall duration + * of the current work. To avoid PI boosting a semaphore + * far in the distance past over useful work, we keep a history + * of any semaphore use along our dependency chain. + */ + if (!(rq->sched.flags & I915_SCHED_HAS_SEMAPHORE_CHAIN)) + attr.priority |= I915_PRIORITY_NOSEMAPHORE; + + /* + * Boost priorities to new clients (new request flows). + * + * Allow interactive/synchronous clients to jump ahead of + * the bulk clients. (FQ_CODEL) + */ + if (list_empty(&rq->sched.signalers_list)) + attr.priority |= I915_PRIORITY_WAIT; + + local_bh_disable(); + __i915_request_queue(rq, &attr); + local_bh_enable(); /* Kick the execlists tasklet if just scheduled */ + + /* * In typical scenarios, we do not expect the previous request on * the timeline to be still tracked by timeline->last_request if it * has been completed. If the completed request is still here, that @@ -1252,10 +1240,10 @@ void i915_request_add(struct i915_request *rq) * work on behalf of others -- but instead we should benefit from * improved resource management. (Well, that's the theory at least.) */ - if (prev && i915_request_completed(prev)) + if (prev && i915_request_completed(prev) && prev->timeline == tl) i915_request_retire_upto(prev); - mutex_unlock(&rq->timeline->mutex); + mutex_unlock(&tl->mutex); } static unsigned long local_clock_us(unsigned int *cpu) @@ -1390,8 +1378,7 @@ long i915_request_wait(struct i915_request *rq, * serialise wait/reset with an explicit lock, we do want * lockdep to detect potential dependency cycles. */ - mutex_acquire(&rq->i915->gpu_error.wedge_mutex.dep_map, - 0, 0, _THIS_IP_); + mutex_acquire(&rq->engine->gt->reset.mutex.dep_map, 0, 0, _THIS_IP_); /* * Optimistic spin before touching IRQs. @@ -1447,8 +1434,10 @@ long i915_request_wait(struct i915_request *rq, for (;;) { set_current_state(state); - if (i915_request_completed(rq)) + if (i915_request_completed(rq)) { + dma_fence_signal(&rq->fence); break; + } if (signal_pending_state(state, current)) { timeout = -ERESTARTSYS; @@ -1467,25 +1456,51 @@ long i915_request_wait(struct i915_request *rq, dma_fence_remove_callback(&rq->fence, &wait.cb); out: - mutex_release(&rq->i915->gpu_error.wedge_mutex.dep_map, 0, _THIS_IP_); + mutex_release(&rq->engine->gt->reset.mutex.dep_map, 0, _THIS_IP_); trace_i915_request_wait_end(rq); return timeout; } bool i915_retire_requests(struct drm_i915_private *i915) { - struct intel_ring *ring, *tmp; + struct intel_gt_timelines *timelines = &i915->gt.timelines; + struct intel_timeline *tl, *tn; + unsigned long flags; + LIST_HEAD(free); + + spin_lock_irqsave(&timelines->lock, flags); + list_for_each_entry_safe(tl, tn, &timelines->active_list, link) { + if (!mutex_trylock(&tl->mutex)) + continue; + + intel_timeline_get(tl); + GEM_BUG_ON(!tl->active_count); + tl->active_count++; /* pin the list element */ + spin_unlock_irqrestore(&timelines->lock, flags); - lockdep_assert_held(&i915->drm.struct_mutex); + retire_requests(tl); - list_for_each_entry_safe(ring, tmp, - &i915->gt.active_rings, active_link) { - intel_ring_get(ring); /* last rq holds reference! */ - ring_retire_requests(ring); - intel_ring_put(ring); + spin_lock_irqsave(&timelines->lock, flags); + + /* Resume iteration after dropping lock */ + list_safe_reset_next(tl, tn, link); + if (!--tl->active_count) + list_del(&tl->link); + + mutex_unlock(&tl->mutex); + + /* Defer the final release to after the spinlock */ + if (refcount_dec_and_test(&tl->kref.refcount)) { + GEM_BUG_ON(tl->active_count); + list_add(&tl->link, &free); + } } + spin_unlock_irqrestore(&timelines->lock, flags); + + list_for_each_entry_safe(tl, tn, &free, link) + __intel_timeline_free(&tl->kref); - return !list_empty(&i915->gt.active_rings); + return !list_empty(&timelines->active_list); } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) |