Diffstat (limited to 'drivers/gpu')
-rw-r--r-- | drivers/gpu/drm/i915/i915_gem_context.c       | 31
-rw-r--r-- | drivers/gpu/drm/i915/i915_gem_context_types.h |  2
-rw-r--r-- | drivers/gpu/drm/i915/i915_request.c           | 80
-rw-r--r-- | drivers/gpu/drm/i915/i915_request.h           |  5
-rw-r--r-- | drivers/gpu/drm/i915/i915_sw_fence.c          | 39
-rw-r--r-- | drivers/gpu/drm/i915/i915_sw_fence.h          | 13
-rw-r--r-- | drivers/gpu/drm/i915/intel_lrc.c              |  5
-rw-r--r-- | drivers/gpu/drm/i915/selftests/mock_context.c |  2
8 files changed, 136 insertions, 41 deletions
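
This patch lets a context opt in to a single timeline shared by all of its engines: i915_gem_create_context() gains a flags argument, and I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE allocates one i915_timeline that every logical ring in the context then reuses (see get_timeline() in intel_lrc.c below). A minimal userspace sketch of requesting such a context follows; it assumes the uapi half of the series (the flag definition and struct drm_i915_gem_context_create_ext in <drm/i915_drm.h>), which does not appear in this view because the diffstat is limited to drivers/gpu.

/* Sketch only: create a context whose engines share one timeline.
 * Assumes the uapi additions from this series in <drm/i915_drm.h>.
 */
#include <errno.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

static int create_single_timeline_context(int drm_fd, __u32 *ctx_id)
{
	struct drm_i915_gem_context_create_ext arg = {
		.flags = I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE,
	};

	/* Rejected with -EINVAL on platforms without execlists; see the
	 * HAS_EXECLISTS() check added to i915_gem_create_context() below.
	 */
	if (ioctl(drm_fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT, &arg))
		return -errno;

	*ctx_id = arg.ctx_id;
	return 0;
}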
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 68f9c7c7bd6c..39ca3cb6af66 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -238,6 +238,9 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
 	rbtree_postorder_for_each_entry_safe(it, n, &ctx->hw_contexts, node)
 		intel_context_put(it);
 
+	if (ctx->timeline)
+		i915_timeline_put(ctx->timeline);
+
 	kfree(ctx->name);
 	put_pid(ctx->pid);
 
@@ -403,12 +406,16 @@ static void __assign_ppgtt(struct i915_gem_context *ctx,
 }
 
 static struct i915_gem_context *
-i915_gem_create_context(struct drm_i915_private *dev_priv)
+i915_gem_create_context(struct drm_i915_private *dev_priv, unsigned int flags)
 {
 	struct i915_gem_context *ctx;
 
 	lockdep_assert_held(&dev_priv->drm.struct_mutex);
 
+	if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE &&
+	    !HAS_EXECLISTS(dev_priv))
+		return ERR_PTR(-EINVAL);
+
 	/* Reap the most stale context */
 	contexts_free_first(dev_priv);
 
@@ -431,6 +438,18 @@ i915_gem_create_context(struct drm_i915_private *dev_priv)
 		i915_ppgtt_put(ppgtt);
 	}
 
+	if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE) {
+		struct i915_timeline *timeline;
+
+		timeline = i915_timeline_create(dev_priv, NULL);
+		if (IS_ERR(timeline)) {
+			context_close(ctx);
+			return ERR_CAST(timeline);
+		}
+
+		ctx->timeline = timeline;
+	}
+
 	trace_i915_context_create(ctx);
 
 	return ctx;
@@ -459,7 +478,7 @@ i915_gem_context_create_gvt(struct drm_device *dev)
 	if (ret)
 		return ERR_PTR(ret);
 
-	ctx = i915_gem_create_context(to_i915(dev));
+	ctx = i915_gem_create_context(to_i915(dev), 0);
 	if (IS_ERR(ctx))
 		goto out;
 
@@ -495,7 +514,7 @@ i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio)
 	struct i915_gem_context *ctx;
 	int err;
 
-	ctx = i915_gem_create_context(i915);
+	ctx = i915_gem_create_context(i915, 0);
 	if (IS_ERR(ctx))
 		return ctx;
 
@@ -658,7 +677,7 @@ int i915_gem_context_open(struct drm_i915_private *i915,
 	idr_init_base(&file_priv->vm_idr, 1);
 
 	mutex_lock(&i915->drm.struct_mutex);
-	ctx = i915_gem_create_context(i915);
+	ctx = i915_gem_create_context(i915, 0);
 	mutex_unlock(&i915->drm.struct_mutex);
 	if (IS_ERR(ctx)) {
 		err = PTR_ERR(ctx);
@@ -800,7 +819,7 @@ last_request_on_engine(struct i915_timeline *timeline,
 
 	rq = i915_active_request_raw(&timeline->last_request,
 				     &engine->i915->drm.struct_mutex);
-	if (rq && rq->engine == engine) {
+	if (rq && rq->engine->mask & engine->mask) {
 		GEM_TRACE("last request on engine %s: %llx:%llu\n",
 			  engine->name, rq->fence.context, rq->fence.seqno);
 		GEM_BUG_ON(rq->timeline != timeline);
@@ -1520,7 +1539,7 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
 	if (ret)
 		return ret;
 
-	ext_data.ctx = i915_gem_create_context(i915);
+	ext_data.ctx = i915_gem_create_context(i915, args->flags);
 	mutex_unlock(&dev->struct_mutex);
 	if (IS_ERR(ext_data.ctx))
 		return PTR_ERR(ext_data.ctx);
diff --git a/drivers/gpu/drm/i915/i915_gem_context_types.h b/drivers/gpu/drm/i915/i915_gem_context_types.h
index 63ae8eb21939..e2ec58b10fb2 100644
--- a/drivers/gpu/drm/i915/i915_gem_context_types.h
+++ b/drivers/gpu/drm/i915/i915_gem_context_types.h
@@ -41,6 +41,8 @@ struct i915_gem_context {
 	/** file_priv: owning file descriptor */
 	struct drm_i915_file_private *file_priv;
 
+	struct i915_timeline *timeline;
+
 	/**
 	 * @ppgtt: unique address space (GTT)
 	 *
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 1529824d7c61..e9c2094ab8ea 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -992,6 +992,60 @@ void i915_request_skip(struct i915_request *rq, int error)
 	memset(vaddr + head, 0, rq->postfix - head);
 }
 
+static struct i915_request *
+__i915_request_add_to_timeline(struct i915_request *rq)
+{
+	struct i915_timeline *timeline = rq->timeline;
+	struct i915_request *prev;
+
+	/*
+	 * Dependency tracking and request ordering along the timeline
+	 * is special cased so that we can eliminate redundant ordering
+	 * operations while building the request (we know that the timeline
+	 * itself is ordered, and here we guarantee it).
+	 *
+	 * As we know we will need to emit tracking along the timeline,
+	 * we embed the hooks into our request struct -- at the cost of
+	 * having to have specialised no-allocation interfaces (which will
+	 * be beneficial elsewhere).
+	 *
+	 * A second benefit to open-coding i915_request_await_request is
+	 * that we can apply a slight variant of the rules specialised
+	 * for timelines that jump between engines (such as virtual engines).
+	 * If we consider the case of virtual engine, we must emit a dma-fence
+	 * to prevent scheduling of the second request until the first is
+	 * complete (to maximise our greedy late load balancing) and this
+	 * precludes optimising to use semaphores serialisation of a single
+	 * timeline across engines.
+	 */
+	prev = i915_active_request_raw(&timeline->last_request,
+				       &rq->i915->drm.struct_mutex);
+	if (prev && !i915_request_completed(prev)) {
+		if (is_power_of_2(prev->engine->mask | rq->engine->mask))
+			i915_sw_fence_await_sw_fence(&rq->submit,
+						     &prev->submit,
+						     &rq->submitq);
+		else
+			__i915_sw_fence_await_dma_fence(&rq->submit,
+							&prev->fence,
+							&rq->dmaq);
+		if (rq->engine->schedule)
+			__i915_sched_node_add_dependency(&rq->sched,
+							 &prev->sched,
+							 &rq->dep,
+							 0);
+	}
+
+	spin_lock_irq(&timeline->lock);
+	list_add_tail(&rq->link, &timeline->requests);
+	spin_unlock_irq(&timeline->lock);
+
+	GEM_BUG_ON(timeline->seqno != rq->fence.seqno);
+	__i915_active_request_set(&timeline->last_request, rq);
+
+	return prev;
+}
+
 /*
  * NB: This function is not allowed to fail. Doing so would mean the the
  * request is not being tracked for completion but the work itself is
@@ -1036,31 +1090,7 @@ void i915_request_add(struct i915_request *request)
 	GEM_BUG_ON(IS_ERR(cs));
 	request->postfix = intel_ring_offset(request, cs);
 
-	/*
-	 * Seal the request and mark it as pending execution. Note that
-	 * we may inspect this state, without holding any locks, during
-	 * hangcheck. Hence we apply the barrier to ensure that we do not
-	 * see a more recent value in the hws than we are tracking.
-	 */
-
-	prev = i915_active_request_raw(&timeline->last_request,
-				       &request->i915->drm.struct_mutex);
-	if (prev && !i915_request_completed(prev)) {
-		i915_sw_fence_await_sw_fence(&request->submit, &prev->submit,
-					     &request->submitq);
-		if (engine->schedule)
-			__i915_sched_node_add_dependency(&request->sched,
-							 &prev->sched,
-							 &request->dep,
-							 0);
-	}
-
-	spin_lock_irq(&timeline->lock);
-	list_add_tail(&request->link, &timeline->requests);
-	spin_unlock_irq(&timeline->lock);
-
-	GEM_BUG_ON(timeline->seqno != request->fence.seqno);
-	__i915_active_request_set(&timeline->last_request, request);
+	prev = __i915_request_add_to_timeline(request);
 
 	list_add_tail(&request->ring_link, &ring->request_list);
 	if (list_is_first(&request->ring_link, &ring->request_list))
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 8c8fa5010644..cd6c130964cd 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -128,7 +128,10 @@ struct i915_request {
 	 * It is used by the driver to then queue the request for execution.
 	 */
 	struct i915_sw_fence submit;
-	wait_queue_entry_t submitq;
+	union {
+		wait_queue_entry_t submitq;
+		struct i915_sw_dma_fence_cb dmaq;
+	};
 	struct list_head execute_cb;
 
 	/*
diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c
index 8d1400d378d7..5387aafd3424 100644
--- a/drivers/gpu/drm/i915/i915_sw_fence.c
+++ b/drivers/gpu/drm/i915/i915_sw_fence.c
@@ -359,11 +359,6 @@ int i915_sw_fence_await_sw_fence_gfp(struct i915_sw_fence *fence,
 	return __i915_sw_fence_await_sw_fence(fence, signaler, NULL, gfp);
 }
 
-struct i915_sw_dma_fence_cb {
-	struct dma_fence_cb base;
-	struct i915_sw_fence *fence;
-};
-
 struct i915_sw_dma_fence_cb_timer {
 	struct i915_sw_dma_fence_cb base;
 	struct dma_fence *dma;
@@ -480,6 +475,40 @@ int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
 	return ret;
 }
 
+static void __dma_i915_sw_fence_wake(struct dma_fence *dma,
+				     struct dma_fence_cb *data)
+{
+	struct i915_sw_dma_fence_cb *cb = container_of(data, typeof(*cb), base);
+
+	i915_sw_fence_complete(cb->fence);
+}
+
+int __i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
+				    struct dma_fence *dma,
+				    struct i915_sw_dma_fence_cb *cb)
+{
+	int ret;
+
+	debug_fence_assert(fence);
+
+	if (dma_fence_is_signaled(dma))
+		return 0;
+
+	cb->fence = fence;
+	i915_sw_fence_await(fence);
+
+	ret = dma_fence_add_callback(dma, &cb->base, __dma_i915_sw_fence_wake);
+	if (ret == 0) {
+		ret = 1;
+	} else {
+		i915_sw_fence_complete(fence);
+		if (ret == -ENOENT) /* fence already signaled */
+			ret = 0;
+	}
+
+	return ret;
+}
+
 int i915_sw_fence_await_reservation(struct i915_sw_fence *fence,
 				    struct reservation_object *resv,
 				    const struct dma_fence_ops *exclude,
diff --git a/drivers/gpu/drm/i915/i915_sw_fence.h b/drivers/gpu/drm/i915/i915_sw_fence.h
index 6dec9e1d1102..9cb5c3b307a6 100644
--- a/drivers/gpu/drm/i915/i915_sw_fence.h
+++ b/drivers/gpu/drm/i915/i915_sw_fence.h
@@ -9,14 +9,13 @@
 #ifndef _I915_SW_FENCE_H_
 #define _I915_SW_FENCE_H_
 
+#include <linux/dma-fence.h>
 #include <linux/gfp.h>
 #include <linux/kref.h>
 #include <linux/notifier.h> /* for NOTIFY_DONE */
 #include <linux/wait.h>
 
 struct completion;
-struct dma_fence;
-struct dma_fence_ops;
 struct reservation_object;
 
 struct i915_sw_fence {
@@ -68,10 +67,20 @@ int i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence,
 int i915_sw_fence_await_sw_fence_gfp(struct i915_sw_fence *fence,
 				     struct i915_sw_fence *after,
 				     gfp_t gfp);
+
+struct i915_sw_dma_fence_cb {
+	struct dma_fence_cb base;
+	struct i915_sw_fence *fence;
+};
+
+int __i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
+				    struct dma_fence *dma,
+				    struct i915_sw_dma_fence_cb *cb);
 int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
 				  struct dma_fence *dma,
 				  unsigned long timeout,
 				  gfp_t gfp);
+
 int i915_sw_fence_await_reservation(struct i915_sw_fence *fence,
 				    struct reservation_object *resv,
 				    const struct dma_fence_ops *exclude,
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 35f7ef9e75c8..66bc3cd4e166 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -2802,7 +2802,10 @@ err_unpin_ctx:
 
 static struct i915_timeline *get_timeline(struct i915_gem_context *ctx)
 {
-	return i915_timeline_create(ctx->i915, NULL);
+	if (ctx->timeline)
+		return i915_timeline_get(ctx->timeline);
+	else
+		return i915_timeline_create(ctx->i915, NULL);
 }
 
 static int execlists_context_deferred_alloc(struct intel_context *ce,
diff --git a/drivers/gpu/drm/i915/selftests/mock_context.c b/drivers/gpu/drm/i915/selftests/mock_context.c
index 81e5ace18b81..0426093bf1d9 100644
--- a/drivers/gpu/drm/i915/selftests/mock_context.c
+++ b/drivers/gpu/drm/i915/selftests/mock_context.c
@@ -97,7 +97,7 @@ live_context(struct drm_i915_private *i915, struct drm_file *file)
 
 	lockdep_assert_held(&i915->drm.struct_mutex);
 
-	ctx = i915_gem_create_context(i915);
+	ctx = i915_gem_create_context(i915, 0);
 	if (IS_ERR(ctx))
 		return ctx;
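
Two implementation details are worth noting. First, __i915_request_add_to_timeline() chooses between the two await paths with is_power_of_2(prev->engine->mask | rq->engine->mask): each physical engine owns exactly one bit in its mask, while a virtual engine carries the union of its siblings' bits, so the OR collapses to a single bit only when both requests are confined to one and the same physical engine, where the cheap submit-fence ordering suffices. Second, __i915_sw_fence_await_dma_fence() returns 1 when the await was installed, 0 when the fence had already signaled, or a negative error code, and it uses the callback embedded in the request (rq->dmaq) so that no allocation is needed on this path. A standalone sketch of the mask predicate, with illustrative (not the driver's actual) engine mask values:

#include <stdbool.h>
#include <stdio.h>

/* Same test as the kernel's is_power_of_2() from <linux/log2.h>. */
static bool is_power_of_2(unsigned long n)
{
	return n != 0 && (n & (n - 1)) == 0;
}

int main(void)
{
	/* Illustrative masks: one bit per physical engine. */
	unsigned long rcs0 = 1UL << 0;
	unsigned long vcs0 = 1UL << 1;
	unsigned long vcs1 = 1UL << 2;
	unsigned long veng = vcs0 | vcs1;	/* virtual engine: union of siblings */

	printf("%d\n", is_power_of_2(rcs0 | rcs0));	/* 1: same engine, submit fence */
	printf("%d\n", is_power_of_2(rcs0 | vcs0));	/* 0: cross-engine, dma-fence await */
	printf("%d\n", is_power_of_2(veng | vcs0));	/* 0: may run on either sibling */
	return 0;
}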