diff options
-rw-r--r-- | drivers/gpu/drm/i915/gt/intel_context.c | 19 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/gt/intel_engine_pm.c | 2 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/gt/intel_timeline.c | 2 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/gt/selftests/mock_timeline.c | 2 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/i915_active.c | 107 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/i915_active.h | 17 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/i915_active_types.h | 15 |
7 files changed, 62 insertions, 102 deletions
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index ef7bc41ffffa..b5e9c35ec6b8 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -310,27 +310,8 @@ int intel_context_prepare_remote_request(struct intel_context *ce, GEM_BUG_ON(rq->hw_context == ce); if (rcu_access_pointer(rq->timeline) != tl) { /* timeline sharing! */ - /* - * Ideally, we just want to insert our foreign fence as - * a barrier into the remove context, such that this operation - * occurs after all current operations in that context, and - * all future operations must occur after this. - * - * Currently, the timeline->last_request tracking is guarded - * by its mutex and so we must obtain that to atomically - * insert our barrier. However, since we already hold our - * timeline->mutex, we must be careful against potential - * inversion if we are the kernel_context as the remote context - * will itself poke at the kernel_context when it needs to - * unpin. Ergo, if already locked, we drop both locks and - * try again (through the magic of userspace repeating EAGAIN). - */ - if (!mutex_trylock(&tl->mutex)) - return -EAGAIN; - /* Queue this switch after current activity by this context. */ err = i915_active_fence_set(&tl->last_request, rq); - mutex_unlock(&tl->mutex); if (err) return err; } diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index 0e1ad4a4bd97..21183ad87368 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -183,7 +183,7 @@ static void call_idle_barriers(struct intel_engine_cs *engine) container_of((struct list_head *)node, typeof(*cb), node); - cb->func(NULL, cb); + cb->func(ERR_PTR(-EAGAIN), cb); } } diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c index c1d2419444f8..038e05a6336c 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline.c +++ b/drivers/gpu/drm/i915/gt/intel_timeline.c @@ -254,7 +254,7 @@ int intel_timeline_init(struct intel_timeline *timeline, mutex_init(&timeline->mutex); - INIT_ACTIVE_FENCE(&timeline->last_request, &timeline->mutex); + INIT_ACTIVE_FENCE(&timeline->last_request); INIT_LIST_HEAD(&timeline->requests); i915_syncmap_init(&timeline->sync); diff --git a/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c b/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c index 2a77c051f36a..aeb1d1f616e8 100644 --- a/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c +++ b/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c @@ -15,7 +15,7 @@ void mock_timeline_init(struct intel_timeline *timeline, u64 context) mutex_init(&timeline->mutex); - INIT_ACTIVE_FENCE(&timeline->last_request, &timeline->mutex); + INIT_ACTIVE_FENCE(&timeline->last_request); INIT_LIST_HEAD(&timeline->requests); i915_syncmap_init(&timeline->sync); diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c index dca15ace88f6..479195ecbc6c 100644 --- a/drivers/gpu/drm/i915/i915_active.c +++ b/drivers/gpu/drm/i915/i915_active.c @@ -186,18 +186,33 @@ active_retire(struct i915_active *ref) __active_retire(ref); } +static inline struct dma_fence ** +__active_fence_slot(struct i915_active_fence *active) +{ + return (struct dma_fence ** __force)&active->fence; +} + +static inline bool +active_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb) +{ + struct i915_active_fence *active = + container_of(cb, typeof(*active), cb); + + return cmpxchg(__active_fence_slot(active), fence, NULL) == fence; +} + static void node_retire(struct dma_fence *fence, struct dma_fence_cb *cb) { - i915_active_fence_cb(fence, cb); - active_retire(container_of(cb, struct active_node, base.cb)->ref); + if (active_fence_cb(fence, cb)) + active_retire(container_of(cb, struct active_node, base.cb)->ref); } static void excl_retire(struct dma_fence *fence, struct dma_fence_cb *cb) { - i915_active_fence_cb(fence, cb); - active_retire(container_of(cb, struct i915_active, excl.cb)); + if (active_fence_cb(fence, cb)) + active_retire(container_of(cb, struct i915_active, excl.cb)); } static struct i915_active_fence * @@ -244,7 +259,7 @@ active_instance(struct i915_active *ref, struct intel_timeline *tl) } node = prealloc; - __i915_active_fence_init(&node->base, &tl->mutex, NULL, node_retire); + __i915_active_fence_init(&node->base, NULL, node_retire); node->ref = ref; node->timeline = idx; @@ -281,7 +296,7 @@ void __i915_active_init(struct i915_active *ref, init_llist_head(&ref->preallocated_barriers); atomic_set(&ref->count, 0); __mutex_init(&ref->mutex, "i915_active", key); - __i915_active_fence_init(&ref->excl, &ref->mutex, NULL, excl_retire); + __i915_active_fence_init(&ref->excl, NULL, excl_retire); INIT_WORK(&ref->work, active_work); } @@ -376,15 +391,8 @@ void i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f) /* We expect the caller to manage the exclusive timeline ordering */ GEM_BUG_ON(i915_active_is_idle(ref)); - /* - * As we don't know which mutex the caller is using, we told a small - * lie to the debug code that it is using the i915_active.mutex; - * and now we must stick to that lie. - */ - mutex_acquire(&ref->mutex.dep_map, 0, 0, _THIS_IP_); if (!__i915_active_fence_set(&ref->excl, f)) atomic_inc(&ref->count); - mutex_release(&ref->mutex.dep_map, 0, _THIS_IP_); } bool i915_active_acquire_if_busy(struct i915_active *ref) @@ -615,10 +623,6 @@ int i915_active_acquire_preallocate_barrier(struct i915_active *ref, goto unwind; } -#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) - node->base.lock = - &engine->kernel_context->timeline->mutex; -#endif RCU_INIT_POINTER(node->base.fence, NULL); node->base.cb.func = node_retire; node->timeline = idx; @@ -639,6 +643,7 @@ int i915_active_acquire_preallocate_barrier(struct i915_active *ref, node->base.cb.node.prev = (void *)engine; atomic_inc(&ref->count); } + GEM_BUG_ON(rcu_access_pointer(node->base.fence) != ERR_PTR(-EAGAIN)); GEM_BUG_ON(barrier_to_engine(node) != engine); llist_add(barrier_to_ll(node), &ref->preallocated_barriers); @@ -702,6 +707,11 @@ void i915_active_acquire_barrier(struct i915_active *ref) } } +static struct dma_fence **ll_to_fence_slot(struct llist_node *node) +{ + return __active_fence_slot(&barrier_from_ll(node)->base); +} + void i915_request_add_active_barriers(struct i915_request *rq) { struct intel_engine_cs *engine = rq->engine; @@ -721,19 +731,13 @@ void i915_request_add_active_barriers(struct i915_request *rq) */ spin_lock_irqsave(&rq->lock, flags); llist_for_each_safe(node, next, node) { - RCU_INIT_POINTER(barrier_from_ll(node)->base.fence, &rq->fence); - smp_wmb(); /* serialise with reuse_idle_barrier */ + /* serialise with reuse_idle_barrier */ + smp_store_mb(*ll_to_fence_slot(node), &rq->fence); list_add_tail((struct list_head *)node, &rq->fence.cb_list); } spin_unlock_irqrestore(&rq->lock, flags); } -#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) -#define active_is_held(active) lockdep_is_held((active)->lock) -#else -#define active_is_held(active) true -#endif - /* * __i915_active_fence_set: Update the last active fence along its timeline * @active: the active tracker @@ -744,7 +748,7 @@ void i915_request_add_active_barriers(struct i915_request *rq) * fence onto this one. Returns the previous fence (if not already completed), * which the caller must ensure is executed before the new fence. To ensure * that the order of fences within the timeline of the i915_active_fence is - * maintained, it must be locked by the caller. + * understood, it should be locked by the caller. */ struct dma_fence * __i915_active_fence_set(struct i915_active_fence *active, @@ -753,34 +757,41 @@ __i915_active_fence_set(struct i915_active_fence *active, struct dma_fence *prev; unsigned long flags; - /* NB: must be serialised by an outer timeline mutex (active->lock) */ - spin_lock_irqsave(fence->lock, flags); + if (fence == rcu_access_pointer(active->fence)) + return fence; + GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)); - prev = rcu_dereference_protected(active->fence, active_is_held(active)); + /* + * Consider that we have two threads arriving (A and B), with + * C already resident as the active->fence. + * + * A does the xchg first, and so it sees C or NULL depending + * on the timing of the interrupt handler. If it is NULL, the + * previous fence must have been signaled and we know that + * we are first on the timeline. If it is still present, + * we acquire the lock on that fence and serialise with the interrupt + * handler, in the process removing it from any future interrupt + * callback. A will then wait on C before executing (if present). + * + * As B is second, it sees A as the previous fence and so waits for + * it to complete its transition and takes over the occupancy for + * itself -- remembering that it needs to wait on A before executing. + * + * Note the strong ordering of the timeline also provides consistent + * nesting rules for the fence->lock; the inner lock is always the + * older lock. + */ + spin_lock_irqsave(fence->lock, flags); + prev = xchg(__active_fence_slot(active), fence); if (prev) { GEM_BUG_ON(prev == fence); spin_lock_nested(prev->lock, SINGLE_DEPTH_NESTING); __list_del_entry(&active->cb.node); spin_unlock(prev->lock); /* serialise with prev->cb_list */ - - /* - * active->fence is reset by the callback from inside - * interrupt context. We need to serialise our list - * manipulation with the fence->lock to prevent the prev - * being lost inside an interrupt (it can't be replaced as - * no other caller is allowed to enter __i915_active_fence_set - * as we hold the timeline lock). After serialising with - * the callback, we need to double check which ran first, - * our list_del() [decoupling prev from the callback] or - * the callback... - */ - prev = rcu_access_pointer(active->fence); } - - rcu_assign_pointer(active->fence, fence); + GEM_BUG_ON(rcu_access_pointer(active->fence) != fence); list_add_tail(&active->cb.node, &fence->cb_list); - spin_unlock_irqrestore(fence->lock, flags); return prev; @@ -792,10 +803,6 @@ int i915_active_fence_set(struct i915_active_fence *active, struct dma_fence *fence; int err = 0; -#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) - lockdep_assert_held(active->lock); -#endif - /* Must maintain timeline ordering wrt previous active requests */ rcu_read_lock(); fence = __i915_active_fence_set(active, &rq->fence); @@ -812,7 +819,7 @@ int i915_active_fence_set(struct i915_active_fence *active, void i915_active_noop(struct dma_fence *fence, struct dma_fence_cb *cb) { - i915_active_fence_cb(fence, cb); + active_fence_cb(fence, cb); } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h index 5dd62323b92a..3208cc2e8c1a 100644 --- a/drivers/gpu/drm/i915/i915_active.h +++ b/drivers/gpu/drm/i915/i915_active.h @@ -61,19 +61,15 @@ void i915_active_noop(struct dma_fence *fence, struct dma_fence_cb *cb); */ static inline void __i915_active_fence_init(struct i915_active_fence *active, - struct mutex *lock, void *fence, dma_fence_func_t fn) { RCU_INIT_POINTER(active->fence, fence); active->cb.func = fn ?: i915_active_noop; -#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) - active->lock = lock; -#endif } -#define INIT_ACTIVE_FENCE(A, LOCK) \ - __i915_active_fence_init((A), (LOCK), NULL, NULL) +#define INIT_ACTIVE_FENCE(A) \ + __i915_active_fence_init((A), NULL, NULL) struct dma_fence * __i915_active_fence_set(struct i915_active_fence *active, @@ -127,15 +123,6 @@ i915_active_fence_isset(const struct i915_active_fence *active) return rcu_access_pointer(active->fence); } -static inline void -i915_active_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb) -{ - struct i915_active_fence *active = - container_of(cb, typeof(*active), cb); - - RCU_INIT_POINTER(active->fence, NULL); -} - /* * GPU activity tracking * diff --git a/drivers/gpu/drm/i915/i915_active_types.h b/drivers/gpu/drm/i915/i915_active_types.h index 96aed0ee700a..6360c3e4b765 100644 --- a/drivers/gpu/drm/i915/i915_active_types.h +++ b/drivers/gpu/drm/i915/i915_active_types.h @@ -20,21 +20,6 @@ struct i915_active_fence { struct dma_fence __rcu *fence; struct dma_fence_cb cb; -#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) - /* - * Incorporeal! - * - * Updates to the i915_active_request must be serialised under a lock - * to ensure that the timeline is ordered. Normally, this is the - * timeline->mutex, but another mutex may be used so long as it is - * done so consistently. - * - * For lockdep tracking of the above, we store the lock we intend - * to always use for updates of this i915_active_request during - * construction and assert that is held on every update. - */ - struct mutex *lock; -#endif }; struct active_node; |