Diffstat (limited to 'drivers/gpu/drm/i915/i915_request.c')
-rw-r--r--	drivers/gpu/drm/i915/i915_request.c	299
1 file changed, 129 insertions(+), 170 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 37aef1308573..ce446716d092 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -42,22 +42,17 @@
 
 #include "i915_active.h"
 #include "i915_drv.h"
-#include "i915_globals.h"
 #include "i915_trace.h"
 #include "intel_pm.h"
 
 struct execute_cb {
 	struct irq_work work;
 	struct i915_sw_fence *fence;
-	void (*hook)(struct i915_request *rq, struct dma_fence *signal);
 	struct i915_request *signal;
 };
 
-static struct i915_global_request {
-	struct i915_global base;
-	struct kmem_cache *slab_requests;
-	struct kmem_cache *slab_execute_cbs;
-} global;
+static struct kmem_cache *slab_requests;
+static struct kmem_cache *slab_execute_cbs;
 
 static const char *i915_fence_get_driver_name(struct dma_fence *fence)
 {
@@ -108,13 +103,16 @@ static signed long i915_fence_wait(struct dma_fence *fence,
 
 struct kmem_cache *i915_request_slab_cache(void)
 {
-	return global.slab_requests;
+	return slab_requests;
 }
 
 static void i915_fence_release(struct dma_fence *fence)
 {
 	struct i915_request *rq = to_request(fence);
 
+	GEM_BUG_ON(rq->guc_prio != GUC_PRIO_INIT &&
+		   rq->guc_prio != GUC_PRIO_FINI);
+
 	/*
 	 * The request is put onto a RCU freelist (i.e. the address
 	 * is immediately reused), mark the fences as being freed now.
@@ -126,41 +124,19 @@ static void i915_fence_release(struct dma_fence *fence)
 	i915_sw_fence_fini(&rq->semaphore);
 
 	/*
-	 * Keep one request on each engine for reserved use under mempressure
-	 *
-	 * We do not hold a reference to the engine here and so have to be
-	 * very careful in what rq->engine we poke. The virtual engine is
-	 * referenced via the rq->context and we released that ref during
-	 * i915_request_retire(), ergo we must not dereference a virtual
-	 * engine here. Not that we would want to, as the only consumer of
-	 * the reserved engine->request_pool is the power management parking,
-	 * which must-not-fail, and that is only run on the physical engines.
-	 *
-	 * Since the request must have been executed to be have completed,
-	 * we know that it will have been processed by the HW and will
-	 * not be unsubmitted again, so rq->engine and rq->execution_mask
-	 * at this point is stable. rq->execution_mask will be a single
-	 * bit if the last and _only_ engine it could execution on was a
-	 * physical engine, if it's multiple bits then it started on and
-	 * could still be on a virtual engine. Thus if the mask is not a
-	 * power-of-two we assume that rq->engine may still be a virtual
-	 * engine and so a dangling invalid pointer that we cannot dereference
-	 *
-	 * For example, consider the flow of a bonded request through a virtual
-	 * engine. The request is created with a wide engine mask (all engines
-	 * that we might execute on). On processing the bond, the request mask
-	 * is reduced to one or more engines. If the request is subsequently
-	 * bound to a single engine, it will then be constrained to only
-	 * execute on that engine and never returned to the virtual engine
-	 * after timeslicing away, see __unwind_incomplete_requests(). Thus we
-	 * know that if the rq->execution_mask is a single bit, rq->engine
-	 * can be a physical engine with the exact corresponding mask.
+	 * Keep one request on each engine for reserved use under mempressure,
+	 * do not use with virtual engines as this really is only needed for
+	 * kernel contexts.
 	 */
-	if (is_power_of_2(rq->execution_mask) &&
-	    !cmpxchg(&rq->engine->request_pool, NULL, rq))
+	if (!intel_engine_is_virtual(rq->engine) &&
+	    !cmpxchg(&rq->engine->request_pool, NULL, rq)) {
+		intel_context_put(rq->context);
 		return;
+	}
+
+	intel_context_put(rq->context);
 
-	kmem_cache_free(global.slab_requests, rq);
+	kmem_cache_free(slab_requests, rq);
 }
 
 const struct dma_fence_ops i915_fence_ops = {
@@ -177,18 +153,7 @@ static void irq_execute_cb(struct irq_work *wrk)
 	struct execute_cb *cb = container_of(wrk, typeof(*cb), work);
 
 	i915_sw_fence_complete(cb->fence);
-	kmem_cache_free(global.slab_execute_cbs, cb);
-}
-
-static void irq_execute_cb_hook(struct irq_work *wrk)
-{
-	struct execute_cb *cb = container_of(wrk, typeof(*cb), work);
-
-	cb->hook(container_of(cb->fence, struct i915_request, submit),
-		 &cb->signal->fence);
-	i915_request_put(cb->signal);
-
-	irq_execute_cb(wrk);
+	kmem_cache_free(slab_execute_cbs, cb);
 }
 
 static __always_inline void
@@ -216,7 +181,7 @@ static bool irq_work_imm(struct irq_work *wrk)
 	return false;
 }
 
-static void __notify_execute_cb_imm(struct i915_request *rq)
+void i915_request_notify_execute_cb_imm(struct i915_request *rq)
 {
 	__notify_execute_cb(rq, irq_work_imm);
 }
@@ -272,11 +237,11 @@ i915_request_active_engine(struct i915_request *rq,
 	 * check that we have acquired the lock on the final engine.
 	 */
 	locked = READ_ONCE(rq->engine);
-	spin_lock_irq(&locked->active.lock);
+	spin_lock_irq(&locked->sched_engine->lock);
 	while (unlikely(locked != (engine = READ_ONCE(rq->engine)))) {
-		spin_unlock(&locked->active.lock);
+		spin_unlock(&locked->sched_engine->lock);
 		locked = engine;
-		spin_lock(&locked->active.lock);
+		spin_lock(&locked->sched_engine->lock);
 	}
 
 	if (i915_request_is_active(rq)) {
@@ -285,42 +250,11 @@ i915_request_active_engine(struct i915_request *rq,
 		ret = true;
 	}
 
-	spin_unlock_irq(&locked->active.lock);
+	spin_unlock_irq(&locked->sched_engine->lock);
 
 	return ret;
 }
-
-static void remove_from_engine(struct i915_request *rq)
-{
-	struct intel_engine_cs *engine, *locked;
-
-	/*
-	 * Virtual engines complicate acquiring the engine timeline lock,
-	 * as their rq->engine pointer is not stable until under that
-	 * engine lock. The simple ploy we use is to take the lock then
-	 * check that the rq still belongs to the newly locked engine.
-	 */
-	locked = READ_ONCE(rq->engine);
-	spin_lock_irq(&locked->active.lock);
-	while (unlikely(locked != (engine = READ_ONCE(rq->engine)))) {
-		spin_unlock(&locked->active.lock);
-		spin_lock(&engine->active.lock);
-		locked = engine;
-	}
-	list_del_init(&rq->sched.link);
-
-	clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
-	clear_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags);
-
-	/* Prevent further __await_execution() registering a cb, then flush */
-	set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
-
-	spin_unlock_irq(&locked->active.lock);
-
-	__notify_execute_cb_imm(rq);
-}
 
 static void __rq_init_watchdog(struct i915_request *rq)
 {
 	rq->watchdog.timer.function = NULL;
@@ -417,8 +351,7 @@ bool i915_request_retire(struct i915_request *rq)
 	 * after removing the breadcrumb and signaling it, so that we do not
 	 * inadvertently attach the breadcrumb to a completed request.
 	 */
-	if (!list_empty(&rq->sched.link))
-		remove_from_engine(rq);
+	rq->engine->remove_active_request(rq);
 	GEM_BUG_ON(!llist_empty(&rq->execute_cb));
 
 	__list_del_entry(&rq->link); /* poison neither prev/next (RCU walks) */
@@ -443,6 +376,7 @@ void i915_request_retire_upto(struct i915_request *rq)
 	do {
 		tmp = list_first_entry(&tl->requests, typeof(*tmp), link);
 
+		GEM_BUG_ON(!i915_request_completed(tmp));
 	} while (i915_request_retire(tmp) && tmp != rq);
 }
 
@@ -517,19 +451,14 @@ static bool __request_in_flight(const struct i915_request *signal)
 static int
 __await_execution(struct i915_request *rq,
 		  struct i915_request *signal,
-		  void (*hook)(struct i915_request *rq,
-			       struct dma_fence *signal),
 		  gfp_t gfp)
 {
 	struct execute_cb *cb;
 
-	if (i915_request_is_active(signal)) {
-		if (hook)
-			hook(rq, &signal->fence);
+	if (i915_request_is_active(signal))
 		return 0;
-	}
 
-	cb = kmem_cache_alloc(global.slab_execute_cbs, gfp);
+	cb = kmem_cache_alloc(slab_execute_cbs, gfp);
 	if (!cb)
 		return -ENOMEM;
 
@@ -537,12 +466,6 @@ __await_execution(struct i915_request *rq,
 	i915_sw_fence_await(cb->fence);
 	init_irq_work(&cb->work, irq_execute_cb);
 
-	if (hook) {
-		cb->hook = hook;
-		cb->signal = i915_request_get(signal);
-		cb->work.func = irq_execute_cb_hook;
-	}
-
 	/*
 	 * Register the callback first, then see if the signaler is already
 	 * active. This ensures that if we race with the
@@ -559,7 +482,7 @@ __await_execution(struct i915_request *rq,
 	if (llist_add(&cb->work.node.llist, &signal->execute_cb)) {
 		if (i915_request_is_active(signal) ||
 		    __request_in_flight(signal))
-			__notify_execute_cb_imm(signal);
+			i915_request_notify_execute_cb_imm(signal);
 	}
 
 	return 0;
@@ -637,7 +560,7 @@ bool __i915_request_submit(struct i915_request *request)
 	RQ_TRACE(request, "\n");
 
 	GEM_BUG_ON(!irqs_disabled());
-	lockdep_assert_held(&engine->active.lock);
+	lockdep_assert_held(&engine->sched_engine->lock);
 
 	/*
 	 * With the advent of preempt-to-busy, we frequently encounter
@@ -649,7 +572,7 @@ bool __i915_request_submit(struct i915_request *request)
 	 *
 	 * We must remove the request from the caller's priority queue,
 	 * and the caller must only call us when the request is in their
-	 * priority queue, under the active.lock. This ensures that the
+	 * priority queue, under the sched_engine->lock. This ensures that the
 	 * request has *not* yet been retired and we can safely move
 	 * the request into the engine->active.list where it will be
	 * dropped upon retiring. (Otherwise if resubmit a *retired*
@@ -690,11 +613,15 @@ bool __i915_request_submit(struct i915_request *request)
 					     request->ring->vaddr + request->postfix);
 
 	trace_i915_request_execute(request);
-	engine->serial++;
+	if (engine->bump_serial)
+		engine->bump_serial(engine);
+	else
+		engine->serial++;
+
 	result = true;
 
 	GEM_BUG_ON(test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags));
-	list_move_tail(&request->sched.link, &engine->active.requests);
+	engine->add_active_request(request);
 active:
 	clear_bit(I915_FENCE_FLAG_PQUEUE, &request->fence.flags);
 	set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags);
@@ -724,11 +651,11 @@ void i915_request_submit(struct i915_request *request)
 	unsigned long flags;
 
 	/* Will be called from irq-context when using foreign fences. */
-	spin_lock_irqsave(&engine->active.lock, flags);
+	spin_lock_irqsave(&engine->sched_engine->lock, flags);
 
 	__i915_request_submit(request);
 
-	spin_unlock_irqrestore(&engine->active.lock, flags);
+	spin_unlock_irqrestore(&engine->sched_engine->lock, flags);
 }
 
 void __i915_request_unsubmit(struct i915_request *request)
@@ -742,7 +669,7 @@ void __i915_request_unsubmit(struct i915_request *request)
 	RQ_TRACE(request, "\n");
 
 	GEM_BUG_ON(!irqs_disabled());
-	lockdep_assert_held(&engine->active.lock);
+	lockdep_assert_held(&engine->sched_engine->lock);
 
 	/*
 	 * Before we remove this breadcrumb from the signal list, we have
@@ -775,23 +702,11 @@ void i915_request_unsubmit(struct i915_request *request)
 	unsigned long flags;
 
 	/* Will be called from irq-context when using foreign fences. */
-	spin_lock_irqsave(&engine->active.lock, flags);
+	spin_lock_irqsave(&engine->sched_engine->lock, flags);
 
 	__i915_request_unsubmit(request);
 
-	spin_unlock_irqrestore(&engine->active.lock, flags);
-}
-
-static void __cancel_request(struct i915_request *rq)
-{
-	struct intel_engine_cs *engine = NULL;
-
-	i915_request_active_engine(rq, &engine);
-
-	if (engine && intel_engine_pulse(engine))
-		intel_gt_handle_error(engine->gt, engine->mask, 0,
-				      "request cancellation by %s",
-				      current->comm);
+	spin_unlock_irqrestore(&engine->sched_engine->lock, flags);
 }
 
 void i915_request_cancel(struct i915_request *rq, int error)
@@ -801,7 +716,7 @@ void i915_request_cancel(struct i915_request *rq, int error)
 
 	set_bit(I915_FENCE_FLAG_SENTINEL, &rq->fence.flags);
 
-	__cancel_request(rq);
+	intel_context_cancel_request(rq->context, rq);
 }
 
 static int __i915_sw_fence_call
@@ -889,7 +804,7 @@ request_alloc_slow(struct intel_timeline *tl,
 	rq = list_first_entry(&tl->requests, typeof(*rq), link);
 	i915_request_retire(rq);
 
-	rq = kmem_cache_alloc(global.slab_requests,
+	rq = kmem_cache_alloc(slab_requests,
 			      gfp | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
 	if (rq)
 		return rq;
@@ -902,7 +817,7 @@ request_alloc_slow(struct intel_timeline *tl,
 	retire_requests(tl);
 
 out:
-	return kmem_cache_alloc(global.slab_requests, gfp);
+	return kmem_cache_alloc(slab_requests, gfp);
 }
 
 static void __i915_request_ctor(void *arg)
@@ -963,7 +878,7 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
 	 *
 	 * Do not use kmem_cache_zalloc() here!
 	 */
-	rq = kmem_cache_alloc(global.slab_requests,
+	rq = kmem_cache_alloc(slab_requests,
 			      gfp | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
 	if (unlikely(!rq)) {
 		rq = request_alloc_slow(tl, &ce->engine->request_pool, gfp);
@@ -973,7 +888,19 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
 		}
 	}
 
-	rq->context = ce;
+	/*
+	 * Hold a reference to the intel_context over life of an i915_request.
+	 * Without this an i915_request can exist after the context has been
+	 * destroyed (e.g. request retired, context closed, but user space holds
+	 * a reference to the request from an out fence). In the case of GuC
+	 * submission + virtual engine, the engine that the request references
+	 * is also destroyed which can trigger bad pointer dref in fence ops
+	 * (e.g. i915_fence_get_driver_name). We could likely change these
+	 * functions to avoid touching the engine but let's just be safe and
+	 * hold the intel_context reference. In execlist mode the request always
+	 * eventually points to a physical engine so this isn't an issue.
+	 */
+	rq->context = intel_context_get(ce);
 	rq->engine = ce->engine;
 	rq->ring = ce->ring;
 	rq->execution_mask = ce->engine->mask;
@@ -996,6 +923,8 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
 
 	rq->rcustate = get_state_synchronize_rcu(); /* acts as smp_mb() */
 
+	rq->guc_prio = GUC_PRIO_INIT;
+
 	/* We bump the ref for the fence chain */
 	i915_sw_fence_reinit(&i915_request_get(rq)->submit);
 	i915_sw_fence_reinit(&i915_request_get(rq)->semaphore);
@@ -1050,7 +979,8 @@ err_unwind:
 	GEM_BUG_ON(!list_empty(&rq->sched.waiters_list));
 
 err_free:
-	kmem_cache_free(global.slab_requests, rq);
+	intel_context_put(ce);
+	kmem_cache_free(slab_requests, rq);
 err_unreserve:
 	intel_context_unpin(ce);
 	return ERR_PTR(ret);
@@ -1253,7 +1183,7 @@ emit_semaphore_wait(struct i915_request *to,
 		goto await_fence;
 
 	/* Only submit our spinner after the signaler is running! */
-	if (__await_execution(to, from, NULL, gfp))
+	if (__await_execution(to, from, gfp))
 		goto await_fence;
 
 	if (__emit_semaphore_wait(to, from, from->fence.seqno))
@@ -1284,16 +1214,14 @@ static int intel_timeline_sync_set_start(struct intel_timeline *tl,
 
 static int
 __i915_request_await_execution(struct i915_request *to,
-			       struct i915_request *from,
-			       void (*hook)(struct i915_request *rq,
-					    struct dma_fence *signal))
+			       struct i915_request *from)
 {
 	int err;
 
 	GEM_BUG_ON(intel_context_is_barrier(from->context));
 
 	/* Submit both requests at the same time */
-	err = __await_execution(to, from, hook, I915_FENCE_GFP);
+	err = __await_execution(to, from, I915_FENCE_GFP);
 	if (err)
 		return err;
 
@@ -1343,7 +1271,7 @@ __i915_request_await_execution(struct i915_request *to,
 	}
 
 	/* Couple the dependency tree for PI on this exposed to->fence */
-	if (to->engine->schedule) {
+	if (to->engine->sched_engine->schedule) {
 		err = i915_sched_node_add_dependency(&to->sched,
 						     &from->sched,
 						     I915_DEPENDENCY_WEAK);
@@ -1406,9 +1334,7 @@ i915_request_await_external(struct i915_request *rq, struct dma_fence *fence)
 
 int
 i915_request_await_execution(struct i915_request *rq,
-			     struct dma_fence *fence,
-			     void (*hook)(struct i915_request *rq,
-					  struct dma_fence *signal))
+			     struct dma_fence *fence)
 {
 	struct dma_fence **child = &fence;
 	unsigned int nchild = 1;
@@ -1439,8 +1365,7 @@ i915_request_await_execution(struct i915_request *rq,
 
 		if (dma_fence_is_i915(fence))
 			ret = __i915_request_await_execution(rq,
-							     to_request(fence),
-							     hook);
+							     to_request(fence));
 		else
 			ret = i915_request_await_external(rq, fence);
 		if (ret < 0)
@@ -1466,7 +1391,7 @@ await_request_submit(struct i915_request *to, struct i915_request *from)
 							&from->submit,
 							I915_FENCE_GFP);
 	else
-		return __i915_request_await_execution(to, from, NULL);
+		return __i915_request_await_execution(to, from);
 }
 
 static int
@@ -1482,7 +1407,7 @@ i915_request_await_request(struct i915_request *to, struct i915_request *from)
 		return 0;
 	}
 
-	if (to->engine->schedule) {
+	if (to->engine->sched_engine->schedule) {
 		ret = i915_sched_node_add_dependency(&to->sched,
 						     &from->sched,
 						     I915_DEPENDENCY_EXTERNAL);
@@ -1490,7 +1415,8 @@ i915_request_await_request(struct i915_request *to, struct i915_request *from)
 			return ret;
 	}
 
-	if (is_power_of_2(to->execution_mask | READ_ONCE(from->execution_mask)))
+	if (!intel_engine_uses_guc(to->engine) &&
+	    is_power_of_2(to->execution_mask | READ_ONCE(from->execution_mask)))
 		ret = await_request_submit(to, from);
 	else
 		ret = emit_semaphore_wait(to, from, I915_FENCE_GFP);
@@ -1649,6 +1575,8 @@ __i915_request_add_to_timeline(struct i915_request *rq)
 	prev = to_request(__i915_active_fence_set(&timeline->last_request,
						  &rq->fence));
 	if (prev && !__i915_request_is_complete(prev)) {
+		bool uses_guc = intel_engine_uses_guc(rq->engine);
+
 		/*
 		 * The requests are supposed to be kept in order. However,
 		 * we need to be wary in case the timeline->last_request
@@ -1659,7 +1587,9 @@ __i915_request_add_to_timeline(struct i915_request *rq)
 			   i915_seqno_passed(prev->fence.seqno,
 					     rq->fence.seqno));
 
-		if (is_power_of_2(READ_ONCE(prev->engine)->mask | rq->engine->mask))
+		if ((!uses_guc &&
+		     is_power_of_2(READ_ONCE(prev->engine)->mask | rq->engine->mask)) ||
+		    (uses_guc && prev->context == rq->context))
 			i915_sw_fence_await_sw_fence(&rq->submit,
 						     &prev->submit,
 						     &rq->submitq);
@@ -1667,7 +1597,7 @@ __i915_request_add_to_timeline(struct i915_request *rq)
 			__i915_sw_fence_await_dma_fence(&rq->submit,
 							&prev->fence,
 							&rq->dmaq);
-		if (rq->engine->schedule)
+		if (rq->engine->sched_engine->schedule)
 			__i915_sched_node_add_dependency(&rq->sched,
 							 &prev->sched,
 							 &rq->dep,
@@ -1739,8 +1669,8 @@ void __i915_request_queue(struct i915_request *rq,
 	 * decide whether to preempt the entire chain so that it is ready to
 	 * run at the earliest possible convenience.
 	 */
-	if (attr && rq->engine->schedule)
-		rq->engine->schedule(rq, attr);
+	if (attr && rq->engine->sched_engine->schedule)
+		rq->engine->sched_engine->schedule(rq, attr);
 
 	local_bh_disable();
 	__i915_request_queue_bh(rq);
@@ -2100,31 +2030,61 @@ void i915_request_show(struct drm_printer *m,
 		   name);
 }
 
-#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
-#include "selftests/mock_request.c"
-#include "selftests/i915_request.c"
-#endif
+static bool engine_match_ring(struct intel_engine_cs *engine, struct i915_request *rq)
+{
+	u32 ring = ENGINE_READ(engine, RING_START);
+
+	return ring == i915_ggtt_offset(rq->ring->vma);
+}
 
-static void i915_global_request_shrink(void)
+static bool match_ring(struct i915_request *rq)
 {
-	kmem_cache_shrink(global.slab_execute_cbs);
-	kmem_cache_shrink(global.slab_requests);
+	struct intel_engine_cs *engine;
+	bool found;
+	int i;
+
+	if (!intel_engine_is_virtual(rq->engine))
+		return engine_match_ring(rq->engine, rq);
+
+	found = false;
+	i = 0;
+	while ((engine = intel_engine_get_sibling(rq->engine, i++))) {
+		found = engine_match_ring(engine, rq);
+		if (found)
+			break;
+	}
+
+	return found;
 }
 
-static void i915_global_request_exit(void)
+enum i915_request_state i915_test_request_state(struct i915_request *rq)
 {
-	kmem_cache_destroy(global.slab_execute_cbs);
-	kmem_cache_destroy(global.slab_requests);
+	if (i915_request_completed(rq))
+		return I915_REQUEST_COMPLETE;
+
+	if (!i915_request_started(rq))
+		return I915_REQUEST_PENDING;
+
+	if (match_ring(rq))
+		return I915_REQUEST_ACTIVE;
+
+	return I915_REQUEST_QUEUED;
 }
 
-static struct i915_global_request global = { {
-	.shrink = i915_global_request_shrink,
-	.exit = i915_global_request_exit,
-} };
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/mock_request.c"
+#include "selftests/i915_request.c"
+#endif
+
+void i915_request_module_exit(void)
+{
+	kmem_cache_destroy(slab_execute_cbs);
+	kmem_cache_destroy(slab_requests);
+}
 
-int __init i915_global_request_init(void)
+int __init i915_request_module_init(void)
 {
-	global.slab_requests =
+	slab_requests =
 		kmem_cache_create("i915_request",
 				  sizeof(struct i915_request),
 				  __alignof__(struct i915_request),
@@ -2132,20 +2092,19 @@ int __init i915_request_module_init(void)
 				  SLAB_RECLAIM_ACCOUNT |
 				  SLAB_TYPESAFE_BY_RCU,
 				  __i915_request_ctor);
-	if (!global.slab_requests)
+	if (!slab_requests)
 		return -ENOMEM;
 
-	global.slab_execute_cbs = KMEM_CACHE(execute_cb,
+	slab_execute_cbs = KMEM_CACHE(execute_cb,
 					     SLAB_HWCACHE_ALIGN |
 					     SLAB_RECLAIM_ACCOUNT |
 					     SLAB_TYPESAFE_BY_RCU);
-	if (!global.slab_execute_cbs)
+	if (!slab_execute_cbs)
 		goto err_requests;
 
-	i915_global_register(&global.base);
 	return 0;
 
 err_requests:
-	kmem_cache_destroy(global.slab_requests);
+	kmem_cache_destroy(slab_requests);
 	return -ENOMEM;
 }
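
For reference, a minimal caller-side sketch of how the new i915_request_module_init()/i915_request_module_exit() pair could be wired up now that the i915_globals registration (i915_global_register, the shrink/exit callbacks) is gone. The init_funcs table and the i915_init()/i915_exit() wrappers below are illustrative assumptions, not the driver's actual module code:

/*
 * Hypothetical caller-side wiring for the module-level init/exit pair.
 * The names init_funcs, i915_init and i915_exit are assumptions made
 * for this sketch only.
 */
static const struct {
	int (*init)(void);
	void (*exit)(void);
} init_funcs[] = {
	{ i915_request_module_init, i915_request_module_exit },
	/* ... further per-subsystem init/exit pairs ... */
};

static int __init i915_init(void)
{
	int err, i;

	for (i = 0; i < ARRAY_SIZE(init_funcs); i++) {
		err = init_funcs[i].init();
		if (err) {
			/* Unwind only the subsystems that did initialise. */
			while (i--)
				init_funcs[i].exit();
			return err;
		}
	}

	return 0;
}

static void __exit i915_exit(void)
{
	int i;

	/* Tear down in reverse order of initialisation. */
	for (i = ARRAY_SIZE(init_funcs) - 1; i >= 0; i--)
		init_funcs[i].exit();
}

This mirrors the direction of the diff: the slab caches' lifetime is tied directly to module init/exit instead of to the removed i915_globals table and its shrink hooks.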