diff options
Diffstat (limited to 'drivers/gpu/drm/i915/gt/intel_lrc.c')
-rw-r--r-- | drivers/gpu/drm/i915/gt/intel_lrc.c | 1377 |
1 files changed, 881 insertions, 496 deletions
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 82b7ace62d97..d42584439f51 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -136,9 +136,12 @@ #include "gem/i915_gem_context.h" #include "i915_drv.h" -#include "i915_gem_render_state.h" +#include "i915_perf.h" +#include "i915_trace.h" #include "i915_vgpu.h" #include "intel_engine_pm.h" +#include "intel_gt.h" +#include "intel_gt_pm.h" #include "intel_lrc_reg.h" #include "intel_mocs.h" #include "intel_reset.h" @@ -161,6 +164,15 @@ #define GEN8_CTX_STATUS_COMPLETED_MASK \ (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED) +#define CTX_DESC_FORCE_RESTORE BIT_ULL(2) + +#define GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE (0x1) /* lower csb dword */ +#define GEN12_CTX_SWITCH_DETAIL(csb_dw) ((csb_dw) & 0xF) /* upper csb dword */ +#define GEN12_CSB_SW_CTX_ID_MASK GENMASK(25, 15) +#define GEN12_IDLE_CTX_ID 0x7FF +#define GEN12_CSB_CTX_VALID(csb_dw) \ + (FIELD_GET(GEN12_CSB_SW_CTX_ID_MASK, csb_dw) != GEN12_IDLE_CTX_ID) + /* Typical size of the average request (2 pipecontrols and a MI_BB) */ #define EXECLISTS_REQUEST_SIZE 64 /* bytes */ #define WA_TAIL_DWORDS 2 @@ -214,13 +226,34 @@ static struct virtual_engine *to_virtual_engine(struct intel_engine_cs *engine) return container_of(engine, struct virtual_engine, base); } -static int execlists_context_deferred_alloc(struct intel_context *ce, - struct intel_engine_cs *engine); +static int __execlists_context_alloc(struct intel_context *ce, + struct intel_engine_cs *engine); + static void execlists_init_reg_state(u32 *reg_state, struct intel_context *ce, struct intel_engine_cs *engine, struct intel_ring *ring); +static inline u32 intel_hws_preempt_address(struct intel_engine_cs *engine) +{ + return (i915_ggtt_offset(engine->status_page.vma) + + I915_GEM_HWS_PREEMPT_ADDR); +} + +static inline void +ring_set_paused(const struct intel_engine_cs *engine, int state) +{ + /* + * We inspect HWS_PREEMPT with a semaphore inside + * engine->emit_fini_breadcrumb. If the dword is true, + * the ring is paused as the semaphore will busywait + * until the dword is false. + */ + engine->status_page.addr[I915_GEM_HWS_PREEMPT] = state; + if (state) + wmb(); +} + static inline struct i915_priolist *to_priolist(struct rb_node *rb) { return rb_entry(rb, struct i915_priolist, node); @@ -236,6 +269,17 @@ static int effective_prio(const struct i915_request *rq) int prio = rq_prio(rq); /* + * If this request is special and must not be interrupted at any + * cost, so be it. Note we are only checking the most recent request + * in the context and so may be masking an earlier vip request. It + * is hoped that under the conditions where nopreempt is used, this + * will not matter (i.e. all requests to that context will be + * nopreempt for as long as desired). + */ + if (i915_request_has_nopreempt(rq)) + prio = I915_PRIORITY_UNPREEMPTABLE; + + /* * On unwinding the active request, we give it a priority bump * if it has completed waiting on any semaphore. If we know that * the request has already started, we can prevent an unwanted @@ -245,6 +289,7 @@ static int effective_prio(const struct i915_request *rq) prio |= I915_PRIORITY_NOSEMAPHORE; /* Restrict mere WAIT boosts from triggering preemption */ + BUILD_BUG_ON(__NO_PREEMPTION & ~I915_PRIORITY_MASK); /* only internal */ return prio | __NO_PREEMPTION; } @@ -271,10 +316,7 @@ static inline bool need_preempt(const struct intel_engine_cs *engine, { int last_prio; - if (!engine->preempt_context) - return false; - - if (i915_request_completed(rq)) + if (!intel_engine_has_semaphores(engine)) return false; /* @@ -338,9 +380,6 @@ __maybe_unused static inline bool assert_priority_queue(const struct i915_request *prev, const struct i915_request *next) { - const struct intel_engine_execlists *execlists = - &prev->engine->execlists; - /* * Without preemption, the prev may refer to the still active element * which we refuse to let go. @@ -348,7 +387,7 @@ assert_priority_queue(const struct i915_request *prev, * Even with preemption, there are times when we think it is better not * to preempt and leave an ostensibly lower priority request in flight. */ - if (port_request(execlists->port) == prev) + if (i915_request_is_active(prev)) return true; return rq_prio(prev) >= rq_prio(next); @@ -389,13 +428,17 @@ lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine) BUILD_BUG_ON(MAX_CONTEXT_HW_ID > (BIT(GEN8_CTX_ID_WIDTH))); BUILD_BUG_ON(GEN11_MAX_CONTEXT_HW_ID > (BIT(GEN11_SW_CTX_ID_WIDTH))); - desc = ctx->desc_template; /* bits 0-11 */ - GEM_BUG_ON(desc & GENMASK_ULL(63, 12)); + desc = INTEL_LEGACY_32B_CONTEXT; + if (i915_vm_is_4lvl(ce->vm)) + desc = INTEL_LEGACY_64B_CONTEXT; + desc <<= GEN8_CTX_ADDRESSING_MODE_SHIFT; + + desc |= GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE; + if (IS_GEN(engine->i915, 8)) + desc |= GEN8_CTX_L3LLC_COHERENT; desc |= i915_ggtt_offset(ce->state) + LRC_HEADER_PAGES * PAGE_SIZE; /* bits 12-31 */ - GEM_BUG_ON(desc & GENMASK_ULL(63, 32)); - /* * The following 32bits are copied into the OA reports (dword 2). * Consider updating oa_get_render_ctx_id in i915_perf.c when changing @@ -442,13 +485,11 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine) struct intel_engine_cs *owner; if (i915_request_completed(rq)) - break; + continue; /* XXX */ __i915_request_unsubmit(rq); unwind_wa_tail(rq); - GEM_BUG_ON(rq->hw_context->inflight); - /* * Push the request back into the queue for later resubmission. * If this request is not native to this physical engine (i.e. @@ -468,6 +509,19 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine) list_move(&rq->sched.link, pl); active = rq; } else { + /* + * Decouple the virtual breadcrumb before moving it + * back to the virtual engine -- we don't want the + * request to complete in the background and try + * and cancel the breadcrumb on the virtual engine + * (instead of the old engine where it is linked)! + */ + if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, + &rq->fence.flags)) { + spin_lock(&rq->lock); + i915_request_cancel_breadcrumb(rq); + spin_unlock(&rq->lock); + } rq->engine = owner; owner->submit_request(rq); active = NULL; @@ -500,32 +554,45 @@ execlists_context_status_change(struct i915_request *rq, unsigned long status) status, rq); } -inline void -execlists_user_begin(struct intel_engine_execlists *execlists, - const struct execlist_port *port) +static inline struct intel_engine_cs * +__execlists_schedule_in(struct i915_request *rq) { - execlists_set_active_once(execlists, EXECLISTS_ACTIVE_USER); -} + struct intel_engine_cs * const engine = rq->engine; + struct intel_context * const ce = rq->hw_context; -inline void -execlists_user_end(struct intel_engine_execlists *execlists) -{ - execlists_clear_active(execlists, EXECLISTS_ACTIVE_USER); + intel_context_get(ce); + + intel_gt_pm_get(engine->gt); + execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN); + intel_engine_context_in(engine); + + return engine; } -static inline void -execlists_context_schedule_in(struct i915_request *rq) +static inline struct i915_request * +execlists_schedule_in(struct i915_request *rq, int idx) { - GEM_BUG_ON(rq->hw_context->inflight); + struct intel_context * const ce = rq->hw_context; + struct intel_engine_cs *old; - execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN); - intel_engine_context_in(rq->engine); - rq->hw_context->inflight = rq->engine; + GEM_BUG_ON(!intel_engine_pm_is_awake(rq->engine)); + trace_i915_request_in(rq, idx); + + old = READ_ONCE(ce->inflight); + do { + if (!old) { + WRITE_ONCE(ce->inflight, __execlists_schedule_in(rq)); + break; + } + } while (!try_cmpxchg(&ce->inflight, &old, ptr_inc(old))); + + GEM_BUG_ON(intel_context_inflight(ce) != rq->engine); + return i915_request_get(rq); } -static void kick_siblings(struct i915_request *rq) +static void kick_siblings(struct i915_request *rq, struct intel_context *ce) { - struct virtual_engine *ve = to_virtual_engine(rq->hw_context->engine); + struct virtual_engine *ve = container_of(ce, typeof(*ve), context); struct i915_request *next = READ_ONCE(ve->request); if (next && next->execution_mask & ~rq->execution_mask) @@ -533,29 +600,53 @@ static void kick_siblings(struct i915_request *rq) } static inline void -execlists_context_schedule_out(struct i915_request *rq, unsigned long status) +__execlists_schedule_out(struct i915_request *rq, + struct intel_engine_cs * const engine) { - rq->hw_context->inflight = NULL; - intel_engine_context_out(rq->engine); - execlists_context_status_change(rq, status); - trace_i915_request_out(rq); + struct intel_context * const ce = rq->hw_context; + + intel_engine_context_out(engine); + execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT); + intel_gt_pm_put(engine->gt); /* - * If this is part of a virtual engine, its next request may have - * been blocked waiting for access to the active context. We have - * to kick all the siblings again in case we need to switch (e.g. - * the next request is not runnable on this engine). Hopefully, - * we will already have submitted the next request before the - * tasklet runs and do not need to rebuild each virtual tree - * and kick everyone again. + * If this is part of a virtual engine, its next request may + * have been blocked waiting for access to the active context. + * We have to kick all the siblings again in case we need to + * switch (e.g. the next request is not runnable on this + * engine). Hopefully, we will already have submitted the next + * request before the tasklet runs and do not need to rebuild + * each virtual tree and kick everyone again. */ - if (rq->engine != rq->hw_context->engine) - kick_siblings(rq); + if (ce->engine != engine) + kick_siblings(rq, ce); + + intel_context_put(ce); +} + +static inline void +execlists_schedule_out(struct i915_request *rq) +{ + struct intel_context * const ce = rq->hw_context; + struct intel_engine_cs *cur, *old; + + trace_i915_request_out(rq); + GEM_BUG_ON(intel_context_inflight(ce) != rq->engine); + + old = READ_ONCE(ce->inflight); + do + cur = ptr_unmask_bits(old, 2) ? ptr_dec(old) : NULL; + while (!try_cmpxchg(&ce->inflight, &old, cur)); + if (!cur) + __execlists_schedule_out(rq, old); + + i915_request_put(rq); } -static u64 execlists_update_context(struct i915_request *rq) +static u64 execlists_update_context(const struct i915_request *rq) { struct intel_context *ce = rq->hw_context; + u64 desc; ce->lrc_reg_state[CTX_RING_TAIL + 1] = intel_ring_set_tail(rq->ring, rq->tail); @@ -576,7 +667,11 @@ static u64 execlists_update_context(struct i915_request *rq) * wmb). */ mb(); - return ce->lrc_desc; + + desc = ce->lrc_desc; + ce->lrc_desc &= ~CTX_DESC_FORCE_RESTORE; + + return desc; } static inline void write_desc(struct intel_engine_execlists *execlists, u64 desc, u32 port) @@ -590,12 +685,65 @@ static inline void write_desc(struct intel_engine_execlists *execlists, u64 desc } } +static __maybe_unused void +trace_ports(const struct intel_engine_execlists *execlists, + const char *msg, + struct i915_request * const *ports) +{ + const struct intel_engine_cs *engine = + container_of(execlists, typeof(*engine), execlists); + + GEM_TRACE("%s: %s { %llx:%lld%s, %llx:%lld }\n", + engine->name, msg, + ports[0]->fence.context, + ports[0]->fence.seqno, + i915_request_completed(ports[0]) ? "!" : + i915_request_started(ports[0]) ? "*" : + "", + ports[1] ? ports[1]->fence.context : 0, + ports[1] ? ports[1]->fence.seqno : 0); +} + +static __maybe_unused bool +assert_pending_valid(const struct intel_engine_execlists *execlists, + const char *msg) +{ + struct i915_request * const *port, *rq; + struct intel_context *ce = NULL; + + trace_ports(execlists, msg, execlists->pending); + + if (!execlists->pending[0]) + return false; + + if (execlists->pending[execlists_num_ports(execlists)]) + return false; + + for (port = execlists->pending; (rq = *port); port++) { + if (ce == rq->hw_context) + return false; + + ce = rq->hw_context; + if (i915_request_completed(rq)) + continue; + + if (i915_active_is_idle(&ce->active)) + return false; + + if (!i915_vma_is_pinned(ce->state)) + return false; + } + + return ce; +} + static void execlists_submit_ports(struct intel_engine_cs *engine) { struct intel_engine_execlists *execlists = &engine->execlists; - struct execlist_port *port = execlists->port; unsigned int n; + GEM_BUG_ON(!assert_pending_valid(execlists, "submit")); + /* * We can skip acquiring intel_runtime_pm_get() here as it was taken * on our behalf by the request (see i915_gem_mark_busy()) and it will @@ -604,7 +752,7 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) * that all ELSP are drained i.e. we have processed the CSB, * before allowing ourselves to idle and calling intel_runtime_pm_put(). */ - GEM_BUG_ON(!intel_wakeref_active(&engine->wakeref)); + GEM_BUG_ON(!intel_engine_pm_is_awake(engine)); /* * ELSQ note: the submit queue is not cleared after being submitted @@ -613,38 +761,16 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) * of elsq entries, keep this in mind before changing the loop below. */ for (n = execlists_num_ports(execlists); n--; ) { - struct i915_request *rq; - unsigned int count; - u64 desc; - - rq = port_unpack(&port[n], &count); - if (rq) { - GEM_BUG_ON(count > !n); - if (!count++) - execlists_context_schedule_in(rq); - port_set(&port[n], port_pack(rq, count)); - desc = execlists_update_context(rq); - GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc)); - - GEM_TRACE("%s in[%d]: ctx=%d.%d, fence %llx:%lld (current %d), prio=%d\n", - engine->name, n, - port[n].context_id, count, - rq->fence.context, rq->fence.seqno, - hwsp_seqno(rq), - rq_prio(rq)); - } else { - GEM_BUG_ON(!n); - desc = 0; - } + struct i915_request *rq = execlists->pending[n]; - write_desc(execlists, desc, n); + write_desc(execlists, + rq ? execlists_update_context(rq) : 0, + n); } /* we need to manually load the submit queue */ if (execlists->ctrl_reg) writel(EL_CTRL_LOAD, execlists->ctrl_reg); - - execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK); } static bool ctx_single_port_submission(const struct intel_context *ce) @@ -668,6 +794,7 @@ static bool can_merge_ctx(const struct intel_context *prev, static bool can_merge_rq(const struct i915_request *prev, const struct i915_request *next) { + GEM_BUG_ON(prev == next); GEM_BUG_ON(!assert_priority_queue(prev, next)); if (!can_merge_ctx(prev->hw_context, next->hw_context)) @@ -676,58 +803,6 @@ static bool can_merge_rq(const struct i915_request *prev, return true; } -static void port_assign(struct execlist_port *port, struct i915_request *rq) -{ - GEM_BUG_ON(rq == port_request(port)); - - if (port_isset(port)) - i915_request_put(port_request(port)); - - port_set(port, port_pack(i915_request_get(rq), port_count(port))); -} - -static void inject_preempt_context(struct intel_engine_cs *engine) -{ - struct intel_engine_execlists *execlists = &engine->execlists; - struct intel_context *ce = engine->preempt_context; - unsigned int n; - - GEM_BUG_ON(execlists->preempt_complete_status != - upper_32_bits(ce->lrc_desc)); - - /* - * Switch to our empty preempt context so - * the state of the GPU is known (idle). - */ - GEM_TRACE("%s\n", engine->name); - for (n = execlists_num_ports(execlists); --n; ) - write_desc(execlists, 0, n); - - write_desc(execlists, ce->lrc_desc, n); - - /* we need to manually load the submit queue */ - if (execlists->ctrl_reg) - writel(EL_CTRL_LOAD, execlists->ctrl_reg); - - execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK); - execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT); - - (void)I915_SELFTEST_ONLY(execlists->preempt_hang.count++); -} - -static void complete_preempt_context(struct intel_engine_execlists *execlists) -{ - GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT)); - - if (inject_preempt_hang(execlists)) - return; - - execlists_cancel_port_requests(execlists); - __unwind_incomplete_requests(container_of(execlists, - struct intel_engine_cs, - execlists)); -} - static void virtual_update_register_offsets(u32 *regs, struct intel_engine_cs *engine) { @@ -792,7 +867,7 @@ static bool virtual_matches(const struct virtual_engine *ve, * we reuse the register offsets). This is a very small * hystersis on the greedy seelction algorithm. */ - inflight = READ_ONCE(ve->context.inflight); + inflight = intel_context_inflight(&ve->context); if (inflight && inflight != engine) return false; @@ -815,13 +890,120 @@ static void virtual_xfer_breadcrumbs(struct virtual_engine *ve, spin_unlock(&old->breadcrumbs.irq_lock); } +static struct i915_request * +last_active(const struct intel_engine_execlists *execlists) +{ + struct i915_request * const *last = execlists->active; + + while (*last && i915_request_completed(*last)) + last++; + + return *last; +} + +static void defer_request(struct i915_request *rq, struct list_head * const pl) +{ + LIST_HEAD(list); + + /* + * We want to move the interrupted request to the back of + * the round-robin list (i.e. its priority level), but + * in doing so, we must then move all requests that were in + * flight and were waiting for the interrupted request to + * be run after it again. + */ + do { + struct i915_dependency *p; + + GEM_BUG_ON(i915_request_is_active(rq)); + list_move_tail(&rq->sched.link, pl); + + list_for_each_entry(p, &rq->sched.waiters_list, wait_link) { + struct i915_request *w = + container_of(p->waiter, typeof(*w), sched); + + /* Leave semaphores spinning on the other engines */ + if (w->engine != rq->engine) + continue; + + /* No waiter should start before its signaler */ + GEM_BUG_ON(i915_request_started(w) && + !i915_request_completed(rq)); + + GEM_BUG_ON(i915_request_is_active(w)); + if (list_empty(&w->sched.link)) + continue; /* Not yet submitted; unready */ + + if (rq_prio(w) < rq_prio(rq)) + continue; + + GEM_BUG_ON(rq_prio(w) > rq_prio(rq)); + list_move_tail(&w->sched.link, &list); + } + + rq = list_first_entry_or_null(&list, typeof(*rq), sched.link); + } while (rq); +} + +static void defer_active(struct intel_engine_cs *engine) +{ + struct i915_request *rq; + + rq = __unwind_incomplete_requests(engine); + if (!rq) + return; + + defer_request(rq, i915_sched_lookup_priolist(engine, rq_prio(rq))); +} + +static bool +need_timeslice(struct intel_engine_cs *engine, const struct i915_request *rq) +{ + int hint; + + if (!intel_engine_has_semaphores(engine)) + return false; + + if (list_is_last(&rq->sched.link, &engine->active.requests)) + return false; + + hint = max(rq_prio(list_next_entry(rq, sched.link)), + engine->execlists.queue_priority_hint); + + return hint >= effective_prio(rq); +} + +static int +switch_prio(struct intel_engine_cs *engine, const struct i915_request *rq) +{ + if (list_is_last(&rq->sched.link, &engine->active.requests)) + return INT_MIN; + + return rq_prio(list_next_entry(rq, sched.link)); +} + +static bool +enable_timeslice(const struct intel_engine_execlists *execlists) +{ + const struct i915_request *rq = *execlists->active; + + if (i915_request_completed(rq)) + return false; + + return execlists->switch_priority_hint >= effective_prio(rq); +} + +static void record_preemption(struct intel_engine_execlists *execlists) +{ + (void)I915_SELFTEST_ONLY(execlists->preempt_hang.count++); +} + static void execlists_dequeue(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; - struct execlist_port *port = execlists->port; - const struct execlist_port * const last_port = - &execlists->port[execlists->port_mask]; - struct i915_request *last = port_request(port); + struct i915_request **port = execlists->pending; + struct i915_request ** const last_port = port + execlists->port_mask; + struct i915_request *last; struct rb_node *rb; bool submit = false; @@ -867,65 +1049,100 @@ static void execlists_dequeue(struct intel_engine_cs *engine) break; } + /* + * If the queue is higher priority than the last + * request in the currently active context, submit afresh. + * We will resubmit again afterwards in case we need to split + * the active context to interject the preemption request, + * i.e. we will retrigger preemption following the ack in case + * of trouble. + */ + last = last_active(execlists); if (last) { - /* - * Don't resubmit or switch until all outstanding - * preemptions (lite-restore) are seen. Then we - * know the next preemption status we see corresponds - * to this ELSP update. - */ - GEM_BUG_ON(!execlists_is_active(execlists, - EXECLISTS_ACTIVE_USER)); - GEM_BUG_ON(!port_count(&port[0])); + if (need_preempt(engine, last, rb)) { + GEM_TRACE("%s: preempting last=%llx:%lld, prio=%d, hint=%d\n", + engine->name, + last->fence.context, + last->fence.seqno, + last->sched.attr.priority, + execlists->queue_priority_hint); + record_preemption(execlists); - /* - * If we write to ELSP a second time before the HW has had - * a chance to respond to the previous write, we can confuse - * the HW and hit "undefined behaviour". After writing to ELSP, - * we must then wait until we see a context-switch event from - * the HW to indicate that it has had a chance to respond. - */ - if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK)) - return; + /* + * Don't let the RING_HEAD advance past the breadcrumb + * as we unwind (and until we resubmit) so that we do + * not accidentally tell it to go backwards. + */ + ring_set_paused(engine, 1); - if (need_preempt(engine, last, rb)) { - inject_preempt_context(engine); - return; - } + /* + * Note that we have not stopped the GPU at this point, + * so we are unwinding the incomplete requests as they + * remain inflight and so by the time we do complete + * the preemption, some of the unwound requests may + * complete! + */ + __unwind_incomplete_requests(engine); - /* - * In theory, we could coalesce more requests onto - * the second port (the first port is active, with - * no preemptions pending). However, that means we - * then have to deal with the possible lite-restore - * of the second port (as we submit the ELSP, there - * may be a context-switch) but also we may complete - * the resubmission before the context-switch. Ergo, - * coalescing onto the second port will cause a - * preemption event, but we cannot predict whether - * that will affect port[0] or port[1]. - * - * If the second port is already active, we can wait - * until the next context-switch before contemplating - * new requests. The GPU will be busy and we should be - * able to resubmit the new ELSP before it idles, - * avoiding pipeline bubbles (momentary pauses where - * the driver is unable to keep up the supply of new - * work). However, we have to double check that the - * priorities of the ports haven't been switch. - */ - if (port_count(&port[1])) - return; + /* + * If we need to return to the preempted context, we + * need to skip the lite-restore and force it to + * reload the RING_TAIL. Otherwise, the HW has a + * tendency to ignore us rewinding the TAIL to the + * end of an earlier request. + */ + last->hw_context->lrc_desc |= CTX_DESC_FORCE_RESTORE; + last = NULL; + } else if (need_timeslice(engine, last) && + !timer_pending(&engine->execlists.timer)) { + GEM_TRACE("%s: expired last=%llx:%lld, prio=%d, hint=%d\n", + engine->name, + last->fence.context, + last->fence.seqno, + last->sched.attr.priority, + execlists->queue_priority_hint); - /* - * WaIdleLiteRestore:bdw,skl - * Apply the wa NOOPs to prevent - * ring:HEAD == rq:TAIL as we resubmit the - * request. See gen8_emit_fini_breadcrumb() for - * where we prepare the padding after the - * end of the request. - */ - last->tail = last->wa_tail; + ring_set_paused(engine, 1); + defer_active(engine); + + /* + * Unlike for preemption, if we rewind and continue + * executing the same context as previously active, + * the order of execution will remain the same and + * the tail will only advance. We do not need to + * force a full context restore, as a lite-restore + * is sufficient to resample the monotonic TAIL. + * + * If we switch to any other context, similarly we + * will not rewind TAIL of current context, and + * normal save/restore will preserve state and allow + * us to later continue executing the same request. + */ + last = NULL; + } else { + /* + * Otherwise if we already have a request pending + * for execution after the current one, we can + * just wait until the next CS event before + * queuing more. In either case we will force a + * lite-restore preemption event, but if we wait + * we hopefully coalesce several updates into a single + * submission. + */ + if (!list_is_last(&last->sched.link, + &engine->active.requests)) + return; + + /* + * WaIdleLiteRestore:bdw,skl + * Apply the wa NOOPs to prevent + * ring:HEAD == rq:TAIL as we resubmit the + * request. See gen8_emit_fini_breadcrumb() for + * where we prepare the padding after the + * end of the request. + */ + last->tail = last->wa_tail; + } } while (rb) { /* XXX virtual is always taking precedence */ @@ -955,9 +1172,24 @@ static void execlists_dequeue(struct intel_engine_cs *engine) continue; } + if (i915_request_completed(rq)) { + ve->request = NULL; + ve->base.execlists.queue_priority_hint = INT_MIN; + rb_erase_cached(rb, &execlists->virtual); + RB_CLEAR_NODE(rb); + + rq->engine = engine; + __i915_request_submit(rq); + + spin_unlock(&ve->base.active.lock); + + rb = rb_first_cached(&execlists->virtual); + continue; + } + if (last && !can_merge_rq(last, rq)) { spin_unlock(&ve->base.active.lock); - return; /* leave this rq for another engine */ + return; /* leave this for another */ } GEM_TRACE("%s: virtual rq=%llx:%lld%s, new engine? %s\n", @@ -1006,9 +1238,10 @@ static void execlists_dequeue(struct intel_engine_cs *engine) } __i915_request_submit(rq); - trace_i915_request_in(rq, port_index(port, execlists)); - submit = true; - last = rq; + if (!i915_request_completed(rq)) { + submit = true; + last = rq; + } } spin_unlock(&ve->base.active.lock); @@ -1021,6 +1254,9 @@ static void execlists_dequeue(struct intel_engine_cs *engine) int i; priolist_for_each_request_consume(rq, rn, p, i) { + if (i915_request_completed(rq)) + goto skip; + /* * Can we combine this request with the current port? * It has to be the same context/ringbuffer and not @@ -1060,19 +1296,14 @@ static void execlists_dequeue(struct intel_engine_cs *engine) ctx_single_port_submission(rq->hw_context)) goto done; - - if (submit) - port_assign(port, last); + *port = execlists_schedule_in(last, port - execlists->pending); port++; - - GEM_BUG_ON(port_isset(port)); } - __i915_request_submit(rq); - trace_i915_request_in(rq, port_index(port, execlists)); - last = rq; submit = true; +skip: + __i915_request_submit(rq); } rb_erase_cached(&p->node, &execlists->queue); @@ -1097,54 +1328,34 @@ done: * interrupt for secondary ports). */ execlists->queue_priority_hint = queue_prio(execlists); + GEM_TRACE("%s: queue_priority_hint:%d, submit:%s\n", + engine->name, execlists->queue_priority_hint, + yesno(submit)); if (submit) { - port_assign(port, last); + *port = execlists_schedule_in(last, port - execlists->pending); + memset(port + 1, 0, (last_port - port) * sizeof(*port)); + execlists->switch_priority_hint = + switch_prio(engine, *execlists->pending); execlists_submit_ports(engine); + } else { + ring_set_paused(engine, 0); } - - /* We must always keep the beast fed if we have work piled up */ - GEM_BUG_ON(rb_first_cached(&execlists->queue) && - !port_isset(execlists->port)); - - /* Re-evaluate the executing context setup after each preemptive kick */ - if (last) - execlists_user_begin(execlists, execlists->port); - - /* If the engine is now idle, so should be the flag; and vice versa. */ - GEM_BUG_ON(execlists_is_active(&engine->execlists, - EXECLISTS_ACTIVE_USER) == - !port_isset(engine->execlists.port)); } -void -execlists_cancel_port_requests(struct intel_engine_execlists * const execlists) +static void +cancel_port_requests(struct intel_engine_execlists * const execlists) { - struct execlist_port *port = execlists->port; - unsigned int num_ports = execlists_num_ports(execlists); - - while (num_ports-- && port_isset(port)) { - struct i915_request *rq = port_request(port); - - GEM_TRACE("%s:port%u fence %llx:%lld, (current %d)\n", - rq->engine->name, - (unsigned int)(port - execlists->port), - rq->fence.context, rq->fence.seqno, - hwsp_seqno(rq)); - - GEM_BUG_ON(!execlists->active); - execlists_context_schedule_out(rq, - i915_request_completed(rq) ? - INTEL_CONTEXT_SCHEDULE_OUT : - INTEL_CONTEXT_SCHEDULE_PREEMPTED); + struct i915_request * const *port, *rq; - i915_request_put(rq); + for (port = execlists->pending; (rq = *port); port++) + execlists_schedule_out(rq); + memset(execlists->pending, 0, sizeof(execlists->pending)); - memset(port, 0, sizeof(*port)); - port++; - } - - execlists_clear_all_active(execlists); + for (port = execlists->active; (rq = *port); port++) + execlists_schedule_out(rq); + execlists->active = + memset(execlists->inflight, 0, sizeof(execlists->inflight)); } static inline void @@ -1160,15 +1371,100 @@ reset_in_progress(const struct intel_engine_execlists *execlists) return unlikely(!__tasklet_is_enabled(&execlists->tasklet)); } +enum csb_step { + CSB_NOP, + CSB_PROMOTE, + CSB_PREEMPT, + CSB_COMPLETE, +}; + +/* + * Starting with Gen12, the status has a new format: + * + * bit 0: switched to new queue + * bit 1: reserved + * bit 2: semaphore wait mode (poll or signal), only valid when + * switch detail is set to "wait on semaphore" + * bits 3-5: engine class + * bits 6-11: engine instance + * bits 12-14: reserved + * bits 15-25: sw context id of the lrc the GT switched to + * bits 26-31: sw counter of the lrc the GT switched to + * bits 32-35: context switch detail + * - 0: ctx complete + * - 1: wait on sync flip + * - 2: wait on vblank + * - 3: wait on scanline + * - 4: wait on semaphore + * - 5: context preempted (not on SEMAPHORE_WAIT or + * WAIT_FOR_EVENT) + * bit 36: reserved + * bits 37-43: wait detail (for switch detail 1 to 4) + * bits 44-46: reserved + * bits 47-57: sw context id of the lrc the GT switched away from + * bits 58-63: sw counter of the lrc the GT switched away from + */ +static inline enum csb_step +gen12_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb) +{ + u32 lower_dw = csb[0]; + u32 upper_dw = csb[1]; + bool ctx_to_valid = GEN12_CSB_CTX_VALID(lower_dw); + bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_dw); + bool new_queue = lower_dw & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE; + + if (!ctx_away_valid && ctx_to_valid) + return CSB_PROMOTE; + + /* + * The context switch detail is not guaranteed to be 5 when a preemption + * occurs, so we can't just check for that. The check below works for + * all the cases we care about, including preemptions of WAIT + * instructions and lite-restore. Preempt-to-idle via the CTRL register + * would require some extra handling, but we don't support that. + */ + if (new_queue && ctx_away_valid) + return CSB_PREEMPT; + + /* + * switch detail = 5 is covered by the case above and we do not expect a + * context switch on an unsuccessful wait instruction since we always + * use polling mode. + */ + GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_dw)); + + if (*execlists->active) { + GEM_BUG_ON(!ctx_away_valid); + return CSB_COMPLETE; + } + + return CSB_NOP; +} + +static inline enum csb_step +gen8_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb) +{ + unsigned int status = *csb; + + if (status & GEN8_CTX_STATUS_IDLE_ACTIVE) + return CSB_PROMOTE; + + if (status & GEN8_CTX_STATUS_PREEMPTED) + return CSB_PREEMPT; + + if (*execlists->active) + return CSB_COMPLETE; + + return CSB_NOP; +} + static void process_csb(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; - struct execlist_port *port = execlists->port; const u32 * const buf = execlists->csb_status; const u8 num_entries = execlists->csb_size; u8 head, tail; - lockdep_assert_held(&engine->active.lock); GEM_BUG_ON(USES_GUC_SUBMISSION(engine->i915)); /* @@ -1198,9 +1494,7 @@ static void process_csb(struct intel_engine_cs *engine) rmb(); do { - struct i915_request *rq; - unsigned int status; - unsigned int count; + enum csb_step csb_step; if (++head == num_entries) head = 0; @@ -1223,68 +1517,43 @@ static void process_csb(struct intel_engine_cs *engine) * status notifier. */ - GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x, active=0x%x\n", + GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x\n", engine->name, head, - buf[2 * head + 0], buf[2 * head + 1], - execlists->active); - - status = buf[2 * head]; - if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE | - GEN8_CTX_STATUS_PREEMPTED)) - execlists_set_active(execlists, - EXECLISTS_ACTIVE_HWACK); - if (status & GEN8_CTX_STATUS_ACTIVE_IDLE) - execlists_clear_active(execlists, - EXECLISTS_ACTIVE_HWACK); - - if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK)) - continue; + buf[2 * head + 0], buf[2 * head + 1]); - /* We should never get a COMPLETED | IDLE_ACTIVE! */ - GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE); + if (INTEL_GEN(engine->i915) >= 12) + csb_step = gen12_csb_parse(execlists, buf + 2 * head); + else + csb_step = gen8_csb_parse(execlists, buf + 2 * head); - if (status & GEN8_CTX_STATUS_COMPLETE && - buf[2*head + 1] == execlists->preempt_complete_status) { - GEM_TRACE("%s preempt-idle\n", engine->name); - complete_preempt_context(execlists); - continue; - } + switch (csb_step) { + case CSB_PREEMPT: /* cancel old inflight, prepare for switch */ + trace_ports(execlists, "preempted", execlists->active); - if (status & GEN8_CTX_STATUS_PREEMPTED && - execlists_is_active(execlists, - EXECLISTS_ACTIVE_PREEMPT)) - continue; + while (*execlists->active) + execlists_schedule_out(*execlists->active++); - GEM_BUG_ON(!execlists_is_active(execlists, - EXECLISTS_ACTIVE_USER)); + /* fallthrough */ + case CSB_PROMOTE: /* switch pending to inflight */ + GEM_BUG_ON(*execlists->active); + GEM_BUG_ON(!assert_pending_valid(execlists, "promote")); + execlists->active = + memcpy(execlists->inflight, + execlists->pending, + execlists_num_ports(execlists) * + sizeof(*execlists->pending)); - rq = port_unpack(port, &count); - GEM_TRACE("%s out[0]: ctx=%d.%d, fence %llx:%lld (current %d), prio=%d\n", - engine->name, - port->context_id, count, - rq ? rq->fence.context : 0, - rq ? rq->fence.seqno : 0, - rq ? hwsp_seqno(rq) : 0, - rq ? rq_prio(rq) : 0); + if (enable_timeslice(execlists)) + mod_timer(&execlists->timer, jiffies + 1); - /* Check the context/desc id for this event matches */ - GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id); + if (!inject_preempt_hang(execlists)) + ring_set_paused(engine, 0); - GEM_BUG_ON(count == 0); - if (--count == 0) { - /* - * On the final event corresponding to the - * submission of this context, we expect either - * an element-switch event or a completion - * event (and on completion, the active-idle - * marker). No more preemptions, lite-restore - * or otherwise. - */ - GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED); - GEM_BUG_ON(port_isset(&port[1]) && - !(status & GEN8_CTX_STATUS_ELEMENT_SWITCH)); - GEM_BUG_ON(!port_isset(&port[1]) && - !(status & GEN8_CTX_STATUS_ACTIVE_IDLE)); + WRITE_ONCE(execlists->pending[0], NULL); + break; + + case CSB_COMPLETE: /* port0 completed, advanced to port1 */ + trace_ports(execlists, "completed", execlists->active); /* * We rely on the hardware being strongly @@ -1292,22 +1561,16 @@ static void process_csb(struct intel_engine_cs *engine) * coherent (visible from the CPU) before the * user interrupt and CSB is processed. */ - GEM_BUG_ON(!i915_request_completed(rq)); - - execlists_context_schedule_out(rq, - INTEL_CONTEXT_SCHEDULE_OUT); - i915_request_put(rq); + GEM_BUG_ON(!i915_request_completed(*execlists->active) && + !reset_in_progress(execlists)); + execlists_schedule_out(*execlists->active++); - GEM_TRACE("%s completed ctx=%d\n", - engine->name, port->context_id); + GEM_BUG_ON(execlists->active - execlists->inflight > + execlists_num_ports(execlists)); + break; - port = execlists_port_complete(execlists, port); - if (port_isset(port)) - execlists_user_begin(execlists, port); - else - execlists_user_end(execlists); - } else { - port_set(port, port_pack(rq, count)); + case CSB_NOP: + break; } } while (head != tail); @@ -1330,9 +1593,7 @@ static void process_csb(struct intel_engine_cs *engine) static void __execlists_submission_tasklet(struct intel_engine_cs *const engine) { lockdep_assert_held(&engine->active.lock); - - process_csb(engine); - if (!execlists_is_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT)) + if (!engine->execlists.pending[0]) execlists_dequeue(engine); } @@ -1345,14 +1606,21 @@ static void execlists_submission_tasklet(unsigned long data) struct intel_engine_cs * const engine = (struct intel_engine_cs *)data; unsigned long flags; - GEM_TRACE("%s awake?=%d, active=%x\n", - engine->name, - !!intel_wakeref_active(&engine->wakeref), - engine->execlists.active); + process_csb(engine); + if (!READ_ONCE(engine->execlists.pending[0])) { + spin_lock_irqsave(&engine->active.lock, flags); + __execlists_submission_tasklet(engine); + spin_unlock_irqrestore(&engine->active.lock, flags); + } +} - spin_lock_irqsave(&engine->active.lock, flags); - __execlists_submission_tasklet(engine); - spin_unlock_irqrestore(&engine->active.lock, flags); +static void execlists_submission_timer(struct timer_list *timer) +{ + struct intel_engine_cs *engine = + from_timer(engine, timer, execlists.timer); + + /* Kick the tasklet for some interrupt coalescing and reset handling */ + tasklet_hi_schedule(&engine->execlists.tasklet); } static void queue_request(struct intel_engine_cs *engine, @@ -1376,12 +1644,16 @@ static void __submit_queue_imm(struct intel_engine_cs *engine) tasklet_hi_schedule(&execlists->tasklet); } -static void submit_queue(struct intel_engine_cs *engine, int prio) +static void submit_queue(struct intel_engine_cs *engine, + const struct i915_request *rq) { - if (prio > engine->execlists.queue_priority_hint) { - engine->execlists.queue_priority_hint = prio; - __submit_queue_imm(engine); - } + struct intel_engine_execlists *execlists = &engine->execlists; + + if (rq_prio(rq) <= execlists->queue_priority_hint) + return; + + execlists->queue_priority_hint = rq_prio(rq); + __submit_queue_imm(engine); } static void execlists_submit_request(struct i915_request *request) @@ -1397,7 +1669,7 @@ static void execlists_submit_request(struct i915_request *request) GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)); GEM_BUG_ON(list_empty(&request->sched.link)); - submit_queue(engine, rq_prio(request)); + submit_queue(engine, request); spin_unlock_irqrestore(&engine->active.lock, flags); } @@ -1405,9 +1677,7 @@ static void execlists_submit_request(struct i915_request *request) static void __execlists_context_fini(struct intel_context *ce) { intel_ring_put(ce->ring); - - GEM_BUG_ON(i915_gem_object_is_active(ce->state->obj)); - i915_gem_object_put(ce->state->obj); + i915_vma_put(ce->state); } static void execlists_context_destroy(struct kref *kref) @@ -1420,13 +1690,45 @@ static void execlists_context_destroy(struct kref *kref) if (ce->state) __execlists_context_fini(ce); + intel_context_fini(ce); intel_context_free(ce); } +static void +set_redzone(void *vaddr, const struct intel_engine_cs *engine) +{ + if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) + return; + + vaddr += LRC_HEADER_PAGES * PAGE_SIZE; + vaddr += engine->context_size; + + memset(vaddr, POISON_INUSE, I915_GTT_PAGE_SIZE); +} + +static void +check_redzone(const void *vaddr, const struct intel_engine_cs *engine) +{ + if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) + return; + + vaddr += LRC_HEADER_PAGES * PAGE_SIZE; + vaddr += engine->context_size; + + if (memchr_inv(vaddr, POISON_INUSE, I915_GTT_PAGE_SIZE)) + dev_err_once(engine->i915->drm.dev, + "%s context redzone overwritten!\n", + engine->name); +} + static void execlists_context_unpin(struct intel_context *ce) { + check_redzone((void *)ce->lrc_reg_state - LRC_STATE_PN * PAGE_SIZE, + ce->engine); + i915_gem_context_unpin_hw_id(ce->gem_context); i915_gem_object_unpin_map(ce->state->obj); + intel_ring_reset(ce->ring, ce->ring->tail); } static void @@ -1444,9 +1746,12 @@ __execlists_update_reg_state(struct intel_context *ce, regs[CTX_RING_TAIL + 1] = ring->tail; /* RPCS */ - if (engine->class == RENDER_CLASS) + if (engine->class == RENDER_CLASS) { regs[CTX_R_PWR_CLK_STATE + 1] = intel_sseu_make_rpcs(engine->i915, &ce->sseu); + + i915_oa_init_reg_state(engine, ce, regs); + } } static int @@ -1456,19 +1761,12 @@ __execlists_context_pin(struct intel_context *ce, void *vaddr; int ret; - GEM_BUG_ON(!ce->gem_context->vm); - - ret = execlists_context_deferred_alloc(ce, engine); - if (ret) - goto err; GEM_BUG_ON(!ce->state); - ret = intel_context_active_acquire(ce, - engine->i915->ggtt.pin_bias | - PIN_OFFSET_BIAS | - PIN_HIGH); + ret = intel_context_active_acquire(ce); if (ret) goto err; + GEM_BUG_ON(!i915_vma_is_pinned(ce->state)); vaddr = i915_gem_object_pin_map(ce->state->obj, i915_coherent_map_type(engine->i915) | @@ -1501,6 +1799,11 @@ static int execlists_context_pin(struct intel_context *ce) return __execlists_context_pin(ce, ce->engine); } +static int execlists_context_alloc(struct intel_context *ce) +{ + return __execlists_context_alloc(ce, ce->engine); +} + static void execlists_context_reset(struct intel_context *ce) { /* @@ -1524,6 +1827,8 @@ static void execlists_context_reset(struct intel_context *ce) } static const struct intel_context_ops execlists_context_ops = { + .alloc = execlists_context_alloc, + .pin = execlists_context_pin, .unpin = execlists_context_unpin, @@ -1569,8 +1874,7 @@ static int gen8_emit_init_breadcrumb(struct i915_request *rq) static int emit_pdps(struct i915_request *rq) { const struct intel_engine_cs * const engine = rq->engine; - struct i915_ppgtt * const ppgtt = - i915_vm_to_ppgtt(rq->gem_context->vm); + struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(rq->hw_context->vm); int err, i; u32 *cs; @@ -1643,7 +1947,7 @@ static int execlists_request_alloc(struct i915_request *request) */ /* Unconditionally invalidate GPU caches and TLBs. */ - if (i915_vm_is_4lvl(request->gem_context->vm)) + if (i915_vm_is_4lvl(request->hw_context->vm)) ret = request->engine->emit_flush(request, EMIT_INVALIDATE); else ret = emit_pdps(request); @@ -1676,7 +1980,8 @@ gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch) /* NB no one else is allowed to scribble over scratch + 256! */ *batch++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT; *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4); - *batch++ = i915_scratch_offset(engine->i915) + 256; + *batch++ = intel_gt_scratch_offset(engine->gt, + INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA); *batch++ = 0; *batch++ = MI_LOAD_REGISTER_IMM(1); @@ -1690,12 +1995,19 @@ gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch) *batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT; *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4); - *batch++ = i915_scratch_offset(engine->i915) + 256; + *batch++ = intel_gt_scratch_offset(engine->gt, + INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA); *batch++ = 0; return batch; } +static u32 slm_offset(struct intel_engine_cs *engine) +{ + return intel_gt_scratch_offset(engine->gt, + INTEL_GT_SCRATCH_FIELD_CLEAR_SLM_WA); +} + /* * Typically we only have one indirect_ctx and per_ctx batch buffer which are * initialized at the beginning and shared across all contexts but this field @@ -1727,8 +2039,7 @@ static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch) PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_CS_STALL | PIPE_CONTROL_QW_WRITE, - i915_scratch_offset(engine->i915) + - 2 * CACHELINE_BYTES); + slm_offset(engine)); *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; @@ -1874,7 +2185,7 @@ static int lrc_setup_wa_ctx(struct intel_engine_cs *engine) if (IS_ERR(obj)) return PTR_ERR(obj); - vma = i915_vma_instance(obj, &engine->i915->ggtt.vm, NULL); + vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); goto err; @@ -1914,6 +2225,7 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine) return 0; switch (INTEL_GEN(engine->i915)) { + case 12: case 11: return 0; case 10: @@ -1970,22 +2282,23 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine) static void enable_execlists(struct intel_engine_cs *engine) { + u32 mode; + + assert_forcewakes_active(engine->uncore, FORCEWAKE_ALL); + intel_engine_set_hwsp_writemask(engine, ~0u); /* HWSTAM */ if (INTEL_GEN(engine->i915) >= 11) - ENGINE_WRITE(engine, - RING_MODE_GEN7, - _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE)); + mode = _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE); else - ENGINE_WRITE(engine, - RING_MODE_GEN7, - _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE)); + mode = _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE); + ENGINE_WRITE_FW(engine, RING_MODE_GEN7, mode); - ENGINE_WRITE(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING)); + ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING)); - ENGINE_WRITE(engine, - RING_HWS_PGA, - i915_ggtt_offset(engine->status_page.vma)); + ENGINE_WRITE_FW(engine, + RING_HWS_PGA, + i915_ggtt_offset(engine->status_page.vma)); ENGINE_POSTING_READ(engine, RING_HWS_PGA); } @@ -1993,7 +2306,7 @@ static bool unexpected_starting_state(struct intel_engine_cs *engine) { bool unexpected = false; - if (ENGINE_READ(engine, RING_MI_MODE) & STOP_RING) { + if (ENGINE_READ_FW(engine, RING_MI_MODE) & STOP_RING) { DRM_DEBUG_DRIVER("STOP_RING still set in RING_MI_MODE\n"); unexpected = true; } @@ -2041,34 +2354,32 @@ static void execlists_reset_prepare(struct intel_engine_cs *engine) __tasklet_disable_sync_once(&execlists->tasklet); GEM_BUG_ON(!reset_in_progress(execlists)); - intel_engine_stop_cs(engine); - /* And flush any current direct submission. */ spin_lock_irqsave(&engine->active.lock, flags); spin_unlock_irqrestore(&engine->active.lock, flags); -} - -static bool lrc_regs_ok(const struct i915_request *rq) -{ - const struct intel_ring *ring = rq->ring; - const u32 *regs = rq->hw_context->lrc_reg_state; - - /* Quick spot check for the common signs of context corruption */ - - if (regs[CTX_RING_BUFFER_CONTROL + 1] != - (RING_CTL_SIZE(ring->size) | RING_VALID)) - return false; - if (regs[CTX_RING_BUFFER_START + 1] != i915_ggtt_offset(ring->vma)) - return false; - - return true; + /* + * We stop engines, otherwise we might get failed reset and a + * dead gpu (on elk). Also as modern gpu as kbl can suffer + * from system hang if batchbuffer is progressing when + * the reset is issued, regardless of READY_TO_RESET ack. + * Thus assume it is best to stop engines on all gens + * where we have a gpu reset. + * + * WaKBLVECSSemaphoreWaitPoll:kbl (on ALL_ENGINES) + * + * FIXME: Wa for more modern gens needs to be validated + */ + intel_engine_stop_cs(engine); } -static void reset_csb_pointers(struct intel_engine_execlists *execlists) +static void reset_csb_pointers(struct intel_engine_cs *engine) { + struct intel_engine_execlists * const execlists = &engine->execlists; const unsigned int reset_value = execlists->csb_size - 1; + ring_set_paused(engine, 0); + /* * After a reset, the HW starts writing into CSB entry [0]. We * therefore have to set our HEAD pointer back one entry so that @@ -2088,15 +2399,15 @@ static void reset_csb_pointers(struct intel_engine_execlists *execlists) static struct i915_request *active_request(struct i915_request *rq) { - const struct list_head * const list = &rq->engine->active.requests; - const struct intel_context * const context = rq->hw_context; + const struct list_head * const list = &rq->timeline->requests; + const struct intel_context * const ce = rq->hw_context; struct i915_request *active = NULL; - list_for_each_entry_from_reverse(rq, list, sched.link) { + list_for_each_entry_from_reverse(rq, list, link) { if (i915_request_completed(rq)) break; - if (rq->hw_context != context) + if (rq->hw_context != ce) break; active = rq; @@ -2115,33 +2426,27 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) process_csb(engine); /* drain preemption events */ /* Following the reset, we need to reload the CSB read/write pointers */ - reset_csb_pointers(&engine->execlists); + reset_csb_pointers(engine); /* * Save the currently executing context, even if we completed * its request, it was still running at the time of the * reset and will have been clobbered. */ - if (!port_isset(execlists->port)) - goto out_clear; + rq = execlists_active(execlists); + if (!rq) + goto unwind; - rq = port_request(execlists->port); ce = rq->hw_context; - - /* - * Catch up with any missed context-switch interrupts. - * - * Ideally we would just read the remaining CSB entries now that we - * know the gpu is idle. However, the CSB registers are sometimes^W - * often trashed across a GPU reset! Instead we have to rely on - * guessing the missed context-switch events by looking at what - * requests were completed. - */ - execlists_cancel_port_requests(execlists); - + GEM_BUG_ON(i915_active_is_idle(&ce->active)); + GEM_BUG_ON(!i915_vma_is_pinned(ce->state)); rq = active_request(rq); - if (!rq) + if (!rq) { + ce->ring->head = ce->ring->tail; goto out_replay; + } + + ce->ring->head = intel_ring_wrap(ce->ring, rq->head); /* * If this request hasn't started yet, e.g. it is waiting on a @@ -2155,7 +2460,7 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) * Otherwise, if we have not started yet, the request should replay * perfectly and we do not need to flag the result as being erroneous. */ - if (!i915_request_started(rq) && lrc_regs_ok(rq)) + if (!i915_request_started(rq)) goto out_replay; /* @@ -2169,8 +2474,8 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) * and have to at least restore the RING register in the context * image back to the expected values to skip over the guilty request. */ - i915_reset_request(rq, stalled); - if (!stalled && lrc_regs_ok(rq)) + __i915_request_reset(rq, stalled); + if (!stalled) goto out_replay; /* @@ -2190,17 +2495,15 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) execlists_init_reg_state(regs, ce, engine, ce->ring); out_replay: - /* Rerun the request; its payload has been neutered (if guilty). */ - ce->ring->head = - rq ? intel_ring_wrap(ce->ring, rq->head) : ce->ring->tail; + GEM_TRACE("%s replay {head:%04x, tail:%04x\n", + engine->name, ce->ring->head, ce->ring->tail); intel_ring_update_space(ce->ring); __execlists_update_reg_state(ce, engine); +unwind: /* Push back any incomplete requests for replay after the reset. */ + cancel_port_requests(execlists); __unwind_incomplete_requests(engine); - -out_clear: - execlists_clear_all_active(execlists); } static void execlists_reset(struct intel_engine_cs *engine, bool stalled) @@ -2296,7 +2599,6 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) execlists->queue_priority_hint = INT_MIN; execlists->queue = RB_ROOT_CACHED; - GEM_BUG_ON(port_isset(execlists->port)); GEM_BUG_ON(__tasklet_is_enabled(&execlists->tasklet)); execlists->tasklet.func = nop_submission_tasklet; @@ -2434,7 +2736,8 @@ static int gen8_emit_flush_render(struct i915_request *request, { struct intel_engine_cs *engine = request->engine; u32 scratch_addr = - i915_scratch_offset(engine->i915) + 2 * CACHELINE_BYTES; + intel_gt_scratch_offset(engine->gt, + INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH); bool vf_flush_wa = false, dc_flush_wa = false; u32 *cs, flags = 0; int len; @@ -2499,6 +2802,63 @@ static int gen8_emit_flush_render(struct i915_request *request, return 0; } +static int gen11_emit_flush_render(struct i915_request *request, + u32 mode) +{ + struct intel_engine_cs *engine = request->engine; + const u32 scratch_addr = + intel_gt_scratch_offset(engine->gt, + INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH); + + if (mode & EMIT_FLUSH) { + u32 *cs; + u32 flags = 0; + + flags |= PIPE_CONTROL_CS_STALL; + + flags |= PIPE_CONTROL_TILE_CACHE_FLUSH; + flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; + flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; + flags |= PIPE_CONTROL_DC_FLUSH_ENABLE; + flags |= PIPE_CONTROL_FLUSH_ENABLE; + flags |= PIPE_CONTROL_QW_WRITE; + flags |= PIPE_CONTROL_GLOBAL_GTT_IVB; + + cs = intel_ring_begin(request, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + cs = gen8_emit_pipe_control(cs, flags, scratch_addr); + intel_ring_advance(request, cs); + } + + if (mode & EMIT_INVALIDATE) { + u32 *cs; + u32 flags = 0; + + flags |= PIPE_CONTROL_CS_STALL; + + flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_TLB_INVALIDATE; + flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_QW_WRITE; + flags |= PIPE_CONTROL_GLOBAL_GTT_IVB; + + cs = intel_ring_begin(request, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + cs = gen8_emit_pipe_control(cs, flags, scratch_addr); + intel_ring_advance(request, cs); + } + + return 0; +} + /* * Reserve space for 2 NOOPs at the end of each request to be * used as a workaround for not being allowed to do lite @@ -2514,15 +2874,28 @@ static u32 *gen8_emit_wa_tail(struct i915_request *request, u32 *cs) return cs; } -static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs) +static u32 *emit_preempt_busywait(struct i915_request *request, u32 *cs) { - cs = gen8_emit_ggtt_write(cs, - request->fence.seqno, - request->timeline->hwsp_offset, - 0); + *cs++ = MI_SEMAPHORE_WAIT | + MI_SEMAPHORE_GLOBAL_GTT | + MI_SEMAPHORE_POLL | + MI_SEMAPHORE_SAD_EQ_SDD; + *cs++ = 0; + *cs++ = intel_hws_preempt_address(request->engine); + *cs++ = 0; + + return cs; +} +static __always_inline u32* +gen8_emit_fini_breadcrumb_footer(struct i915_request *request, + u32 *cs) +{ *cs++ = MI_USER_INTERRUPT; + *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; + if (intel_engine_has_semaphores(request->engine)) + cs = emit_preempt_busywait(request, cs); request->tail = intel_ring_offset(request, cs); assert_ring_tail_valid(request->ring, request->tail); @@ -2530,51 +2903,53 @@ static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs) return gen8_emit_wa_tail(request, cs); } +static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs) +{ + cs = gen8_emit_ggtt_write(cs, + request->fence.seqno, + request->timeline->hwsp_offset, + 0); + + return gen8_emit_fini_breadcrumb_footer(request, cs); +} + static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs) { - /* XXX flush+write+CS_STALL all in one upsets gem_concurrent_blt:kbl */ cs = gen8_emit_ggtt_write_rcs(cs, request->fence.seqno, request->timeline->hwsp_offset, PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | PIPE_CONTROL_DEPTH_CACHE_FLUSH | PIPE_CONTROL_DC_FLUSH_ENABLE); + + /* XXX flush+write+CS_STALL all in one upsets gem_concurrent_blt:kbl */ cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_FLUSH_ENABLE | PIPE_CONTROL_CS_STALL, 0); - *cs++ = MI_USER_INTERRUPT; - *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; - - request->tail = intel_ring_offset(request, cs); - assert_ring_tail_valid(request->ring, request->tail); - - return gen8_emit_wa_tail(request, cs); + return gen8_emit_fini_breadcrumb_footer(request, cs); } -static int gen8_init_rcs_context(struct i915_request *rq) +static u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *request, + u32 *cs) { - int ret; - - ret = intel_engine_emit_ctx_wa(rq); - if (ret) - return ret; - - ret = intel_rcs_context_init_mocs(rq); - /* - * Failing to program the MOCS is non-fatal.The system will not - * run at peak performance. So generate an error and carry on. - */ - if (ret) - DRM_ERROR("MOCS failed to program: expect performance issues.\n"); + cs = gen8_emit_ggtt_write_rcs(cs, + request->fence.seqno, + request->timeline->hwsp_offset, + PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_TILE_CACHE_FLUSH | + PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | + PIPE_CONTROL_DEPTH_CACHE_FLUSH | + PIPE_CONTROL_DC_FLUSH_ENABLE | + PIPE_CONTROL_FLUSH_ENABLE); - return i915_gem_render_state_emit(rq); + return gen8_emit_fini_breadcrumb_footer(request, cs); } static void execlists_park(struct intel_engine_cs *engine) { - intel_engine_park(engine); + del_timer(&engine->execlists.timer); } void intel_execlists_set_default_submission(struct intel_engine_cs *engine) @@ -2592,11 +2967,11 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine) engine->unpark = NULL; engine->flags |= I915_ENGINE_SUPPORTS_STATS; - if (!intel_vgpu_active(engine->i915)) + if (!intel_vgpu_active(engine->i915)) { engine->flags |= I915_ENGINE_HAS_SEMAPHORES; - if (engine->preempt_context && - HAS_LOGICAL_RING_PREEMPTION(engine->i915)) - engine->flags |= I915_ENGINE_HAS_PREEMPTION; + if (HAS_LOGICAL_RING_PREEMPTION(engine->i915)) + engine->flags |= I915_ENGINE_HAS_PREEMPTION; + } } static void execlists_destroy(struct intel_engine_cs *engine) @@ -2665,22 +3040,32 @@ logical_ring_default_irqs(struct intel_engine_cs *engine) engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift; } -int intel_execlists_submission_setup(struct intel_engine_cs *engine) +static void rcs_submission_override(struct intel_engine_cs *engine) { - /* Intentionally left blank. */ - engine->buffer = NULL; + switch (INTEL_GEN(engine->i915)) { + case 12: + case 11: + engine->emit_flush = gen11_emit_flush_render; + engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs; + break; + default: + engine->emit_flush = gen8_emit_flush_render; + engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs; + break; + } +} +int intel_execlists_submission_setup(struct intel_engine_cs *engine) +{ tasklet_init(&engine->execlists.tasklet, execlists_submission_tasklet, (unsigned long)engine); + timer_setup(&engine->execlists.timer, execlists_submission_timer, 0); logical_ring_default_vfuncs(engine); logical_ring_default_irqs(engine); - if (engine->class == RENDER_CLASS) { - engine->init_context = gen8_init_rcs_context; - engine->emit_flush = gen8_emit_flush_render; - engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs; - } + if (engine->class == RENDER_CLASS) + rcs_submission_override(engine); return 0; } @@ -2697,9 +3082,6 @@ int intel_execlists_submission_init(struct intel_engine_cs *engine) if (ret) return ret; - intel_engine_init_workarounds(engine); - intel_engine_init_whitelist(engine); - if (intel_init_workaround_bb(engine)) /* * We continue even if we fail to initialize WA batch @@ -2718,11 +3100,6 @@ int intel_execlists_submission_init(struct intel_engine_cs *engine) i915_mmio_reg_offset(RING_ELSP(base)); } - execlists->preempt_complete_status = ~0u; - if (engine->preempt_context) - execlists->preempt_complete_status = - upper_32_bits(engine->preempt_context->lrc_desc); - execlists->csb_status = &engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX]; @@ -2734,7 +3111,7 @@ int intel_execlists_submission_init(struct intel_engine_cs *engine) else execlists->csb_size = GEN11_CSB_ENTRIES; - reset_csb_pointers(execlists); + reset_csb_pointers(engine); return 0; } @@ -2747,6 +3124,10 @@ static u32 intel_lr_indirect_ctx_offset(struct intel_engine_cs *engine) default: MISSING_CASE(INTEL_GEN(engine->i915)); /* fall through */ + case 12: + indirect_ctx_offset = + GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; + break; case 11: indirect_ctx_offset = GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; @@ -2773,7 +3154,7 @@ static void execlists_init_reg_state(u32 *regs, struct intel_engine_cs *engine, struct intel_ring *ring) { - struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(ce->gem_context->vm); + struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(ce->vm); bool rcs = engine->class == RENDER_CLASS; u32 base = engine->mmio_base; @@ -2864,8 +3245,6 @@ static void execlists_init_reg_state(u32 *regs, if (rcs) { regs[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1); CTX_REG(regs, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, 0); - - i915_oa_init_reg_state(engine, ce, regs); } regs[CTX_END] = MI_BATCH_BUFFER_END; @@ -2890,6 +3269,8 @@ populate_lr_context(struct intel_context *ce, return ret; } + set_redzone(vaddr, engine); + if (engine->default_state) { /* * We only want to copy over the template context state; @@ -2917,11 +3298,6 @@ populate_lr_context(struct intel_context *ce, if (!engine->default_state) regs[CTX_CONTEXT_CONTROL + 1] |= _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT); - if (ce->gem_context == engine->i915->preempt_context && - INTEL_GEN(engine->i915) < 11) - regs[CTX_CONTEXT_CONTROL + 1] |= - _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT | - CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT); ret = 0; err_unpin_ctx: @@ -2932,27 +3308,16 @@ err_unpin_ctx: return ret; } -static struct i915_timeline *get_timeline(struct i915_gem_context *ctx) -{ - if (ctx->timeline) - return i915_timeline_get(ctx->timeline); - else - return i915_timeline_create(ctx->i915, NULL); -} - -static int execlists_context_deferred_alloc(struct intel_context *ce, - struct intel_engine_cs *engine) +static int __execlists_context_alloc(struct intel_context *ce, + struct intel_engine_cs *engine) { struct drm_i915_gem_object *ctx_obj; + struct intel_ring *ring; struct i915_vma *vma; u32 context_size; - struct intel_ring *ring; - struct i915_timeline *timeline; int ret; - if (ce->state) - return 0; - + GEM_BUG_ON(ce->state); context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE); /* @@ -2960,27 +3325,32 @@ static int execlists_context_deferred_alloc(struct intel_context *ce, * for our own use and for sharing with the GuC. */ context_size += LRC_HEADER_PAGES * PAGE_SIZE; + if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) + context_size += I915_GTT_PAGE_SIZE; /* for redzone */ ctx_obj = i915_gem_object_create_shmem(engine->i915, context_size); if (IS_ERR(ctx_obj)) return PTR_ERR(ctx_obj); - vma = i915_vma_instance(ctx_obj, &engine->i915->ggtt.vm, NULL); + vma = i915_vma_instance(ctx_obj, &engine->gt->ggtt->vm, NULL); if (IS_ERR(vma)) { ret = PTR_ERR(vma); goto error_deref_obj; } - timeline = get_timeline(ce->gem_context); - if (IS_ERR(timeline)) { - ret = PTR_ERR(timeline); - goto error_deref_obj; + if (!ce->timeline) { + struct intel_timeline *tl; + + tl = intel_timeline_create(engine->gt, NULL); + if (IS_ERR(tl)) { + ret = PTR_ERR(tl); + goto error_deref_obj; + } + + ce->timeline = tl; } - ring = intel_engine_create_ring(engine, - timeline, - ce->gem_context->ring_size); - i915_timeline_put(timeline); + ring = intel_engine_create_ring(engine, (unsigned long)ce->ring); if (IS_ERR(ring)) { ret = PTR_ERR(ring); goto error_deref_obj; @@ -3038,6 +3408,7 @@ static void virtual_context_destroy(struct kref *kref) if (ve->context.state) __execlists_context_fini(&ve->context); + intel_context_fini(&ve->context); kfree(ve->bonds); kfree(ve); @@ -3090,6 +3461,8 @@ static void virtual_context_enter(struct intel_context *ce) for (n = 0; n < ve->num_siblings; n++) intel_engine_pm_get(ve->siblings[n]); + + intel_timeline_enter(ce->timeline); } static void virtual_context_exit(struct intel_context *ce) @@ -3097,6 +3470,8 @@ static void virtual_context_exit(struct intel_context *ce) struct virtual_engine *ve = container_of(ce, typeof(*ve), context); unsigned int n; + intel_timeline_exit(ce->timeline); + for (n = 0; n < ve->num_siblings; n++) intel_engine_pm_put(ve->siblings[n]); } @@ -3290,11 +3665,11 @@ intel_execlists_create_virtual(struct i915_gem_context *ctx, return ERR_PTR(-ENOMEM); ve->base.i915 = ctx->i915; + ve->base.gt = siblings[0]->gt; ve->base.id = -1; ve->base.class = OTHER_CLASS; ve->base.uabi_class = I915_ENGINE_CLASS_INVALID; ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL; - ve->base.flags = I915_ENGINE_IS_VIRTUAL; /* * The decision on whether to submit a request using semaphores @@ -3391,8 +3766,18 @@ intel_execlists_create_virtual(struct i915_gem_context *ctx, ve->base.emit_fini_breadcrumb = sibling->emit_fini_breadcrumb; ve->base.emit_fini_breadcrumb_dw = sibling->emit_fini_breadcrumb_dw; + + ve->base.flags = sibling->flags; } + ve->base.flags |= I915_ENGINE_IS_VIRTUAL; + + err = __execlists_context_alloc(&ve->context, siblings[0]); + if (err) + goto err_put; + + __set_bit(CONTEXT_ALLOC_BIT, &ve->context.flags); + return &ve->context; err_put: |