diff options
Diffstat (limited to 'drivers/gpu/drm/i915/intel_lrc.c')
-rw-r--r-- | drivers/gpu/drm/i915/intel_lrc.c | 376 |
1 files changed, 95 insertions, 281 deletions
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 58d1d3d47dd3..4be167dcd209 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -259,63 +259,6 @@ intel_lr_context_descriptor_update(struct i915_gem_context *ctx, ce->lrc_desc = desc; } -static struct i915_priolist * -lookup_priolist(struct intel_engine_cs *engine, int prio) -{ - struct intel_engine_execlists * const execlists = &engine->execlists; - struct i915_priolist *p; - struct rb_node **parent, *rb; - bool first = true; - - if (unlikely(execlists->no_priolist)) - prio = I915_PRIORITY_NORMAL; - -find_priolist: - /* most positive priority is scheduled first, equal priorities fifo */ - rb = NULL; - parent = &execlists->queue.rb_root.rb_node; - while (*parent) { - rb = *parent; - p = to_priolist(rb); - if (prio > p->priority) { - parent = &rb->rb_left; - } else if (prio < p->priority) { - parent = &rb->rb_right; - first = false; - } else { - return p; - } - } - - if (prio == I915_PRIORITY_NORMAL) { - p = &execlists->default_priolist; - } else { - p = kmem_cache_alloc(engine->i915->priorities, GFP_ATOMIC); - /* Convert an allocation failure to a priority bump */ - if (unlikely(!p)) { - prio = I915_PRIORITY_NORMAL; /* recurses just once */ - - /* To maintain ordering with all rendering, after an - * allocation failure we have to disable all scheduling. - * Requests will then be executed in fifo, and schedule - * will ensure that dependencies are emitted in fifo. - * There will be still some reordering with existing - * requests, so if userspace lied about their - * dependencies that reordering may be visible. - */ - execlists->no_priolist = true; - goto find_priolist; - } - } - - p->priority = prio; - INIT_LIST_HEAD(&p->requests); - rb_link_node(&p->node, rb, parent); - rb_insert_color_cached(&p->node, &execlists->queue, first); - - return p; -} - static void unwind_wa_tail(struct i915_request *rq) { rq->tail = intel_ring_wrap(rq->ring, rq->wa_tail - WA_TAIL_BYTES); @@ -324,9 +267,9 @@ static void unwind_wa_tail(struct i915_request *rq) static void __unwind_incomplete_requests(struct intel_engine_cs *engine) { - struct i915_request *rq, *rn; - struct i915_priolist *uninitialized_var(p); - int last_prio = I915_PRIORITY_INVALID; + struct i915_request *rq, *rn, *active = NULL; + struct list_head *uninitialized_var(pl); + int prio = I915_PRIORITY_INVALID | I915_PRIORITY_NEWCLIENT; lockdep_assert_held(&engine->timeline.lock); @@ -334,19 +277,34 @@ static void __unwind_incomplete_requests(struct intel_engine_cs *engine) &engine->timeline.requests, link) { if (i915_request_completed(rq)) - return; + break; __i915_request_unsubmit(rq); unwind_wa_tail(rq); + GEM_BUG_ON(rq->hw_context->active); + GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID); - if (rq_prio(rq) != last_prio) { - last_prio = rq_prio(rq); - p = lookup_priolist(engine, last_prio); + if (rq_prio(rq) != prio) { + prio = rq_prio(rq); + pl = i915_sched_lookup_priolist(engine, prio); } + GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)); - GEM_BUG_ON(p->priority != rq_prio(rq)); - list_add(&rq->sched.link, &p->requests); + list_add(&rq->sched.link, pl); + + active = rq; + } + + /* + * The active request is now effectively the start of a new client + * stream, so give it the equivalent small priority bump to prevent + * it being gazumped a second time by another peer. + */ + if (!(prio & I915_PRIORITY_NEWCLIENT)) { + prio |= I915_PRIORITY_NEWCLIENT; + list_move_tail(&active->sched.link, + i915_sched_lookup_priolist(engine, prio)); } } @@ -355,13 +313,8 @@ execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists) { struct intel_engine_cs *engine = container_of(execlists, typeof(*engine), execlists); - unsigned long flags; - - spin_lock_irqsave(&engine->timeline.lock, flags); __unwind_incomplete_requests(engine); - - spin_unlock_irqrestore(&engine->timeline.lock, flags); } static inline void @@ -394,13 +347,17 @@ execlists_user_end(struct intel_engine_execlists *execlists) static inline void execlists_context_schedule_in(struct i915_request *rq) { + GEM_BUG_ON(rq->hw_context->active); + execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN); intel_engine_context_in(rq->engine); + rq->hw_context->active = rq->engine; } static inline void execlists_context_schedule_out(struct i915_request *rq, unsigned long status) { + rq->hw_context->active = NULL; intel_engine_context_out(rq->engine); execlists_context_status_change(rq, status); trace_i915_request_out(rq); @@ -417,9 +374,8 @@ execlists_update_context_pdps(struct i915_hw_ppgtt *ppgtt, u32 *reg_state) static u64 execlists_update_context(struct i915_request *rq) { + struct i915_hw_ppgtt *ppgtt = rq->gem_context->ppgtt; struct intel_context *ce = rq->hw_context; - struct i915_hw_ppgtt *ppgtt = - rq->gem_context->ppgtt ?: rq->i915->mm.aliasing_ppgtt; u32 *reg_state = ce->lrc_reg_state; reg_state[CTX_RING_TAIL+1] = intel_ring_set_tail(rq->ring, rq->tail); @@ -430,7 +386,7 @@ static u64 execlists_update_context(struct i915_request *rq) * PML4 is allocated during ppgtt init, so this is not needed * in 48-bit mode. */ - if (ppgtt && !i915_vm_is_48bit(&ppgtt->vm)) + if (!i915_vm_is_48bit(&ppgtt->vm)) execlists_update_context_pdps(ppgtt, reg_state); /* @@ -686,8 +642,9 @@ static void execlists_dequeue(struct intel_engine_cs *engine) while ((rb = rb_first_cached(&execlists->queue))) { struct i915_priolist *p = to_priolist(rb); struct i915_request *rq, *rn; + int i; - list_for_each_entry_safe(rq, rn, &p->requests, sched.link) { + priolist_for_each_request_consume(rq, rn, p, i) { /* * Can we combine this request with the current port? * It has to be the same context/ringbuffer and not @@ -706,11 +663,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine) * combine this request with the last, then we * are done. */ - if (port == last_port) { - __list_del_many(&p->requests, - &rq->sched.link); + if (port == last_port) goto done; - } /* * If GVT overrides us we only ever submit @@ -720,11 +674,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine) * request) to the second port. */ if (ctx_single_port_submission(last->hw_context) || - ctx_single_port_submission(rq->hw_context)) { - __list_del_many(&p->requests, - &rq->sched.link); + ctx_single_port_submission(rq->hw_context)) goto done; - } GEM_BUG_ON(last->hw_context == rq->hw_context); @@ -735,15 +686,16 @@ static void execlists_dequeue(struct intel_engine_cs *engine) GEM_BUG_ON(port_isset(port)); } - INIT_LIST_HEAD(&rq->sched.link); + list_del_init(&rq->sched.link); + __i915_request_submit(rq); trace_i915_request_in(rq, port_index(port, execlists)); + last = rq; submit = true; } rb_erase_cached(&p->node, &execlists->queue); - INIT_LIST_HEAD(&p->requests); if (p->priority != I915_PRIORITY_NORMAL) kmem_cache_free(engine->i915->priorities, p); } @@ -820,6 +772,8 @@ execlists_cancel_port_requests(struct intel_engine_execlists * const execlists) static void reset_csb_pointers(struct intel_engine_execlists *execlists) { + const unsigned int reset_value = GEN8_CSB_ENTRIES - 1; + /* * After a reset, the HW starts writing into CSB entry [0]. We * therefore have to set our HEAD pointer back one entry so that @@ -829,8 +783,8 @@ static void reset_csb_pointers(struct intel_engine_execlists *execlists) * inline comparison of our cached head position against the last HW * write works even before the first interrupt. */ - execlists->csb_head = execlists->csb_write_reset; - WRITE_ONCE(*execlists->csb_write, execlists->csb_write_reset); + execlists->csb_head = reset_value; + WRITE_ONCE(*execlists->csb_write, reset_value); } static void nop_submission_tasklet(unsigned long data) @@ -871,27 +825,34 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) /* Mark all executing requests as skipped. */ list_for_each_entry(rq, &engine->timeline.requests, link) { GEM_BUG_ON(!rq->global_seqno); - if (!i915_request_completed(rq)) - dma_fence_set_error(&rq->fence, -EIO); + + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags)) + continue; + + dma_fence_set_error(&rq->fence, -EIO); } /* Flush the queued requests to the timeline list (for retiring). */ while ((rb = rb_first_cached(&execlists->queue))) { struct i915_priolist *p = to_priolist(rb); + int i; - list_for_each_entry_safe(rq, rn, &p->requests, sched.link) { - INIT_LIST_HEAD(&rq->sched.link); + priolist_for_each_request_consume(rq, rn, p, i) { + list_del_init(&rq->sched.link); dma_fence_set_error(&rq->fence, -EIO); __i915_request_submit(rq); } rb_erase_cached(&p->node, &execlists->queue); - INIT_LIST_HEAD(&p->requests); if (p->priority != I915_PRIORITY_NORMAL) kmem_cache_free(engine->i915->priorities, p); } + intel_write_status_page(engine, + I915_GEM_HWS_INDEX, + intel_engine_last_submit(engine)); + /* Remaining _unready_ requests will be nop'ed when submitted */ execlists->queue_priority = INT_MIN; @@ -1093,13 +1054,7 @@ static void queue_request(struct intel_engine_cs *engine, struct i915_sched_node *node, int prio) { - list_add_tail(&node->link, - &lookup_priolist(engine, prio)->requests); -} - -static void __update_queue(struct intel_engine_cs *engine, int prio) -{ - engine->execlists.queue_priority = prio; + list_add_tail(&node->link, i915_sched_lookup_priolist(engine, prio)); } static void __submit_queue_imm(struct intel_engine_cs *engine) @@ -1118,7 +1073,7 @@ static void __submit_queue_imm(struct intel_engine_cs *engine) static void submit_queue(struct intel_engine_cs *engine, int prio) { if (prio > engine->execlists.queue_priority) { - __update_queue(engine, prio); + engine->execlists.queue_priority = prio; __submit_queue_imm(engine); } } @@ -1141,139 +1096,6 @@ static void execlists_submit_request(struct i915_request *request) spin_unlock_irqrestore(&engine->timeline.lock, flags); } -static struct i915_request *sched_to_request(struct i915_sched_node *node) -{ - return container_of(node, struct i915_request, sched); -} - -static struct intel_engine_cs * -sched_lock_engine(struct i915_sched_node *node, struct intel_engine_cs *locked) -{ - struct intel_engine_cs *engine = sched_to_request(node)->engine; - - GEM_BUG_ON(!locked); - - if (engine != locked) { - spin_unlock(&locked->timeline.lock); - spin_lock(&engine->timeline.lock); - } - - return engine; -} - -static void execlists_schedule(struct i915_request *request, - const struct i915_sched_attr *attr) -{ - struct i915_priolist *uninitialized_var(pl); - struct intel_engine_cs *engine, *last; - struct i915_dependency *dep, *p; - struct i915_dependency stack; - const int prio = attr->priority; - LIST_HEAD(dfs); - - GEM_BUG_ON(prio == I915_PRIORITY_INVALID); - - if (i915_request_completed(request)) - return; - - if (prio <= READ_ONCE(request->sched.attr.priority)) - return; - - /* Need BKL in order to use the temporary link inside i915_dependency */ - lockdep_assert_held(&request->i915->drm.struct_mutex); - - stack.signaler = &request->sched; - list_add(&stack.dfs_link, &dfs); - - /* - * Recursively bump all dependent priorities to match the new request. - * - * A naive approach would be to use recursion: - * static void update_priorities(struct i915_sched_node *node, prio) { - * list_for_each_entry(dep, &node->signalers_list, signal_link) - * update_priorities(dep->signal, prio) - * queue_request(node); - * } - * but that may have unlimited recursion depth and so runs a very - * real risk of overunning the kernel stack. Instead, we build - * a flat list of all dependencies starting with the current request. - * As we walk the list of dependencies, we add all of its dependencies - * to the end of the list (this may include an already visited - * request) and continue to walk onwards onto the new dependencies. The - * end result is a topological list of requests in reverse order, the - * last element in the list is the request we must execute first. - */ - list_for_each_entry(dep, &dfs, dfs_link) { - struct i915_sched_node *node = dep->signaler; - - /* - * Within an engine, there can be no cycle, but we may - * refer to the same dependency chain multiple times - * (redundant dependencies are not eliminated) and across - * engines. - */ - list_for_each_entry(p, &node->signalers_list, signal_link) { - GEM_BUG_ON(p == dep); /* no cycles! */ - - if (i915_sched_node_signaled(p->signaler)) - continue; - - GEM_BUG_ON(p->signaler->attr.priority < node->attr.priority); - if (prio > READ_ONCE(p->signaler->attr.priority)) - list_move_tail(&p->dfs_link, &dfs); - } - } - - /* - * If we didn't need to bump any existing priorities, and we haven't - * yet submitted this request (i.e. there is no potential race with - * execlists_submit_request()), we can set our own priority and skip - * acquiring the engine locks. - */ - if (request->sched.attr.priority == I915_PRIORITY_INVALID) { - GEM_BUG_ON(!list_empty(&request->sched.link)); - request->sched.attr = *attr; - if (stack.dfs_link.next == stack.dfs_link.prev) - return; - __list_del_entry(&stack.dfs_link); - } - - last = NULL; - engine = request->engine; - spin_lock_irq(&engine->timeline.lock); - - /* Fifo and depth-first replacement ensure our deps execute before us */ - list_for_each_entry_safe_reverse(dep, p, &dfs, dfs_link) { - struct i915_sched_node *node = dep->signaler; - - INIT_LIST_HEAD(&dep->dfs_link); - - engine = sched_lock_engine(node, engine); - - if (prio <= node->attr.priority) - continue; - - node->attr.priority = prio; - if (!list_empty(&node->link)) { - if (last != engine) { - pl = lookup_priolist(engine, prio); - last = engine; - } - GEM_BUG_ON(pl->priority != prio); - list_move_tail(&node->link, &pl->requests); - } - - if (prio > engine->execlists.queue_priority && - i915_sw_fence_done(&sched_to_request(node)->submit)) { - /* defer submission until after all of our updates */ - __update_queue(engine, prio); - tasklet_hi_schedule(&engine->execlists.tasklet); - } - } - - spin_unlock_irq(&engine->timeline.lock); -} - static void execlists_context_destroy(struct intel_context *ce) { GEM_BUG_ON(ce->pin_count); @@ -1289,6 +1111,28 @@ static void execlists_context_destroy(struct intel_context *ce) static void execlists_context_unpin(struct intel_context *ce) { + struct intel_engine_cs *engine; + + /* + * The tasklet may still be using a pointer to our state, via an + * old request. However, since we know we only unpin the context + * on retirement of the following request, we know that the last + * request referencing us will have had a completion CS interrupt. + * If we see that it is still active, it means that the tasklet hasn't + * had the chance to run yet; let it run before we teardown the + * reference it may use. + */ + engine = READ_ONCE(ce->active); + if (unlikely(engine)) { + unsigned long flags; + + spin_lock_irqsave(&engine->timeline.lock, flags); + process_csb(engine); + spin_unlock_irqrestore(&engine->timeline.lock, flags); + + GEM_BUG_ON(READ_ONCE(ce->active)); + } + i915_gem_context_unpin_hw_id(ce->gem_context); intel_ring_unpin(ce->ring); @@ -1392,6 +1236,7 @@ execlists_context_pin(struct intel_engine_cs *engine, struct intel_context *ce = to_intel_context(ctx, engine); lockdep_assert_held(&ctx->i915->drm.struct_mutex); + GEM_BUG_ON(!ctx->ppgtt); if (likely(ce->pin_count++)) return ce; @@ -1571,18 +1416,6 @@ static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch) batch = emit_lri(batch, lri, ARRAY_SIZE(lri)); - /* WaClearSlmSpaceAtContextSwitch:kbl */ - /* Actual scratch location is at 128 bytes offset */ - if (IS_KBL_REVID(engine->i915, 0, KBL_REVID_A0)) { - batch = gen8_emit_pipe_control(batch, - PIPE_CONTROL_FLUSH_L3 | - PIPE_CONTROL_GLOBAL_GTT_IVB | - PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_QW_WRITE, - i915_scratch_offset(engine->i915) - + 2 * CACHELINE_BYTES); - } - /* WaMediaPoolStateCmdInWABB:bxt,glk */ if (HAS_POOLED_EU(engine->i915)) { /* @@ -1697,7 +1530,7 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine) unsigned int i; int ret; - if (GEM_WARN_ON(engine->id != RCS)) + if (GEM_DEBUG_WARN_ON(engine->id != RCS)) return -EINVAL; switch (INTEL_GEN(engine->i915)) { @@ -1736,8 +1569,8 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine) */ for (i = 0; i < ARRAY_SIZE(wa_bb_fn); i++) { wa_bb[i]->offset = batch_ptr - batch; - if (GEM_WARN_ON(!IS_ALIGNED(wa_bb[i]->offset, - CACHELINE_BYTES))) { + if (GEM_DEBUG_WARN_ON(!IS_ALIGNED(wa_bb[i]->offset, + CACHELINE_BYTES))) { ret = -EINVAL; break; } @@ -1825,7 +1658,7 @@ static int gen8_init_render_ring(struct intel_engine_cs *engine) if (ret) return ret; - intel_whitelist_workarounds_apply(engine); + intel_engine_apply_whitelist(engine); /* We need to disable the AsyncFlip performance optimisations in order * to use MI_WAIT_FOR_EVENT within the CS. It should already be @@ -1848,7 +1681,7 @@ static int gen9_init_render_ring(struct intel_engine_cs *engine) if (ret) return ret; - intel_whitelist_workarounds_apply(engine); + intel_engine_apply_whitelist(engine); return 0; } @@ -1922,7 +1755,7 @@ static void execlists_reset(struct intel_engine_cs *engine, unsigned long flags; u32 *regs; - GEM_TRACE("%s request global=%x, current=%d\n", + GEM_TRACE("%s request global=%d, current=%d\n", engine->name, request ? request->global_seqno : 0, intel_engine_get_seqno(engine)); @@ -2049,8 +1882,7 @@ static int gen8_emit_bb_start(struct i915_request *rq, * it is unsafe in case of lite-restore (because the ctx is * not idle). PML4 is allocated during ppgtt init so this is * not needed in 48-bit.*/ - if (rq->gem_context->ppgtt && - (intel_engine_flag(rq->engine) & rq->gem_context->ppgtt->pd_dirty_rings) && + if ((intel_engine_flag(rq->engine) & rq->gem_context->ppgtt->pd_dirty_rings) && !i915_vm_is_48bit(&rq->gem_context->ppgtt->vm) && !intel_vgpu_active(rq->i915)) { ret = intel_logical_ring_emit_pdps(rq); @@ -2129,7 +1961,7 @@ static int gen8_emit_flush(struct i915_request *request, u32 mode) if (mode & EMIT_INVALIDATE) { cmd |= MI_INVALIDATE_TLB; - if (request->engine->id == VCS) + if (request->engine->class == VIDEO_DECODE_CLASS) cmd |= MI_INVALIDATE_BSD; } @@ -2261,7 +2093,7 @@ static int gen8_init_rcs_context(struct i915_request *rq) { int ret; - ret = intel_ctx_workarounds_emit(rq); + ret = intel_engine_emit_ctx_wa(rq); if (ret) return ret; @@ -2314,7 +2146,7 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine) { engine->submit_request = execlists_submit_request; engine->cancel_requests = execlists_cancel_requests; - engine->schedule = execlists_schedule; + engine->schedule = i915_schedule; engine->execlists.tasklet.func = execlists_submission_tasklet; engine->reset.prepare = execlists_reset_prepare; @@ -2402,12 +2234,6 @@ logical_ring_setup(struct intel_engine_cs *engine) logical_ring_default_irqs(engine); } -static bool csb_force_mmio(struct drm_i915_private *i915) -{ - /* Older GVT emulation depends upon intercepting CSB mmio */ - return intel_vgpu_active(i915) && !intel_vgpu_has_hwsp_emulation(i915); -} - static int logical_ring_init(struct intel_engine_cs *engine) { struct drm_i915_private *i915 = engine->i915; @@ -2437,24 +2263,12 @@ static int logical_ring_init(struct intel_engine_cs *engine) upper_32_bits(ce->lrc_desc); } - execlists->csb_read = - i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)); - if (csb_force_mmio(i915)) { - execlists->csb_status = (u32 __force *) - (i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0))); + execlists->csb_status = + &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX]; - execlists->csb_write = (u32 __force *)execlists->csb_read; - execlists->csb_write_reset = - _MASKED_FIELD(GEN8_CSB_WRITE_PTR_MASK, - GEN8_CSB_ENTRIES - 1); - } else { - execlists->csb_status = - &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX]; + execlists->csb_write = + &engine->status_page.page_addr[intel_hws_csb_write_index(i915)]; - execlists->csb_write = - &engine->status_page.page_addr[intel_hws_csb_write_index(i915)]; - execlists->csb_write_reset = GEN8_CSB_ENTRIES - 1; - } reset_csb_pointers(execlists); return 0; @@ -2495,6 +2309,7 @@ int logical_render_ring_init(struct intel_engine_cs *engine) ret); } + intel_engine_init_whitelist(engine); intel_engine_init_workarounds(engine); return 0; @@ -2646,7 +2461,6 @@ static void execlists_init_reg_state(u32 *regs, struct intel_ring *ring) { struct drm_i915_private *dev_priv = engine->i915; - struct i915_hw_ppgtt *ppgtt = ctx->ppgtt ?: dev_priv->mm.aliasing_ppgtt; u32 base = engine->mmio_base; bool rcs = engine->class == RENDER_CLASS; @@ -2718,12 +2532,12 @@ static void execlists_init_reg_state(u32 *regs, CTX_REG(regs, CTX_PDP0_UDW, GEN8_RING_PDP_UDW(engine, 0), 0); CTX_REG(regs, CTX_PDP0_LDW, GEN8_RING_PDP_LDW(engine, 0), 0); - if (ppgtt && i915_vm_is_48bit(&ppgtt->vm)) { + if (i915_vm_is_48bit(&ctx->ppgtt->vm)) { /* 64b PPGTT (48bit canonical) * PDP0_DESCRIPTOR contains the base address to PML4 and * other PDP Descriptors are ignored. */ - ASSIGN_CTX_PML4(ppgtt, regs); + ASSIGN_CTX_PML4(ctx->ppgtt, regs); } if (rcs) { |