diff options
Diffstat (limited to 'drivers/gpu/drm/i915/intel_lrc.c')
-rw-r--r-- | drivers/gpu/drm/i915/intel_lrc.c | 216 |
1 files changed, 140 insertions, 76 deletions
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 174479232e94..43957bb37a42 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -541,11 +541,6 @@ static void inject_preempt_context(struct intel_engine_cs *engine) GEM_BUG_ON(execlists->preempt_complete_status != upper_32_bits(ce->lrc_desc)); - GEM_BUG_ON((ce->lrc_reg_state[CTX_CONTEXT_CONTROL + 1] & - _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT | - CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT)) != - _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT | - CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT)); /* * Switch to our empty preempt context so @@ -1277,6 +1272,8 @@ static void execlists_context_destroy(struct intel_context *ce) static void execlists_context_unpin(struct intel_context *ce) { + i915_gem_context_unpin_hw_id(ce->gem_context); + intel_ring_unpin(ce->ring); ce->state->obj->pin_global--; @@ -1297,16 +1294,15 @@ static int __context_pin(struct i915_gem_context *ctx, struct i915_vma *vma) * on an active context (which by nature is already on the GPU). */ if (!(vma->flags & I915_VMA_GLOBAL_BIND)) { - err = i915_gem_object_set_to_gtt_domain(vma->obj, true); + err = i915_gem_object_set_to_wc_domain(vma->obj, true); if (err) return err; } flags = PIN_GLOBAL | PIN_HIGH; - if (ctx->ggtt_offset_bias) - flags |= PIN_OFFSET_BIAS | ctx->ggtt_offset_bias; + flags |= PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma); - return i915_vma_pin(vma, 0, GEN8_LR_CONTEXT_ALIGN, flags); + return i915_vma_pin(vma, 0, 0, flags); } static struct intel_context * @@ -1326,28 +1322,38 @@ __execlists_context_pin(struct intel_engine_cs *engine, if (ret) goto err; - vaddr = i915_gem_object_pin_map(ce->state->obj, I915_MAP_WB); + vaddr = i915_gem_object_pin_map(ce->state->obj, + i915_coherent_map_type(ctx->i915) | + I915_MAP_OVERRIDE); if (IS_ERR(vaddr)) { ret = PTR_ERR(vaddr); goto unpin_vma; } - ret = intel_ring_pin(ce->ring, ctx->i915, ctx->ggtt_offset_bias); + ret = intel_ring_pin(ce->ring); if (ret) goto unpin_map; + ret = i915_gem_context_pin_hw_id(ctx); + if (ret) + goto unpin_ring; + intel_lr_context_descriptor_update(ctx, engine, ce); + GEM_BUG_ON(!intel_ring_offset_valid(ce->ring, ce->ring->head)); + ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE; ce->lrc_reg_state[CTX_RING_BUFFER_START+1] = i915_ggtt_offset(ce->ring->vma); - GEM_BUG_ON(!intel_ring_offset_valid(ce->ring, ce->ring->head)); - ce->lrc_reg_state[CTX_RING_HEAD+1] = ce->ring->head; + ce->lrc_reg_state[CTX_RING_HEAD + 1] = ce->ring->head; + ce->lrc_reg_state[CTX_RING_TAIL + 1] = ce->ring->tail; ce->state->obj->pin_global++; i915_gem_context_get(ctx); return ce; +unpin_ring: + intel_ring_unpin(ce->ring); unpin_map: i915_gem_object_unpin_map(ce->state->obj); unpin_vma: @@ -1643,7 +1649,7 @@ static int lrc_setup_wa_ctx(struct intel_engine_cs *engine) goto err; } - err = i915_vma_pin(vma, 0, PAGE_SIZE, PIN_GLOBAL | PIN_HIGH); + err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH); if (err) goto err; @@ -1657,7 +1663,7 @@ err: static void lrc_destroy_wa_ctx(struct intel_engine_cs *engine) { - i915_vma_unpin_and_release(&engine->wa_ctx.vma); + i915_vma_unpin_and_release(&engine->wa_ctx.vma, 0); } typedef u32 *(*wa_bb_func_t)(struct intel_engine_cs *engine, u32 *batch); @@ -1775,11 +1781,7 @@ static bool unexpected_starting_state(struct intel_engine_cs *engine) static int gen8_init_common_ring(struct intel_engine_cs *engine) { - int ret; - - ret = intel_mocs_init_engine(engine); - if (ret) - return ret; + intel_mocs_init_engine(engine); intel_engine_reset_breadcrumbs(engine); @@ -1838,7 +1840,8 @@ execlists_reset_prepare(struct intel_engine_cs *engine) struct i915_request *request, *active; unsigned long flags; - GEM_TRACE("%s\n", engine->name); + GEM_TRACE("%s: depth<-%d\n", engine->name, + atomic_read(&execlists->tasklet.count)); /* * Prevent request submission to the hardware until we have @@ -1971,22 +1974,18 @@ static void execlists_reset_finish(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; - /* After a GPU reset, we may have requests to replay */ - if (!RB_EMPTY_ROOT(&execlists->queue.rb_root)) - tasklet_schedule(&execlists->tasklet); - /* - * Flush the tasklet while we still have the forcewake to be sure - * that it is not allowed to sleep before we restart and reload a - * context. + * After a GPU reset, we may have requests to replay. Do so now while + * we still have the forcewake to be sure that the GPU is not allowed + * to sleep before we restart and reload a context. * - * As before (with execlists_reset_prepare) we rely on the caller - * serialising multiple attempts to reset so that we know that we - * are the only one manipulating tasklet state. */ - __tasklet_enable_sync_once(&execlists->tasklet); + if (!RB_EMPTY_ROOT(&execlists->queue.rb_root)) + execlists->tasklet.func(execlists->tasklet.data); - GEM_TRACE("%s\n", engine->name); + tasklet_enable(&execlists->tasklet); + GEM_TRACE("%s: depth->%d\n", engine->name, + atomic_read(&execlists->tasklet.count)); } static int intel_logical_ring_emit_pdps(struct i915_request *rq) @@ -2066,8 +2065,7 @@ static int gen8_emit_bb_start(struct i915_request *rq, /* FIXME(BDW): Address space and security selectors. */ *cs++ = MI_BATCH_BUFFER_START_GEN8 | - (flags & I915_DISPATCH_SECURE ? 0 : BIT(8)) | - (flags & I915_DISPATCH_RS ? MI_BATCH_RESOURCE_STREAMER : 0); + (flags & I915_DISPATCH_SECURE ? 0 : BIT(8)); *cs++ = lower_32_bits(offset); *cs++ = upper_32_bits(offset); @@ -2398,7 +2396,7 @@ static int logical_ring_init(struct intel_engine_cs *engine) ret = intel_engine_init_common(engine); if (ret) - goto error; + return ret; if (HAS_LOGICAL_RING_ELSQ(i915)) { execlists->submit_reg = i915->regs + @@ -2440,10 +2438,6 @@ static int logical_ring_init(struct intel_engine_cs *engine) reset_csb_pointers(execlists); return 0; - -error: - intel_logical_ring_cleanup(engine); - return ret; } int logical_render_ring_init(struct intel_engine_cs *engine) @@ -2466,10 +2460,14 @@ int logical_render_ring_init(struct intel_engine_cs *engine) engine->emit_breadcrumb = gen8_emit_breadcrumb_rcs; engine->emit_breadcrumb_sz = gen8_emit_breadcrumb_rcs_sz; - ret = intel_engine_create_scratch(engine, PAGE_SIZE); + ret = logical_ring_init(engine); if (ret) return ret; + ret = intel_engine_create_scratch(engine, PAGE_SIZE); + if (ret) + goto err_cleanup_common; + ret = intel_init_workaround_bb(engine); if (ret) { /* @@ -2481,7 +2479,11 @@ int logical_render_ring_init(struct intel_engine_cs *engine) ret); } - return logical_ring_init(engine); + return 0; + +err_cleanup_common: + intel_engine_cleanup_common(engine); + return ret; } int logical_xcs_ring_init(struct intel_engine_cs *engine) @@ -2494,6 +2496,9 @@ int logical_xcs_ring_init(struct intel_engine_cs *engine) static u32 make_rpcs(struct drm_i915_private *dev_priv) { + bool subslice_pg = INTEL_INFO(dev_priv)->sseu.has_subslice_pg; + u8 slices = hweight8(INTEL_INFO(dev_priv)->sseu.slice_mask); + u8 subslices = hweight8(INTEL_INFO(dev_priv)->sseu.subslice_mask[0]); u32 rpcs = 0; /* @@ -2504,30 +2509,88 @@ make_rpcs(struct drm_i915_private *dev_priv) return 0; /* + * Since the SScount bitfield in GEN8_R_PWR_CLK_STATE is only three bits + * wide and Icelake has up to eight subslices, specfial programming is + * needed in order to correctly enable all subslices. + * + * According to documentation software must consider the configuration + * as 2x4x8 and hardware will translate this to 1x8x8. + * + * Furthemore, even though SScount is three bits, maximum documented + * value for it is four. From this some rules/restrictions follow: + * + * 1. + * If enabled subslice count is greater than four, two whole slices must + * be enabled instead. + * + * 2. + * When more than one slice is enabled, hardware ignores the subslice + * count altogether. + * + * From these restrictions it follows that it is not possible to enable + * a count of subslices between the SScount maximum of four restriction, + * and the maximum available number on a particular SKU. Either all + * subslices are enabled, or a count between one and four on the first + * slice. + */ + if (IS_GEN11(dev_priv) && slices == 1 && subslices >= 4) { + GEM_BUG_ON(subslices & 1); + + subslice_pg = false; + slices *= 2; + } + + /* * Starting in Gen9, render power gating can leave * slice/subslice/EU in a partially enabled state. We * must make an explicit request through RPCS for full * enablement. */ if (INTEL_INFO(dev_priv)->sseu.has_slice_pg) { - rpcs |= GEN8_RPCS_S_CNT_ENABLE; - rpcs |= hweight8(INTEL_INFO(dev_priv)->sseu.slice_mask) << - GEN8_RPCS_S_CNT_SHIFT; - rpcs |= GEN8_RPCS_ENABLE; + u32 mask, val = slices; + + if (INTEL_GEN(dev_priv) >= 11) { + mask = GEN11_RPCS_S_CNT_MASK; + val <<= GEN11_RPCS_S_CNT_SHIFT; + } else { + mask = GEN8_RPCS_S_CNT_MASK; + val <<= GEN8_RPCS_S_CNT_SHIFT; + } + + GEM_BUG_ON(val & ~mask); + val &= mask; + + rpcs |= GEN8_RPCS_ENABLE | GEN8_RPCS_S_CNT_ENABLE | val; } - if (INTEL_INFO(dev_priv)->sseu.has_subslice_pg) { - rpcs |= GEN8_RPCS_SS_CNT_ENABLE; - rpcs |= hweight8(INTEL_INFO(dev_priv)->sseu.subslice_mask[0]) << - GEN8_RPCS_SS_CNT_SHIFT; - rpcs |= GEN8_RPCS_ENABLE; + if (subslice_pg) { + u32 val = subslices; + + val <<= GEN8_RPCS_SS_CNT_SHIFT; + + GEM_BUG_ON(val & ~GEN8_RPCS_SS_CNT_MASK); + val &= GEN8_RPCS_SS_CNT_MASK; + + rpcs |= GEN8_RPCS_ENABLE | GEN8_RPCS_SS_CNT_ENABLE | val; } if (INTEL_INFO(dev_priv)->sseu.has_eu_pg) { - rpcs |= INTEL_INFO(dev_priv)->sseu.eu_per_subslice << - GEN8_RPCS_EU_MIN_SHIFT; - rpcs |= INTEL_INFO(dev_priv)->sseu.eu_per_subslice << - GEN8_RPCS_EU_MAX_SHIFT; + u32 val; + + val = INTEL_INFO(dev_priv)->sseu.eu_per_subslice << + GEN8_RPCS_EU_MIN_SHIFT; + GEM_BUG_ON(val & ~GEN8_RPCS_EU_MIN_MASK); + val &= GEN8_RPCS_EU_MIN_MASK; + + rpcs |= val; + + val = INTEL_INFO(dev_priv)->sseu.eu_per_subslice << + GEN8_RPCS_EU_MAX_SHIFT; + GEM_BUG_ON(val & ~GEN8_RPCS_EU_MAX_MASK); + val &= GEN8_RPCS_EU_MAX_MASK; + + rpcs |= val; + rpcs |= GEN8_RPCS_ENABLE; } @@ -2584,11 +2647,13 @@ static void execlists_init_reg_state(u32 *regs, MI_LRI_FORCE_POSTED; CTX_REG(regs, CTX_CONTEXT_CONTROL, RING_CONTEXT_CONTROL(engine), - _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT | - CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT) | - _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH | - (HAS_RESOURCE_STREAMER(dev_priv) ? - CTX_CTRL_RS_CTX_ENABLE : 0))); + _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) | + _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH)); + if (INTEL_GEN(dev_priv) < 11) { + regs[CTX_CONTEXT_CONTROL + 1] |= + _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT | + CTX_CTRL_RS_CTX_ENABLE); + } CTX_REG(regs, CTX_RING_HEAD, RING_HEAD(base), 0); CTX_REG(regs, CTX_RING_TAIL, RING_TAIL(base), 0); CTX_REG(regs, CTX_RING_BUFFER_START, RING_START(base), 0); @@ -2654,6 +2719,10 @@ static void execlists_init_reg_state(u32 *regs, i915_oa_init_reg_state(engine, ctx, regs); } + + regs[CTX_END] = MI_BATCH_BUFFER_END; + if (INTEL_GEN(dev_priv) >= 10) + regs[CTX_END] |= BIT(0); } static int @@ -2780,13 +2849,14 @@ error_deref_obj: return ret; } -void intel_lr_context_resume(struct drm_i915_private *dev_priv) +void intel_lr_context_resume(struct drm_i915_private *i915) { struct intel_engine_cs *engine; struct i915_gem_context *ctx; enum intel_engine_id id; - /* Because we emit WA_TAIL_DWORDS there may be a disparity + /* + * Because we emit WA_TAIL_DWORDS there may be a disparity * between our bookkeeping in ce->ring->head and ce->ring->tail and * that stored in context. As we only write new commands from * ce->ring->tail onwards, everything before that is junk. If the GPU @@ -2796,28 +2866,22 @@ void intel_lr_context_resume(struct drm_i915_private *dev_priv) * So to avoid that we reset the context images upon resume. For * simplicity, we just zero everything out. */ - list_for_each_entry(ctx, &dev_priv->contexts.list, link) { - for_each_engine(engine, dev_priv, id) { + list_for_each_entry(ctx, &i915->contexts.list, link) { + for_each_engine(engine, i915, id) { struct intel_context *ce = to_intel_context(ctx, engine); - u32 *reg; if (!ce->state) continue; - reg = i915_gem_object_pin_map(ce->state->obj, - I915_MAP_WB); - if (WARN_ON(IS_ERR(reg))) - continue; - - reg += LRC_STATE_PN * PAGE_SIZE / sizeof(*reg); - reg[CTX_RING_HEAD+1] = 0; - reg[CTX_RING_TAIL+1] = 0; + intel_ring_reset(ce->ring, 0); - ce->state->obj->mm.dirty = true; - i915_gem_object_unpin_map(ce->state->obj); + if (ce->pin_count) { /* otherwise done in context_pin */ + u32 *regs = ce->lrc_reg_state; - intel_ring_reset(ce->ring, 0); + regs[CTX_RING_HEAD + 1] = ce->ring->head; + regs[CTX_RING_TAIL + 1] = ce->ring->tail; + } } } } |