diff options
Diffstat (limited to 'drivers/gpu/drm/i915/intel_lrc.c')
-rw-r--r-- | drivers/gpu/drm/i915/intel_lrc.c | 251 |
1 files changed, 193 insertions, 58 deletions
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 0f358c5999ec..09df74b8e917 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -254,8 +254,10 @@ u32 intel_execlists_ctx_id(struct drm_i915_gem_object *ctx_obj) return lrca >> 12; } -static uint64_t execlists_ctx_descriptor(struct drm_i915_gem_object *ctx_obj) +static uint64_t execlists_ctx_descriptor(struct intel_engine_cs *ring, + struct drm_i915_gem_object *ctx_obj) { + struct drm_device *dev = ring->dev; uint64_t desc; uint64_t lrca = i915_gem_obj_ggtt_offset(ctx_obj); @@ -272,6 +274,13 @@ static uint64_t execlists_ctx_descriptor(struct drm_i915_gem_object *ctx_obj) * signalling between Command Streamers */ /* desc |= GEN8_CTX_FORCE_RESTORE; */ + /* WaEnableForceRestoreInCtxtDescForVCS:skl */ + if (IS_GEN9(dev) && + INTEL_REVID(dev) <= SKL_REVID_B0 && + (ring->id == BCS || ring->id == VCS || + ring->id == VECS || ring->id == VCS2)) + desc |= GEN8_CTX_FORCE_RESTORE; + return desc; } @@ -286,13 +295,13 @@ static void execlists_elsp_write(struct intel_engine_cs *ring, /* XXX: You must always write both descriptors in the order below. */ if (ctx_obj1) - temp = execlists_ctx_descriptor(ctx_obj1); + temp = execlists_ctx_descriptor(ring, ctx_obj1); else temp = 0; desc[1] = (u32)(temp >> 32); desc[0] = (u32)temp; - temp = execlists_ctx_descriptor(ctx_obj0); + temp = execlists_ctx_descriptor(ring, ctx_obj0); desc[3] = (u32)(temp >> 32); desc[2] = (u32)temp; @@ -384,6 +393,26 @@ static void execlists_context_unqueue(struct intel_engine_cs *ring) } } + if (IS_GEN8(ring->dev) || IS_GEN9(ring->dev)) { + /* + * WaIdleLiteRestore: make sure we never cause a lite + * restore with HEAD==TAIL + */ + if (req0 && req0->elsp_submitted) { + /* + * Apply the wa NOOPS to prevent ring:HEAD == req:TAIL + * as we resubmit the request. See gen8_emit_request() + * for where we prepare the padding after the end of the + * request. + */ + struct intel_ringbuffer *ringbuf; + + ringbuf = req0->ctx->engine[ring->id].ringbuf; + req0->tail += 8; + req0->tail &= ringbuf->size - 1; + } + } + WARN_ON(req1 && req1->elsp_submitted); execlists_submit_contexts(ring, req0->ctx, req0->tail, @@ -503,18 +532,19 @@ static int execlists_context_queue(struct intel_engine_cs *ring, * If there isn't a request associated with this submission, * create one as a temporary holder. */ - WARN(1, "execlist context submission without request"); request = kzalloc(sizeof(*request), GFP_KERNEL); if (request == NULL) return -ENOMEM; request->ring = ring; request->ctx = to; + kref_init(&request->ref); + request->uniq = dev_priv->request_uniq++; + i915_gem_context_reference(request->ctx); } else { + i915_gem_request_reference(request); WARN_ON(to != request->ctx); } request->tail = tail; - i915_gem_request_reference(request); - i915_gem_context_reference(request->ctx); intel_runtime_pm_get(dev_priv); @@ -611,7 +641,7 @@ static int execlists_move_to_gpu(struct intel_ringbuffer *ringbuf, * @vmas: list of vmas. * @batch_obj: the batchbuffer to submit. * @exec_start: batchbuffer start virtual address pointer. - * @flags: translated execbuffer call flags. + * @dispatch_flags: translated execbuffer call flags. * * This is the evil twin version of i915_gem_ringbuffer_submission. It abstracts * away the submission details of the execbuffer ioctl call. @@ -624,7 +654,7 @@ int intel_execlists_submission(struct drm_device *dev, struct drm_file *file, struct drm_i915_gem_execbuffer2 *args, struct list_head *vmas, struct drm_i915_gem_object *batch_obj, - u64 exec_start, u32 flags) + u64 exec_start, u32 dispatch_flags) { struct drm_i915_private *dev_priv = dev->dev_private; struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf; @@ -697,10 +727,12 @@ int intel_execlists_submission(struct drm_device *dev, struct drm_file *file, dev_priv->relative_constants_mode = instp_mode; } - ret = ring->emit_bb_start(ringbuf, ctx, exec_start, flags); + ret = ring->emit_bb_start(ringbuf, ctx, exec_start, dispatch_flags); if (ret) return ret; + trace_i915_gem_ring_dispatch(intel_ring_get_request(ring), dispatch_flags); + i915_gem_execbuffer_move_to_active(vmas, ring); i915_gem_execbuffer_retire_commands(dev, file, ring, batch_obj); @@ -731,7 +763,6 @@ void intel_execlists_retire_requests(struct intel_engine_cs *ring) if (ctx_obj && (ctx != ring->default_context)) intel_lr_context_unpin(ring, ctx); intel_runtime_pm_put(dev_priv); - i915_gem_context_unreference(ctx); list_del(&req->execlist_link); i915_gem_request_unreference(req); } @@ -776,7 +807,7 @@ int logical_ring_flush_all_caches(struct intel_ringbuffer *ringbuf, return 0; } -/** +/* * intel_logical_ring_advance_and_submit() - advance the tail and submit the workload * @ringbuf: Logical Ringbuffer to advance. * @@ -785,9 +816,10 @@ int logical_ring_flush_all_caches(struct intel_ringbuffer *ringbuf, * on a queue waiting for the ELSP to be ready to accept a new context submission. At that * point, the tail *inside* the context is updated and the ELSP written to. */ -void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf, - struct intel_context *ctx, - struct drm_i915_gem_request *request) +static void +intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf, + struct intel_context *ctx, + struct drm_i915_gem_request *request) { struct intel_engine_cs *ring = ringbuf->ring; @@ -876,12 +908,9 @@ static int logical_ring_alloc_request(struct intel_engine_cs *ring, return ret; } - /* Hold a reference to the context this request belongs to - * (we will need it when the time comes to emit/retire the - * request). - */ request->ctx = ctx; i915_gem_context_reference(request->ctx); + request->ringbuf = ctx->engine[ring->id].ringbuf; ring->outstanding_lazy_request = request; return 0; @@ -1140,11 +1169,22 @@ static int gen8_init_render_ring(struct intel_engine_cs *ring) return init_workarounds_ring(ring); } +static int gen9_init_render_ring(struct intel_engine_cs *ring) +{ + int ret; + + ret = gen8_init_common_ring(ring); + if (ret) + return ret; + + return init_workarounds_ring(ring); +} + static int gen8_emit_bb_start(struct intel_ringbuffer *ringbuf, struct intel_context *ctx, - u64 offset, unsigned flags) + u64 offset, unsigned dispatch_flags) { - bool ppgtt = !(flags & I915_DISPATCH_SECURE); + bool ppgtt = !(dispatch_flags & I915_DISPATCH_SECURE); int ret; ret = intel_logical_ring_begin(ringbuf, ctx, 4); @@ -1295,7 +1335,12 @@ static int gen8_emit_request(struct intel_ringbuffer *ringbuf, u32 cmd; int ret; - ret = intel_logical_ring_begin(ringbuf, request->ctx, 6); + /* + * Reserve space for 2 NOOPs at the end of each request to be + * used as a workaround for not being allowed to do lite + * restore with HEAD==TAIL (WaIdleLiteRestore). + */ + ret = intel_logical_ring_begin(ringbuf, request->ctx, 8); if (ret) return ret; @@ -1313,9 +1358,50 @@ static int gen8_emit_request(struct intel_ringbuffer *ringbuf, intel_logical_ring_emit(ringbuf, MI_NOOP); intel_logical_ring_advance_and_submit(ringbuf, request->ctx, request); + /* + * Here we add two extra NOOPs as padding to avoid + * lite restore of a context with HEAD==TAIL. + */ + intel_logical_ring_emit(ringbuf, MI_NOOP); + intel_logical_ring_emit(ringbuf, MI_NOOP); + intel_logical_ring_advance(ringbuf); + return 0; } +static int intel_lr_context_render_state_init(struct intel_engine_cs *ring, + struct intel_context *ctx) +{ + struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf; + struct render_state so; + struct drm_i915_file_private *file_priv = ctx->file_priv; + struct drm_file *file = file_priv ? file_priv->file : NULL; + int ret; + + ret = i915_gem_render_state_prepare(ring, &so); + if (ret) + return ret; + + if (so.rodata == NULL) + return 0; + + ret = ring->emit_bb_start(ringbuf, + ctx, + so.ggtt_offset, + I915_DISPATCH_SECURE); + if (ret) + goto out; + + i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), ring); + + ret = __i915_add_request(ring, file, so.obj); + /* intel_logical_ring_add_request moves object to inactive if it + * fails */ +out: + i915_gem_render_state_fini(&so); + return ret; +} + static int gen8_init_rcs_context(struct intel_engine_cs *ring, struct intel_context *ctx) { @@ -1399,7 +1485,10 @@ static int logical_render_ring_init(struct drm_device *dev) if (HAS_L3_DPF(dev)) ring->irq_keep_mask |= GT_RENDER_L3_PARITY_ERROR_INTERRUPT; - ring->init_hw = gen8_init_render_ring; + if (INTEL_INFO(dev)->gen >= 9) + ring->init_hw = gen9_init_render_ring; + else + ring->init_hw = gen8_init_render_ring; ring->init_context = gen8_init_rcs_context; ring->cleanup = intel_fini_pipe_control; ring->get_seqno = gen8_get_seqno; @@ -1581,37 +1670,47 @@ cleanup_render_ring: return ret; } -int intel_lr_context_render_state_init(struct intel_engine_cs *ring, - struct intel_context *ctx) +static u32 +make_rpcs(struct drm_device *dev) { - struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf; - struct render_state so; - struct drm_i915_file_private *file_priv = ctx->file_priv; - struct drm_file *file = file_priv ? file_priv->file : NULL; - int ret; + u32 rpcs = 0; - ret = i915_gem_render_state_prepare(ring, &so); - if (ret) - return ret; - - if (so.rodata == NULL) + /* + * No explicit RPCS request is needed to ensure full + * slice/subslice/EU enablement prior to Gen9. + */ + if (INTEL_INFO(dev)->gen < 9) return 0; - ret = ring->emit_bb_start(ringbuf, - ctx, - so.ggtt_offset, - I915_DISPATCH_SECURE); - if (ret) - goto out; + /* + * Starting in Gen9, render power gating can leave + * slice/subslice/EU in a partially enabled state. We + * must make an explicit request through RPCS for full + * enablement. + */ + if (INTEL_INFO(dev)->has_slice_pg) { + rpcs |= GEN8_RPCS_S_CNT_ENABLE; + rpcs |= INTEL_INFO(dev)->slice_total << + GEN8_RPCS_S_CNT_SHIFT; + rpcs |= GEN8_RPCS_ENABLE; + } - i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), ring); + if (INTEL_INFO(dev)->has_subslice_pg) { + rpcs |= GEN8_RPCS_SS_CNT_ENABLE; + rpcs |= INTEL_INFO(dev)->subslice_per_slice << + GEN8_RPCS_SS_CNT_SHIFT; + rpcs |= GEN8_RPCS_ENABLE; + } - ret = __i915_add_request(ring, file, so.obj); - /* intel_logical_ring_add_request moves object to inactive if it - * fails */ -out: - i915_gem_render_state_fini(&so); - return ret; + if (INTEL_INFO(dev)->has_eu_pg) { + rpcs |= INTEL_INFO(dev)->eu_per_subslice << + GEN8_RPCS_EU_MIN_SHIFT; + rpcs |= INTEL_INFO(dev)->eu_per_subslice << + GEN8_RPCS_EU_MAX_SHIFT; + rpcs |= GEN8_RPCS_ENABLE; + } + + return rpcs; } static int @@ -1659,7 +1758,8 @@ populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_o reg_state[CTX_LRI_HEADER_0] |= MI_LRI_FORCE_POSTED; reg_state[CTX_CONTEXT_CONTROL] = RING_CONTEXT_CONTROL(ring); reg_state[CTX_CONTEXT_CONTROL+1] = - _MASKED_BIT_ENABLE((1<<3) | MI_RESTORE_INHIBIT); + _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH | + CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT); reg_state[CTX_RING_HEAD] = RING_HEAD(ring->mmio_base); reg_state[CTX_RING_HEAD+1] = 0; reg_state[CTX_RING_TAIL] = RING_TAIL(ring->mmio_base); @@ -1706,18 +1806,18 @@ populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_o reg_state[CTX_PDP1_LDW] = GEN8_RING_PDP_LDW(ring, 1); reg_state[CTX_PDP0_UDW] = GEN8_RING_PDP_UDW(ring, 0); reg_state[CTX_PDP0_LDW] = GEN8_RING_PDP_LDW(ring, 0); - reg_state[CTX_PDP3_UDW+1] = upper_32_bits(ppgtt->pd_dma_addr[3]); - reg_state[CTX_PDP3_LDW+1] = lower_32_bits(ppgtt->pd_dma_addr[3]); - reg_state[CTX_PDP2_UDW+1] = upper_32_bits(ppgtt->pd_dma_addr[2]); - reg_state[CTX_PDP2_LDW+1] = lower_32_bits(ppgtt->pd_dma_addr[2]); - reg_state[CTX_PDP1_UDW+1] = upper_32_bits(ppgtt->pd_dma_addr[1]); - reg_state[CTX_PDP1_LDW+1] = lower_32_bits(ppgtt->pd_dma_addr[1]); - reg_state[CTX_PDP0_UDW+1] = upper_32_bits(ppgtt->pd_dma_addr[0]); - reg_state[CTX_PDP0_LDW+1] = lower_32_bits(ppgtt->pd_dma_addr[0]); + reg_state[CTX_PDP3_UDW+1] = upper_32_bits(ppgtt->pdp.page_directory[3]->daddr); + reg_state[CTX_PDP3_LDW+1] = lower_32_bits(ppgtt->pdp.page_directory[3]->daddr); + reg_state[CTX_PDP2_UDW+1] = upper_32_bits(ppgtt->pdp.page_directory[2]->daddr); + reg_state[CTX_PDP2_LDW+1] = lower_32_bits(ppgtt->pdp.page_directory[2]->daddr); + reg_state[CTX_PDP1_UDW+1] = upper_32_bits(ppgtt->pdp.page_directory[1]->daddr); + reg_state[CTX_PDP1_LDW+1] = lower_32_bits(ppgtt->pdp.page_directory[1]->daddr); + reg_state[CTX_PDP0_UDW+1] = upper_32_bits(ppgtt->pdp.page_directory[0]->daddr); + reg_state[CTX_PDP0_LDW+1] = lower_32_bits(ppgtt->pdp.page_directory[0]->daddr); if (ring->id == RCS) { reg_state[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1); - reg_state[CTX_R_PWR_CLK_STATE] = 0x20c8; - reg_state[CTX_R_PWR_CLK_STATE+1] = 0; + reg_state[CTX_R_PWR_CLK_STATE] = GEN8_R_PWR_CLK_STATE; + reg_state[CTX_R_PWR_CLK_STATE+1] = make_rpcs(dev); } kunmap_atomic(reg_state); @@ -1925,3 +2025,38 @@ error_unpin_ctx: drm_gem_object_unreference(&ctx_obj->base); return ret; } + +void intel_lr_context_reset(struct drm_device *dev, + struct intel_context *ctx) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_engine_cs *ring; + int i; + + for_each_ring(ring, dev_priv, i) { + struct drm_i915_gem_object *ctx_obj = + ctx->engine[ring->id].state; + struct intel_ringbuffer *ringbuf = + ctx->engine[ring->id].ringbuf; + uint32_t *reg_state; + struct page *page; + + if (!ctx_obj) + continue; + + if (i915_gem_object_get_pages(ctx_obj)) { + WARN(1, "Failed get_pages for context obj\n"); + continue; + } + page = i915_gem_object_get_page(ctx_obj, 1); + reg_state = kmap_atomic(page); + + reg_state[CTX_RING_HEAD+1] = 0; + reg_state[CTX_RING_TAIL+1] = 0; + + kunmap_atomic(reg_state); + + ringbuf->head = 0; + ringbuf->tail = 0; + } +} |