author     Michał Winiarski <michal.winiarski@intel.com>    2019-09-26 16:31:41 +0300
committer  Chris Wilson <chris@chris-wilson.co.uk>          2019-09-26 20:44:35 +0300
commit     e1237523749e342d9ffb499a636ab6860a554cba (patch)
tree       c77b14c218a5a3fca0e994e8e4904c8a1c17e505 /drivers/gpu/drm
parent     5311f5171e98e6958fe5cf1c424a6196115baeb6 (diff)
download   linux-e1237523749e342d9ffb499a636ab6860a554cba.tar.xz
drm/i915/execlists: Use per-process HWSP as scratch
Some of our commands (MI_FLUSH_DW / PIPE_CONTROL) require a post-sync write
operation to be performed. Currently we're using a dedicated VMA for
PIPE_CONTROL and the global HWSP for MI_FLUSH_DW.
On execlists platforms, each of our contexts has an area that can be
used as scratch space. Let's use that instead.
Signed-off-by: Michał Winiarski <michal.winiarski@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Link: https://patchwork.freedesktop.org/patch/msgid/20190926133142.2838-2-chris@chris-wilson.co.uk
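
For orientation, the net effect of the patch is to point the post-sync write at a
reserved dword inside the per-process HWSP (PPHWSP) and to address it as an index
into that page instead of a global GTT address. The sketch below is assembled from
the hunks in the diff (same macros, flags and helpers as in the patch); the wrapper
function emit_post_sync_flush() is a made-up name for illustration only and does not
exist in the driver.

/* Scratch now lives at a fixed dword index inside the per-process HWSP:
 * dword 0x34, i.e. byte offset 0xd0 into the PPHWSP page (macros as added
 * to intel_lrc.h by this patch).
 */
#define LRC_PPHWSP_SCRATCH        0x34
#define LRC_PPHWSP_SCRATCH_ADDR   (LRC_PPHWSP_SCRATCH * sizeof(u32))

/* Hypothetical wrapper showing the emission pattern used by the patch. */
static int emit_post_sync_flush(struct i915_request *request, u32 flags)
{
        u32 *cs;

        /* gen8_emit_pipe_control() emits 6 dwords */
        cs = intel_ring_begin(request, 6);
        if (IS_ERR(cs))
                return PTR_ERR(cs);

        /*
         * PIPE_CONTROL_STORE_DATA_INDEX (replacing PIPE_CONTROL_GLOBAL_GTT_IVB)
         * makes the post-sync address be treated as an offset into the
         * context's own HWSP page, so no dedicated scratch VMA and no global
         * HWSP slot are needed.
         */
        cs = gen8_emit_pipe_control(cs,
                                    flags |
                                    PIPE_CONTROL_QW_WRITE |
                                    PIPE_CONTROL_STORE_DATA_INDEX,
                                    LRC_PPHWSP_SCRATCH_ADDR);

        intel_ring_advance(request, cs);
        return 0;
}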
Diffstat (limited to 'drivers/gpu/drm')
-rw-r--r-- | drivers/gpu/drm/i915/gt/intel_gt_types.h |  3
-rw-r--r-- | drivers/gpu/drm/i915/gt/intel_lrc.c      | 45
-rw-r--r-- | drivers/gpu/drm/i915/gt/intel_lrc.h      |  4
3 files changed, 17 insertions, 35 deletions
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index 3039cef64b11..e64db4c13df6 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -90,9 +90,6 @@ enum intel_gt_scratch_field {
         INTEL_GT_SCRATCH_FIELD_DEFAULT = 0,
 
         /* 8 bytes */
-        INTEL_GT_SCRATCH_FIELD_CLEAR_SLM_WA = 128,
-
-        /* 8 bytes */
         INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH = 128,
 
         /* 8 bytes */
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index ab725a6ca0ac..fa385218ce92 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -2308,12 +2308,6 @@ gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
         return batch;
 }
 
-static u32 slm_offset(struct intel_engine_cs *engine)
-{
-        return intel_gt_scratch_offset(engine->gt,
-                                       INTEL_GT_SCRATCH_FIELD_CLEAR_SLM_WA);
-}
-
 /*
  * Typically we only have one indirect_ctx and per_ctx batch buffer which are
  * initialized at the beginning and shared across all contexts but this field
@@ -2342,10 +2336,10 @@ static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
         /* Actual scratch location is at 128 bytes offset */
         batch = gen8_emit_pipe_control(batch,
                                        PIPE_CONTROL_FLUSH_L3 |
-                                       PIPE_CONTROL_GLOBAL_GTT_IVB |
+                                       PIPE_CONTROL_STORE_DATA_INDEX |
                                        PIPE_CONTROL_CS_STALL |
                                        PIPE_CONTROL_QW_WRITE,
-                                       slm_offset(engine));
+                                       LRC_PPHWSP_SCRATCH_ADDR);
 
         *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
 
@@ -3052,7 +3046,7 @@ static int gen8_emit_flush(struct i915_request *request, u32 mode)
         }
 
         *cs++ = cmd;
-        *cs++ = I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT;
+        *cs++ = LRC_PPHWSP_SCRATCH_ADDR;
         *cs++ = 0; /* upper addr */
         *cs++ = 0; /* value */
         intel_ring_advance(request, cs);
@@ -3063,10 +3057,6 @@ static int gen8_emit_flush(struct i915_request *request, u32 mode)
 static int gen8_emit_flush_render(struct i915_request *request,
                                   u32 mode)
 {
-        struct intel_engine_cs *engine = request->engine;
-        u32 scratch_addr =
-                intel_gt_scratch_offset(engine->gt,
-                                        INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH);
         bool vf_flush_wa = false, dc_flush_wa = false;
         u32 *cs, flags = 0;
         int len;
@@ -3088,7 +3078,7 @@ static int gen8_emit_flush_render(struct i915_request *request,
                 flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
                 flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
                 flags |= PIPE_CONTROL_QW_WRITE;
-                flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
+                flags |= PIPE_CONTROL_STORE_DATA_INDEX;
 
                 /*
                  * On GEN9: before VF_CACHE_INVALIDATE we need to emit a NULL
@@ -3121,7 +3111,7 @@ static int gen8_emit_flush_render(struct i915_request *request,
                 cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_DC_FLUSH_ENABLE,
                                             0);
 
-        cs = gen8_emit_pipe_control(cs, flags, scratch_addr);
+        cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
 
         if (dc_flush_wa)
                 cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_CS_STALL, 0);
@@ -3134,11 +3124,6 @@ static int gen8_emit_flush_render(struct i915_request *request,
 static int gen11_emit_flush_render(struct i915_request *request,
                                    u32 mode)
 {
-        struct intel_engine_cs *engine = request->engine;
-        const u32 scratch_addr =
-                intel_gt_scratch_offset(engine->gt,
-                                        INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH);
-
         if (mode & EMIT_FLUSH) {
                 u32 *cs;
                 u32 flags = 0;
@@ -3151,13 +3136,13 @@ static int gen11_emit_flush_render(struct i915_request *request,
                 flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
                 flags |= PIPE_CONTROL_FLUSH_ENABLE;
                 flags |= PIPE_CONTROL_QW_WRITE;
-                flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
+                flags |= PIPE_CONTROL_STORE_DATA_INDEX;
 
                 cs = intel_ring_begin(request, 6);
                 if (IS_ERR(cs))
                         return PTR_ERR(cs);
 
-                cs = gen8_emit_pipe_control(cs, flags, scratch_addr);
+                cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
                 intel_ring_advance(request, cs);
         }
 
@@ -3175,13 +3160,13 @@ static int gen11_emit_flush_render(struct i915_request *request,
                 flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
                 flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
                 flags |= PIPE_CONTROL_QW_WRITE;
-                flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
+                flags |= PIPE_CONTROL_STORE_DATA_INDEX;
 
                 cs = intel_ring_begin(request, 6);
                 if (IS_ERR(cs))
                         return PTR_ERR(cs);
 
-                cs = gen8_emit_pipe_control(cs, flags, scratch_addr);
+                cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
                 intel_ring_advance(request, cs);
         }
 
@@ -3196,10 +3181,6 @@ static u32 preparser_disable(bool state)
 static int gen12_emit_flush_render(struct i915_request *request,
                                    u32 mode)
 {
-        const u32 scratch_addr =
-                intel_gt_scratch_offset(request->engine->gt,
-                                        INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH);
-
         if (mode & EMIT_FLUSH) {
                 u32 flags = 0;
                 u32 *cs;
@@ -3210,7 +3191,7 @@ static int gen12_emit_flush_render(struct i915_request *request,
                 flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
                 flags |= PIPE_CONTROL_FLUSH_ENABLE;
 
-                flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
+                flags |= PIPE_CONTROL_STORE_DATA_INDEX;
                 flags |= PIPE_CONTROL_QW_WRITE;
 
                 flags |= PIPE_CONTROL_CS_STALL;
@@ -3219,7 +3200,7 @@ static int gen12_emit_flush_render(struct i915_request *request,
                 if (IS_ERR(cs))
                         return PTR_ERR(cs);
 
-                cs = gen8_emit_pipe_control(cs, flags, scratch_addr);
+                cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
                 intel_ring_advance(request, cs);
         }
 
@@ -3235,7 +3216,7 @@ static int gen12_emit_flush_render(struct i915_request *request,
                 flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
                 flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
 
-                flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
+                flags |= PIPE_CONTROL_STORE_DATA_INDEX;
                 flags |= PIPE_CONTROL_QW_WRITE;
 
                 flags |= PIPE_CONTROL_CS_STALL;
@@ -3251,7 +3232,7 @@ static int gen12_emit_flush_render(struct i915_request *request,
                  */
                 *cs++ = preparser_disable(true);
 
-                cs = gen8_emit_pipe_control(cs, flags, scratch_addr);
+                cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
 
                 *cs++ = preparser_disable(false);
                 intel_ring_advance(request, cs);
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h
index dc0252e0589e..66ac616361c1 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.h
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.h
@@ -104,6 +104,10 @@ int intel_execlists_submission_init(struct intel_engine_cs *engine);
  */
 #define LRC_HEADER_PAGES LRC_PPHWSP_PN
 
+/* Space within PPHWSP reserved to be used as scratch */
+#define LRC_PPHWSP_SCRATCH        0x34
+#define LRC_PPHWSP_SCRATCH_ADDR   (LRC_PPHWSP_SCRATCH * sizeof(u32))
+
 void intel_execlists_set_default_submission(struct intel_engine_cs *engine);
 
 void intel_lr_context_reset(struct intel_engine_cs *engine,