From d60de81da48aa25b06a015b839906243710e7253 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Sat, 10 Jan 2015 18:02:22 -0800 Subject: drm/i915: Improve HiZ throughput on Cherryview. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Found by reading the HIZ_CHICKEN documentation. Improves performance in a HiZ microbenchmark by around 50%. Improves performance in OglZBuffer by around 18%. Thanks to Chris Wilson for helping me figure out where to put this. Signed-off-by: Kenneth Graunke Reviewed-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 12a36f0ca53d..dabc1d8d2ae3 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -836,6 +836,9 @@ static int chv_init_workarounds(struct intel_engine_cs *ring) HDC_FORCE_NON_COHERENT | HDC_DONOT_FETCH_MEM_WHEN_MASKED); + /* Improve HiZ throughput on CHV. */ + WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X); + return 0; } -- cgit v1.2.3 From 2701fc43562b55f5db0139ef6a7b13c2518ed6c0 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Tue, 13 Jan 2015 12:46:52 -0800 Subject: drm/i915: Enable the HiZ RAW Stall Optimization on Broadwell. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is an important optimization for avoiding read-after-write (RAW) stalls in the HiZ buffer. Certain workloads would run very slowly with HiZ enabled, but run much faster with the "hiz=false" driconf option. With this patch, they run at full speed even with HiZ. Improves performance in OglVSInstancing by 3.2x on Broadwell GT3e (Iris Pro 6200). Thanks to Jesse Barnes and Ben Widawsky for their help in tracking this down. Thanks to Chris Wilson for showing me the new workarounds system. Signed-off-by: Kenneth Graunke Cc: Jesse Barnes Reviewed-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index dabc1d8d2ae3..0df15a4ad47f 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -796,6 +796,16 @@ static int bdw_init_workarounds(struct intel_engine_cs *ring) HDC_DONOT_FETCH_MEM_WHEN_MASKED | (IS_BDW_GT3(dev) ? HDC_FENCE_DEST_SLM_DISABLE : 0)); + /* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0: + * "The Hierarchical Z RAW Stall Optimization allows non-overlapping + * polygons in the same 8x4 pixel/sample area to be processed without + * stalling waiting for the earlier ones to write to Hierarchical Z + * buffer." + * + * This optimization is off by default for Broadwell; turn it on. + */ + WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE); + /* Wa4x4STCOptimizationDisable:bdw */ WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE); -- cgit v1.2.3 From 973a5b06a096f1a9494ebe94c78d809459f4dc74 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Tue, 13 Jan 2015 12:46:53 -0800 Subject: drm/i915: Ensure the HiZ RAW Stall Optimization is on for Cherryview. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is an important optimization for avoiding read-after-write (RAW) stalls in the HiZ buffer. Certain workloads would run very slowly with HiZ enabled, but run much faster with the "hiz=false" driconf option. With this patch, they run at full speed even with HiZ. Increases performance in OglVSInstancing by about 2.7x on Braswell. Signed-off-by: Kenneth Graunke Reviewed-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 0df15a4ad47f..23020d67329b 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -846,6 +846,11 @@ static int chv_init_workarounds(struct intel_engine_cs *ring) HDC_FORCE_NON_COHERENT | HDC_DONOT_FETCH_MEM_WHEN_MASKED); + /* According to the CACHE_MODE_0 default value documentation, some + * CHV platforms disable this optimization by default. Turn it on. + */ + WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE); + /* Improve HiZ throughput on CHV. */ WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X); -- cgit v1.2.3