diff options
author | Mika Kuoppala <mika.kuoppala@linux.intel.com> | 2017-09-19 17:41:28 +0300 |
---|---|---|
committer | Mika Kuoppala <mika.kuoppala@intel.com> | 2017-09-20 13:56:05 +0300 |
commit | 87de8d5613a7221abcce0b680b0da3c98b0871c8 (patch) | |
tree | d9c2596452035628fd8363ec04cab1074a124212 /drivers/gpu/drm/i915/intel_uncore.c | |
parent | d81fb7fd9436e81fda67e5bc8ed0713aa28d3db2 (diff) | |
download | linux-87de8d5613a7221abcce0b680b0da3c98b0871c8.tar.xz |
drm/i915: Stop engines before reset
On kbl evidence indicates that even if the hardware happily
tells us to proceed with reset, it really isn't ready.
Resetting a freely running batchbuffer after we have ack for readiness,
still can cause a system hang.
We also have similar experiences on older gens. So now
attempt to stop engines before proceeding for reset, on all
gens where we have a gpu reset. This has shown to improve reset
reliability and reduce the risk of losing the machine.
v2: Add fixme for wa (Joonas)
Testcase: igt/prime_busy/hang-* # kbl
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
Acked-by: Chris Wilson <chris@chris-wilson.co.uk>
Acked-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20170919144128.25506-1-mika.kuoppala@intel.com
Diffstat (limited to 'drivers/gpu/drm/i915/intel_uncore.c')
-rw-r--r-- | drivers/gpu/drm/i915/intel_uncore.c | 75 |
1 files changed, 43 insertions, 32 deletions
diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 97525de2cee4..fdd7f93acb4f 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -1354,33 +1354,39 @@ int i915_reg_read_ioctl(struct drm_device *dev, return ret; } -static void gen3_stop_rings(struct drm_i915_private *dev_priv) +static void gen3_stop_engine(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + const u32 base = engine->mmio_base; + const i915_reg_t mode = RING_MI_MODE(base); + + I915_WRITE_FW(mode, _MASKED_BIT_ENABLE(STOP_RING)); + if (intel_wait_for_register_fw(dev_priv, + mode, + MODE_IDLE, + MODE_IDLE, + 500)) + DRM_DEBUG_DRIVER("%s: timed out on STOP_RING\n", + engine->name); + + I915_WRITE_FW(RING_CTL(base), 0); + I915_WRITE_FW(RING_HEAD(base), 0); + I915_WRITE_FW(RING_TAIL(base), 0); + + /* Check acts as a post */ + if (I915_READ_FW(RING_HEAD(base)) != 0) + DRM_DEBUG_DRIVER("%s: ring head not parked\n", + engine->name); +} + +static void i915_stop_engines(struct drm_i915_private *dev_priv, + unsigned engine_mask) { struct intel_engine_cs *engine; enum intel_engine_id id; - for_each_engine(engine, dev_priv, id) { - const u32 base = engine->mmio_base; - const i915_reg_t mode = RING_MI_MODE(base); - - I915_WRITE_FW(mode, _MASKED_BIT_ENABLE(STOP_RING)); - if (intel_wait_for_register_fw(dev_priv, - mode, - MODE_IDLE, - MODE_IDLE, - 500)) - DRM_DEBUG_DRIVER("%s: timed out on STOP_RING\n", - engine->name); - - I915_WRITE_FW(RING_CTL(base), 0); - I915_WRITE_FW(RING_HEAD(base), 0); - I915_WRITE_FW(RING_TAIL(base), 0); - - /* Check acts as a post */ - if (I915_READ_FW(RING_HEAD(base)) != 0) - DRM_DEBUG_DRIVER("%s: ring head not parked\n", - engine->name); - } + for_each_engine_masked(engine, dev_priv, engine_mask, id) + gen3_stop_engine(engine); } static bool i915_reset_complete(struct pci_dev *pdev) @@ -1415,9 +1421,6 @@ static int g33_do_reset(struct drm_i915_private *dev_priv, unsigned engine_mask) { struct pci_dev *pdev = dev_priv->drm.pdev; - /* Stop engines before we reset; see g4x_do_reset() below for why. */ - gen3_stop_rings(dev_priv); - pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE); return wait_for(g4x_reset_complete(pdev), 500); } @@ -1432,12 +1435,6 @@ static int g4x_do_reset(struct drm_i915_private *dev_priv, unsigned engine_mask) I915_READ(VDECCLK_GATE_D) | VCP_UNIT_CLOCK_GATE_DISABLE); POSTING_READ(VDECCLK_GATE_D); - /* We stop engines, otherwise we might get failed reset and a - * dead gpu (on elk). - * WaMediaResetMainRingCleanup:ctg,elk (presumably) - */ - gen3_stop_rings(dev_priv); - pci_write_config_byte(pdev, I915_GDRST, GRDOM_MEDIA | GRDOM_RESET_ENABLE); ret = wait_for(g4x_reset_complete(pdev), 500); @@ -1742,6 +1739,20 @@ int intel_gpu_reset(struct drm_i915_private *dev_priv, unsigned engine_mask) */ intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); for (retry = 0; retry < 3; retry++) { + + /* We stop engines, otherwise we might get failed reset and a + * dead gpu (on elk). Also as modern gpu as kbl can suffer + * from system hang if batchbuffer is progressing when + * the reset is issued, regardless of READY_TO_RESET ack. + * Thus assume it is best to stop engines on all gens + * where we have a gpu reset. + * + * WaMediaResetMainRingCleanup:ctg,elk (presumably) + * + * FIXME: Wa for more modern gens needs to be validated + */ + i915_stop_engines(dev_priv, engine_mask); + ret = reset(dev_priv, engine_mask); if (ret != -ETIMEDOUT) break; |