diff options
author | Chris Wilson <chris@chris-wilson.co.uk> | 2019-10-23 16:31:08 +0300 |
---|---|---|
committer | Chris Wilson <chris@chris-wilson.co.uk> | 2019-10-24 01:52:10 +0300 |
commit | 058179e72e0956a2dfe4927db6cbe5fbfb2406aa (patch) | |
tree | c187567f6a2c286a45d28f88189313df6856ce31 /drivers/gpu/drm/i915/i915_gpu_error.c | |
parent | 2e0986a58cc4f2e7f9e7ede19ec32b9c116d0068 (diff) | |
download | linux-058179e72e0956a2dfe4927db6cbe5fbfb2406aa.tar.xz |
drm/i915/gt: Replace hangcheck by heartbeats
Replace sampling the engine state every so often with a periodic
heartbeat request that measures the health of an engine. This is coupled
with forced preemption to allow long-running requests to survive so
long as they do not block other users.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Jon Bloomfield <jon.bloomfield@intel.com>
Reviewed-by: Jon Bloomfield <jon.bloomfield@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191023133108.21401-5-chris@chris-wilson.co.uk
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gpu_error.c')
-rw-r--r-- | drivers/gpu/drm/i915/i915_gpu_error.c | 33 |
1 files changed, 4 insertions, 29 deletions
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 5cf4eed5add8..47239df653f2 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -534,10 +534,6 @@ static void error_print_engine(struct drm_i915_error_state_buf *m, } err_printf(m, " ring->head: 0x%08x\n", ee->cpu_ring_head); err_printf(m, " ring->tail: 0x%08x\n", ee->cpu_ring_tail); - err_printf(m, " hangcheck timestamp: %dms (%lu%s)\n", - jiffies_to_msecs(ee->hangcheck_timestamp - epoch), - ee->hangcheck_timestamp, - ee->hangcheck_timestamp == epoch ? "; epoch" : ""); err_printf(m, " engine reset count: %u\n", ee->reset_count); for (n = 0; n < ee->num_ports; n++) { @@ -679,11 +675,8 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m, ts = ktime_to_timespec64(error->uptime); err_printf(m, "Uptime: %lld s %ld us\n", (s64)ts.tv_sec, ts.tv_nsec / NSEC_PER_USEC); - err_printf(m, "Epoch: %lu jiffies (%u HZ)\n", error->epoch, HZ); - err_printf(m, "Capture: %lu jiffies; %d ms ago, %d ms after epoch\n", - error->capture, - jiffies_to_msecs(jiffies - error->capture), - jiffies_to_msecs(error->capture - error->epoch)); + err_printf(m, "Capture: %lu jiffies; %d ms ago\n", + error->capture, jiffies_to_msecs(jiffies - error->capture)); for (ee = error->engine; ee; ee = ee->next) err_printf(m, "Active process (on ring %s): %s [%d]\n", @@ -742,7 +735,7 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m, err_printf(m, "GTT_CACHE_EN: 0x%08x\n", error->gtt_cache); for (ee = error->engine; ee; ee = ee->next) - error_print_engine(m, ee, error->epoch); + error_print_engine(m, ee, error->capture); for (ee = error->engine; ee; ee = ee->next) { const struct drm_i915_error_object *obj; @@ -770,7 +763,7 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m, for (j = 0; j < ee->num_requests; j++) error_print_request(m, " ", &ee->requests[j], - error->epoch); + error->capture); } print_error_obj(m, 
ee->engine, "ringbuffer", ee->ringbuffer); @@ -1144,8 +1137,6 @@ static void error_record_engine_registers(struct i915_gpu_state *error, } ee->idle = intel_engine_is_idle(engine); - if (!ee->idle) - ee->hangcheck_timestamp = engine->hangcheck.action_timestamp; ee->reset_count = i915_reset_engine_count(&dev_priv->gpu_error, engine); @@ -1657,20 +1648,6 @@ static void capture_params(struct i915_gpu_state *error) i915_params_copy(&error->params, &i915_modparams); } -static unsigned long capture_find_epoch(const struct i915_gpu_state *error) -{ - const struct drm_i915_error_engine *ee; - unsigned long epoch = error->capture; - - for (ee = error->engine; ee; ee = ee->next) { - if (ee->hangcheck_timestamp && - time_before(ee->hangcheck_timestamp, epoch)) - epoch = ee->hangcheck_timestamp; - } - - return epoch; -} - static void capture_finish(struct i915_gpu_state *error) { struct i915_ggtt *ggtt = &error->i915->ggtt; @@ -1722,8 +1699,6 @@ i915_capture_gpu_state(struct drm_i915_private *i915) error->overlay = intel_overlay_capture_error_state(i915); error->display = intel_display_capture_error_state(i915); - error->epoch = capture_find_epoch(error); - capture_finish(error); compress_fini(&compress); |