summaryrefslogtreecommitdiff
path: root/drivers
diff options
context:
space:
mode:
Diffstat (limited to 'drivers')
-rw-r--r--drivers/gpu/drm/i915/i915_drv.c15
-rw-r--r--drivers/gpu/drm/i915/i915_drv.h10
-rw-r--r--drivers/gpu/drm/i915/i915_irq.c38
-rw-r--r--drivers/gpu/drm/i915/i915_pci.c5
-rw-r--r--drivers/gpu/drm/i915/intel_uncore.c11
5 files changed, 78 insertions, 1 deletions
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index fe3d46ee4ddc..e5b31e29382c 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -331,6 +331,8 @@ static int i915_getparam(struct drm_device *dev, void *data,
break;
case I915_PARAM_HAS_GPU_RESET:
value = i915.enable_hangcheck && intel_has_gpu_reset(dev_priv);
+ if (value && intel_has_reset_engine(dev_priv))
+ value = 2;
break;
case I915_PARAM_HAS_RESOURCE_STREAMER:
value = HAS_RESOURCE_STREAMER(dev_priv);
@@ -1915,6 +1917,19 @@ error:
goto finish;
}
+/**
+ * i915_reset_engine - reset GPU engine to recover from a hang
+ * @engine: engine to reset
+ *
+ * Reset a specific GPU engine. Useful if a hang is detected.
+ * Returns zero on successful reset or otherwise an error code.
+ */
+int i915_reset_engine(struct intel_engine_cs *engine)
+{
+ /* FIXME: replace me with engine reset sequence */
+ return -ENODEV;
+}
+
static int i915_pm_suspend(struct device *kdev)
{
struct pci_dev *pdev = to_pci_dev(kdev);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 69219b5d1198..4220abe1f28b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -752,6 +752,7 @@ struct intel_csr {
func(has_csr); \
func(has_ddi); \
func(has_dp_mst); \
+ func(has_reset_engine); \
func(has_fbc); \
func(has_fpga_dbg); \
func(has_full_ppgtt); \
@@ -1549,6 +1550,12 @@ struct i915_gpu_error {
* inspect the bit and do the reset directly, otherwise the worker
* waits for the struct_mutex.
*
+ * #I915_RESET_ENGINE[num_engines] - Since the driver doesn't need to
+ * acquire the struct_mutex to reset an engine, we need an explicit
+ * flag to prevent two concurrent reset attempts in the same engine.
+ * As the number of engines continues to grow, allocate the flags from
+ * the most significant bits.
+ *
* #I915_WEDGED - If reset fails and we can no longer use the GPU,
* we set the #I915_WEDGED bit. Prior to command submission, e.g.
* i915_gem_request_alloc(), this bit is checked and the sequence
@@ -1558,6 +1565,7 @@ struct i915_gpu_error {
#define I915_RESET_BACKOFF 0
#define I915_RESET_HANDOFF 1
#define I915_WEDGED (BITS_PER_LONG - 1)
+#define I915_RESET_ENGINE (I915_WEDGED - I915_NUM_ENGINES)
/**
* Waitqueue to signal when a hang is detected. Used to for waiters
@@ -3092,6 +3100,8 @@ extern void i915_driver_unload(struct drm_device *dev);
extern int intel_gpu_reset(struct drm_i915_private *dev_priv, u32 engine_mask);
extern bool intel_has_gpu_reset(struct drm_i915_private *dev_priv);
extern void i915_reset(struct drm_i915_private *dev_priv);
+extern int i915_reset_engine(struct intel_engine_cs *engine);
+extern bool intel_has_reset_engine(struct drm_i915_private *dev_priv);
extern int intel_guc_reset(struct drm_i915_private *dev_priv);
extern void intel_engine_init_hangcheck(struct intel_engine_cs *engine);
extern void intel_hangcheck_init(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 8e9f4378b5a7..f25e73fe567c 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -2715,6 +2715,8 @@ void i915_handle_error(struct drm_i915_private *dev_priv,
u32 engine_mask,
const char *fmt, ...)
{
+ struct intel_engine_cs *engine;
+ unsigned int tmp;
va_list args;
char error_msg[80];
@@ -2734,9 +2736,31 @@ void i915_handle_error(struct drm_i915_private *dev_priv,
i915_capture_error_state(dev_priv, engine_mask, error_msg);
i915_clear_error_registers(dev_priv);
+ /*
+ * Try engine reset when available. We fall back to full reset if
+ * single reset fails.
+ */
+ if (intel_has_reset_engine(dev_priv)) {
+ for_each_engine_masked(engine, dev_priv, engine_mask, tmp) {
+ BUILD_BUG_ON(I915_RESET_HANDOFF >= I915_RESET_ENGINE);
+ if (test_and_set_bit(I915_RESET_ENGINE + engine->id,
+ &dev_priv->gpu_error.flags))
+ continue;
+
+ if (i915_reset_engine(engine) == 0)
+ engine_mask &= ~intel_engine_flag(engine);
+
+ clear_bit(I915_RESET_ENGINE + engine->id,
+ &dev_priv->gpu_error.flags);
+ wake_up_bit(&dev_priv->gpu_error.flags,
+ I915_RESET_ENGINE + engine->id);
+ }
+ }
+
if (!engine_mask)
goto out;
+ /* Full reset needs the mutex, stop any other user trying to do so. */
if (test_and_set_bit(I915_RESET_BACKOFF, &dev_priv->gpu_error.flags)) {
wait_event(dev_priv->gpu_error.reset_queue,
!test_bit(I915_RESET_BACKOFF,
@@ -2744,8 +2768,22 @@ void i915_handle_error(struct drm_i915_private *dev_priv,
goto out;
}
+ /* Prevent any other reset-engine attempt. */
+ for_each_engine(engine, dev_priv, tmp) {
+ while (test_and_set_bit(I915_RESET_ENGINE + engine->id,
+ &dev_priv->gpu_error.flags))
+ wait_on_bit(&dev_priv->gpu_error.flags,
+ I915_RESET_ENGINE + engine->id,
+ TASK_UNINTERRUPTIBLE);
+ }
+
i915_reset_device(dev_priv);
+ for_each_engine(engine, dev_priv, tmp) {
+ clear_bit(I915_RESET_ENGINE + engine->id,
+ &dev_priv->gpu_error.flags);
+ }
+
clear_bit(I915_RESET_BACKOFF, &dev_priv->gpu_error.flags);
wake_up_all(&dev_priv->gpu_error.reset_queue);
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 506ec32b9e53..04aaf553e3fa 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -310,7 +310,8 @@ static const struct intel_device_info intel_haswell_info = {
BDW_COLORS, \
.has_logical_ring_contexts = 1, \
.has_full_48bit_ppgtt = 1, \
- .has_64bit_reloc = 1
+ .has_64bit_reloc = 1, \
+ .has_reset_engine = 1
#define BDW_PLATFORM \
BDW_FEATURES, \
@@ -342,6 +343,7 @@ static const struct intel_device_info intel_cherryview_info = {
.has_gmch_display = 1,
.has_aliasing_ppgtt = 1,
.has_full_ppgtt = 1,
+ .has_reset_engine = 1,
.display_mmio_offset = VLV_DISPLAY_BASE,
GEN_CHV_PIPEOFFSETS,
CURSOR_OFFSETS,
@@ -387,6 +389,7 @@ static const struct intel_device_info intel_skylake_gt3_info = {
.has_aliasing_ppgtt = 1, \
.has_full_ppgtt = 1, \
.has_full_48bit_ppgtt = 1, \
+ .has_reset_engine = 1, \
GEN_DEFAULT_PIPEOFFSETS, \
IVB_CURSOR_OFFSETS, \
BDW_COLORS
diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index 9882724bc2b6..1ed3dd8df850 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -1719,6 +1719,17 @@ bool intel_has_gpu_reset(struct drm_i915_private *dev_priv)
return intel_get_gpu_reset(dev_priv) != NULL;
}
+/*
+ * When GuC submission is enabled, GuC manages ELSP and can initiate the
+ * engine reset too. For now, fall back to full GPU reset if it is enabled.
+ */
+bool intel_has_reset_engine(struct drm_i915_private *dev_priv)
+{
+ return (dev_priv->info.has_reset_engine &&
+ !dev_priv->guc.execbuf_client &&
+ i915.reset >= 2);
+}
+
int intel_guc_reset(struct drm_i915_private *dev_priv)
{
int ret;