summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915/selftests/intel_hangcheck.c')
-rw-r--r--drivers/gpu/drm/i915/selftests/intel_hangcheck.c103
1 files changed, 49 insertions, 54 deletions
diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
index 0aadbd9c7d56..7b6f3bea9ef8 100644
--- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
+++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
@@ -363,9 +363,7 @@ static int igt_global_reset(void *arg)
/* Check that we can issue a global GPU reset */
igt_global_reset_lock(i915);
- set_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags);
- mutex_lock(&i915->drm.struct_mutex);
reset_count = i915_reset_count(&i915->gpu_error);
i915_reset(i915, ALL_ENGINES, NULL);
@@ -374,9 +372,7 @@ static int igt_global_reset(void *arg)
pr_err("No GPU reset recorded!\n");
err = -EINVAL;
}
- mutex_unlock(&i915->drm.struct_mutex);
- GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags));
igt_global_reset_unlock(i915);
if (i915_terminally_wedged(&i915->gpu_error))
@@ -388,22 +384,21 @@ static int igt_global_reset(void *arg)
static int igt_wedged_reset(void *arg)
{
struct drm_i915_private *i915 = arg;
+ intel_wakeref_t wakeref;
/* Check that we can recover a wedged device with a GPU reset */
igt_global_reset_lock(i915);
- mutex_lock(&i915->drm.struct_mutex);
- intel_runtime_pm_get(i915);
+ wakeref = intel_runtime_pm_get(i915);
i915_gem_set_wedged(i915);
- GEM_BUG_ON(!i915_terminally_wedged(&i915->gpu_error));
- set_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags);
+ mutex_lock(&i915->drm.struct_mutex);
+ GEM_BUG_ON(!i915_terminally_wedged(&i915->gpu_error));
i915_reset(i915, ALL_ENGINES, NULL);
- GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags));
-
- intel_runtime_pm_put(i915);
mutex_unlock(&i915->drm.struct_mutex);
+
+ intel_runtime_pm_put(i915, wakeref);
igt_global_reset_unlock(i915);
return i915_terminally_wedged(&i915->gpu_error) ? -EIO : 0;
@@ -454,8 +449,6 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active)
set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
do {
- u32 seqno = intel_engine_get_seqno(engine);
-
if (active) {
struct i915_request *rq;
@@ -484,8 +477,6 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active)
break;
}
- GEM_BUG_ON(!rq->global_seqno);
- seqno = rq->global_seqno - 1;
i915_request_put(rq);
}
@@ -501,16 +492,15 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active)
break;
}
- reset_engine_count += active;
if (i915_reset_engine_count(&i915->gpu_error, engine) !=
- reset_engine_count) {
- pr_err("%s engine reset %srecorded!\n",
- engine->name, active ? "not " : "");
+ ++reset_engine_count) {
+ pr_err("%s engine reset not recorded!\n",
+ engine->name);
err = -EINVAL;
break;
}
- if (!wait_for_idle(engine)) {
+ if (!i915_reset_flush(i915)) {
struct drm_printer p =
drm_info_printer(i915->drm.dev);
@@ -733,7 +723,6 @@ static int __igt_reset_engines(struct drm_i915_private *i915,
set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
do {
- u32 seqno = intel_engine_get_seqno(engine);
struct i915_request *rq = NULL;
if (flags & TEST_ACTIVE) {
@@ -761,9 +750,6 @@ static int __igt_reset_engines(struct drm_i915_private *i915,
err = -EIO;
break;
}
-
- GEM_BUG_ON(!rq->global_seqno);
- seqno = rq->global_seqno - 1;
}
err = i915_reset_engine(engine, NULL);
@@ -800,10 +786,9 @@ static int __igt_reset_engines(struct drm_i915_private *i915,
reported = i915_reset_engine_count(&i915->gpu_error, engine);
reported -= threads[engine->id].resets;
- if (reported != (flags & TEST_ACTIVE ? count : 0)) {
- pr_err("i915_reset_engine(%s:%s): reset %lu times, but reported %lu, expected %lu reported\n",
- engine->name, test_name, count, reported,
- (flags & TEST_ACTIVE ? count : 0));
+ if (reported != count) {
+ pr_err("i915_reset_engine(%s:%s): reset %lu times, but reported %lu\n",
+ engine->name, test_name, count, reported);
if (!err)
err = -EINVAL;
}
@@ -902,20 +887,13 @@ static int igt_reset_engines(void *arg)
return 0;
}
-static u32 fake_hangcheck(struct i915_request *rq, u32 mask)
+static u32 fake_hangcheck(struct drm_i915_private *i915, u32 mask)
{
- struct i915_gpu_error *error = &rq->i915->gpu_error;
- u32 reset_count = i915_reset_count(error);
-
- error->stalled_mask = mask;
-
- /* set_bit() must be after we have setup the backchannel (mask) */
- smp_mb__before_atomic();
- set_bit(I915_RESET_HANDOFF, &error->flags);
+ u32 count = i915_reset_count(&i915->gpu_error);
- wake_up_all(&error->wait_queue);
+ i915_reset(i915, mask, NULL);
- return reset_count;
+ return count;
}
static int igt_reset_wait(void *arg)
@@ -961,7 +939,7 @@ static int igt_reset_wait(void *arg)
goto out_rq;
}
- reset_count = fake_hangcheck(rq, ALL_ENGINES);
+ reset_count = fake_hangcheck(i915, ALL_ENGINES);
timeout = i915_request_wait(rq, I915_WAIT_LOCKED, 10);
if (timeout < 0) {
@@ -971,7 +949,6 @@ static int igt_reset_wait(void *arg)
goto out_rq;
}
- GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags));
if (i915_reset_count(&i915->gpu_error) == reset_count) {
pr_err("No GPU reset recorded!\n");
err = -EINVAL;
@@ -1150,7 +1127,7 @@ static int __igt_reset_evict_vma(struct drm_i915_private *i915,
wait_for_completion(&arg.completion);
- if (wait_for(waitqueue_active(&rq->execute), 10)) {
+ if (wait_for(!list_empty(&rq->fence.cb_list), 10)) {
struct drm_printer p = drm_info_printer(i915->drm.dev);
pr_err("igt/evict_vma kthread did not wait\n");
@@ -1161,7 +1138,7 @@ static int __igt_reset_evict_vma(struct drm_i915_private *i915,
}
out_reset:
- fake_hangcheck(rq, intel_engine_flag(rq->engine));
+ fake_hangcheck(rq->i915, intel_engine_flag(rq->engine));
if (tsk) {
struct igt_wedge_me w;
@@ -1340,12 +1317,7 @@ static int igt_reset_queue(void *arg)
goto fini;
}
- reset_count = fake_hangcheck(prev, ENGINE_MASK(id));
-
- i915_reset(i915, ENGINE_MASK(id), NULL);
-
- GEM_BUG_ON(test_bit(I915_RESET_HANDOFF,
- &i915->gpu_error.flags));
+ reset_count = fake_hangcheck(i915, ENGINE_MASK(id));
if (prev->fence.error != -EIO) {
pr_err("GPU reset not recorded on hanging request [fence.error=%d]!\n",
@@ -1564,6 +1536,7 @@ static int igt_atomic_reset_engine(struct intel_engine_cs *engine,
pr_err("%s(%s): Failed to start request %llx, at %x\n",
__func__, engine->name,
rq->fence.seqno, hws_seqno(&h, rq));
+ i915_gem_set_wedged(i915);
err = -EIO;
}
@@ -1587,7 +1560,6 @@ out:
static void force_reset(struct drm_i915_private *i915)
{
i915_gem_set_wedged(i915);
- set_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags);
i915_reset(i915, 0, NULL);
}
@@ -1600,6 +1572,7 @@ static int igt_atomic_reset(void *arg)
{ }
};
struct drm_i915_private *i915 = arg;
+ intel_wakeref_t wakeref;
int err = 0;
/* Check that the resets are usable from atomic context */
@@ -1609,13 +1582,33 @@ static int igt_atomic_reset(void *arg)
igt_global_reset_lock(i915);
mutex_lock(&i915->drm.struct_mutex);
- intel_runtime_pm_get(i915);
+ wakeref = intel_runtime_pm_get(i915);
/* Flush any requests before we get started and check basics */
force_reset(i915);
if (i915_terminally_wedged(&i915->gpu_error))
goto unlock;
+ if (intel_has_gpu_reset(i915)) {
+ const typeof(*phases) *p;
+
+ for (p = phases; p->name; p++) {
+ GEM_TRACE("intel_gpu_reset under %s\n", p->name);
+
+ p->critical_section_begin();
+ err = intel_gpu_reset(i915, ALL_ENGINES);
+ p->critical_section_end();
+
+ if (err) {
+ pr_err("intel_gpu_reset failed under %s\n",
+ p->name);
+ goto out;
+ }
+ }
+
+ force_reset(i915);
+ }
+
if (intel_has_reset_engine(i915)) {
struct intel_engine_cs *engine;
enum intel_engine_id id;
@@ -1636,7 +1629,7 @@ out:
force_reset(i915);
unlock:
- intel_runtime_pm_put(i915);
+ intel_runtime_pm_put(i915, wakeref);
mutex_unlock(&i915->drm.struct_mutex);
igt_global_reset_unlock(i915);
@@ -1660,6 +1653,7 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915)
SUBTEST(igt_handle_error),
SUBTEST(igt_atomic_reset),
};
+ intel_wakeref_t wakeref;
bool saved_hangcheck;
int err;
@@ -1669,8 +1663,9 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915)
if (i915_terminally_wedged(&i915->gpu_error))
return -EIO; /* we're long past hope of a successful reset */
- intel_runtime_pm_get(i915);
+ wakeref = intel_runtime_pm_get(i915);
saved_hangcheck = fetch_and_zero(&i915_modparams.enable_hangcheck);
+ drain_delayed_work(&i915->gpu_error.hangcheck_work); /* flush param */
err = i915_subtests(tests, i915);
@@ -1679,7 +1674,7 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915)
mutex_unlock(&i915->drm.struct_mutex);
i915_modparams.enable_hangcheck = saved_hangcheck;
- intel_runtime_pm_put(i915);
+ intel_runtime_pm_put(i915, wakeref);
return err;
}