diff options
-rw-r--r-- | drivers/gpu/drm/i915/i915_debugfs.c | 11 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/i915_gem.c | 10 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/i915_irq.c | 26 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/intel_breadcrumbs.c | 69 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/intel_engine_cs.c | 1 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/intel_ringbuffer.h | 6 |
6 files changed, 56 insertions, 67 deletions
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 9bd41581b592..0627e170ea25 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -787,8 +787,6 @@ static void i915_ring_seqno_info(struct seq_file *m, seq_printf(m, "Current sequence (%s): %x\n", engine->name, intel_engine_get_seqno(engine)); - seq_printf(m, "Current user interrupts (%s): %lx\n", - engine->name, READ_ONCE(engine->breadcrumbs.irq_wakeups)); spin_lock(&b->lock); for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { @@ -1434,11 +1432,10 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused) engine->hangcheck.seqno, seqno[id], engine->last_submitted_seqno); - seq_printf(m, "\twaiters? %d\n", - intel_engine_has_waiter(engine)); - seq_printf(m, "\tuser interrupts = %lx [current %lx]\n", - engine->hangcheck.user_interrupts, - READ_ONCE(engine->breadcrumbs.irq_wakeups)); + seq_printf(m, "\twaiters? %s, fake irq active? %s\n", + yesno(intel_engine_has_waiter(engine)), + yesno(test_bit(engine->id, + &dev_priv->gpu_error.missed_irq_rings))); seq_printf(m, "\tACTHD = 0x%08llx [current 0x%08llx]\n", (long long)engine->hangcheck.acthd, (long long)acthd[id]); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 3fb574974343..e68b4c62be88 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2524,7 +2524,6 @@ i915_gem_idle_work_handler(struct work_struct *work) container_of(work, typeof(*dev_priv), gt.idle_work.work); struct drm_device *dev = &dev_priv->drm; struct intel_engine_cs *engine; - unsigned int stuck_engines; bool rearm_hangcheck; if (!READ_ONCE(dev_priv->gt.awake)) @@ -2554,15 +2553,6 @@ i915_gem_idle_work_handler(struct work_struct *work) dev_priv->gt.awake = false; rearm_hangcheck = false; - /* As we have disabled hangcheck, we need to unstick any waiters still - * hanging around. However, as we may be racing against the interrupt - * handler or the waiters themselves, we skip enabling the fake-irq. - */ - stuck_engines = intel_kick_waiters(dev_priv); - if (unlikely(stuck_engines)) - DRM_DEBUG_DRIVER("kicked stuck waiters (%x)...missed irq?\n", - stuck_engines); - if (INTEL_GEN(dev_priv) >= 6) gen6_rps_idle(dev_priv); intel_runtime_pm_put(dev_priv); diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 591f452ece68..ebb83d5a448b 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -972,10 +972,8 @@ static void ironlake_rps_change_irq_handler(struct drm_i915_private *dev_priv) static void notify_ring(struct intel_engine_cs *engine) { smp_store_mb(engine->breadcrumbs.irq_posted, true); - if (intel_engine_wakeup(engine)) { + if (intel_engine_wakeup(engine)) trace_i915_gem_request_notify(engine); - engine->breadcrumbs.irq_wakeups++; - } } static void vlv_c0_read(struct drm_i915_private *dev_priv, @@ -3044,22 +3042,6 @@ engine_stuck(struct intel_engine_cs *engine, u64 acthd) return HANGCHECK_HUNG; } -static unsigned long kick_waiters(struct intel_engine_cs *engine) -{ - struct drm_i915_private *i915 = engine->i915; - unsigned long irq_count = READ_ONCE(engine->breadcrumbs.irq_wakeups); - - if (engine->hangcheck.user_interrupts == irq_count && - !test_and_set_bit(engine->id, &i915->gpu_error.missed_irq_rings)) { - if (!test_bit(engine->id, &i915->gpu_error.test_irq_rings)) - DRM_ERROR("Hangcheck timer elapsed... %s idle\n", - engine->name); - - intel_engine_enable_fake_irq(engine); - } - - return irq_count; -} /* * This is called when the chip hasn't reported back with completed * batchbuffers in a long time. We keep track per ring seqno progress and @@ -3097,7 +3079,6 @@ static void i915_hangcheck_elapsed(struct work_struct *work) bool busy = intel_engine_has_waiter(engine); u64 acthd; u32 seqno; - unsigned user_interrupts; semaphore_clear_deadlocks(dev_priv); @@ -3114,15 +3095,11 @@ static void i915_hangcheck_elapsed(struct work_struct *work) acthd = intel_engine_get_active_head(engine); seqno = intel_engine_get_seqno(engine); - /* Reset stuck interrupts between batch advances */ - user_interrupts = 0; - if (engine->hangcheck.seqno == seqno) { if (!intel_engine_is_active(engine)) { engine->hangcheck.action = HANGCHECK_IDLE; if (busy) { /* Safeguard against driver failure */ - user_interrupts = kick_waiters(engine); engine->hangcheck.score += BUSY; } } else { @@ -3185,7 +3162,6 @@ static void i915_hangcheck_elapsed(struct work_struct *work) engine->hangcheck.seqno = seqno; engine->hangcheck.acthd = acthd; - engine->hangcheck.user_interrupts = user_interrupts; busy_count += busy; } diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index 90867446f1a5..7be9af1d5424 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -26,6 +26,40 @@ #include "i915_drv.h" +static void intel_breadcrumbs_hangcheck(unsigned long data) +{ + struct intel_engine_cs *engine = (struct intel_engine_cs *)data; + struct intel_breadcrumbs *b = &engine->breadcrumbs; + + if (!b->irq_enabled) + return; + + if (time_before(jiffies, b->timeout)) { + mod_timer(&b->hangcheck, b->timeout); + return; + } + + DRM_DEBUG("Hangcheck timer elapsed... %s idle\n", engine->name); + set_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings); + mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1); + + /* Ensure that even if the GPU hangs, we get woken up. + * + * However, note that if no one is waiting, we never notice + * a gpu hang. Eventually, we will have to wait for a resource + * held by the GPU and so trigger a hangcheck. In the most + * pathological case, this will be upon memory starvation! To + * prevent this, we also queue the hangcheck from the retire + * worker. + */ + i915_queue_hangcheck(engine->i915); +} + +static unsigned long wait_timeout(void) +{ + return round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES); +} + static void intel_breadcrumbs_fake_irq(unsigned long data) { struct intel_engine_cs *engine = (struct intel_engine_cs *)data; @@ -51,13 +85,6 @@ static void irq_enable(struct intel_engine_cs *engine) */ engine->breadcrumbs.irq_posted = true; - /* Make sure the current hangcheck doesn't falsely accuse a just - * started irq handler from missing an interrupt (because the - * interrupt count still matches the stale value from when - * the irq handler was disabled, many hangchecks ago). - */ - engine->breadcrumbs.irq_wakeups++; - spin_lock_irq(&engine->i915->irq_lock); engine->irq_enable(engine); spin_unlock_irq(&engine->i915->irq_lock); @@ -98,17 +125,13 @@ static void __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b) } if (!b->irq_enabled || - test_bit(engine->id, &i915->gpu_error.missed_irq_rings)) + test_bit(engine->id, &i915->gpu_error.missed_irq_rings)) { mod_timer(&b->fake_irq, jiffies + 1); - - /* Ensure that even if the GPU hangs, we get woken up. - * - * However, note that if no one is waiting, we never notice - * a gpu hang. Eventually, we will have to wait for a resource - * held by the GPU and so trigger a hangcheck. In the most - * pathological case, this will be upon memory starvation! - */ - i915_queue_hangcheck(i915); + } else { + /* Ensure we never sleep indefinitely */ + GEM_BUG_ON(!time_after(b->timeout, jiffies)); + mod_timer(&b->hangcheck, b->timeout); + } } static void __intel_breadcrumbs_disable_irq(struct intel_breadcrumbs *b) @@ -219,6 +242,7 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine, GEM_BUG_ON(!next && !first); if (next && next != &wait->node) { GEM_BUG_ON(first); + b->timeout = wait_timeout(); b->first_wait = to_wait(next); smp_store_mb(b->irq_seqno_bh, b->first_wait->tsk); /* As there is a delay between reading the current @@ -245,6 +269,7 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine, if (first) { GEM_BUG_ON(rb_first(&b->waiters) != &wait->node); + b->timeout = wait_timeout(); b->first_wait = wait; smp_store_mb(b->irq_seqno_bh, wait->tsk); /* After assigning ourselves as the new bottom-half, we must @@ -277,11 +302,6 @@ bool intel_engine_add_wait(struct intel_engine_cs *engine, return first; } -void intel_engine_enable_fake_irq(struct intel_engine_cs *engine) -{ - mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1); -} - static inline bool chain_wakeup(struct rb_node *rb, int priority) { return rb && to_wait(rb)->tsk->prio <= priority; @@ -359,6 +379,7 @@ void intel_engine_remove_wait(struct intel_engine_cs *engine, * the interrupt, or if we have to handle an * exception rather than a seqno completion. */ + b->timeout = wait_timeout(); b->first_wait = to_wait(next); smp_store_mb(b->irq_seqno_bh, b->first_wait->tsk); if (b->first_wait->seqno != wait->seqno) @@ -536,6 +557,9 @@ int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine) setup_timer(&b->fake_irq, intel_breadcrumbs_fake_irq, (unsigned long)engine); + setup_timer(&b->hangcheck, + intel_breadcrumbs_hangcheck, + (unsigned long)engine); /* Spawn a thread to provide a common bottom-half for all signals. * As this is an asynchronous interface we cannot steal the current @@ -560,6 +584,7 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine) if (!IS_ERR_OR_NULL(b->signaler)) kthread_stop(b->signaler); + del_timer_sync(&b->hangcheck); del_timer_sync(&b->fake_irq); } diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index e9b301ae2d0c..0dd3d1de18aa 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -164,6 +164,7 @@ cleanup: void intel_engine_init_hangcheck(struct intel_engine_cs *engine) { memset(&engine->hangcheck, 0, sizeof(engine->hangcheck)); + clear_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings); } static void intel_engine_init_requests(struct intel_engine_cs *engine) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index bf9a6e56e719..c7a6db310f9f 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -75,7 +75,6 @@ enum intel_engine_hangcheck_action { struct intel_engine_hangcheck { u64 acthd; - unsigned long user_interrupts; u32 seqno; int score; enum intel_engine_hangcheck_action action; @@ -173,7 +172,6 @@ struct intel_engine_cs { */ struct intel_breadcrumbs { struct task_struct *irq_seqno_bh; /* bh for user interrupts */ - unsigned long irq_wakeups; bool irq_posted; spinlock_t lock; /* protects the lists of requests */ @@ -183,6 +181,9 @@ struct intel_engine_cs { struct task_struct *signaler; /* used for fence signalling */ struct drm_i915_gem_request *first_signal; struct timer_list fake_irq; /* used after a missed interrupt */ + struct timer_list hangcheck; /* detect missed interrupts */ + + unsigned long timeout; bool irq_enabled : 1; bool rpm_wakelock : 1; @@ -560,7 +561,6 @@ static inline bool intel_engine_wakeup(struct intel_engine_cs *engine) return wakeup; } -void intel_engine_enable_fake_irq(struct intel_engine_cs *engine); void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine); unsigned int intel_kick_waiters(struct drm_i915_private *i915); unsigned int intel_kick_signalers(struct drm_i915_private *i915); |